--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1RemSet.hpp"
32 32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
33 33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
34 34 #include "gc_implementation/shared/vmGCOperations.hpp"
35 35 #include "memory/genOopClosures.inline.hpp"
36 36 #include "memory/referencePolicy.hpp"
37 37 #include "memory/resourceArea.hpp"
38 38 #include "oops/oop.inline.hpp"
39 39 #include "runtime/handles.inline.hpp"
40 40 #include "runtime/java.hpp"
41 41
42 42 //
43 43 // CM Bit Map Wrapper
44 44
45 45 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
46 46 _bm((uintptr_t*)NULL, 0),
47 47 _shifter(shifter) {
48 48 _bmStartWord = (HeapWord*)(rs.base());
49 49 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
50 50 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
51 51 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
52 52
53 53 guarantee(brs.is_reserved(), "couldn't allocate CM bit map");
54 54 // For now we'll just commit all of the bit map up front.
55 55 // Later on we'll try to be more parsimonious with swap.
56 56 guarantee(_virtual_space.initialize(brs, brs.size()),
57 57 "couldn't reserve backing store for CM bit map");
58 58 assert(_virtual_space.committed_size() == brs.size(),
59 59 "didn't reserve backing store for all of CM bit map?");
60 60 _bm.set_map((uintptr_t*)_virtual_space.low());
61 61 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
62 62 _bmWordSize, "inconsistency in bit map sizing");
63 63 _bm.set_size(_bmWordSize >> _shifter);
64 64 }
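// An illustrative sketch of the address <-> bit mapping the sizing
// above relies on (the accessors live in the header; the signatures
// here are paraphrased, not copied from it):
//
//   size_t heapWordToOffset(HeapWord* addr) const {
//     return (addr - _bmStartWord) >> _shifter;
//   }
//   HeapWord* offsetToHeapWord(size_t offset) const {
//     return _bmStartWord + (offset << _shifter);
//   }
//
// With the default MinObjAlignment of one heap word the shifter is 0,
// i.e. one bit per heap word. A 1 GB heap on a 64-bit VM (2^27 heap
// words) therefore needs 2^27 bits = 16 MB of bitmap, matching the
// reservation computed above:
// (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1 bytes.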
65 65
66 66 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
67 67 HeapWord* limit) const {
68 68 // First we must round addr *up* to a possible object boundary.
69 69 addr = (HeapWord*)align_size_up((intptr_t)addr,
70 70 HeapWordSize << _shifter);
71 71 size_t addrOffset = heapWordToOffset(addr);
72 72 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
73 73 size_t limitOffset = heapWordToOffset(limit);
74 74 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
75 75 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
76 76 assert(nextAddr >= addr, "get_next_one postcondition");
77 77 assert(nextAddr == limit || isMarked(nextAddr),
78 78 "get_next_one postcondition");
79 79 return nextAddr;
80 80 }
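// Illustrative usage sketch (not a code path in this file): walking
// every marked object in [bottom, top) with this method:
//
//   HeapWord* cur = bm->getNextMarkedWordAddress(bottom, top);
//   while (cur < top) {
//     oop obj = oop(cur);
//     // ... visit obj ...
//     cur = bm->getNextMarkedWordAddress(cur + 1, top);
//   }
//
// The initial round-up matters because a caller may pass an address
// interior to an object; rounding to the next possible object
// boundary keeps the offset arithmetic on object-aligned bits.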
81 81
82 82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
83 83 HeapWord* limit) const {
84 84 size_t addrOffset = heapWordToOffset(addr);
85 85 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
86 86 size_t limitOffset = heapWordToOffset(limit);
87 87 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
88 88 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
89 89 assert(nextAddr >= addr, "get_next_one postcondition");
90 90 assert(nextAddr == limit || !isMarked(nextAddr),
91 91 "get_next_one postcondition");
92 92 return nextAddr;
93 93 }
94 94
95 95 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
96 96 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
97 97 return (int) (diff >> _shifter);
98 98 }
99 99
100 100 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
101 101 HeapWord* left = MAX2(_bmStartWord, mr.start());
102 102 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
103 103 if (right > left) {
104 104 // Right-open interval [leftOffset, rightOffset).
105 105 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
106 106 } else {
107 107 return true;
108 108 }
109 109 }
110 110
111 111 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
112 112 size_t from_start_index,
113 113 HeapWord* to_start_word,
114 114 size_t word_num) {
115 115 _bm.mostly_disjoint_range_union(from_bitmap,
116 116 from_start_index,
117 117 heapWordToOffset(to_start_word),
118 118 word_num);
119 119 }
120 120
121 121 #ifndef PRODUCT
122 122 bool CMBitMapRO::covers(ReservedSpace rs) const {
123 123 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
124 124 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
125 125 "size inconsistency");
126 126 return _bmStartWord == (HeapWord*)(rs.base()) &&
127 127 _bmWordSize == rs.size()>>LogHeapWordSize;
128 128 }
129 129 #endif
130 130
131 131 void CMBitMap::clearAll() {
132 132 _bm.clear();
134 134 }
135 135
136 136 void CMBitMap::markRange(MemRegion mr) {
137 137 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
138 138 assert(!mr.is_empty(), "unexpected empty region");
139 139 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
140 140 ((HeapWord *) mr.end())),
141 141 "markRange memory region end is not card aligned");
142 142 // convert address range into offset range
143 143 _bm.at_put_range(heapWordToOffset(mr.start()),
144 144 heapWordToOffset(mr.end()), true);
145 145 }
146 146
147 147 void CMBitMap::clearRange(MemRegion mr) {
148 148 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
149 149 assert(!mr.is_empty(), "unexpected empty region");
150 150 // convert address range into offset range
151 151 _bm.at_put_range(heapWordToOffset(mr.start()),
152 152 heapWordToOffset(mr.end()), false);
153 153 }
154 154
155 155 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
156 156 HeapWord* end_addr) {
157 157 HeapWord* start = getNextMarkedWordAddress(addr);
158 158 start = MIN2(start, end_addr);
159 159 HeapWord* end = getNextUnmarkedWordAddress(start);
160 160 end = MIN2(end, end_addr);
161 161 assert(start <= end, "Consistency check");
162 162 MemRegion mr(start, end);
163 163 if (!mr.is_empty()) {
164 164 clearRange(mr);
165 165 }
166 166 return mr;
167 167 }
168 168
169 169 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
170 170 _base(NULL), _cm(cm)
171 171 #ifdef ASSERT
172 172 , _drain_in_progress(false)
173 173 , _drain_in_progress_yields(false)
174 174 #endif
175 175 {}
176 176
177 177 void CMMarkStack::allocate(size_t size) {
178 178 _base = NEW_C_HEAP_ARRAY(oop, size);
179 179 if (_base == NULL)
180 180 vm_exit_during_initialization("Failed to allocate "
181 181 "CM mark stack");
182 182 _index = 0;
183 183 // QQQQ cast ...
184 184 _capacity = (jint) size;
185 185 _oops_do_bound = -1;
186 186 NOT_PRODUCT(_max_depth = 0);
187 187 }
188 188
189 189 CMMarkStack::~CMMarkStack() {
190 190 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
191 191 }
192 192
193 193 void CMMarkStack::par_push(oop ptr) {
194 194 while (true) {
195 195 if (isFull()) {
196 196 _overflow = true;
197 197 return;
198 198 }
199 199 // Otherwise...
200 200 jint index = _index;
201 201 jint next_index = index+1;
202 202 jint res = Atomic::cmpxchg(next_index, &_index, index);
203 203 if (res == index) {
204 204 _base[index] = ptr;
205 205 // Note that we don't maintain this atomically. We could, but it
206 206 // doesn't seem necessary.
207 207 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
208 208 return;
209 209 }
210 210 // Otherwise, we need to try again.
211 211 }
212 212 }
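// A sketch of the interleaving the CAS loop above tolerates
// (illustrative, not part of the original source):
//
//   Thread A                         Thread B
//   index = _index;           // k   index = _index;           // k
//   cmpxchg(k+1, &_index, k)  // ok  cmpxchg(k+1, &_index, k)  // fails
//   _base[k] = ptr;                  // retries with _index == k+1
//
// Only the winner of the cmpxchg owns slot k; the loser re-reads
// _index and tries the next slot. Note that the slot is filled after
// the index is published; the surrounding marking phases are
// structured so that this transient window is harmless.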
213 213
214 214 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
215 215 while (true) {
216 216 if (isFull()) {
217 217 _overflow = true;
218 218 return;
219 219 }
220 220 // Otherwise...
221 221 jint index = _index;
222 222 jint next_index = index + n;
223 223 if (next_index > _capacity) {
224 224 _overflow = true;
225 225 return;
226 226 }
227 227 jint res = Atomic::cmpxchg(next_index, &_index, index);
228 228 if (res == index) {
229 229 for (int i = 0; i < n; i++) {
230 230 int ind = index + i;
231 231 assert(ind < _capacity, "By overflow test above.");
232 232 _base[ind] = ptr_arr[i];
233 233 }
234 234 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
235 235 return;
236 236 }
237 237 // Otherwise, we need to try again.
238 238 }
239 239 }
240 240
241 241
242 242 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
243 243 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
244 244 jint start = _index;
245 245 jint next_index = start + n;
246 246 if (next_index > _capacity) {
247 247 _overflow = true;
248 248 return;
249 249 }
250 250 // Otherwise.
251 251 _index = next_index;
252 252 for (int i = 0; i < n; i++) {
253 253 int ind = start + i;
254 254 assert(ind < _capacity, "By overflow test above.");
255 255 _base[ind] = ptr_arr[i];
256 256 }
257 257 }
258 258
259 259
260 260 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
261 261 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
262 262 jint index = _index;
263 263 if (index == 0) {
264 264 *n = 0;
265 265 return false;
266 266 } else {
267 267 int k = MIN2(max, index);
268 268 jint new_ind = index - k;
269 269 for (int j = 0; j < k; j++) {
270 270 ptr_arr[j] = _base[new_ind + j];
271 271 }
272 272 _index = new_ind;
273 273 *n = k;
274 274 return true;
275 275 }
276 276 }
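// Illustrative consumer loop for the locked bulk operations above
// (a sketch, not part of the original source). Unlike the
// single-slot CAS push, a bulk transfer must publish the elements
// and the index together, hence ParGCRareEvent_lock:
//
//   oop buffer[64];
//   int n;
//   while (stack->par_pop_arr(buffer, 64, &n)) {
//     for (int j = 0; j < n; j++) {
//       // ... process buffer[j] ...
//     }
//   }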
277 277
278 278
279 279 CMRegionStack::CMRegionStack() : _base(NULL) {}
280 280
281 281 void CMRegionStack::allocate(size_t size) {
282 282 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
283 283 if (_base == NULL)
284 284 vm_exit_during_initialization("Failed to allocate "
285 285 "CM region mark stack");
286 286 _index = 0;
287 287 // QQQQ cast ...
288 288 _capacity = (jint) size;
289 289 }
290 290
291 291 CMRegionStack::~CMRegionStack() {
292 292 if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
293 293 }
294 294
295 295 void CMRegionStack::push_lock_free(MemRegion mr) {
296 296 assert(mr.word_size() > 0, "Precondition");
297 297 while (true) {
298 298 jint index = _index;
299 299
300 300 if (index >= _capacity) {
301 301 _overflow = true;
302 302 return;
303 303 }
304 304 // Otherwise...
305 305 jint next_index = index+1;
306 306 jint res = Atomic::cmpxchg(next_index, &_index, index);
307 307 if (res == index) {
308 308 _base[index] = mr;
309 309 return;
310 310 }
311 311 // Otherwise, we need to try again.
312 312 }
313 313 }
314 314
315 315 // Lock-free pop of the region stack. Called during the concurrent
316 316 // marking / remark phases. Should only be called in tandem with
317 317 // other lock-free pops.
318 318 MemRegion CMRegionStack::pop_lock_free() {
319 319 while (true) {
320 320 jint index = _index;
321 321
322 322 if (index == 0) {
323 323 return MemRegion();
324 324 }
325 325 // Otherwise...
326 326 jint next_index = index-1;
327 327 jint res = Atomic::cmpxchg(next_index, &_index, index);
328 328 if (res == index) {
329 329 MemRegion mr = _base[next_index];
330 330 if (mr.start() != NULL) {
331 331 assert(mr.end() != NULL, "invariant");
332 332 assert(mr.word_size() > 0, "invariant");
333 333 return mr;
334 334 } else {
335 335 // that entry was invalidated... let's skip it
336 336 assert(mr.end() == NULL, "invariant");
337 337 }
338 338 }
339 339 // Otherwise, we need to try again.
340 340 }
341 341 }
342 342
343 343 #if 0
344 344 // The routines that manipulate the region stack with a lock are
345 345 // not currently used. They should be retained, however, as a
346 346 // diagnostic aid.
347 347
348 348 void CMRegionStack::push_with_lock(MemRegion mr) {
349 349 assert(mr.word_size() > 0, "Precondition");
350 350 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
351 351
352 352 if (isFull()) {
353 353 _overflow = true;
354 354 return;
355 355 }
356 356
357 357 _base[_index] = mr;
358 358 _index += 1;
359 359 }
360 360
361 361 MemRegion CMRegionStack::pop_with_lock() {
362 362 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
363 363
364 364 while (true) {
365 365 if (_index == 0) {
366 366 return MemRegion();
367 367 }
368 368 _index -= 1;
369 369
370 370 MemRegion mr = _base[_index];
371 371 if (mr.start() != NULL) {
372 372 assert(mr.end() != NULL, "invariant");
373 373 assert(mr.word_size() > 0, "invariant");
374 374 return mr;
375 375 } else {
376 376 // that entry was invalidated... let's skip it
377 377 assert(mr.end() == NULL, "invariant");
378 378 }
379 379 }
380 380 }
381 381 #endif
382 382
383 383 bool CMRegionStack::invalidate_entries_into_cset() {
384 384 bool result = false;
385 385 G1CollectedHeap* g1h = G1CollectedHeap::heap();
386 386 for (int i = 0; i < _oops_do_bound; ++i) {
387 387 MemRegion mr = _base[i];
388 388 if (mr.start() != NULL) {
389 389 assert(mr.end() != NULL, "invariant");
390 390 assert(mr.word_size() > 0, "invariant");
391 391 HeapRegion* hr = g1h->heap_region_containing(mr.start());
392 392 assert(hr != NULL, "invariant");
393 393 if (hr->in_collection_set()) {
394 394 // The region points into the collection set
395 395 _base[i] = MemRegion();
396 396 result = true;
397 397 }
398 398 } else {
399 399 // that entry was invalidated... let's skip it
400 400 assert(mr.end() == NULL, "invariant");
401 401 }
402 402 }
403 403 return result;
404 404 }
405 405
406 406 template<class OopClosureClass>
407 407 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
408 408 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
409 409 || SafepointSynchronize::is_at_safepoint(),
410 410 "Drain recursion must be yield-safe.");
411 411 bool res = true;
412 412 debug_only(_drain_in_progress = true);
413 413 debug_only(_drain_in_progress_yields = yield_after);
414 414 while (!isEmpty()) {
415 415 oop newOop = pop();
416 416 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
417 417 assert(newOop->is_oop(), "Expected an oop");
418 418 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
419 419 "only grey objects on this stack");
420 420 // iterate over the oops in this oop, marking and pushing
421 421 // the ones in the G1 heap.
422 422 newOop->oop_iterate(cl);
423 423 if (yield_after && _cm->do_yield_check()) {
424 424 res = false; break;
425 425 }
426 426 }
427 427 debug_only(_drain_in_progress = false);
428 428 return res;
429 429 }
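// Illustrative usage of drain() (a sketch; the closure type below is
// hypothetical, not defined in this file):
//
//   MyGreyObjectClosure cl(this, _nextMarkBitMap);
//   bool finished = _markStack.drain(&cl, _nextMarkBitMap,
//                                    true /* yield_after */);
//   if (!finished) {
//     // yielded early; the caller must arrange to resume the drain
//   }
//
// With yield_after == true the drain cooperates with safepoint
// requests via _cm->do_yield_check() and returns false on bailout.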
430 430
431 431 void CMMarkStack::oops_do(OopClosure* f) {
432 432 if (_index == 0) return;
433 433 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
434 434 "Bound must be set.");
435 435 for (int i = 0; i < _oops_do_bound; i++) {
436 436 f->do_oop(&_base[i]);
437 437 }
438 438 _oops_do_bound = -1;
439 439 }
440 440
441 441 bool ConcurrentMark::not_yet_marked(oop obj) const {
442 442 return (_g1h->is_obj_ill(obj)
443 443 || (_g1h->is_in_permanent(obj)
444 444 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
445 445 }
446 446
447 447 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
448 448 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
449 449 #endif // _MSC_VER
450 450
451 451 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
452 452 int max_regions) :
453 453 _markBitMap1(rs, MinObjAlignment - 1),
454 454 _markBitMap2(rs, MinObjAlignment - 1),
455 455
456 456 _parallel_marking_threads(0),
457 457 _sleep_factor(0.0),
458 458 _marking_task_overhead(1.0),
459 459 _cleanup_sleep_factor(0.0),
460 460 _cleanup_task_overhead(1.0),
461 461 _cleanup_list("Cleanup List"),
462 462 _region_bm(max_regions, false /* in_resource_area*/),
463 463 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
464 464 CardTableModRefBS::card_shift,
465 465 false /* in_resource_area*/),
466 466 _prevMarkBitMap(&_markBitMap1),
467 467 _nextMarkBitMap(&_markBitMap2),
468 468 _at_least_one_mark_complete(false),
469 469
470 470 _markStack(this),
471 471 _regionStack(),
472 472 // _finger set in set_non_marking_state
473 473
474 474 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
475 475 // _active_tasks set in set_non_marking_state
476 476 // _tasks set inside the constructor
477 477 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
478 478 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
479 479
480 480 _has_overflown(false),
481 481 _concurrent(false),
482 482 _has_aborted(false),
483 483 _restart_for_overflow(false),
484 484 _concurrent_marking_in_progress(false),
485 485 _should_gray_objects(false),
486 486
487 487 // _verbose_level set below
488 488
489 489 _init_times(),
490 490 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
491 491 _cleanup_times(),
492 492 _total_counting_time(0.0),
493 493 _total_rs_scrub_time(0.0),
494 494
495 495 _parallel_workers(NULL)
496 496 {
497 497 CMVerboseLevel verbose_level =
498 498 (CMVerboseLevel) G1MarkingVerboseLevel;
499 499 if (verbose_level < no_verbose)
500 500 verbose_level = no_verbose;
501 501 if (verbose_level > high_verbose)
502 502 verbose_level = high_verbose;
503 503 _verbose_level = verbose_level;
504 504
505 505 if (verbose_low())
506 506 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
507 507 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
508 508
509 509 _markStack.allocate(MarkStackSize);
510 510 _regionStack.allocate(G1MarkRegionStackSize);
511 511
512 512 // Create & start a ConcurrentMark thread.
513 513 _cmThread = new ConcurrentMarkThread(this);
514 514 assert(cmThread() != NULL, "CM Thread should have been created");
515 515 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
516 516
517 517 _g1h = G1CollectedHeap::heap();
518 518 assert(CGC_lock != NULL, "Where's the CGC_lock?");
519 519 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
520 520 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
521 521
522 522 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
523 523 satb_qs.set_buffer_size(G1SATBBufferSize);
524 524
525 525 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
526 526 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
527 527
528 528 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
529 529 _active_tasks = _max_task_num;
530 530 for (int i = 0; i < (int) _max_task_num; ++i) {
531 531 CMTaskQueue* task_queue = new CMTaskQueue();
532 532 task_queue->initialize();
533 533 _task_queues->register_queue(i, task_queue);
534 534
535 535 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
536 536 _accum_task_vtime[i] = 0.0;
537 537 }
538 538
539 539 if (ConcGCThreads > ParallelGCThreads) {
540 540 vm_exit_during_initialization("Can't have more ConcGCThreads "
541 541 "than ParallelGCThreads.");
542 542 }
543 543 if (ParallelGCThreads == 0) {
544 544 // if we are not running with any parallel GC threads we will not
545 545 // spawn any marking threads either
546 546 _parallel_marking_threads = 0;
547 547 _sleep_factor = 0.0;
548 548 _marking_task_overhead = 1.0;
549 549 } else {
550 550 if (ConcGCThreads > 0) {
551 551 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
552 552 // if both are set
553 553
554 554 _parallel_marking_threads = ConcGCThreads;
555 555 _sleep_factor = 0.0;
556 556 _marking_task_overhead = 1.0;
557 557 } else if (G1MarkingOverheadPercent > 0) {
558 558 // we will calculate the number of parallel marking threads
559 559 // based on a target overhead with respect to the soft real-time
560 560 // goal
561 561
562 562 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
563 563 double overall_cm_overhead =
564 564 (double) MaxGCPauseMillis * marking_overhead /
565 565 (double) GCPauseIntervalMillis;
566 566 double cpu_ratio = 1.0 / (double) os::processor_count();
567 567 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
568 568 double marking_task_overhead =
569 569 overall_cm_overhead / marking_thread_num *
570 570 (double) os::processor_count();
571 571 double sleep_factor =
572 572 (1.0 - marking_task_overhead) / marking_task_overhead;
573 573
574 574 _parallel_marking_threads = (size_t) marking_thread_num;
575 575 _sleep_factor = sleep_factor;
576 576 _marking_task_overhead = marking_task_overhead;
577 577 } else {
578 578 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
579 579 _sleep_factor = 0.0;
580 580 _marking_task_overhead = 1.0;
581 581 }
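// A worked example of the G1MarkingOverheadPercent path above
// (illustrative numbers, not from the source): on an 8-CPU machine
// with MaxGCPauseMillis = 200, GCPauseIntervalMillis = 1000 and
// G1MarkingOverheadPercent = 10:
//
//   marking_overhead      = 10 / 100.0         = 0.10
//   overall_cm_overhead   = 200 * 0.10 / 1000  = 0.02  (2% of all CPUs)
//   cpu_ratio             = 1 / 8              = 0.125
//   marking_thread_num    = ceil(0.02 / 0.125) = 1
//   marking_task_overhead = 0.02 / 1 * 8       = 0.16
//   sleep_factor          = (1 - 0.16) / 0.16  = 5.25
//
// i.e. a single marking thread that sleeps 5.25x as long as it runs,
// keeping its share of one processor near 16%.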
582 582
583 583 if (parallel_marking_threads() > 1)
584 584 _cleanup_task_overhead = 1.0;
585 585 else
586 586 _cleanup_task_overhead = marking_task_overhead();
587 587 _cleanup_sleep_factor =
588 588 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
589 589
590 590 #if 0
591 591 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
592 592 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
593 593 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
594 594 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
595 595 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
596 596 #endif
597 597
598 598 guarantee(parallel_marking_threads() > 0, "peace of mind");
599 599 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
600 600 (int) _parallel_marking_threads, false, true);
601 601 if (_parallel_workers == NULL) {
602 602 vm_exit_during_initialization("Failed necessary allocation.");
603 603 } else {
604 604 _parallel_workers->initialize_workers();
605 605 }
606 606 }
607 607
608 608 // so that the call below can read a sensible value
609 609 _heap_start = (HeapWord*) rs.base();
610 610 set_non_marking_state();
611 611 }
612 612
613 613 void ConcurrentMark::update_g1_committed(bool force) {
614 614 // If concurrent marking is not in progress, then we do not need to
615 615 // update _heap_end. This has a subtle and important
616 616 // side-effect. Imagine that two evacuation pauses happen between
617 617 // marking completion and remark. The first one can grow the
618 618 // heap (hence now the finger is below the heap end). Then, the
619 619 // second one could unnecessarily push regions on the region
620 620 // stack. This causes the invariant that the region stack is empty
621 621 // at the beginning of remark to be false. By ensuring that we do
622 622 // not observe heap expansions after marking is complete, then we do
623 623 // not have this problem.
624 624 if (!concurrent_marking_in_progress() && !force)
625 625 return;
626 626
627 627 MemRegion committed = _g1h->g1_committed();
628 628 assert(committed.start() == _heap_start, "start shouldn't change");
629 629 HeapWord* new_end = committed.end();
630 630 if (new_end > _heap_end) {
631 631 // The heap has been expanded.
632 632
633 633 _heap_end = new_end;
634 634 }
635 635 // Notice that the heap can also shrink. However, this only happens
636 636 // during a Full GC (at least currently) and the entire marking
637 637 // phase will bail out and the task will not be restarted. So, let's
638 638 // do nothing.
639 639 }
640 640
641 641 void ConcurrentMark::reset() {
642 642 // Starting values for these two. This should be called in a STW
643 643 // phase. CM will be notified of any future g1_committed
644 644 // expansions at the end of evacuation pauses, when tasks are
645 645 // inactive.
646 646 MemRegion committed = _g1h->g1_committed();
647 647 _heap_start = committed.start();
648 648 _heap_end = committed.end();
649 649
650 650 // Separated the asserts so that we know which one fires.
651 651 assert(_heap_start != NULL, "heap bounds should look ok");
652 652 assert(_heap_end != NULL, "heap bounds should look ok");
653 653 assert(_heap_start < _heap_end, "heap bounds should look ok");
654 654
655 655 // reset all the marking data structures and any necessary flags
656 656 clear_marking_state();
657 657
658 658 if (verbose_low())
659 659 gclog_or_tty->print_cr("[global] resetting");
660 660
661 661 // We do reset all of them, since different phases will use
662 662 // different number of active threads. So, it's easiest to have all
663 663 // of them ready.
664 664 for (int i = 0; i < (int) _max_task_num; ++i) {
665 665 _tasks[i]->reset(_nextMarkBitMap);
666 666 }
667 667
668 668 // we need this to make sure that the flag is on during the evac
669 669 // pause with initial mark piggy-backed
670 670 set_concurrent_marking_in_progress();
671 671 }
672 672
673 673 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
674 674 assert(active_tasks <= _max_task_num, "we should not have more");
675 675
676 676 _active_tasks = active_tasks;
677 677 // Need to update the three data structures below according to the
678 678 // number of active threads for this phase.
679 679 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
680 680 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
681 681 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
682 682
683 683 _concurrent = concurrent;
684 684 // We propagate this to all tasks, not just the active ones.
685 685 for (int i = 0; i < (int) _max_task_num; ++i)
686 686 _tasks[i]->set_concurrent(concurrent);
687 687
688 688 if (concurrent) {
689 689 set_concurrent_marking_in_progress();
690 690 } else {
691 691 // We currently assume that the concurrent flag has been set to
692 692 // false before we start remark. At this point we should also be
693 693 // in a STW phase.
694 694 assert(!concurrent_marking_in_progress(), "invariant");
695 695 assert(_finger == _heap_end, "only way to get here");
696 696 update_g1_committed(true);
697 697 }
698 698 }
699 699
700 700 void ConcurrentMark::set_non_marking_state() {
701 701 // We set the global marking state to some default values when we're
702 702 // not doing marking.
703 703 clear_marking_state();
704 704 _active_tasks = 0;
705 705 clear_concurrent_marking_in_progress();
706 706 }
707 707
708 708 ConcurrentMark::~ConcurrentMark() {
709 709 for (int i = 0; i < (int) _max_task_num; ++i) {
710 710 delete _task_queues->queue(i);
711 711 delete _tasks[i];
712 712 }
713 713 delete _task_queues;
714 714 FREE_C_HEAP_ARRAY(CMTask*, _tasks);
715 715 }
716 716
717 717 // This closure is used to mark refs into the G1 heap
718 718 // from external roots, recording them in the CM bit map.
719 719 // Called at the first checkpoint.
720 720 //
721 721
722 722 void ConcurrentMark::clearNextBitmap() {
723 723 G1CollectedHeap* g1h = G1CollectedHeap::heap();
724 724 G1CollectorPolicy* g1p = g1h->g1_policy();
725 725
726 726 // Make sure that the concurrent mark thread looks to still be in
727 727 // the current cycle.
728 728 guarantee(cmThread()->during_cycle(), "invariant");
729 729
730 730 // We are finishing up the current cycle by clearing the next
731 731 // marking bitmap and getting it ready for the next cycle. During
732 732 // this time no other cycle can start. So, let's make sure that this
733 733 // is the case.
734 734 guarantee(!g1h->mark_in_progress(), "invariant");
735 735
736 736 // clear the mark bitmap (no grey objects to start with).
737 737 // We need to do this in chunks and offer to yield in between
738 738 // each chunk.
739 739 HeapWord* start = _nextMarkBitMap->startWord();
740 740 HeapWord* end = _nextMarkBitMap->endWord();
741 741 HeapWord* cur = start;
742 742 size_t chunkSize = M;
743 743 while (cur < end) {
744 744 HeapWord* next = cur + chunkSize;
745 745 if (next > end)
746 746 next = end;
747 747 MemRegion mr(cur,next);
748 748 _nextMarkBitMap->clearRange(mr);
749 749 cur = next;
750 750 do_yield_check();
751 751
752 752 // Repeat the asserts from above. We'll do them as asserts here to
753 753 // minimize their overhead on the product. However, we'll have
754 754 // them as guarantees at the beginning / end of the bitmap
755 755 // clearing to get some checking in the product.
756 756 assert(cmThread()->during_cycle(), "invariant");
757 757 assert(!g1h->mark_in_progress(), "invariant");
758 758 }
759 759
760 760 // Repeat the asserts from above.
761 761 guarantee(cmThread()->during_cycle(), "invariant");
762 762 guarantee(!g1h->mark_in_progress(), "invariant");
763 763 }
764 764
765 765 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
766 766 public:
767 767 bool doHeapRegion(HeapRegion* r) {
768 768 if (!r->continuesHumongous()) {
769 769 r->note_start_of_marking(true);
770 770 }
771 771 return false;
772 772 }
773 773 };
774 774
775 775 void ConcurrentMark::checkpointRootsInitialPre() {
776 776 G1CollectedHeap* g1h = G1CollectedHeap::heap();
777 777 G1CollectorPolicy* g1p = g1h->g1_policy();
778 778
779 779 _has_aborted = false;
780 780
781 781 #ifndef PRODUCT
782 782 if (G1PrintReachableAtInitialMark) {
783 783 print_reachable("at-cycle-start",
784 784 true /* use_prev_marking */, true /* all */);
785 785 }
786 786 #endif
787 787
788 788 // Initialise marking structures. This has to be done in a STW phase.
789 789 reset();
790 790 }
791 791
792 792 class CMMarkRootsClosure: public OopsInGenClosure {
793 793 private:
794 794 ConcurrentMark* _cm;
795 795 G1CollectedHeap* _g1h;
796 796 bool _do_barrier;
797 797
798 798 public:
799 799 CMMarkRootsClosure(ConcurrentMark* cm,
800 800 G1CollectedHeap* g1h,
801 801 bool do_barrier) : _cm(cm), _g1h(g1h),
802 802 _do_barrier(do_barrier) { }
803 803
804 804 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
805 805 virtual void do_oop( oop* p) { do_oop_work(p); }
806 806
807 807 template <class T> void do_oop_work(T* p) {
808 808 T heap_oop = oopDesc::load_heap_oop(p);
809 809 if (!oopDesc::is_null(heap_oop)) {
810 810 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
811 811 assert(obj->is_oop() || obj->mark() == NULL,
812 812 "expected an oop, possibly with mark word displaced");
813 813 HeapWord* addr = (HeapWord*)obj;
814 814 if (_g1h->is_in_g1_reserved(addr)) {
815 815 _cm->grayRoot(obj);
816 816 }
817 817 }
818 818 if (_do_barrier) {
819 819 assert(!_g1h->is_in_g1_reserved(p),
820 820 "Should be called on external roots");
821 821 do_barrier(p);
822 822 }
823 823 }
824 824 };
825 825
826 826 void ConcurrentMark::checkpointRootsInitialPost() {
827 827 G1CollectedHeap* g1h = G1CollectedHeap::heap();
828 828
829 829 // For each region note start of marking.
830 830 NoteStartOfMarkHRClosure startcl;
831 831 g1h->heap_region_iterate(&startcl);
832 832
833 833 // Start weak-reference discovery.
834 834 ReferenceProcessor* rp = g1h->ref_processor();
835 835 rp->verify_no_references_recorded();
836 836 rp->enable_discovery(); // enable ("weak") refs discovery
837 837 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
838 838
839 839 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
840 840 // This is the start of the marking cycle; we expect all
841 841 // threads to have SATB queues with active set to false.
842 842 satb_mq_set.set_active_all_threads(true, /* new active value */
843 843 false /* expected_active */);
844 844
845 845 // update_g1_committed() will be called at the end of an evac pause
846 846 // when marking is on. So, it's also called at the end of the
847 847 // initial-mark pause to update the heap end, if the heap expands
848 848 // during it. No need to call it here.
849 849 }
850 850
851 851 // Checkpoint the roots into this generation from outside
852 852 // this generation. [Note this initial checkpoint need only
853 853 // be approximate -- we'll do a catch up phase subsequently.]
854 854 void ConcurrentMark::checkpointRootsInitial() {
855 855 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
856 856 G1CollectedHeap* g1h = G1CollectedHeap::heap();
857 857
858 858 double start = os::elapsedTime();
859 859
860 860 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
861 861 g1p->record_concurrent_mark_init_start();
862 862 checkpointRootsInitialPre();
863 863
864 864 // YSR: when concurrent precleaning is in place, we'll
865 865 // need to clear the cached card table here
866 866
867 867 ResourceMark rm;
868 868 HandleMark hm;
869 869
870 870 g1h->ensure_parsability(false);
871 871 g1h->perm_gen()->save_marks();
872 872
873 873 CMMarkRootsClosure notOlder(this, g1h, false);
874 874 CMMarkRootsClosure older(this, g1h, true);
875 875
876 876 g1h->set_marking_started();
877 877 g1h->rem_set()->prepare_for_younger_refs_iterate(false);
878 878
879 879 g1h->process_strong_roots(true, // activate StrongRootsScope
880 880 false, // fake perm gen collection
881 881 SharedHeap::SO_AllClasses,
882 882 &notOlder, // Regular roots
883 883 NULL, // do not visit active blobs
884 884 &older // Perm Gen Roots
885 885 );
886 886 checkpointRootsInitialPost();
887 887
888 888 // Statistics.
889 889 double end = os::elapsedTime();
890 890 _init_times.add((end - start) * 1000.0);
891 891
892 892 g1p->record_concurrent_mark_init_end();
893 893 }
894 894
895 895 /*
896 896 Notice that in the next two methods, we actually leave the STS
897 897 during the barrier sync and join it immediately afterwards. If we
898 898 do not do this, then the following deadlock can occur: one
899 899 thread could be in the barrier sync code, waiting for the other
900 900 thread to also sync up, whereas another one could be trying to
901 901 yield, while also waiting for the other threads to sync up too.
902 902
903 903 Because the thread that does the sync barrier has left the STS, it
904 904 is possible for it to be suspended while a Full GC or an evacuation
905 905 pause occurs. This is actually safe, since entering the sync
906 906 barrier is one of the last things do_marking_step() does, and it
907 907 doesn't manipulate any data structures afterwards.
908 908 */
909 909
910 910 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
911 911 if (verbose_low())
912 912 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
913 913
914 914 ConcurrentGCThread::stsLeave();
915 915 _first_overflow_barrier_sync.enter();
916 916 ConcurrentGCThread::stsJoin();
917 917 // at this point everyone should have synced up and not be doing any
918 918 // more work
919 919
920 920 if (verbose_low())
921 921 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
922 922
923 923 // let task 0 do this
924 924 if (task_num == 0) {
925 925 // task 0 is responsible for clearing the global data structures
926 926 clear_marking_state();
927 927
928 928 if (PrintGC) {
929 929 gclog_or_tty->date_stamp(PrintGCDateStamps);
930 930 gclog_or_tty->stamp(PrintGCTimeStamps);
931 931 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
932 932 }
933 933 }
934 934
935 935 // after this, each task should reset its own data structures and
936 936 // then go into the second barrier
937 937 }
938 938
939 939 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
940 940 if (verbose_low())
941 941 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
942 942
943 943 ConcurrentGCThread::stsLeave();
944 944 _second_overflow_barrier_sync.enter();
945 945 ConcurrentGCThread::stsJoin();
946 946 // at this point everything should be re-initialised and ready to go
947 947
948 948 if (verbose_low())
949 949 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
950 950 }
951 951
952 952 void ConcurrentMark::grayRoot(oop p) {
953 953 HeapWord* addr = (HeapWord*) p;
954 954 // We can't really check against _heap_start and _heap_end, since it
955 955 // is possible during an evacuation pause with piggy-backed
956 956 // initial-mark that the committed space is expanded during the
957 957 // pause without CM observing this change. So the assertion below
958 958 // is a bit conservative, but better than nothing.
959 959 assert(_g1h->g1_committed().contains(addr),
960 960 "address should be within the heap bounds");
961 961
962 962 if (!_nextMarkBitMap->isMarked(addr))
963 963 _nextMarkBitMap->parMark(addr);
964 964 }
965 965
966 966 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
967 967 // The objects on the region have already been marked "in bulk" by
968 968 // the caller. We only need to decide whether to push the region on
969 969 // the region stack or not.
970 970
971 971 if (!concurrent_marking_in_progress() || !_should_gray_objects)
972 972 // We're done with marking and waiting for remark. We do not need to
973 973 // push anything else on the region stack.
974 974 return;
975 975
976 976 HeapWord* finger = _finger;
977 977
978 978 if (verbose_low())
979 979 gclog_or_tty->print_cr("[global] attempting to push "
980 980 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
981 981 PTR_FORMAT, mr.start(), mr.end(), finger);
982 982
983 983 if (mr.start() < finger) {
984 984 // The finger is always heap region aligned and it is not possible
985 985 // for mr to span heap regions.
986 986 assert(mr.end() <= finger, "invariant");
987 987
988 988 // Separated the asserts so that we know which one fires.
989 989 assert(mr.start() <= mr.end(),
990 990 "region boundaries should fall within the committed space");
991 991 assert(_heap_start <= mr.start(),
992 992 "region boundaries should fall within the committed space");
993 993 assert(mr.end() <= _heap_end,
994 994 "region boundaries should fall within the committed space");
995 995 if (verbose_low())
996 996 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
997 997 "below the finger, pushing it",
998 998 mr.start(), mr.end());
999 999
1000 1000 if (!region_stack_push_lock_free(mr)) {
1001 1001 if (verbose_low())
1002 1002 gclog_or_tty->print_cr("[global] region stack has overflown.");
1003 1003 }
1004 1004 }
1005 1005 }
1006 1006
1007 1007 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1008 1008 // The object is not marked by the caller. We need to at least mark
1009 1009 // it and maybe push it on the stack.
1010 1010
1011 1011 HeapWord* addr = (HeapWord*)p;
1012 1012 if (!_nextMarkBitMap->isMarked(addr)) {
1013 1013 // We definitely need to mark it, irrespective whether we bail out
1014 1014 // because we're done with marking.
1015 1015 if (_nextMarkBitMap->parMark(addr)) {
1016 1016 if (!concurrent_marking_in_progress() || !_should_gray_objects)
1017 1017 // If we're done with concurrent marking and we're waiting for
1018 1018 // remark, then we're not pushing anything on the stack.
1019 1019 return;
1020 1020
1021 1021 // No OrderAccess::store_load() is needed. It is implicit in the
1022 1022 // CAS done in parMark(addr) above
1023 1023 HeapWord* finger = _finger;
1024 1024
1025 1025 if (addr < finger) {
1026 1026 if (!mark_stack_push(oop(addr))) {
1027 1027 if (verbose_low())
1028 1028 gclog_or_tty->print_cr("[global] global stack overflow "
1029 1029 "during parMark");
1030 1030 }
1031 1031 }
1032 1032 }
1033 1033 }
1034 1034 }
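// The finger check above in a picture (illustrative, not part of the
// original source):
//
//   _heap_start              finger               _heap_end
//      |---- already scanned ---|---- still to scan ----|
//
//   addr <  finger : the bitmap scan has passed addr, so the newly
//                    marked object must also go on the mark stack;
//   addr >= finger : the scan will still reach addr and process the
//                    object from the bitmap, so no push is needed.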
1035 1035
1036 1036 class CMConcurrentMarkingTask: public AbstractGangTask {
1037 1037 private:
1038 1038 ConcurrentMark* _cm;
1039 1039 ConcurrentMarkThread* _cmt;
1040 1040
1041 1041 public:
1042 1042 void work(int worker_i) {
1043 1043 assert(Thread::current()->is_ConcurrentGC_thread(),
1044 1044 "this should only be done by a conc GC thread");
1045 1045 ResourceMark rm;
1046 1046
1047 1047 double start_vtime = os::elapsedVTime();
1048 1048
1049 1049 ConcurrentGCThread::stsJoin();
1050 1050
1051 1051 assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1052 1052 CMTask* the_task = _cm->task(worker_i);
1053 1053 the_task->record_start_time();
1054 1054 if (!_cm->has_aborted()) {
1055 1055 do {
1056 1056 double start_vtime_sec = os::elapsedVTime();
1057 1057 double start_time_sec = os::elapsedTime();
1058 1058 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1059 1059
1060 1060 the_task->do_marking_step(mark_step_duration_ms,
1061 1061 true /* do_stealing */,
1062 1062 true /* do_termination */);
1063 1063
1064 1064 double end_time_sec = os::elapsedTime();
1065 1065 double end_vtime_sec = os::elapsedVTime();
1066 1066 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1067 1067 double elapsed_time_sec = end_time_sec - start_time_sec;
1068 1068 _cm->clear_has_overflown();
1069 1069
1070 1070 bool ret = _cm->do_yield_check(worker_i);
1071 1071
1072 1072 jlong sleep_time_ms;
1073 1073 if (!_cm->has_aborted() && the_task->has_aborted()) {
1074 1074 sleep_time_ms =
1075 1075 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1076 1076 ConcurrentGCThread::stsLeave();
1077 1077 os::sleep(Thread::current(), sleep_time_ms, false);
1078 1078 ConcurrentGCThread::stsJoin();
1079 1079 }
1080 1080 double end_time2_sec = os::elapsedTime();
1081 1081 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1082 1082
1083 1083 #if 0
1084 1084 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1085 1085 "overhead %1.4lf",
1086 1086 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1087 1087 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1088 1088 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1089 1089 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1090 1090 #endif
1091 1091 } while (!_cm->has_aborted() && the_task->has_aborted());
1092 1092 }
1093 1093 the_task->record_end_time();
1094 1094 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1095 1095
1096 1096 ConcurrentGCThread::stsLeave();
1097 1097
1098 1098 double end_vtime = os::elapsedVTime();
1099 1099 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1100 1100 }
1101 1101
1102 1102 CMConcurrentMarkingTask(ConcurrentMark* cm,
1103 1103 ConcurrentMarkThread* cmt) :
1104 1104 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1105 1105
1106 1106 ~CMConcurrentMarkingTask() { }
1107 1107 };
1108 1108
1109 1109 void ConcurrentMark::markFromRoots() {
1110 1110 // we might be tempted to assert that:
1111 1111 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1112 1112 // "inconsistent argument?");
1113 1113 // However that wouldn't be right, because it's possible that
1114 1114 // a safepoint is indeed in progress as a younger generation
1115 1115 // stop-the-world GC happens even as we mark in this generation.
1116 1116
1117 1117 _restart_for_overflow = false;
1118 1118
1119 1119 size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1120 1120 set_phase(active_workers, true /* concurrent */);
1121 1121
1122 1122 CMConcurrentMarkingTask markingTask(this, cmThread());
1123 1123 if (parallel_marking_threads() > 0)
1124 1124 _parallel_workers->run_task(&markingTask);
1125 1125 else
1126 1126 markingTask.work(0);
1127 1127 print_stats();
1128 1128 }
1129 1129
1130 1130 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1131 1131 // world is stopped at this checkpoint
1132 1132 assert(SafepointSynchronize::is_at_safepoint(),
1133 1133 "world should be stopped");
1134 1134 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1135 1135
1136 1136 // If a full collection has happened, we shouldn't do this.
1137 1137 if (has_aborted()) {
1138 1138 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1139 1139 return;
1140 1140 }
1141 1141
1142 1142 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1143 1143
1144 1144 if (VerifyDuringGC) {
1145 1145 HandleMark hm; // handle scope
1146 1146 gclog_or_tty->print(" VerifyDuringGC:(before)");
1147 1147 Universe::heap()->prepare_for_verify();
1148 1148 Universe::verify(true, false, true);
1149 1149 }
1150 1150
1151 1151 G1CollectorPolicy* g1p = g1h->g1_policy();
1152 1152 g1p->record_concurrent_mark_remark_start();
1153 1153
1154 1154 double start = os::elapsedTime();
1155 1155
1156 1156 checkpointRootsFinalWork();
1157 1157
1158 1158 double mark_work_end = os::elapsedTime();
1159 1159
1160 1160 weakRefsWork(clear_all_soft_refs);
1161 1161
1162 1162 if (has_overflown()) {
1163 1163 // Oops. We overflowed. Restart concurrent marking.
1164 1164 _restart_for_overflow = true;
1165 1165 // Clear the flag. We do not need it any more.
1166 1166 clear_has_overflown();
1167 1167 if (G1TraceMarkStackOverflow)
1168 1168 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1169 1169 } else {
1170 1170 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1171 1171 // We're done with marking.
1172 1172 // This is the end of the marking cycle; we expect all
1173 1173 // threads to have SATB queues with active set to true.
1174 1174 satb_mq_set.set_active_all_threads(false, /* new active value */
1175 1175 true /* expected_active */);
1176 1176
1177 1177 if (VerifyDuringGC) {
1178 1178 HandleMark hm; // handle scope
1179 1179 gclog_or_tty->print(" VerifyDuringGC:(after)");
1180 1180 Universe::heap()->prepare_for_verify();
1181 1181 Universe::heap()->verify(/* allow_dirty */ true,
1182 1182 /* silent */ false,
1183 1183 /* use_prev_marking */ false);
1184 1184 }
1185 1185 assert(!restart_for_overflow(), "sanity");
1186 1186 }
1187 1187
1188 1188 // Reset the marking state if marking completed
1189 1189 if (!restart_for_overflow()) {
1190 1190 set_non_marking_state();
1191 1191 }
1192 1192
1193 1193 #if VERIFY_OBJS_PROCESSED
1194 1194 _scan_obj_cl.objs_processed = 0;
1195 1195 ThreadLocalObjQueue::objs_enqueued = 0;
1196 1196 #endif
1197 1197
1198 1198 // Statistics
1199 1199 double now = os::elapsedTime();
1200 1200 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1201 1201 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1202 1202 _remark_times.add((now - start) * 1000.0);
1203 1203
1204 1204 g1p->record_concurrent_mark_remark_end();
1205 1205 }
1206 1206
1207 1207
1208 1208 #define CARD_BM_TEST_MODE 0
1209 1209
1210 1210 class CalcLiveObjectsClosure: public HeapRegionClosure {
1211 1211
1212 1212 CMBitMapRO* _bm;
1213 1213 ConcurrentMark* _cm;
1214 1214 bool _changed;
1215 1215 bool _yield;
1216 1216 size_t _words_done;
1217 1217 size_t _tot_live;
1218 1218 size_t _tot_used;
1219 1219 size_t _regions_done;
1220 1220 double _start_vtime_sec;
1221 1221
1222 1222 BitMap* _region_bm;
1223 1223 BitMap* _card_bm;
1224 1224 intptr_t _bottom_card_num;
1225 1225 bool _final;
1226 1226
1227 1227 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1228 1228 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1229 1229 #if CARD_BM_TEST_MODE
1230 1230 guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1231 1231 #else
1232 1232 _card_bm->par_at_put(i - _bottom_card_num, 1);
1233 1233 #endif
1234 1234 }
1235 1235 }
1236 1236
1237 1237 public:
1238 1238 CalcLiveObjectsClosure(bool final,
1239 1239 CMBitMapRO *bm, ConcurrentMark *cm,
1240 1240 BitMap* region_bm, BitMap* card_bm) :
1241 1241 _bm(bm), _cm(cm), _changed(false), _yield(true),
1242 1242 _words_done(0), _tot_live(0), _tot_used(0),
1243 1243 _region_bm(region_bm), _card_bm(card_bm),_final(final),
1244 1244 _regions_done(0), _start_vtime_sec(0.0)
1245 1245 {
1246 1246 _bottom_card_num =
1247 1247 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1248 1248 CardTableModRefBS::card_shift);
1249 1249 }
1250 1250
1251 1251 // It takes a region that's not empty (i.e., it has at least one
1252 1252 // live object in it) and sets its corresponding bit on the region
1253 1253 // bitmap to 1. If the region is "starts humongous" it will also set
1254 1254 // to 1 the bits on the region bitmap that correspond to its
1255 1255 // associated "continues humongous" regions.
1256 1256 void set_bit_for_region(HeapRegion* hr) {
1257 1257 assert(!hr->continuesHumongous(), "should have filtered those out");
1258 1258
1259 1259 size_t index = hr->hrs_index();
1260 1260 if (!hr->startsHumongous()) {
1261 1261 // Normal (non-humongous) case: just set the bit.
1262 1262 _region_bm->par_at_put((BitMap::idx_t) index, true);
1263 1263 } else {
1264 1264 // Starts humongous case: calculate how many regions are part of
1265 1265 // this humongous region and then set the bit range. It might
1266 1266 // have been a bit more efficient to look at the object that
1267 1267 // spans these humongous regions to calculate their number from
1268 1268 // the object's size. However, it's a good idea to calculate
1269 1269 // this based on the metadata itself, and not the region
1270 1270 // contents, so that this code is not aware of what goes into
1271 1271 // the humongous regions (in case this changes in the future).
1272 1272 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1273 1273 size_t end_index = index + 1;
1274 1274 while (end_index < g1h->n_regions()) {
1275 1275 HeapRegion* chr = g1h->region_at(end_index);
1276 1276 if (!chr->continuesHumongous()) {
1277 1277 break;
1278 1278 }
1279 1279 end_index += 1;
1280 1280 }
1281 1281 _region_bm->par_at_put_range((BitMap::idx_t) index,
1282 1282 (BitMap::idx_t) end_index, true);
1283 1283 }
1284 1284 }
1285 1285
1286 1286 bool doHeapRegion(HeapRegion* hr) {
1287 1287 if (!_final && _regions_done == 0)
1288 1288 _start_vtime_sec = os::elapsedVTime();
1289 1289
1290 1290 if (hr->continuesHumongous()) {
1291 1291 // We will ignore these here and process them when their
1292 1292 // associated "starts humongous" region is processed (see
1293 1293 // set_bit_for_heap_region()). Note that we cannot rely on their
1294 1294 // associated "starts humongous" region to have their bit set to
1295 1295 // 1 since, due to the region chunking in the parallel region
1296 1296 // iteration, a "continues humongous" region might be visited
1297 1297 // before its associated "starts humongous".
1298 1298 return false;
1299 1299 }
1300 1300
1301 1301 HeapWord* nextTop = hr->next_top_at_mark_start();
1302 1302 HeapWord* start = hr->top_at_conc_mark_count();
1303 1303 assert(hr->bottom() <= start && start <= hr->end() &&
1304 1304 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1305 1305 start <= nextTop,
1306 1306 "Preconditions.");
1307 1307 // Otherwise, record the number of words we'll examine.
1308 1308 size_t words_done = (nextTop - start);
1309 1309 // Find the first marked object at or after "start".
1310 1310 start = _bm->getNextMarkedWordAddress(start, nextTop);
1311 1311 size_t marked_bytes = 0;
1312 1312
1313 1313 // Below, the term "card num" means the result of shifting an address
1314 1314 // by the card shift -- address 0 corresponds to card number 0. One
1315 1315 // must subtract the card num of the bottom of the heap to obtain a
1316 1316 // card table index.
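// A worked example of the card-num arithmetic (illustrative
// addresses, not from the source): with the usual 512-byte cards,
// CardTableModRefBS::card_shift is 9. If the heap bottom is
// 0x7f0000000000, then _bottom_card_num = 0x7f0000000000 >> 9, and
// an object starting at 0x7f0000000280 has
//   obj_card_num     = 0x7f0000000280 >> 9
//   card table index = obj_card_num - _bottom_card_num = 1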
1317 1317 // The first card num of the sequence of live cards currently being
1318 1318 // constructed. -1 ==> no sequence.
1319 1319 intptr_t start_card_num = -1;
1320 1320 // The last card num of the sequence of live cards currently being
1321 1321 // constructed. -1 ==> no sequence.
1322 1322 intptr_t last_card_num = -1;
1323 1323
1324 1324 while (start < nextTop) {
1325 1325 if (_yield && _cm->do_yield_check()) {
1326 1326 // We yielded. It might be for a full collection, in which case
1327 1327 // all bets are off; terminate the traversal.
1328 1328 if (_cm->has_aborted()) {
1329 1329 _changed = false;
1330 1330 return true;
1331 1331 } else {
1332 1332 // Otherwise, it might be a collection pause, and the region
1333 1333 // we're looking at might be in the collection set. We'll
1334 1334 // abandon this region.
1335 1335 return false;
1336 1336 }
1337 1337 }
1338 1338 oop obj = oop(start);
1339 1339 int obj_sz = obj->size();
1340 1340 // The card num of the start of the current object.
1341 1341 intptr_t obj_card_num =
1342 1342 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1343 1343
1344 1344 HeapWord* obj_last = start + obj_sz - 1;
1345 1345 intptr_t obj_last_card_num =
1346 1346 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1347 1347
1348 1348 if (obj_card_num != last_card_num) {
1349 1349 if (start_card_num == -1) {
1350 1350 assert(last_card_num == -1, "Both or neither.");
1351 1351 start_card_num = obj_card_num;
1352 1352 } else {
1353 1353 assert(last_card_num != -1, "Both or neither.");
1354 1354 assert(obj_card_num >= last_card_num, "Inv");
1355 1355 if ((obj_card_num - last_card_num) > 1) {
1356 1356 // Mark the last run, and start a new one.
1357 1357 mark_card_num_range(start_card_num, last_card_num);
1358 1358 start_card_num = obj_card_num;
1359 1359 }
1360 1360 }
1361 1361 #if CARD_BM_TEST_MODE
1362 1362 /*
1363 1363 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1364 1364 obj_card_num - _bottom_card_num,
1365 1365 obj_last_card_num - _bottom_card_num);
1366 1366 */
1367 1367 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1368 1368 _card_bm->par_at_put(j - _bottom_card_num, 1);
1369 1369 }
1370 1370 #endif
1371 1371 }
1372 1372 // In any case, we set the last card num.
1373 1373 last_card_num = obj_last_card_num;
1374 1374
1375 1375 marked_bytes += (size_t)obj_sz * HeapWordSize;
1376 1376 // Find the next marked object after this one.
1377 1377 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1378 1378 _changed = true;
1379 1379 }
1380 1380 // Handle the last range, if any.
1381 1381 if (start_card_num != -1)
1382 1382 mark_card_num_range(start_card_num, last_card_num);
1383 1383 if (_final) {
1384 1384 // Mark the allocated-since-marking portion...
1385 1385 HeapWord* tp = hr->top();
1386 1386 if (nextTop < tp) {
1387 1387 start_card_num =
1388 1388 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1389 1389 last_card_num =
1390 1390 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1391 1391 mark_card_num_range(start_card_num, last_card_num);
1392 1392 // This definitely means the region has live objects.
1393 1393 set_bit_for_region(hr);
1394 1394 }
1395 1395 }
1396 1396
1397 1397 hr->add_to_marked_bytes(marked_bytes);
1398 1398 // Update the live region bitmap.
1399 1399 if (marked_bytes > 0) {
1400 1400 set_bit_for_region(hr);
1401 1401 }
1402 1402 hr->set_top_at_conc_mark_count(nextTop);
1403 1403 _tot_live += hr->next_live_bytes();
1404 1404 _tot_used += hr->used();
1405 1405 _words_done = words_done;
1406 1406
1407 1407 if (!_final) {
1408 1408 ++_regions_done;
1409 1409 if (_regions_done % 10 == 0) {
1410 1410 double end_vtime_sec = os::elapsedVTime();
1411 1411 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1412 1412 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1413 1413 jlong sleep_time_ms =
1414 1414 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1415 1415 os::sleep(Thread::current(), sleep_time_ms, false);
1416 1416 _start_vtime_sec = end_vtime_sec;
1417 1417 }
1418 1418 }
1419 1419 }
1420 1420
1421 1421 return false;
1422 1422 }
1423 1423
1424 1424 bool changed() { return _changed; }
1425 1425 void reset() { _changed = false; _words_done = 0; }
1426 1426 void no_yield() { _yield = false; }
1427 1427 size_t words_done() { return _words_done; }
1428 1428 size_t tot_live() { return _tot_live; }
1429 1429 size_t tot_used() { return _tot_used; }
1430 1430 };
1431 1431
1432 1432
1433 1433 void ConcurrentMark::calcDesiredRegions() {
1434 1434 _region_bm.clear();
1435 1435 _card_bm.clear();
1436 1436 CalcLiveObjectsClosure calccl(false /*final*/,
1437 1437 nextMarkBitMap(), this,
1438 1438 &_region_bm, &_card_bm);
1439 1439 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1440 1440 g1h->heap_region_iterate(&calccl);
1441 1441
1442 1442 do {
1443 1443 calccl.reset();
1444 1444 g1h->heap_region_iterate(&calccl);
1445 1445 } while (calccl.changed());
1446 1446 }
1447 1447
1448 1448 class G1ParFinalCountTask: public AbstractGangTask {
1449 1449 protected:
1450 1450 G1CollectedHeap* _g1h;
1451 1451 CMBitMap* _bm;
1452 1452 size_t _n_workers;
1453 1453 size_t *_live_bytes;
1454 1454 size_t *_used_bytes;
1455 1455 BitMap* _region_bm;
1456 1456 BitMap* _card_bm;
1457 1457 public:
1458 1458 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1459 1459 BitMap* region_bm, BitMap* card_bm) :
1460 1460 AbstractGangTask("G1 final counting"), _g1h(g1h),
1461 1461 _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1462 1462 {
1463 1463 if (ParallelGCThreads > 0)
1464 1464 _n_workers = _g1h->workers()->total_workers();
1465 1465 else
1466 1466 _n_workers = 1;
1467 1467 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1468 1468 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1469 1469 }
1470 1470
1471 1471 ~G1ParFinalCountTask() {
1472 1472 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1473 1473 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1474 1474 }
1475 1475
1476 1476 void work(int i) {
1477 1477 CalcLiveObjectsClosure calccl(true /*final*/,
1478 1478 _bm, _g1h->concurrent_mark(),
1479 1479 _region_bm, _card_bm);
1480 1480 calccl.no_yield();
1481 1481 if (G1CollectedHeap::use_parallel_gc_threads()) {
1482 1482 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1483 1483 HeapRegion::FinalCountClaimValue);
1484 1484 } else {
1485 1485 _g1h->heap_region_iterate(&calccl);
1486 1486 }
1487 1487 assert(calccl.complete(), "Shouldn't have yielded!");
1488 1488
1489 1489 assert((size_t) i < _n_workers, "invariant");
1490 1490 _live_bytes[i] = calccl.tot_live();
1491 1491 _used_bytes[i] = calccl.tot_used();
1492 1492 }
1493 1493 size_t live_bytes() {
1494 1494 size_t live_bytes = 0;
1495 1495 for (size_t i = 0; i < _n_workers; ++i)
1496 1496 live_bytes += _live_bytes[i];
1497 1497 return live_bytes;
1498 1498 }
1499 1499 size_t used_bytes() {
1500 1500 size_t used_bytes = 0;
1501 1501 for (size_t i = 0; i < _n_workers; ++i)
1502 1502 used_bytes += _used_bytes[i];
1503 1503 return used_bytes;
1504 1504 }
1505 1505 };
1506 1506
1507 1507 class G1ParNoteEndTask;
1508 1508
1509 1509 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1510 1510 G1CollectedHeap* _g1;
1511 1511 int _worker_num;
1512 1512 size_t _max_live_bytes;
1513 1513 size_t _regions_claimed;
1514 1514 size_t _freed_bytes;
1515 1515 FreeRegionList* _local_cleanup_list;
1516 1516 HumongousRegionSet* _humongous_proxy_set;
1517 1517 HRRSCleanupTask* _hrrs_cleanup_task;
1518 1518 double _claimed_region_time;
1519 1519 double _max_region_time;
1520 1520
1521 1521 public:
1522 1522 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1523 1523 int worker_num,
1524 1524 FreeRegionList* local_cleanup_list,
1525 1525 HumongousRegionSet* humongous_proxy_set,
1526 1526 HRRSCleanupTask* hrrs_cleanup_task);
1527 1527 size_t freed_bytes() { return _freed_bytes; }
1528 1528
1529 1529 bool doHeapRegion(HeapRegion *r);
1530 1530
1531 1531 size_t max_live_bytes() { return _max_live_bytes; }
1532 1532 size_t regions_claimed() { return _regions_claimed; }
1533 1533 double claimed_region_time_sec() { return _claimed_region_time; }
1534 1534 double max_region_time_sec() { return _max_region_time; }
1535 1535 };
1536 1536
1537 1537 class G1ParNoteEndTask: public AbstractGangTask {
1538 1538 friend class G1NoteEndOfConcMarkClosure;
1539 1539
1540 1540 protected:
1541 1541 G1CollectedHeap* _g1h;
1542 1542 size_t _max_live_bytes;
1543 1543 size_t _freed_bytes;
1544 1544 FreeRegionList* _cleanup_list;
1545 1545
1546 1546 public:
1547 1547 G1ParNoteEndTask(G1CollectedHeap* g1h,
1548 1548 FreeRegionList* cleanup_list) :
1549 1549 AbstractGangTask("G1 note end"), _g1h(g1h),
1550 1550 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1551 1551
1552 1552 void work(int i) {
1553 1553 double start = os::elapsedTime();
1554 1554 FreeRegionList local_cleanup_list("Local Cleanup List");
1555 1555 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1556 1556 HRRSCleanupTask hrrs_cleanup_task;
1557 1557 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1558 1558 &humongous_proxy_set,
1559 1559 &hrrs_cleanup_task);
1560 1560 if (G1CollectedHeap::use_parallel_gc_threads()) {
1561 1561 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1562 1562 HeapRegion::NoteEndClaimValue);
1563 1563 } else {
1564 1564 _g1h->heap_region_iterate(&g1_note_end);
1565 1565 }
1566 1566 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1567 1567
1568 1568 // Now update the lists
1569 1569 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1570 1570 NULL /* free_list */,
1571 1571 &humongous_proxy_set,
1572 1572 true /* par */);
1573 1573 {
1574 1574 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1575 1575 _max_live_bytes += g1_note_end.max_live_bytes();
1576 1576 _freed_bytes += g1_note_end.freed_bytes();
1577 1577
1578 1578 _cleanup_list->add_as_tail(&local_cleanup_list);
1579 1579 assert(local_cleanup_list.is_empty(), "post-condition");
1580 1580
1581 1581 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1582 1582 }
1583 1583 double end = os::elapsedTime();
1584 1584 if (G1PrintParCleanupStats) {
1585 1585 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1586 1586 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1587 1587 i, start, end, (end-start)*1000.0,
1588 1588 g1_note_end.regions_claimed(),
1589 1589 g1_note_end.claimed_region_time_sec()*1000.0,
1590 1590 g1_note_end.max_region_time_sec()*1000.0);
1591 1591 }
1592 1592 }
1593 1593 size_t max_live_bytes() { return _max_live_bytes; }
1594 1594 size_t freed_bytes() { return _freed_bytes; }
1595 1595 };
1596 1596
1597 1597 class G1ParScrubRemSetTask: public AbstractGangTask {
1598 1598 protected:
1599 1599 G1RemSet* _g1rs;
1600 1600 BitMap* _region_bm;
1601 1601 BitMap* _card_bm;
1602 1602 public:
1603 1603 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1604 1604 BitMap* region_bm, BitMap* card_bm) :
1605 1605 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1606 1606 _region_bm(region_bm), _card_bm(card_bm)
1607 1607 {}
1608 1608
1609 1609 void work(int i) {
1610 1610 if (G1CollectedHeap::use_parallel_gc_threads()) {
1611 1611 _g1rs->scrub_par(_region_bm, _card_bm, i,
1612 1612 HeapRegion::ScrubRemSetClaimValue);
1613 1613 } else {
1614 1614 _g1rs->scrub(_region_bm, _card_bm);
1615 1615 }
1616 1616 }
1617 1617
1618 1618 };
1619 1619
1620 1620 G1NoteEndOfConcMarkClosure::
1621 1621 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1622 1622 int worker_num,
1623 1623 FreeRegionList* local_cleanup_list,
1624 1624 HumongousRegionSet* humongous_proxy_set,
1625 1625 HRRSCleanupTask* hrrs_cleanup_task)
1626 1626 : _g1(g1), _worker_num(worker_num),
1627 1627 _max_live_bytes(0), _regions_claimed(0),
1628 1628 _freed_bytes(0),
1629 1629 _claimed_region_time(0.0), _max_region_time(0.0),
1630 1630 _local_cleanup_list(local_cleanup_list),
1631 1631 _humongous_proxy_set(humongous_proxy_set),
1632 1632 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1633 1633
1634 1634 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
1635 1635 // We use a claim value of zero here because all regions
1636 1636 // were claimed with value 1 in the FinalCount task.
1637 1637 hr->reset_gc_time_stamp();
1638 1638 if (!hr->continuesHumongous()) {
1639 1639 double start = os::elapsedTime();
1640 1640 _regions_claimed++;
1641 1641 hr->note_end_of_marking();
1642 1642 _max_live_bytes += hr->max_live_bytes();
1643 1643 _g1->free_region_if_empty(hr,
1644 1644 &_freed_bytes,
1645 1645 _local_cleanup_list,
1646 1646 _humongous_proxy_set,
1647 1647 _hrrs_cleanup_task,
1648 1648 true /* par */);
1649 1649 double region_time = (os::elapsedTime() - start);
1650 1650 _claimed_region_time += region_time;
1651 1651 if (region_time > _max_region_time) _max_region_time = region_time;
1652 1652 }
1653 1653 return false;
1654 1654 }
1655 1655
1656 1656 void ConcurrentMark::cleanup() {
1657 1657 // world is stopped at this checkpoint
1658 1658 assert(SafepointSynchronize::is_at_safepoint(),
1659 1659 "world should be stopped");
1660 1660 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1661 1661
1662 1662 // If a full collection has happened, we shouldn't do this.
1663 1663 if (has_aborted()) {
1664 1664 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1665 1665 return;
1666 1666 }
1667 1667
1668 1668 g1h->verify_region_sets_optional();
1669 1669
1670 1670 if (VerifyDuringGC) {
1671 1671 HandleMark hm; // handle scope
1672 1672 gclog_or_tty->print(" VerifyDuringGC:(before)");
1673 1673 Universe::heap()->prepare_for_verify();
1674 1674 Universe::verify(/* allow dirty */ true,
1675 1675 /* silent */ false,
1676 1676 /* prev marking */ true);
1677 1677 }
1678 1678
1679 1679 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1680 1680 g1p->record_concurrent_mark_cleanup_start();
1681 1681
1682 1682 double start = os::elapsedTime();
1683 1683
1684 1684 HeapRegionRemSet::reset_for_cleanup_tasks();
1685 1685
1686 1686 // Do counting once more with the world stopped for good measure.
1687 1687 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1688 1688 &_region_bm, &_card_bm);
1689 1689 if (G1CollectedHeap::use_parallel_gc_threads()) {
1690 1690 assert(g1h->check_heap_region_claim_values(
1691 1691 HeapRegion::InitialClaimValue),
1692 1692 "sanity check");
1693 1693
1694 1694 int n_workers = g1h->workers()->total_workers();
1695 1695 g1h->set_par_threads(n_workers);
1696 1696 g1h->workers()->run_task(&g1_par_count_task);
1697 1697 g1h->set_par_threads(0);
1698 1698
1699 1699 assert(g1h->check_heap_region_claim_values(
1700 1700 HeapRegion::FinalCountClaimValue),
1701 1701 "sanity check");
1702 1702 } else {
1703 1703 g1_par_count_task.work(0);
1704 1704 }
1705 1705
1706 1706 size_t known_garbage_bytes =
1707 1707 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1708 1708 #if 0
1709 1709 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1710 1710 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1711 1711 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1712 1712 (double) known_garbage_bytes / (double) (1024 * 1024));
1713 1713 #endif // 0
1714 1714 g1p->set_known_garbage_bytes(known_garbage_bytes);
1715 1715
1716 1716 size_t start_used_bytes = g1h->used();
1717 1717 _at_least_one_mark_complete = true;
1718 1718 g1h->set_marking_complete();
1719 1719
1720 1720 double count_end = os::elapsedTime();
1721 1721 double this_final_counting_time = (count_end - start);
1722 1722 if (G1PrintParCleanupStats) {
1723 1723 gclog_or_tty->print_cr("Cleanup:");
1724 1724 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1725 1725 this_final_counting_time*1000.0);
1726 1726 }
1727 1727 _total_counting_time += this_final_counting_time;
1728 1728
1729 1729 // Install newly created mark bitMap as "prev".
1730 1730 swapMarkBitMaps();
1731 1731
1732 1732 g1h->reset_gc_time_stamp();
1733 1733
1734 1734 // Note end of marking in all heap regions.
1735 1735 double note_end_start = os::elapsedTime();
1736 1736 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1737 1737 if (G1CollectedHeap::use_parallel_gc_threads()) {
1738 1738 int n_workers = g1h->workers()->total_workers();
1739 1739 g1h->set_par_threads(n_workers);
1740 1740 g1h->workers()->run_task(&g1_par_note_end_task);
1741 1741 g1h->set_par_threads(0);
1742 1742
1743 1743 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1744 1744 "sanity check");
1745 1745 } else {
1746 1746 g1_par_note_end_task.work(0);
1747 1747 }
1748 1748
1749 1749 if (!cleanup_list_is_empty()) {
1750 1750 // The cleanup list is not empty, so we'll have to process it
1751 1751 // concurrently. Notify anyone else that might be wanting free
1752 1752 // regions that there will be more free regions coming soon.
1753 1753 g1h->set_free_regions_coming();
1754 1754 }
1755 1755 double note_end_end = os::elapsedTime();
1756 1756 if (G1PrintParCleanupStats) {
1757 1757 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1758 1758 (note_end_end - note_end_start)*1000.0);
1759 1759 }
1760 1760
1761 1761
1762 1762   // Scrub the rem sets first: the record_concurrent_mark_cleanup_end()
1763 1763   // call below sorts the heap regions by a metric that scrubbing affects.
1764 1764 if (G1ScrubRemSets) {
1765 1765 double rs_scrub_start = os::elapsedTime();
1766 1766 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1767 1767 if (G1CollectedHeap::use_parallel_gc_threads()) {
1768 1768 int n_workers = g1h->workers()->total_workers();
1769 1769 g1h->set_par_threads(n_workers);
1770 1770 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1771 1771 g1h->set_par_threads(0);
1772 1772
1773 1773 assert(g1h->check_heap_region_claim_values(
1774 1774 HeapRegion::ScrubRemSetClaimValue),
1775 1775 "sanity check");
1776 1776 } else {
1777 1777 g1_par_scrub_rs_task.work(0);
1778 1778 }
1779 1779
1780 1780 double rs_scrub_end = os::elapsedTime();
1781 1781 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1782 1782 _total_rs_scrub_time += this_rs_scrub_time;
1783 1783 }
1784 1784
1785 1785 // this will also free any regions totally full of garbage objects,
1786 1786 // and sort the regions.
1787 1787 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1788 1788 g1_par_note_end_task.freed_bytes(),
1789 1789 g1_par_note_end_task.max_live_bytes());
1790 1790
1791 1791 // Statistics.
1792 1792 double end = os::elapsedTime();
1793 1793 _cleanup_times.add((end - start) * 1000.0);
1794 1794
1795 1795 // G1CollectedHeap::heap()->print();
1796 1796 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1797 1797 // G1CollectedHeap::heap()->get_gc_time_stamp());
1798 1798
1799 1799 if (PrintGC || PrintGCDetails) {
1800 1800 g1h->print_size_transition(gclog_or_tty,
1801 1801 start_used_bytes,
1802 1802 g1h->used(),
1803 1803 g1h->capacity());
1804 1804 }
1805 1805
1806 1806 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1807 1807 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1808 1808
1809 1809   // We need to make this a "collection" so any collection pause that
1810 1810 // races with it goes around and waits for completeCleanup to finish.
1811 1811 g1h->increment_total_collections();
1812 1812
1813 1813 if (VerifyDuringGC) {
1814 1814 HandleMark hm; // handle scope
1815 1815 gclog_or_tty->print(" VerifyDuringGC:(after)");
1816 1816 Universe::heap()->prepare_for_verify();
1817 1817 Universe::verify(/* allow dirty */ true,
1818 1818 /* silent */ false,
1819 1819 /* prev marking */ true);
1820 1820 }
1821 1821
1822 1822 g1h->verify_region_sets_optional();
1823 1823 }
1824 1824
1825 1825 void ConcurrentMark::completeCleanup() {
1826 1826 if (has_aborted()) return;
1827 1827
1828 1828 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1829 1829
1830 1830 _cleanup_list.verify_optional();
1831 1831 FreeRegionList tmp_free_list("Tmp Free List");
1832 1832
1833 1833 if (G1ConcRegionFreeingVerbose) {
1834 1834 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1835 1835 "cleanup list has "SIZE_FORMAT" entries",
1836 1836 _cleanup_list.length());
1837 1837 }
1838 1838
1839 1839   // No one else should be accessing the _cleanup_list at this point,
1840 1840 // so it's not necessary to take any locks
1841 1841 while (!_cleanup_list.is_empty()) {
1842 1842 HeapRegion* hr = _cleanup_list.remove_head();
1843 1843 assert(hr != NULL, "the list was not empty");
1844 1844 hr->rem_set()->clear();
1845 1845 tmp_free_list.add_as_tail(hr);
1846 1846
1847 1847 // Instead of adding one region at a time to the secondary_free_list,
1848 1848 // we accumulate them in the local list and move them a few at a
1849 1849 // time. This also cuts down on the number of notify_all() calls
1850 1850 // we do during this process. We'll also append the local list when
1851 1851 // _cleanup_list is empty (which means we just removed the last
1852 1852 // region from the _cleanup_list).
1853 1853 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1854 1854 _cleanup_list.is_empty()) {
1855 1855 if (G1ConcRegionFreeingVerbose) {
1856 1856 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1857 1857 "appending "SIZE_FORMAT" entries to the "
1858 1858 "secondary_free_list, clean list still has "
1859 1859 SIZE_FORMAT" entries",
1860 1860 tmp_free_list.length(),
1861 1861 _cleanup_list.length());
1862 1862 }
1863 1863
1864 1864 {
1865 1865 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1866 1866 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1867 1867 SecondaryFreeList_lock->notify_all();
1868 1868 }
1869 1869
1870 1870 if (G1StressConcRegionFreeing) {
1871 1871 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1872 1872 os::sleep(Thread::current(), (jlong) 1, false);
1873 1873 }
1874 1874 }
1875 1875 }
1876 1876 }
1877 1877 assert(tmp_free_list.is_empty(), "post-condition");
1878 1878 }
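The loop above is a batching pattern: regions accumulate in a private list and are spliced onto the shared secondary free list every G1SecondaryFreeListAppendLength entries, so the lock is taken and notify_all() issued once per batch instead of once per region. A standalone sketch of that pattern, with plain C++ stand-ins rather than HotSpot types:

#include <condition_variable>
#include <cstddef>
#include <list>
#include <mutex>

std::mutex              shared_lock;     // plays the SecondaryFreeList_lock role
std::condition_variable shared_cv;
std::list<int>          shared_free_list;

void publish_in_batches(std::list<int>& local, std::size_t batch_len) {
  std::list<int> batch;
  while (!local.empty()) {
    batch.splice(batch.end(), local, local.begin());   // take one element
    if (batch.size() == batch_len || local.empty()) {
      std::lock_guard<std::mutex> g(shared_lock);
      shared_free_list.splice(shared_free_list.end(), batch);
      shared_cv.notify_all();   // one wakeup per batch, not per element
    }
  }
}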
1879 1879
1880 1880 // Support closures for reference processing in G1
1881 1881
1882 1882 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1883 1883 HeapWord* addr = (HeapWord*)obj;
1884 1884 return addr != NULL &&
1885 1885 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1886 1886 }
1887 1887
1888 1888 class G1CMKeepAliveClosure: public OopClosure {
1889 1889 G1CollectedHeap* _g1;
1890 1890 ConcurrentMark* _cm;
1891 1891 CMBitMap* _bitMap;
1892 1892 public:
1893 1893 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1894 1894 CMBitMap* bitMap) :
1895 1895 _g1(g1), _cm(cm),
1896 1896 _bitMap(bitMap) {}
1897 1897
1898 1898 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1899 1899 virtual void do_oop( oop* p) { do_oop_work(p); }
1900 1900
1901 1901 template <class T> void do_oop_work(T* p) {
1902 1902 oop obj = oopDesc::load_decode_heap_oop(p);
1903 1903 HeapWord* addr = (HeapWord*)obj;
1904 1904
1905 1905 if (_cm->verbose_high())
1906 1906 gclog_or_tty->print_cr("\t[0] we're looking at location "
1907 1907 "*"PTR_FORMAT" = "PTR_FORMAT,
1908 1908 p, (void*) obj);
1909 1909
1910 1910 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
1911 1911 _bitMap->mark(addr);
1912 1912 _cm->mark_stack_push(obj);
1913 1913 }
1914 1914 }
1915 1915 };
1916 1916
1917 1917 class G1CMDrainMarkingStackClosure: public VoidClosure {
1918 1918 CMMarkStack* _markStack;
1919 1919 CMBitMap* _bitMap;
1920 1920 G1CMKeepAliveClosure* _oopClosure;
1921 1921 public:
1922 1922 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1923 1923 G1CMKeepAliveClosure* oopClosure) :
1924 1924 _bitMap(bitMap),
1925 1925 _markStack(markStack),
1926 1926 _oopClosure(oopClosure)
1927 1927 {}
1928 1928
1929 1929 void do_void() {
1930 1930 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1931 1931 }
1932 1932 };
1933 1933
1934 1934 // 'Keep Alive' closure used by parallel reference processing.
1935 1935 // An instance of this closure is used in the parallel reference processing
1936 1936 // code rather than an instance of G1CMKeepAliveClosure. We could have used
1937 1937 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
1938 1938 // placed onto discovered ref lists only once, so we can mark and push with no
1939 1939 // need to check whether the object has already been marked. Using the
1940 1940 // G1CMKeepAliveClosure would mean, however, having all the worker threads
1941 1941 // operating on the global mark stack. This means that an individual
1942 1942 // worker would be doing lock-free pushes while it processes its own
1943 1943 // discovered ref list followed by a drain call. If the discovered ref lists
1944 1944 // are unbalanced then this could cause interference with the other
1945 1945 // workers. Using a CMTask (and its embedded local data structures)
1946 1946 // avoids that potential interference.
1947 1947 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
1948 1948 ConcurrentMark* _cm;
1949 1949 CMTask* _task;
1950 1950 CMBitMap* _bitMap;
1951 1951 int _ref_counter_limit;
1952 1952 int _ref_counter;
1953 1953 public:
1954 1954 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
1955 1955 CMTask* task,
1956 1956 CMBitMap* bitMap) :
1957 1957 _cm(cm), _task(task), _bitMap(bitMap),
1958 1958 _ref_counter_limit(G1RefProcDrainInterval)
1959 1959 {
1960 1960 assert(_ref_counter_limit > 0, "sanity");
1961 1961 _ref_counter = _ref_counter_limit;
1962 1962 }
1963 1963
1964 1964 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1965 1965 virtual void do_oop( oop* p) { do_oop_work(p); }
1966 1966
1967 1967 template <class T> void do_oop_work(T* p) {
1968 1968 if (!_cm->has_overflown()) {
1969 1969 oop obj = oopDesc::load_decode_heap_oop(p);
1970 1970 if (_cm->verbose_high())
1971 1971 gclog_or_tty->print_cr("\t[%d] we're looking at location "
1972 1972 "*"PTR_FORMAT" = "PTR_FORMAT,
1973 1973 _task->task_id(), p, (void*) obj);
1974 1974
1975 1975 _task->deal_with_reference(obj);
1976 1976 _ref_counter--;
1977 1977
1978 1978 if (_ref_counter == 0) {
1979 1979 // We have dealt with _ref_counter_limit references, pushing them and objects
1980 1980 // reachable from them on to the local stack (and possibly the global stack).
1981 1981 // Call do_marking_step() to process these entries. We call the routine in a
1982 1982 // loop, which we'll exit if there's nothing more to do (i.e. we're done
1983 1983 // with the entries that we've pushed as a result of the deal_with_reference
1984 1984 // calls above) or we overflow.
1985 1985 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
1986 1986 // while there may still be some work to do. (See the comment at the
1987 1987 // beginning of CMTask::do_marking_step() for those conditions - one of which
1988 1988 // is reaching the specified time target.) It is only when
1989 1989 // CMTask::do_marking_step() returns without setting the has_aborted() flag
1990 1990 // that the marking has completed.
1991 1991 do {
1992 1992 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1993 1993 _task->do_marking_step(mark_step_duration_ms,
1994 1994 false /* do_stealing */,
1995 1995 false /* do_termination */);
1996 1996 } while (_task->has_aborted() && !_cm->has_overflown());
1997 1997 _ref_counter = _ref_counter_limit;
1998 1998 }
1999 1999 } else {
2000 2000 if (_cm->verbose_high())
2001 2001 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2002 2002 }
2003 2003 }
2004 2004 };
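The _ref_counter discipline above, in miniature: references are buffered as they arrive, and every k-th arrival triggers a drain of everything reachable so far, which bounds how much work can pile up between drains. A self-contained sketch (k plays the role of G1RefProcDrainInterval; all names hypothetical):

#include <vector>

struct DrainEveryK {
  std::vector<int> pending;
  int counter;
  int limit;

  explicit DrainEveryK(int k) : counter(k), limit(k) {}

  void deal_with(int item) {        // analogous to deal_with_reference()
    pending.push_back(item);
    if (--counter == 0) {
      drain();                      // analogous to the do_marking_step() loop
      counter = limit;
    }
  }

  void drain() {
    while (!pending.empty()) {      // visiting items may push successors
      int item = pending.back();
      pending.pop_back();
      (void)item;                   // ... process item here ...
    }
  }
};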
2005 2005
2006 2006 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2007 2007 ConcurrentMark* _cm;
2008 2008 CMTask* _task;
2009 2009 public:
2010 2010 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2011 2011 _cm(cm), _task(task)
2012 2012 {}
2013 2013
2014 2014 void do_void() {
2015 2015 do {
2016 2016 if (_cm->verbose_high())
2017 2017 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step", _task->task_id());
2018 2018
2019 2019 // We call CMTask::do_marking_step() to completely drain the local and
2020 2020 // global marking stacks. The routine is called in a loop, which we'll
2021 2021       // exit if there's nothing more to do (i.e. we've completely drained the
2022 2022 // entries that were pushed as a result of applying the
2023 2023 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2024 2024 // lists above) or we overflow the global marking stack.
2025 2025 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2026 2026 // while there may still be some work to do. (See the comment at the
2027 2027 // beginning of CMTask::do_marking_step() for those conditions - one of which
2028 2028 // is reaching the specified time target.) It is only when
2029 2029 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2030 2030 // that the marking has completed.
2031 2031
2032 2032 _task->do_marking_step(1000000000.0 /* something very large */,
2033 2033 true /* do_stealing */,
2034 2034 true /* do_termination */);
2035 2035 } while (_task->has_aborted() && !_cm->has_overflown());
2036 2036 }
2037 2037 };
2038 2038
2039 2039 // Implementation of AbstractRefProcTaskExecutor for G1
2040 2040 class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2041 2041 private:
2042 2042 G1CollectedHeap* _g1h;
2043 2043 ConcurrentMark* _cm;
2044 2044 CMBitMap* _bitmap;
2045 2045 WorkGang* _workers;
2046 2046 int _active_workers;
2047 2047
2048 2048 public:
2049 2049 G1RefProcTaskExecutor(G1CollectedHeap* g1h,
2050 2050 ConcurrentMark* cm,
2051 2051 CMBitMap* bitmap,
2052 2052 WorkGang* workers,
2053 2053 int n_workers) :
2054 2054 _g1h(g1h), _cm(cm), _bitmap(bitmap),
2055 2055 _workers(workers), _active_workers(n_workers)
2056 2056 { }
2057 2057
2058 2058 // Executes the given task using concurrent marking worker threads.
2059 2059 virtual void execute(ProcessTask& task);
2060 2060 virtual void execute(EnqueueTask& task);
2061 2061 };
2062 2062
2063 2063 class G1RefProcTaskProxy: public AbstractGangTask {
2064 2064 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2065 2065 ProcessTask& _proc_task;
2066 2066 G1CollectedHeap* _g1h;
2067 2067 ConcurrentMark* _cm;
2068 2068 CMBitMap* _bitmap;
2069 2069
2070 2070 public:
2071 2071 G1RefProcTaskProxy(ProcessTask& proc_task,
2072 2072 G1CollectedHeap* g1h,
2073 2073 ConcurrentMark* cm,
2074 2074 CMBitMap* bitmap) :
2075 2075 AbstractGangTask("Process reference objects in parallel"),
2076 2076 _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2077 2077 {}
2078 2078
2079 2079 virtual void work(int i) {
2080 2080 CMTask* marking_task = _cm->task(i);
2081 2081 G1CMIsAliveClosure g1_is_alive(_g1h);
2082 2082 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2083 2083 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2084 2084
2085 2085 _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2086 2086 }
2087 2087 };
2088 2088
2089 2089 void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
2090 2090 assert(_workers != NULL, "Need parallel worker threads.");
2091 2091
2092 2092 G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2093 2093
2094 2094 // We need to reset the phase for each task execution so that
2095 2095 // the termination protocol of CMTask::do_marking_step works.
2096 2096 _cm->set_phase(_active_workers, false /* concurrent */);
2097 2097 _g1h->set_par_threads(_active_workers);
2098 2098 _workers->run_task(&proc_task_proxy);
2099 2099 _g1h->set_par_threads(0);
2100 2100 }
2101 2101
2102 2102 class G1RefEnqueueTaskProxy: public AbstractGangTask {
2103 2103 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2104 2104 EnqueueTask& _enq_task;
2105 2105
2106 2106 public:
2107 2107 G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
2108 2108 AbstractGangTask("Enqueue reference objects in parallel"),
2109 2109 _enq_task(enq_task)
2110 2110 { }
2111 2111
2112 2112 virtual void work(int i) {
2113 2113 _enq_task.work(i);
2114 2114 }
2115 2115 };
2116 2116
2117 2117 void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2118 2118 assert(_workers != NULL, "Need parallel worker threads.");
2119 2119
2120 2120 G1RefEnqueueTaskProxy enq_task_proxy(enq_task);
2121 2121
2122 2122 _g1h->set_par_threads(_active_workers);
2123 2123 _workers->run_task(&enq_task_proxy);
2124 2124 _g1h->set_par_threads(0);
2125 2125 }
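Both execute() overloads above share one proxy shape: the reference-processing task is wrapped in a gang task whose work(i) builds per-worker state and forwards, and the executor fans that proxy out over the workers. The same shape reduced to plain threads (run_gang and execute_proc_task are invented names, not HotSpot APIs):

#include <functional>
#include <thread>
#include <vector>

void run_gang(int n_workers, const std::function<void(int)>& work) {
  std::vector<std::thread> gang;
  for (int i = 0; i < n_workers; ++i)
    gang.emplace_back(work, i);     // each worker runs work(i)
  for (std::thread& t : gang)
    t.join();
}

void execute_proc_task(const std::function<void(int, int)>& proc_task,
                       int n_workers) {
  run_gang(n_workers, [&](int i) {
    int per_worker_state = i;       // stands in for CMTask / closures
    proc_task(i, per_worker_state); // forward with worker-local state
  });
}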
2126 2126
2127 2127 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2128 2128 ResourceMark rm;
2129 2129 HandleMark hm;
2130 2130 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2131 2131 ReferenceProcessor* rp = g1h->ref_processor();
2132 2132
2133 2133 // See the comment in G1CollectedHeap::ref_processing_init()
2134 2134 // about how reference processing currently works in G1.
2135 2135
2136 2136 // Process weak references.
2137 2137 rp->setup_policy(clear_all_soft_refs);
2138 2138 assert(_markStack.isEmpty(), "mark stack should be empty");
2139 2139
2140 2140 G1CMIsAliveClosure g1_is_alive(g1h);
2141 2141 G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2142 2142 G1CMDrainMarkingStackClosure
2143 2143 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2144 -
2145 2144 // We use the work gang from the G1CollectedHeap and we utilize all
2146 2145 // the worker threads.
2147 - int active_workers = MAX2(MIN2(g1h->workers()->total_workers(), (int)_max_task_num), 1);
2146 + int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2147 + active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2148 2148
2149 2149 G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2150 2150 g1h->workers(), active_workers);
2151 2151
2152 +
2152 2153 if (rp->processing_is_mt()) {
2153 2154 // Set the degree of MT here. If the discovery is done MT, there
2154 2155 // may have been a different number of threads doing the discovery
2155 2156 // and a different number of discovered lists may have Ref objects.
2156 2157 // That is OK as long as the Reference lists are balanced (see
2157 2158 // balance_all_queues() and balance_queues()).
2158 - rp->set_mt_degree(active_workers);
2159 + rp->set_active_mt_degree(active_workers);
2159 2160
2160 2161 rp->process_discovered_references(&g1_is_alive,
2161 2162 &g1_keep_alive,
2162 2163 &g1_drain_mark_stack,
2163 2164 &par_task_executor);
2164 2165
2165 2166 // The work routines of the parallel keep_alive and drain_marking_stack
2166 2167 // will set the has_overflown flag if we overflow the global marking
2167 2168 // stack.
2168 2169 } else {
2169 2170 rp->process_discovered_references(&g1_is_alive,
2170 2171 &g1_keep_alive,
2171 2172 &g1_drain_mark_stack,
2172 2173 NULL);
2173 2174
2174 2175 }
2175 2176
2176 2177 assert(_markStack.overflow() || _markStack.isEmpty(),
2177 2178 "mark stack should be empty (unless it overflowed)");
2178 2179 if (_markStack.overflow()) {
2179 2180 // Should have been done already when we tried to push an
2180 2181     // entry onto the global mark stack. But let's do it again.
2181 2182 set_has_overflown();
2182 2183 }
2183 2184
2184 2185 if (rp->processing_is_mt()) {
2185 2186 assert(rp->num_q() == active_workers, "why not");
2186 2187 rp->enqueue_discovered_references(&par_task_executor);
2187 2188 } else {
2188 2189 rp->enqueue_discovered_references();
2189 2190 }
2190 2191
2191 2192 rp->verify_no_references_recorded();
2192 2193 assert(!rp->discovery_enabled(), "should have been disabled");
2193 2194
2194 2195 // Now clean up stale oops in StringTable
2195 2196 StringTable::unlink(&g1_is_alive);
2196 2197 // Clean up unreferenced symbols in symbol table.
2197 2198 SymbolTable::unlink();
2198 2199 }
2199 2200
2200 2201 void ConcurrentMark::swapMarkBitMaps() {
2201 2202 CMBitMapRO* temp = _prevMarkBitMap;
2202 2203 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2203 2204 _nextMarkBitMap = (CMBitMap*) temp;
2204 2205 }
2205 2206
2206 2207 class CMRemarkTask: public AbstractGangTask {
2207 2208 private:
2208 2209 ConcurrentMark *_cm;
2209 2210
2210 2211 public:
2211 2212 void work(int worker_i) {
2212 2213 // Since all available tasks are actually started, we should
2213 2214     // only proceed if we're supposed to be active.
2214 2215 if ((size_t)worker_i < _cm->active_tasks()) {
2215 2216 CMTask* task = _cm->task(worker_i);
2216 2217 task->record_start_time();
2217 2218 do {
2218 2219 task->do_marking_step(1000000000.0 /* something very large */,
2219 2220 true /* do_stealing */,
2220 2221 true /* do_termination */);
2221 2222 } while (task->has_aborted() && !_cm->has_overflown());
2222 2223 // If we overflow, then we do not want to restart. We instead
2223 2224 // want to abort remark and do concurrent marking again.
2224 2225 task->record_end_time();
2225 2226 }
2226 2227 }
2227 2228
2228 2229 CMRemarkTask(ConcurrentMark* cm) :
2229 2230 AbstractGangTask("Par Remark"), _cm(cm) { }
2230 2231 };
2231 2232
2232 2233 void ConcurrentMark::checkpointRootsFinalWork() {
2233 2234 ResourceMark rm;
2234 2235 HandleMark hm;
2235 2236 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2236 2237
2237 2238 g1h->ensure_parsability(false);
2238 2239
2239 2240 if (G1CollectedHeap::use_parallel_gc_threads()) {
2240 2241 G1CollectedHeap::StrongRootsScope srs(g1h);
2241 2242 // this is remark, so we'll use up all available threads
2242 2243 int active_workers = ParallelGCThreads;
2243 2244 set_phase(active_workers, false /* concurrent */);
2244 2245
2245 2246 CMRemarkTask remarkTask(this);
2246 2247 // We will start all available threads, even if we decide that the
2247 2248 // active_workers will be fewer. The extra ones will just bail out
2248 2249 // immediately.
2249 2250 int n_workers = g1h->workers()->total_workers();
2250 2251 g1h->set_par_threads(n_workers);
2251 2252 g1h->workers()->run_task(&remarkTask);
2252 2253 g1h->set_par_threads(0);
2253 2254 } else {
2254 2255 G1CollectedHeap::StrongRootsScope srs(g1h);
2255 2256 // this is remark, so we'll use up all available threads
2256 2257 int active_workers = 1;
2257 2258 set_phase(active_workers, false /* concurrent */);
2258 2259
2259 2260 CMRemarkTask remarkTask(this);
2260 2261     // In the serial case there is no work gang, so the single
2261 2262     // remark task simply does all of the remark work directly
2262 2263     // in this thread.
2263 2264 remarkTask.work(0);
2264 2265 }
2265 2266 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2266 2267 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2267 2268
2268 2269 print_stats();
2269 2270
2270 2271 #if VERIFY_OBJS_PROCESSED
2271 2272 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2272 2273 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2273 2274 _scan_obj_cl.objs_processed,
2274 2275 ThreadLocalObjQueue::objs_enqueued);
2275 2276 guarantee(_scan_obj_cl.objs_processed ==
2276 2277 ThreadLocalObjQueue::objs_enqueued,
2277 2278 "Different number of objs processed and enqueued.");
2278 2279 }
2279 2280 #endif
2280 2281 }
2281 2282
2282 2283 #ifndef PRODUCT
2283 2284
2284 2285 class PrintReachableOopClosure: public OopClosure {
2285 2286 private:
2286 2287 G1CollectedHeap* _g1h;
2287 2288 CMBitMapRO* _bitmap;
2288 2289 outputStream* _out;
2289 2290 bool _use_prev_marking;
2290 2291 bool _all;
2291 2292
2292 2293 public:
2293 2294 PrintReachableOopClosure(CMBitMapRO* bitmap,
2294 2295 outputStream* out,
2295 2296 bool use_prev_marking,
2296 2297 bool all) :
2297 2298 _g1h(G1CollectedHeap::heap()),
2298 2299 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2299 2300
2300 2301 void do_oop(narrowOop* p) { do_oop_work(p); }
2301 2302 void do_oop( oop* p) { do_oop_work(p); }
2302 2303
2303 2304 template <class T> void do_oop_work(T* p) {
2304 2305 oop obj = oopDesc::load_decode_heap_oop(p);
2305 2306 const char* str = NULL;
2306 2307 const char* str2 = "";
2307 2308
2308 2309 if (obj == NULL) {
2309 2310 str = "";
2310 2311 } else if (!_g1h->is_in_g1_reserved(obj)) {
2311 2312 str = " O";
2312 2313 } else {
2313 2314 HeapRegion* hr = _g1h->heap_region_containing(obj);
2314 2315 guarantee(hr != NULL, "invariant");
2315 2316 bool over_tams = false;
2316 2317 if (_use_prev_marking) {
2317 2318 over_tams = hr->obj_allocated_since_prev_marking(obj);
2318 2319 } else {
2319 2320 over_tams = hr->obj_allocated_since_next_marking(obj);
2320 2321 }
2321 2322 bool marked = _bitmap->isMarked((HeapWord*) obj);
2322 2323
2323 2324 if (over_tams) {
2324 2325 str = " >";
2325 2326 if (marked) {
2326 2327 str2 = " AND MARKED";
2327 2328 }
2328 2329 } else if (marked) {
2329 2330 str = " M";
2330 2331 } else {
2331 2332 str = " NOT";
2332 2333 }
2333 2334 }
2334 2335
2335 2336 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2336 2337 p, (void*) obj, str, str2);
2337 2338 }
2338 2339 };
2339 2340
2340 2341 class PrintReachableObjectClosure : public ObjectClosure {
2341 2342 private:
2342 2343 CMBitMapRO* _bitmap;
2343 2344 outputStream* _out;
2344 2345 bool _use_prev_marking;
2345 2346 bool _all;
2346 2347 HeapRegion* _hr;
2347 2348
2348 2349 public:
2349 2350 PrintReachableObjectClosure(CMBitMapRO* bitmap,
2350 2351 outputStream* out,
2351 2352 bool use_prev_marking,
2352 2353 bool all,
2353 2354 HeapRegion* hr) :
2354 2355 _bitmap(bitmap), _out(out),
2355 2356 _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
2356 2357
2357 2358 void do_object(oop o) {
2358 2359 bool over_tams;
2359 2360 if (_use_prev_marking) {
2360 2361 over_tams = _hr->obj_allocated_since_prev_marking(o);
2361 2362 } else {
2362 2363 over_tams = _hr->obj_allocated_since_next_marking(o);
2363 2364 }
2364 2365 bool marked = _bitmap->isMarked((HeapWord*) o);
2365 2366 bool print_it = _all || over_tams || marked;
2366 2367
2367 2368 if (print_it) {
2368 2369 _out->print_cr(" "PTR_FORMAT"%s",
2369 2370 o, (over_tams) ? " >" : (marked) ? " M" : "");
2370 2371 PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
2371 2372 o->oop_iterate(&oopCl);
2372 2373 }
2373 2374 }
2374 2375 };
2375 2376
2376 2377 class PrintReachableRegionClosure : public HeapRegionClosure {
2377 2378 private:
2378 2379 CMBitMapRO* _bitmap;
2379 2380 outputStream* _out;
2380 2381 bool _use_prev_marking;
2381 2382 bool _all;
2382 2383
2383 2384 public:
2384 2385 bool doHeapRegion(HeapRegion* hr) {
2385 2386 HeapWord* b = hr->bottom();
2386 2387 HeapWord* e = hr->end();
2387 2388 HeapWord* t = hr->top();
2388 2389 HeapWord* p = NULL;
2389 2390 if (_use_prev_marking) {
2390 2391 p = hr->prev_top_at_mark_start();
2391 2392 } else {
2392 2393 p = hr->next_top_at_mark_start();
2393 2394 }
2394 2395 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2395 2396 "TAMS: "PTR_FORMAT, b, e, t, p);
2396 2397 _out->cr();
2397 2398
2398 2399 HeapWord* from = b;
2399 2400 HeapWord* to = t;
2400 2401
2401 2402 if (to > from) {
2402 2403 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2403 2404 _out->cr();
2404 2405 PrintReachableObjectClosure ocl(_bitmap, _out,
2405 2406 _use_prev_marking, _all, hr);
2406 2407 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2407 2408 _out->cr();
2408 2409 }
2409 2410
2410 2411 return false;
2411 2412 }
2412 2413
2413 2414 PrintReachableRegionClosure(CMBitMapRO* bitmap,
2414 2415 outputStream* out,
2415 2416 bool use_prev_marking,
2416 2417 bool all) :
2417 2418 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2418 2419 };
2419 2420
2420 2421 void ConcurrentMark::print_reachable(const char* str,
2421 2422 bool use_prev_marking,
2422 2423 bool all) {
2423 2424 gclog_or_tty->cr();
2424 2425 gclog_or_tty->print_cr("== Doing heap dump... ");
2425 2426
2426 2427 if (G1PrintReachableBaseFile == NULL) {
2427 2428 gclog_or_tty->print_cr(" #### error: no base file defined");
2428 2429 return;
2429 2430 }
2430 2431
2431 2432 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2432 2433 (JVM_MAXPATHLEN - 1)) {
2433 2434 gclog_or_tty->print_cr(" #### error: file name too long");
2434 2435 return;
2435 2436 }
2436 2437
2437 2438 char file_name[JVM_MAXPATHLEN];
2438 2439 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2439 2440 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2440 2441
2441 2442 fileStream fout(file_name);
2442 2443 if (!fout.is_open()) {
2443 2444 gclog_or_tty->print_cr(" #### error: could not open file");
2444 2445 return;
2445 2446 }
2446 2447
2447 2448 outputStream* out = &fout;
2448 2449
2449 2450 CMBitMapRO* bitmap = NULL;
2450 2451 if (use_prev_marking) {
2451 2452 bitmap = _prevMarkBitMap;
2452 2453 } else {
2453 2454 bitmap = _nextMarkBitMap;
2454 2455 }
2455 2456
2456 2457 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2457 2458 out->cr();
2458 2459
2459 2460 out->print_cr("--- ITERATING OVER REGIONS");
2460 2461 out->cr();
2461 2462 PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
2462 2463 _g1h->heap_region_iterate(&rcl);
2463 2464 out->cr();
2464 2465
2465 2466 gclog_or_tty->print_cr(" done");
2466 2467 gclog_or_tty->flush();
2467 2468 }
2468 2469
2469 2470 #endif // PRODUCT
2470 2471
2471 2472 // This note is for drainAllSATBBuffers and the code in between.
2472 2473 // In the future we could reuse a task to do this work during an
2473 2474 // evacuation pause (since now tasks are not active and can be claimed
2474 2475 // during an evacuation pause). This was a late change to the code and
2475 2476 // is currently not being taken advantage of.
2476 2477
2477 2478 class CMGlobalObjectClosure : public ObjectClosure {
2478 2479 private:
2479 2480 ConcurrentMark* _cm;
2480 2481
2481 2482 public:
2482 2483 void do_object(oop obj) {
2483 2484 _cm->deal_with_reference(obj);
2484 2485 }
2485 2486
2486 2487 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2487 2488 };
2488 2489
2489 2490 void ConcurrentMark::deal_with_reference(oop obj) {
2490 2491 if (verbose_high())
2491 2492 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2492 2493 (void*) obj);
2493 2494
2494 2495
2495 2496 HeapWord* objAddr = (HeapWord*) obj;
2496 2497 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2497 2498 if (_g1h->is_in_g1_reserved(objAddr)) {
2498 2499 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2499 2500 HeapRegion* hr = _g1h->heap_region_containing(obj);
2500 2501 if (_g1h->is_obj_ill(obj, hr)) {
2501 2502 if (verbose_high())
2502 2503 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2503 2504 "marked", (void*) obj);
2504 2505
2505 2506 // we need to mark it first
2506 2507 if (_nextMarkBitMap->parMark(objAddr)) {
2507 2508 // No OrderAccess:store_load() is needed. It is implicit in the
2508 2509 // CAS done in parMark(objAddr) above
2509 2510 HeapWord* finger = _finger;
2510 2511 if (objAddr < finger) {
2511 2512 if (verbose_high())
2512 2513 gclog_or_tty->print_cr("[global] below the global finger "
2513 2514 "("PTR_FORMAT"), pushing it", finger);
2514 2515 if (!mark_stack_push(obj)) {
2515 2516 if (verbose_low())
2516 2517 gclog_or_tty->print_cr("[global] global stack overflow during "
2517 2518 "deal_with_reference");
2518 2519 }
2519 2520 }
2520 2521 }
2521 2522 }
2522 2523 }
2523 2524 }
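The key step above is parMark(objAddr): an atomic bit-set that also reports whether this thread won the race, so exactly one thread proceeds to push the object. What that boils down to, sketched with std::atomic as a stand-in for the real bitmap implementation:

#include <atomic>
#include <cstddef>
#include <cstdint>

// Returns true iff this call set the bit, i.e. we won the marking race.
bool par_mark(std::atomic<uint64_t>* bits, size_t bit_index) {
  std::atomic<uint64_t>& word = bits[bit_index / 64];
  uint64_t mask = uint64_t(1) << (bit_index % 64);
  uint64_t old  = word.fetch_or(mask, std::memory_order_acq_rel);
  return (old & mask) == 0;
}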
2524 2525
2525 2526 void ConcurrentMark::drainAllSATBBuffers() {
2526 2527 CMGlobalObjectClosure oc(this);
2527 2528 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2528 2529 satb_mq_set.set_closure(&oc);
2529 2530
2530 2531 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2531 2532 if (verbose_medium())
2532 2533 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2533 2534 }
2534 2535
2535 2536 // no need to check whether we should do this, as this is only
2536 2537 // called during an evacuation pause
2537 2538 satb_mq_set.iterate_closure_all_threads();
2538 2539
2539 2540 satb_mq_set.set_closure(NULL);
2540 2541 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2541 2542 }
2542 2543
2543 2544 void ConcurrentMark::markPrev(oop p) {
2544 2545 // Note we are overriding the read-only view of the prev map here, via
2545 2546 // the cast.
2546 2547 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2547 2548 }
2548 2549
2549 2550 void ConcurrentMark::clear(oop p) {
2550 2551 assert(p != NULL && p->is_oop(), "expected an oop");
2551 2552 HeapWord* addr = (HeapWord*)p;
2552 2553   assert(addr >= _nextMarkBitMap->startWord() &&
2553 2554          addr < _nextMarkBitMap->endWord(), "in a region");
2554 2555
2555 2556 _nextMarkBitMap->clear(addr);
2556 2557 }
2557 2558
2558 2559 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2559 2560 // Note we are overriding the read-only view of the prev map here, via
2560 2561 // the cast.
2561 2562 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2562 2563 _nextMarkBitMap->clearRange(mr);
2563 2564 }
2564 2565
2565 2566 HeapRegion*
2566 2567 ConcurrentMark::claim_region(int task_num) {
2567 2568 // "checkpoint" the finger
2568 2569 HeapWord* finger = _finger;
2569 2570
2570 2571 // _heap_end will not change underneath our feet; it only changes at
2571 2572 // yield points.
2572 2573 while (finger < _heap_end) {
2573 2574 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2574 2575
2575 2576 // is the gap between reading the finger and doing the CAS too long?
2576 2577
2577 2578 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2578 2579 HeapWord* bottom = curr_region->bottom();
2579 2580 HeapWord* end = curr_region->end();
2580 2581 HeapWord* limit = curr_region->next_top_at_mark_start();
2581 2582
2582 2583 if (verbose_low())
2583 2584 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2584 2585 "["PTR_FORMAT", "PTR_FORMAT"), "
2585 2586 "limit = "PTR_FORMAT,
2586 2587 task_num, curr_region, bottom, end, limit);
2587 2588
2588 2589 HeapWord* res =
2589 2590 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2590 2591 if (res == finger) {
2591 2592 // we succeeded
2592 2593
2593 2594       // notice that _finger == end cannot be guaranteed here since
2594 2595 // someone else might have moved the finger even further
2595 2596 assert(_finger >= end, "the finger should have moved forward");
2596 2597
2597 2598 if (verbose_low())
2598 2599 gclog_or_tty->print_cr("[%d] we were successful with region = "
2599 2600 PTR_FORMAT, task_num, curr_region);
2600 2601
2601 2602 if (limit > bottom) {
2602 2603 if (verbose_low())
2603 2604 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2604 2605 "returning it ", task_num, curr_region);
2605 2606 return curr_region;
2606 2607 } else {
2607 2608 assert(limit == bottom,
2608 2609 "the region limit should be at bottom");
2609 2610 if (verbose_low())
2610 2611 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2611 2612 "returning NULL", task_num, curr_region);
2612 2613 // we return NULL and the caller should try calling
2613 2614 // claim_region() again.
2614 2615 return NULL;
2615 2616 }
2616 2617 } else {
2617 2618 assert(_finger > finger, "the finger should have moved forward");
2618 2619 if (verbose_low())
2619 2620 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2620 2621 "global finger = "PTR_FORMAT", "
2621 2622 "our finger = "PTR_FORMAT,
2622 2623 task_num, _finger, finger);
2623 2624
2624 2625 // read it again
2625 2626 finger = _finger;
2626 2627 }
2627 2628 }
2628 2629
2629 2630 return NULL;
2630 2631 }
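For reference, the finger protocol above reduced to its core: a shared cursor that workers advance by CAS, where a failed CAS leaves the freshly-read cursor in hand for the retry (the "read it again" step). Plain indices stand in for HeapWord* values, and the empty-region case is omitted:

#include <atomic>
#include <cstddef>

std::atomic<size_t> finger(0);
const size_t heap_end    = 1024;
const size_t region_size = 32;

// Returns the start of the claimed region, or heap_end when none remain.
size_t claim_region() {
  size_t f = finger.load();
  while (f < heap_end) {
    size_t region_end = f + region_size;
    if (finger.compare_exchange_strong(f, region_end))
      return f;   // we moved the finger past this region; it is ours
    // CAS failed: f now holds the other worker's updated finger; retry
  }
  return heap_end;
}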
2631 2632
2632 2633 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2633 2634 bool result = false;
2634 2635 for (int i = 0; i < (int)_max_task_num; ++i) {
2635 2636 CMTask* the_task = _tasks[i];
2636 2637 MemRegion mr = the_task->aborted_region();
2637 2638 if (mr.start() != NULL) {
2638 2639 assert(mr.end() != NULL, "invariant");
2639 2640 assert(mr.word_size() > 0, "invariant");
2640 2641 HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2641 2642 assert(hr != NULL, "invariant");
2642 2643 if (hr->in_collection_set()) {
2643 2644 // The region points into the collection set
2644 2645 the_task->set_aborted_region(MemRegion());
2645 2646 result = true;
2646 2647 }
2647 2648 }
2648 2649 }
2649 2650 return result;
2650 2651 }
2651 2652
2652 2653 bool ConcurrentMark::has_aborted_regions() {
2653 2654 for (int i = 0; i < (int)_max_task_num; ++i) {
2654 2655 CMTask* the_task = _tasks[i];
2655 2656 MemRegion mr = the_task->aborted_region();
2656 2657 if (mr.start() != NULL) {
2657 2658 assert(mr.end() != NULL, "invariant");
2658 2659 assert(mr.word_size() > 0, "invariant");
2659 2660 return true;
2660 2661 }
2661 2662 }
2662 2663 return false;
2663 2664 }
2664 2665
2665 2666 void ConcurrentMark::oops_do(OopClosure* cl) {
2666 2667 if (_markStack.size() > 0 && verbose_low())
2667 2668 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2668 2669 "size = %d", _markStack.size());
2669 2670 // we first iterate over the contents of the mark stack...
2670 2671 _markStack.oops_do(cl);
2671 2672
2672 2673 for (int i = 0; i < (int)_max_task_num; ++i) {
2673 2674 OopTaskQueue* queue = _task_queues->queue((int)i);
2674 2675
2675 2676 if (queue->size() > 0 && verbose_low())
2676 2677 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2677 2678 "size = %d", i, queue->size());
2678 2679
2679 2680 // ...then over the contents of the all the task queues.
2680 2681 queue->oops_do(cl);
2681 2682 }
2682 2683
2683 2684   // Invalidate any entries in the region stack that point into
2684 2685   // the collection set.
2685 2686 if (_regionStack.invalidate_entries_into_cset()) {
2686 2687 // otherwise, any gray objects copied during the evacuation pause
2687 2688 // might not be visited.
2688 2689 assert(_should_gray_objects, "invariant");
2689 2690 }
2690 2691
2691 2692 // Invalidate any aborted regions, recorded in the individual CM
2692 2693 // tasks, that point into the collection set.
2693 2694 if (invalidate_aborted_regions_in_cset()) {
2694 2695 // otherwise, any gray objects copied during the evacuation pause
2695 2696 // might not be visited.
2696 2697 assert(_should_gray_objects, "invariant");
2697 2698 }
2698 2699
2699 2700 }
2700 2701
2701 2702 void ConcurrentMark::clear_marking_state() {
2702 2703 _markStack.setEmpty();
2703 2704 _markStack.clear_overflow();
2704 2705 _regionStack.setEmpty();
2705 2706 _regionStack.clear_overflow();
2706 2707 clear_has_overflown();
2707 2708 _finger = _heap_start;
2708 2709
2709 2710 for (int i = 0; i < (int)_max_task_num; ++i) {
2710 2711 OopTaskQueue* queue = _task_queues->queue(i);
2711 2712 queue->set_empty();
2712 2713 // Clear any partial regions from the CMTasks
2713 2714 _tasks[i]->clear_aborted_region();
2714 2715 }
2715 2716 }
2716 2717
2717 2718 void ConcurrentMark::print_stats() {
2718 2719 if (verbose_stats()) {
2719 2720 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2720 2721 for (size_t i = 0; i < _active_tasks; ++i) {
2721 2722 _tasks[i]->print_stats();
2722 2723 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2723 2724 }
2724 2725 }
2725 2726 }
2726 2727
2727 2728 class CSMarkOopClosure: public OopClosure {
2728 2729 friend class CSMarkBitMapClosure;
2729 2730
2730 2731 G1CollectedHeap* _g1h;
2731 2732 CMBitMap* _bm;
2732 2733 ConcurrentMark* _cm;
2733 2734 oop* _ms;
2734 2735 jint* _array_ind_stack;
2735 2736 int _ms_size;
2736 2737 int _ms_ind;
2737 2738 int _array_increment;
2738 2739
2739 2740 bool push(oop obj, int arr_ind = 0) {
2740 2741 if (_ms_ind == _ms_size) {
2741 2742 gclog_or_tty->print_cr("Mark stack is full.");
2742 2743 return false;
2743 2744 }
2744 2745 _ms[_ms_ind] = obj;
2745 2746 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2746 2747 _ms_ind++;
2747 2748 return true;
2748 2749 }
2749 2750
2750 2751 oop pop() {
2751 2752 if (_ms_ind == 0) return NULL;
2752 2753 else {
2753 2754 _ms_ind--;
2754 2755 return _ms[_ms_ind];
2755 2756 }
2756 2757 }
2757 2758
2758 2759 template <class T> bool drain() {
2759 2760 while (_ms_ind > 0) {
2760 2761 oop obj = pop();
2761 2762 assert(obj != NULL, "Since index was non-zero.");
2762 2763 if (obj->is_objArray()) {
2763 2764 jint arr_ind = _array_ind_stack[_ms_ind];
2764 2765 objArrayOop aobj = objArrayOop(obj);
2765 2766 jint len = aobj->length();
2766 2767 jint next_arr_ind = arr_ind + _array_increment;
2767 2768 if (next_arr_ind < len) {
2768 2769 push(obj, next_arr_ind);
2769 2770 }
2770 2771 // Now process this portion of this one.
2771 2772 int lim = MIN2(next_arr_ind, len);
2772 2773 for (int j = arr_ind; j < lim; j++) {
2773 2774 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2774 2775 }
2775 2776
2776 2777 } else {
2777 2778 obj->oop_iterate(this);
2778 2779 }
2779 2780 if (abort()) return false;
2780 2781 }
2781 2782 return true;
2782 2783 }
2783 2784
2784 2785 public:
2785 2786 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2786 2787 _g1h(G1CollectedHeap::heap()),
2787 2788 _cm(cm),
2788 2789 _bm(cm->nextMarkBitMap()),
2789 2790 _ms_size(ms_size), _ms_ind(0),
2790 2791 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2791 2792 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2792 2793 _array_increment(MAX2(ms_size/8, 16))
2793 2794 {}
2794 2795
2795 2796 ~CSMarkOopClosure() {
2796 2797 FREE_C_HEAP_ARRAY(oop, _ms);
2797 2798 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2798 2799 }
2799 2800
2800 2801 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2801 2802 virtual void do_oop( oop* p) { do_oop_work(p); }
2802 2803
2803 2804 template <class T> void do_oop_work(T* p) {
2804 2805 T heap_oop = oopDesc::load_heap_oop(p);
2805 2806 if (oopDesc::is_null(heap_oop)) return;
2806 2807 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2807 2808 if (obj->is_forwarded()) {
2808 2809 // If the object has already been forwarded, we have to make sure
2809 2810 // that it's marked. So follow the forwarding pointer. Note that
2810 2811 // this does the right thing for self-forwarding pointers in the
2811 2812 // evacuation failure case.
2812 2813 obj = obj->forwardee();
2813 2814 }
2814 2815 HeapRegion* hr = _g1h->heap_region_containing(obj);
2815 2816 if (hr != NULL) {
2816 2817 if (hr->in_collection_set()) {
2817 2818 if (_g1h->is_obj_ill(obj)) {
2818 2819 _bm->mark((HeapWord*)obj);
2819 2820 if (!push(obj)) {
2820 2821 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2821 2822 set_abort();
2822 2823 }
2823 2824 }
2824 2825 } else {
2825 2826 // Outside the collection set; we need to gray it
2826 2827 _cm->deal_with_reference(obj);
2827 2828 }
2828 2829 }
2829 2830 }
2830 2831 };
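The push()/pop()/drain() trio above chunks object arrays: a large array is re-pushed with its next start index and only _array_increment slots are scanned per pop, so a single huge array cannot monopolize the drain. The same trick standalone, with illustrative types:

#include <cstddef>
#include <utility>
#include <vector>

void scan_in_chunks(const std::vector<int>& big_array, std::size_t increment) {
  typedef std::pair<const std::vector<int>*, std::size_t> Entry;
  std::vector<Entry> stack;
  stack.push_back(Entry(&big_array, 0));
  while (!stack.empty()) {
    Entry e = stack.back();
    stack.pop_back();
    std::size_t next = e.second + increment;
    if (next < e.first->size())
      stack.push_back(Entry(e.first, next));   // remainder stays queued
    std::size_t lim = next < e.first->size() ? next : e.first->size();
    for (std::size_t j = e.second; j < lim; ++j) {
      // ... visit (*e.first)[j] here ...
    }
  }
}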
2831 2832
2832 2833 class CSMarkBitMapClosure: public BitMapClosure {
2833 2834 G1CollectedHeap* _g1h;
2834 2835 CMBitMap* _bitMap;
2835 2836 ConcurrentMark* _cm;
2836 2837 CSMarkOopClosure _oop_cl;
2837 2838 public:
2838 2839 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2839 2840 _g1h(G1CollectedHeap::heap()),
2840 2841 _bitMap(cm->nextMarkBitMap()),
2841 2842 _oop_cl(cm, ms_size)
2842 2843 {}
2843 2844
2844 2845 ~CSMarkBitMapClosure() {}
2845 2846
2846 2847 bool do_bit(size_t offset) {
2847 2848 // convert offset into a HeapWord*
2848 2849 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2849 2850     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2850 2851            "address out of range");
2851 2852 assert(_bitMap->isMarked(addr), "tautology");
2852 2853 oop obj = oop(addr);
2853 2854 if (!obj->is_forwarded()) {
2854 2855 if (!_oop_cl.push(obj)) return false;
2855 2856 if (UseCompressedOops) {
2856 2857 if (!_oop_cl.drain<narrowOop>()) return false;
2857 2858 } else {
2858 2859 if (!_oop_cl.drain<oop>()) return false;
2859 2860 }
2860 2861 }
2861 2862 // Otherwise...
2862 2863 return true;
2863 2864 }
2864 2865 };
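The offsetToHeapWord() call above is the inverse of the bitmap's word-to-offset mapping: with one bit covering (1 << shifter) heap words, both directions are a shift relative to the address that startWord() returns. A sketch of that arithmetic with assumed field names:

#include <cstddef>
#include <cstdint>

typedef uintptr_t HeapWordT;   // stand-in for HotSpot's HeapWord

struct BitmapGeometry {
  HeapWordT* base;             // first word covered by bit 0
  int        shifter;          // log2 of heap words per bit

  size_t word_to_offset(HeapWordT* addr) const {
    return (size_t)(addr - base) >> shifter;
  }
  HeapWordT* offset_to_word(size_t offset) const {
    return base + (offset << shifter);
  }
};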
2865 2866
2866 2867
2867 2868 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2868 2869 CMBitMap* _bm;
2869 2870 CSMarkBitMapClosure _bit_cl;
2870 2871 enum SomePrivateConstants {
2871 2872 MSSize = 1000
2872 2873 };
2873 2874 bool _completed;
2874 2875 public:
2875 2876 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2876 2877 _bm(cm->nextMarkBitMap()),
2877 2878 _bit_cl(cm, MSSize),
2878 2879 _completed(true)
2879 2880 {}
2880 2881
2881 2882 ~CompleteMarkingInCSHRClosure() {}
2882 2883
2883 2884 bool doHeapRegion(HeapRegion* r) {
2884 2885 if (!r->evacuation_failed()) {
2885 2886 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2886 2887 if (!mr.is_empty()) {
2887 2888 if (!_bm->iterate(&_bit_cl, mr)) {
2888 2889 _completed = false;
2889 2890 return true;
2890 2891 }
2891 2892 }
2892 2893 }
2893 2894 return false;
2894 2895 }
2895 2896
2896 2897 bool completed() { return _completed; }
2897 2898 };
2898 2899
2899 2900 class ClearMarksInHRClosure: public HeapRegionClosure {
2900 2901 CMBitMap* _bm;
2901 2902 public:
2902 2903 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2903 2904
2904 2905 bool doHeapRegion(HeapRegion* r) {
2905 2906 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2906 2907 MemRegion usedMR = r->used_region();
2907 2908       _bm->clearRange(usedMR);
2908 2909 }
2909 2910 return false;
2910 2911 }
2911 2912 };
2912 2913
2913 2914 void ConcurrentMark::complete_marking_in_collection_set() {
2914 2915 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2915 2916
2916 2917 if (!g1h->mark_in_progress()) {
2917 2918 g1h->g1_policy()->record_mark_closure_time(0.0);
2918 2919 return;
2919 2920 }
2920 2921
2921 2922 int i = 1;
2922 2923 double start = os::elapsedTime();
2923 2924 while (true) {
2924 2925 i++;
2925 2926 CompleteMarkingInCSHRClosure cmplt(this);
2926 2927 g1h->collection_set_iterate(&cmplt);
2927 2928 if (cmplt.completed()) break;
2928 2929 }
2929 2930 double end_time = os::elapsedTime();
2930 2931 double elapsed_time_ms = (end_time - start) * 1000.0;
2931 2932 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2932 2933
2933 2934 ClearMarksInHRClosure clr(nextMarkBitMap());
2934 2935 g1h->collection_set_iterate(&clr);
2935 2936 }
2936 2937
2937 2938 // The next two methods deal with the following optimisation. Some
2938 2939 // objects are gray by being marked and located above the finger. If
2939 2940 // they are copied below the finger during an evacuation pause, then
2940 2941 // they need to be pushed on the stack. The observation is that, if
2941 2942 // there are no regions in the collection set located above the
2942 2943 // finger, then the above cannot happen, hence we do not need to
2943 2944 // explicitly gray any objects when copying them to below the
2944 2945 // finger. The global stack will be scanned to ensure that, if it
2945 2946 // points to objects being copied, it will update their
2946 2947 // location. There is a tricky situation with the gray objects in
2947 2948 // the region stack that are being copied, however. See the comment
2948 2949 // in newCSet().
2949 2950
2950 2951 void ConcurrentMark::newCSet() {
2951 2952 if (!concurrent_marking_in_progress())
2952 2953 // nothing to do if marking is not in progress
2953 2954 return;
2954 2955
2955 2956 // find what the lowest finger is among the global and local fingers
2956 2957 _min_finger = _finger;
2957 2958 for (int i = 0; i < (int)_max_task_num; ++i) {
2958 2959 CMTask* task = _tasks[i];
2959 2960 HeapWord* task_finger = task->finger();
2960 2961 if (task_finger != NULL && task_finger < _min_finger)
2961 2962 _min_finger = task_finger;
2962 2963 }
2963 2964
2964 2965 _should_gray_objects = false;
2965 2966
2966 2967 // This fixes a very subtle and frustrating bug. It might be the case
2967 2968 // that, during an evacuation pause, heap regions that contain
2968 2969 // objects that are gray (by being in regions contained in the
2969 2970 // region stack) are included in the collection set. Since such gray
2970 2971 // objects will be moved, and because it's not easy to redirect
2971 2972 // region stack entries to point to a new location (because objects
2972 2973 // in one region might be scattered to multiple regions after they
2973 2974 // are copied), one option is to ensure that all marked objects
2974 2975 // copied during a pause are pushed on the stack. Notice, however,
2975 2976 // that this problem can only happen when the region stack is not
2976 2977 // empty during an evacuation pause. So, we make the fix a bit less
2977 2978 // conservative and ensure that regions are pushed on the stack,
2978 2979 // irrespective of whether all collection set regions are below the
2979 2980 // finger, if the region stack is not empty. This is expected to be
2980 2981 // a rare case, so I don't think it's necessary to be smarter about it.
2981 2982 if (!region_stack_empty() || has_aborted_regions())
2982 2983 _should_gray_objects = true;
2983 2984 }
2984 2985
2985 2986 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2986 2987 if (!concurrent_marking_in_progress())
2987 2988 return;
2988 2989
2989 2990 HeapWord* region_end = hr->end();
2990 2991 if (region_end > _min_finger)
2991 2992 _should_gray_objects = true;
2992 2993 }
2993 2994
2994 2995 // abandon current marking iteration due to a Full GC
2995 2996 void ConcurrentMark::abort() {
2996 2997 // Clear all marks to force marking thread to do nothing
2997 2998 _nextMarkBitMap->clearAll();
2998 2999 // Empty mark stack
2999 3000 clear_marking_state();
3000 3001 for (int i = 0; i < (int)_max_task_num; ++i) {
3001 3002 _tasks[i]->clear_region_fields();
3002 3003 }
3003 3004 _has_aborted = true;
3004 3005
3005 3006 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3006 3007 satb_mq_set.abandon_partial_marking();
3007 3008 // This can be called either during or outside marking; we'll read
3008 3009 // the expected_active value from the SATB queue set.
3009 3010 satb_mq_set.set_active_all_threads(
3010 3011 false, /* new active value */
3011 3012 satb_mq_set.is_active() /* expected_active */);
3012 3013 }
3013 3014
3014 3015 static void print_ms_time_info(const char* prefix, const char* name,
3015 3016 NumberSeq& ns) {
3016 3017 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3017 3018 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3018 3019 if (ns.num() > 0) {
3019 3020 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3020 3021 prefix, ns.sd(), ns.maximum());
3021 3022 }
3022 3023 }
3023 3024
3024 3025 void ConcurrentMark::print_summary_info() {
3025 3026 gclog_or_tty->print_cr(" Concurrent marking:");
3026 3027 print_ms_time_info(" ", "init marks", _init_times);
3027 3028 print_ms_time_info(" ", "remarks", _remark_times);
3028 3029 {
3029 3030 print_ms_time_info(" ", "final marks", _remark_mark_times);
3030 3031 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3031 3032
3032 3033 }
3033 3034 print_ms_time_info(" ", "cleanups", _cleanup_times);
3034 3035 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3035 3036 _total_counting_time,
3036 3037 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3037 3038 (double)_cleanup_times.num()
3038 3039 : 0.0));
3039 3040 if (G1ScrubRemSets) {
3040 3041 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3041 3042 _total_rs_scrub_time,
3042 3043 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3043 3044 (double)_cleanup_times.num()
3044 3045 : 0.0));
3045 3046 }
3046 3047 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3047 3048 (_init_times.sum() + _remark_times.sum() +
3048 3049 _cleanup_times.sum())/1000.0);
3049 3050 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3050 3051 "(%8.2f s marking, %8.2f s counting).",
3051 3052 cmThread()->vtime_accum(),
3052 3053 cmThread()->vtime_mark_accum(),
3053 3054 cmThread()->vtime_count_accum());
3054 3055 }
3055 3056
3056 3057 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3057 3058 _parallel_workers->print_worker_threads_on(st);
3058 3059 }
3059 3060
3060 3061 // Closures
3061 3062 // XXX: there seems to be a lot of code duplication here;
3062 3063 // should refactor and consolidate the shared code.
3063 3064
3066 3067
3067 3068 // We take a break if someone is trying to stop the world.
3068 3069 bool ConcurrentMark::do_yield_check(int worker_i) {
3069 3070 if (should_yield()) {
3070 3071 if (worker_i == 0)
3071 3072 _g1h->g1_policy()->record_concurrent_pause();
3072 3073 cmThread()->yield();
3073 3074 if (worker_i == 0)
3074 3075 _g1h->g1_policy()->record_concurrent_pause_end();
3075 3076 return true;
3076 3077 } else {
3077 3078 return false;
3078 3079 }
3079 3080 }
3080 3081
3081 3082 bool ConcurrentMark::should_yield() {
3082 3083 return cmThread()->should_yield();
3083 3084 }
3084 3085
3085 3086 bool ConcurrentMark::containing_card_is_marked(void* p) {
3086 3087 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3087 3088 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3088 3089 }
3089 3090
3090 3091 bool ConcurrentMark::containing_cards_are_marked(void* start,
3091 3092 void* last) {
3092 3093 return
3093 3094 containing_card_is_marked(start) &&
3094 3095 containing_card_is_marked(last);
3095 3096 }
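
// Illustrative sketch (editorial, not part of this patch): a standalone
// model of the card lookup above. containing_card_is_marked() takes the
// byte offset of p from the bottom of the reserved heap and shifts it by
// the card size to obtain a bit index into _card_bm. The 512-byte card
// size (card shift of 9) assumed below is for illustration only.

#include <cstddef>
#include <cstdint>

static const int kCardShift = 9; // assumed 512-byte cards

// Map a heap address to the index of its containing card.
static size_t card_index_for(const void* heap_start, const void* p) {
  size_t byte_offset = (uintptr_t)p - (uintptr_t)heap_start;
  return byte_offset >> kCardShift; // one bit per card in the card bitmap
}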
3096 3097
3097 3098 #ifndef PRODUCT
3098 3099 // for debugging purposes
3099 3100 void ConcurrentMark::print_finger() {
3100 3101 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3101 3102 _heap_start, _heap_end, _finger);
3102 3103 for (int i = 0; i < (int) _max_task_num; ++i) {
3103 3104 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
3104 3105 }
3105 3106 gclog_or_tty->print_cr("");
3106 3107 }
3107 3108 #endif
3108 3109
3109 3110 // Closure for iteration over bitmaps
3110 3111 class CMBitMapClosure : public BitMapClosure {
3111 3112 private:
3112 3113 // the bitmap that is being iterated over
3113 3114 CMBitMap* _nextMarkBitMap;
3114 3115 ConcurrentMark* _cm;
3115 3116 CMTask* _task;
3116 3117 // true if we're scanning a heap region claimed by the task (so that
3117 3118 // we move the finger along), false if we're not, i.e. currently when
3118 3119 // scanning a heap region popped from the region stack (so that we
3119 3120 // do not move the task finger along; it'd be a mistake if we did so).
3120 3121 bool _scanning_heap_region;
3121 3122
3122 3123 public:
3123 3124 CMBitMapClosure(CMTask *task,
3124 3125 ConcurrentMark* cm,
3125 3126 CMBitMap* nextMarkBitMap)
3126 3127 : _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3127 3128
3128 3129 void set_scanning_heap_region(bool scanning_heap_region) {
3129 3130 _scanning_heap_region = scanning_heap_region;
3130 3131 }
3131 3132
3132 3133 bool do_bit(size_t offset) {
3133 3134 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3134 3135 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3135 3136 assert( addr < _cm->finger(), "invariant");
3136 3137
3137 3138 if (_scanning_heap_region) {
3138 3139 statsOnly( _task->increase_objs_found_on_bitmap() );
3139 3140 assert(addr >= _task->finger(), "invariant");
3140 3141 // We move that task's local finger along.
3141 3142 _task->move_finger_to(addr);
3142 3143 } else {
3143 3144 // We move the task's region finger along.
3144 3145 _task->move_region_finger_to(addr);
3145 3146 }
3146 3147
3147 3148 _task->scan_object(oop(addr));
3148 3149 // we only partially drain the local queue and global stack
3149 3150 _task->drain_local_queue(true);
3150 3151 _task->drain_global_stack(true);
3151 3152
3152 3153 // if the has_aborted flag has been raised, we need to bail out of
3153 3154 // the iteration
3154 3155 return !_task->has_aborted();
3155 3156 }
3156 3157 };
3157 3158
3158 3159 // Closure for iterating over objects, currently only used for
3159 3160 // processing SATB buffers.
3160 3161 class CMObjectClosure : public ObjectClosure {
3161 3162 private:
3162 3163 CMTask* _task;
3163 3164
3164 3165 public:
3165 3166 void do_object(oop obj) {
3166 3167 _task->deal_with_reference(obj);
3167 3168 }
3168 3169
3169 3170 CMObjectClosure(CMTask* task) : _task(task) { }
3170 3171 };
3171 3172
3172 3173 // Closure for iterating over object fields
3173 3174 class CMOopClosure : public OopClosure {
3174 3175 private:
3175 3176 G1CollectedHeap* _g1h;
3176 3177 ConcurrentMark* _cm;
3177 3178 CMTask* _task;
3178 3179
3179 3180 public:
3180 3181 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
3181 3182 virtual void do_oop( oop* p) { do_oop_work(p); }
3182 3183
3183 3184 template <class T> void do_oop_work(T* p) {
3184 3185 assert( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
3185 3186 assert(!_g1h->is_on_master_free_list(
3186 3187 _g1h->heap_region_containing((HeapWord*) p)), "invariant");
3187 3188
3188 3189 oop obj = oopDesc::load_decode_heap_oop(p);
3189 3190 if (_cm->verbose_high())
3190 3191 gclog_or_tty->print_cr("[%d] we're looking at location "
3191 3192 "*"PTR_FORMAT" = "PTR_FORMAT,
3192 3193 _task->task_id(), p, (void*) obj);
3193 3194 _task->deal_with_reference(obj);
3194 3195 }
3195 3196
3196 3197 CMOopClosure(G1CollectedHeap* g1h,
3197 3198 ConcurrentMark* cm,
3198 3199 CMTask* task)
3199 3200 : _g1h(g1h), _cm(cm), _task(task)
3200 3201 {
3201 3202 _ref_processor = g1h->ref_processor();
3202 3203 assert(_ref_processor != NULL, "should not be NULL");
3203 3204 }
3204 3205 };
3205 3206
3206 3207 void CMTask::setup_for_region(HeapRegion* hr) {
3207 3208 // Separated the asserts so that we know which one fires.
3208 3209 assert(hr != NULL,
3209 3210 "claim_region() should have filtered out continues humongous regions");
3210 3211 assert(!hr->continuesHumongous(),
3211 3212 "claim_region() should have filtered out continues humongous regions");
3212 3213
3213 3214 if (_cm->verbose_low())
3214 3215 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3215 3216 _task_id, hr);
3216 3217
3217 3218 _curr_region = hr;
3218 3219 _finger = hr->bottom();
3219 3220 update_region_limit();
3220 3221 }
3221 3222
3222 3223 void CMTask::update_region_limit() {
3223 3224 HeapRegion* hr = _curr_region;
3224 3225 HeapWord* bottom = hr->bottom();
3225 3226 HeapWord* limit = hr->next_top_at_mark_start();
3226 3227
3227 3228 if (limit == bottom) {
3228 3229 if (_cm->verbose_low())
3229 3230 gclog_or_tty->print_cr("[%d] found an empty region "
3230 3231 "["PTR_FORMAT", "PTR_FORMAT")",
3231 3232 _task_id, bottom, limit);
3232 3233 // The region was collected underneath our feet.
3233 3234 // We set the finger to bottom to ensure that the bitmap
3234 3235 // iteration that will follow this will not do anything.
3235 3236 // (this is not a condition that holds when we set the region up,
3236 3237 // as the region is not supposed to be empty in the first place)
3237 3238 _finger = bottom;
3238 3239 } else if (limit >= _region_limit) {
3239 3240 assert(limit >= _finger, "peace of mind");
3240 3241 } else {
3241 3242 assert(limit < _region_limit, "only way to get here");
3242 3243 // This can happen under some pretty unusual circumstances. An
3243 3244 // evacuation pause empties the region underneath our feet (NTAMS
3244 3245 // at bottom). We then do some allocation in the region (NTAMS
3245 3246 // stays at bottom), followed by the region being used as a GC
3246 3247 // alloc region (NTAMS will move to top() and the objects
3247 3248 // originally below it will be grayed). All objects now marked in
3248 3249 // the region are explicitly grayed, if below the global finger,
3249 3250 // and in fact we do not need to scan anything else. So, we simply
3250 3251 // set _finger to be limit to ensure that the bitmap iteration
3251 3252 // doesn't do anything.
3252 3253 _finger = limit;
3253 3254 }
3254 3255
3255 3256 _region_limit = limit;
3256 3257 }
3257 3258
3258 3259 void CMTask::giveup_current_region() {
3259 3260 assert(_curr_region != NULL, "invariant");
3260 3261 if (_cm->verbose_low())
3261 3262 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3262 3263 _task_id, _curr_region);
3263 3264 clear_region_fields();
3264 3265 }
3265 3266
3266 3267 void CMTask::clear_region_fields() {
3267 3268 // Values for these three fields that indicate that we're not
3268 3269 // holding on to a region.
3269 3270 _curr_region = NULL;
3270 3271 _finger = NULL;
3271 3272 _region_limit = NULL;
3272 3273
3273 3274 _region_finger = NULL;
3274 3275 }
3275 3276
3276 3277 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3277 3278 guarantee(nextMarkBitMap != NULL, "invariant");
3278 3279
3279 3280 if (_cm->verbose_low())
3280 3281 gclog_or_tty->print_cr("[%d] resetting", _task_id);
3281 3282
3282 3283 _nextMarkBitMap = nextMarkBitMap;
3283 3284 clear_region_fields();
3284 3285 assert(_aborted_region.is_empty(), "should have been cleared");
3285 3286
3286 3287 _calls = 0;
3287 3288 _elapsed_time_ms = 0.0;
3288 3289 _termination_time_ms = 0.0;
3289 3290 _termination_start_time_ms = 0.0;
3290 3291
3291 3292 #if _MARKING_STATS_
3292 3293 _local_pushes = 0;
3293 3294 _local_pops = 0;
3294 3295 _local_max_size = 0;
3295 3296 _objs_scanned = 0;
3296 3297 _global_pushes = 0;
3297 3298 _global_pops = 0;
3298 3299 _global_max_size = 0;
3299 3300 _global_transfers_to = 0;
3300 3301 _global_transfers_from = 0;
3301 3302 _region_stack_pops = 0;
3302 3303 _regions_claimed = 0;
3303 3304 _objs_found_on_bitmap = 0;
3304 3305 _satb_buffers_processed = 0;
3305 3306 _steal_attempts = 0;
3306 3307 _steals = 0;
3307 3308 _aborted = 0;
3308 3309 _aborted_overflow = 0;
3309 3310 _aborted_cm_aborted = 0;
3310 3311 _aborted_yield = 0;
3311 3312 _aborted_timed_out = 0;
3312 3313 _aborted_satb = 0;
3313 3314 _aborted_termination = 0;
3314 3315 #endif // _MARKING_STATS_
3315 3316 }
3316 3317
3317 3318 bool CMTask::should_exit_termination() {
3318 3319 regular_clock_call();
3319 3320 // This is called when we are in the termination protocol. We should
3320 3321 // quit if, for some reason, this task wants to abort or the global
3321 3322 // stack is not empty (this means that we can get work from it).
3322 3323 return !_cm->mark_stack_empty() || has_aborted();
3323 3324 }
3324 3325
3325 3326 // This determines whether the method below will check both the local
3326 3327 // and global fingers when determining whether to push on the stack a
3327 3328 // gray object (value 1) or whether it will only check the global one
3328 3329 // (value 0). The tradeoffs are that the former will be a bit more
3329 3330 // accurate and possibly push less on the stack, but it might also be
3330 3331 // a little bit slower.
3331 3332
3332 3333 #define _CHECK_BOTH_FINGERS_ 1
3333 3334
3334 3335 void CMTask::deal_with_reference(oop obj) {
3335 3336 if (_cm->verbose_high())
3336 3337 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
3337 3338 _task_id, (void*) obj);
3338 3339
3339 3340 ++_refs_reached;
3340 3341
3341 3342 HeapWord* objAddr = (HeapWord*) obj;
3342 3343 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3343 3344 if (_g1h->is_in_g1_reserved(objAddr)) {
3344 3345 assert(obj != NULL, "is_in_g1_reserved should ensure this");
3345 3346 HeapRegion* hr = _g1h->heap_region_containing(obj);
3346 3347 if (_g1h->is_obj_ill(obj, hr)) {
3347 3348 if (_cm->verbose_high())
3348 3349 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
3349 3350 _task_id, (void*) obj);
3350 3351
3351 3352 // we need to mark it first
3352 3353 if (_nextMarkBitMap->parMark(objAddr)) {
3353 3354 // No OrderAccess::store_load() is needed. It is implicit in the
3354 3355 // CAS done in parMark(objAddr) above
3355 3356 HeapWord* global_finger = _cm->finger();
3356 3357
3357 3358 #if _CHECK_BOTH_FINGERS_
3358 3359 // we will check both the local and global fingers
3359 3360
3360 3361 if (_finger != NULL && objAddr < _finger) {
3361 3362 if (_cm->verbose_high())
3362 3363 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3363 3364 "pushing it", _task_id, _finger);
3364 3365 push(obj);
3365 3366 } else if (_curr_region != NULL && objAddr < _region_limit) {
3366 3367 // do nothing
3367 3368 } else if (objAddr < global_finger) {
3368 3369 // Notice that the global finger might be moving forward
3369 3370 // concurrently. This is not a problem. In the worst case, we
3370 3371 // mark the object while it is above the global finger and, by
3371 3372 // the time we read the global finger, it has moved forward
3372 3373 // past this object. In this case, the object will probably
3373 3374 // be visited when a task is scanning the region and will also
3374 3375 // be pushed on the stack. So, some duplicate work, but no
3375 3376 // correctness problems.
3376 3377
3377 3378 if (_cm->verbose_high())
3378 3379 gclog_or_tty->print_cr("[%d] below the global finger "
3379 3380 "("PTR_FORMAT"), pushing it",
3380 3381 _task_id, global_finger);
3381 3382 push(obj);
3382 3383 } else {
3383 3384 // do nothing
3384 3385 }
3385 3386 #else // _CHECK_BOTH_FINGERS_
3386 3387 // we will only check the global finger
3387 3388
3388 3389 if (objAddr < global_finger) {
3389 3390 // see long comment above
3390 3391
3391 3392 if (_cm->verbose_high())
3392 3393 gclog_or_tty->print_cr("[%d] below the global finger "
3393 3394 "("PTR_FORMAT"), pushing it",
3394 3395 _task_id, global_finger);
3395 3396 push(obj);
3396 3397 }
3397 3398 #endif // _CHECK_BOTH_FINGERS_
3398 3399 }
3399 3400 }
3400 3401 }
3401 3402 }
3402 3403
3403 3404 void CMTask::push(oop obj) {
3404 3405 HeapWord* objAddr = (HeapWord*) obj;
3405 3406 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3406 3407 assert(!_g1h->is_on_master_free_list(
3407 3408 _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
3408 3409 assert(!_g1h->is_obj_ill(obj), "invariant");
3409 3410 assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3410 3411
3411 3412 if (_cm->verbose_high())
3412 3413 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3413 3414
3414 3415 if (!_task_queue->push(obj)) {
3415 3416 // The local task queue looks full. We need to push some entries
3416 3417 // to the global stack.
3417 3418
3418 3419 if (_cm->verbose_medium())
3419 3420 gclog_or_tty->print_cr("[%d] task queue overflow, "
3420 3421 "moving entries to the global stack",
3421 3422 _task_id);
3422 3423 move_entries_to_global_stack();
3423 3424
3424 3425 // this should succeed since, even if we overflow the global
3425 3426 // stack, we should have definitely removed some entries from the
3426 3427 // local queue. So, there must be space on it.
3427 3428 bool success = _task_queue->push(obj);
3428 3429 assert(success, "invariant");
3429 3430 }
3430 3431
3431 3432 statsOnly( int tmp_size = _task_queue->size();
3432 3433 if (tmp_size > _local_max_size)
3433 3434 _local_max_size = tmp_size;
3434 3435 ++_local_pushes );
3435 3436 }
3436 3437
3437 3438 void CMTask::reached_limit() {
3438 3439 assert(_words_scanned >= _words_scanned_limit ||
3439 3440 _refs_reached >= _refs_reached_limit ,
3440 3441 "shouldn't have been called otherwise");
3441 3442 regular_clock_call();
3442 3443 }
3443 3444
3444 3445 void CMTask::regular_clock_call() {
3445 3446 if (has_aborted())
3446 3447 return;
3447 3448
3448 3449 // First, we need to recalculate the words scanned and refs reached
3449 3450 // limits for the next clock call.
3450 3451 recalculate_limits();
3451 3452
3452 3453 // During the regular clock call we do the following
3453 3454
3454 3455 // (1) If an overflow has been flagged, then we abort.
3455 3456 if (_cm->has_overflown()) {
3456 3457 set_has_aborted();
3457 3458 return;
3458 3459 }
3459 3460
3460 3461 // If we are not concurrent (i.e. we're doing remark) we don't need
3461 3462 // to check anything else. The other steps are only needed during
3462 3463 // the concurrent marking phase.
3463 3464 if (!concurrent())
3464 3465 return;
3465 3466
3466 3467 // (2) If marking has been aborted for Full GC, then we also abort.
3467 3468 if (_cm->has_aborted()) {
3468 3469 set_has_aborted();
3469 3470 statsOnly( ++_aborted_cm_aborted );
3470 3471 return;
3471 3472 }
3472 3473
3473 3474 double curr_time_ms = os::elapsedVTime() * 1000.0;
3474 3475
3475 3476 // (3) If marking stats are enabled, then we update the step history.
3476 3477 #if _MARKING_STATS_
3477 3478 if (_words_scanned >= _words_scanned_limit)
3478 3479 ++_clock_due_to_scanning;
3479 3480 if (_refs_reached >= _refs_reached_limit)
3480 3481 ++_clock_due_to_marking;
3481 3482
3482 3483 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3483 3484 _interval_start_time_ms = curr_time_ms;
3484 3485 _all_clock_intervals_ms.add(last_interval_ms);
3485 3486
3486 3487 if (_cm->verbose_medium()) {
3487 3488 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3488 3489 "scanned = %d%s, refs reached = %d%s",
3489 3490 _task_id, last_interval_ms,
3490 3491 _words_scanned,
3491 3492 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3492 3493 _refs_reached,
3493 3494 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3494 3495 }
3495 3496 #endif // _MARKING_STATS_
3496 3497
3497 3498 // (4) We check whether we should yield. If we have to, then we abort.
3498 3499 if (_cm->should_yield()) {
3499 3500 // We should yield. To do this we abort the task. The caller is
3500 3501 // responsible for yielding.
3501 3502 set_has_aborted();
3502 3503 statsOnly( ++_aborted_yield );
3503 3504 return;
3504 3505 }
3505 3506
3506 3507 // (5) We check whether we've reached our time quota. If we have,
3507 3508 // then we abort.
3508 3509 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3509 3510 if (elapsed_time_ms > _time_target_ms) {
3510 3511 set_has_aborted();
3511 3512 _has_timed_out = true;
3512 3513 statsOnly( ++_aborted_timed_out );
3513 3514 return;
3514 3515 }
3515 3516
3516 3517 // (6) Finally, we check whether there are enough completed SATB
3517 3518 // buffers available for processing. If there are, we abort.
3518 3519 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3519 3520 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3520 3521 if (_cm->verbose_low())
3521 3522 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3522 3523 _task_id);
3523 3524 // we do need to process SATB buffers, we'll abort and restart
3524 3525 // the marking task to do so
3525 3526 set_has_aborted();
3526 3527 statsOnly( ++_aborted_satb );
3527 3528 return;
3528 3529 }
3529 3530 }
3530 3531
3531 3532 void CMTask::recalculate_limits() {
3532 3533 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3533 3534 _words_scanned_limit = _real_words_scanned_limit;
3534 3535
3535 3536 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3536 3537 _refs_reached_limit = _real_refs_reached_limit;
3537 3538 }
3538 3539
3539 3540 void CMTask::decrease_limits() {
3540 3541 // This is called when we believe that we're going to do an infrequent
3541 3542 // operation which will increase the per byte scanned cost (i.e. move
3542 3543 // entries to/from the global stack). It basically tries to decrease the
3543 3544 // scanning limit so that the clock is called earlier.
3544 3545
3545 3546 if (_cm->verbose_medium())
3546 3547 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3547 3548
3548 3549 _words_scanned_limit = _real_words_scanned_limit -
3549 3550 3 * words_scanned_period / 4;
3550 3551 _refs_reached_limit = _real_refs_reached_limit -
3551 3552 3 * refs_reached_period / 4;
3552 3553 }
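
// Illustrative sketch (editorial, not part of this patch): a standalone
// model of the work-based clock scheme that recalculate_limits() and
// decrease_limits() implement. Scanning advances the two work counters;
// once either crosses its limit the regular clock runs and the limits are
// reset one period ahead. After an expensive operation, decrease_limits()
// pulls the next trigger 3/4 of a period closer. The period constants are
// illustrative, not the real HotSpot values.

#include <cstddef>

struct WorkClock {
  static const size_t words_period = 12 * 1024; // assumed period
  static const size_t refs_period  = 384;       // assumed period

  size_t words_scanned, refs_reached;       // work done so far
  size_t real_words_limit, real_refs_limit; // full-period limits
  size_t words_limit, refs_limit;           // current trigger points

  void recalculate_limits() {
    real_words_limit = words_scanned + words_period;
    words_limit      = real_words_limit;
    real_refs_limit  = refs_reached + refs_period;
    refs_limit       = real_refs_limit;
  }

  // Called after an expensive operation: make the clock fire earlier.
  void decrease_limits() {
    words_limit = real_words_limit - 3 * words_period / 4;
    refs_limit  = real_refs_limit  - 3 * refs_period  / 4;
  }

  bool reached_limit() const {
    return words_scanned >= words_limit || refs_reached >= refs_limit;
  }
};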
3553 3554
3554 3555 void CMTask::move_entries_to_global_stack() {
3555 3556 // local array where we'll store the entries that will be popped
3556 3557 // from the local queue
3557 3558 oop buffer[global_stack_transfer_size];
3558 3559
3559 3560 int n = 0;
3560 3561 oop obj;
3561 3562 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3562 3563 buffer[n] = obj;
3563 3564 ++n;
3564 3565 }
3565 3566
3566 3567 if (n > 0) {
3567 3568 // we popped at least one entry from the local queue
3568 3569
3569 3570 statsOnly( ++_global_transfers_to; _local_pops += n );
3570 3571
3571 3572 if (!_cm->mark_stack_push(buffer, n)) {
3572 3573 if (_cm->verbose_low())
3573 3574 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3574 3575 set_has_aborted();
3575 3576 } else {
3576 3577 // the transfer was successful
3577 3578
3578 3579 if (_cm->verbose_medium())
3579 3580 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3580 3581 _task_id, n);
3581 3582 statsOnly( int tmp_size = _cm->mark_stack_size();
3582 3583 if (tmp_size > _global_max_size)
3583 3584 _global_max_size = tmp_size;
3584 3585 _global_pushes += n );
3585 3586 }
3586 3587 }
3587 3588
3588 3589 // this operation was quite expensive, so decrease the limits
3589 3590 decrease_limits();
3590 3591 }
3591 3592
3592 3593 void CMTask::get_entries_from_global_stack() {
3593 3594 // local array where we'll store the entries that will be popped
3594 3595 // from the global stack.
3595 3596 oop buffer[global_stack_transfer_size];
3596 3597 int n;
3597 3598 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3598 3599 assert(n <= global_stack_transfer_size,
3599 3600 "we should not pop more than the given limit");
3600 3601 if (n > 0) {
3601 3602 // yes, we did actually pop at least one entry
3602 3603
3603 3604 statsOnly( ++_global_transfers_from; _global_pops += n );
3604 3605 if (_cm->verbose_medium())
3605 3606 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3606 3607 _task_id, n);
3607 3608 for (int i = 0; i < n; ++i) {
3608 3609 bool success = _task_queue->push(buffer[i]);
3609 3610 // We only call this when the local queue is empty or under a
3610 3611 // given target limit. So, we do not expect this push to fail.
3611 3612 assert(success, "invariant");
3612 3613 }
3613 3614
3614 3615 statsOnly( int tmp_size = _task_queue->size();
3615 3616 if (tmp_size > _local_max_size)
3616 3617 _local_max_size = tmp_size;
3617 3618 _local_pushes += n );
3618 3619 }
3619 3620
3620 3621 // this operation was quite expensive, so decrease the limits
3621 3622 decrease_limits();
3622 3623 }
3623 3624
3624 3625 void CMTask::drain_local_queue(bool partially) {
3625 3626 if (has_aborted())
3626 3627 return;
3627 3628
3628 3629 // Decide what the target size is, depending on whether we're going to
3629 3630 // drain it partially (so that other tasks can steal if they run out
3630 3631 // of things to do) or totally (at the very end).
3631 3632 size_t target_size;
3632 3633 if (partially)
3633 3634 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3634 3635 else
3635 3636 target_size = 0;
3636 3637
3637 3638 if (_task_queue->size() > target_size) {
3638 3639 if (_cm->verbose_high())
3639 3640 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3640 3641 _task_id, target_size);
3641 3642
3642 3643 oop obj;
3643 3644 bool ret = _task_queue->pop_local(obj);
3644 3645 while (ret) {
3645 3646 statsOnly( ++_local_pops );
3646 3647
3647 3648 if (_cm->verbose_high())
3648 3649 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3649 3650 (void*) obj);
3650 3651
3651 3652 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3652 3653 assert(!_g1h->is_on_master_free_list(
3653 3654 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3654 3655
3655 3656 scan_object(obj);
3656 3657
3657 3658 if (_task_queue->size() <= target_size || has_aborted())
3658 3659 ret = false;
3659 3660 else
3660 3661 ret = _task_queue->pop_local(obj);
3661 3662 }
3662 3663
3663 3664 if (_cm->verbose_high())
3664 3665 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3665 3666 _task_id, _task_queue->size());
3666 3667 }
3667 3668 }
3668 3669
3669 3670 void CMTask::drain_global_stack(bool partially) {
3670 3671 if (has_aborted())
3671 3672 return;
3672 3673
3673 3674 // We have a policy to drain the local queue before we attempt to
3674 3675 // drain the global stack.
3675 3676 assert(partially || _task_queue->size() == 0, "invariant");
3676 3677
3677 3678 // Decide what the target size is, depending on whether we're going to
3678 3679 // drain it partially (so that other tasks can steal if they run out
3679 3680 // of things to do) or totally (at the very end). Notice that,
3680 3681 // because we move entries from the global stack in chunks or
3681 3682 // because another task might be doing the same, we might in fact
3682 3683 // drop below the target. But, this is not a problem.
3683 3684 size_t target_size;
3684 3685 if (partially)
3685 3686 target_size = _cm->partial_mark_stack_size_target();
3686 3687 else
3687 3688 target_size = 0;
3688 3689
3689 3690 if (_cm->mark_stack_size() > target_size) {
3690 3691 if (_cm->verbose_low())
3691 3692 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3692 3693 _task_id, target_size);
3693 3694
3694 3695 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3695 3696 get_entries_from_global_stack();
3696 3697 drain_local_queue(partially);
3697 3698 }
3698 3699
3699 3700 if (_cm->verbose_low())
3700 3701 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3701 3702 _task_id, _cm->mark_stack_size());
3702 3703 }
3703 3704 }
3704 3705
3705 3706 // SATB Queue has several assumptions on whether to call the par or
3706 3707 // non-par versions of the methods. This is why some of the code is
3707 3708 // replicated. We should really get rid of the single-threaded version
3708 3709 // of the code to simplify things.
3709 3710 void CMTask::drain_satb_buffers() {
3710 3711 if (has_aborted())
3711 3712 return;
3712 3713
3713 3714 // We set this so that the regular clock knows that we're in the
3714 3715 // middle of draining buffers and doesn't set the abort flag when it
3715 3716 // notices that SATB buffers are available for draining. It'd be
3716 3717 // very counterproductive if it did that. :-)
3717 3718 _draining_satb_buffers = true;
3718 3719
3719 3720 CMObjectClosure oc(this);
3720 3721 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3721 3722 if (G1CollectedHeap::use_parallel_gc_threads())
3722 3723 satb_mq_set.set_par_closure(_task_id, &oc);
3723 3724 else
3724 3725 satb_mq_set.set_closure(&oc);
3725 3726
3726 3727 // This keeps claiming and applying the closure to completed buffers
3727 3728 // until we run out of buffers or we need to abort.
3728 3729 if (G1CollectedHeap::use_parallel_gc_threads()) {
3729 3730 while (!has_aborted() &&
3730 3731 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3731 3732 if (_cm->verbose_medium())
3732 3733 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3733 3734 statsOnly( ++_satb_buffers_processed );
3734 3735 regular_clock_call();
3735 3736 }
3736 3737 } else {
3737 3738 while (!has_aborted() &&
3738 3739 satb_mq_set.apply_closure_to_completed_buffer()) {
3739 3740 if (_cm->verbose_medium())
3740 3741 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3741 3742 statsOnly( ++_satb_buffers_processed );
3742 3743 regular_clock_call();
3743 3744 }
3744 3745 }
3745 3746
3746 3747 if (!concurrent() && !has_aborted()) {
3747 3748 // We should only do this during remark.
3748 3749 if (G1CollectedHeap::use_parallel_gc_threads())
3749 3750 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3750 3751 else
3751 3752 satb_mq_set.iterate_closure_all_threads();
3752 3753 }
3753 3754
3754 3755 _draining_satb_buffers = false;
3755 3756
3756 3757 assert(has_aborted() ||
3757 3758 concurrent() ||
3758 3759 satb_mq_set.completed_buffers_num() == 0, "invariant");
3759 3760
3760 3761 if (G1CollectedHeap::use_parallel_gc_threads())
3761 3762 satb_mq_set.set_par_closure(_task_id, NULL);
3762 3763 else
3763 3764 satb_mq_set.set_closure(NULL);
3764 3765
3765 3766 // again, this was a potentially expensive operation, decrease the
3766 3767 // limits to get the regular clock call early
3767 3768 decrease_limits();
3768 3769 }
3769 3770
3770 3771 void CMTask::drain_region_stack(BitMapClosure* bc) {
3771 3772 if (has_aborted())
3772 3773 return;
3773 3774
3774 3775 assert(_region_finger == NULL,
3775 3776 "it should be NULL when we're not scanning a region");
3776 3777
3777 3778 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3778 3779 if (_cm->verbose_low())
3779 3780 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3780 3781 _task_id, _cm->region_stack_size());
3781 3782
3782 3783 MemRegion mr;
3783 3784
3784 3785 if (!_aborted_region.is_empty()) {
3785 3786 mr = _aborted_region;
3786 3787 _aborted_region = MemRegion();
3787 3788
3788 3789 if (_cm->verbose_low())
3789 3790 gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
3790 3791 _task_id, mr.start(), mr.end());
3791 3792 } else {
3792 3793 mr = _cm->region_stack_pop_lock_free();
3793 3794 // it returns MemRegion() if the pop fails
3794 3795 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3795 3796 }
3796 3797
3797 3798 while (mr.start() != NULL) {
3798 3799 if (_cm->verbose_medium())
3799 3800 gclog_or_tty->print_cr("[%d] we are scanning region "
3800 3801 "["PTR_FORMAT", "PTR_FORMAT")",
3801 3802 _task_id, mr.start(), mr.end());
3802 3803
3803 3804 assert(mr.end() <= _cm->finger(),
3804 3805 "otherwise the region shouldn't be on the stack");
3805 3806 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3806 3807 if (_nextMarkBitMap->iterate(bc, mr)) {
3807 3808 assert(!has_aborted(),
3808 3809 "cannot abort the task without aborting the bitmap iteration");
3809 3810
3810 3811 // We finished iterating over the region without aborting.
3811 3812 regular_clock_call();
3812 3813 if (has_aborted())
3813 3814 mr = MemRegion();
3814 3815 else {
3815 3816 mr = _cm->region_stack_pop_lock_free();
3816 3817 // it returns MemRegion() if the pop fails
3817 3818 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3818 3819 }
3819 3820 } else {
3820 3821 assert(has_aborted(), "currently the only way to do so");
3821 3822
3822 3823 // The only way to abort the bitmap iteration is to return
3823 3824 // false from the do_bit() method. However, inside the
3824 3825 // do_bit() method we move the _region_finger to point to the
3825 3826 // object currently being looked at. So, if we bail out, we
3826 3827 // have definitely set _region_finger to something non-null.
3827 3828 assert(_region_finger != NULL, "invariant");
3828 3829
3829 3830 // Make sure that any previously aborted region has been
3830 3831 // cleared.
3831 3832 assert(_aborted_region.is_empty(), "aborted region not cleared");
3832 3833
3833 3834 // The iteration was actually aborted. So now _region_finger
3834 3835 // points to the address of the object we last scanned. If we
3835 3836 // leave it there, when we restart this task, we will rescan
3836 3837 // the object. It is easy to avoid this. We move the finger by
3837 3838 // enough to point to the next possible object header (the
3838 3839 // bitmap knows by how much we need to move it as it knows its
3839 3840 // granularity).
3840 3841 MemRegion newRegion =
3841 3842 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3842 3843
3843 3844 if (!newRegion.is_empty()) {
3844 3845 if (_cm->verbose_low()) {
3845 3846 gclog_or_tty->print_cr("[%d] recording unscanned region"
3846 3847 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3847 3848 _task_id,
3848 3849 newRegion.start(), newRegion.end());
3849 3850 }
3850 3851 // Now record the part of the region we didn't scan to
3851 3852 // make sure this task scans it later.
3852 3853 _aborted_region = newRegion;
3853 3854 }
3854 3855 // break from while
3855 3856 mr = MemRegion();
3856 3857 }
3857 3858 _region_finger = NULL;
3858 3859 }
3859 3860
3860 3861 if (_cm->verbose_low())
3861 3862 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3862 3863 _task_id, _cm->region_stack_size());
3863 3864 }
3864 3865 }
3865 3866
3866 3867 void CMTask::print_stats() {
3867 3868 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3868 3869 _task_id, _calls);
3869 3870 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3870 3871 _elapsed_time_ms, _termination_time_ms);
3871 3872 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3872 3873 _step_times_ms.num(), _step_times_ms.avg(),
3873 3874 _step_times_ms.sd());
3874 3875 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3875 3876 _step_times_ms.maximum(), _step_times_ms.sum());
3876 3877
3877 3878 #if _MARKING_STATS_
3878 3879 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3879 3880 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3880 3881 _all_clock_intervals_ms.sd());
3881 3882 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3882 3883 _all_clock_intervals_ms.maximum(),
3883 3884 _all_clock_intervals_ms.sum());
3884 3885 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3885 3886 _clock_due_to_scanning, _clock_due_to_marking);
3886 3887 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3887 3888 _objs_scanned, _objs_found_on_bitmap);
3888 3889 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3889 3890 _local_pushes, _local_pops, _local_max_size);
3890 3891 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3891 3892 _global_pushes, _global_pops, _global_max_size);
3892 3893 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3893 3894 _global_transfers_to,_global_transfers_from);
3894 3895 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3895 3896 _regions_claimed, _region_stack_pops);
3896 3897 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3897 3898 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3898 3899 _steal_attempts, _steals);
3899 3900 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3900 3901 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3901 3902 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3902 3903 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3903 3904 _aborted_timed_out, _aborted_satb, _aborted_termination);
3904 3905 #endif // _MARKING_STATS_
3905 3906 }
3906 3907
3907 3908 /*****************************************************************************
3908 3909
3909 3910 The do_marking_step(time_target_ms) method is the building block
3910 3911 of the parallel marking framework. It can be called in parallel
3911 3912 with other invocations of do_marking_step() on different tasks
3912 3913 (but only one per task, obviously) and concurrently with the
3913 3914 mutator threads, or during remark, hence it eliminates the need
3914 3915 for two versions of the code. When called during remark, it will
3915 3916 pick up from where the task left off during the concurrent marking
3916 3917 phase. Interestingly, tasks are also claimable during evacuation
3917 3918 pauses, since do_marking_step() ensures that it aborts before
3918 3919 it needs to yield.
3919 3920
3920 3921 The data structures that it uses to do marking work are the
3921 3922 following:
3922 3923
3923 3924 (1) Marking Bitmap. If there are gray objects that appear only
3924 3925 on the bitmap (this happens either when dealing with an overflow
3925 3926 or when the initial marking phase has simply marked the roots
3926 3927 and didn't push them on the stack), then tasks claim heap
3927 3928 regions whose bitmap they then scan to find gray objects. A
3928 3929 global finger indicates where the end of the last claimed region
3929 3930 is. A local finger indicates how far into the region a task has
3930 3931 scanned. The two fingers are used to determine how to gray an
3931 3932 object (i.e. whether simply marking it is OK, as it will be
3932 3933 visited by a task in the future, or whether it also needs to
3933 3934 be pushed on a stack).
3934 3935
3935 3936 (2) Local Queue. The local queue of the task which is accessed
3936 3937 reasonably efficiently by the task. Other tasks can steal from
3937 3938 it when they run out of work. Throughout the marking phase, a
3938 3939 task attempts to keep its local queue short but not totally
3939 3940 empty, so that entries are available for stealing by other
3940 3941 tasks. Only when there is no more work does a task totally
3941 3942 drain its local queue.
3942 3943
3943 3944 (3) Global Mark Stack. This handles local queue overflow. During
3944 3945 marking only sets of entries are moved between it and the local
3945 3946 queues, as access to it requires a mutex and more fine-grained
3946 3947 interaction with it which might cause contention. If it
3947 3948 overflows, then the marking phase should restart and iterate
3948 3949 over the bitmap to identify gray objects. Throughout the marking
3949 3950 phase, tasks attempt to keep the global mark stack at a small
3950 3951 length but not totally empty, so that entries are available for
3951 3952 popping by other tasks. Only when there is no more work do tasks
3952 3953 totally drain the global mark stack.
3953 3954
3954 3955 (4) Global Region Stack. Entries on it correspond to areas of
3955 3956 the bitmap that need to be scanned since they contain gray
3956 3957 objects. Pushes on the region stack only happen during
3957 3958 evacuation pauses and typically correspond to areas covered by
3958 3959 GC LABs. If it overflows, then the marking phase should restart
3959 3960 and iterate over the bitmap to identify gray objects. Tasks will
3960 3961 try to totally drain the region stack as soon as possible.
3961 3962
3962 3963 (5) SATB Buffer Queue. This is where completed SATB buffers are
3963 3964 made available. Buffers are regularly removed from this queue
3964 3965 and scanned for roots, so that the queue doesn't get too
3965 3966 long. During remark, all completed buffers are processed, as
3966 3967 well as the filled in parts of any uncompleted buffers.
3967 3968
3968 3969 The do_marking_step() method tries to abort when the time target
3969 3970 has been reached. There are a few other cases when the
3970 3971 do_marking_step() method also aborts:
3971 3972
3972 3973 (1) When the marking phase has been aborted (after a Full GC).
3973 3974
3974 3975 (2) When a global overflow (either on the global stack or the
3975 3976 region stack) has been triggered. Before the task aborts, it
3976 3977 will actually sync up with the other tasks to ensure that all
3977 3978 the marking data structures (local queues, stacks, fingers etc.)
3978 3979 are re-initialised so that when do_marking_step() completes,
3979 3980 the marking phase can immediately restart.
3980 3981
3981 3982 (3) When enough completed SATB buffers are available. The
3982 3983 do_marking_step() method only tries to drain SATB buffers right
3983 3984 at the beginning. So, if enough buffers are available, the
3984 3985 marking step aborts and the SATB buffers are processed at
3985 3986 the beginning of the next invocation.
3986 3987
3987 3988 (4) To yield. When we have to yield, we abort and yield
3988 3989 right at the end of do_marking_step(). This saves us from a lot
3989 3990 of hassle as, by yielding we might allow a Full GC. If this
3990 3991 happens then objects will be compacted underneath our feet, the
3991 3992 heap might shrink, etc. We save checking for this by just
3992 3993 aborting and doing the yield right at the end.
3993 3994
3994 3995 From the above it follows that the do_marking_step() method should
3995 3996 be called in a loop (or, otherwise, regularly) until it completes.
3996 3997
3997 3998 If a marking step completes without its has_aborted() flag being
3998 3999 true, it means it has completed the current marking phase (and
3999 4000 also all other marking tasks have done so and have all synced up).
4000 4001
4001 4002 A method called regular_clock_call() is invoked "regularly" (in
4002 4003 sub ms intervals) throughout marking. It is this clock method that
4003 4004 checks all the abort conditions which were mentioned above and
4004 4005 decides when the task should abort. A work-based scheme is used to
4005 4006 trigger this clock method: when the number of object words the
4006 4007 marking phase has scanned or the number of references the marking
4007 4008 phase has visited reach a given limit. Additional invocations of
4008 4009 the clock method have been planted in a few other strategic places
4009 4010 too. The initial reason for the clock method was to avoid calling
4010 4011 vtime too regularly, as it is quite expensive. So, once it was in
4011 4012 place, it was natural to piggy-back all the other conditions on it
4012 4013 too and not constantly check them throughout the code.
4013 4014
4014 4015 *****************************************************************************/
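
// Illustrative sketch (editorial, not part of this patch): per the comment
// above, do_marking_step() is meant to be driven in a loop until it
// completes without aborting. A caller shaped roughly like the concurrent
// marking worker might look as follows; the function name is a stand-in
// and the yield/sleep handling between iterations is elided.

static void drive_marking(CMTask* task, ConcurrentMark* cm,
                          double time_target_ms) {
  do {
    task->do_marking_step(time_target_ms,
                          true /* do_stealing */,
                          true /* do_termination */);
    // If the step aborted (to yield, to process SATB buffers, on an
    // overflow, or on time-out), yield as needed and re-invoke the step;
    // the task resumes from where it left off.
  } while (task->has_aborted() && !cm->has_aborted());
}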
4015 4016
4016 4017 void CMTask::do_marking_step(double time_target_ms,
4017 4018 bool do_stealing,
4018 4019 bool do_termination) {
4019 4020 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4020 4021 assert(concurrent() == _cm->concurrent(), "they should be the same");
4021 4022
4022 4023 assert(concurrent() || _cm->region_stack_empty(),
4023 4024 "the region stack should have been cleared before remark");
4024 4025 assert(concurrent() || !_cm->has_aborted_regions(),
4025 4026 "aborted regions should have been cleared before remark");
4026 4027 assert(_region_finger == NULL,
4027 4028 "this should be non-null only when a region is being scanned");
4028 4029
4029 4030 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4030 4031 assert(_task_queues != NULL, "invariant");
4031 4032 assert(_task_queue != NULL, "invariant");
4032 4033 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4033 4034
4034 4035 assert(!_claimed,
4035 4036 "only one thread should claim this task at any one time");
4036 4037
4037 4038 // OK, this doesn't safeguard against all possible scenarios, as it is
4038 4039 // possible for two threads to set the _claimed flag at the same
4039 4040 // time. But it is only for debugging purposes anyway and it will
4040 4041 // catch most problems.
4041 4042 _claimed = true;
4042 4043
4043 4044 _start_time_ms = os::elapsedVTime() * 1000.0;
4044 4045 statsOnly( _interval_start_time_ms = _start_time_ms );
4045 4046
4046 4047 double diff_prediction_ms =
4047 4048 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4048 4049 _time_target_ms = time_target_ms - diff_prediction_ms;
4049 4050
4050 4051 // set up the variables that are used in the work-based scheme to
4051 4052 // call the regular clock method
4052 4053 _words_scanned = 0;
4053 4054 _refs_reached = 0;
4054 4055 recalculate_limits();
4055 4056
4056 4057 // clear all flags
4057 4058 clear_has_aborted();
4058 4059 _has_timed_out = false;
4059 4060 _draining_satb_buffers = false;
4060 4061
4061 4062 ++_calls;
4062 4063
4063 4064 if (_cm->verbose_low())
4064 4065 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4065 4066 "target = %1.2lfms >>>>>>>>>>",
4066 4067 _task_id, _calls, _time_target_ms);
4067 4068
4068 4069 // Set up the bitmap and oop closures. Anything that uses them is
4069 4070 // eventually called from this method, so it is OK to allocate these
4070 4071 // statically.
4071 4072 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4072 4073 CMOopClosure oop_closure(_g1h, _cm, this);
4073 4074 set_oop_closure(&oop_closure);
4074 4075
4075 4076 if (_cm->has_overflown()) {
4076 4077 // This can happen if the region stack or the mark stack overflows
4077 4078 // during a GC pause and this task, after a yield point,
4078 4079 // restarts. We have to abort as we need to get into the overflow
4079 4080 // protocol which happens right at the end of this task.
4080 4081 set_has_aborted();
4081 4082 }
4082 4083
4083 4084 // First drain any available SATB buffers. After this, we will not
4084 4085 // look at SATB buffers before the next invocation of this method.
4085 4086 // If enough completed SATB buffers are queued up, the regular clock
4086 4087 // will abort this task so that it restarts.
4087 4088 drain_satb_buffers();
4088 4089 // ...then partially drain the local queue and the global stack
4089 4090 drain_local_queue(true);
4090 4091 drain_global_stack(true);
4091 4092
4092 4093 // Then totally drain the region stack. We will not look at
4093 4094 // it again before the next invocation of this method. Entries on
4094 4095 // the region stack are only added during evacuation pauses, for
4095 4096 // which we have to yield. When we do, we abort the task anyway so
4096 4097 // it will look at the region stack again when it restarts.
4097 4098 bitmap_closure.set_scanning_heap_region(false);
4098 4099 drain_region_stack(&bitmap_closure);
4099 4100 // ...then partially drain the local queue and the global stack
4100 4101 drain_local_queue(true);
4101 4102 drain_global_stack(true);
4102 4103
4103 4104 do {
4104 4105 if (!has_aborted() && _curr_region != NULL) {
4105 4106 // This means that we're already holding on to a region.
4106 4107 assert(_finger != NULL, "if region is not NULL, then the finger "
4107 4108 "should not be NULL either");
4108 4109
4109 4110 // We might have restarted this task after an evacuation pause
4110 4111 // which might have evacuated the region we're holding on to
4111 4112 // underneath our feet. Let's read its limit again to make sure
4112 4113 // that we do not iterate over a region of the heap that
4113 4114 // contains garbage (update_region_limit() will also move
4114 4115 // _finger to the start of the region if it is found empty).
4115 4116 update_region_limit();
4116 4117 // We will start from _finger not from the start of the region,
4117 4118 // as we might be restarting this task after aborting half-way
4118 4119 // through scanning this region. In this case, _finger points to
4119 4120 // the address where we last found a marked object. If this is a
4120 4121 // fresh region, _finger points to start().
4121 4122 MemRegion mr = MemRegion(_finger, _region_limit);
4122 4123
4123 4124 if (_cm->verbose_low())
4124 4125 gclog_or_tty->print_cr("[%d] we're scanning part "
4125 4126 "["PTR_FORMAT", "PTR_FORMAT") "
4126 4127 "of region "PTR_FORMAT,
4127 4128 _task_id, _finger, _region_limit, _curr_region);
4128 4129
4129 4130 // Let's iterate over the bitmap of the part of the
4130 4131 // region that is left.
4131 4132 bitmap_closure.set_scanning_heap_region(true);
4132 4133 if (mr.is_empty() ||
4133 4134 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4134 4135 // We successfully completed iterating over the region. Now,
4135 4136 // let's give up the region.
4136 4137 giveup_current_region();
4137 4138 regular_clock_call();
4138 4139 } else {
4139 4140 assert(has_aborted(), "currently the only way to do so");
4140 4141 // The only way to abort the bitmap iteration is to return
4141 4142 // false from the do_bit() method. However, inside the
4142 4143 // do_bit() method we move the _finger to point to the
4143 4144 // object currently being looked at. So, if we bail out, we
4144 4145 // have definitely set _finger to something non-null.
4145 4146 assert(_finger != NULL, "invariant");
4146 4147
4147 4148 // Region iteration was actually aborted. So now _finger
4148 4149 // points to the address of the object we last scanned. If we
4149 4150 // leave it there, when we restart this task, we will rescan
4150 4151 // the object. It is easy to avoid this. We move the finger by
4151 4152 // enough to point to the next possible object header (the
4152 4153 // bitmap knows by how much we need to move it as it knows its
4153 4154 // granularity).
4154 4155 assert(_finger < _region_limit, "invariant");
4155 4156 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4156 4157 // Check if bitmap iteration was aborted while scanning the last object
4157 4158 if (new_finger >= _region_limit) {
4158 4159 giveup_current_region();
4159 4160 } else {
4160 4161 move_finger_to(new_finger);
4161 4162 }
4162 4163 }
4163 4164 }
4164 4165 // At this point we have either completed iterating over the
4165 4166 // region we were holding on to, or we have aborted.
4166 4167
4167 4168 // We then partially drain the local queue and the global stack.
4168 4169 // (Do we really need this?)
4169 4170 drain_local_queue(true);
4170 4171 drain_global_stack(true);
4171 4172
4172 4173 // Read the note on the claim_region() method on why it might
4173 4174 // return NULL with potentially more regions available for
4174 4175 // claiming and why we have to check out_of_regions() to determine
4175 4176 // whether we're done or not.
4176 4177 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4177 4178 // We are going to try to claim a new region. We should have
4178 4179 // given up on the previous one.
4179 4180 // Separated the asserts so that we know which one fires.
4180 4181 assert(_curr_region == NULL, "invariant");
4181 4182 assert(_finger == NULL, "invariant");
4182 4183 assert(_region_limit == NULL, "invariant");
4183 4184 if (_cm->verbose_low())
4184 4185 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4185 4186 HeapRegion* claimed_region = _cm->claim_region(_task_id);
4186 4187 if (claimed_region != NULL) {
4187 4188 // Yes, we managed to claim one
4188 4189 statsOnly( ++_regions_claimed );
4189 4190
4190 4191 if (_cm->verbose_low())
4191 4192 gclog_or_tty->print_cr("[%d] we successfully claimed "
4192 4193 "region "PTR_FORMAT,
4193 4194 _task_id, claimed_region);
4194 4195
4195 4196 setup_for_region(claimed_region);
4196 4197 assert(_curr_region == claimed_region, "invariant");
4197 4198 }
4198 4199 // It is important to call the regular clock here. It might take
4199 4200 // a while to claim a region if, for example, we hit a large
4200 4201 // block of empty regions. So we need to call the regular clock
4201 4202 // method once round the loop to make sure it's called
4202 4203 // frequently enough.
4203 4204 regular_clock_call();
4204 4205 }
4205 4206
4206 4207 if (!has_aborted() && _curr_region == NULL) {
4207 4208 assert(_cm->out_of_regions(),
4208 4209 "at this point we should be out of regions");
4209 4210 }
4210 4211 } while ( _curr_region != NULL && !has_aborted());
4211 4212
4212 4213 if (!has_aborted()) {
4213 4214 // We cannot check whether the global stack is empty, since other
4214 4215 // tasks might be pushing objects to it concurrently. We also cannot
4215 4216 // check if the region stack is empty because if a thread is aborting
4216 4217 // it can push a partially done region back.
4217 4218 assert(_cm->out_of_regions(),
4218 4219 "at this point we should be out of regions");
4219 4220
4220 4221 if (_cm->verbose_low())
4221 4222 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4222 4223
4223 4224 // Try to reduce the number of available SATB buffers so that
4224 4225 // remark has less work to do.
4225 4226 drain_satb_buffers();
4226 4227 }
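// --------------------------------------------------------------------
// Editorial sketch of what "draining SATB buffers" amounts to: the
// pre-write barrier logs the old value of overwritten references into
// per-thread buffers, completed buffers are queued, and a marking task
// dequeues them and marks each recorded object. Toy code with made-up
// names; the real SATB queue machinery lives elsewhere in HotSpot.
// --------------------------------------------------------------------
#include <cstddef>
#include <deque>
#include <vector>

typedef void* ToyOop;

struct ToySATBQueueSet {
  std::deque<std::vector<ToyOop> > completed;  // filled by the barrier

  // Process every completed buffer so remark has less left to do.
  template <typename MarkFn>
  void drain(MarkFn mark) {
    while (!completed.empty()) {
      std::vector<ToyOop>& buf = completed.front();
      for (std::size_t i = 0; i < buf.size(); ++i) {
        mark(buf[i]);  // mark the snapshot-at-the-beginning reference
      }
      completed.pop_front();
    }
  }
};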
4227 4228
4228 4229 // Since we've done everything else, we can now totally drain the
4229 4230 // local queue and global stack.
4230 4231 drain_local_queue(false);
4231 4232 drain_global_stack(false);
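// --------------------------------------------------------------------
// Editorial sketch of the partial-vs-total drain distinction: the
// boolean passed to drain_local_queue()/drain_global_stack() above and
// earlier in this method is a "partially" flag. A partial drain only
// shrinks the queue to a target size, leaving some local work to come
// back to; a total drain empties it. Toy code; the target of 20 is an
// arbitrary stand-in, not HotSpot's actual threshold.
// --------------------------------------------------------------------
#include <cstddef>
#include <vector>

template <typename Entry, typename ProcessFn>
void drain(std::vector<Entry>& queue, bool partially, ProcessFn process) {
  const std::size_t target = partially ? 20 : 0;  // assumed target size
  while (queue.size() > target) {
    Entry e = queue.back();
    queue.pop_back();
    process(e);
  }
}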
4232 4233
4233 4234   // Attempt to steal work from other tasks' queues.
4234 4235 if (do_stealing && !has_aborted()) {
4235 4236 // We have not aborted. This means that we have finished all that
4236 4237 // we could. Let's try to do some stealing...
4237 4238
4238 4239 // We cannot check whether the global stack is empty, since other
4239 4240 // tasks might be pushing objects to it concurrently. We also cannot
4240 4241 // check if the region stack is empty because if a thread is aborting
4241 4242 // it can push a partially done region back.
4242 4243 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4243 4244 "only way to reach here");
4244 4245
4245 4246 if (_cm->verbose_low())
4246 4247 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4247 4248
4248 4249 while (!has_aborted()) {
4249 4250 oop obj;
4250 4251 statsOnly( ++_steal_attempts );
4251 4252
4252 4253 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4253 4254 if (_cm->verbose_medium())
4254 4255 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4255 4256 _task_id, (void*) obj);
4256 4257
4257 4258 statsOnly( ++_steals );
4258 4259
4259 4260 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4260 4261 "any stolen object should be marked");
4261 4262 scan_object(obj);
4262 4263
4263 4264 // And since we're towards the end, let's totally drain the
4264 4265 // local queue and global stack.
4265 4266 drain_local_queue(false);
4266 4267 drain_global_stack(false);
4267 4268 } else {
4268 4269 break;
4269 4270 }
4270 4271 }
4271 4272 }
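// --------------------------------------------------------------------
// Editorial sketch of the steal loop above, with a mutex-guarded toy
// queue standing in for HotSpot's lock-free work-stealing deques: pick
// a victim starting from a pseudo-random index (mirroring the hashed
// victim selection that _hash_seed feeds), try to take one entry, and
// report failure once every queue comes back empty. Names are made up;
// assumes at least one queue exists.
// --------------------------------------------------------------------
#include <cstddef>
#include <mutex>
#include <vector>

struct ToyTaskQueue {
  std::mutex         lock;
  std::vector<void*> entries;

  bool try_steal(void** out) {
    std::lock_guard<std::mutex> g(lock);
    if (entries.empty()) return false;
    *out = entries.back();
    entries.pop_back();
    return true;
  }
};

bool try_stealing(std::vector<ToyTaskQueue*>& queues,
                  unsigned* seed, void** out) {
  const std::size_t n = queues.size();
  *seed = *seed * 1103515245u + 12345u;      // step a toy LCG
  std::size_t start = *seed % n;
  for (std::size_t i = 0; i < n; ++i) {      // try each victim once
    if (queues[(start + i) % n]->try_steal(out)) return true;
  }
  return false;                              // nothing left to steal
}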
4272 4273
4273 4274 // We still haven't aborted. Now, let's try to get into the
4274 4275 // termination protocol.
4275 4276 if (do_termination && !has_aborted()) {
4276 4277 // We cannot check whether the global stack is empty, since other
4277 4278 // tasks might be concurrently pushing objects on it. We also cannot
4278 4279 // check if the region stack is empty because if a thread is aborting
4279 4280 // it can push a partially done region back.
4280 4281 // Separated the asserts so that we know which one fires.
4281 4282 assert(_cm->out_of_regions(), "only way to reach here");
4282 4283 assert(_task_queue->size() == 0, "only way to reach here");
4283 4284
4284 4285 if (_cm->verbose_low())
4285 4286 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4286 4287
4287 4288 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4288 4289     // The CMTask class also extends the TerminatorTerminator class,
4289 4290     // so its should_exit_termination() method also decides
4290 4291     // whether to exit the termination protocol.
4291 4292 bool finished = _cm->terminator()->offer_termination(this);
4292 4293 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4293 4294 _termination_time_ms +=
4294 4295 termination_end_time_ms - _termination_start_time_ms;
4295 4296
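// --------------------------------------------------------------------
// Editorial sketch of an offer_termination()-style protocol (toy code,
// not HotSpot's terminator): each idle worker registers an offer; if
// all workers are idle, termination succeeds, but a worker retracts its
// offer when its should-exit predicate finds new work -- the role
// should_exit_termination() plays above. Assumes the usual invariant
// that new work is only generated by a worker that has not yet offered
// termination, so the all-idle decision is stable once reached.
// --------------------------------------------------------------------
#include <atomic>

struct ToyTerminator {
  std::atomic<int> offers;
  const int        n_workers;

  explicit ToyTerminator(int n) : offers(0), n_workers(n) {}

  // Spins until either every worker has offered termination (returns
  // true) or new work appears for this worker (returns false).
  template <typename ShouldExit>
  bool offer_termination(ShouldExit should_exit) {
    offers.fetch_add(1);
    for (;;) {
      if (offers.load() == n_workers) return true;  // all idle: done
      if (should_exit()) {                          // work showed up
        offers.fetch_sub(1);                        // retract the offer
        return false;
      }
    }
  }
};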
4296 4297 if (finished) {
4297 4298 // We're all done.
4298 4299
4299 4300 if (_task_id == 0) {
4300 4301 // let's allow task 0 to do this
4301 4302 if (concurrent()) {
4302 4303 assert(_cm->concurrent_marking_in_progress(), "invariant");
4303 4304 // we need to set this to false before the next
4304 4305 // safepoint. This way we ensure that the marking phase
4305 4306 // doesn't observe any more heap expansions.
4306 4307 _cm->clear_concurrent_marking_in_progress();
4307 4308 }
4308 4309 }
4309 4310
4310 4311 // We can now guarantee that the global stack is empty, since
4311 4312 // all other tasks have finished. We separated the guarantees so
4312 4313 // that, if a condition is false, we can immediately find out
4313 4314 // which one.
4314 4315 guarantee(_cm->out_of_regions(), "only way to reach here");
4315 4316 guarantee(_aborted_region.is_empty(), "only way to reach here");
4316 4317 guarantee(_cm->region_stack_empty(), "only way to reach here");
4317 4318 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4318 4319 guarantee(_task_queue->size() == 0, "only way to reach here");
4319 4320 guarantee(!_cm->has_overflown(), "only way to reach here");
4320 4321 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4321 4322 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4322 4323
4323 4324 if (_cm->verbose_low())
4324 4325 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4325 4326 } else {
4326 4327       // Apparently there's more work to do. Let's abort this task. The
4327 4328       // caller will restart it and we can hopefully find more things to do.
4328 4329
4329 4330 if (_cm->verbose_low())
4330 4331 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
4331 4332
4332 4333 set_has_aborted();
4333 4334 statsOnly( ++_aborted_termination );
4334 4335 }
4335 4336 }
4336 4337
4337 4338   // Mainly for debugging purposes: make sure that a pointer to the
4338 4339   // closure that was stack-allocated in this frame doesn't
4339 4340   // escape it by accident.
4340 4341 set_oop_closure(NULL);
4341 4342 double end_time_ms = os::elapsedVTime() * 1000.0;
4342 4343 double elapsed_time_ms = end_time_ms - _start_time_ms;
4343 4344 // Update the step history.
4344 4345 _step_times_ms.add(elapsed_time_ms);
4345 4346
4346 4347 if (has_aborted()) {
4347 4348 // The task was aborted for some reason.
4348 4349
4349 4350 statsOnly( ++_aborted );
4350 4351
4351 4352 if (_has_timed_out) {
4352 4353 double diff_ms = elapsed_time_ms - _time_target_ms;
4353 4354 // Keep statistics of how well we did with respect to hitting
4354 4355 // our target only if we actually timed out (if we aborted for
4355 4356 // other reasons, then the results might get skewed).
4356 4357 _marking_step_diffs_ms.add(diff_ms);
4357 4358 }
4358 4359
4359 4360 if (_cm->has_overflown()) {
4360 4361 // This is the interesting one. We aborted because a global
4361 4362 // overflow was raised. This means we have to restart the
4362 4363 // marking phase and start iterating over regions. However, in
4363 4364 // order to do this we have to make sure that all tasks stop
4364 4365 // what they are doing and re-initialise in a safe manner. We
4365 4366 // will achieve this with the use of two barrier sync points.
4366 4367
4367 4368 if (_cm->verbose_low())
4368 4369 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4369 4370
4370 4371 _cm->enter_first_sync_barrier(_task_id);
4371 4372 // When we exit this sync barrier we know that all tasks have
4372 4373 // stopped doing marking work. So, it's now safe to
4373 4374 // re-initialise our data structures. At the end of this method,
4374 4375 // task 0 will clear the global data structures.
4375 4376
4376 4377 statsOnly( ++_aborted_overflow );
4377 4378
4378 4379 // We clear the local state of this task...
4379 4380 clear_region_fields();
4380 4381
4381 4382 // ...and enter the second barrier.
4382 4383 _cm->enter_second_sync_barrier(_task_id);
4383 4384       // At this point everything has been re-initialised and we're
4384 4385 // ready to restart.
4385 4386 }
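// --------------------------------------------------------------------
// Editorial sketch of the two-barrier restart described above, using
// C++20 std::barrier in place of HotSpot's own barrier sync (an
// assumption for illustration): the first barrier guarantees every
// worker has stopped marking before any shared state is reset; the
// second guarantees no worker restarts until the reset is complete.
// Assumes each worker's local state is private to that worker.
// --------------------------------------------------------------------
#include <barrier>

void restart_after_overflow(std::barrier<>& first_sync,
                            std::barrier<>& second_sync,
                            int worker_id,
                            void (*reset_global_state)(),   // hypothetical
                            void (*reset_local_state)()) {  // hypothetical
  first_sync.arrive_and_wait();        // everyone has stopped marking
  if (worker_id == 0) {
    reset_global_state();              // one worker resets shared state
  }
  reset_local_state();                 // each worker clears its own state
  second_sync.arrive_and_wait();       // restart together, fully reset
}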
4386 4387
4387 4388 if (_cm->verbose_low()) {
4388 4389 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4389 4390 "elapsed = %1.2lfms <<<<<<<<<<",
4390 4391 _task_id, _time_target_ms, elapsed_time_ms);
4391 4392 if (_cm->has_aborted())
4392 4393 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4393 4394 _task_id);
4394 4395 }
4395 4396 } else {
4396 4397 if (_cm->verbose_low())
4397 4398 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4398 4399 "elapsed = %1.2lfms <<<<<<<<<<",
4399 4400 _task_id, _time_target_ms, elapsed_time_ms);
4400 4401 }
4401 4402
4402 4403 _claimed = false;
4403 4404 }
4404 4405
4405 4406 CMTask::CMTask(int task_id,
4406 4407 ConcurrentMark* cm,
4407 4408 CMTaskQueue* task_queue,
4408 4409 CMTaskQueueSet* task_queues)
4409 4410 : _g1h(G1CollectedHeap::heap()),
4410 4411 _task_id(task_id), _cm(cm),
4411 4412 _claimed(false),
4412 4413 _nextMarkBitMap(NULL), _hash_seed(17),
4413 4414 _task_queue(task_queue),
4414 4415 _task_queues(task_queues),
4415 4416 _oop_closure(NULL),
4416 4417 _aborted_region(MemRegion()) {
4417 4418 guarantee(task_queue != NULL, "invariant");
4418 4419 guarantee(task_queues != NULL, "invariant");
4419 4420
4420 4421 statsOnly( _clock_due_to_scanning = 0;
4421 4422 _clock_due_to_marking = 0 );
4422 4423
4423 4424 _marking_step_diffs_ms.add(0.5);
4424 4425 }
(2256 lines elided)