#include "memory/space.hpp"
#include "oops/objArrayOop.hpp"
#include "oops/oop.inline.hpp"
#include "oops/oop.pcgc.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/handles.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/copy.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/workgroup.hpp"

PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC

#ifdef _MSC_VER
#pragma warning( push )
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif
ParScanThreadState::ParScanThreadState(Space* to_space_,
                                       ParNewGeneration* gen_,
                                       Generation* old_gen_,
                                       int thread_num_,
                                       ObjToScanQueueSet* work_queue_set_,
                                       Stack<oop, mtGC>* overflow_stacks_,
                                       size_t desired_plab_sz_,
                                       ParallelTaskTerminator& term_) :
  _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_),
  _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false),
  _overflow_stack(overflow_stacks_ ? overflow_stacks_ + thread_num_ : NULL),
  _ageTable(false), // false ==> not the global age table, no perf data.
  _to_space_alloc_buffer(desired_plab_sz_),
  _to_space_closure(gen_, this), _old_gen_closure(gen_, this),
  _to_space_root_closure(gen_, this), _old_gen_root_closure(gen_, this),
  _older_gen_closure(gen_, this),
  _evacuate_followers(this, &_to_space_closure, &_old_gen_closure,
                      &_to_space_root_closure, gen_, &_old_gen_root_closure,
                      work_queue_set_, &term_),
  _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this),
  _keep_alive_closure(&_scan_weak_ref_closure),
  _strong_roots_time(0.0), _term_time(0.0)
{
#if TASKQUEUE_STATS
  _term_attempts = 0;
  _overflow_refills = 0;
  _overflow_refill_objs = 0;
#endif // TASKQUEUE_STATS

  _survivor_chunk_array =
    (ChunkArray*) old_gen()->get_data_recorder(thread_num());
  _hash_seed = 17;  // Might want to take time-based random value.
  _start = os::elapsedTime();
  _old_gen_closure.set_generation(old_gen_);
  _old_gen_root_closure.set_generation(old_gen_);
}
#ifdef _MSC_VER
#pragma warning( pop )
#endif

void ParScanThreadState::record_survivor_plab(HeapWord* plab_start,
                                              size_t plab_word_size) {
  ChunkArray* sca = survivor_chunk_array();
  if (sca != NULL) {
    // A non-null SCA implies that we want the PLAB data recorded.
    sca->record_sample(plab_start, plab_word_size);
  }
}

bool ParScanThreadState::should_be_partially_scanned(oop new_obj, oop old_obj) const {
  return new_obj->is_objArray() &&
         arrayOop(new_obj)->length() > ParGCArrayScanChunk &&
         new_obj != old_obj;
}

void ParScanThreadState::scan_partial_array_and_push_remainder(oop old) {
  assert(old->is_objArray(), "must be obj array");
  assert(old->is_forwarded(), "must be forwarded");
  assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
  assert(!old_gen()->is_in(old), "must be in young generation.");

  objArrayOop obj = objArrayOop(old->forwardee());
  // Process ParGCArrayScanChunk elements now
  // and push the remainder back onto queue
  int start     = arrayOop(old)->length();
  int end       = obj->length();
  int remainder = end - start;
  assert(start <= end, "just checking");
  if (remainder > 2 * ParGCArrayScanChunk) {
    // Test above combines last partial chunk with a full chunk
    end = start + ParGCArrayScanChunk;
    arrayOop(old)->set_length(end);
    // Push remainder.
    bool ok = work_queue()->push(old);
    assert(ok, "just popped, push must be okay");
  } else {
    // Restore length so that it can be used if there
    // is a promotion failure and forwarding pointers
    // must be removed.
    arrayOop(old)->set_length(end);
  }

  // process our set of indices (include header in first chunk)
  // should make sure end is even (aligned to HeapWord in case of compressed oops)
  if ((HeapWord *)obj < young_old_boundary()) {
    // object is in to_space
    obj->oop_iterate_range(&_to_space_closure, start, end);
  } else {
    // object is in old generation
    obj->oop_iterate_range(&_old_gen_closure, start, end);
  }
}


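// Drain the local work queue until it holds at most max_size entries,
// scanning each popped object with the closure that matches its location
// (to-space vs. old generation); chunked object arrays are rescheduled
// via scan_partial_array_and_push_remainder().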
void ParScanThreadState::trim_queues(int max_size) {
  ObjToScanQueue* queue = work_queue();
  do {
    while (queue->size() > (juint)max_size) {
      oop obj_to_scan;
      if (queue->pop_local(obj_to_scan)) {
        if ((HeapWord *)obj_to_scan < young_old_boundary()) {
          if (obj_to_scan->is_objArray() &&
              obj_to_scan->is_forwarded() &&
              obj_to_scan->forwardee() != obj_to_scan) {
            scan_partial_array_and_push_remainder(obj_to_scan);
          } else {
            // object is in to_space
            obj_to_scan->oop_iterate(&_to_space_closure);
          }
        } else {
          // object is in old generation
          obj_to_scan->oop_iterate(&_old_gen_closure);
        }
      }
    assert(!old_gen()->is_in_reserved(cur), "Should be in young gen");
    assert(Universe::heap()->is_in_reserved(obj_to_push), "Should be in heap");
    if (should_be_partially_scanned(obj_to_push, cur)) {
      assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
      obj_to_push = cur;
    }
    bool ok = queue->push(obj_to_push);
    assert(ok, "Should have succeeded");
  }
  assert(young_gen()->overflow_list() == NULL, "Error");
  return num_take_elems > 0;  // was something transferred?
}

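// With ParGCUseLocalOverflow, each worker thread keeps its own overflow
// stack; overflowed objects are pushed there rather than linked onto the
// shared global overflow list (which must stay empty in this mode).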
void ParScanThreadState::push_on_overflow_stack(oop p) {
  assert(ParGCUseLocalOverflow, "Else should not call");
  overflow_stack()->push(p);
  assert(young_gen()->overflow_list() == NULL, "Error");
}

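// Slow-path to-space allocation, used when the current PLAB cannot satisfy
// a request. A request is considered "small" when it is below
// ParallelGCBufferWastePct percent of the PLAB size, i.e. when
// word_sz * 100 < ParallelGCBufferWastePct * plab->word_sz(); for example,
// with a waste limit of 10, only requests under 10% of the buffer size
// cause a PLAB refill. Larger requests are allocated directly in to-space.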
HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) {

  // If the object is small enough, try to reallocate the
  // buffer.
  HeapWord* obj = NULL;
  if (!_to_space_full) {
    ParGCAllocBuffer* const plab = to_space_alloc_buffer();
    Space* const sp = to_space();
    if (word_sz * 100 <
        ParallelGCBufferWastePct * plab->word_sz()) {
      // Is small enough; abandon this buffer and start a new one.
      plab->retire(false, false);
      size_t buf_size = plab->word_sz();
      HeapWord* buf_space = sp->par_allocate(buf_size);
      if (buf_space == NULL) {
        const size_t min_bytes =
          ParGCAllocBuffer::min_size() << LogHeapWordSize;
        size_t free_bytes = sp->free();
        while (buf_space == NULL && free_bytes >= min_bytes) {
          buf_size = free_bytes >> LogHeapWordSize;
          assert(buf_size == (size_t)align_object_size(buf_size),
                 "Invariant");
          buf_space = sp->par_allocate(buf_size);
          free_bytes = sp->free();
        }
      }
      if (buf_space != NULL) {
        plab->set_word_size(buf_size);
        plab->set_buf(buf_space);
        record_survivor_plab(buf_space, buf_size);
        obj = plab->allocate(word_sz);
        // Note that we cannot compare buf_size < word_sz below
        // because of AlignmentReserve (see ParGCAllocBuffer::allocate()).
        assert(obj != NULL || plab->words_remaining() < word_sz,
               "Else should have been able to allocate");
        // It's conceivable that we may be able to use the
        // buffer we just grabbed for subsequent small requests
        // even if not for this one.
      } else {
        // We're used up.
        _to_space_full = true;
      }

    } else {
      // Too large; allocate the object individually.
      obj = sp->par_allocate(word_sz);
    }
  }
  return obj;
}


void ParScanThreadState::undo_alloc_in_to_space(HeapWord* obj,
                                                size_t word_sz) {
  // Is the alloc in the current alloc buffer?
  if (to_space_alloc_buffer()->contains(obj)) {
    assert(to_space_alloc_buffer()->contains(obj + word_sz - 1),
           "Should contain whole object.");
    to_space_alloc_buffer()->undo_allocation(obj, word_sz);
  } else {
    CollectedHeap::fill_with_object(obj, word_sz);
  }
}

void ParScanThreadState::print_promotion_failure_size() {
  if (_promotion_failed_info.has_failed() && PrintPromotionFailure) {
    gclog_or_tty->print(" (%d: promotion failure size = " SIZE_FORMAT ") ",
                        _thread_num, _promotion_failed_info.first_size());
  }
}

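// Holds the per-worker ParScanThreadState instances for one collection.
// Backed by a ResourceArray, with the states constructed in place via
// placement new in the constructor below.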
class ParScanThreadStateSet: private ResourceArray {
public:
  // Initializes states for the specified number of threads.
  ParScanThreadStateSet(int num_threads,
                        Space& to_space,
                        ParNewGeneration& gen,
                        Generation& old_gen,
                        ObjToScanQueueSet& queue_set,
                        Stack<oop, mtGC>* overflow_stacks,
                        size_t desired_plab_sz,
                        ParallelTaskTerminator& term);

  ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); }

  inline ParScanThreadState& thread_state(int i);

  void trace_promotion_failed(YoungGCTracer& gc_tracer);
  void reset(int active_workers, bool promotion_failed);
  void flush();

#if TASKQUEUE_STATS
  static void
    print_termination_stats_hdr(outputStream* const st = gclog_or_tty);
  void print_termination_stats(outputStream* const st = gclog_or_tty);
  static void
    print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty);
  void print_taskqueue_stats(outputStream* const st = gclog_or_tty);
  void reset_stats();
#endif // TASKQUEUE_STATS

private:
  ParallelTaskTerminator& _term;
  ParNewGeneration&       _gen;
  Generation&             _next_gen;
public:
  bool is_valid(int id) const { return id < length(); }
  ParallelTaskTerminator* terminator() { return &_term; }
};


ParScanThreadStateSet::ParScanThreadStateSet(
  int num_threads, Space& to_space, ParNewGeneration& gen,
  Generation& old_gen, ObjToScanQueueSet& queue_set,
  Stack<oop, mtGC>* overflow_stacks,
  size_t desired_plab_sz, ParallelTaskTerminator& term)
  : ResourceArray(sizeof(ParScanThreadState), num_threads),
    _gen(gen), _next_gen(old_gen), _term(term)
{
  assert(num_threads > 0, "sanity check!");
  assert(ParGCUseLocalOverflow == (overflow_stacks != NULL),
         "overflow_stack allocation mismatch");
  // Initialize states.
  for (int i = 0; i < num_threads; ++i) {
    new ((ParScanThreadState*)_data + i)
      ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set,
                         overflow_stacks, desired_plab_sz, term);
  }
}

inline ParScanThreadState& ParScanThreadStateSet::thread_state(int i)
{
  assert(i >= 0 && i < length(), "sanity check!");
  return ((ParScanThreadState*)_data)[i];
}

void ParScanThreadStateSet::trace_promotion_failed(YoungGCTracer& gc_tracer) {
  for (int i = 0; i < length(); ++i) {
    if (thread_state(i).promotion_failed()) {
      gc_tracer.report_promotion_failed(thread_state(i).promotion_failed_info());
      thread_state(i).promotion_failed_info().reset();
    }
  }
}

void ParScanThreadStateSet::reset(int active_threads, bool promotion_failed)
{
  _term.reset_for_reuse(active_threads);
  if (promotion_failed) {
    for (int i = 0; i < length(); ++i) {
      thread_state(i).print_promotion_failure_size();
    }
  }
}

#if TASKQUEUE_STATS
void
ParScanThreadState::reset_stats()
{
  taskqueue_stats().reset();
  _term_attempts = 0;
  _overflow_refills = 0;
  _overflow_refill_objs = 0;
}

void ParScanThreadStateSet::reset_stats()
{
  for (int i = 0; i < length(); ++i) {
    thread_state(i).reset_stats();
  }
}

void
ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st)
{
  st->print_raw_cr("GC Termination Stats");
  st->print_raw_cr("     elapsed  --strong roots-- "
                   "-------termination-------");
  st->print_raw_cr("thr     ms       ms       %   "
                   "    ms       %   attempts");
  st->print_raw_cr("--- --------- --------- ------ "
                   "--------- ------ --------");
}

void ParScanThreadStateSet::print_termination_stats(outputStream* const st)
{
  print_termination_stats_hdr(st);

  for (int i = 0; i < length(); ++i) {
    const ParScanThreadState & pss = thread_state(i);
    const double elapsed_ms = pss.elapsed_time() * 1000.0;
    const double s_roots_ms = pss.strong_roots_time() * 1000.0;
    const double term_ms = pss.term_time() * 1000.0;
    st->print_cr("%3d %9.2f %9.2f %6.2f "
                 "%9.2f %6.2f " SIZE_FORMAT_W(8),
                 i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
                 term_ms, term_ms * 100 / elapsed_ms, pss.term_attempts());
  }
}

// Print stats related to work queue activity.
void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st)
{
  st->print_raw_cr("GC Task Stats");
  st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr();
  st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr();
}

void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st)
{
  print_taskqueue_stats_hdr(st);

  TaskQueueStats totals;
  for (int i = 0; i < length(); ++i) {
    const ParScanThreadState & pss = thread_state(i);
    const TaskQueueStats & stats = pss.taskqueue_stats();
    st->print("%3d ", i); stats.print(st); st->cr();
    totals += stats;

    if (pss.overflow_refills() > 0) {
      st->print_cr("    " SIZE_FORMAT_W(10) " overflow refills    "
                   SIZE_FORMAT_W(10) " overflow objects",
                   pss.overflow_refills(), pss.overflow_refill_objs());
    }
  }
  st->print("tot "); totals.print(st); st->cr();

  DEBUG_ONLY(totals.verify());
}
#endif // TASKQUEUE_STATS

void ParScanThreadStateSet::flush()
{
  // Work in this loop should be kept as lightweight as
  // possible since this might otherwise become a bottleneck
  // to scaling. Should we add heavy-weight work into this
  // loop, consider parallelizing the loop into the worker threads.
  for (int i = 0; i < length(); ++i) {
    ParScanThreadState& par_scan_state = thread_state(i);

    // Flush stats related to To-space PLAB activity and
    // retire the last buffer.
    par_scan_state.to_space_alloc_buffer()->
      flush_stats_and_retire(_gen.plab_stats(),
                             true /* end_of_gc */,
                             false /* retain */);

    // Every thread has its own age table. We need to merge
    // them all into one.
    ageTable *local_table = par_scan_state.age_table();
    _gen.age_table()->merge(local_table);

    // Inform old gen that we're done.
    _next_gen.par_promote_alloc_done(i);
    _next_gen.par_oop_since_save_marks_iterate_done(i);
  }

  if (UseConcMarkSweepGC && ParallelGCThreads > 0) {
    // We need to call this even when ResizeOldPLAB is disabled
    // so as to avoid breaking some asserts. While we may be able
    // to avoid this by reorganizing the code a bit, I am loath
    // to do that unless we find cases where ergo leads to bad
    // performance.
    CFLS_LAB::compute_desired_plab_size();
  }
}

ParScanClosure::ParScanClosure(ParNewGeneration* g,
                               ParScanThreadState* par_scan_state) :
  OopsInKlassOrGenClosure(g), _par_scan_state(par_scan_state), _g(g)
{
  assert(_g->level() == 0, "Optimized for youngest generation");
  _boundary = _g->reserved().end();
}

void ParScanWithBarrierClosure::do_oop(oop* p)       { ParScanClosure::do_oop_work(p, true, false); }
void ParScanWithBarrierClosure::do_oop(narrowOop* p) { ParScanClosure::do_oop_work(p, true, false); }

void ParScanWithoutBarrierClosure::do_oop(oop* p)       { ParScanClosure::do_oop_work(p, false, false); }
void ParScanWithoutBarrierClosure::do_oop(narrowOop* p) { ParScanClosure::do_oop_work(p, false, false); }

void ParRootScanWithBarrierTwoGensClosure::do_oop(oop* p)       { ParScanClosure::do_oop_work(p, true, true); }
void ParRootScanWithBarrierTwoGensClosure::do_oop(narrowOop* p) { ParScanClosure::do_oop_work(p, true, true); }

void ParRootScanWithoutBarrierClosure::do_oop(oop* p)       { ParScanClosure::do_oop_work(p, false, true); }
void ParRootScanWithoutBarrierClosure::do_oop(narrowOop* p) { ParScanClosure::do_oop_work(p, false, true); }

ParScanWeakRefClosure::ParScanWeakRefClosure(ParNewGeneration* g,
                                             ParScanThreadState* par_scan_state)
  : ScanWeakRefClosure(g), _par_scan_state(par_scan_state)
{}

void ParScanWeakRefClosure::do_oop(oop* p)       { ParScanWeakRefClosure::do_oop_work(p); }
void ParScanWeakRefClosure::do_oop(narrowOop* p) { ParScanWeakRefClosure::do_oop_work(p); }

#ifdef WIN32
#pragma warning(disable: 4786) /* identifier was truncated to '255' characters in the browser information */
#endif

ParEvacuateFollowersClosure::ParEvacuateFollowersClosure(
    ParScanThreadState* par_scan_state_,
    ParScanWithoutBarrierClosure* to_space_closure_,
    ParScanWithBarrierClosure* old_gen_closure_,
    ParRootScanWithoutBarrierClosure* to_space_root_closure_,
    ParNewGeneration* par_gen_,
    ParRootScanWithBarrierTwoGensClosure* old_gen_root_closure_,
    ObjToScanQueueSet* task_queues_,
    ParallelTaskTerminator* terminator_) :

    _par_scan_state(par_scan_state_),
    _to_space_closure(to_space_closure_),
    _old_gen_closure(old_gen_closure_),
    _to_space_root_closure(to_space_root_closure_),
    _old_gen_root_closure(old_gen_root_closure_),
    _par_gen(par_gen_),
    _task_queues(task_queues_),
    _terminator(terminator_)
{}

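// Work loop for one evacuation worker. Work comes from three sources, in
// order: the worker's own queue (drained via trim_queues(0)), work stolen
// from other workers' queues, and the global overflow list. Only when all
// three are exhausted does the worker offer termination.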
void ParEvacuateFollowersClosure::do_void() {
  ObjToScanQueue* work_q = par_scan_state()->work_queue();

  while (true) {

    // Scan to-space and old-gen objs until we run out of both.
    oop obj_to_scan;
    par_scan_state()->trim_queues(0);

    // We have no local work, attempt to steal from other threads.

    // attempt to steal work from promoted.
    if (task_queues()->steal(par_scan_state()->thread_num(),
                             par_scan_state()->hash_seed(),
                             obj_to_scan)) {
      bool res = work_q->push(obj_to_scan);
      assert(res, "Empty queue should have room for a push.");

      // if successful, goto Start.
      continue;

      // try global overflow list.
    } else if (par_gen()->take_from_overflow_list(par_scan_state())) {
      continue;
    }

    // Otherwise, offer termination.
    par_scan_state()->start_term_time();
    if (terminator()->offer_termination()) break;
    par_scan_state()->end_term_time();
  }
  assert(par_gen()->_overflow_list == NULL && par_gen()->_num_par_pushes == 0,
         "Broken overflow list?");
  // Finish the last termination pause.
  par_scan_state()->end_term_time();
}

ParNewGenTask::ParNewGenTask(ParNewGeneration* gen, Generation* next_gen,
                             HeapWord* young_old_boundary, ParScanThreadStateSet* state_set) :
    AbstractGangTask("ParNewGeneration collection"),
    _gen(gen), _next_gen(next_gen),
    _young_old_boundary(young_old_boundary),
    _state_set(state_set)
{}

// Reset the terminator for the given number of
// active threads.
void ParNewGenTask::set_for_termination(int active_workers) {
  _state_set->reset(active_workers, _gen->promotion_failed());
  // Should the heap be passed in? There's only 1 for now so
  // grab it instead.
  GenCollectedHeap* gch = GenCollectedHeap::heap();
  gch->set_n_termination(active_workers);
}

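// Per-worker collection task body: bind this worker to its scan state,
// process strong roots, then evacuate followers until termination.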
void ParNewGenTask::work(uint worker_id) {
  GenCollectedHeap* gch = GenCollectedHeap::heap();
  // Since this is being done in a separate thread, need new resource
  // and handle marks.
  ResourceMark rm;
  HandleMark hm;
  // We would need multiple old-gen queues otherwise.
  assert(gch->n_gens() == 2, "Par young collection currently only works with one older gen.");

  Generation* old_gen = gch->next_gen(_gen);

  ParScanThreadState& par_scan_state = _state_set->thread_state(worker_id);
  assert(_state_set->is_valid(worker_id), "Should not have been called");

  par_scan_state.set_young_old_boundary(_young_old_boundary);

  KlassScanClosure klass_scan_closure(&par_scan_state.to_space_root_closure(),
                                      gch->rem_set()->klass_rem_set());
  CLDToKlassAndOopClosure cld_scan_closure(&klass_scan_closure,
                                           &par_scan_state.to_space_root_closure(),
                                           false);

  par_scan_state.start_strong_roots();
  gch->gen_process_roots(_gen->level(),
                         true,  // Process younger gens, if any,
                                // as strong roots.
                         false, // no scope; this is parallel code
                         SharedHeap::SO_ScavengeCodeCache,
                         GenCollectedHeap::StrongAndWeakRoots,
                         &par_scan_state.to_space_root_closure(),
                         &par_scan_state.older_gen_closure(),
                         &cld_scan_closure);

  par_scan_state.end_strong_roots();

  // "evacuate followers".
  par_scan_state.evacuate_followers_closure().do_void();
}

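// The constructor below sets up one object-scan queue per GC worker thread,
// optional per-thread overflow stacks (ParGCUseLocalOverflow), and, with
// UsePerfData, a constant perf counter recording the GC thread count.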
#ifdef _MSC_VER
#pragma warning( push )
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif
ParNewGeneration::
ParNewGeneration(ReservedSpace rs, size_t initial_byte_size, int level)
  : DefNewGeneration(rs, initial_byte_size, level, "PCopy"),
  _overflow_list(NULL),
  _is_alive_closure(this),
  _plab_stats(YoungPLABSize, PLABWeight)
{
  NOT_PRODUCT(_overflow_counter = ParGCWorkQueueOverflowInterval;)
  NOT_PRODUCT(_num_par_pushes = 0;)
  _task_queues = new ObjToScanQueueSet(ParallelGCThreads);
  guarantee(_task_queues != NULL, "task_queues allocation failure.");

  for (uint i1 = 0; i1 < ParallelGCThreads; i1++) {
    ObjToScanQueue *q = new ObjToScanQueue();
    guarantee(q != NULL, "work_queue Allocation failure.");
    _task_queues->register_queue(i1, q);
  }

  for (uint i2 = 0; i2 < ParallelGCThreads; i2++)
    _task_queues->queue(i2)->initialize();

  _overflow_stacks = NULL;
  if (ParGCUseLocalOverflow) {

    // typedef to workaround NEW_C_HEAP_ARRAY macro, which can not deal
    // with ','
    typedef Stack<oop, mtGC> GCOopStack;

    _overflow_stacks = NEW_C_HEAP_ARRAY(GCOopStack, ParallelGCThreads, mtGC);
    for (size_t i = 0; i < ParallelGCThreads; ++i) {
      new (_overflow_stacks + i) Stack<oop, mtGC>();
    }
  }

  if (UsePerfData) {
    EXCEPTION_MARK;
    ResourceMark rm;

    const char* cname =
      PerfDataManager::counter_name(_gen_counters->name_space(), "threads");
    PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_None,
                                     ParallelGCThreads, CHECK);
  }
}
#ifdef _MSC_VER
#pragma warning( pop )
#endif

// ParNewGeneration::
ParKeepAliveClosure::ParKeepAliveClosure(ParScanWeakRefClosure* cl) :
  DefNewGeneration::KeepAliveClosure(cl), _par_cl(cl) {}

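// Keep-alive for weak references: forward the referent via the wrapped
// closure, then dirty the card for p when p itself lies in the heap, since
// the referent may have been copied; roots outside the heap need no card
// update.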
template <class T>
void /*ParNewGeneration::*/ParKeepAliveClosure::do_oop_work(T* p) {
#ifdef ASSERT
  {
    assert(!oopDesc::is_null(*p), "expected non-null ref");
    oop obj = oopDesc::load_decode_heap_oop_not_null(p);
    // We never expect to see a null reference being processed
    // as a weak reference.
    assert(obj->is_oop(), "expected an oop while scanning weak refs");
  }
#endif // ASSERT

  _par_cl->do_oop_nv(p);

  if (Universe::heap()->is_in_reserved(p)) {
    oop obj = oopDesc::load_decode_heap_oop_not_null(p);
    _rs->write_ref_field_gc_par(p, obj);
  }
}

void /*ParNewGeneration::*/ParKeepAliveClosure::do_oop(oop* p)       { ParKeepAliveClosure::do_oop_work(p); }
void /*ParNewGeneration::*/ParKeepAliveClosure::do_oop(narrowOop* p) { ParKeepAliveClosure::do_oop_work(p); }

// ParNewGeneration::
KeepAliveClosure::KeepAliveClosure(ScanWeakRefClosure* cl) :
  DefNewGeneration::KeepAliveClosure(cl) {}

template <class T>
void /*ParNewGeneration::*/KeepAliveClosure::do_oop_work(T* p) {
#ifdef ASSERT
  {
    assert(!oopDesc::is_null(*p), "expected non-null ref");
    oop obj = oopDesc::load_decode_heap_oop_not_null(p);
    // We never expect to see a null reference being processed
    // as a weak reference.
    assert(obj->is_oop(), "expected an oop while scanning weak refs");
  }
#endif // ASSERT

  _cl->do_oop_nv(p);

  if (Universe::heap()->is_in_reserved(p)) {
    oop obj = oopDesc::load_decode_heap_oop_not_null(p);
    _rs->write_ref_field_gc_par(p, obj);
  }
}

void /*ParNewGeneration::*/KeepAliveClosure::do_oop(oop* p)       { KeepAliveClosure::do_oop_work(p); }
void /*ParNewGeneration::*/KeepAliveClosure::do_oop(narrowOop* p) { KeepAliveClosure::do_oop_work(p); }

template <class T> void ScanClosureWithParBarrier::do_oop_work(T* p) {
  T heap_oop = oopDesc::load_heap_oop(p);
  if (!oopDesc::is_null(heap_oop)) {
    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
    if ((HeapWord*)obj < _boundary) {
      assert(!_g->to()->is_in_reserved(obj), "Scanning field twice?");
      oop new_obj = obj->is_forwarded()
                      ? obj->forwardee()
                      : _g->DefNewGeneration::copy_to_survivor_space(obj);
      oopDesc::encode_store_heap_oop_not_null(p, new_obj);
    }
    if (_gc_barrier) {
      // If p points to a younger generation, mark the card.
      if ((HeapWord*)obj < _gen_boundary) {
        _rs->write_ref_field_gc_par(p, obj);
      }
    }
  }
}

void ScanClosureWithParBarrier::do_oop(oop* p)       { ScanClosureWithParBarrier::do_oop_work(p); }
void ScanClosureWithParBarrier::do_oop(narrowOop* p) { ScanClosureWithParBarrier::do_oop_work(p); }

class ParNewRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
public:
  ParNewRefProcTaskProxy(ProcessTask& task, ParNewGeneration& gen,
                         Generation& next_gen,
                         HeapWord* young_old_boundary,
                         ParScanThreadStateSet& state_set);

private:
  virtual void work(uint worker_id);
  virtual void set_for_termination(int active_workers) {
    _state_set.terminator()->reset_for_reuse(active_workers);
  }
private:
  ParNewGeneration&      _gen;
  ProcessTask&           _task;
  Generation&            _next_gen;
  HeapWord*              _young_old_boundary;
  ParScanThreadStateSet& _state_set;
};

ParNewRefProcTaskProxy::ParNewRefProcTaskProxy(
    ProcessTask& task, ParNewGeneration& gen,
    Generation& next_gen,
    HeapWord* young_old_boundary,
    ParScanThreadStateSet& state_set)
  : AbstractGangTask("ParNewGeneration parallel reference processing"),
    _gen(gen),
    _task(task),
    _next_gen(next_gen),
    _young_old_boundary(young_old_boundary),
    _state_set(state_set)
{
}

void ParNewRefProcTaskProxy::work(uint worker_id)
{
  ResourceMark rm;
  HandleMark hm;
  ParScanThreadState& par_scan_state = _state_set.thread_state(worker_id);
  par_scan_state.set_young_old_boundary(_young_old_boundary);
  _task.work(worker_id, par_scan_state.is_alive_closure(),
             par_scan_state.keep_alive_closure(),
             par_scan_state.evacuate_followers_closure());
}

class ParNewRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _task;

public:
  ParNewRefEnqueueTaskProxy(EnqueueTask& task)
    : AbstractGangTask("ParNewGeneration parallel reference enqueue"),
      _task(task)
  { }

  virtual void work(uint worker_id)
  {
    _task.work(worker_id);
  }
};


void ParNewRefProcTaskExecutor::execute(ProcessTask& task)
{
  GenCollectedHeap* gch = GenCollectedHeap::heap();
  assert(gch->kind() == CollectedHeap::GenCollectedHeap,
         "not a generational heap");
  FlexibleWorkGang* workers = gch->workers();
  assert(workers != NULL, "Need parallel worker threads.");
  _state_set.reset(workers->active_workers(), _generation.promotion_failed());
  ParNewRefProcTaskProxy rp_task(task, _generation, *_generation.next_gen(),
                                 _generation.reserved().end(), _state_set);
  workers->run_task(&rp_task);
  _state_set.reset(0 /* bad value in debug if not reset */,
                   _generation.promotion_failed());
}

void ParNewRefProcTaskExecutor::execute(EnqueueTask& task)
{
  GenCollectedHeap* gch = GenCollectedHeap::heap();
  FlexibleWorkGang* workers = gch->workers();
  assert(workers != NULL, "Need parallel worker threads.");
  ParNewRefEnqueueTaskProxy enq_task(task);
  workers->run_task(&enq_task);
}

void ParNewRefProcTaskExecutor::set_single_threaded_mode()
{
  _state_set.flush();
  GenCollectedHeap* gch = GenCollectedHeap::heap();
  gch->set_par_threads(0);  // 0 ==> non-parallel.
  gch->save_marks();
}

ScanClosureWithParBarrier::
ScanClosureWithParBarrier(ParNewGeneration* g, bool gc_barrier) :
  ScanClosure(g, gc_barrier) {}

EvacuateFollowersClosureGeneral::
EvacuateFollowersClosureGeneral(GenCollectedHeap* gch, int level,
                                OopsInGenClosure* cur,
                                OopsInGenClosure* older) :
  _gch(gch), _level(level),
  _scan_cur_or_nonheap(cur), _scan_older(older)
{}

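// Keep iterating until an entire pass over the since-save-marks objects
// completes without new allocations; objects copied by the closures can
// themselves generate more work.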
void EvacuateFollowersClosureGeneral::do_void() {
  do {
    // Beware: this call will lead to closure applications via virtual
    // calls.
    _gch->oop_since_save_marks_iterate(_level,
                                       _scan_cur_or_nonheap,
                                       _scan_older);
  } while (!_gch->no_allocs_since_save_marks(_level));
}


// A Generation that does parallel young-gen collection.

bool ParNewGeneration::_avoid_promotion_undo = false;

void ParNewGeneration::handle_promotion_failed(GenCollectedHeap* gch, ParScanThreadStateSet& thread_state_set, ParNewTracer& gc_tracer) {
  assert(_promo_failure_scan_stack.is_empty(), "post condition");
  _promo_failure_scan_stack.clear(true); // Clear cached segments.

  remove_forwarding_pointers();
  if (PrintGCDetails) {
    gclog_or_tty->print(" (promotion failed)");
  }
  // All the spaces are in play for mark-sweep.
  swap_spaces();  // Make life simpler for CMS || rescan; see 6483690.
  from()->set_next_compaction_space(to());
  gch->set_incremental_collection_failed();
  // Inform the next generation that a promotion failure occurred.
  _next_gen->promotion_failure_occurred();

  // Trace promotion failure in the parallel GC threads
  thread_state_set.trace_promotion_failed(gc_tracer);
  // Single threaded code may have reported promotion failure to the global state
  if (_promotion_failed_info.has_failed()) {
    gc_tracer.report_promotion_failed(_promotion_failed_info);
  }
  // Reset the PromotionFailureALot counters.
  NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();)
}

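// A young collection: size the worker gang, verify that the collection
// attempt is safe (worst-case promotion fits in the old gen), scan roots
// and evacuate followers in parallel via ParNewGenTask, then process the
// discovered references, multi-threaded or single-threaded depending on
// the reference processor's MT setting.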
void ParNewGeneration::collect(bool   full,
                               bool   clear_all_soft_refs,
                               size_t size,
                               bool   is_tlab) {
  assert(full || size > 0, "otherwise we don't want to collect");

  GenCollectedHeap* gch = GenCollectedHeap::heap();

  _gc_timer->register_gc_start();

  assert(gch->kind() == CollectedHeap::GenCollectedHeap,
         "not a CMS generational heap");
  AdaptiveSizePolicy* size_policy = gch->gen_policy()->size_policy();
  FlexibleWorkGang* workers = gch->workers();
  assert(workers != NULL, "Need workgang for parallel work");
  int active_workers =
    AdaptiveSizePolicy::calc_active_workers(workers->total_workers(),
                                            workers->active_workers(),
                                            Threads::number_of_non_daemon_threads());
  workers->set_active_workers(active_workers);
  assert(gch->n_gens() == 2,
         "Par collection currently only works with single older gen.");
  _next_gen = gch->next_gen(this);
  // Do we have to avoid promotion_undo?
  if (gch->collector_policy()->is_concurrent_mark_sweep_policy()) {
    set_avoid_promotion_undo(true);
  }

  // If the next generation is too full to accommodate worst-case promotion
  // from this generation, pass on collection; let the next generation
  // do it.
  if (!collection_attempt_is_safe()) {
    gch->set_incremental_collection_failed();  // slight lie, in that we did not even attempt one
    return;
  }
  assert(to()->is_empty(), "Else not collection_attempt_is_safe");

  ParNewTracer gc_tracer;
  gc_tracer.report_gc_start(gch->gc_cause(), _gc_timer->gc_start());
  gch->trace_heap_before_gc(&gc_tracer);

  init_assuming_no_promotion_failure();

  if (UseAdaptiveSizePolicy) {
    set_survivor_overflow(false);
    size_policy->minor_collection_begin();
  }

  GCTraceTime t1(GCCauseString("GC", gch->gc_cause()), PrintGC && !PrintGCDetails, true, NULL, gc_tracer.gc_id());
  // Capture heap used before collection (for printing).
  size_t gch_prev_used = gch->used();

  SpecializationStats::clear();

  age_table()->clear();
  to()->clear(SpaceDecorator::Mangle);

  gch->save_marks();
  assert(workers != NULL, "Need parallel worker threads.");
  int n_workers = active_workers;

  // Set the correct parallelism (number of queues) in the reference processor
  ref_processor()->set_active_mt_degree(n_workers);

  // Always set the terminator for the active number of workers
  // because only those workers go through the termination protocol.
  ParallelTaskTerminator _term(n_workers, task_queues());
  ParScanThreadStateSet thread_state_set(workers->active_workers(),
                                         *to(), *this, *_next_gen, *task_queues(),
                                         _overflow_stacks, desired_plab_sz(), _term);

  ParNewGenTask tsk(this, _next_gen, reserved().end(), &thread_state_set);
  gch->set_par_threads(n_workers);
  gch->rem_set()->prepare_for_younger_refs_iterate(true);
  // It turns out that even when we're using 1 thread, doing the work in a
  // separate thread causes wide variance in run times. We can't help this
  // in the multi-threaded case, but we special-case n=1 here to get
  // repeatable measurements of the 1-thread overhead of the parallel code.
  if (n_workers > 1) {
    GenCollectedHeap::StrongRootsScope srs(gch);
    workers->run_task(&tsk);
  } else {
    GenCollectedHeap::StrongRootsScope srs(gch);
    tsk.work(0);
  }
  thread_state_set.reset(0 /* Bad value in debug if not reset */,
                         promotion_failed());

  // Process (weak) reference objects found during scavenge.
  ReferenceProcessor* rp = ref_processor();
  IsAliveClosure is_alive(this);
  ScanWeakRefClosure scan_weak_ref(this);
  KeepAliveClosure keep_alive(&scan_weak_ref);
  ScanClosure               scan_without_gc_barrier(this, false);
  ScanClosureWithParBarrier scan_with_gc_barrier(this, true);
  set_promo_failure_scan_stack_closure(&scan_without_gc_barrier);
  EvacuateFollowersClosureGeneral evacuate_followers(gch, _level,
    &scan_without_gc_barrier, &scan_with_gc_barrier);
  rp->setup_policy(clear_all_soft_refs);
  // Can the mt_degree be set later (at run_task() time would be best)?
  rp->set_active_mt_degree(active_workers);
  ReferenceProcessorStats stats;
  if (rp->processing_is_mt()) {
    ParNewRefProcTaskExecutor task_executor(*this, thread_state_set);
    stats = rp->process_discovered_references(&is_alive, &keep_alive,
                                              &evacuate_followers, &task_executor,
                                              _gc_timer, gc_tracer.gc_id());
  } else {
    thread_state_set.flush();
    gch->set_par_threads(0);  // 0 ==> non-parallel.
    gch->save_marks();
    stats = rp->process_discovered_references(&is_alive, &keep_alive,
                                              &evacuate_followers, NULL,
                                              _gc_timer, gc_tracer.gc_id());
  }
  gc_tracer.report_gc_reference_stats(stats);
  if (!promotion_failed()) {
  // Try allocating obj in to-space (unless too old)
  if (dummyOld.age() < tenuring_threshold()) {
    new_obj = (oop)par_scan_state->alloc_in_to_space(sz);
    if (new_obj == NULL) {
      set_survivor_overflow(true);
    }
  }

  if (new_obj == NULL) {
    // Either to-space is full or we decided to promote
    // try allocating obj tenured

    // Attempt to install a null forwarding pointer (atomically),
    // to claim the right to install the real forwarding pointer.
    forward_ptr = old->forward_to_atomic(ClaimedForwardPtr);
    if (forward_ptr != NULL) {
      // someone else beat us to it.
      return real_forwardee(old);
    }

    new_obj = _next_gen->par_promote(par_scan_state->thread_num(),
                                     old, m, sz);

    if (new_obj == NULL) {
      // promotion failed, forward to self
      _promotion_failed = true;
      new_obj = old;

      preserve_mark_if_necessary(old, m);
      par_scan_state->register_promotion_failure(sz);
    }

    old->forward_to(new_obj);
    forward_ptr = NULL;
  } else {
    // Is in to-space; do copying ourselves.
    Copy::aligned_disjoint_words((HeapWord*)old, (HeapWord*)new_obj, sz);
    forward_ptr = old->forward_to_atomic(new_obj);
    // Restore the mark word copied above.
    new_obj->set_mark(m);
    // Increment age if obj still in new generation
    new_obj->incr_age();
    par_scan_state->age_table()->add(new_obj, sz);
  }
  assert(new_obj != NULL, "just checking");

#ifndef PRODUCT
  // This code must come after the CAS test, or it will print incorrect
  // information.
  if (TraceScavenge) {
    gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}",
                           is_in_reserved(new_obj) ? "copying" : "tenuring",
                           new_obj->klass()->internal_name(), (void *)old, (void *)new_obj, new_obj->size());
  }
#endif

  if (forward_ptr == NULL) {
    oop obj_to_push = new_obj;
    if (par_scan_state->should_be_partially_scanned(obj_to_push, old)) {
      // Length field used as index of next element to be scanned.
      // Real length can be obtained from real_forwardee()
      arrayOop(old)->set_length(0);
      obj_to_push = old;
      assert(obj_to_push->is_forwarded() && obj_to_push->forwardee() != obj_to_push,
             "push forwarded object");
    }
    // Push it on one of the queues of to-be-scanned objects.
    bool simulate_overflow = false;
    NOT_PRODUCT(
      if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
        // simulate a stack overflow
        simulate_overflow = true;
      }
  oopDesc dummyOld;
  dummyOld.set_mark(m);
  assert(!dummyOld.is_forwarded(),
         "should not be called with forwarding pointer mark word.");

  bool failed_to_promote = false;
  oop new_obj = NULL;
  oop forward_ptr;

  // Try allocating obj in to-space (unless too old)
  if (dummyOld.age() < tenuring_threshold()) {
    new_obj = (oop)par_scan_state->alloc_in_to_space(sz);
    if (new_obj == NULL) {
      set_survivor_overflow(true);
    }
  }

  if (new_obj == NULL) {
    // Either to-space is full or we decided to promote
    // try allocating obj tenured
    new_obj = _next_gen->par_promote(par_scan_state->thread_num(),
                                     old, m, sz);

    if (new_obj == NULL) {
      // promotion failed, forward to self
      forward_ptr = old->forward_to_atomic(old);
      new_obj = old;

      if (forward_ptr != NULL) {
        return forward_ptr;  // someone else succeeded
      }

      _promotion_failed = true;
      failed_to_promote = true;

      preserve_mark_if_necessary(old, m);
      par_scan_state->register_promotion_failure(sz);
    }
  } else {
    // Is in to-space; do copying ourselves.
    Copy::aligned_disjoint_words((HeapWord*)old, (HeapWord*)new_obj, sz);
    // Restore the mark word copied above.
    new_obj->set_mark(m);
    // Increment age if new_obj still in new generation
    new_obj->incr_age();
    par_scan_state->age_table()->add(new_obj, sz);
  }
  assert(new_obj != NULL, "just checking");

#ifndef PRODUCT
  // This code must come after the CAS test, or it will print incorrect
  // information.
  if (TraceScavenge) {
    gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}",
                           is_in_reserved(new_obj) ? "copying" : "tenuring",
                           new_obj->klass()->internal_name(), (void *)old, (void *)new_obj, new_obj->size());
  }
#endif

  // Now attempt to install the forwarding pointer (atomically).
  // We have to copy the mark word before overwriting with forwarding
  // ptr, so we can restore it below in the copy.
  if (!failed_to_promote) {
    forward_ptr = old->forward_to_atomic(new_obj);
  }

  if (forward_ptr == NULL) {
    oop obj_to_push = new_obj;
    if (par_scan_state->should_be_partially_scanned(obj_to_push, old)) {
      // Length field used as index of next element to be scanned.
      // Real length can be obtained from real_forwardee()
      arrayOop(old)->set_length(0);
      obj_to_push = old;
      assert(obj_to_push->is_forwarded() && obj_to_push->forwardee() != obj_to_push,
1372 }
        simulate_overflow = true;
      }
    )
    if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
      // Add stats for overflow pushes.
      push_on_overflow_list(old, par_scan_state);
      TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0));
    }

    return new_obj;
  }

  // Oops.  Someone beat us to it.  Undo the allocation.  Where did we
  // allocate it?
  if (is_in_reserved(new_obj)) {
    // Must be in to_space.
    assert(to()->is_in_reserved(new_obj), "Checking");
    par_scan_state->undo_alloc_in_to_space((HeapWord*)new_obj, sz);
  } else {
    assert(!_avoid_promotion_undo, "Should not be here if avoiding.");
    _next_gen->par_promote_alloc_undo(par_scan_state->thread_num(),
                                      (HeapWord*)new_obj, sz);
  }

  return forward_ptr;
}

#ifndef PRODUCT
// It's OK to call this multi-threaded; the worst thing
// that can happen is that we'll get a bunch of closely
// spaced simulated overflows, but that's OK, in fact
// probably good as it would exercise the overflow code
// under contention.
bool ParNewGeneration::should_simulate_overflow() {
  if (_overflow_counter-- <= 0) { // just being defensive
    _overflow_counter = ParGCWorkQueueOverflowInterval;
    return true;
  } else {
    return false;
  }
}
// except that in the CMS case we thread the objects
// directly into the list via their mark word, and do
// not need to deal with special cases below related
// to chunking of object arrays and promotion failure
// handling.
// CR 6797058 has been filed to attempt consolidation of
// the common code.
// Because of the common code, if you make any changes in
// the code below, please check the CMS version to see if
// similar changes might be needed.
// See CMSCollector::par_take_from_overflow_list() for
// more extensive documentation comments.
bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan_state) {
  ObjToScanQueue* work_q = par_scan_state->work_queue();
  // How many to take?
  size_t objsFromOverflow = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                 (size_t)ParGCDesiredObjsFromOverflowList);

  assert(!UseCompressedOops, "Error");
  assert(par_scan_state->overflow_stack() == NULL, "Error");
  if (_overflow_list == NULL) return false;

  // Otherwise, there was something there; try claiming the list.
  oop prefix = cast_to_oop(Atomic::xchg_ptr(BUSY, &_overflow_list));
  // Trim off a prefix of at most objsFromOverflow items
  Thread* tid = Thread::current();
  size_t spin_count = (size_t)ParallelGCThreads;
  size_t sleep_time_millis = MAX2((size_t)1, objsFromOverflow/100);
  for (size_t spin = 0; prefix == BUSY && spin < spin_count; spin++) {
    // someone grabbed it before we did ...
    // ... we spin for a short while...
    os::sleep(tid, sleep_time_millis, false);
    if (_overflow_list == NULL) {
      // nothing left to take
      return false;
    } else if (_overflow_list != BUSY) {
      // try and grab the prefix
      prefix = cast_to_oop(Atomic::xchg_ptr(BUSY, &_overflow_list));
    }
  }
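  // If the list is still NULL or BUSY after the bounded spin above, some
  // other thread owns (or has emptied) it; give up rather than wait longer.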
  if (prefix == NULL || prefix == BUSY) {
|
46 #include "memory/space.hpp"
47 #include "oops/objArrayOop.hpp"
48 #include "oops/oop.inline.hpp"
49 #include "oops/oop.pcgc.inline.hpp"
50 #include "runtime/atomic.inline.hpp"
51 #include "runtime/handles.hpp"
52 #include "runtime/handles.inline.hpp"
53 #include "runtime/java.hpp"
54 #include "runtime/thread.inline.hpp"
55 #include "utilities/copy.hpp"
56 #include "utilities/globalDefinitions.hpp"
57 #include "utilities/workgroup.hpp"
58
59 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
60
61 #ifdef _MSC_VER
62 #pragma warning( push )
63 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
64 #endif
65 ParScanThreadState::ParScanThreadState(Space* to_space_,
66 ParNewGeneration* young_gen_,
67 Generation* old_gen_,
68 int thread_num_,
69 ObjToScanQueueSet* work_queue_set_,
70 Stack<oop, mtGC>* overflow_stacks_,
71 size_t desired_plab_sz_,
72 ParallelTaskTerminator& term_)
73 : _to_space(to_space_),
74 _old_gen(old_gen_),
75 _young_gen(young_gen_),
76 _thread_num(thread_num_),
77 _work_queue(work_queue_set_->queue(thread_num_)),
78 _to_space_full(false),
79 _overflow_stack(overflow_stacks_ ? overflow_stacks_ + thread_num_ : NULL),
80 _ageTable(false), // false ==> not the global age table, no perf data.
81 _to_space_alloc_buffer(desired_plab_sz_),
82 _to_space_closure(young_gen_, this),
83 _old_gen_closure(young_gen_, this),
84 _to_space_root_closure(young_gen_, this),
85 _old_gen_root_closure(young_gen_, this),
86 _older_gen_closure(young_gen_, this),
87 _evacuate_followers(this,
88 &_to_space_closure,
89 &_old_gen_closure,
90 &_to_space_root_closure,
91 young_gen_,
92 &_old_gen_root_closure,
93 work_queue_set_,
94 &term_),
95 _is_alive_closure(young_gen_),
96 _scan_weak_ref_closure(young_gen_, this),
97 _keep_alive_closure(&_scan_weak_ref_closure),
98 _strong_roots_time(0.0),
99 _term_time(0.0) {
100 #if TASKQUEUE_STATS
101 _term_attempts = 0;
102 _overflow_refills = 0;
103 _overflow_refill_objs = 0;
104 #endif // TASKQUEUE_STATS
105
106 _survivor_chunk_array = (ChunkArray*) old_gen()->get_data_recorder(thread_num());
107 _hash_seed = 17; // Might want to take time-based random value.
108 _start = os::elapsedTime();
109 _old_gen_closure.set_generation(old_gen_);
110 _old_gen_root_closure.set_generation(old_gen_);
111 }
112 #ifdef _MSC_VER
113 #pragma warning( pop )
114 #endif
115
116 void ParScanThreadState::record_survivor_plab(HeapWord* plab_start,
117 size_t plab_word_size) {
118 ChunkArray* sca = survivor_chunk_array();
119 if (sca != NULL) {
120 // A non-null SCA implies that we want the PLAB data recorded.
121 sca->record_sample(plab_start, plab_word_size);
122 }
123 }
124
125 bool ParScanThreadState::should_be_partially_scanned(oop new_obj, oop old_obj) const {
126 return new_obj->is_objArray() &&
149 bool ok = work_queue()->push(old);
150 assert(ok, "just popped, push must be okay");
151 } else {
152 // Restore length so that it can be used if there
153 // is a promotion failure and forwarding pointers
154 // must be removed.
155 arrayOop(old)->set_length(end);
156 }
157
158 // process our set of indices (include header in first chunk)
159 // should make sure end is even (aligned to HeapWord in case of compressed oops)
160 if ((HeapWord *)obj < young_old_boundary()) {
161 // object is in to_space
162 obj->oop_iterate_range(&_to_space_closure, start, end);
163 } else {
164 // object is in old generation
165 obj->oop_iterate_range(&_old_gen_closure, start, end);
166 }
167 }
168
169 void ParScanThreadState::trim_queues(int max_size) {
170 ObjToScanQueue* queue = work_queue();
171 do {
172 while (queue->size() > (juint)max_size) {
173 oop obj_to_scan;
174 if (queue->pop_local(obj_to_scan)) {
175 if ((HeapWord *)obj_to_scan < young_old_boundary()) {
176 if (obj_to_scan->is_objArray() &&
177 obj_to_scan->is_forwarded() &&
178 obj_to_scan->forwardee() != obj_to_scan) {
179 scan_partial_array_and_push_remainder(obj_to_scan);
180 } else {
181 // object is in to_space
182 obj_to_scan->oop_iterate(&_to_space_closure);
183 }
184 } else {
185 // object is in old generation
186 obj_to_scan->oop_iterate(&_old_gen_closure);
187 }
188 }
216 assert(!old_gen()->is_in_reserved(cur), "Should be in young gen");
217 assert(Universe::heap()->is_in_reserved(obj_to_push), "Should be in heap");
218 if (should_be_partially_scanned(obj_to_push, cur)) {
219 assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
220 obj_to_push = cur;
221 }
222 bool ok = queue->push(obj_to_push);
223 assert(ok, "Should have succeeded");
224 }
225 assert(young_gen()->overflow_list() == NULL, "Error");
226 return num_take_elems > 0; // was something transferred?
227 }
228
229 void ParScanThreadState::push_on_overflow_stack(oop p) {
230 assert(ParGCUseLocalOverflow, "Else should not call");
231 overflow_stack()->push(p);
232 assert(young_gen()->overflow_list() == NULL, "Error");
233 }
234
235 HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) {
236 // If the object is small enough, try to reallocate the buffer.
237 HeapWord* obj = NULL;
238 if (!_to_space_full) {
239 ParGCAllocBuffer* const plab = to_space_alloc_buffer();
240 Space* const sp = to_space();
241 if (word_sz * 100 < ParallelGCBufferWastePct * plab->word_sz()) {
242 // Is small enough; abandon this buffer and start a new one.
243 plab->retire(false, false);
244 size_t buf_size = plab->word_sz();
245 HeapWord* buf_space = sp->par_allocate(buf_size);
246 if (buf_space == NULL) {
247 const size_t min_bytes =
248 ParGCAllocBuffer::min_size() << LogHeapWordSize;
249 size_t free_bytes = sp->free();
250 while(buf_space == NULL && free_bytes >= min_bytes) {
251 buf_size = free_bytes >> LogHeapWordSize;
252 assert(buf_size == (size_t)align_object_size(buf_size),
253 "Invariant");
254 buf_space = sp->par_allocate(buf_size);
255 free_bytes = sp->free();
256 }
257 }
258 if (buf_space != NULL) {
259 plab->set_word_size(buf_size);
260 plab->set_buf(buf_space);
261 record_survivor_plab(buf_space, buf_size);
263 // Note that we cannot compare buf_size < word_sz below
264 // because of AlignmentReserve (see ParGCAllocBuffer::allocate()).
265 assert(obj != NULL || plab->words_remaining() < word_sz,
266 "Else should have been able to allocate");
267 // It's conceivable that we may be able to use the
268 // buffer we just grabbed for subsequent small requests
269 // even if not for this one.
270 } else {
271 // We're used up.
272 _to_space_full = true;
273 }
274
275 } else {
276 // Too large; allocate the object individually.
277 obj = sp->par_allocate(word_sz);
278 }
279 }
280 return obj;
281 }
282
283 void ParScanThreadState::undo_alloc_in_to_space(HeapWord* obj, size_t word_sz) {
284 // Is the alloc in the current alloc buffer?
285 if (to_space_alloc_buffer()->contains(obj)) {
286 assert(to_space_alloc_buffer()->contains(obj + word_sz - 1),
287 "Should contain whole object.");
288 to_space_alloc_buffer()->undo_allocation(obj, word_sz);
289 } else {
290 CollectedHeap::fill_with_object(obj, word_sz);
291 }
292 }
293
294 void ParScanThreadState::print_promotion_failure_size() {
295 if (_promotion_failed_info.has_failed() && PrintPromotionFailure) {
296 gclog_or_tty->print(" (%d: promotion failure size = " SIZE_FORMAT ") ",
297 _thread_num, _promotion_failed_info.first_size());
298 }
299 }
300
301 class ParScanThreadStateSet: private ResourceArray {
302 public:
303 // Initializes states for the specified number of threads;
304 ParScanThreadStateSet(int num_threads,
305 Space& to_space,
306 ParNewGeneration& gen,
307 Generation& old_gen,
308 ObjToScanQueueSet& queue_set,
309 Stack<oop, mtGC>* overflow_stacks,
310 size_t desired_plab_sz,
311 ParallelTaskTerminator& term);
312
313 ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); }
314
315 inline ParScanThreadState& thread_state(int i);
316
317 void trace_promotion_failed(YoungGCTracer& gc_tracer);
318 void reset(int active_workers, bool promotion_failed);
319 void flush();
320
321 #if TASKQUEUE_STATS
322 static void
323 print_termination_stats_hdr(outputStream* const st = gclog_or_tty);
324 void print_termination_stats(outputStream* const st = gclog_or_tty);
325 static void
326 print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty);
327 void print_taskqueue_stats(outputStream* const st = gclog_or_tty);
328 void reset_stats();
329 #endif // TASKQUEUE_STATS
330
331 private:
332 ParallelTaskTerminator& _term;
333 ParNewGeneration& _gen;
334 Generation& _old_gen;
335 public:
336 bool is_valid(int id) const { return id < length(); }
337 ParallelTaskTerminator* terminator() { return &_term; }
338 };
339
340 ParScanThreadStateSet::ParScanThreadStateSet(int num_threads,
341 Space& to_space,
342 ParNewGeneration& gen,
343 Generation& old_gen,
344 ObjToScanQueueSet& queue_set,
345 Stack<oop, mtGC>* overflow_stacks,
346 size_t desired_plab_sz,
347 ParallelTaskTerminator& term)
348 : ResourceArray(sizeof(ParScanThreadState), num_threads),
349 _gen(gen),
350 _old_gen(old_gen),
351 _term(term) {
352 assert(num_threads > 0, "sanity check!");
353 assert(ParGCUseLocalOverflow == (overflow_stacks != NULL),
354 "overflow_stack allocation mismatch");
355 // Initialize states.
356 for (int i = 0; i < num_threads; ++i) {
357 new ((ParScanThreadState*)_data + i)
358 ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set,
359 overflow_stacks, desired_plab_sz, term);
360 }
361 }
362
363 inline ParScanThreadState& ParScanThreadStateSet::thread_state(int i) {
364 assert(i >= 0 && i < length(), "sanity check!");
365 return ((ParScanThreadState*)_data)[i];
366 }
367
368 void ParScanThreadStateSet::trace_promotion_failed(YoungGCTracer& gc_tracer) {
369 for (int i = 0; i < length(); ++i) {
370 if (thread_state(i).promotion_failed()) {
371 gc_tracer.report_promotion_failed(thread_state(i).promotion_failed_info());
372 thread_state(i).promotion_failed_info().reset();
373 }
374 }
375 }
376
377 void ParScanThreadStateSet::reset(int active_threads, bool promotion_failed) {
378 _term.reset_for_reuse(active_threads);
379 if (promotion_failed) {
380 for (int i = 0; i < length(); ++i) {
381 thread_state(i).print_promotion_failure_size();
382 }
383 }
384 }
385
386 #if TASKQUEUE_STATS
387 void
388 ParScanThreadState::reset_stats() {
389 taskqueue_stats().reset();
390 _term_attempts = 0;
391 _overflow_refills = 0;
392 _overflow_refill_objs = 0;
393 }
394
395 void ParScanThreadStateSet::reset_stats() {
396 for (int i = 0; i < length(); ++i) {
397 thread_state(i).reset_stats();
398 }
399 }
400
401 void ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st) {
402 st->print_raw_cr("GC Termination Stats");
403 st->print_raw_cr(" elapsed --strong roots-- "
404 "-------termination-------");
405 st->print_raw_cr("thr ms ms % "
406 " ms % attempts");
407 st->print_raw_cr("--- --------- --------- ------ "
408 "--------- ------ --------");
409 }
410
411 void ParScanThreadStateSet::print_termination_stats(outputStream* const st) {
412 print_termination_stats_hdr(st);
413
414 for (int i = 0; i < length(); ++i) {
415 const ParScanThreadState & pss = thread_state(i);
416 const double elapsed_ms = pss.elapsed_time() * 1000.0;
417 const double s_roots_ms = pss.strong_roots_time() * 1000.0;
418 const double term_ms = pss.term_time() * 1000.0;
419 st->print_cr("%3d %9.2f %9.2f %6.2f "
420 "%9.2f %6.2f " SIZE_FORMAT_W(8),
421 i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
422 term_ms, term_ms * 100 / elapsed_ms, pss.term_attempts());
423 }
424 }
425
426 // Print stats related to work queue activity.
427 void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st) {
428 st->print_raw_cr("GC Task Stats");
429 st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr();
430 st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr();
431 }
432
433 void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st) {
434 print_taskqueue_stats_hdr(st);
435
436 TaskQueueStats totals;
437 for (int i = 0; i < length(); ++i) {
438 const ParScanThreadState & pss = thread_state(i);
439 const TaskQueueStats & stats = pss.taskqueue_stats();
440 st->print("%3d ", i); stats.print(st); st->cr();
441 totals += stats;
442
443 if (pss.overflow_refills() > 0) {
444 st->print_cr(" " SIZE_FORMAT_W(10) " overflow refills "
445 SIZE_FORMAT_W(10) " overflow objects",
446 pss.overflow_refills(), pss.overflow_refill_objs());
447 }
448 }
449 st->print("tot "); totals.print(st); st->cr();
450
451 DEBUG_ONLY(totals.verify());
452 }
453 #endif // TASKQUEUE_STATS
454
455 void ParScanThreadStateSet::flush() {
456 // Work in this loop should be kept as lightweight as
457 // possible since this might otherwise become a bottleneck
458 // to scaling. Should we add heavy-weight work into this
459 // loop, consider parallelizing the loop into the worker threads.
460 for (int i = 0; i < length(); ++i) {
461 ParScanThreadState& par_scan_state = thread_state(i);
462
463 // Flush stats related to To-space PLAB activity and
464 // retire the last buffer.
465 par_scan_state.to_space_alloc_buffer()->
466 flush_stats_and_retire(_gen.plab_stats(),
467 true /* end_of_gc */,
468 false /* retain */);
469
470 // Every thread has its own age table. We need to merge
471 // them all into one.
472 ageTable *local_table = par_scan_state.age_table();
473 _gen.age_table()->merge(local_table);
474
475 // Inform old gen that we're done.
476 _old_gen.par_promote_alloc_done(i);
477 _old_gen.par_oop_since_save_marks_iterate_done(i);
478 }
479
480 if (UseConcMarkSweepGC && ParallelGCThreads > 0) {
481 // We need to call this even when ResizeOldPLAB is disabled
482 // so as to avoid breaking some asserts. While we may be able
483 // to avoid this by reorganizing the code a bit, I am loathe
484 // to do that unless we find cases where ergo leads to bad
485 // performance.
486 CFLS_LAB::compute_desired_plab_size();
487 }
488 }
489
490 ParScanClosure::ParScanClosure(ParNewGeneration* g,
491 ParScanThreadState* par_scan_state)
492 : OopsInKlassOrGenClosure(g),
493 _par_scan_state(par_scan_state),
494 _g(g) {
495 _boundary = _g->reserved().end();
496 }
497
498 void ParScanWithBarrierClosure::do_oop(oop* p) { ParScanClosure::do_oop_work(p, true, false); }
499 void ParScanWithBarrierClosure::do_oop(narrowOop* p) { ParScanClosure::do_oop_work(p, true, false); }
500
501 void ParScanWithoutBarrierClosure::do_oop(oop* p) { ParScanClosure::do_oop_work(p, false, false); }
502 void ParScanWithoutBarrierClosure::do_oop(narrowOop* p) { ParScanClosure::do_oop_work(p, false, false); }
503
504 void ParRootScanWithBarrierTwoGensClosure::do_oop(oop* p) { ParScanClosure::do_oop_work(p, true, true); }
505 void ParRootScanWithBarrierTwoGensClosure::do_oop(narrowOop* p) { ParScanClosure::do_oop_work(p, true, true); }
506
507 void ParRootScanWithoutBarrierClosure::do_oop(oop* p) { ParScanClosure::do_oop_work(p, false, true); }
508 void ParRootScanWithoutBarrierClosure::do_oop(narrowOop* p) { ParScanClosure::do_oop_work(p, false, true); }
509
510 ParScanWeakRefClosure::ParScanWeakRefClosure(ParNewGeneration* g,
511 ParScanThreadState* par_scan_state)
512 : ScanWeakRefClosure(g),
513 _par_scan_state(par_scan_state) {
514 }
515
516 void ParScanWeakRefClosure::do_oop(oop* p) { ParScanWeakRefClosure::do_oop_work(p); }
517 void ParScanWeakRefClosure::do_oop(narrowOop* p) { ParScanWeakRefClosure::do_oop_work(p); }
518
519 #ifdef WIN32
520 #pragma warning(disable: 4786) /* identifier was truncated to '255' characters in the browser information */
521 #endif
522
523 ParEvacuateFollowersClosure::ParEvacuateFollowersClosure(ParScanThreadState* par_scan_state,
524 ParScanWithoutBarrierClosure* to_space_closure,
525 ParScanWithBarrierClosure* old_gen_closure,
526 ParRootScanWithoutBarrierClosure* to_space_root_closure,
527 ParNewGeneration* par_gen,
528 ParRootScanWithBarrierTwoGensClosure* old_gen_root_closure,
529 ObjToScanQueueSet* task_queues,
530 ParallelTaskTerminator* terminator)
531 : _par_scan_state(par_scan_state),
532 _to_space_closure(to_space_closure),
533 _old_gen_closure(old_gen_closure),
534 _to_space_root_closure(to_space_root_closure),
535 _old_gen_root_closure(old_gen_root_closure),
536 _par_gen(par_gen),
537 _task_queues(task_queues),
538 _terminator(terminator) {
539 }
540
541 void ParEvacuateFollowersClosure::do_void() {
542 ObjToScanQueue* work_q = par_scan_state()->work_queue();
543
544 while (true) {
545 // Scan to-space and old-gen objs until we run out of both.
546 oop obj_to_scan;
547 par_scan_state()->trim_queues(0);
548
549 // We have no local work; attempt to steal from other threads.
550
551 // First, try to steal from another thread's work queue.
552 if (task_queues()->steal(par_scan_state()->thread_num(),
553 par_scan_state()->hash_seed(),
554 obj_to_scan)) {
555 bool res = work_q->push(obj_to_scan);
556 assert(res, "Empty queue should have room for a push.");
557
558 // The steal succeeded; start over from the top of the loop.
559 continue;
560
561 // Next, try the global overflow list.
562 } else if (par_gen()->take_from_overflow_list(par_scan_state())) {
563 continue;
564 }
565
566 // Otherwise, offer termination.
567 par_scan_state()->start_term_time();
568 if (terminator()->offer_termination()) break;
569 par_scan_state()->end_term_time();
570 }
571 assert(par_gen()->_overflow_list == NULL && par_gen()->_num_par_pushes == 0,
572 "Broken overflow list?");
573 // Finish the last termination pause.
574 par_scan_state()->end_term_time();
575 }
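// In outline, each worker in do_void() above runs the following protocol
// (a sketch, not additional code):
//
//   loop:
//     drain the local work queue;                  // trim_queues(0)
//     if (steal from a random peer's queue) goto loop;
//     if (refill from the global overflow list) goto loop;
//     if (offer_termination()) break;              // all workers idle
//     goto loop;                                   // new work appeared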
576
577 ParNewGenTask::ParNewGenTask(ParNewGeneration* young_gen,
578 Generation* old_gen,
579 HeapWord* young_old_boundary,
580 ParScanThreadStateSet* state_set)
581 : AbstractGangTask("ParNewGeneration collection"),
582 _young_gen(young_gen), _old_gen(old_gen),
583 _young_old_boundary(young_old_boundary),
584 _state_set(state_set) {
585 }
586
587 // Reset the terminator for the given number of
588 // active threads.
589 void ParNewGenTask::set_for_termination(int active_workers) {
590 _state_set->reset(active_workers, _young_gen->promotion_failed());
591 // Should the heap be passed in? There's only one for now,
592 // so grab it instead.
593 GenCollectedHeap* gch = GenCollectedHeap::heap();
594 gch->set_n_termination(active_workers);
595 }
596
597 void ParNewGenTask::work(uint worker_id) {
598 GenCollectedHeap* gch = GenCollectedHeap::heap();
599 // Since this is being done in a separate thread, we need new
600 // resource and handle marks.
601 ResourceMark rm;
602 HandleMark hm;
603
604 Generation* old_gen = gch->old_gen();
605
606 ParScanThreadState& par_scan_state = _state_set->thread_state(worker_id);
607 assert(_state_set->is_valid(worker_id), "Should not have been called");
608
609 par_scan_state.set_young_old_boundary(_young_old_boundary);
610
611 KlassScanClosure klass_scan_closure(&par_scan_state.to_space_root_closure(),
612 gch->rem_set()->klass_rem_set());
613 CLDToKlassAndOopClosure cld_scan_closure(&klass_scan_closure,
614 &par_scan_state.to_space_root_closure(),
615 false);
616
617 par_scan_state.start_strong_roots();
618 gch->gen_process_roots(Generation::Young,
619 true, // Process younger gens, if any,
620 // as strong roots.
621 false, // no scope; this is parallel code
622 SharedHeap::SO_ScavengeCodeCache,
623 GenCollectedHeap::StrongAndWeakRoots,
624 &par_scan_state.to_space_root_closure(),
625 &par_scan_state.older_gen_closure(),
626 &cld_scan_closure);
627
628 par_scan_state.end_strong_roots();
629
630 // "evacuate followers".
631 par_scan_state.evacuate_followers_closure().do_void();
632 }
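// Each worker thus proceeds in two timed phases: (1) scan its share of
// the strong roots into to-space (bracketed by start/end_strong_roots),
// then (2) transitively evacuate reachable objects via the work-stealing
// do_void() loop above, with the termination wait timed separately.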
633
634 #ifdef _MSC_VER
635 #pragma warning( push )
636 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
637 #endif
638 ParNewGeneration::
639 ParNewGeneration(ReservedSpace rs, size_t initial_byte_size)
640 : DefNewGeneration(rs, initial_byte_size, "PCopy"),
641 _overflow_list(NULL),
642 _is_alive_closure(this),
643 _plab_stats(YoungPLABSize, PLABWeight) {
644 NOT_PRODUCT(_overflow_counter = ParGCWorkQueueOverflowInterval;)
645 NOT_PRODUCT(_num_par_pushes = 0;)
646 _task_queues = new ObjToScanQueueSet(ParallelGCThreads);
647 guarantee(_task_queues != NULL, "task_queues allocation failure.");
648
649 for (uint i1 = 0; i1 < ParallelGCThreads; i1++) {
650 ObjToScanQueue *q = new ObjToScanQueue();
651 guarantee(q != NULL, "work_queue Allocation failure.");
652 _task_queues->register_queue(i1, q);
653 }
654
655 for (uint i2 = 0; i2 < ParallelGCThreads; i2++) {
656 _task_queues->queue(i2)->initialize();
657 }
658
659 _overflow_stacks = NULL;
660 if (ParGCUseLocalOverflow) {
661 // typedef to work around the NEW_C_HEAP_ARRAY macro, which cannot
662 // deal with ','
663 typedef Stack<oop, mtGC> GCOopStack;
664
665 _overflow_stacks = NEW_C_HEAP_ARRAY(GCOopStack, ParallelGCThreads, mtGC);
666 for (size_t i = 0; i < ParallelGCThreads; ++i) {
667 new (_overflow_stacks + i) Stack<oop, mtGC>();
668 }
669 }
670
671 if (UsePerfData) {
672 EXCEPTION_MARK;
673 ResourceMark rm;
674
675 const char* cname =
676 PerfDataManager::counter_name(_gen_counters->name_space(), "threads");
677 PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_None,
678 ParallelGCThreads, CHECK);
679 }
680 }
681 #ifdef _MSC_VER
682 #pragma warning( pop )
683 #endif
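// Note (illustrative only; the stacks live for the lifetime of the
// generation and this file never frees them): a matching teardown for
// the placement-new'ed overflow stacks above would run the destructors
// before releasing the raw storage, e.g. (exact macro signature may
// vary across releases):
//
//   typedef Stack<oop, mtGC> GCOopStack;
//   for (size_t i = 0; i < ParallelGCThreads; ++i) {
//     _overflow_stacks[i].~GCOopStack();
//   }
//   FREE_C_HEAP_ARRAY(GCOopStack, _overflow_stacks, mtGC);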
684
685 // ParNewGeneration::
686 ParKeepAliveClosure::ParKeepAliveClosure(ParScanWeakRefClosure* cl)
687 : DefNewGeneration::KeepAliveClosure(cl), _par_cl(cl) {
688 }
689
690 template <class T>
691 void /*ParNewGeneration::*/ParKeepAliveClosure::do_oop_work(T* p) {
692 #ifdef ASSERT
693 {
694 assert(!oopDesc::is_null(*p), "expected non-null ref");
695 oop obj = oopDesc::load_decode_heap_oop_not_null(p);
696 // We never expect to see a null reference being processed
697 // as a weak reference.
698 assert(obj->is_oop(), "expected an oop while scanning weak refs");
699 }
700 #endif // ASSERT
701
702 _par_cl->do_oop_nv(p);
703
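// Only dirty the card if p itself lies within the heap; roots such as
// handles on thread stacks live outside the reserved region and have
// no corresponding card table entry.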
704 if (Universe::heap()->is_in_reserved(p)) {
705 oop obj = oopDesc::load_decode_heap_oop_not_null(p);
706 _rs->write_ref_field_gc_par(p, obj);
707 }
708 }
709
710 void /*ParNewGeneration::*/ParKeepAliveClosure::do_oop(oop* p) { ParKeepAliveClosure::do_oop_work(p); }
711 void /*ParNewGeneration::*/ParKeepAliveClosure::do_oop(narrowOop* p) { ParKeepAliveClosure::do_oop_work(p); }
712
713 // ParNewGeneration::
714 KeepAliveClosure::KeepAliveClosure(ScanWeakRefClosure* cl)
715 : DefNewGeneration::KeepAliveClosure(cl) {
716 }
717
718 template <class T>
719 void /*ParNewGeneration::*/KeepAliveClosure::do_oop_work(T* p) {
720 #ifdef ASSERT
721 {
722 assert(!oopDesc::is_null(*p), "expected non-null ref");
723 oop obj = oopDesc::load_decode_heap_oop_not_null(p);
724 // We never expect to see a null reference being processed
725 // as a weak reference.
726 assert(obj->is_oop(), "expected an oop while scanning weak refs");
727 }
728 #endif // ASSERT
729
730 _cl->do_oop_nv(p);
731
732 if (Universe::heap()->is_in_reserved(p)) {
733 oop obj = oopDesc::load_decode_heap_oop_not_null(p);
734 _rs->write_ref_field_gc_par(p, obj);
735 }
736 }
748 ? obj->forwardee()
749 : _g->DefNewGeneration::copy_to_survivor_space(obj);
750 oopDesc::encode_store_heap_oop_not_null(p, new_obj);
751 }
752 if (_gc_barrier) {
753 // If p points to a younger generation, mark the card.
754 if ((HeapWord*)obj < _gen_boundary) {
755 _rs->write_ref_field_gc_par(p, obj);
756 }
757 }
758 }
759 }
760
761 void ScanClosureWithParBarrier::do_oop(oop* p) { ScanClosureWithParBarrier::do_oop_work(p); }
762 void ScanClosureWithParBarrier::do_oop(narrowOop* p) { ScanClosureWithParBarrier::do_oop_work(p); }
763
764 class ParNewRefProcTaskProxy: public AbstractGangTask {
765 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
766 public:
767 ParNewRefProcTaskProxy(ProcessTask& task, ParNewGeneration& gen,
768 Generation& old_gen,
769 HeapWord* young_old_boundary,
770 ParScanThreadStateSet& state_set);
771
772 private:
773 virtual void work(uint worker_id);
774 virtual void set_for_termination(int active_workers) {
775 _state_set.terminator()->reset_for_reuse(active_workers);
776 }
777 private:
778 ParNewGeneration& _young_gen;
779 ProcessTask& _task;
780 Generation& _old_gen;
781 HeapWord* _young_old_boundary;
782 ParScanThreadStateSet& _state_set;
783 };
784
785 ParNewRefProcTaskProxy::ParNewRefProcTaskProxy(ProcessTask& task,
786 ParNewGeneration& young_gen,
787 Generation& old_gen,
788 HeapWord* young_old_boundary,
789 ParScanThreadStateSet& state_set)
790 : AbstractGangTask("ParNewGeneration parallel reference processing"),
791 _young_gen(young_gen),
792 _task(task),
793 _old_gen(old_gen),
794 _young_old_boundary(young_old_boundary),
795 _state_set(state_set) {
796 }
797
798 void ParNewRefProcTaskProxy::work(uint worker_id) {
799 ResourceMark rm;
800 HandleMark hm;
801 ParScanThreadState& par_scan_state = _state_set.thread_state(worker_id);
802 par_scan_state.set_young_old_boundary(_young_old_boundary);
803 _task.work(worker_id, par_scan_state.is_alive_closure(),
804 par_scan_state.keep_alive_closure(),
805 par_scan_state.evacuate_followers_closure());
806 }
807
808 class ParNewRefEnqueueTaskProxy: public AbstractGangTask {
809 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
810 EnqueueTask& _task;
811
812 public:
813 ParNewRefEnqueueTaskProxy(EnqueueTask& task)
814 : AbstractGangTask("ParNewGeneration parallel reference enqueue"),
815 _task(task) {
816 }
817
818 virtual void work(uint worker_id) {
819 _task.work(worker_id);
820 }
821 };
822
823 void ParNewRefProcTaskExecutor::execute(ProcessTask& task) {
824 GenCollectedHeap* gch = GenCollectedHeap::heap();
825 assert(gch->kind() == CollectedHeap::GenCollectedHeap,
826 "not a generational heap");
827 FlexibleWorkGang* workers = gch->workers();
828 assert(workers != NULL, "Need parallel worker threads.");
829 _state_set.reset(workers->active_workers(), _generation.promotion_failed());
830 ParNewRefProcTaskProxy rp_task(task, _generation, *(gch->old_gen()),
831 _generation.reserved().end(), _state_set);
832 workers->run_task(&rp_task);
833 _state_set.reset(0 /* bad value in debug if not reset */,
834 _generation.promotion_failed());
835 }
836
837 void ParNewRefProcTaskExecutor::execute(EnqueueTask& task) {
838 GenCollectedHeap* gch = GenCollectedHeap::heap();
839 FlexibleWorkGang* workers = gch->workers();
840 assert(workers != NULL, "Need parallel worker threads.");
841 ParNewRefEnqueueTaskProxy enq_task(task);
842 workers->run_task(&enq_task);
843 }
844
845 void ParNewRefProcTaskExecutor::set_single_threaded_mode() {
846 _state_set.flush();
847 GenCollectedHeap* gch = GenCollectedHeap::heap();
848 gch->set_par_threads(0); // 0 ==> non-parallel.
849 gch->save_marks();
850 }
851
852 ScanClosureWithParBarrier::ScanClosureWithParBarrier(ParNewGeneration* g,
853 bool gc_barrier)
854 : ScanClosure(g, gc_barrier) {
855 }
856
857 EvacuateFollowersClosureGeneral::
858 EvacuateFollowersClosureGeneral(GenCollectedHeap* gch,
859 OopsInGenClosure* cur,
860 OopsInGenClosure* older)
861 : _gch(gch),
862 _scan_cur_or_nonheap(cur),
863 _scan_older(older) {
864 }
865
866 void EvacuateFollowersClosureGeneral::do_void() {
867 do {
868 // Beware: this call will lead to closure applications via virtual
869 // calls.
870 _gch->oop_since_save_marks_iterate(Generation::Young,
871 _scan_cur_or_nonheap,
872 _scan_older);
873 } while (!_gch->no_allocs_since_save_marks(true /* include_young */));
874 }
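// The loop above is a fixpoint iteration (outline only): each pass scans
// every object allocated since the last save_marks(); copying can itself
// allocate more such objects, so we repeat until no generation has
// allocated past its saved mark:
//
//   do { scan [saved_mark, top) in each generation }
//   while (some generation's top moved past its saved mark);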
875
876
877 // A Generation that does parallel young-gen collection.
878
879 bool ParNewGeneration::_avoid_promotion_undo = false;
880
881 void ParNewGeneration::handle_promotion_failed(GenCollectedHeap* gch,
882 ParScanThreadStateSet& thread_state_set,
883 ParNewTracer& gc_tracer) {
884 assert(_promo_failure_scan_stack.is_empty(), "post condition");
885 _promo_failure_scan_stack.clear(true); // Clear cached segments.
886
887 remove_forwarding_pointers();
888 if (PrintGCDetails) {
889 gclog_or_tty->print(" (promotion failed)");
890 }
891 // All the spaces are in play for mark-sweep.
892 swap_spaces(); // Make life simpler for CMS || rescan; see 6483690.
893 from()->set_next_compaction_space(to());
894 gch->set_incremental_collection_failed();
895 // Inform the next generation that a promotion failure occurred.
896 _old_gen->promotion_failure_occurred();
897
898 // Trace promotion failure in the parallel GC threads
899 thread_state_set.trace_promotion_failed(gc_tracer);
900   // Single-threaded code may have reported promotion failure to the global state.
901 if (_promotion_failed_info.has_failed()) {
902 gc_tracer.report_promotion_failed(_promotion_failed_info);
903 }
904 // Reset the PromotionFailureALot counters.
905 NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();)
906 }
907
908 void ParNewGeneration::collect(bool full,
909 bool clear_all_soft_refs,
910 size_t size,
911 bool is_tlab) {
912 assert(full || size > 0, "otherwise we don't want to collect");
913
914 GenCollectedHeap* gch = GenCollectedHeap::heap();
915
916 _gc_timer->register_gc_start();
917
918 assert(gch->kind() == CollectedHeap::GenCollectedHeap,
919 "not a CMS generational heap");
920 AdaptiveSizePolicy* size_policy = gch->gen_policy()->size_policy();
921 FlexibleWorkGang* workers = gch->workers();
922 assert(workers != NULL, "Need workgang for parallel work");
923 int active_workers =
924 AdaptiveSizePolicy::calc_active_workers(workers->total_workers(),
925 workers->active_workers(),
926 Threads::number_of_non_daemon_threads());
927 workers->set_active_workers(active_workers);
928 _old_gen = gch->old_gen();
929 // Do we have to avoid promotion_undo?
930 if (gch->collector_policy()->is_concurrent_mark_sweep_policy()) {
931 set_avoid_promotion_undo(true);
932 }
933
934 // If the next generation is too full to accommodate worst-case promotion
935 // from this generation, pass on collection; let the next generation
936 // do it.
937 if (!collection_attempt_is_safe()) {
938 gch->set_incremental_collection_failed(); // slight lie, in that we did not even attempt one
939 return;
940 }
941 assert(to()->is_empty(), "Else not collection_attempt_is_safe");
942
943 ParNewTracer gc_tracer;
944 gc_tracer.report_gc_start(gch->gc_cause(), _gc_timer->gc_start());
945 gch->trace_heap_before_gc(&gc_tracer);
946
947 init_assuming_no_promotion_failure();
948
954 GCTraceTime t1(GCCauseString("GC", gch->gc_cause()), PrintGC && !PrintGCDetails, true, NULL, gc_tracer.gc_id());
955 // Capture heap used before collection (for printing).
956 size_t gch_prev_used = gch->used();
957
958 SpecializationStats::clear();
959
960 age_table()->clear();
961 to()->clear(SpaceDecorator::Mangle);
962
963 gch->save_marks();
964 assert(workers != NULL, "Need parallel worker threads.");
965 int n_workers = active_workers;
966
967 // Set the correct parallelism (number of queues) in the reference processor
968 ref_processor()->set_active_mt_degree(n_workers);
969
970 // Always set the terminator for the active number of workers
971 // because only those workers go through the termination protocol.
972 ParallelTaskTerminator _term(n_workers, task_queues());
973 ParScanThreadStateSet thread_state_set(workers->active_workers(),
974 *to(), *this, *_old_gen, *task_queues(),
975 _overflow_stacks, desired_plab_sz(), _term);
976
977 ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set);
978 gch->set_par_threads(n_workers);
979 gch->rem_set()->prepare_for_younger_refs_iterate(true);
980 // It turns out that even when we're using 1 thread, doing the work in a
981 // separate thread causes wide variance in run times. We can't help this
982 // in the multi-threaded case, but we special-case n=1 here to get
983 // repeatable measurements of the 1-thread overhead of the parallel code.
984 if (n_workers > 1) {
985 GenCollectedHeap::StrongRootsScope srs(gch);
986 workers->run_task(&tsk);
987 } else {
988 GenCollectedHeap::StrongRootsScope srs(gch);
989 tsk.work(0);
990 }
991 thread_state_set.reset(0 /* Bad value in debug if not reset */,
992 promotion_failed());
993
994 // Process (weak) reference objects found during scavenge.
995 ReferenceProcessor* rp = ref_processor();
996 IsAliveClosure is_alive(this);
997 ScanWeakRefClosure scan_weak_ref(this);
998 KeepAliveClosure keep_alive(&scan_weak_ref);
999 ScanClosure scan_without_gc_barrier(this, false);
1000 ScanClosureWithParBarrier scan_with_gc_barrier(this, true);
1001 set_promo_failure_scan_stack_closure(&scan_without_gc_barrier);
1002 EvacuateFollowersClosureGeneral evacuate_followers(gch,
1003 &scan_without_gc_barrier, &scan_with_gc_barrier);
1004 rp->setup_policy(clear_all_soft_refs);
1005 // Can the mt_degree be set later (at run_task() time would be best)?
1006 rp->set_active_mt_degree(active_workers);
1007 ReferenceProcessorStats stats;
1008 if (rp->processing_is_mt()) {
1009 ParNewRefProcTaskExecutor task_executor(*this, thread_state_set);
1010 stats = rp->process_discovered_references(&is_alive, &keep_alive,
1011 &evacuate_followers, &task_executor,
1012 _gc_timer, gc_tracer.gc_id());
1013 } else {
1014 thread_state_set.flush();
1015 gch->set_par_threads(0); // 0 ==> non-parallel.
1016 gch->save_marks();
1017 stats = rp->process_discovered_references(&is_alive, &keep_alive,
1018 &evacuate_followers, NULL,
1019 _gc_timer, gc_tracer.gc_id());
1020 }
1021 gc_tracer.report_gc_reference_stats(stats);
1022 if (!promotion_failed()) {
1171 // Try allocating obj in to-space (unless too old)
1172 if (dummyOld.age() < tenuring_threshold()) {
1173 new_obj = (oop)par_scan_state->alloc_in_to_space(sz);
1174 if (new_obj == NULL) {
1175 set_survivor_overflow(true);
1176 }
1177 }
1178
1179 if (new_obj == NULL) {
1180     // Either to-space is full or we decided to promote:
1181     // try allocating obj tenured.
1182
1183     // Attempt to install the ClaimedForwardPtr sentinel (atomically),
1184     // to claim the right to install the real forwarding pointer.
1185 forward_ptr = old->forward_to_atomic(ClaimedForwardPtr);
1186 if (forward_ptr != NULL) {
1187 // someone else beat us to it.
1188 return real_forwardee(old);
1189 }
1190
1191 new_obj = _old_gen->par_promote(par_scan_state->thread_num(), old, m, sz);
1192
1193 if (new_obj == NULL) {
1194 // promotion failed, forward to self
1195 _promotion_failed = true;
1196 new_obj = old;
1197
1198 preserve_mark_if_necessary(old, m);
1199 par_scan_state->register_promotion_failure(sz);
1200 }
1201
1202 old->forward_to(new_obj);
1203 forward_ptr = NULL;
1204 } else {
1205 // Is in to-space; do copying ourselves.
1206 Copy::aligned_disjoint_words((HeapWord*)old, (HeapWord*)new_obj, sz);
1207 forward_ptr = old->forward_to_atomic(new_obj);
1208 // Restore the mark word copied above.
1209 new_obj->set_mark(m);
1210 // Increment age if obj still in new generation
1211 new_obj->incr_age();
1212 par_scan_state->age_table()->add(new_obj, sz);
1213 }
1214 assert(new_obj != NULL, "just checking");
1215
1216 #ifndef PRODUCT
1217 // This code must come after the CAS test, or it will print incorrect
1218 // information.
1219 if (TraceScavenge) {
1220 gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}",
1221 is_in_reserved(new_obj) ? "copying" : "tenuring",
1222 new_obj->klass()->internal_name(),
1223 (void *)old,
1224 (void *)new_obj,
1225 new_obj->size());
1226 }
1227 #endif
1228
1229 if (forward_ptr == NULL) {
1230 oop obj_to_push = new_obj;
1231 if (par_scan_state->should_be_partially_scanned(obj_to_push, old)) {
1232 // Length field used as index of next element to be scanned.
1233 // Real length can be obtained from real_forwardee()
1234 arrayOop(old)->set_length(0);
1235 obj_to_push = old;
1236 assert(obj_to_push->is_forwarded() && obj_to_push->forwardee() != obj_to_push,
1237 "push forwarded object");
1238 }
1239 // Push it on one of the queues of to-be-scanned objects.
1240 bool simulate_overflow = false;
1241 NOT_PRODUCT(
1242 if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
1243 // simulate a stack overflow
1244 simulate_overflow = true;
1245 }
1291 oopDesc dummyOld;
1292 dummyOld.set_mark(m);
1293 assert(!dummyOld.is_forwarded(),
1294 "should not be called with forwarding pointer mark word.");
1295
1296 bool failed_to_promote = false;
1297 oop new_obj = NULL;
1298 oop forward_ptr;
1299
1300 // Try allocating obj in to-space (unless too old)
1301 if (dummyOld.age() < tenuring_threshold()) {
1302 new_obj = (oop)par_scan_state->alloc_in_to_space(sz);
1303 if (new_obj == NULL) {
1304 set_survivor_overflow(true);
1305 }
1306 }
1307
1308 if (new_obj == NULL) {
1309     // Either to-space is full or we decided to promote:
1310     // try allocating obj tenured.
1311 new_obj = _old_gen->par_promote(par_scan_state->thread_num(),
1312 old, m, sz);
1313
1314 if (new_obj == NULL) {
1315 // promotion failed, forward to self
1316 forward_ptr = old->forward_to_atomic(old);
1317 new_obj = old;
1318
1319 if (forward_ptr != NULL) {
1320 return forward_ptr; // someone else succeeded
1321 }
1322
1323 _promotion_failed = true;
1324 failed_to_promote = true;
1325
1326 preserve_mark_if_necessary(old, m);
1327 par_scan_state->register_promotion_failure(sz);
1328 }
1329 } else {
1330 // Is in to-space; do copying ourselves.
1331 Copy::aligned_disjoint_words((HeapWord*)old, (HeapWord*)new_obj, sz);
1332 // Restore the mark word copied above.
1333 new_obj->set_mark(m);
1334 // Increment age if new_obj still in new generation
1335 new_obj->incr_age();
1336 par_scan_state->age_table()->add(new_obj, sz);
1337 }
1338 assert(new_obj != NULL, "just checking");
1339
1340 #ifndef PRODUCT
1341   // Note: this trace precedes the forwarding CAS below; if that CAS
1342   // loses the race, the copy reported here is later undone.
1343 if (TraceScavenge) {
1344 gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}",
1345 is_in_reserved(new_obj) ? "copying" : "tenuring",
1346 new_obj->klass()->internal_name(),
1347 (void*)old,
1348 (void*)new_obj,
1349 new_obj->size());
1350 }
1351 #endif
1352
1353   // Now attempt to install the forwarding pointer (atomically).
1354   // The mark word m was captured before the forwarding ptr overwrites
1355   // it here; it has already been restored in the copy above.
1356 if (!failed_to_promote) {
1357 forward_ptr = old->forward_to_atomic(new_obj);
1358 }
1359
1360 if (forward_ptr == NULL) {
1361 oop obj_to_push = new_obj;
1362 if (par_scan_state->should_be_partially_scanned(obj_to_push, old)) {
1363 // Length field used as index of next element to be scanned.
1364 // Real length can be obtained from real_forwardee()
1365 arrayOop(old)->set_length(0);
1366 obj_to_push = old;
1367 assert(obj_to_push->is_forwarded() && obj_to_push->forwardee() != obj_to_push,
1368 "push forwarded object");
1369 }
1375 simulate_overflow = true;
1376 }
1377 )
1378 if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
1379 // Add stats for overflow pushes.
1380 push_on_overflow_list(old, par_scan_state);
1381 TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0));
1382 }
1383
1384 return new_obj;
1385 }
1386
1387 // Oops. Someone beat us to it. Undo the allocation. Where did we
1388 // allocate it?
1389 if (is_in_reserved(new_obj)) {
1390 // Must be in to_space.
1391 assert(to()->is_in_reserved(new_obj), "Checking");
1392 par_scan_state->undo_alloc_in_to_space((HeapWord*)new_obj, sz);
1393 } else {
1394 assert(!_avoid_promotion_undo, "Should not be here if avoiding.");
1395 _old_gen->par_promote_alloc_undo(par_scan_state->thread_num(),
1396 (HeapWord*)new_obj, sz);
1397 }
1398
1399 return forward_ptr;
1400 }
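// To summarize the copy protocol implemented above (outline only):
//   1. capture the mark word m and size before any mutation;
//   2. allocate in to-space, or promote into the old generation;
//   3. copy the object body and restore m in the new copy;
//   4. CAS the forwarding pointer into the old copy's mark word;
//   5. if the CAS loses, undo the allocation (hence "-with-undo")
//      and return the winning thread's forwardee instead.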
1401
1402 #ifndef PRODUCT
1403 // It's OK to call this multi-threaded; the worst thing
1404 // that can happen is that we'll get a bunch of closely
1405 // spaced simulated overflows, which is harmless and in
1406 // fact probably good, since it exercises the overflow
1407 // code under contention.
1408 bool ParNewGeneration::should_simulate_overflow() {
1409 if (_overflow_counter-- <= 0) { // just being defensive
1410 _overflow_counter = ParGCWorkQueueOverflowInterval;
1411 return true;
1412 } else {
1413 return false;
1414 }
1415 }
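// Illustrative use (see the push paths above; debug builds only): with
// ParGCWorkQueueOverflowALot set, roughly one push per
// ParGCWorkQueueOverflowInterval is diverted to the overflow list:
//
//   if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
//     simulate_overflow = true;  // take the overflow-list push path
//   }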
1491 // except that in the CMS case we thread the objects
1492 // directly into the list via their mark word, and do
1493 // not need to deal with special cases below related
1494 // to chunking of object arrays and promotion failure
1495 // handling.
1496 // CR 6797058 has been filed to attempt consolidation of
1497 // the common code.
1498 // Because of the common code, if you make any changes in
1499 // the code below, please check the CMS version to see if
1500 // similar changes might be needed.
1501 // See CMSCollector::par_take_from_overflow_list() for
1502 // more extensive documentation comments.
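// In outline, the claim protocol below is (a sketch; see the CMS version
// for the full treatment of the races):
//
//   prefix = xchg(BUSY, &_overflow_list);   // claim the entire list
//   if (prefix == BUSY) back off briefly and retry;
//   split prefix into (to_take, remainder);
//   publish remainder back to _overflow_list, replacing BUSY;
//   push the objects in to_take onto the local work queue;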
1503 bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan_state) {
1504 ObjToScanQueue* work_q = par_scan_state->work_queue();
1505 // How many to take?
1506 size_t objsFromOverflow = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
1507 (size_t)ParGCDesiredObjsFromOverflowList);
1508
1509 assert(!UseCompressedOops, "Error");
1510 assert(par_scan_state->overflow_stack() == NULL, "Error");
1511 if (_overflow_list == NULL) {
1512 return false;
1513 }
1514
1515 // Otherwise, there was something there; try claiming the list.
1516 oop prefix = cast_to_oop(Atomic::xchg_ptr(BUSY, &_overflow_list));
1517 // Trim off a prefix of at most objsFromOverflow items
1518 Thread* tid = Thread::current();
1519 size_t spin_count = (size_t)ParallelGCThreads;
1520 size_t sleep_time_millis = MAX2((size_t)1, objsFromOverflow/100);
1521 for (size_t spin = 0; prefix == BUSY && spin < spin_count; spin++) {
1522     // someone grabbed it before we did ...
1523     // ... back off by sleeping briefly, then retry ...
1524 os::sleep(tid, sleep_time_millis, false);
1525 if (_overflow_list == NULL) {
1526 // nothing left to take
1527 return false;
1528 } else if (_overflow_list != BUSY) {
1529       // try to grab the prefix
1530 prefix = cast_to_oop(Atomic::xchg_ptr(BUSY, &_overflow_list));
1531 }
1532 }
1533 if (prefix == NULL || prefix == BUSY) {