src/share/vm/gc_implementation/g1/concurrentMark.cpp

rev 2724 : 6484965: G1: piggy-back liveness accounting phase on marking
Summary: Remove the separate counting phase of concurrent marking by tracking the amount of marked bytes and the cards spanned by marked objects in marking task/worker thread local data structures, which are updated as individual objects are marked.
Reviewed-by:

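The patch removes the separate counting pass by giving each marking worker its own liveness accounting: a per-task card bitmap plus a per-task array of marked bytes per region, updated as objects are marked and folded into the global structures at remark. A minimal standalone model of that layout, assuming nothing beyond the changeset summary (WorkerCountData and its members are illustrative stand-ins for the patch's _count_card_bitmaps / _count_marked_bytes, not HotSpot code):

    #include <cstddef>
    #include <vector>

    // Illustrative model only: one instance per marking worker, so the
    // hot marking path touches purely thread-local state.
    struct WorkerCountData {
      std::vector<bool>   card_bm;       // one bit per card in the heap
      std::vector<size_t> marked_bytes;  // one counter per heap region

      WorkerCountData(size_t num_cards, size_t num_regions)
        : card_bm(num_cards, false), marked_bytes(num_regions, 0) {}

      // Invoked as each object is marked.
      void count_object(size_t region_index, size_t obj_bytes,
                        size_t first_card, size_t last_card) {
        marked_bytes[region_index] += obj_bytes;
        for (size_t c = first_card; c <= last_card; ++c) {
          card_bm[c] = true;             // cards spanned by the live object
        }
      }
    };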

 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
 461 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 462                                int max_regions) :
 463   _markBitMap1(rs, MinObjAlignment - 1),
 464   _markBitMap2(rs, MinObjAlignment - 1),
 465 
 466   _parallel_marking_threads(0),
 467   _sleep_factor(0.0),
 468   _marking_task_overhead(1.0),
 469   _cleanup_sleep_factor(0.0),
 470   _cleanup_task_overhead(1.0),
 471   _cleanup_list("Cleanup List"),
 472   _region_bm(max_regions, false /* in_resource_area*/),
 473   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 474            CardTableModRefBS::card_shift,
 475            false /* in_resource_area*/),

 476   _prevMarkBitMap(&_markBitMap1),
 477   _nextMarkBitMap(&_markBitMap2),
 478   _at_least_one_mark_complete(false),
 479 
 480   _markStack(this),
 481   _regionStack(),
 482   // _finger set in set_non_marking_state
 483 
 484   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 485   // _active_tasks set in set_non_marking_state
 486   // _tasks set inside the constructor
 487   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 488   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 489 
 490   _has_overflown(false),
 491   _concurrent(false),
 492   _has_aborted(false),
 493   _restart_for_overflow(false),
 494   _concurrent_marking_in_progress(false),
 495   _should_gray_objects(false),
 496 
 497   // _verbose_level set below
 498 
 499   _init_times(),
 500   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 501   _cleanup_times(),
 502   _total_counting_time(0.0),
 503   _total_rs_scrub_time(0.0),
 504 
 505   _parallel_workers(NULL) {
 506   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 507   if (verbose_level < no_verbose) {
 508     verbose_level = no_verbose;
 509   }
 510   if (verbose_level > high_verbose) {
 511     verbose_level = high_verbose;
 512   }
 513   _verbose_level = verbose_level;
 514 
 515   if (verbose_low()) {
 516     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 517                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 518   }
 519 
 520   _markStack.allocate(MarkStackSize);
 521   _regionStack.allocate(G1MarkRegionStackSize);
 522 
 523   // Create & start a ConcurrentMark thread.
 524   _cmThread = new ConcurrentMarkThread(this);
 525   assert(cmThread() != NULL, "CM Thread should have been created");
 526   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 527 
 528   _g1h = G1CollectedHeap::heap();
 529   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 530   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 531   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 532 
 533   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 534   satb_qs.set_buffer_size(G1SATBBufferSize);
 535 
 536   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 537   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 538 
 539   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 540   _active_tasks = _max_task_num;
 541   for (int i = 0; i < (int) _max_task_num; ++i) {
 542     CMTaskQueue* task_queue = new CMTaskQueue();
 543     task_queue->initialize();
 544     _task_queues->register_queue(i, task_queue);
 545 
 546     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 547     _accum_task_vtime[i] = 0.0;



 548   }
 549 
 550   if (ConcGCThreads > ParallelGCThreads) {
 551     vm_exit_during_initialization("Can't have more ConcGCThreads "
 552                                   "than ParallelGCThreads.");
 553   }
 554   if (ParallelGCThreads == 0) {
 555     // if we are not running with any parallel GC threads we will not
 556     // spawn any marking threads either
 557     _parallel_marking_threads =   0;
 558     _sleep_factor             = 0.0;
 559     _marking_task_overhead    = 1.0;
 560   } else {
 561     if (ConcGCThreads > 0) {
 562       // notice that ConcGCThreads overrides G1MarkingOverheadPercent
 563       // if both are set
 564 
 565       _parallel_marking_threads = ConcGCThreads;
 566       _sleep_factor             = 0.0;
 567       _marking_task_overhead    = 1.0;


 649   // do nothing.
 650 }
 651 
 652 void ConcurrentMark::reset() {
 653   // Starting values for these two. This should be called in a STW
 654   // phase. CM will be notified of any future g1_committed expansions;
 655   // these will happen at the end of evacuation pauses, when tasks are
 656   // inactive.
 657   MemRegion committed = _g1h->g1_committed();
 658   _heap_start = committed.start();
 659   _heap_end   = committed.end();
 660 
 661   // Separated the asserts so that we know which one fires.
 662   assert(_heap_start != NULL, "heap bounds should look ok");
 663   assert(_heap_end != NULL, "heap bounds should look ok");
 664   assert(_heap_start < _heap_end, "heap bounds should look ok");
 665 
 666   // reset all the marking data structures and any necessary flags
 667   clear_marking_state();
 668 


 669   if (verbose_low()) {
 670     gclog_or_tty->print_cr("[global] resetting");
 671   }
 672 
 673   // We do reset all of them, since different phases will use
 674   // different number of active threads. So, it's easiest to have all
 675   // of them ready.
 676   for (int i = 0; i < (int) _max_task_num; ++i) {
 677     _tasks[i]->reset(_nextMarkBitMap);
 678   }
 679 
 680   // we need this to make sure that the flag is on during the evac
 681   // pause with initial mark piggy-backed
 682   set_concurrent_marking_in_progress();
 683 }
 684 
 685 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 686   assert(active_tasks <= _max_task_num, "we should not have more");
 687 
 688   _active_tasks = active_tasks;


 704     // false before we start remark. At this point we should also be
 705     // in a STW phase.
 706     assert(!concurrent_marking_in_progress(), "invariant");
 707     assert(_finger == _heap_end, "only way to get here");
 708     update_g1_committed(true);
 709   }
 710 }
 711 
 712 void ConcurrentMark::set_non_marking_state() {
 713   // We set the global marking state to some default values when we're
 714   // not doing marking.
 715   clear_marking_state();
 716   _active_tasks = 0;
 717   clear_concurrent_marking_in_progress();
 718 }
 719 
 720 ConcurrentMark::~ConcurrentMark() {
 721   for (int i = 0; i < (int) _max_task_num; ++i) {
 722     delete _task_queues->queue(i);
 723     delete _tasks[i];



 724   }

 725   delete _task_queues;
 726   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
 727 }
 728 
 729 // This closure is used to mark refs into the g1 generation
 730 // from external roots in the CMS bit map.
 731 // Called at the first checkpoint.
 732 //
 733 
 734 void ConcurrentMark::clearNextBitmap() {
 735   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 736   G1CollectorPolicy* g1p = g1h->g1_policy();
 737 
 738   // Make sure that the concurrent mark thread looks to still be in
 739   // the current cycle.
 740   guarantee(cmThread()->during_cycle(), "invariant");
 741 
 742   // We are finishing up the current cycle by clearing the next
 743   // marking bitmap and getting it ready for the next cycle. During
 744   // this time no other cycle can start. So, let's make sure that this
 745   // is the case.
 746   guarantee(!g1h->mark_in_progress(), "invariant");


 925 
 926 void ForceOverflowSettings::update() {
 927   if (_num_remaining > 0) {
 928     _num_remaining -= 1;
 929     _force = true;
 930   } else {
 931     _force = false;
 932   }
 933 }
 934 
 935 bool ForceOverflowSettings::should_force() {
 936   if (_force) {
 937     _force = false;
 938     return true;
 939   } else {
 940     return false;
 941   }
 942 }
 943 #endif // !PRODUCT
 944 
 945 void ConcurrentMark::grayRoot(oop p) {
 946   HeapWord* addr = (HeapWord*) p;
 947   // We can't really check against _heap_start and _heap_end, since it
 948   // is possible during an evacuation pause with piggy-backed
 949   // initial-mark that the committed space is expanded during the
 950   // pause without CM observing this change. So the assertion below
 951   // is a bit conservative, but better than nothing.
 952   assert(_g1h->g1_committed().contains(addr),
 953          "address should be within the heap bounds");
 954 
 955   if (!_nextMarkBitMap->isMarked(addr)) {
 956     _nextMarkBitMap->parMark(addr);



 957   }
 958 }
 959 
 960 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 961   // The objects on the region have already been marked "in bulk" by
 962   // the caller. We only need to decide whether to push the region on
 963   // the region stack or not.
 964 
 965   if (!concurrent_marking_in_progress() || !_should_gray_objects) {
 966     // We're done with marking and waiting for remark. We do not need to
 967     // push anything else on the region stack.
 968     return;
 969   }
 970 
 971   HeapWord* finger = _finger;
 972 
 973   if (verbose_low()) {
 974     gclog_or_tty->print_cr("[global] attempting to push "
 975                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
 976                            PTR_FORMAT, mr.start(), mr.end(), finger);


 985     assert(mr.start() <= mr.end(),
 986            "region boundaries should fall within the committed space");
 987     assert(_heap_start <= mr.start(),
 988            "region boundaries should fall within the committed space");
 989     assert(mr.end() <= _heap_end,
 990            "region boundaries should fall within the committed space");
 991     if (verbose_low()) {
 992       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
 993                              "below the finger, pushing it",
 994                              mr.start(), mr.end());
 995     }
 996 
 997     if (!region_stack_push_lock_free(mr)) {
 998       if (verbose_low()) {
 999         gclog_or_tty->print_cr("[global] region stack has overflown.");
1000       }
1001     }
1002   }
1003 }
1004 
1005 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1006   // The object is not marked by the caller. We need to at least mark
1007   // it and maybe push it on the stack.
1008 
1009   HeapWord* addr = (HeapWord*)p;
1010   if (!_nextMarkBitMap->isMarked(addr)) {
1011     // We definitely need to mark it, irrespective of whether we bail out
1012     // because we're done with marking.
1013     if (_nextMarkBitMap->parMark(addr)) {



1014       if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1015         // If we're done with concurrent marking and we're waiting for
1016         // remark, then we're not pushing anything on the stack.
1017         return;
1018       }
1019 
1020       // No OrderAccess:store_load() is needed. It is implicit in the
1021       // CAS done in parMark(addr) above
1022       HeapWord* finger = _finger;
1023 
1024       if (addr < finger) {
1025         if (!mark_stack_push(oop(addr))) {
1026           if (verbose_low()) {
1027             gclog_or_tty->print_cr("[global] global stack overflow "
1028                                    "during parMark");
1029           }
1030         }
1031       }
1032     }
1033   }


1156   G1CollectorPolicy* g1p = g1h->g1_policy();
1157   g1p->record_concurrent_mark_remark_start();
1158 
1159   double start = os::elapsedTime();
1160 
1161   checkpointRootsFinalWork();
1162 
1163   double mark_work_end = os::elapsedTime();
1164 
1165   weakRefsWork(clear_all_soft_refs);
1166 
1167   if (has_overflown()) {
1168     // Oops.  We overflowed.  Restart concurrent marking.
1169     _restart_for_overflow = true;
1170     // Clear the flag. We do not need it any more.
1171     clear_has_overflown();
1172     if (G1TraceMarkStackOverflow) {
1173       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1174     }
1175   } else {
1176     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1177     // We're done with marking.
1178     // This is the end of the marking cycle; we expect all
1179     // threads to have SATB queues with active set to true.
1180     satb_mq_set.set_active_all_threads(false, /* new active value */
1181                                        true /* expected_active */);
1182 
1183     if (VerifyDuringGC) {
1184       HandleMark hm;  // handle scope
1185       gclog_or_tty->print(" VerifyDuringGC:(after)");
1186       Universe::heap()->prepare_for_verify();
1187       Universe::verify(/* allow dirty */ true,
1188                        /* silent      */ false,
1189                        /* option      */ VerifyOption_G1UseNextMarking);
1190     }
1191     assert(!restart_for_overflow(), "sanity");
1192   }
1193 
1194   // Reset the marking state if marking completed
1195   if (!restart_for_overflow()) {
1196     set_non_marking_state();
1197   }
1198 
1199 #if VERIFY_OBJS_PROCESSED
1200   _scan_obj_cl.objs_processed = 0;
1201   ThreadLocalObjQueue::objs_enqueued = 0;
1202 #endif
1203 
1204   // Statistics
1205   double now = os::elapsedTime();
1206   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1207   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1208   _remark_times.add((now - start) * 1000.0);
1209 
1210   g1p->record_concurrent_mark_remark_end();
1211 }
1212 
1213 #define CARD_BM_TEST_MODE 0
1214 


1215 class CalcLiveObjectsClosure: public HeapRegionClosure {
1216 
1217   CMBitMapRO* _bm;
1218   ConcurrentMark* _cm;
1219   bool _changed;
1220   bool _yield;
1221   size_t _words_done;

1222   size_t _tot_live;
1223   size_t _tot_used;
1224   size_t _regions_done;
1225   double _start_vtime_sec;
1226 
1227   BitMap* _region_bm;
1228   BitMap* _card_bm;
1229   intptr_t _bottom_card_num;
1230   bool _final;
1231 
1232   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1233     for (intptr_t i = start_card_num; i <= last_card_num; i++) {



1234 #if CARD_BM_TEST_MODE
1235       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1236 #else
1237       _card_bm->par_at_put(i - _bottom_card_num, 1);
1238 #endif
1239     }
1240   }
1241 
1242 public:
1243   CalcLiveObjectsClosure(bool final,
1244                          CMBitMapRO *bm, ConcurrentMark *cm,
1245                          BitMap* region_bm, BitMap* card_bm) :
1246     _bm(bm), _cm(cm), _changed(false), _yield(true),
1247     _words_done(0), _tot_live(0), _tot_used(0),
1248     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1249     _regions_done(0), _start_vtime_sec(0.0)
1250   {
1251     _bottom_card_num =
1252       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1253                CardTableModRefBS::card_shift);
1254   }
1255 
1256   // It takes a region that's not empty (i.e., it has at least one
1257   // live object in it) and sets its corresponding bit on the region
1258   // bitmap to 1. If the region is "starts humongous" it will also set
1259   // to 1 the bits on the region bitmap that correspond to its
1260   // associated "continues humongous" regions.
1261   void set_bit_for_region(HeapRegion* hr) {
1262     assert(!hr->continuesHumongous(), "should have filtered those out");
1263 
1264     size_t index = hr->hrs_index();
1265     if (!hr->startsHumongous()) {
1266       // Normal (non-humongous) case: just set the bit.
1267       _region_bm->par_at_put((BitMap::idx_t) index, true);
1268     } else {
1269       // Starts humongous case: calculate how many regions are part of
1270       // this humongous region and then set the bit range. It might
1271       // have been a bit more efficient to look at the object that
1272       // spans these humongous regions to calculate their number from
1273       // the object's size. However, it's a good idea to calculate
1274       // this based on the metadata itself, and not the region
1275       // contents, so that this code is not aware of what goes into
1276       // the humongous regions (in case this changes in the future).
1277       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1278       size_t end_index = index + 1;
1279       while (end_index < g1h->n_regions()) {
1280         HeapRegion* chr = g1h->region_at(end_index);
1281         if (!chr->continuesHumongous()) break;
1282         end_index += 1;
1283       }
1284       _region_bm->par_at_put_range((BitMap::idx_t) index,
1285                                    (BitMap::idx_t) end_index, true);
1286     }
1287   }
1288 
1289   bool doHeapRegion(HeapRegion* hr) {
1290     if (!_final && _regions_done == 0) {
1291       _start_vtime_sec = os::elapsedVTime();
1292     }
1293 
1294     if (hr->continuesHumongous()) {
1295       // We will ignore these here and process them when their
1296       // associated "starts humongous" region is processed (see
1297       // set_bit_for_heap_region()). Note that we cannot rely on their
1298       // associated "starts humongous" region to have their bit set to
1299       // 1 since, due to the region chunking in the parallel region
1300       // iteration, a "continues humongous" region might be visited
1301       // before its associated "starts humongous".
1302       return false;
1303     }
1304 
1305     HeapWord* nextTop = hr->next_top_at_mark_start();
1306     HeapWord* start   = hr->top_at_conc_mark_count();
1307     assert(hr->bottom() <= start && start <= hr->end() &&
1308            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1309            start <= nextTop,
1310            "Preconditions.");
1311     // Otherwise, record the number of words we'll examine.

1312     size_t words_done = (nextTop - start);

1313     // Find the first marked object at or after "start".
1314     start = _bm->getNextMarkedWordAddress(start, nextTop);

1315     size_t marked_bytes = 0;

1316 
1317     // Below, the term "card num" means the result of shifting an address
1318     // by the card shift -- address 0 corresponds to card number 0.  One
1319     // must subtract the card num of the bottom of the heap to obtain a
1320     // card table index.

1321     // The first card num of the sequence of live cards currently being
1322     // constructed.  -1 ==> no sequence.
1323     intptr_t start_card_num = -1;

1324     // The last card num of the sequence of live cards currently being
1325     // constructed.  -1 ==> no sequence.
1326     intptr_t last_card_num = -1;
1327 
1328     while (start < nextTop) {
1329       if (_yield && _cm->do_yield_check()) {
1330         // We yielded.  It might be for a full collection, in which case
1331         // all bets are off; terminate the traversal.
1332         if (_cm->has_aborted()) {
1333           _changed = false;
1334           return true;
1335         } else {
1336           // Otherwise, it might be a collection pause, and the region
1337           // we're looking at might be in the collection set.  We'll
1338           // abandon this region.
1339           return false;
1340         }
1341       }
1342       oop obj = oop(start);
1343       int obj_sz = obj->size();

1344       // The card num of the start of the current object.
1345       intptr_t obj_card_num =
1346         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1347 
1348       HeapWord* obj_last = start + obj_sz - 1;
1349       intptr_t obj_last_card_num =
1350         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1351 
1352       if (obj_card_num != last_card_num) {
1353         if (start_card_num == -1) {
1354           assert(last_card_num == -1, "Both or neither.");
1355           start_card_num = obj_card_num;
1356         } else {
1357           assert(last_card_num != -1, "Both or neither.");
1358           assert(obj_card_num >= last_card_num, "Inv");
1359           if ((obj_card_num - last_card_num) > 1) {
1360             // Mark the last run, and start a new one.
1361             mark_card_num_range(start_card_num, last_card_num);
1362             start_card_num = obj_card_num;
1363           }
1364         }
1365 #if CARD_BM_TEST_MODE
1366         /*
1367         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1368                                obj_card_num - _bottom_card_num,
1369                                obj_last_card_num - _bottom_card_num);
1370         */
1371         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1372           _card_bm->par_at_put(j - _bottom_card_num, 1);
1373         }
1374 #endif
1375       }
1376       // In any case, we set the last card num.
1377       last_card_num = obj_last_card_num;
1378 
1379       marked_bytes += (size_t)obj_sz * HeapWordSize;

1380       // Find the next marked object after this one.
1381       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1382       _changed = true;
1383     }

1384     // Handle the last range, if any.
1385     if (start_card_num != -1) {
1386       mark_card_num_range(start_card_num, last_card_num);
1387     }
1388     if (_final) {
1389       // Mark the allocated-since-marking portion...
1390       HeapWord* tp = hr->top();
1391       if (nextTop < tp) {
1392         start_card_num =
1393           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1394         last_card_num =
1395           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1396         mark_card_num_range(start_card_num, last_card_num);

1397         // This definitely means the region has live objects.
1398         set_bit_for_region(hr);
1399       }
1400     }
1401 
1402     hr->add_to_marked_bytes(marked_bytes);
1403     // Update the live region bitmap.
1404     if (marked_bytes > 0) {
1405       set_bit_for_region(hr);
1406     }
1407     hr->set_top_at_conc_mark_count(nextTop);
1408     _tot_live += hr->next_live_bytes();
1409     _tot_used += hr->used();
1410     _words_done = words_done;
1411 
1412     if (!_final) {
1413       ++_regions_done;
1414       if (_regions_done % 10 == 0) {
1415         double end_vtime_sec = os::elapsedVTime();
1416         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1417         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1418           jlong sleep_time_ms =
1419             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1420           os::sleep(Thread::current(), sleep_time_ms, false);
1421           _start_vtime_sec = end_vtime_sec;
1422         }

1423       }
1424     }
1425 
1426     return false;
1427   }
1428 
1429   bool changed() { return _changed;  }
1430   void reset()   { _changed = false; _words_done = 0; }
1431   void no_yield() { _yield = false; }
1432   size_t words_done() { return _words_done; }
1433   size_t tot_live() { return _tot_live; }
1434   size_t tot_used() { return _tot_used; }
1435 };
1436 
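The "card num" arithmetic used throughout CalcLiveObjectsClosure above is plain shifting: an address right-shifted by the card shift yields a card number, and subtracting the card number of the heap bottom yields an index into the card bitmap. A small worked example, assuming the usual 512-byte (2^9) G1 card size and a hypothetical, card-aligned heap base:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int       card_shift  = 9;            // 512-byte cards
      const uintptr_t heap_bottom = 0x70000000;   // hypothetical heap start

      intptr_t  bottom_card_num = heap_bottom >> card_shift;
      uintptr_t obj_addr        = heap_bottom + 0x1234;  // object in the heap
      intptr_t  obj_card_num    = obj_addr >> card_shift;

      // The bitmap index is relative to the card of the heap bottom.
      size_t card_index = obj_card_num - bottom_card_num;
      assert(card_index == (0x1234 >> card_shift));      // index 9
      return 0;
    }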



1437 
1438 void ConcurrentMark::calcDesiredRegions() {
1439   _region_bm.clear();
1440   _card_bm.clear();
1441   CalcLiveObjectsClosure calccl(false /*final*/,
1442                                 nextMarkBitMap(), this,
1443                                 &_region_bm, &_card_bm);
1444   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1445   g1h->heap_region_iterate(&calccl);
1446 
1447   do {
1448     calccl.reset();
1449     g1h->heap_region_iterate(&calccl);
1450   } while (calccl.changed());
1451 }
1452 
1453 class G1ParFinalCountTask: public AbstractGangTask {
1454 protected:
1455   G1CollectedHeap* _g1h;
1456   CMBitMap* _bm;
1457   size_t _n_workers;
1458   size_t *_live_bytes;
1459   size_t *_used_bytes;
1460   BitMap* _region_bm;
1461   BitMap* _card_bm;
1462 public:
1463   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1464                       BitMap* region_bm, BitMap* card_bm)
1465     : AbstractGangTask("G1 final counting"), _g1h(g1h),
1466       _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
1467     if (ParallelGCThreads > 0) {
1468       _n_workers = _g1h->workers()->total_workers();
1469     } else {
1470       _n_workers = 1;
1471     }

1472     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1473     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1474   }
1475 
1476   ~G1ParFinalCountTask() {
1477     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1478     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1479   }
1480 
1481   void work(int i) {
1482     CalcLiveObjectsClosure calccl(true /*final*/,
1483                                   _bm, _g1h->concurrent_mark(),
1484                                   _region_bm, _card_bm);
1485     calccl.no_yield();
1486     if (G1CollectedHeap::use_parallel_gc_threads()) {
1487       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1488                                             HeapRegion::FinalCountClaimValue);
1489     } else {
1490       _g1h->heap_region_iterate(&calccl);
1491     }
1492     assert(calccl.complete(), "Shouldn't have yielded!");
1493 
1494     assert((size_t) i < _n_workers, "invariant");
1495     _live_bytes[i] = calccl.tot_live();
1496     _used_bytes[i] = calccl.tot_used();
1497   }

1498   size_t live_bytes()  {
1499     size_t live_bytes = 0;
1500     for (size_t i = 0; i < _n_workers; ++i)
1501       live_bytes += _live_bytes[i];
1502     return live_bytes;
1503   }

1504   size_t used_bytes()  {
1505     size_t used_bytes = 0;
1506     for (size_t i = 0; i < _n_workers; ++i)
1507       used_bytes += _used_bytes[i];
1508     return used_bytes;
1509   }


1510 };
1511 
1512 class G1ParNoteEndTask;
1513 
1514 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1515   G1CollectedHeap* _g1;
1516   int _worker_num;
1517   size_t _max_live_bytes;
1518   size_t _regions_claimed;
1519   size_t _freed_bytes;
1520   FreeRegionList* _local_cleanup_list;
1521   HumongousRegionSet* _humongous_proxy_set;
1522   HRRSCleanupTask* _hrrs_cleanup_task;
1523   double _claimed_region_time;
1524   double _max_region_time;
1525 
1526 public:
1527   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1528                              int worker_num,
1529                              FreeRegionList* local_cleanup_list,


1690   }
1691 
1692   g1h->verify_region_sets_optional();
1693 
1694   if (VerifyDuringGC) {
1695     HandleMark hm;  // handle scope
1696     gclog_or_tty->print(" VerifyDuringGC:(before)");
1697     Universe::heap()->prepare_for_verify();
1698     Universe::verify(/* allow dirty */ true,
1699                      /* silent      */ false,
1700                      /* option      */ VerifyOption_G1UsePrevMarking);
1701   }
1702 
1703   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1704   g1p->record_concurrent_mark_cleanup_start();
1705 
1706   double start = os::elapsedTime();
1707 
1708   HeapRegionRemSet::reset_for_cleanup_tasks();
1709 
1710   // Do counting once more with the world stopped for good measure.
1711   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1712                                         &_region_bm, &_card_bm);

1713   if (G1CollectedHeap::use_parallel_gc_threads()) {
1714     assert(g1h->check_heap_region_claim_values(
1715                                                HeapRegion::InitialClaimValue),
1716            "sanity check");
1717 
1718     int n_workers = g1h->workers()->total_workers();
1719     g1h->set_par_threads(n_workers);
1720     g1h->workers()->run_task(&g1_par_count_task);
1721     g1h->set_par_threads(0);
1722 
1723     assert(g1h->check_heap_region_claim_values(
1724                                              HeapRegion::FinalCountClaimValue),
1725            "sanity check");
1726   } else {
1727     g1_par_count_task.work(0);
1728   }
1729 
1730   size_t known_garbage_bytes =
1731     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1732   g1p->set_known_garbage_bytes(known_garbage_bytes);
1733 
1734   size_t start_used_bytes = g1h->used();
1735   _at_least_one_mark_complete = true;
1736   g1h->set_marking_complete();
1737 
1738   ergo_verbose4(ErgoConcCycles,
1739            "finish cleanup",
1740            ergo_format_byte("occupancy")
1741            ergo_format_byte("capacity")
1742            ergo_format_byte_perc("known garbage"),
1743            start_used_bytes, g1h->capacity(),
1744            known_garbage_bytes,
1745            ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
1746 
1747   double count_end = os::elapsedTime();
1748   double this_final_counting_time = (count_end - start);
1749   if (G1PrintParCleanupStats) {


1912   }
1913   assert(tmp_free_list.is_empty(), "post-condition");
1914 }
1915 
1916 // Support closures for reference processing in G1
1917 
1918 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1919   HeapWord* addr = (HeapWord*)obj;
1920   return addr != NULL &&
1921          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1922 }
1923 
1924 class G1CMKeepAliveClosure: public OopClosure {
1925   G1CollectedHeap* _g1;
1926   ConcurrentMark*  _cm;
1927   CMBitMap*        _bitMap;
1928  public:
1929   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1930                        CMBitMap* bitMap) :
1931     _g1(g1), _cm(cm),
1932     _bitMap(bitMap) {}



1933 
1934   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1935   virtual void do_oop(      oop* p) { do_oop_work(p); }
1936 
1937   template <class T> void do_oop_work(T* p) {
1938     oop obj = oopDesc::load_decode_heap_oop(p);
1939     HeapWord* addr = (HeapWord*)obj;
1940 
1941     if (_cm->verbose_high()) {
1942       gclog_or_tty->print_cr("\t[0] we're looking at location "
1943                              "*"PTR_FORMAT" = "PTR_FORMAT,
1944                              p, (void*) obj);
1945     }
1946 
1947     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
1948       _bitMap->mark(addr);



1949       _cm->mark_stack_push(obj);
1950     }
1951   }
1952 };
1953 
1954 class G1CMDrainMarkingStackClosure: public VoidClosure {
1955   CMMarkStack*                  _markStack;
1956   CMBitMap*                     _bitMap;
1957   G1CMKeepAliveClosure*         _oopClosure;
1958  public:
1959   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1960                                G1CMKeepAliveClosure* oopClosure) :
1961     _bitMap(bitMap),
1962     _markStack(markStack),
1963     _oopClosure(oopClosure)
1964   {}
1965 
1966   void do_void() {
1967     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1968   }


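Taken together, these closures form the is-alive / keep-alive / drain triple that G1 supplies for reference processing: liveness queries consult the marking data, kept-alive referents are marked and pushed on the mark stack, and the drain closure then empties that stack to complete transitive marking. A schematic model of the protocol (a generic sketch, not the HotSpot ReferenceProcessor API):

    // Generic sketch: the reference processor asks is_alive() for each
    // discovered referent, calls keep_alive() on referents it retains,
    // and finally runs drain() so newly marked objects get traced.
    struct RefProcessingTriple {
      virtual bool is_alive(void* obj) = 0;    // cf. G1CMIsAliveClosure
      virtual void keep_alive(void* obj) = 0;  // cf. G1CMKeepAliveClosure
      virtual void drain() = 0;                // cf. G1CMDrainMarkingStackClosure
      virtual ~RefProcessingTriple() {}
    };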
2579                            (void*) obj);
2580   }
2581 
2582   HeapWord* objAddr = (HeapWord*) obj;
2583   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2584   if (_g1h->is_in_g1_reserved(objAddr)) {
2585     assert(obj != NULL, "null check is implicit");
2586     if (!_nextMarkBitMap->isMarked(objAddr)) {
2587       // Only get the containing region if the object is not marked on the
2588       // bitmap (otherwise, it's a waste of time since we won't do
2589       // anything with it).
2590       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2591       if (!hr->obj_allocated_since_next_marking(obj)) {
2592         if (verbose_high()) {
2593           gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2594                                  "marked", (void*) obj);
2595         }
2596 
2597         // we need to mark it first
2598         if (_nextMarkBitMap->parMark(objAddr)) {



2599           // No OrderAccess:store_load() is needed. It is implicit in the
2600           // CAS done in parMark(objAddr) above
2601           HeapWord* finger = _finger;
2602           if (objAddr < finger) {
2603             if (verbose_high()) {
2604               gclog_or_tty->print_cr("[global] below the global finger "
2605                                      "("PTR_FORMAT"), pushing it", finger);
2606             }
2607             if (!mark_stack_push(obj)) {
2608               if (verbose_low()) {
2609                 gclog_or_tty->print_cr("[global] global stack overflow during "
2610                                        "deal_with_reference");
2611               }
2612             }
2613           }
2614         }
2615       }
2616     }
2617   }
2618 }


2825 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2826   _markStack.setEmpty();
2827   _markStack.clear_overflow();
2828   _regionStack.setEmpty();
2829   _regionStack.clear_overflow();
2830   if (clear_overflow) {
2831     clear_has_overflown();
2832   } else {
2833     assert(has_overflown(), "pre-condition");
2834   }
2835   _finger = _heap_start;
2836 
2837   for (int i = 0; i < (int)_max_task_num; ++i) {
2838     OopTaskQueue* queue = _task_queues->queue(i);
2839     queue->set_empty();
2840     // Clear any partial regions from the CMTasks
2841     _tasks[i]->clear_aborted_region();
2842   }
2843 }
2844 
2845 void ConcurrentMark::print_stats() {
2846   if (verbose_stats()) {
2847     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2848     for (size_t i = 0; i < _active_tasks; ++i) {
2849       _tasks[i]->print_stats();
2850       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2851     }
2852   }
2853 }
2854 
2855 class CSMarkOopClosure: public OopClosure {
2856   friend class CSMarkBitMapClosure;
2857 
2858   G1CollectedHeap* _g1h;
2859   CMBitMap*        _bm;
2860   ConcurrentMark*  _cm;
2861   oop*             _ms;
2862   jint*            _array_ind_stack;
2863   int              _ms_size;
2864   int              _ms_ind;


2930 
2931   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2932   virtual void do_oop(      oop* p) { do_oop_work(p); }
2933 
2934   template <class T> void do_oop_work(T* p) {
2935     T heap_oop = oopDesc::load_heap_oop(p);
2936     if (oopDesc::is_null(heap_oop)) return;
2937     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2938     if (obj->is_forwarded()) {
2939       // If the object has already been forwarded, we have to make sure
2940       // that it's marked.  So follow the forwarding pointer.  Note that
2941       // this does the right thing for self-forwarding pointers in the
2942       // evacuation failure case.
2943       obj = obj->forwardee();
2944     }
2945     HeapRegion* hr = _g1h->heap_region_containing(obj);
2946     if (hr != NULL) {
2947       if (hr->in_collection_set()) {
2948         if (_g1h->is_obj_ill(obj)) {
2949           _bm->mark((HeapWord*)obj);



2950           if (!push(obj)) {
2951             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2952             set_abort();
2953           }
2954         }
2955       } else {
2956         // Outside the collection set; we need to gray it
2957         _cm->deal_with_reference(obj);
2958       }
2959     }
2960   }
2961 };
2962 
2963 class CSMarkBitMapClosure: public BitMapClosure {
2964   G1CollectedHeap* _g1h;
2965   CMBitMap*        _bitMap;
2966   ConcurrentMark*  _cm;
2967   CSMarkOopClosure _oop_cl;
2968 public:
2969   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :


3011 
3012   ~CompleteMarkingInCSHRClosure() {}
3013 
3014   bool doHeapRegion(HeapRegion* r) {
3015     if (!r->evacuation_failed()) {
3016       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
3017       if (!mr.is_empty()) {
3018         if (!_bm->iterate(&_bit_cl, mr)) {
3019           _completed = false;
3020           return true;
3021         }
3022       }
3023     }
3024     return false;
3025   }
3026 
3027   bool completed() { return _completed; }
3028 };
3029 
3030 class ClearMarksInHRClosure: public HeapRegionClosure {

3031   CMBitMap* _bm;
3032 public:
3033   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }


3034 
3035   bool doHeapRegion(HeapRegion* r) {
3036     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
3037       MemRegion usedMR = r->used_region();
3038       _bm->clearRange(usedMR);


3039     }
3040     return false;
3041   }
3042 };
3043 
3044 void ConcurrentMark::complete_marking_in_collection_set() {
3045   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
3046 
3047   if (!g1h->mark_in_progress()) {
3048     g1h->g1_policy()->record_mark_closure_time(0.0);
3049     return;
3050   }
3051 
3052   int i = 1;
3053   double start = os::elapsedTime();
3054   while (true) {
3055     i++;
3056     CompleteMarkingInCSHRClosure cmplt(this);
3057     g1h->collection_set_iterate(&cmplt);
3058     if (cmplt.completed()) break;
3059   }
3060   double end_time = os::elapsedTime();
3061   double elapsed_time_ms = (end_time - start) * 1000.0;
3062   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3063 
3064   ClearMarksInHRClosure clr(nextMarkBitMap());
3065   g1h->collection_set_iterate(&clr);
3066 }
3067 
3068 // The next two methods deal with the following optimisation. Some
3069 // objects are gray by being marked and located above the finger. If
3070 // they are copied, during an evacuation pause, below the finger then
3071 // they need to be pushed on the stack. The observation is that, if
3072 // there are no regions in the collection set located above the
3073 // finger, then the above cannot happen, hence we do not need to
3074 // explicitly gray any objects when copying them to below the
3075 // finger. The global stack will be scanned to ensure that, if it
3076 // points to objects being copied, it will update their
3077 // location. There is a tricky situation with the gray objects in the
3078 // region stack that are being copied, however. See the comment in
3079 // newCSet().
3080 
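A compact restatement of the invariant described above (illustrative only, not the HotSpot code): anything at or above the global finger will still be visited by the marking tasks, so an evacuated object needs explicit graying only when its new address falls below the finger.

    // Decide whether an object copied during evacuation must be pushed
    // on the mark stack to remain gray.
    inline bool needs_explicit_push(const void* new_addr, const void* finger) {
      // At or above the finger: the marking scan will reach it anyway.
      // Below the finger: the scan has already passed it, so push it.
      return new_addr < finger;
    }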
3081 void ConcurrentMark::newCSet() {
3082   if (!concurrent_marking_in_progress()) {
3083     // nothing to do if marking is not in progress
3084     return;


3186     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3187 
3188   }
3189   print_ms_time_info("  ", "cleanups", _cleanup_times);
3190   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3191                          _total_counting_time,
3192                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3193                           (double)_cleanup_times.num()
3194                          : 0.0));
3195   if (G1ScrubRemSets) {
3196     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3197                            _total_rs_scrub_time,
3198                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3199                             (double)_cleanup_times.num()
3200                            : 0.0));
3201   }
3202   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3203                          (_init_times.sum() + _remark_times.sum() +
3204                           _cleanup_times.sum())/1000.0);
3205   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3206                 "(%8.2f s marking, %8.2f s counting).",
3207                 cmThread()->vtime_accum(),
3208                 cmThread()->vtime_mark_accum(),
3209                 cmThread()->vtime_count_accum());
3210 }
3211 
3212 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3213   _parallel_workers->print_worker_threads_on(st);
3214 }
3215 
3216 // Closures
3217 // XXX: there seems to be a lot of code duplication here;
3218 // should refactor and consolidate the shared code.
3219 
3220 // This closure is used to mark refs into the CMS generation in
3221 // the CMS bit map. Called at the first checkpoint.
3222 
3223 // We take a break if someone is trying to stop the world.
3224 bool ConcurrentMark::do_yield_check(int worker_i) {
3225   if (should_yield()) {
3226     if (worker_i == 0) {
3227       _g1h->g1_policy()->record_concurrent_pause();
3228     }
3229     cmThread()->yield();




 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
 461 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 462                                int max_regions) :
 463   _markBitMap1(rs, MinObjAlignment - 1),
 464   _markBitMap2(rs, MinObjAlignment - 1),
 465 
 466   _parallel_marking_threads(0),
 467   _sleep_factor(0.0),
 468   _marking_task_overhead(1.0),
 469   _cleanup_sleep_factor(0.0),
 470   _cleanup_task_overhead(1.0),
 471   _cleanup_list("Cleanup List"),
 472   _region_bm(max_regions, false /* in_resource_area*/),
 473   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 474            CardTableModRefBS::card_shift,
 475            false /* in_resource_area*/),
 476 
 477   _prevMarkBitMap(&_markBitMap1),
 478   _nextMarkBitMap(&_markBitMap2),
 479   _at_least_one_mark_complete(false),
 480 
 481   _markStack(this),
 482   _regionStack(),
 483   // _finger set in set_non_marking_state
 484 
 485   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 486   // _active_tasks set in set_non_marking_state
 487   // _tasks set inside the constructor
 488   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 489   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 490 
 491   _has_overflown(false),
 492   _concurrent(false),
 493   _has_aborted(false),
 494   _restart_for_overflow(false),
 495   _concurrent_marking_in_progress(false),
 496   _should_gray_objects(false),
 497 
 498   // _verbose_level set below
 499 
 500   _init_times(),
 501   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 502   _cleanup_times(),
 503   _total_counting_time(0.0),
 504   _total_rs_scrub_time(0.0),
 505 
 506   _parallel_workers(NULL),
 507 
 508   _count_card_bitmaps(NULL),
 509   _count_marked_bytes(NULL)
 510 {
 511   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 512   if (verbose_level < no_verbose) {
 513     verbose_level = no_verbose;
 514   }
 515   if (verbose_level > high_verbose) {
 516     verbose_level = high_verbose;
 517   }
 518   _verbose_level = verbose_level;
 519 
 520   if (verbose_low()) {
 521     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 522                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 523   }
 524 
 525   _markStack.allocate(MarkStackSize);
 526   _regionStack.allocate(G1MarkRegionStackSize);
 527 
 528   // Create & start a ConcurrentMark thread.
 529   _cmThread = new ConcurrentMarkThread(this);
 530   assert(cmThread() != NULL, "CM Thread should have been created");
 531   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 532 
 533   _g1h = G1CollectedHeap::heap();
 534   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 535   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 536   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 537 
 538   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 539   satb_qs.set_buffer_size(G1SATBBufferSize);
 540 
 541   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 542   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 543 
 544   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num);
 545   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);
 546 
 547   BitMap::idx_t card_bm_size = _card_bm.size();
 548 
 549   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 550   _active_tasks = _max_task_num;
 551   for (int i = 0; i < (int) _max_task_num; ++i) {
 552     CMTaskQueue* task_queue = new CMTaskQueue();
 553     task_queue->initialize();
 554     _task_queues->register_queue(i, task_queue);
 555 
 556     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 557     _accum_task_vtime[i] = 0.0;
 558 
 559     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 560     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);
 561   }
 562 
 563   if (ConcGCThreads > ParallelGCThreads) {
 564     vm_exit_during_initialization("Can't have more ConcGCThreads "
 565                                   "than ParallelGCThreads.");
 566   }
 567   if (ParallelGCThreads == 0) {
 568     // if we are not running with any parallel GC threads we will not
 569     // spawn any marking threads either
 570     _parallel_marking_threads =   0;
 571     _sleep_factor             = 0.0;
 572     _marking_task_overhead    = 1.0;
 573   } else {
 574     if (ConcGCThreads > 0) {
 575       // notice that ConcGCThreads overrides G1MarkingOverheadPercent
 576       // if both are set
 577 
 578       _parallel_marking_threads = ConcGCThreads;
 579       _sleep_factor             = 0.0;
 580       _marking_task_overhead    = 1.0;


 662   // do nothing.
 663 }
 664 
 665 void ConcurrentMark::reset() {
 666   // Starting values for these two. This should be called in a STW
 667   // phase. CM will be notified of any future g1_committed expansions;
 668   // these will happen at the end of evacuation pauses, when tasks are
 669   // inactive.
 670   MemRegion committed = _g1h->g1_committed();
 671   _heap_start = committed.start();
 672   _heap_end   = committed.end();
 673 
 674   // Separated the asserts so that we know which one fires.
 675   assert(_heap_start != NULL, "heap bounds should look ok");
 676   assert(_heap_end != NULL, "heap bounds should look ok");
 677   assert(_heap_start < _heap_end, "heap bounds should look ok");
 678 
 679   // reset all the marking data structures and any necessary flags
 680   clear_marking_state();
 681 
 682   clear_all_count_data();
 683 
 684   if (verbose_low()) {
 685     gclog_or_tty->print_cr("[global] resetting");
 686   }
 687 
 688   // We do reset all of them, since different phases will use
 689   // different number of active threads. So, it's easiest to have all
 690   // of them ready.
 691   for (int i = 0; i < (int) _max_task_num; ++i) {
 692     _tasks[i]->reset(_nextMarkBitMap);
 693   }
 694 
 695   // we need this to make sure that the flag is on during the evac
 696   // pause with initial mark piggy-backed
 697   set_concurrent_marking_in_progress();
 698 }
 699 
 700 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 701   assert(active_tasks <= _max_task_num, "we should not have more");
 702 
 703   _active_tasks = active_tasks;


 719     // false before we start remark. At this point we should also be
 720     // in a STW phase.
 721     assert(!concurrent_marking_in_progress(), "invariant");
 722     assert(_finger == _heap_end, "only way to get here");
 723     update_g1_committed(true);
 724   }
 725 }
 726 
 727 void ConcurrentMark::set_non_marking_state() {
 728   // We set the global marking state to some default values when we're
 729   // not doing marking.
 730   clear_marking_state();
 731   _active_tasks = 0;
 732   clear_concurrent_marking_in_progress();
 733 }
 734 
 735 ConcurrentMark::~ConcurrentMark() {
 736   for (int i = 0; i < (int) _max_task_num; ++i) {
 737     delete _task_queues->queue(i);
 738     delete _tasks[i];
 739 
 740     _count_card_bitmaps[i].resize(0, false);
 741     FREE_C_HEAP_ARRAY(size_t, _count_marked_bytes[i]);
 742   }
 743 
 744   delete _task_queues;
 745   FREE_C_HEAP_ARRAY(CMTask*, _tasks);
 746   FREE_C_HEAP_ARRAY(double, _accum_task_vtime);
 747 
 748   FREE_C_HEAP_ARRAY(BitMap*, _count_card_bitmaps);
 749   FREE_C_HEAP_ARRAY(size_t*, _count_marked_bytes);
 750 }
 751 
 752 // This closure is used to mark refs into the g1 generation
 753 // from external roots in the CMS bit map.
 754 // Called at the first checkpoint.
 755 //
 756 
 757 void ConcurrentMark::clearNextBitmap() {
 758   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 759   G1CollectorPolicy* g1p = g1h->g1_policy();
 760 
 761   // Make sure that the concurrent mark thread looks to still be in
 762   // the current cycle.
 763   guarantee(cmThread()->during_cycle(), "invariant");
 764 
 765   // We are finishing up the current cycle by clearing the next
 766   // marking bitmap and getting it ready for the next cycle. During
 767   // this time no other cycle can start. So, let's make sure that this
 768   // is the case.
 769   guarantee(!g1h->mark_in_progress(), "invariant");


 948 
 949 void ForceOverflowSettings::update() {
 950   if (_num_remaining > 0) {
 951     _num_remaining -= 1;
 952     _force = true;
 953   } else {
 954     _force = false;
 955   }
 956 }
 957 
 958 bool ForceOverflowSettings::should_force() {
 959   if (_force) {
 960     _force = false;
 961     return true;
 962   } else {
 963     return false;
 964   }
 965 }
 966 #endif // !PRODUCT
 967 
 968 void ConcurrentMark::grayRoot(oop p, int worker_i) {
 969   HeapWord* addr = (HeapWord*) p;
 970   // We can't really check against _heap_start and _heap_end, since it
 971   // is possible during an evacuation pause with piggy-backed
 972   // initial-mark that the committed space is expanded during the
 973   // pause without CM observing this change. So the assertion below
 974   // is a bit conservative, but better than nothing.
 975   assert(_g1h->g1_committed().contains(addr),
 976          "address should be within the heap bounds");
 977 
 978   if (!_nextMarkBitMap->isMarked(addr)) {
 979     if (_nextMarkBitMap->parMark(addr)) {
 980       // Update the task specific count data for object p.
 981       add_to_count_data_for(p, worker_i);
 982     }
 983   }
 984 }
 985 
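add_to_count_data_for(p, worker_i) is the new hot-path hook; its body lies outside this hunk. Going only by the changeset summary, a plausible sketch is: charge the object's size to the worker's per-region byte count and set the bits for the cards the object spans in the worker's card bitmap. This reuses the illustrative WorkerCountData model from the sketch near the top of the page; every name below beyond the two call arguments is an assumption:

    // Hypothetical sketch only -- the real body is not part of this hunk.
    void add_to_count_data_for_sketch(WorkerCountData& data,  // worker_i's data
                                      size_t region_index,    // obj's region
                                      size_t obj_addr, size_t obj_bytes,
                                      size_t bottom_card_num,
                                      int card_shift) {
      size_t first_card = (obj_addr >> card_shift) - bottom_card_num;
      size_t last_card  = ((obj_addr + obj_bytes - 1) >> card_shift)
                          - bottom_card_num;
      data.count_object(region_index, obj_bytes, first_card, last_card);
    }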
 986 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 987   // The objects on the region have already been marked "in bulk" by
 988   // the caller. We only need to decide whether to push the region on
 989   // the region stack or not.
 990 
 991   if (!concurrent_marking_in_progress() || !_should_gray_objects) {
 992     // We're done with marking and waiting for remark. We do not need to
 993     // push anything else on the region stack.
 994     return;
 995   }
 996 
 997   HeapWord* finger = _finger;
 998 
 999   if (verbose_low()) {
1000     gclog_or_tty->print_cr("[global] attempting to push "
1001                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
1002                            PTR_FORMAT, mr.start(), mr.end(), finger);


1011     assert(mr.start() <= mr.end(),
1012            "region boundaries should fall within the committed space");
1013     assert(_heap_start <= mr.start(),
1014            "region boundaries should fall within the committed space");
1015     assert(mr.end() <= _heap_end,
1016            "region boundaries should fall within the committed space");
1017     if (verbose_low()) {
1018       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1019                              "below the finger, pushing it",
1020                              mr.start(), mr.end());
1021     }
1022 
1023     if (!region_stack_push_lock_free(mr)) {
1024       if (verbose_low()) {
1025         gclog_or_tty->print_cr("[global] region stack has overflown.");
1026       }
1027     }
1028   }
1029 }
1030 
1031 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p, int worker_i) {
1032   // The object is not marked by the caller. We need to at least mark
1033   // it and maybe push it on the stack.
1034 
1035   HeapWord* addr = (HeapWord*)p;
1036   if (!_nextMarkBitMap->isMarked(addr)) {
1037     // We definitely need to mark it, irrespective of whether we bail out
1038     // because we're done with marking.
1039     if (_nextMarkBitMap->parMark(addr)) {
1040       // Update the task specific count data for object p
1041       add_to_count_data_for(p, worker_i);
1042       
1043       if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1044         // If we're done with concurrent marking and we're waiting for
1045         // remark, then we're not pushing anything on the stack.
1046         return;
1047       }
1048 
1049       // No OrderAccess:store_load() is needed. It is implicit in the
1050       // CAS done in parMark(addr) above
1051       HeapWord* finger = _finger;
1052 
1053       if (addr < finger) {
1054         if (!mark_stack_push(oop(addr))) {
1055           if (verbose_low()) {
1056             gclog_or_tty->print_cr("[global] global stack overflow "
1057                                    "during parMark");
1058           }
1059         }
1060       }
1061     }
1062   }


1185   G1CollectorPolicy* g1p = g1h->g1_policy();
1186   g1p->record_concurrent_mark_remark_start();
1187 
1188   double start = os::elapsedTime();
1189 
1190   checkpointRootsFinalWork();
1191 
1192   double mark_work_end = os::elapsedTime();
1193 
1194   weakRefsWork(clear_all_soft_refs);
1195 
1196   if (has_overflown()) {
1197     // Oops.  We overflowed.  Restart concurrent marking.
1198     _restart_for_overflow = true;
1199     // Clear the flag. We do not need it any more.
1200     clear_has_overflown();
1201     if (G1TraceMarkStackOverflow) {
1202       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1203     }
1204   } else {
1205     // Aggregate the per-task counting data that we have accumulated
1206     // while marking.
1207     aggregate_all_count_data();
1208 
1209     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1210     // We're done with marking.
1211     // This is the end of the marking cycle; we expect all
1212     // threads to have SATB queues with active set to true.
1213     satb_mq_set.set_active_all_threads(false, /* new active value */
1214                                        true /* expected_active */);
1215 
1216     if (VerifyDuringGC) {
1217       HandleMark hm;  // handle scope
1218       gclog_or_tty->print(" VerifyDuringGC:(after)");
1219       Universe::heap()->prepare_for_verify();
1220       Universe::verify(/* allow dirty */ true,
1221                        /* silent      */ false,
1222                        /* option      */ VerifyOption_G1UseNextMarking);
1223     }
1224     assert(!restart_for_overflow(), "sanity");
1225   }
1226 
1227   // Reset the marking state if marking completed
1228   if (!restart_for_overflow()) {
1229     set_non_marking_state();
1230   }
1231 
1232 #if VERIFY_OBJS_PROCESSED
1233   _scan_obj_cl.objs_processed = 0;
1234   ThreadLocalObjQueue::objs_enqueued = 0;
1235 #endif
1236 
1237   // Statistics
1238   double now = os::elapsedTime();
1239   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1240   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1241   _remark_times.add((now - start) * 1000.0);
1242 
1243   g1p->record_concurrent_mark_remark_end();
1244 }
1245 
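aggregate_all_count_data(), called in the non-overflow branch above, is also defined outside this hunk. Conceptually it is what replaces the old counting phase: each worker's thread-local counts are folded into the global per-region and per-card structures once, at remark. A hedged sketch of that reduction, again in terms of the illustrative WorkerCountData model rather than the patch's actual code:

    #include <cstddef>
    #include <vector>

    // Hypothetical reduction: sum per-worker marked bytes into global
    // per-region totals and OR per-worker card bitmaps into the global
    // card bitmap (cf. hr->add_to_marked_bytes() and _card_bm).
    void aggregate_all_count_data_sketch(
        const std::vector<WorkerCountData>& workers,
        std::vector<size_t>& region_marked_bytes,
        std::vector<bool>&   global_card_bm) {
      for (const WorkerCountData& w : workers) {
        for (size_t r = 0; r < region_marked_bytes.size(); ++r) {
          region_marked_bytes[r] += w.marked_bytes[r];
        }
        for (size_t c = 0; c < global_card_bm.size(); ++c) {
          if (w.card_bm[c]) global_card_bm[c] = true;
        }
      }
    }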
1246 #define CARD_BM_TEST_MODE 0
1247 
1248 // Used to calculate the # live objects per region
1249 // for verification purposes
1250 class CalcLiveObjectsClosure: public HeapRegionClosure {
1251 
1252   CMBitMapRO* _bm;
1253   ConcurrentMark* _cm;
1254   BitMap* _region_bm;
1255   BitMap* _card_bm;
1256 
1257   size_t _tot_words_done;
1258   size_t _tot_live;
1259   size_t _tot_used;


1260 
1261   size_t _region_marked_bytes;
1262 
1263   intptr_t _bottom_card_num;

1264 
1265   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1266     BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1267     BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
1268     
1269     for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1270 #if CARD_BM_TEST_MODE
1271       guarantee(_card_bm->at(i), "Should already be set.");
1272 #else
1273       _card_bm->par_at_put(i, 1);
1274 #endif
1275     }
1276   }
1277 
1278 public:
1279   CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,

1280                          BitMap* region_bm, BitMap* card_bm) :
1281     _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1282     _tot_words_done(0), _tot_live(0), _tot_used(0),
1283     _region_marked_bytes(0)

1284   {
1285     _bottom_card_num =
1286       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1287                CardTableModRefBS::card_shift);
1288   }
1289 
1290   // It takes a region that's not empty (i.e., it has at least one
1291   // live object in it) and sets its corresponding bit on the region
1292   // bitmap to 1. If the region is "starts humongous" it will also set
1293   // to 1 the bits on the region bitmap that correspond to its
1294   // associated "continues humongous" regions.
1295   void set_bit_for_region(HeapRegion* hr) {
1296     assert(!hr->continuesHumongous(), "should have filtered those out");
1297 
1298     size_t index = hr->hrs_index();
1299     if (!hr->startsHumongous()) {
1300       // Normal (non-humongous) case: just set the bit.
1301       _region_bm->par_at_put((BitMap::idx_t) index, true);
1302     } else {
1303       // Starts humongous case: calculate how many regions are part of
1304       // this humongous region and then set the bit range. It might
1305       // have been a bit more efficient to look at the object that
1306       // spans these humongous regions to calculate their number from
1307       // the object's size. However, it's a good idea to calculate
1308       // this based on the metadata itself, and not the region
1309       // contents, so that this code is not aware of what goes into
1310       // the humongous regions (in case this changes in the future).
1311       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1312       size_t end_index = index + 1;
1313       while (end_index < g1h->n_regions()) {
1314         HeapRegion* chr = g1h->region_at(end_index);
1315         if (!chr->continuesHumongous()) break;
1316         end_index += 1;
1317       }
1318       _region_bm->par_at_put_range((BitMap::idx_t) index,
1319                                    (BitMap::idx_t) end_index, true);
1320     }
1321   }
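  // For example (an illustrative sketch): a humongous object spanning
  // regions 10..13 has its "starts humongous" region at index 10 and
  // three "continues humongous" followers, so the loop above computes
  // end_index == 14 and a single par_at_put_range() call sets bits
  // [10, 14) in the region bitmap.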
1322 
1323   bool doHeapRegion(HeapRegion* hr) {



1324 
1325     if (hr->continuesHumongous()) {
1326       // We will ignore these here and process them when their
1327       // associated "starts humongous" region is processed (see
1328       // set_bit_for_region()). Note that we cannot rely on their
1329       // associated "starts humongous" region to have their bit set to
1330       // 1 since, due to the region chunking in the parallel region
1331       // iteration, a "continues humongous" region might be visited
1332       // before its associated "starts humongous".
1333       return false;
1334     }
1335 
1336     HeapWord* nextTop = hr->next_top_at_mark_start();
1337     HeapWord* start   = hr->bottom();
1338 
1339     assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),

1340                 "Preconditions.");
1341 
1342     // Record the number of words we'll examine.
1343     size_t words_done = (nextTop - start);
1344 
1345     // Find the first marked object at or after "start".
1346     start = _bm->getNextMarkedWordAddress(start, nextTop);
1347 
1348     size_t marked_bytes = 0;
1349     _region_marked_bytes = 0;
1350 
1351     // Below, the term "card num" means the result of shifting an address
1352     // by the card shift -- address 0 corresponds to card number 0.  One
1353     // must subtract the card num of the bottom of the heap to obtain a
1354     // card table index.
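    // For example (illustrative only, assuming the default 512-byte
    // card size, i.e. a card shift of 9): with the heap reserved at
    // 0x80000000, the bottom card num is 0x80000000 >> 9 == 0x400000,
    // and an object at 0x80000400 lies on card num 0x400002, which
    // maps to index 2 in the card bitmap.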
1355 
1356     // The first card num of the sequence of live cards currently being
1357     // constructed.  -1 ==> no sequence.
1358     intptr_t start_card_num = -1;
1359 
1360     // The last card num of the sequence of live cards currently being
1361     // constructed.  -1 ==> no sequence.
1362     intptr_t last_card_num = -1;
1363 
1364     while (start < nextTop) {













1365       oop obj = oop(start);
1366       int obj_sz = obj->size();
1367 
1368       // The card num of the start of the current object.
1369       intptr_t obj_card_num =
1370         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

1371       HeapWord* obj_last = start + obj_sz - 1;
1372       intptr_t obj_last_card_num =
1373         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1374 
1375       if (obj_card_num != last_card_num) {
1376         if (start_card_num == -1) {
1377           assert(last_card_num == -1, "Both or neither.");
1378           start_card_num = obj_card_num;
1379         } else {
1380           assert(last_card_num != -1, "Both or neither.");
1381           assert(obj_card_num >= last_card_num, "Inv");
1382           if ((obj_card_num - last_card_num) > 1) {
1383             // Mark the last run, and start a new one.
1384             mark_card_num_range(start_card_num, last_card_num);
1385             start_card_num = obj_card_num;
1386           }
1387         }
1388 #if CARD_BM_TEST_MODE
1389         /*
1390         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1391                                obj_card_num - _bottom_card_num,
1392                                obj_last_card_num - _bottom_card_num);
1393         */
1394         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1395           _card_bm->par_at_put(j - _bottom_card_num, 1);
1396         }
1397 #endif
1398       }
1399       // In any case, we set the last card num.
1400       last_card_num = obj_last_card_num;
1401 
1402       marked_bytes += (size_t)obj_sz * HeapWordSize;
1403 
1404       // Find the next marked object after this one.
1405       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);

1406     }
1407 
1408     // Handle the last range, if any.
1409     if (start_card_num != -1) {
1410       mark_card_num_range(start_card_num, last_card_num);
1411     }
1412 
1413     // Mark the allocated-since-marking portion...
1414     HeapWord* top = hr->top();
1415     if (nextTop < top) {
1416       start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1417       last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
1418 

1419       mark_card_num_range(start_card_num, last_card_num);
1420       
1421       // This definitely means the region has live objects.
1422       set_bit_for_region(hr);
1423     }

1424 

1425     // Update the live region bitmap.
1426     if (marked_bytes > 0) {
1427       set_bit_for_region(hr);
1428     }
1429 
1430     // Set the marked bytes for the current region so that
1431     // it can be queried by a calling verification routine.
1432     _region_marked_bytes = marked_bytes;
1433 
1434     _tot_live += hr->next_live_bytes();
1435     _tot_used += hr->used();
1436     _tot_words_done += words_done;
1437 
1438     return false;









1439   }
1440 
1441   size_t region_marked_bytes() const { return _region_marked_bytes; }
1442   size_t tot_words_done() const      { return _tot_words_done; }
1443   size_t tot_live() const            { return _tot_live; }
1444   size_t tot_used() const            { return _tot_used; }
1445 };
1446 
1447 // Aggregate the counting data that was constructed concurrently
1448 // with marking.
1449 class AddToMarkedBytesClosure: public HeapRegionClosure {
1450   ConcurrentMark* _cm;
1451   size_t _task_num;
1452   size_t _max_task_num;
1453 
1454   bool _final;
1455 
1456 public:
1457   AddToMarkedBytesClosure(ConcurrentMark *cm,
1458                           size_t task_num,
1459                           size_t max_task_num) :
1460     _cm(cm),
1461     _task_num(task_num),
1462     _max_task_num(max_task_num),
1463     _final(false)
1464   {
1465     assert(0 <= _task_num && _task_num < _max_task_num, "sanity");
1466     if ((_max_task_num - _task_num) == 1) {
1467       // Last task
1468       _final = true;
1469     }
1470   }
1471 
1472   bool doHeapRegion(HeapRegion* hr) {
1473     // Adds the value in the counted marked bytes array for
1474     // _task_num for region hr to the value cached in the
1475     // heap region itself.
1476     // For the final task we also set the top at conc count
1477     // for the region.
1478     // The bits in the live region bitmap are set for regions
1479     // that contain live data during the cleanup pause.
1480 
1481     if (hr->continuesHumongous()) {
1482       // We will ignore these here and process them when their
1483       // associated "starts humongous" region is processed.
1484       // Note that we cannot rely on their associated
1485       // "starts humongous" region to have their bit set to 1
1486       // since, due to the region chunking in the parallel region
1487       // iteration, a "continues humongous" region might be visited
1488       // before its associated "starts humongous".
1489       return false;
1490     }
1491 
1492     int hrs_index = hr->hrs_index();
1493     size_t* marked_bytes_array = _cm->count_marked_bytes_for(_task_num);
1494     size_t marked_bytes = marked_bytes_array[hrs_index];
1495     hr->add_to_marked_bytes(marked_bytes);



1496 
1497     if (_final) {
1498       HeapWord* ntams = hr->next_top_at_mark_start();
1499       HeapWord* start = hr->bottom();
1500       
1501       assert(start <= ntams && ntams <= hr->top() && hr->top() <= hr->end(),
1502              "Preconditions.");
1503 
1504       hr->set_top_at_conc_mark_count(ntams);
1505     }
1506 
1507     return false;
1508   }
1509 };
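// Note (illustrative): with _max_task_num == 4, only the closure
// instantiated for task 3 has _final == true, so ntams is recorded as
// top_at_conc_mark_count exactly once per region per aggregation pass.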
1510 
1511 void ConcurrentMark::aggregate_all_count_data() {
1512   _card_bm.clear();





1513 
1514   // Unions the per task card bitmaps into the global card bitmap,
1515   // and aggregates the per task marked bytes for each region into
1516   // the heap region itself.
1517 
1518   for (int i = 0; i < _max_task_num; i += 1) {
1519     BitMap& task_card_bm = count_card_bitmap_for(i);
1520     _card_bm.set_union(task_card_bm);
1521 
1522     // Update the marked bytes for each region
1523     AddToMarkedBytesClosure cl(this, i, _max_task_num);
1524     _g1h->heap_region_iterate(&cl);
1525   }
1526 
1527   // We're done with the accumulated per-task concurrent
1528   // counting data so let's clear it for the next marking.
1529   clear_all_count_data();
1530 }
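// To illustrate the aggregation above (a sketch, not part of the change
// itself): with N marking tasks, the global card bitmap becomes the
// bitwise union (OR) of the N per-task card bitmaps, and each region's
// marked bytes grows by the sum of the N per-task entries for that
// region, added via AddToMarkedBytesClosure.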
1531 
1532 // Final update of count data (during cleanup).
1533 // Adds [top_at_count, NTAMS) to the marked bytes for each
1534 // region. Sets the bits in the card bitmap corresponding
1535 // to the interval [top_at_count, top], and sets the
1536 // liveness bit for each region containing live data
1537 // in the region bitmap.
1538 
1539 class FinalCountDataUpdateClosure: public HeapRegionClosure {
1540   ConcurrentMark* _cm;
1541   BitMap* _region_bm;
1542   BitMap* _card_bm;
1543   intptr_t _bottom_card_num;
1544 
1545   size_t _total_live_bytes;
1546   size_t _total_used_bytes;
1547   size_t _total_words_done;
1548 
1549   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1550     BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1551     BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
1552     
1553     // Inclusive bit range [start_idx, last_idx]. par_at_put_range
1554     // is exclusive so we have to also set the bit for last_idx.
1555     // Passing last_idx+1 to par_at_put_range would work in
1556     // most cases but could trip an OOB assertion.
1557 
1558     if ((last_idx - start_idx) > 0) {
1559       _card_bm->par_at_put_range(start_idx, last_idx, true);
1560     }
1561     _card_bm->par_set_bit(last_idx);
1562   }
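  // For instance, an object that fits within a single card yields
  // start_idx == last_idx; the range call above is skipped and only
  // par_set_bit(last_idx) marks that one card.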
1563 
1564   // It takes a region that's not empty (i.e., it has at least one
1565   // live object in it) and sets its corresponding bit on the region
1566   // bitmap to 1. If the region is "starts humongous" it will also set
1567   // to 1 the bits on the region bitmap that correspond to its
1568   // associated "continues humongous" regions.
1569   void set_bit_for_region(HeapRegion* hr) {
1570     assert(!hr->continuesHumongous(), "should have filtered those out");
1571 
1572     size_t index = hr->hrs_index();
1573     if (!hr->startsHumongous()) {
1574       // Normal (non-humongous) case: just set the bit.
1575       _region_bm->par_set_bit((BitMap::idx_t) index);
1576     } else {
1577       // Starts humongous case: calculate how many regions are part of
1578       // this humongous region and then set the bit range. It might
1579       // have been a bit more efficient to look at the object that
1580       // spans these humongous regions to calculate their number from
1581       // the object's size. However, it's a good idea to calculate
1582       // this based on the metadata itself, and not the region
1583       // contents, so that this code is not aware of what goes into
1584       // the humongous regions (in case this changes in the future).
1585       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1586       size_t end_index = index + 1;
1587       while (end_index < g1h->n_regions()) {
1588         HeapRegion* chr = g1h->region_at(end_index);
1589         if (!chr->continuesHumongous()) break;
1590         end_index += 1;
1591       }
1592       _region_bm->par_at_put_range((BitMap::idx_t) index,
1593                                    (BitMap::idx_t) end_index, true);
1594     }
1595   }
1596 
1597  public:
1598   FinalCountDataUpdateClosure(ConcurrentMark* cm,
1599                               BitMap* region_bm,
1600                               BitMap* card_bm) :
1601     _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1602     _total_live_bytes(0), _total_used_bytes(0), _total_words_done(0)
1603   {
1604     _bottom_card_num =
1605       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1606                CardTableModRefBS::card_shift);
1607   }
1608 
1609   bool doHeapRegion(HeapRegion* hr) {
1610 
1611     if (hr->continuesHumongous()) {
1612       // We will ignore these here and process them when their
1613       // associated "starts humongous" region is processed (see
1614       // set_bit_for_region()). Note that we cannot rely on their
1615       // associated "starts humongous" region to have their bit set to
1616       // 1 since, due to the region chunking in the parallel region
1617       // iteration, a "continues humongous" region might be visited
1618       // before its associated "starts humongous".
1619       return false;
1620     }
1621 
1622     HeapWord* start = hr->top_at_conc_mark_count();
1623     HeapWord* ntams = hr->next_top_at_mark_start();
1624     HeapWord* top   = hr->top();
1625     
1626     assert(hr->bottom() <= start && start <= hr->end() &&
1627            hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1628     
1629     size_t words_done = ntams - hr->bottom();
1630 
1631     intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1632     intptr_t last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
1633 
1634 
1635     if (start < ntams) {
1636       // Region was changed between remark and cleanup pauses
1637       // We need to add (ntams - start) to the marked bytes
1638       // for this region, and set bits for the range
1639       // [ card_num(start), card_num(ntams) ) in the
1640       // card bitmap.
1641       size_t live_bytes = (ntams - start) * HeapWordSize;
1642       hr->add_to_marked_bytes(live_bytes);
1643       
1644       // Record the new top at conc count
1645       hr->set_top_at_conc_mark_count(ntams);
1646 
1647       // The setting of the bits in the card bitmap takes place below.
1648     }
1649 
1650     // Mark the allocated-since-marking portion...
1651     if (ntams < top) {
1652       // This definitely means the region has live objects.
1653       set_bit_for_region(hr);
1654     }
1655 
1656     // Now set the bits for [start, top]
1657     mark_card_num_range(start_card_num, last_card_num);
1658 
1659     // Set the bit for the region if it contains live data
1660     if (hr->next_marked_bytes() > 0) {
1661       set_bit_for_region(hr);
1662     }
1663 
1664     _total_words_done += words_done;
1665     _total_used_bytes += hr->used();
1666     _total_live_bytes += hr->next_marked_bytes();
1667 
1668     return false;
1669   }
1670 
1671   size_t total_words_done() const { return _total_words_done; }
1672   size_t total_live_bytes() const { return _total_live_bytes; }
1673   size_t total_used_bytes() const { return _total_used_bytes; }
1674 };
1675 
1676 // Heap region closure used for verifying the counting data
1677 // that was accumulated concurrently and aggregated during
1678 // the remark pause. This closure is applied to the heap
1679 // regions during the STW cleanup pause.
1680 
1681 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1682   ConcurrentMark* _cm;
1683   CalcLiveObjectsClosure _calc_cl;
1684   BitMap* _region_bm;   // Region BM to be verified
1685   BitMap* _card_bm;     // Card BM to be verified
1686   bool _verbose;        // verbose output?
1687 
1688   BitMap* _exp_region_bm; // Expected Region BM values
1689   BitMap* _exp_card_bm;   // Expected card BM values
1690 
1691   intptr_t _bottom_card_num; // Used for calculating bitmap indices
1692 
1693   int _failures;
1694 
1695 public:
1696   VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
1697                                 BitMap* region_bm,
1698                                 BitMap* card_bm,
1699                                 BitMap* exp_region_bm,
1700                                 BitMap* exp_card_bm,
1701                                 bool verbose) :
1702     _cm(cm),
1703     _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
1704     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1705     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1706     _failures(0)
1707   { 
1708     _bottom_card_num =
1709       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1710                CardTableModRefBS::card_shift);
1711   }
1712 
1713   int failures() const { return _failures; }
1714 
1715   bool doHeapRegion(HeapRegion* hr) {
1716     if (hr->continuesHumongous()) {
1717       // We will ignore these here and process them when their
1718       // associated "starts humongous" region is processed (see
1719       // set_bit_for_region()). Note that we cannot rely on their
1720       // associated "starts humongous" region to have their bit set to
1721       // 1 since, due to the region chunking in the parallel region
1722       // iteration, a "continues humongous" region might be visited
1723       // before its associated "starts humongous".
1724       return false;
1725     }
1726 
1727     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1728     // this region and set the corresponding bits in the expected region
1729     // and card bitmaps.
1730     bool res = _calc_cl.doHeapRegion(hr);
1731     assert(res == false, "should be continuing");
1732 
1733     // Note that the calculated count data could be a subset of the
1734     // count data that was accumulated during marking. See the comment
1735     // in G1ParCopyHelper::copy_to_survivor_space for an explanation
1736     // of why.
1737 
1738     if (_verbose) {
1739       gclog_or_tty->print("Region %d: bottom: "PTR_FORMAT", ntams: "
1740                           PTR_FORMAT", top: "PTR_FORMAT", end: "PTR_FORMAT,
1741                           hr->hrs_index(), hr->bottom(), hr->next_top_at_mark_start(),
1742                           hr->top(), hr->end());
1743       gclog_or_tty->print_cr(", marked_bytes: calc/actual "SIZE_FORMAT"/"SIZE_FORMAT,
1744                              _calc_cl.region_marked_bytes(),
1745                              hr->next_marked_bytes());
1746     }
1747 
1748     // Verify that _top_at_conc_count == ntams
1749     if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
1750       if (_verbose) {
1751         gclog_or_tty->print_cr("Region %d: top at conc count incorrect: expected "
1752                                PTR_FORMAT", actual: "PTR_FORMAT,
1753                                hr->hrs_index(), hr->next_top_at_mark_start(),
1754                                hr->top_at_conc_mark_count());
1755       }
1756       _failures += 1;
1757     }
1758 
1759     // Verify the marked bytes for this region. 
1760     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1761     size_t act_marked_bytes = hr->next_marked_bytes();
1762 
1763     // We're OK if actual marked bytes >= expected.
1764     if (exp_marked_bytes > act_marked_bytes) {
1765       if (_verbose) {
1766         gclog_or_tty->print_cr("Region %d: marked bytes mismatch: expected: "
1767                                SIZE_FORMAT", actual: "SIZE_FORMAT,
1768                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1769       }
1770       _failures += 1;
1771     }
1772 
1773     // Verify the bit, for this region, in the actual and expected
1774     // (which was just calculated) region bit maps.
1775     // We're not OK if the expected bit is set and the actual is not set.
1776     BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();
1777     
1778     bool expected = _exp_region_bm->at(index);
1779     bool actual = _region_bm->at(index);
1780     if (expected && !actual) {
1781       if (_verbose) {
1782         gclog_or_tty->print_cr("Region %d: region bitmap mismatch: expected: %d, actual: %d",
1783                                hr->hrs_index(), expected, actual);
1784       }
1785       _failures += 1;
1786     }
1787 
1788     // Verify that the card bit maps for the cards spanned by the current
1789     // region match. The set of offsets that have set bits in the expected
1790     // bitmap should be a subset of the offsets with set bits from the actual
1791     // calculated card bitmap.
1792     // Again it's more important that if the expected bit is set then the
1793     // actual bit be set.
1794     intptr_t start_card_num =
1795         intptr_t(uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift);
1796     intptr_t top_card_num =
1797         intptr_t(uintptr_t(hr->top()) >> CardTableModRefBS::card_shift);
1798 
1799     BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1800     BitMap::idx_t end_idx = top_card_num - _bottom_card_num;
1801 
1802     for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
1803       expected = _exp_card_bm->at(i);
1804       actual = _card_bm->at(i);
1805       
1806       if (expected && !actual) {
1807         if (_verbose) {
1808           gclog_or_tty->print_cr("Region %d: card bitmap mismatch at idx %d: expected: %d, actual: %d",
1809                                  hr->hrs_index(), i, expected, actual);
1810         }
1811         _failures += 1;
1812       }
1813     }
1814     if (_failures) {
1815       // Stop iteration?
1816       return true;
1817     }
1818 
1819     return false;
1820   }
1821 };
1822 
1823 class Mux2HRClosure: public HeapRegionClosure {
1824   HeapRegionClosure* _cl1;
1825   HeapRegionClosure* _cl2;
1826 
1827 public:
1828   Mux2HRClosure(HeapRegionClosure *c1, HeapRegionClosure *c2) : _cl1(c1), _cl2(c2) { }
1829   bool doHeapRegion(HeapRegion* hr) {
1830     bool res1 = _cl1->doHeapRegion(hr);
1831     bool res2 = _cl2->doHeapRegion(hr);
1832 
1833     // Only continue the iteration if both closures return false.
1834     return res1 || res2;
1835   }
1836 };
1837 
1838 class G1ParFinalCountTask: public AbstractGangTask {
1839 protected:
1840   G1CollectedHeap* _g1h;
1841   CMBitMap* _bm;
1842   size_t _n_workers;
1843   size_t *_live_bytes;
1844   size_t *_used_bytes;
1845 
1846   BitMap* _actual_region_bm;
1847   BitMap* _actual_card_bm;
1848 
1849   BitMap _expected_region_bm;
1850   BitMap _expected_card_bm;
1851 
1852   int _failures;
1853 
1854 public:
1855   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1856                       BitMap* region_bm, BitMap* card_bm)
1857     : AbstractGangTask("G1 final counting"),
1858       _g1h(g1h), _bm(bm),
1859       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1860       _expected_region_bm(0, false), _expected_card_bm(0, false),
1861       _failures(0)
1862   {
1863     if (ParallelGCThreads > 0) {
1864       _n_workers = _g1h->workers()->total_workers();
1865     } else {
1866       _n_workers = 1;
1867     }
1868 
1869     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1870     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1871 
1872     if (VerifyDuringGC) {
1873       _expected_card_bm.resize(_actual_card_bm->size(), false);
1874       _expected_region_bm.resize(_actual_region_bm->size(), false);
1875     }
1876   }
1877 
1878   ~G1ParFinalCountTask() {
1879     if (VerifyDuringGC) {
1880       _expected_region_bm.resize(0);
1881       _expected_card_bm.resize(0);
1882     }
1883     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1884     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1885   }
1886 
1887   void work(int i) {
1888 
1889     FinalCountDataUpdateClosure final_update_cl(_g1h->concurrent_mark(),
1890                                                 _actual_region_bm, _actual_card_bm);
1891 
1892     VerifyLiveObjectDataHRClosure verify_cl(_g1h->concurrent_mark(),
1893                                             _actual_region_bm, _actual_card_bm,
1894                                             &_expected_region_bm,
1895                                             &_expected_card_bm,
1896                                             true /* verbose */);
1897 
1898     Mux2HRClosure update_and_verify_cl(&final_update_cl, &verify_cl);
1899 
1900     HeapRegionClosure* hr_cl = &final_update_cl;
1901     if (VerifyDuringGC) {
1902       hr_cl = &update_and_verify_cl;
1903     }
1904 
1905     if (G1CollectedHeap::use_parallel_gc_threads()) {
1906       _g1h->heap_region_par_iterate_chunked(hr_cl, i,
1907                                             HeapRegion::FinalCountClaimValue);
1908     } else {
1909       _g1h->heap_region_iterate(hr_cl);
1910     }

1911 
1912     assert((size_t) i < _n_workers, "invariant");
1913     _live_bytes[i] = final_update_cl.total_live_bytes();
1914     _used_bytes[i] = final_update_cl.total_used_bytes();
1915 
1916     if (VerifyDuringGC) {
1917       _failures += verify_cl.failures();
1918     }
1919   }
1920 
1921   size_t live_bytes()  {
1922     size_t live_bytes = 0;
1923     for (size_t i = 0; i < _n_workers; ++i)
1924       live_bytes += _live_bytes[i];
1925     return live_bytes;
1926   }
1927 
1928   size_t used_bytes()  {
1929     size_t used_bytes = 0;
1930     for (size_t i = 0; i < _n_workers; ++i)
1931       used_bytes += _used_bytes[i];
1932     return used_bytes;
1933   }
1934 
1935   int failures() const { return _failures; }
1936 };
1937 
1938 class G1ParNoteEndTask;
1939 
1940 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1941   G1CollectedHeap* _g1;
1942   int _worker_num;
1943   size_t _max_live_bytes;
1944   size_t _regions_claimed;
1945   size_t _freed_bytes;
1946   FreeRegionList* _local_cleanup_list;
1947   HumongousRegionSet* _humongous_proxy_set;
1948   HRRSCleanupTask* _hrrs_cleanup_task;
1949   double _claimed_region_time;
1950   double _max_region_time;
1951 
1952 public:
1953   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1954                              int worker_num,
1955                              FreeRegionList* local_cleanup_list,


2116   }
2117 
2118   g1h->verify_region_sets_optional();
2119 
2120   if (VerifyDuringGC) {
2121     HandleMark hm;  // handle scope
2122     gclog_or_tty->print(" VerifyDuringGC:(before)");
2123     Universe::heap()->prepare_for_verify();
2124     Universe::verify(/* allow dirty */ true,
2125                      /* silent      */ false,
2126                      /* option      */ VerifyOption_G1UsePrevMarking);
2127   }
2128 
2129   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2130   g1p->record_concurrent_mark_cleanup_start();
2131 
2132   double start = os::elapsedTime();
2133 
2134   HeapRegionRemSet::reset_for_cleanup_tasks();
2135 
2136   // Clear the global region bitmap - it will be filled as part
2137   // of the final counting task.
2138   _region_bm.clear();
2139 
2140   // Do counting once more with the world stopped for good measure.
2141   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
2142                                         &_region_bm, &_card_bm);
2143 
2144   if (G1CollectedHeap::use_parallel_gc_threads()) {
2145     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),

2146            "sanity check");
2147 
2148     int n_workers = g1h->workers()->total_workers();
2149     g1h->set_par_threads(n_workers);
2150     g1h->workers()->run_task(&g1_par_count_task);
2151     g1h->set_par_threads(0);
2152 
2153     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),

2154            "sanity check");
2155   } else {
2156     g1_par_count_task.work(0);
2157   }
2158 
2159   // Verify that there were no verification failures of
2160   // the live counting data.
2161   if (VerifyDuringGC) {
2162     assert(g1_par_count_task.failures() == 0, "Unexpected failures");
2163   }
2164 
2165   size_t known_garbage_bytes =
2166     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
2167   g1p->set_known_garbage_bytes(known_garbage_bytes);
2168 
2169   size_t start_used_bytes = g1h->used();
2170   _at_least_one_mark_complete = true;
2171   g1h->set_marking_complete();
2172 
2173   ergo_verbose4(ErgoConcCycles,
2174            "finish cleanup",
2175            ergo_format_byte("occupancy")
2176            ergo_format_byte("capacity")
2177            ergo_format_byte_perc("known garbage"),
2178            start_used_bytes, g1h->capacity(),
2179            known_garbage_bytes,
2180            ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
2181 
2182   double count_end = os::elapsedTime();
2183   double this_final_counting_time = (count_end - start);
2184   if (G1PrintParCleanupStats) {


2347   }
2348   assert(tmp_free_list.is_empty(), "post-condition");
2349 }
2350 
2351 // Support closures for reference processing in G1
2352 
2353 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2354   HeapWord* addr = (HeapWord*)obj;
2355   return addr != NULL &&
2356          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2357 }
2358 
2359 class G1CMKeepAliveClosure: public OopClosure {
2360   G1CollectedHeap* _g1;
2361   ConcurrentMark*  _cm;
2362   CMBitMap*        _bitMap;
2363  public:
2364   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
2365                        CMBitMap* bitMap) :
2366     _g1(g1), _cm(cm),
2367     _bitMap(bitMap)
2368   {
2369     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2370   }
2371 
2372   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2373   virtual void do_oop(      oop* p) { do_oop_work(p); }
2374 
2375   template <class T> void do_oop_work(T* p) {
2376     oop obj = oopDesc::load_decode_heap_oop(p);
2377     HeapWord* addr = (HeapWord*)obj;
2378 
2379     if (_cm->verbose_high()) {
2380       gclog_or_tty->print_cr("\t[0] we're looking at location "
2381                              "*"PTR_FORMAT" = "PTR_FORMAT,
2382                              p, (void*) obj);
2383     }
2384 
2385     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2386       _bitMap->mark(addr);
2387       // Update the task specific count data for obj
2388       _cm->add_to_count_data_for(obj, 0 /* worker_i */);
2389 
2390       _cm->mark_stack_push(obj);
2391     }
2392   }
2393 };
2394 
2395 class G1CMDrainMarkingStackClosure: public VoidClosure {
2396   CMMarkStack*                  _markStack;
2397   CMBitMap*                     _bitMap;
2398   G1CMKeepAliveClosure*         _oopClosure;
2399  public:
2400   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
2401                                G1CMKeepAliveClosure* oopClosure) :
2402     _bitMap(bitMap),
2403     _markStack(markStack),
2404     _oopClosure(oopClosure)
2405   {}
2406 
2407   void do_void() {
2408     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
2409   }


3020                            (void*) obj);
3021   }
3022 
3023   HeapWord* objAddr = (HeapWord*) obj;
3024   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3025   if (_g1h->is_in_g1_reserved(objAddr)) {
3026     assert(obj != NULL, "null check is implicit");
3027     if (!_nextMarkBitMap->isMarked(objAddr)) {
3028       // Only get the containing region if the object is not marked on the
3029       // bitmap (otherwise, it's a waste of time since we won't do
3030       // anything with it).
3031       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
3032       if (!hr->obj_allocated_since_next_marking(obj)) {
3033         if (verbose_high()) {
3034           gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
3035                                  "marked", (void*) obj);
3036         }
3037 
3038         // we need to mark it first
3039         if (_nextMarkBitMap->parMark(objAddr)) {
3040           // Update the task specific count data for obj
3041           add_to_count_data_for(obj, hr, 0 /* worker_i */);
3042 
3043           // No OrderAccess::store_load() is needed. It is implicit in the
3044           // CAS done in parMark(objAddr) above
3045           HeapWord* finger = _finger;
3046           if (objAddr < finger) {
3047             if (verbose_high()) {
3048               gclog_or_tty->print_cr("[global] below the global finger "
3049                                      "("PTR_FORMAT"), pushing it", finger);
3050             }
3051             if (!mark_stack_push(obj)) {
3052               if (verbose_low()) {
3053                 gclog_or_tty->print_cr("[global] global stack overflow during "
3054                                        "deal_with_reference");
3055               }
3056             }
3057           }
3058         }
3059       }
3060     }
3061   }
3062 }


3269 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
3270   _markStack.setEmpty();
3271   _markStack.clear_overflow();
3272   _regionStack.setEmpty();
3273   _regionStack.clear_overflow();
3274   if (clear_overflow) {
3275     clear_has_overflown();
3276   } else {
3277     assert(has_overflown(), "pre-condition");
3278   }
3279   _finger = _heap_start;
3280 
3281   for (int i = 0; i < (int)_max_task_num; ++i) {
3282     OopTaskQueue* queue = _task_queues->queue(i);
3283     queue->set_empty();
3284     // Clear any partial regions from the CMTasks
3285     _tasks[i]->clear_aborted_region();
3286   }
3287 }
3288 
3289 // Clear the per-worker arrays used to store the per-region counting data
3290 void ConcurrentMark::clear_all_count_data() {
3291   assert(SafepointSynchronize::is_at_safepoint() ||
3292          !Universe::is_fully_initialized(), "must be");
3293 
3294   int max_regions = _g1h->max_regions();
3295   
3296   assert(_max_task_num != 0, "uninitialized");
3297   assert(_count_card_bitmaps != NULL, "uninitialized");
3298   assert(_count_marked_bytes != NULL, "uninitialized");
3299 
3300   for (int i = 0; i < _max_task_num; i += 1) {
3301     BitMap& task_card_bm = count_card_bitmap_for(i);
3302     size_t* marked_bytes_array = count_marked_bytes_for(i);
3303 
3304     assert(task_card_bm.size() == _card_bm.size(), "size mismatch");
3305     assert(marked_bytes_array != NULL, "uninitialized");
3306 
3307     for (int j = 0; j < max_regions; j++) {
3308       marked_bytes_array[j] = 0;
3309     }
3310     task_card_bm.clear();
3311   }
3312 }
3313 
3314 // Adds the given region to the counting data structures
3315 // for the given task id.
3316 void ConcurrentMark::add_to_count_data_for(MemRegion mr,
3317                                            HeapRegion* hr,
3318                                            int worker_i) {
3319   G1CollectedHeap* g1h = _g1h;
3320   HeapWord* start = mr.start();
3321   HeapWord* last = mr.last();
3322   size_t index = hr->hrs_index();
3323 
3324   assert(!hr->continuesHumongous(), "should not be HC region");
3325   assert(hr == g1h->heap_region_containing(start), "sanity");
3326   assert(hr == g1h->heap_region_containing(mr.last()), "sanity");
3327   assert(0 <= worker_i && worker_i < _max_task_num, "oob");
3328 
3329   BitMap& task_card_bm = count_card_bitmap_for(worker_i);
3330   size_t* marked_bytes_array = count_marked_bytes_for(worker_i);
3331 
3332   // Below, the term "card num" means the result of shifting an address
3333   // by the card shift -- address 0 corresponds to card number 0.  One
3334   // must subtract the card num of the bottom of the heap to obtain a
3335   // card table index.
3336 
3337   intptr_t start_card_num = 
3338     intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
3339   intptr_t last_card_num =
3340     intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift);
3341 
3342   intptr_t bottom_card_num = 
3343     intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> 
3344         CardTableModRefBS::card_shift);
3345 
3346   BitMap::idx_t start_idx = start_card_num - bottom_card_num;
3347   BitMap::idx_t last_idx = last_card_num - bottom_card_num;
3348   
3349   // The card bitmap is task/worker specific => no need to use 'par' routines.
3350   // Inclusive bit range [start_idx, last_idx]. set_range is exclusive
3351   // so we have to also explicitly set the bit for last_idx.
3352   // Passing last_idx+1 to set_range would work in most cases
3353   // but could trip an OOB assertion.
3354 
3355   if ((last_idx - start_idx) > 0) {
3356     task_card_bm.set_range(start_idx, last_idx);
3357   }
3358   task_card_bm.set_bit(last_idx);
3359   
3360   // Add to the task local marked bytes for this region.
3361   marked_bytes_array[index] += mr.byte_size();
3362 }
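// As an illustration (not from the change itself): marking a 24-byte
// object whose MemRegion spans a card boundary gives last_card_num ==
// start_card_num + 1; set_range covers the first card, the explicit
// set_bit covers the second, and the marked bytes entry for the
// object's region grows by 24.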
3363 
3364 void ConcurrentMark::add_to_count_data_for(oop obj, HeapRegion* hr, int worker_i) {
3365   MemRegion mr((HeapWord*)obj, obj->size());
3366   add_to_count_data_for(mr, hr, worker_i);
3367 }
3368 
3369 void ConcurrentMark::add_to_count_data_for(MemRegion mr, int worker_i) {
3370   HeapRegion* hr = _g1h->heap_region_containing(mr.start());
3371   add_to_count_data_for(mr, hr, worker_i);
3372 }
3373 
3374 void ConcurrentMark::add_to_count_data_for(oop obj, int worker_i) {
3375   MemRegion mr((HeapWord*)obj, obj->size());
3376   add_to_count_data_for(mr, worker_i);
3377 }
3378 
3379 // Updates the counting data with liveness info recorded for a
3380 // region (typically a GCLab).
3381 void ConcurrentMark::add_to_count_data_for_region(MemRegion lab_mr,
3382                                                   BitMap* lab_card_bm,
3383                                                   intptr_t lab_bottom_card_num,
3384                                                   size_t lab_marked_bytes,
3385                                                   int worker_i) {
3386   HeapRegion* hr = _g1h->heap_region_containing(lab_mr.start());
3387 
3388   BitMap& task_card_bm = count_card_bitmap_for(worker_i);
3389   size_t* marked_bytes_array = count_marked_bytes_for(worker_i);
3390 
3391   // Below, the term "card num" means the result of shifting an address
3392   // by the card shift -- address 0 corresponds to card number 0.  One
3393   // must subtract the card num of the bottom of the heap to obtain a
3394   // card table index.
3395   
3396   intptr_t heap_bottom_card_num = 
3397     intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> 
3398         CardTableModRefBS::card_shift);
3399 
3400   assert(intptr_t(uintptr_t(lab_mr.start()) >> CardTableModRefBS::card_shift) == lab_bottom_card_num,
3401          "sanity");
3402 
3403   // We have to map the indices of set bits in lab_card_bm, using
3404   // lab_bottom_card_num, to indices the card bitmap for the given task.
3405 
3406   BitMap::idx_t end_idx = lab_card_bm->size();
3407   BitMap::idx_t start_idx = lab_card_bm->get_next_one_offset(0, end_idx);
3408   while (start_idx < end_idx) {
3409     assert(lab_card_bm->at(start_idx), "should be set");
3410 
3411     intptr_t lab_card_num = lab_bottom_card_num + start_idx;
3412     BitMap::idx_t card_bm_idx = lab_card_num - heap_bottom_card_num;
3413 
3414     task_card_bm.set_bit(card_bm_idx);
3415 
3416     // Get the offset of the next set bit
3417     start_idx = lab_card_bm->get_next_one_offset(start_idx+1, end_idx);
3418   }
3419 
3420   // Now add to the marked bytes
3421   marked_bytes_array[hr->hrs_index()] += lab_marked_bytes;
3422 }
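// A small index-mapping example (illustrative): if heap_bottom_card_num
// is 0x400000 and lab_bottom_card_num is 0x400010, a set bit at
// lab_card_bm index 3 corresponds to lab card num 0x400013 and is
// copied into task_card_bm at index 0x13 (i.e. 19).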
3423 
3424 void ConcurrentMark::clear_count_data_for_heap_region(HeapRegion* hr) {
3425   // Clears the count data for the given region from _all_ of
3426   // the per-task counting data structures.
3427 
3428   MemRegion used_region = hr->used_region();
3429   HeapWord* start = used_region.start();
3430   HeapWord* last = used_region.last();
3431   size_t hr_index = hr->hrs_index();
3432 
3433   intptr_t bottom_card_num =
3434       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
3435                CardTableModRefBS::card_shift);
3436   
3437   intptr_t start_card_num =
3438     intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
3439   intptr_t last_card_num =
3440     intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift);
3441   
3442   BitMap::idx_t start_idx = start_card_num - bottom_card_num;
3443   BitMap::idx_t last_idx = last_card_num - bottom_card_num;
3444 
3445   size_t used_region_bytes = used_region.byte_size();
3446   size_t marked_bytes = 0;
3447 
3448   for (int i = 0; i < _max_task_num; i += 1) {
3449     BitMap& task_card_bm = count_card_bitmap_for(i);
3450     size_t* marked_bytes_array = count_marked_bytes_for(i);
3451 
3452     marked_bytes += marked_bytes_array[hr_index];
3453     // clear the amount of marked bytes in the task array for this
3454     // region
3455     marked_bytes_array[hr_index] = 0;
3456     
3457     // Clear the inclusive range [start_idx, last_idx] from the
3458     // card bitmap. The clear_range routine is exclusive so we
3459     // need to also explicitly clear the bit at last_idx.
3460     // Passing last_idx+1 to the clear_range would work in
3461     // most cases but could trip an OOB assertion.
3462 
3463     if ((last_idx - start_idx) > 0) {
3464       task_card_bm.clear_range(start_idx, last_idx);
3465     }
3466     task_card_bm.clear_bit(last_idx);
3467   }
3468   // We could assert here that marked_bytes == used_region_bytes
3469 }
3470 
3471 void ConcurrentMark::print_stats() {
3472   if (verbose_stats()) {
3473     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3474     for (size_t i = 0; i < _active_tasks; ++i) {
3475       _tasks[i]->print_stats();
3476       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3477     }
3478   }
3479 }
3480 
3481 class CSMarkOopClosure: public OopClosure {
3482   friend class CSMarkBitMapClosure;
3483 
3484   G1CollectedHeap* _g1h;
3485   CMBitMap*        _bm;
3486   ConcurrentMark*  _cm;
3487   oop*             _ms;
3488   jint*            _array_ind_stack;
3489   int              _ms_size;
3490   int              _ms_ind;


3556 
3557   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
3558   virtual void do_oop(      oop* p) { do_oop_work(p); }
3559 
3560   template <class T> void do_oop_work(T* p) {
3561     T heap_oop = oopDesc::load_heap_oop(p);
3562     if (oopDesc::is_null(heap_oop)) return;
3563     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
3564     if (obj->is_forwarded()) {
3565       // If the object has already been forwarded, we have to make sure
3566       // that it's marked.  So follow the forwarding pointer.  Note that
3567       // this does the right thing for self-forwarding pointers in the
3568       // evacuation failure case.
3569       obj = obj->forwardee();
3570     }
3571     HeapRegion* hr = _g1h->heap_region_containing(obj);
3572     if (hr != NULL) {
3573       if (hr->in_collection_set()) {
3574         if (_g1h->is_obj_ill(obj)) {
3575           _bm->mark((HeapWord*)obj);
3576           // Update the task specific count data for object
3577           _cm->add_to_count_data_for(obj, hr, 0 /* worker_i */);
3578 
3579           if (!push(obj)) {
3580             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
3581             set_abort();
3582           }
3583         }
3584       } else {
3585         // Outside the collection set; we need to gray it
3586         _cm->deal_with_reference(obj);
3587       }
3588     }
3589   }
3590 };
3591 
3592 class CSMarkBitMapClosure: public BitMapClosure {
3593   G1CollectedHeap* _g1h;
3594   CMBitMap*        _bitMap;
3595   ConcurrentMark*  _cm;
3596   CSMarkOopClosure _oop_cl;
3597 public:
3598   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :


3640 
3641   ~CompleteMarkingInCSHRClosure() {}
3642 
3643   bool doHeapRegion(HeapRegion* r) {
3644     if (!r->evacuation_failed()) {
3645       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
3646       if (!mr.is_empty()) {
3647         if (!_bm->iterate(&_bit_cl, mr)) {
3648           _completed = false;
3649           return true;
3650         }
3651       }
3652     }
3653     return false;
3654   }
3655 
3656   bool completed() { return _completed; }
3657 };
3658 
3659 class ClearMarksInHRClosure: public HeapRegionClosure {
3660   ConcurrentMark* _cm;
3661   CMBitMap* _bm;
3662 public:
3663   ClearMarksInHRClosure(ConcurrentMark* cm, CMBitMap* bm):
3664     _cm(cm), _bm(bm)
3665   { }
3666 
3667   bool doHeapRegion(HeapRegion* r) {
3668     if (!r->used_region().is_empty() && !r->evacuation_failed()) {

3669       _bm->clearRange(r->used_region());
3670       // Need to remove values from the count info
3671       _cm->clear_count_data_for_heap_region(r);
3672     }
3673     return false;
3674   }
3675 };
3676 
3677 void ConcurrentMark::complete_marking_in_collection_set() {
3678   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
3679 
3680   if (!g1h->mark_in_progress()) {
3681     g1h->g1_policy()->record_mark_closure_time(0.0);
3682     return;
3683   }
3684 
3685   int i = 1;
3686   double start = os::elapsedTime();
3687   while (true) {
3688     i++;
3689     CompleteMarkingInCSHRClosure cmplt(this);
3690     g1h->collection_set_iterate(&cmplt);
3691     if (cmplt.completed()) break;
3692   }
3693   double end_time = os::elapsedTime();
3694   double elapsed_time_ms = (end_time - start) * 1000.0;
3695   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3696 
3697   ClearMarksInHRClosure clr(this, nextMarkBitMap());
3698   g1h->collection_set_iterate(&clr);
3699 }
3700 
3701 // The next two methods deal with the following optimisation. Some
3702 // objects are gray by being marked and located above the finger. If
3703 // they are copied, during an evacuation pause, below the finger then
3704 // they need to be pushed on the stack. The observation is that, if
3705 // there are no regions in the collection set located above the
3706 // finger, then the above cannot happen, hence we do not need to
3707 // explicitly gray any objects when copying them to below the
3708 // finger. The global stack will be scanned to ensure that, if it
3709 // points to objects being copied, it will update their
3710 // location. There is a tricky situation with the gray objects on
3711 // the region stack that are being copied, however. See the comment in
3712 // newCSet().
3713 
3714 void ConcurrentMark::newCSet() {
3715   if (!concurrent_marking_in_progress()) {
3716     // nothing to do if marking is not in progress
3717     return;


3819     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3820 
3821   }
3822   print_ms_time_info("  ", "cleanups", _cleanup_times);
3823   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3824                          _total_counting_time,
3825                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3826                           (double)_cleanup_times.num()
3827                          : 0.0));
3828   if (G1ScrubRemSets) {
3829     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3830                            _total_rs_scrub_time,
3831                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3832                             (double)_cleanup_times.num()
3833                            : 0.0));
3834   }
3835   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3836                          (_init_times.sum() + _remark_times.sum() +
3837                           _cleanup_times.sum())/1000.0);
3838   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3839                 "(%8.2f s marking).",
3840                 cmThread()->vtime_accum(),
3841                 cmThread()->vtime_mark_accum());

3842 }
3843 
3844 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3845   _parallel_workers->print_worker_threads_on(st);
3846 }
3847 
3848 // Closures
3849 // XXX: there seems to be a lot of code duplication here;
3850 // should refactor and consolidate the shared code.
3851 
3855 // We take a break if someone is trying to stop the world.
3856 bool ConcurrentMark::do_yield_check(int worker_i) {
3857   if (should_yield()) {
3858     if (worker_i == 0) {
3859       _g1h->g1_policy()->record_concurrent_pause();
3860     }
3861     cmThread()->yield();