rev 6912 : 8065227: Report allocation context stats at end of cleanup
Summary: Moved allocation context update from remark to the cleanup phase.
Reviewed-by: mgerdin, jmasa
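As a rough sketch of the intent of this change (hypothetical stand-in types: AllocationContextStats and MarkingCycle below are illustrative, not the actual HotSpot classes), the allocation context statistics update moves from the remark pause to the end of the cleanup phase:

    // Illustrative only -- hypothetical names, not the HotSpot API.
    #include <cstdio>

    struct AllocationContextStats {
      void update_after_mark() { std::printf("allocation context stats reported\n"); }
    };

    struct MarkingCycle {
      AllocationContextStats _stats;
      void remark()  { /* before this change, the stats update was issued here */ }
      void cleanup() {
        // After this change: report the stats once cleanup finishes.
        _stats.update_after_mark();
      }
    };

    int main() {
      MarkingCycle cycle;
      cycle.remark();
      cycle.cleanup(); // prints: allocation context stats reported
      return 0;
    }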
--- old/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/metadataOnStackMark.hpp"
27 27 #include "classfile/symbolTable.hpp"
28 28 #include "code/codeCache.hpp"
29 29 #include "gc_implementation/g1/concurrentMark.inline.hpp"
30 30 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
31 31 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
32 32 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
33 33 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
34 34 #include "gc_implementation/g1/g1Log.hpp"
35 35 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
36 36 #include "gc_implementation/g1/g1RemSet.hpp"
37 37 #include "gc_implementation/g1/heapRegion.inline.hpp"
38 38 #include "gc_implementation/g1/heapRegionManager.inline.hpp"
39 39 #include "gc_implementation/g1/heapRegionRemSet.hpp"
40 40 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
41 41 #include "gc_implementation/shared/vmGCOperations.hpp"
42 42 #include "gc_implementation/shared/gcTimer.hpp"
43 43 #include "gc_implementation/shared/gcTrace.hpp"
44 44 #include "gc_implementation/shared/gcTraceTime.hpp"
45 45 #include "memory/allocation.hpp"
46 46 #include "memory/genOopClosures.inline.hpp"
47 47 #include "memory/referencePolicy.hpp"
48 48 #include "memory/resourceArea.hpp"
49 49 #include "oops/oop.inline.hpp"
50 50 #include "runtime/handles.inline.hpp"
51 51 #include "runtime/java.hpp"
52 52 #include "runtime/prefetch.inline.hpp"
53 53 #include "services/memTracker.hpp"
54 54
55 55 // Concurrent marking bit map wrapper
56 56
57 57 CMBitMapRO::CMBitMapRO(int shifter) :
58 58 _bm(),
59 59 _shifter(shifter) {
60 60 _bmStartWord = 0;
61 61 _bmWordSize = 0;
62 62 }
63 63
64 64 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
65 65 const HeapWord* limit) const {
66 66 // First we must round addr *up* to a possible object boundary.
67 67 addr = (HeapWord*)align_size_up((intptr_t)addr,
68 68 HeapWordSize << _shifter);
69 69 size_t addrOffset = heapWordToOffset(addr);
70 70 if (limit == NULL) {
71 71 limit = _bmStartWord + _bmWordSize;
72 72 }
73 73 size_t limitOffset = heapWordToOffset(limit);
74 74 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
75 75 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
76 76 assert(nextAddr >= addr, "get_next_one postcondition");
77 77 assert(nextAddr == limit || isMarked(nextAddr),
78 78 "get_next_one postcondition");
79 79 return nextAddr;
80 80 }
81 81
82 82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
83 83 const HeapWord* limit) const {
84 84 size_t addrOffset = heapWordToOffset(addr);
85 85 if (limit == NULL) {
86 86 limit = _bmStartWord + _bmWordSize;
87 87 }
88 88 size_t limitOffset = heapWordToOffset(limit);
89 89 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
90 90 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
91 91 assert(nextAddr >= addr, "get_next_zero postcondition");
92 92 assert(nextAddr == limit || !isMarked(nextAddr),
93 93 "get_next_zero postcondition");
94 94 return nextAddr;
95 95 }
96 96
97 97 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
98 98 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
99 99 return (int) (diff >> _shifter);
100 100 }
101 101
102 102 #ifndef PRODUCT
103 103 bool CMBitMapRO::covers(MemRegion heap_rs) const {
104 104 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
105 105 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
106 106 "size inconsistency");
107 107 return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
108 108 _bmWordSize == heap_rs.word_size();
109 109 }
110 110 #endif
111 111
112 112 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
113 113 _bm.print_on_error(st, prefix);
114 114 }
115 115
116 116 size_t CMBitMap::compute_size(size_t heap_size) {
117 117 return heap_size / mark_distance();
118 118 }
119 119
120 120 size_t CMBitMap::mark_distance() {
121 121 return MinObjAlignmentInBytes * BitsPerByte;
122 122 }
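// Illustrative arithmetic (editorial annotation, not part of the patch):
// mark_distance() is the number of heap bytes covered by one bitmap byte.
// With MinObjAlignmentInBytes = 8 and BitsPerByte = 8 it is 64, i.e. one
// bitmap bit per possible (8-byte-aligned) object start, so compute_size()
// maps a 1 GB heap to a 16 MB mark bitmap (1G / 64).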
123 123
124 124 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
125 125 _bmStartWord = heap.start();
126 126 _bmWordSize = heap.word_size();
127 127
128 128 _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
129 129 _bm.set_size(_bmWordSize >> _shifter);
130 130
131 131 storage->set_mapping_changed_listener(&_listener);
132 132 }
133 133
134 134 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
135 135 if (zero_filled) {
136 136 return;
137 137 }
138 138 // We need to clear the bitmap on commit, removing any existing information.
139 139 MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
140 140 _bm->clearRange(mr);
141 141 }
142 142
143 143 // Closure used for clearing the given mark bitmap.
144 144 class ClearBitmapHRClosure : public HeapRegionClosure {
145 145 private:
146 146 ConcurrentMark* _cm;
147 147 CMBitMap* _bitmap;
148 148 bool _may_yield; // The closure may yield during iteration. If yielded, abort the iteration.
149 149 public:
150 150 ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
151 151 assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
152 152 }
153 153
154 154 virtual bool doHeapRegion(HeapRegion* r) {
155 155 size_t const chunk_size_in_words = M / HeapWordSize;
156 156
157 157 HeapWord* cur = r->bottom();
158 158 HeapWord* const end = r->end();
159 159
160 160 while (cur < end) {
161 161 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
162 162 _bitmap->clearRange(mr);
163 163
164 164 cur += chunk_size_in_words;
165 165
166 166 // Abort iteration if after yielding the marking has been aborted.
167 167 if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
168 168 return true;
169 169 }
170 170 // Repeat the asserts from before the start of the closure. We do them
171 171 // as asserts here to minimize their overhead in product builds. However, we
172 172 // keep them as guarantees at the beginning / end of the bitmap
173 173 // clearing to get some checking in product builds.
174 174 assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
175 175 assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
176 176 }
177 177
178 178 return false;
179 179 }
180 180 };
181 181
182 182 void CMBitMap::clearAll() {
183 183 ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
184 184 G1CollectedHeap::heap()->heap_region_iterate(&cl);
185 185 guarantee(cl.complete(), "Must have completed iteration.");
186 186 return;
187 187 }
188 188
189 189 void CMBitMap::markRange(MemRegion mr) {
190 190 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
191 191 assert(!mr.is_empty(), "unexpected empty region");
192 192 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
193 193 ((HeapWord *) mr.end())),
194 194 "markRange memory region end is not card aligned");
195 195 // convert address range into offset range
196 196 _bm.at_put_range(heapWordToOffset(mr.start()),
197 197 heapWordToOffset(mr.end()), true);
198 198 }
199 199
200 200 void CMBitMap::clearRange(MemRegion mr) {
201 201 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
202 202 assert(!mr.is_empty(), "unexpected empty region");
203 203 // convert address range into offset range
204 204 _bm.at_put_range(heapWordToOffset(mr.start()),
205 205 heapWordToOffset(mr.end()), false);
206 206 }
207 207
208 208 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
209 209 HeapWord* end_addr) {
210 210 HeapWord* start = getNextMarkedWordAddress(addr);
211 211 start = MIN2(start, end_addr);
212 212 HeapWord* end = getNextUnmarkedWordAddress(start);
213 213 end = MIN2(end, end_addr);
214 214 assert(start <= end, "Consistency check");
215 215 MemRegion mr(start, end);
216 216 if (!mr.is_empty()) {
217 217 clearRange(mr);
218 218 }
219 219 return mr;
220 220 }
221 221
222 222 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
223 223 _base(NULL), _cm(cm)
224 224 #ifdef ASSERT
225 225 , _drain_in_progress(false)
226 226 , _drain_in_progress_yields(false)
227 227 #endif
228 228 {}
229 229
230 230 bool CMMarkStack::allocate(size_t capacity) {
231 231 // allocate a stack of the requisite depth
232 232 ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
233 233 if (!rs.is_reserved()) {
234 234 warning("ConcurrentMark MarkStack allocation failure");
235 235 return false;
236 236 }
237 237 MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
238 238 if (!_virtual_space.initialize(rs, rs.size())) {
239 239 warning("ConcurrentMark MarkStack backing store failure");
240 240 // Release the virtual memory reserved for the marking stack
241 241 rs.release();
242 242 return false;
243 243 }
244 244 assert(_virtual_space.committed_size() == rs.size(),
245 245 "Didn't reserve backing store for all of ConcurrentMark stack?");
246 246 _base = (oop*) _virtual_space.low();
247 247 setEmpty();
248 248 _capacity = (jint) capacity;
249 249 _saved_index = -1;
250 250 _should_expand = false;
251 251 NOT_PRODUCT(_max_depth = 0);
252 252 return true;
253 253 }
254 254
255 255 void CMMarkStack::expand() {
256 256 // Called during remark if we overflowed the marking stack during marking.
257 257 assert(isEmpty(), "stack should have been emptied while handling overflow");
258 258 assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
259 259 // Clear expansion flag
260 260 _should_expand = false;
261 261 if (_capacity == (jint) MarkStackSizeMax) {
262 262 if (PrintGCDetails && Verbose) {
263 263 gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
264 264 }
265 265 return;
266 266 }
267 267 // Double capacity if possible
268 268 jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
269 269 // Do not give up existing stack until we have managed to
270 270 // get the double capacity that we desired.
271 271 ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
272 272 sizeof(oop)));
273 273 if (rs.is_reserved()) {
274 274 // Release the backing store associated with old stack
275 275 _virtual_space.release();
276 276 // Reinitialize virtual space for new stack
277 277 if (!_virtual_space.initialize(rs, rs.size())) {
278 278 fatal("Not enough swap for expanded marking stack capacity");
279 279 }
280 280 _base = (oop*)(_virtual_space.low());
281 281 _index = 0;
282 282 _capacity = new_capacity;
283 283 } else {
284 284 if (PrintGCDetails && Verbose) {
285 285 // Failed to double the capacity; continue with the existing stack.
286 286 gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
287 287 SIZE_FORMAT"K to " SIZE_FORMAT"K",
288 288 _capacity / K, new_capacity / K);
289 289 }
290 290 }
291 291 }
292 292
293 293 void CMMarkStack::set_should_expand() {
294 294 // If we're resetting the marking state because of a
295 295 // marking stack overflow, record that we should, if
296 296 // possible, expand the stack.
297 297 _should_expand = _cm->has_overflown();
298 298 }
299 299
300 300 CMMarkStack::~CMMarkStack() {
301 301 if (_base != NULL) {
302 302 _base = NULL;
303 303 _virtual_space.release();
304 304 }
305 305 }
306 306
307 307 void CMMarkStack::par_push(oop ptr) {
308 308 while (true) {
309 309 if (isFull()) {
310 310 _overflow = true;
311 311 return;
312 312 }
313 313 // Otherwise...
314 314 jint index = _index;
315 315 jint next_index = index+1;
316 316 jint res = Atomic::cmpxchg(next_index, &_index, index);
317 317 if (res == index) {
318 318 _base[index] = ptr;
319 319 // Note that we don't maintain this atomically. We could, but it
320 320 // doesn't seem necessary.
321 321 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
322 322 return;
323 323 }
324 324 // Otherwise, we need to try again.
325 325 }
326 326 }
327 327
328 328 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
329 329 while (true) {
330 330 if (isFull()) {
331 331 _overflow = true;
332 332 return;
333 333 }
334 334 // Otherwise...
335 335 jint index = _index;
336 336 jint next_index = index + n;
337 337 if (next_index > _capacity) {
338 338 _overflow = true;
339 339 return;
340 340 }
341 341 jint res = Atomic::cmpxchg(next_index, &_index, index);
342 342 if (res == index) {
343 343 for (int i = 0; i < n; i++) {
344 344 int ind = index + i;
345 345 assert(ind < _capacity, "By overflow test above.");
346 346 _base[ind] = ptr_arr[i];
347 347 }
348 348 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
349 349 return;
350 350 }
351 351 // Otherwise, we need to try again.
352 352 }
353 353 }
354 354
355 355 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
356 356 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
357 357 jint start = _index;
358 358 jint next_index = start + n;
359 359 if (next_index > _capacity) {
360 360 _overflow = true;
361 361 return;
362 362 }
363 363 // Otherwise.
364 364 _index = next_index;
365 365 for (int i = 0; i < n; i++) {
366 366 int ind = start + i;
367 367 assert(ind < _capacity, "By overflow test above.");
368 368 _base[ind] = ptr_arr[i];
369 369 }
370 370 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
371 371 }
372 372
373 373 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
374 374 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
375 375 jint index = _index;
376 376 if (index == 0) {
377 377 *n = 0;
378 378 return false;
379 379 } else {
380 380 int k = MIN2(max, index);
381 381 jint new_ind = index - k;
382 382 for (int j = 0; j < k; j++) {
383 383 ptr_arr[j] = _base[new_ind + j];
384 384 }
385 385 _index = new_ind;
386 386 *n = k;
387 387 return true;
388 388 }
389 389 }
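// Design note (editorial annotation): the single-element par_push() above is
// lock-free (a cmpxchg on _index), while the bulk par_push_arr()/par_pop_arr()
// variants serialize on ParGCRareEvent_lock, presumably because bulk
// transfers are rare enough that a mutex beats a multi-word lock-free scheme.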
390 390
391 391 template<class OopClosureClass>
392 392 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
393 393 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
394 394 || SafepointSynchronize::is_at_safepoint(),
395 395 "Drain recursion must be yield-safe.");
396 396 bool res = true;
397 397 debug_only(_drain_in_progress = true);
398 398 debug_only(_drain_in_progress_yields = yield_after);
399 399 while (!isEmpty()) {
400 400 oop newOop = pop();
401 401 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
402 402 assert(newOop->is_oop(), "Expected an oop");
403 403 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
404 404 "only grey objects on this stack");
405 405 newOop->oop_iterate(cl);
406 406 if (yield_after && _cm->do_yield_check()) {
407 407 res = false;
408 408 break;
409 409 }
410 410 }
411 411 debug_only(_drain_in_progress = false);
412 412 return res;
413 413 }
414 414
415 415 void CMMarkStack::note_start_of_gc() {
416 416 assert(_saved_index == -1,
417 417 "note_start_of_gc()/end_of_gc() bracketed incorrectly");
418 418 _saved_index = _index;
419 419 }
420 420
421 421 void CMMarkStack::note_end_of_gc() {
422 422 // This is intentionally a guarantee, instead of an assert. If we
423 423 // accidentally add something to the mark stack during GC, it
424 424 // will be a correctness issue so it's better if we crash. We'll
425 425 // only check this once per GC anyway, so it won't be a performance
426 426 // issue in any way.
427 427 guarantee(_saved_index == _index,
428 428 err_msg("saved index: %d index: %d", _saved_index, _index));
429 429 _saved_index = -1;
430 430 }
431 431
432 432 void CMMarkStack::oops_do(OopClosure* f) {
433 433 assert(_saved_index == _index,
434 434 err_msg("saved index: %d index: %d", _saved_index, _index));
435 435 for (int i = 0; i < _index; i += 1) {
436 436 f->do_oop(&_base[i]);
437 437 }
438 438 }
439 439
440 440 CMRootRegions::CMRootRegions() :
441 441 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
442 442 _should_abort(false), _next_survivor(NULL) { }
443 443
444 444 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
445 445 _young_list = g1h->young_list();
446 446 _cm = cm;
447 447 }
448 448
449 449 void CMRootRegions::prepare_for_scan() {
450 450 assert(!scan_in_progress(), "pre-condition");
451 451
452 452 // Currently, only survivors can be root regions.
453 453 assert(_next_survivor == NULL, "pre-condition");
454 454 _next_survivor = _young_list->first_survivor_region();
455 455 _scan_in_progress = (_next_survivor != NULL);
456 456 _should_abort = false;
457 457 }
458 458
459 459 HeapRegion* CMRootRegions::claim_next() {
460 460 if (_should_abort) {
461 461 // If someone has set the should_abort flag, we return NULL to
462 462 // force the caller to bail out of their loop.
463 463 return NULL;
464 464 }
465 465
466 466 // Currently, only survivors can be root regions.
467 467 HeapRegion* res = _next_survivor;
468 468 if (res != NULL) {
469 469 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
470 470 // Read it again in case it changed while we were waiting for the lock.
471 471 res = _next_survivor;
472 472 if (res != NULL) {
473 473 if (res == _young_list->last_survivor_region()) {
474 474 // We just claimed the last survivor so store NULL to indicate
475 475 // that we're done.
476 476 _next_survivor = NULL;
477 477 } else {
478 478 _next_survivor = res->get_next_young_region();
479 479 }
480 480 } else {
481 481 // Someone else claimed the last survivor while we were trying
482 482 // to take the lock so nothing else to do.
483 483 }
484 484 }
485 485 assert(res == NULL || res->is_survivor(), "post-condition");
486 486
487 487 return res;
488 488 }
489 489
490 490 void CMRootRegions::scan_finished() {
491 491 assert(scan_in_progress(), "pre-condition");
492 492
493 493 // Currently, only survivors can be root regions.
494 494 if (!_should_abort) {
495 495 assert(_next_survivor == NULL, "we should have claimed all survivors");
496 496 }
497 497 _next_survivor = NULL;
498 498
499 499 {
500 500 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
501 501 _scan_in_progress = false;
502 502 RootRegionScan_lock->notify_all();
503 503 }
504 504 }
505 505
506 506 bool CMRootRegions::wait_until_scan_finished() {
507 507 if (!scan_in_progress()) return false;
508 508
509 509 {
510 510 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
511 511 while (scan_in_progress()) {
512 512 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
513 513 }
514 514 }
515 515 return true;
516 516 }
517 517
518 518 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
519 519 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
520 520 #endif // _MSC_VER
521 521
522 522 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
523 523 return MAX2((n_par_threads + 2) / 4, 1U);
524 524 }
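// Worked example (editorial annotation): scale_parallel_threads() derives
// concurrent marking threads from parallel GC threads at roughly a 1:4
// ratio, e.g. n_par_threads = 8 gives MAX2((8 + 2) / 4, 1U) = 2, while
// n_par_threads = 1 still yields MAX2(0, 1U) = 1 marking thread.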
525 525
526 526 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
527 527 _g1h(g1h),
528 528 _markBitMap1(),
529 529 _markBitMap2(),
530 530 _parallel_marking_threads(0),
531 531 _max_parallel_marking_threads(0),
532 532 _sleep_factor(0.0),
533 533 _marking_task_overhead(1.0),
534 534 _cleanup_sleep_factor(0.0),
535 535 _cleanup_task_overhead(1.0),
536 536 _cleanup_list("Cleanup List"),
537 537 _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
538 538 _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
539 539 CardTableModRefBS::card_shift,
540 540 false /* in_resource_area*/),
541 541
542 542 _prevMarkBitMap(&_markBitMap1),
543 543 _nextMarkBitMap(&_markBitMap2),
544 544
545 545 _markStack(this),
546 546 // _finger set in set_non_marking_state
547 547
548 548 _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
549 549 // _active_tasks set in set_non_marking_state
550 550 // _tasks set inside the constructor
551 551 _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
552 552 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
553 553
554 554 _has_overflown(false),
555 555 _concurrent(false),
556 556 _has_aborted(false),
557 557 _aborted_gc_id(GCId::undefined()),
558 558 _restart_for_overflow(false),
559 559 _concurrent_marking_in_progress(false),
560 560
561 561 // _verbose_level set below
562 562
563 563 _init_times(),
564 564 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
565 565 _cleanup_times(),
566 566 _total_counting_time(0.0),
567 567 _total_rs_scrub_time(0.0),
568 568
569 569 _parallel_workers(NULL),
570 570
571 571 _count_card_bitmaps(NULL),
572 572 _count_marked_bytes(NULL),
573 573 _completed_initialization(false) {
574 574 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
575 575 if (verbose_level < no_verbose) {
576 576 verbose_level = no_verbose;
577 577 }
578 578 if (verbose_level > high_verbose) {
579 579 verbose_level = high_verbose;
580 580 }
581 581 _verbose_level = verbose_level;
582 582
583 583 if (verbose_low()) {
584 584 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
585 585 "heap end = " INTPTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
586 586 }
587 587
588 588 _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
589 589 _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
590 590
591 591 // Create & start a ConcurrentMark thread.
592 592 _cmThread = new ConcurrentMarkThread(this);
593 593 assert(cmThread() != NULL, "CM Thread should have been created");
594 594 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
595 595 if (_cmThread->osthread() == NULL) {
596 596 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
597 597 }
598 598
599 599 assert(CGC_lock != NULL, "Where's the CGC_lock?");
600 600 assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
601 601 assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
602 602
603 603 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
604 604 satb_qs.set_buffer_size(G1SATBBufferSize);
605 605
606 606 _root_regions.init(_g1h, this);
607 607
608 608 if (ConcGCThreads > ParallelGCThreads) {
609 609 warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
610 610 "than ParallelGCThreads (" UINTX_FORMAT ").",
611 611 ConcGCThreads, ParallelGCThreads);
612 612 return;
613 613 }
614 614 if (ParallelGCThreads == 0) {
615 615 // if we are not running with any parallel GC threads we will not
616 616 // spawn any marking threads either
617 617 _parallel_marking_threads = 0;
618 618 _max_parallel_marking_threads = 0;
619 619 _sleep_factor = 0.0;
620 620 _marking_task_overhead = 1.0;
621 621 } else {
622 622 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
623 623 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
624 624 // if both are set
625 625 _sleep_factor = 0.0;
626 626 _marking_task_overhead = 1.0;
627 627 } else if (G1MarkingOverheadPercent > 0) {
628 628 // We will calculate the number of parallel marking threads based
629 629 // on a target overhead with respect to the soft real-time goal
630 630 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
631 631 double overall_cm_overhead =
632 632 (double) MaxGCPauseMillis * marking_overhead /
633 633 (double) GCPauseIntervalMillis;
634 634 double cpu_ratio = 1.0 / (double) os::processor_count();
635 635 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
636 636 double marking_task_overhead =
637 637 overall_cm_overhead / marking_thread_num *
638 638 (double) os::processor_count();
639 639 double sleep_factor =
640 640 (1.0 - marking_task_overhead) / marking_task_overhead;
641 641
642 642 FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
643 643 _sleep_factor = sleep_factor;
644 644 _marking_task_overhead = marking_task_overhead;
645 645 } else {
646 646 // Calculate the number of parallel marking threads by scaling
647 647 // the number of parallel GC threads.
648 648 uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
649 649 FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
650 650 _sleep_factor = 0.0;
651 651 _marking_task_overhead = 1.0;
652 652 }
653 653
654 654 assert(ConcGCThreads > 0, "Should have been set");
655 655 _parallel_marking_threads = (uint) ConcGCThreads;
656 656 _max_parallel_marking_threads = _parallel_marking_threads;
657 657
658 658 if (parallel_marking_threads() > 1) {
659 659 _cleanup_task_overhead = 1.0;
660 660 } else {
661 661 _cleanup_task_overhead = marking_task_overhead();
662 662 }
663 663 _cleanup_sleep_factor =
664 664 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
665 665
666 666 #if 0
667 667 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
668 668 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
669 669 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
670 670 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
671 671 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
672 672 #endif
673 673
674 674 guarantee(parallel_marking_threads() > 0, "peace of mind");
675 675 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
676 676 _max_parallel_marking_threads, false, true);
677 677 if (_parallel_workers == NULL) {
678 678 vm_exit_during_initialization("Failed necessary allocation.");
679 679 } else {
680 680 _parallel_workers->initialize_workers();
681 681 }
682 682 }
683 683
684 684 if (FLAG_IS_DEFAULT(MarkStackSize)) {
685 685 uintx mark_stack_size =
686 686 MIN2(MarkStackSizeMax,
687 687 MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
688 688 // Verify that the calculated value for MarkStackSize is in range.
689 689 // It would be nice to use the private utility routine from Arguments.
690 690 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
691 691 warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
692 692 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
693 693 mark_stack_size, (uintx) 1, MarkStackSizeMax);
694 694 return;
695 695 }
696 696 FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
697 697 } else {
698 698 // Verify MarkStackSize is in range.
699 699 if (FLAG_IS_CMDLINE(MarkStackSize)) {
700 700 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
701 701 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
702 702 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
703 703 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
704 704 MarkStackSize, (uintx) 1, MarkStackSizeMax);
705 705 return;
706 706 }
707 707 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
708 708 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
709 709 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
710 710 " or for MarkStackSizeMax (" UINTX_FORMAT ")",
711 711 MarkStackSize, MarkStackSizeMax);
712 712 return;
713 713 }
714 714 }
715 715 }
716 716 }
717 717
718 718 if (!_markStack.allocate(MarkStackSize)) {
719 719 warning("Failed to allocate CM marking stack");
720 720 return;
721 721 }
722 722
723 723 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
724 724 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
725 725
726 726 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
727 727 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
728 728
729 729 BitMap::idx_t card_bm_size = _card_bm.size();
730 730
731 731 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
732 732 _active_tasks = _max_worker_id;
733 733
734 734 size_t max_regions = (size_t) _g1h->max_regions();
735 735 for (uint i = 0; i < _max_worker_id; ++i) {
736 736 CMTaskQueue* task_queue = new CMTaskQueue();
737 737 task_queue->initialize();
738 738 _task_queues->register_queue(i, task_queue);
739 739
740 740 _count_card_bitmaps[i] = BitMap(card_bm_size, false);
741 741 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
742 742
743 743 _tasks[i] = new CMTask(i, this,
744 744 _count_marked_bytes[i],
745 745 &_count_card_bitmaps[i],
746 746 task_queue, _task_queues);
747 747
748 748 _accum_task_vtime[i] = 0.0;
749 749 }
750 750
751 751 // Calculate the card number for the bottom of the heap. Used
752 752 // in biasing indexes into the accounting card bitmaps.
753 753 _heap_bottom_card_num =
754 754 intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
755 755 CardTableModRefBS::card_shift);
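// Worked example (editorial annotation, hypothetical heap base): with
// 512-byte cards (card_shift == 9) and a reserved region starting at
// 0x700000000, _heap_bottom_card_num is 0x700000000 >> 9, and an address
// addr maps into the counting card bitmaps at index
// (uintptr_t(addr) >> card_shift) - _heap_bottom_card_num, so index 0 is
// the heap's first card.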
756 756
757 757 // Clear all the liveness counting data
758 758 clear_all_count_data();
759 759
760 760 // so that the call below can read a sensible value
761 761 _heap_start = g1h->reserved_region().start();
762 762 set_non_marking_state();
763 763 _completed_initialization = true;
764 764 }
765 765
766 766 void ConcurrentMark::reset() {
767 767 // Starting values for these two. This should be called in a STW
768 768 // phase.
769 769 MemRegion reserved = _g1h->g1_reserved();
770 770 _heap_start = reserved.start();
771 771 _heap_end = reserved.end();
772 772
773 773 // Separated the asserts so that we know which one fires.
774 774 assert(_heap_start != NULL, "heap bounds should look ok");
775 775 assert(_heap_end != NULL, "heap bounds should look ok");
776 776 assert(_heap_start < _heap_end, "heap bounds should look ok");
777 777
778 778 // Reset all the marking data structures and any necessary flags
779 779 reset_marking_state();
780 780
781 781 if (verbose_low()) {
782 782 gclog_or_tty->print_cr("[global] resetting");
783 783 }
784 784
785 785 // We do reset all of them, since different phases will use
786 786 // different number of active threads. So, it's easiest to have all
787 787 // of them ready.
788 788 for (uint i = 0; i < _max_worker_id; ++i) {
789 789 _tasks[i]->reset(_nextMarkBitMap);
790 790 }
791 791
792 792 // we need this to make sure that the flag is on during the evac
793 793 // pause with initial mark piggy-backed
794 794 set_concurrent_marking_in_progress();
795 795 }
796 796
797 797
798 798 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
799 799 _markStack.set_should_expand();
800 800 _markStack.setEmpty(); // Also clears the _markStack overflow flag
801 801 if (clear_overflow) {
802 802 clear_has_overflown();
803 803 } else {
804 804 assert(has_overflown(), "pre-condition");
805 805 }
806 806 _finger = _heap_start;
807 807
808 808 for (uint i = 0; i < _max_worker_id; ++i) {
809 809 CMTaskQueue* queue = _task_queues->queue(i);
810 810 queue->set_empty();
811 811 }
812 812 }
813 813
814 814 void ConcurrentMark::set_concurrency(uint active_tasks) {
815 815 assert(active_tasks <= _max_worker_id, "we should not have more");
816 816
817 817 _active_tasks = active_tasks;
818 818 // Need to update the three data structures below according to the
819 819 // number of active threads for this phase.
820 820 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
821 821 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
822 822 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
823 823 }
824 824
825 825 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
826 826 set_concurrency(active_tasks);
827 827
828 828 _concurrent = concurrent;
829 829 // We propagate this to all tasks, not just the active ones.
830 830 for (uint i = 0; i < _max_worker_id; ++i)
831 831 _tasks[i]->set_concurrent(concurrent);
832 832
833 833 if (concurrent) {
834 834 set_concurrent_marking_in_progress();
835 835 } else {
836 836 // We currently assume that the concurrent flag has been set to
837 837 // false before we start remark. At this point we should also be
838 838 // in a STW phase.
839 839 assert(!concurrent_marking_in_progress(), "invariant");
840 840 assert(out_of_regions(),
841 841 err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
842 842 p2i(_finger), p2i(_heap_end)));
843 843 }
844 844 }
845 845
846 846 void ConcurrentMark::set_non_marking_state() {
847 847 // We set the global marking state to some default values when we're
848 848 // not doing marking.
849 849 reset_marking_state();
850 850 _active_tasks = 0;
851 851 clear_concurrent_marking_in_progress();
852 852 }
853 853
854 854 ConcurrentMark::~ConcurrentMark() {
855 855 // The ConcurrentMark instance is never freed.
856 856 ShouldNotReachHere();
857 857 }
858 858
859 859 void ConcurrentMark::clearNextBitmap() {
860 860 G1CollectedHeap* g1h = G1CollectedHeap::heap();
861 861
862 862 // Make sure that the concurrent mark thread looks to still be in
863 863 // the current cycle.
864 864 guarantee(cmThread()->during_cycle(), "invariant");
865 865
866 866 // We are finishing up the current cycle by clearing the next
867 867 // marking bitmap and getting it ready for the next cycle. During
868 868 // this time no other cycle can start. So, let's make sure that this
869 869 // is the case.
870 870 guarantee(!g1h->mark_in_progress(), "invariant");
871 871
872 872 ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
873 873 g1h->heap_region_iterate(&cl);
874 874
875 875 // Clear the liveness counting data. If the marking has been aborted, the abort()
876 876 // call already did that.
877 877 if (cl.complete()) {
878 878 clear_all_count_data();
879 879 }
880 880
881 881 // Repeat the asserts from above.
882 882 guarantee(cmThread()->during_cycle(), "invariant");
883 883 guarantee(!g1h->mark_in_progress(), "invariant");
884 884 }
885 885
886 886 class CheckBitmapClearHRClosure : public HeapRegionClosure {
887 887 CMBitMap* _bitmap;
888 888 bool _error;
889 889 public:
890 890 CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
891 891 }
892 892
893 893 virtual bool doHeapRegion(HeapRegion* r) {
894 894 // This closure can be called concurrently with the mutator, so we must make sure
895 895 // that the result of the getNextMarkedWordAddress() call is compared to the
896 896 // value passed to it as limit to detect any found bits.
897 897 // We can use the region's orig_end() for the limit and the comparison value
898 898 // as it always contains the "real" end of the region that never changes and
899 899 // has no side effects.
900 900 // Due to the latter, there can also be no problem with the compiler generating
901 901 // reloads of the orig_end() call.
902 902 HeapWord* end = r->orig_end();
903 903 return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
904 904 }
905 905 };
906 906
907 907 bool ConcurrentMark::nextMarkBitmapIsClear() {
908 908 CheckBitmapClearHRClosure cl(_nextMarkBitMap);
909 909 _g1h->heap_region_iterate(&cl);
910 910 return cl.complete();
911 911 }
912 912
913 913 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
914 914 public:
915 915 bool doHeapRegion(HeapRegion* r) {
916 916 if (!r->continuesHumongous()) {
917 917 r->note_start_of_marking();
918 918 }
919 919 return false;
920 920 }
921 921 };
922 922
923 923 void ConcurrentMark::checkpointRootsInitialPre() {
924 924 G1CollectedHeap* g1h = G1CollectedHeap::heap();
925 925 G1CollectorPolicy* g1p = g1h->g1_policy();
926 926
927 927 _has_aborted = false;
928 928
929 929 #ifndef PRODUCT
930 930 if (G1PrintReachableAtInitialMark) {
931 931 print_reachable("at-cycle-start",
932 932 VerifyOption_G1UsePrevMarking, true /* all */);
933 933 }
934 934 #endif
935 935
936 936 // Initialise marking structures. This has to be done in a STW phase.
937 937 reset();
938 938
939 939 // For each region note start of marking.
940 940 NoteStartOfMarkHRClosure startcl;
941 941 g1h->heap_region_iterate(&startcl);
942 942 }
943 943
944 944
945 945 void ConcurrentMark::checkpointRootsInitialPost() {
946 946 G1CollectedHeap* g1h = G1CollectedHeap::heap();
947 947
948 948 // If we force an overflow during remark, the remark operation will
949 949 // actually abort and we'll restart concurrent marking. If we always
950 950 // force an overflow during remark we'll never actually complete the
951 951 // marking phase. So, we initialize this here, at the start of the
952 952 // cycle, so that the remaining overflow count will decrease at
953 953 // every remark and we'll eventually not need to cause one.
954 954 force_overflow_stw()->init();
955 955
956 956 // Start Concurrent Marking weak-reference discovery.
957 957 ReferenceProcessor* rp = g1h->ref_processor_cm();
958 958 // enable ("weak") refs discovery
959 959 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
960 960 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
961 961
962 962 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
963 963 // This is the start of the marking cycle; we expect all
964 964 // threads to have SATB queues with active set to false.
965 965 satb_mq_set.set_active_all_threads(true, /* new active value */
966 966 false /* expected_active */);
967 967
968 968 _root_regions.prepare_for_scan();
969 969
970 970 // update_g1_committed() will be called at the end of an evac pause
971 971 // when marking is on. So, it's also called at the end of the
972 972 // initial-mark pause to update the heap end, if the heap expands
973 973 // during it. No need to call it here.
974 974 }
975 975
976 976 /*
977 977 * Notice that in the next two methods, we actually leave the STS
978 978 * during the barrier sync and join it immediately afterwards. If we
979 979 * do not do this, the following deadlock can occur: one thread could
980 980 * be in the barrier sync code, waiting for the other thread to also
981 981 * sync up, whereas another one could be trying to yield, while also
982 982 * waiting for the other threads to sync up too.
983 983 *
984 984 * Note, however, that this code is also used during remark and in
985 985 * this case we should not attempt to leave / enter the STS, otherwise
986 986 * we'll either hit an assert (debug / fastdebug) or deadlock
987 987 * (product). So we should only leave / enter the STS if we are
988 988 * operating concurrently.
989 989 *
990 990 * Because the thread that does the sync barrier has left the STS, it
991 991 * is possible for it to be suspended by a Full GC or for an evacuation
992 992 * pause to occur. This is actually safe, since entering the sync
993 993 * barrier is one of the last things do_marking_step() does, and it
994 994 * doesn't manipulate any data structures afterwards.
995 995 */
996 996
997 997 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
998 998 if (verbose_low()) {
999 999 gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
1000 1000 }
1001 1001
1002 1002 if (concurrent()) {
1003 1003 SuspendibleThreadSet::leave();
1004 1004 }
1005 1005
1006 1006 bool barrier_aborted = !_first_overflow_barrier_sync.enter();
1007 1007
1008 1008 if (concurrent()) {
1009 1009 SuspendibleThreadSet::join();
1010 1010 }
1011 1011 // at this point everyone should have synced up and not be doing any
1012 1012 // more work
1013 1013
1014 1014 if (verbose_low()) {
1015 1015 if (barrier_aborted) {
1016 1016 gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
1017 1017 } else {
1018 1018 gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
1019 1019 }
1020 1020 }
1021 1021
1022 1022 if (barrier_aborted) {
1023 1023 // If the barrier aborted we ignore the overflow condition and
1024 1024 // just abort the whole marking phase as quickly as possible.
1025 1025 return;
1026 1026 }
1027 1027
1028 1028 // If we're executing the concurrent phase of marking, reset the marking
1029 1029 // state; otherwise the marking state is reset after reference processing,
1030 1030 // during the remark pause.
1031 1031 // If we reset here as a result of an overflow during the remark we will
1032 1032 // see assertion failures from any subsequent set_concurrency_and_phase()
1033 1033 // calls.
1034 1034 if (concurrent()) {
1035 1035 // let the task associated with worker 0 do this
1036 1036 if (worker_id == 0) {
1037 1037 // task 0 is responsible for clearing the global data structures
1038 1038 // We should be here because of an overflow. During STW we should
1039 1039 // not clear the overflow flag since we rely on it being true when
1040 1040 // we exit this method to abort the pause and restart concurrent
1041 1041 // marking.
1042 1042 reset_marking_state(true /* clear_overflow */);
1043 1043 force_overflow()->update();
1044 1044
1045 1045 if (G1Log::fine()) {
1046 1046 gclog_or_tty->gclog_stamp(concurrent_gc_id());
1047 1047 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1048 1048 }
1049 1049 }
1050 1050 }
1051 1051
1052 1052 // after this, each task should reset its own data structures and
1053 1053 // then go into the second barrier
1054 1054 }
1055 1055
1056 1056 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1057 1057 if (verbose_low()) {
1058 1058 gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1059 1059 }
1060 1060
1061 1061 if (concurrent()) {
1062 1062 SuspendibleThreadSet::leave();
1063 1063 }
1064 1064
1065 1065 bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1066 1066
1067 1067 if (concurrent()) {
1068 1068 SuspendibleThreadSet::join();
1069 1069 }
1070 1070 // at this point everything should be re-initialized and ready to go
1071 1071
1072 1072 if (verbose_low()) {
1073 1073 if (barrier_aborted) {
1074 1074 gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1075 1075 } else {
1076 1076 gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1077 1077 }
1078 1078 }
1079 1079 }
1080 1080
1081 1081 #ifndef PRODUCT
1082 1082 void ForceOverflowSettings::init() {
1083 1083 _num_remaining = G1ConcMarkForceOverflow;
1084 1084 _force = false;
1085 1085 update();
1086 1086 }
1087 1087
1088 1088 void ForceOverflowSettings::update() {
1089 1089 if (_num_remaining > 0) {
1090 1090 _num_remaining -= 1;
1091 1091 _force = true;
1092 1092 } else {
1093 1093 _force = false;
1094 1094 }
1095 1095 }
1096 1096
1097 1097 bool ForceOverflowSettings::should_force() {
1098 1098 if (_force) {
1099 1099 _force = false;
1100 1100 return true;
1101 1101 } else {
1102 1102 return false;
1103 1103 }
1104 1104 }
1105 1105 #endif // !PRODUCT
1106 1106
1107 1107 class CMConcurrentMarkingTask: public AbstractGangTask {
1108 1108 private:
1109 1109 ConcurrentMark* _cm;
1110 1110 ConcurrentMarkThread* _cmt;
1111 1111
1112 1112 public:
1113 1113 void work(uint worker_id) {
1114 1114 assert(Thread::current()->is_ConcurrentGC_thread(),
1115 1115 "this should only be done by a conc GC thread");
1116 1116 ResourceMark rm;
1117 1117
1118 1118 double start_vtime = os::elapsedVTime();
1119 1119
1120 1120 SuspendibleThreadSet::join();
1121 1121
1122 1122 assert(worker_id < _cm->active_tasks(), "invariant");
1123 1123 CMTask* the_task = _cm->task(worker_id);
1124 1124 the_task->record_start_time();
1125 1125 if (!_cm->has_aborted()) {
1126 1126 do {
1127 1127 double start_vtime_sec = os::elapsedVTime();
1128 1128 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1129 1129
1130 1130 the_task->do_marking_step(mark_step_duration_ms,
1131 1131 true /* do_termination */,
1132 1132 false /* is_serial*/);
1133 1133
1134 1134 double end_vtime_sec = os::elapsedVTime();
1135 1135 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1136 1136 _cm->clear_has_overflown();
1137 1137
1138 1138 _cm->do_yield_check(worker_id);
1139 1139
1140 1140 jlong sleep_time_ms;
1141 1141 if (!_cm->has_aborted() && the_task->has_aborted()) {
1142 1142 sleep_time_ms =
1143 1143 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1144 1144 SuspendibleThreadSet::leave();
1145 1145 os::sleep(Thread::current(), sleep_time_ms, false);
1146 1146 SuspendibleThreadSet::join();
1147 1147 }
1148 1148 } while (!_cm->has_aborted() && the_task->has_aborted());
1149 1149 }
1150 1150 the_task->record_end_time();
1151 1151 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1152 1152
1153 1153 SuspendibleThreadSet::leave();
1154 1154
1155 1155 double end_vtime = os::elapsedVTime();
1156 1156 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1157 1157 }
1158 1158
1159 1159 CMConcurrentMarkingTask(ConcurrentMark* cm,
1160 1160 ConcurrentMarkThread* cmt) :
1161 1161 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1162 1162
1163 1163 ~CMConcurrentMarkingTask() { }
1164 1164 };
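// Worked example (editorial annotation): the os::sleep() above throttles
// marking to the configured overhead. sleep_time_ms is
// elapsed_vtime_sec * sleep_factor * 1000.0, where the constructor sets
// sleep_factor = (1 - marking_task_overhead) / marking_task_overhead; at a
// 50% target overhead, sleep_factor is 1.0, so a marking step that consumed
// 10 ms of vtime is followed by a 10 ms sleep.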
1165 1165
1166 1166 // Calculates the number of active workers for a concurrent
1167 1167 // phase.
1168 1168 uint ConcurrentMark::calc_parallel_marking_threads() {
1169 1169 if (G1CollectedHeap::use_parallel_gc_threads()) {
1170 1170 uint n_conc_workers = 0;
1171 1171 if (!UseDynamicNumberOfGCThreads ||
1172 1172 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1173 1173 !ForceDynamicNumberOfGCThreads)) {
1174 1174 n_conc_workers = max_parallel_marking_threads();
1175 1175 } else {
1176 1176 n_conc_workers =
1177 1177 AdaptiveSizePolicy::calc_default_active_workers(
1178 1178 max_parallel_marking_threads(),
1179 1179 1, /* Minimum workers */
1180 1180 parallel_marking_threads(),
1181 1181 Threads::number_of_non_daemon_threads());
1182 1182 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1183 1183 // that scaling has already gone into "_max_parallel_marking_threads".
1184 1184 }
1185 1185 assert(n_conc_workers > 0, "Always need at least 1");
1186 1186 return n_conc_workers;
1187 1187 }
1188 1188 // If we are not running with any parallel GC threads we will not
1189 1189 // have spawned any marking threads either. Hence the number of
1190 1190 // concurrent workers should be 0.
1191 1191 return 0;
1192 1192 }
1193 1193
1194 1194 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1195 1195 // Currently, only survivors can be root regions.
1196 1196 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1197 1197 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1198 1198
1199 1199 const uintx interval = PrefetchScanIntervalInBytes;
1200 1200 HeapWord* curr = hr->bottom();
1201 1201 const HeapWord* end = hr->top();
1202 1202 while (curr < end) {
1203 1203 Prefetch::read(curr, interval);
1204 1204 oop obj = oop(curr);
1205 1205 int size = obj->oop_iterate(&cl);
1206 1206 assert(size == obj->size(), "sanity");
1207 1207 curr += size;
1208 1208 }
1209 1209 }
1210 1210
1211 1211 class CMRootRegionScanTask : public AbstractGangTask {
1212 1212 private:
1213 1213 ConcurrentMark* _cm;
1214 1214
1215 1215 public:
1216 1216 CMRootRegionScanTask(ConcurrentMark* cm) :
1217 1217 AbstractGangTask("Root Region Scan"), _cm(cm) { }
1218 1218
1219 1219 void work(uint worker_id) {
1220 1220 assert(Thread::current()->is_ConcurrentGC_thread(),
1221 1221 "this should only be done by a conc GC thread");
1222 1222
1223 1223 CMRootRegions* root_regions = _cm->root_regions();
1224 1224 HeapRegion* hr = root_regions->claim_next();
1225 1225 while (hr != NULL) {
1226 1226 _cm->scanRootRegion(hr, worker_id);
1227 1227 hr = root_regions->claim_next();
1228 1228 }
1229 1229 }
1230 1230 };
1231 1231
1232 1232 void ConcurrentMark::scanRootRegions() {
1233 1233 // Start of concurrent marking.
1234 1234 ClassLoaderDataGraph::clear_claimed_marks();
1235 1235
1236 1236 // scan_in_progress() will have been set to true only if there was
1237 1237 // at least one root region to scan. So, if it's false, we
1238 1238 // should not attempt to do any further work.
1239 1239 if (root_regions()->scan_in_progress()) {
1240 1240 _parallel_marking_threads = calc_parallel_marking_threads();
1241 1241 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1242 1242 "Maximum number of marking threads exceeded");
1243 1243 uint active_workers = MAX2(1U, parallel_marking_threads());
1244 1244
1245 1245 CMRootRegionScanTask task(this);
1246 1246 if (use_parallel_marking_threads()) {
1247 1247 _parallel_workers->set_active_workers((int) active_workers);
1248 1248 _parallel_workers->run_task(&task);
1249 1249 } else {
1250 1250 task.work(0);
1251 1251 }
1252 1252
1253 1253 // It's possible that has_aborted() is true here without actually
1254 1254 // aborting the survivor scan earlier. This is OK as it's
1255 1255 // mainly used for sanity checking.
1256 1256 root_regions()->scan_finished();
1257 1257 }
1258 1258 }
1259 1259
1260 1260 void ConcurrentMark::markFromRoots() {
1261 1261 // we might be tempted to assert that:
1262 1262 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1263 1263 // "inconsistent argument?");
1264 1264 // However that wouldn't be right, because it's possible that
1265 1265 // a safepoint is indeed in progress as a younger generation
1266 1266 // stop-the-world GC happens even as we mark in this generation.
1267 1267
1268 1268 _restart_for_overflow = false;
1269 1269 force_overflow_conc()->init();
1270 1270
1271 1271 // _g1h has _n_par_threads
1272 1272 _parallel_marking_threads = calc_parallel_marking_threads();
1273 1273 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1274 1274 "Maximum number of marking threads exceeded");
1275 1275
1276 1276 uint active_workers = MAX2(1U, parallel_marking_threads());
1277 1277
1278 1278 // Parallel task terminator is set in "set_concurrency_and_phase()"
1279 1279 set_concurrency_and_phase(active_workers, true /* concurrent */);
1280 1280
1281 1281 CMConcurrentMarkingTask markingTask(this, cmThread());
1282 1282 if (use_parallel_marking_threads()) {
1283 1283 _parallel_workers->set_active_workers((int)active_workers);
1284 1284 // Don't set _n_par_threads because it affects MT in process_roots()
1285 1285 // and the decisions on that MT processing are made elsewhere.
1286 1286 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1287 1287 _parallel_workers->run_task(&markingTask);
1288 1288 } else {
1289 1289 markingTask.work(0);
1290 1290 }
1291 1291 print_stats();
1292 1292 }
1293 1293
1294 1294 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1295 1295 // world is stopped at this checkpoint
1296 1296 assert(SafepointSynchronize::is_at_safepoint(),
1297 1297 "world should be stopped");
1298 1298
1299 1299 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1300 1300
1301 1301 // If a full collection has happened, we shouldn't do this.
1302 1302 if (has_aborted()) {
1303 1303 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1304 1304 return;
1305 1305 }
1306 1306
1307 1307 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1308 1308
1309 1309 if (VerifyDuringGC) {
1310 1310 HandleMark hm; // handle scope
1311 1311 Universe::heap()->prepare_for_verify();
1312 1312 Universe::verify(VerifyOption_G1UsePrevMarking,
1313 1313 " VerifyDuringGC:(before)");
1314 1314 }
1315 1315 g1h->check_bitmaps("Remark Start");
1316 1316
1317 1317 G1CollectorPolicy* g1p = g1h->g1_policy();
1318 1318 g1p->record_concurrent_mark_remark_start();
1319 1319
1320 1320 double start = os::elapsedTime();
1321 1321
1322 1322 checkpointRootsFinalWork();
1323 1323
1324 1324 double mark_work_end = os::elapsedTime();
1325 1325
1326 1326 weakRefsWork(clear_all_soft_refs);
1327 1327
1328 1328 if (has_overflown()) {
1329 1329 // Oops. We overflowed. Restart concurrent marking.
1330 1330 _restart_for_overflow = true;
1331 1331 if (G1TraceMarkStackOverflow) {
1332 1332 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1333 1333 }
1334 1334
1335 1335 // Verify the heap w.r.t. the previous marking bitmap.
1336 1336 if (VerifyDuringGC) {
1337 1337 HandleMark hm; // handle scope
1338 1338 Universe::heap()->prepare_for_verify();
1339 1339 Universe::verify(VerifyOption_G1UsePrevMarking,
1340 1340 " VerifyDuringGC:(overflow)");
1341 1341 }
1342 1342
1343 1343 // Clear the marking state because we will be restarting
1344 1344 // marking due to overflowing the global mark stack.
1345 1345 reset_marking_state();
1346 1346 } else {
1347 1347 // Aggregate the per-task counting data that we have accumulated
1348 1348 // while marking.
1349 1349 aggregate_count_data();
1350 1350
1351 1351 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1352 1352 // We're done with marking.
1353 1353 // This is the end of the marking cycle; we expect all
1354 1354 // threads to have SATB queues with active set to true.
1355 1355 satb_mq_set.set_active_all_threads(false, /* new active value */
1356 1356 true /* expected_active */);
1357 1357
1358 1358 if (VerifyDuringGC) {
1359 1359 HandleMark hm; // handle scope
1360 1360 Universe::heap()->prepare_for_verify();
1361 1361 Universe::verify(VerifyOption_G1UseNextMarking,
1362 1362 " VerifyDuringGC:(after)");
1363 1363 }
1364 1364 g1h->check_bitmaps("Remark End");
1365 1365 assert(!restart_for_overflow(), "sanity");
1366 1366 // Completely reset the marking state since marking completed
1367 1367 set_non_marking_state();
1368 1368 }
1369 1369
1370 1370 // Expand the marking stack, if we have to and if we can.
1371 1371 if (_markStack.should_expand()) {
1372 1372 _markStack.expand();
1373 1373 }
1374 1374
1375 1375 // Statistics
1376 1376 double now = os::elapsedTime();
1377 1377 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1378 1378 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1379 1379 _remark_times.add((now - start) * 1000.0);
1380 1380
1381 1381 g1p->record_concurrent_mark_remark_end();
1382 1382
1383 1383 G1CMIsAliveClosure is_alive(g1h);
1384 1384 g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1385 1385 }
1386 1386
1387 1387 // Base class of the closures that finalize and verify the
1388 1388 // liveness counting data.
1389 1389 class CMCountDataClosureBase: public HeapRegionClosure {
1390 1390 protected:
1391 1391 G1CollectedHeap* _g1h;
1392 1392 ConcurrentMark* _cm;
1393 1393 CardTableModRefBS* _ct_bs;
1394 1394
1395 1395 BitMap* _region_bm;
1396 1396 BitMap* _card_bm;
1397 1397
1398 1398 // Takes a region that's not empty (i.e., it has at least one
1399 1399 // live object in it) and sets its corresponding bit on the region
1400 1400 // bitmap to 1. If the region is "starts humongous" it will also set
1401 1401 // to 1 the bits on the region bitmap that correspond to its
1402 1402 // associated "continues humongous" regions.
1403 1403 void set_bit_for_region(HeapRegion* hr) {
1404 1404 assert(!hr->continuesHumongous(), "should have filtered those out");
1405 1405
1406 1406 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1407 1407 if (!hr->startsHumongous()) {
1408 1408 // Normal (non-humongous) case: just set the bit.
1409 1409 _region_bm->par_at_put(index, true);
1410 1410 } else {
1411 1411 // Starts humongous case: calculate how many regions are part of
1412 1412 // this humongous region and then set the bit range.
1413 1413 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1414 1414 _region_bm->par_at_put_range(index, end_index, true);
1415 1415 }
1416 1416 }
1417 1417
1418 1418 public:
1419 1419 CMCountDataClosureBase(G1CollectedHeap* g1h,
1420 1420 BitMap* region_bm, BitMap* card_bm):
1421 1421 _g1h(g1h), _cm(g1h->concurrent_mark()),
1422 1422 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1423 1423 _region_bm(region_bm), _card_bm(card_bm) { }
1424 1424 };
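The par_at_put_range() call above is what lets one "starts humongous" entry stand in for its whole "continues humongous" tail. A minimal standalone sketch of the same bookkeeping (not HotSpot code; indices invented):

#include <cstdio>
#include <vector>

// Normal region: set one bit. Starts-humongous region: set the whole
// [index, end_index) range that covers its continues-humongous tail.
static void set_bit_for_region(std::vector<bool>& region_bm,
                               size_t index, size_t end_index,
                               bool starts_humongous) {
  if (!starts_humongous) {
    region_bm[index] = true;            // normal case: one bit
  } else {
    for (size_t i = index; i < end_index; ++i) {
      region_bm[i] = true;              // humongous case: bit range
    }
  }
}

int main() {
  std::vector<bool> region_bm(16, false);
  set_bit_for_region(region_bm, 3, 4, false);   // ordinary region 3
  set_bit_for_region(region_bm, 7, 10, true);   // humongous spanning 7..9
  for (size_t i = 0; i < region_bm.size(); ++i) {
    std::printf("%zu:%d ", i, (int)region_bm[i]);
  }
  std::printf("\n");
  return 0;
}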
1425 1425
1426 1426 // Closure that calculates the # live objects per region. Used
1427 1427 // for verification purposes during the cleanup pause.
1428 1428 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1429 1429 CMBitMapRO* _bm;
1430 1430 size_t _region_marked_bytes;
1431 1431
1432 1432 public:
1433 1433 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1434 1434 BitMap* region_bm, BitMap* card_bm) :
1435 1435 CMCountDataClosureBase(g1h, region_bm, card_bm),
1436 1436 _bm(bm), _region_marked_bytes(0) { }
1437 1437
1438 1438 bool doHeapRegion(HeapRegion* hr) {
1439 1439
1440 1440 if (hr->continuesHumongous()) {
1441 1441 // We will ignore these here and process them when their
1442 1442 // associated "starts humongous" region is processed (see
1443 1443       // set_bit_for_region()). Note that we cannot rely on their
1444 1444       // associated "starts humongous" region to have its bit set to
1445 1445 // 1 since, due to the region chunking in the parallel region
1446 1446 // iteration, a "continues humongous" region might be visited
1447 1447 // before its associated "starts humongous".
1448 1448 return false;
1449 1449 }
1450 1450
1451 1451 HeapWord* ntams = hr->next_top_at_mark_start();
1452 1452 HeapWord* start = hr->bottom();
1453 1453
1454 1454 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1455 1455 err_msg("Preconditions not met - "
1456 1456 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1457 1457 p2i(start), p2i(ntams), p2i(hr->end())));
1458 1458
1459 1459 // Find the first marked object at or after "start".
1460 1460 start = _bm->getNextMarkedWordAddress(start, ntams);
1461 1461
1462 1462 size_t marked_bytes = 0;
1463 1463
1464 1464 while (start < ntams) {
1465 1465 oop obj = oop(start);
1466 1466 int obj_sz = obj->size();
1467 1467 HeapWord* obj_end = start + obj_sz;
1468 1468
1469 1469 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1470 1470 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1471 1471
1472 1472 // Note: if we're looking at the last region in heap - obj_end
1473 1473 // could be actually just beyond the end of the heap; end_idx
1474 1474 // will then correspond to a (non-existent) card that is also
1475 1475 // just beyond the heap.
1476 1476 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1477 1477 // end of object is not card aligned - increment to cover
1478 1478 // all the cards spanned by the object
1479 1479 end_idx += 1;
1480 1480 }
1481 1481
1482 1482 // Set the bits in the card BM for the cards spanned by this object.
1483 1483 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1484 1484
1485 1485 // Add the size of this object to the number of marked bytes.
1486 1486 marked_bytes += (size_t)obj_sz * HeapWordSize;
1487 1487
1488 1488 // Find the next marked object after this one.
1489 1489 start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1490 1490 }
1491 1491
1492 1492 // Mark the allocated-since-marking portion...
1493 1493 HeapWord* top = hr->top();
1494 1494 if (ntams < top) {
1495 1495 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1496 1496 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1497 1497
1498 1498 // Note: if we're looking at the last region in heap - top
1499 1499 // could be actually just beyond the end of the heap; end_idx
1500 1500 // will then correspond to a (non-existent) card that is also
1501 1501 // just beyond the heap.
1502 1502 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1503 1503 // end of object is not card aligned - increment to cover
1504 1504 // all the cards spanned by the object
1505 1505 end_idx += 1;
1506 1506 }
1507 1507 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1508 1508
1509 1509 // This definitely means the region has live objects.
1510 1510 set_bit_for_region(hr);
1511 1511 }
1512 1512
1513 1513 // Update the live region bitmap.
1514 1514 if (marked_bytes > 0) {
1515 1515 set_bit_for_region(hr);
1516 1516 }
1517 1517
1518 1518 // Set the marked bytes for the current region so that
1519 1519     // it can be queried by a calling verification routine
1520 1520 _region_marked_bytes = marked_bytes;
1521 1521
1522 1522 return false;
1523 1523 }
1524 1524
1525 1525 size_t region_marked_bytes() const { return _region_marked_bytes; }
1526 1526 };
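The start_idx/end_idx arithmetic above is easiest to see with concrete numbers. A hedged sketch, assuming the usual 512-byte (2^9) card size; the addresses are invented:

#include <cstdint>
#include <cstdio>

static const uintptr_t kCardShift = 9;   // 512-byte cards (assumption)

// Card index of an address relative to the heap base.
static size_t card_index_for(uintptr_t heap_base, uintptr_t addr) {
  return (addr - heap_base) >> kCardShift;
}

int main() {
  uintptr_t heap_base = 0x100000;
  uintptr_t obj_start = heap_base + 0x250;   // falls inside card 1
  uintptr_t obj_end   = obj_start + 0x300;   // ends inside card 2

  size_t start_idx = card_index_for(heap_base, obj_start);
  size_t end_idx   = card_index_for(heap_base, obj_end);
  // Mirror the "not card aligned" adjustment above: bump end_idx so the
  // partially covered last card is included in the marked range.
  if ((obj_end & ((1u << kCardShift) - 1)) != 0) {
    end_idx += 1;
  }
  std::printf("cards [%zu, %zu) are marked live\n", start_idx, end_idx);
  return 0;
}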
1527 1527
1528 1528 // Heap region closure used for verifying the counting data
1529 1529 // that was accumulated concurrently and aggregated during
1530 1530 // the remark pause. This closure is applied to the heap
1531 1531 // regions during the STW cleanup pause.
1532 1532
1533 1533 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1534 1534 G1CollectedHeap* _g1h;
1535 1535 ConcurrentMark* _cm;
1536 1536 CalcLiveObjectsClosure _calc_cl;
1537 1537 BitMap* _region_bm; // Region BM to be verified
1538 1538 BitMap* _card_bm; // Card BM to be verified
1539 1539 bool _verbose; // verbose output?
1540 1540
1541 1541 BitMap* _exp_region_bm; // Expected Region BM values
1542 1542 BitMap* _exp_card_bm; // Expected card BM values
1543 1543
1544 1544 int _failures;
1545 1545
1546 1546 public:
1547 1547 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1548 1548 BitMap* region_bm,
1549 1549 BitMap* card_bm,
1550 1550 BitMap* exp_region_bm,
1551 1551 BitMap* exp_card_bm,
1552 1552 bool verbose) :
1553 1553 _g1h(g1h), _cm(g1h->concurrent_mark()),
1554 1554 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1555 1555 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1556 1556 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1557 1557 _failures(0) { }
1558 1558
1559 1559 int failures() const { return _failures; }
1560 1560
1561 1561 bool doHeapRegion(HeapRegion* hr) {
1562 1562 if (hr->continuesHumongous()) {
1563 1563 // We will ignore these here and process them when their
1564 1564 // associated "starts humongous" region is processed (see
1565 1565       // set_bit_for_region()). Note that we cannot rely on their
1566 1566       // associated "starts humongous" region to have its bit set to
1567 1567 // 1 since, due to the region chunking in the parallel region
1568 1568 // iteration, a "continues humongous" region might be visited
1569 1569 // before its associated "starts humongous".
1570 1570 return false;
1571 1571 }
1572 1572
1573 1573 int failures = 0;
1574 1574
1575 1575 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1576 1576 // this region and set the corresponding bits in the expected region
1577 1577 // and card bitmaps.
1578 1578 bool res = _calc_cl.doHeapRegion(hr);
1579 1579 assert(res == false, "should be continuing");
1580 1580
1581 1581 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1582 1582 Mutex::_no_safepoint_check_flag);
1583 1583
1584 1584 // Verify the marked bytes for this region.
1585 1585 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1586 1586 size_t act_marked_bytes = hr->next_marked_bytes();
1587 1587
1588 1588 // We're not OK if expected marked bytes > actual marked bytes. It means
1589 1589     // we have missed accounting for some objects during the actual marking.
1590 1590 if (exp_marked_bytes > act_marked_bytes) {
1591 1591 if (_verbose) {
1592 1592 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1593 1593 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1594 1594 hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1595 1595 }
1596 1596 failures += 1;
1597 1597 }
1598 1598
1599 1599 // Verify the bit, for this region, in the actual and expected
1600 1600 // (which was just calculated) region bit maps.
1601 1601 // We're not OK if the bit in the calculated expected region
1602 1602 // bitmap is set and the bit in the actual region bitmap is not.
1603 1603 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1604 1604
1605 1605 bool expected = _exp_region_bm->at(index);
1606 1606 bool actual = _region_bm->at(index);
1607 1607 if (expected && !actual) {
1608 1608 if (_verbose) {
1609 1609 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1610 1610 "expected: %s, actual: %s",
1611 1611 hr->hrm_index(),
1612 1612 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1613 1613 }
1614 1614 failures += 1;
1615 1615 }
1616 1616
1617 1617 // Verify that the card bit maps for the cards spanned by the current
1618 1618 // region match. We have an error if we have a set bit in the expected
1619 1619 // bit map and the corresponding bit in the actual bitmap is not set.
1620 1620
1621 1621 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1622 1622 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1623 1623
1624 1624 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1625 1625 expected = _exp_card_bm->at(i);
1626 1626 actual = _card_bm->at(i);
1627 1627
1628 1628 if (expected && !actual) {
1629 1629 if (_verbose) {
1630 1630 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1631 1631 "expected: %s, actual: %s",
1632 1632 hr->hrm_index(), i,
1633 1633 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1634 1634 }
1635 1635 failures += 1;
1636 1636 }
1637 1637 }
1638 1638
1639 1639 if (failures > 0 && _verbose) {
1640 1640 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1641 1641 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1642 1642 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1643 1643 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1644 1644 }
1645 1645
1646 1646 _failures += failures;
1647 1647
1648 1648 // We could stop iteration over the heap when we
1649 1649 // find the first violating region by returning true.
1650 1650 return false;
1651 1651 }
1652 1652 };
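The verification above is deliberately one-sided: a bit set in the expected bitmap but clear in the actual one is a failure, while the reverse is tolerated (the actual data may be a conservative over-approximation). A standalone sketch of that check (not HotSpot code):

#include <cstdio>
#include <vector>

// Count positions where the expected bitmap claims liveness that the
// actual bitmap missed; extra bits in the actual bitmap are ignored.
static int count_failures(const std::vector<bool>& expected,
                          const std::vector<bool>& actual) {
  int failures = 0;
  for (size_t i = 0; i < expected.size(); ++i) {
    if (expected[i] && !actual[i]) {
      std::printf("bit %zu: expected set, actually clear\n", i);
      failures += 1;
    }
  }
  return failures;
}

int main() {
  std::vector<bool> expected = {true, false, true, true};
  std::vector<bool> actual   = {true, true,  true, false};  // idx 3 fails
  std::printf("failures: %d\n", count_failures(expected, actual));
  return 0;
}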
1653 1653
1654 1654 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1655 1655 protected:
1656 1656 G1CollectedHeap* _g1h;
1657 1657 ConcurrentMark* _cm;
1658 1658 BitMap* _actual_region_bm;
1659 1659 BitMap* _actual_card_bm;
1660 1660
1661 1661 uint _n_workers;
1662 1662
1663 1663 BitMap* _expected_region_bm;
1664 1664 BitMap* _expected_card_bm;
1665 1665
1666 1666 int _failures;
1667 1667 bool _verbose;
1668 1668
1669 1669 public:
1670 1670 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1671 1671 BitMap* region_bm, BitMap* card_bm,
1672 1672 BitMap* expected_region_bm, BitMap* expected_card_bm)
1673 1673 : AbstractGangTask("G1 verify final counting"),
1674 1674 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1675 1675 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1676 1676 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1677 1677 _failures(0), _verbose(false),
1678 1678 _n_workers(0) {
1679 1679 assert(VerifyDuringGC, "don't call this otherwise");
1680 1680
1681 1681 // Use the value already set as the number of active threads
1682 1682 // in the call to run_task().
1683 1683 if (G1CollectedHeap::use_parallel_gc_threads()) {
1684 1684 assert( _g1h->workers()->active_workers() > 0,
1685 1685 "Should have been previously set");
1686 1686 _n_workers = _g1h->workers()->active_workers();
1687 1687 } else {
1688 1688 _n_workers = 1;
1689 1689 }
1690 1690
1691 1691 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1692 1692 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1693 1693
1694 1694 _verbose = _cm->verbose_medium();
1695 1695 }
1696 1696
1697 1697 void work(uint worker_id) {
1698 1698 assert(worker_id < _n_workers, "invariant");
1699 1699
1700 1700 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1701 1701 _actual_region_bm, _actual_card_bm,
1702 1702 _expected_region_bm,
1703 1703 _expected_card_bm,
1704 1704 _verbose);
1705 1705
1706 1706 if (G1CollectedHeap::use_parallel_gc_threads()) {
1707 1707 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1708 1708 worker_id,
1709 1709 _n_workers,
1710 1710 HeapRegion::VerifyCountClaimValue);
1711 1711 } else {
1712 1712 _g1h->heap_region_iterate(&verify_cl);
1713 1713 }
1714 1714
1715 1715 Atomic::add(verify_cl.failures(), &_failures);
1716 1716 }
1717 1717
1718 1718 int failures() const { return _failures; }
1719 1719 };
1720 1720
1721 1721 // Closure that finalizes the liveness counting data.
1722 1722 // Used during the cleanup pause.
1723 1723 // Sets the bits corresponding to the interval [NTAMS, top)
1724 1724 // (which contains the implicitly live objects) in the
1725 1725 // card liveness bitmap. Also sets the bit for each region,
1726 1726 // containing live data, in the region liveness bitmap.
1727 1727
1728 1728 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1729 1729 public:
1730 1730 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1731 1731 BitMap* region_bm,
1732 1732 BitMap* card_bm) :
1733 1733 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1734 1734
1735 1735 bool doHeapRegion(HeapRegion* hr) {
1736 1736
1737 1737 if (hr->continuesHumongous()) {
1738 1738 // We will ignore these here and process them when their
1739 1739 // associated "starts humongous" region is processed (see
1740 1740       // set_bit_for_region()). Note that we cannot rely on their
1741 1741       // associated "starts humongous" region to have its bit set to
1742 1742 // 1 since, due to the region chunking in the parallel region
1743 1743 // iteration, a "continues humongous" region might be visited
1744 1744 // before its associated "starts humongous".
1745 1745 return false;
1746 1746 }
1747 1747
1748 1748 HeapWord* ntams = hr->next_top_at_mark_start();
1749 1749 HeapWord* top = hr->top();
1750 1750
1751 1751 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1752 1752
1753 1753 // Mark the allocated-since-marking portion...
1754 1754 if (ntams < top) {
1755 1755 // This definitely means the region has live objects.
1756 1756 set_bit_for_region(hr);
1757 1757
1758 1758 // Now set the bits in the card bitmap for [ntams, top)
1759 1759 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1760 1760 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1761 1761
1762 1762 // Note: if we're looking at the last region in heap - top
1763 1763 // could be actually just beyond the end of the heap; end_idx
1764 1764 // will then correspond to a (non-existent) card that is also
1765 1765 // just beyond the heap.
1766 1766 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1767 1767 // end of object is not card aligned - increment to cover
1768 1768 // all the cards spanned by the object
1769 1769 end_idx += 1;
1770 1770 }
1771 1771
1772 1772 assert(end_idx <= _card_bm->size(),
1773 1773 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1774 1774 end_idx, _card_bm->size()));
1775 1775 assert(start_idx < _card_bm->size(),
1776 1776 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1777 1777 start_idx, _card_bm->size()));
1778 1778
1779 1779 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1780 1780 }
1781 1781
1782 1782 // Set the bit for the region if it contains live data
1783 1783 if (hr->next_marked_bytes() > 0) {
1784 1784 set_bit_for_region(hr);
1785 1785 }
1786 1786
1787 1787 return false;
1788 1788 }
1789 1789 };
1790 1790
1791 1791 class G1ParFinalCountTask: public AbstractGangTask {
1792 1792 protected:
1793 1793 G1CollectedHeap* _g1h;
1794 1794 ConcurrentMark* _cm;
1795 1795 BitMap* _actual_region_bm;
1796 1796 BitMap* _actual_card_bm;
1797 1797
1798 1798 uint _n_workers;
1799 1799
1800 1800 public:
1801 1801 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1802 1802 : AbstractGangTask("G1 final counting"),
1803 1803 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1804 1804 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1805 1805 _n_workers(0) {
1806 1806 // Use the value already set as the number of active threads
1807 1807 // in the call to run_task().
1808 1808 if (G1CollectedHeap::use_parallel_gc_threads()) {
1809 1809 assert( _g1h->workers()->active_workers() > 0,
1810 1810 "Should have been previously set");
1811 1811 _n_workers = _g1h->workers()->active_workers();
1812 1812 } else {
1813 1813 _n_workers = 1;
1814 1814 }
1815 1815 }
1816 1816
1817 1817 void work(uint worker_id) {
1818 1818 assert(worker_id < _n_workers, "invariant");
1819 1819
1820 1820 FinalCountDataUpdateClosure final_update_cl(_g1h,
1821 1821 _actual_region_bm,
1822 1822 _actual_card_bm);
1823 1823
1824 1824 if (G1CollectedHeap::use_parallel_gc_threads()) {
1825 1825 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1826 1826 worker_id,
1827 1827 _n_workers,
1828 1828 HeapRegion::FinalCountClaimValue);
1829 1829 } else {
1830 1830 _g1h->heap_region_iterate(&final_update_cl);
1831 1831 }
1832 1832 }
1833 1833 };
1834 1834
1835 1835 class G1ParNoteEndTask;
1836 1836
1837 1837 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1838 1838 G1CollectedHeap* _g1;
1839 1839 size_t _max_live_bytes;
1840 1840 uint _regions_claimed;
1841 1841 size_t _freed_bytes;
1842 1842 FreeRegionList* _local_cleanup_list;
1843 1843 HeapRegionSetCount _old_regions_removed;
1844 1844 HeapRegionSetCount _humongous_regions_removed;
1845 1845 HRRSCleanupTask* _hrrs_cleanup_task;
1846 1846 double _claimed_region_time;
1847 1847 double _max_region_time;
1848 1848
1849 1849 public:
1850 1850 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1851 1851 FreeRegionList* local_cleanup_list,
1852 1852 HRRSCleanupTask* hrrs_cleanup_task) :
1853 1853 _g1(g1),
1854 1854 _max_live_bytes(0), _regions_claimed(0),
1855 1855 _freed_bytes(0),
1856 1856 _claimed_region_time(0.0), _max_region_time(0.0),
1857 1857 _local_cleanup_list(local_cleanup_list),
1858 1858 _old_regions_removed(),
1859 1859 _humongous_regions_removed(),
1860 1860 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1861 1861
1862 1862 size_t freed_bytes() { return _freed_bytes; }
1863 1863 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1864 1864 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1865 1865
1866 1866 bool doHeapRegion(HeapRegion *hr) {
1867 1867 if (hr->continuesHumongous()) {
1868 1868 return false;
1869 1869 }
1870 1870 // We use a claim value of zero here because all regions
1871 1871 // were claimed with value 1 in the FinalCount task.
1872 1872 _g1->reset_gc_time_stamps(hr);
1873 1873 double start = os::elapsedTime();
1874 1874 _regions_claimed++;
1875 1875 hr->note_end_of_marking();
1876 1876 _max_live_bytes += hr->max_live_bytes();
1877 1877
1878 1878 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1879 1879 _freed_bytes += hr->used();
1880 1880 hr->set_containing_set(NULL);
1881 1881 if (hr->isHumongous()) {
1882 1882 assert(hr->startsHumongous(), "we should only see starts humongous");
1883 1883 _humongous_regions_removed.increment(1u, hr->capacity());
1884 1884 _g1->free_humongous_region(hr, _local_cleanup_list, true);
1885 1885 } else {
1886 1886 _old_regions_removed.increment(1u, hr->capacity());
1887 1887 _g1->free_region(hr, _local_cleanup_list, true);
1888 1888 }
1889 1889 } else {
1890 1890 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1891 1891 }
1892 1892
1893 1893 double region_time = (os::elapsedTime() - start);
1894 1894 _claimed_region_time += region_time;
1895 1895 if (region_time > _max_region_time) {
1896 1896 _max_region_time = region_time;
1897 1897 }
1898 1898 return false;
1899 1899 }
1900 1900
1901 1901 size_t max_live_bytes() { return _max_live_bytes; }
1902 1902 uint regions_claimed() { return _regions_claimed; }
1903 1903 double claimed_region_time_sec() { return _claimed_region_time; }
1904 1904 double max_region_time_sec() { return _max_region_time; }
1905 1905 };
1906 1906
1907 1907 class G1ParNoteEndTask: public AbstractGangTask {
1908 1908 friend class G1NoteEndOfConcMarkClosure;
1909 1909
1910 1910 protected:
1911 1911 G1CollectedHeap* _g1h;
1912 1912 size_t _max_live_bytes;
1913 1913 size_t _freed_bytes;
1914 1914 FreeRegionList* _cleanup_list;
1915 1915
1916 1916 public:
1917 1917 G1ParNoteEndTask(G1CollectedHeap* g1h,
1918 1918 FreeRegionList* cleanup_list) :
1919 1919 AbstractGangTask("G1 note end"), _g1h(g1h),
1920 1920 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1921 1921
1922 1922 void work(uint worker_id) {
1923 1923 double start = os::elapsedTime();
1924 1924 FreeRegionList local_cleanup_list("Local Cleanup List");
1925 1925 HRRSCleanupTask hrrs_cleanup_task;
1926 1926 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1927 1927 &hrrs_cleanup_task);
1928 1928 if (G1CollectedHeap::use_parallel_gc_threads()) {
1929 1929 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1930 1930 _g1h->workers()->active_workers(),
1931 1931 HeapRegion::NoteEndClaimValue);
1932 1932 } else {
1933 1933 _g1h->heap_region_iterate(&g1_note_end);
1934 1934 }
1935 1935 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1936 1936
1937 1937 // Now update the lists
1938 1938 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1939 1939 {
1940 1940 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1941 1941 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1942 1942 _max_live_bytes += g1_note_end.max_live_bytes();
1943 1943 _freed_bytes += g1_note_end.freed_bytes();
1944 1944
1945 1945 // If we iterate over the global cleanup list at the end of
1946 1946 // cleanup to do this printing we will not guarantee to only
1947 1947 // generate output for the newly-reclaimed regions (the list
1948 1948 // might not be empty at the beginning of cleanup; we might
1949 1949 // still be working on its previous contents). So we do the
1950 1950 // printing here, before we append the new regions to the global
1951 1951 // cleanup list.
1952 1952
1953 1953 G1HRPrinter* hr_printer = _g1h->hr_printer();
1954 1954 if (hr_printer->is_active()) {
1955 1955 FreeRegionListIterator iter(&local_cleanup_list);
1956 1956 while (iter.more_available()) {
1957 1957 HeapRegion* hr = iter.get_next();
1958 1958 hr_printer->cleanup(hr);
1959 1959 }
1960 1960 }
1961 1961
1962 1962 _cleanup_list->add_ordered(&local_cleanup_list);
1963 1963 assert(local_cleanup_list.is_empty(), "post-condition");
1964 1964
1965 1965 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1966 1966 }
1967 1967 }
1968 1968 size_t max_live_bytes() { return _max_live_bytes; }
1969 1969 size_t freed_bytes() { return _freed_bytes; }
1970 1970 };
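The work() method above follows a common pattern: fill a per-worker list without locking, then splice it into the shared structure under one lock acquisition. A sketch of that idiom outside HotSpot (names invented):

#include <cstdio>
#include <list>
#include <mutex>
#include <thread>
#include <vector>

static std::list<int> global_cleanup_list;
static std::mutex     cleanup_lock;

static void worker(int id) {
  std::list<int> local;                   // private, so no locking needed
  for (int i = 0; i < 4; ++i) {
    local.push_back(id * 100 + i);        // stand-in for reclaimed regions
  }
  // One lock acquisition moves the whole batch; splice is O(1).
  std::lock_guard<std::mutex> g(cleanup_lock);
  global_cleanup_list.splice(global_cleanup_list.end(), local);
}

int main() {
  std::vector<std::thread> workers;
  for (int id = 0; id < 3; ++id) workers.emplace_back(worker, id);
  for (auto& t : workers) t.join();
  std::printf("global list has %zu entries\n", global_cleanup_list.size());
  return 0;
}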
1971 1971
1972 1972 class G1ParScrubRemSetTask: public AbstractGangTask {
1973 1973 protected:
1974 1974 G1RemSet* _g1rs;
1975 1975 BitMap* _region_bm;
1976 1976 BitMap* _card_bm;
1977 1977 public:
1978 1978 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1979 1979 BitMap* region_bm, BitMap* card_bm) :
1980 1980 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1981 1981 _region_bm(region_bm), _card_bm(card_bm) { }
1982 1982
1983 1983 void work(uint worker_id) {
1984 1984 if (G1CollectedHeap::use_parallel_gc_threads()) {
1985 1985 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1986 1986 HeapRegion::ScrubRemSetClaimValue);
1987 1987 } else {
1988 1988 _g1rs->scrub(_region_bm, _card_bm);
1989 1989 }
1990 1990 }
1991 1991
1992 1992 };
1993 1993
1994 1994 void ConcurrentMark::cleanup() {
1995 1995 // world is stopped at this checkpoint
1996 1996 assert(SafepointSynchronize::is_at_safepoint(),
1997 1997 "world should be stopped");
1998 1998 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1999 1999
2000 2000 // If a full collection has happened, we shouldn't do this.
2001 2001 if (has_aborted()) {
2002 2002 g1h->set_marking_complete(); // So bitmap clearing isn't confused
2003 2003 return;
2004 2004 }
2005 2005
2006 2006 g1h->verify_region_sets_optional();
2007 2007
2008 2008 if (VerifyDuringGC) {
2009 2009 HandleMark hm; // handle scope
2010 2010 Universe::heap()->prepare_for_verify();
2011 2011 Universe::verify(VerifyOption_G1UsePrevMarking,
2012 2012 " VerifyDuringGC:(before)");
2013 2013 }
2014 2014 g1h->check_bitmaps("Cleanup Start");
2015 2015
2016 2016 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2017 2017 g1p->record_concurrent_mark_cleanup_start();
2018 2018
2019 2019 double start = os::elapsedTime();
2020 2020
2021 2021 HeapRegionRemSet::reset_for_cleanup_tasks();
2022 2022
2023 2023 uint n_workers;
2024 2024
2025 2025 // Do counting once more with the world stopped for good measure.
2026 2026 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2027 2027
2028 2028 if (G1CollectedHeap::use_parallel_gc_threads()) {
2029 2029 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2030 2030 "sanity check");
2031 2031
2032 2032 g1h->set_par_threads();
2033 2033 n_workers = g1h->n_par_threads();
2034 2034 assert(g1h->n_par_threads() == n_workers,
2035 2035 "Should not have been reset");
2036 2036 g1h->workers()->run_task(&g1_par_count_task);
2037 2037 // Done with the parallel phase so reset to 0.
2038 2038 g1h->set_par_threads(0);
2039 2039
2040 2040 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2041 2041 "sanity check");
2042 2042 } else {
2043 2043 n_workers = 1;
2044 2044 g1_par_count_task.work(0);
2045 2045 }
2046 2046
2047 2047 if (VerifyDuringGC) {
2048 2048 // Verify that the counting data accumulated during marking matches
2049 2049 // that calculated by walking the marking bitmap.
2050 2050
2051 2051 // Bitmaps to hold expected values
2052 2052 BitMap expected_region_bm(_region_bm.size(), true);
2053 2053 BitMap expected_card_bm(_card_bm.size(), true);
2054 2054
2055 2055 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2056 2056 &_region_bm,
2057 2057 &_card_bm,
2058 2058 &expected_region_bm,
2059 2059 &expected_card_bm);
2060 2060
2061 2061 if (G1CollectedHeap::use_parallel_gc_threads()) {
2062 2062 g1h->set_par_threads((int)n_workers);
2063 2063 g1h->workers()->run_task(&g1_par_verify_task);
2064 2064 // Done with the parallel phase so reset to 0.
2065 2065 g1h->set_par_threads(0);
2066 2066
2067 2067 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2068 2068 "sanity check");
2069 2069 } else {
2070 2070 g1_par_verify_task.work(0);
2071 2071 }
2072 2072
2073 2073 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2074 2074 }
2075 2075
2076 2076 size_t start_used_bytes = g1h->used();
2077 2077 g1h->set_marking_complete();
2078 2078
2079 2079 double count_end = os::elapsedTime();
2080 2080 double this_final_counting_time = (count_end - start);
2081 2081 _total_counting_time += this_final_counting_time;
2082 2082
2083 2083 if (G1PrintRegionLivenessInfo) {
2084 2084 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2085 2085 _g1h->heap_region_iterate(&cl);
2086 2086 }
2087 2087
2088 2088 // Install newly created mark bitMap as "prev".
2089 2089 swapMarkBitMaps();
2090 2090
2091 2091 g1h->reset_gc_time_stamp();
2092 2092
2093 2093 // Note end of marking in all heap regions.
2094 2094 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2095 2095 if (G1CollectedHeap::use_parallel_gc_threads()) {
2096 2096 g1h->set_par_threads((int)n_workers);
2097 2097 g1h->workers()->run_task(&g1_par_note_end_task);
2098 2098 g1h->set_par_threads(0);
2099 2099
2100 2100 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2101 2101 "sanity check");
2102 2102 } else {
2103 2103 g1_par_note_end_task.work(0);
2104 2104 }
2105 2105 g1h->check_gc_time_stamps();
2106 2106
2107 2107 if (!cleanup_list_is_empty()) {
2108 2108 // The cleanup list is not empty, so we'll have to process it
2109 2109 // concurrently. Notify anyone else that might be wanting free
2110 2110 // regions that there will be more free regions coming soon.
2111 2111 g1h->set_free_regions_coming();
2112 2112 }
2113 2113
2114 2114   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
2115 2115   // call below, since it affects the metric by which we sort the heap regions.
2116 2116 if (G1ScrubRemSets) {
2117 2117 double rs_scrub_start = os::elapsedTime();
2118 2118 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2119 2119 if (G1CollectedHeap::use_parallel_gc_threads()) {
2120 2120 g1h->set_par_threads((int)n_workers);
2121 2121 g1h->workers()->run_task(&g1_par_scrub_rs_task);
2122 2122 g1h->set_par_threads(0);
2123 2123
2124 2124 assert(g1h->check_heap_region_claim_values(
2125 2125 HeapRegion::ScrubRemSetClaimValue),
2126 2126 "sanity check");
2127 2127 } else {
2128 2128 g1_par_scrub_rs_task.work(0);
2129 2129 }
2130 2130
2131 2131 double rs_scrub_end = os::elapsedTime();
2132 2132 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2133 2133 _total_rs_scrub_time += this_rs_scrub_time;
2134 2134 }
2135 2135
2136 2136 // this will also free any regions totally full of garbage objects,
2137 2137 // and sort the regions.
2138 2138 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2139 2139
2140 2140 // Statistics.
2141 2141 double end = os::elapsedTime();
2142 2142 _cleanup_times.add((end - start) * 1000.0);
2143 2143
2144 2144 if (G1Log::fine()) {
2145 2145 g1h->print_size_transition(gclog_or_tty,
2146 2146 start_used_bytes,
2147 2147 g1h->used(),
2148 2148 g1h->capacity());
2149 2149 }
2150 2150
2151 2151 // Clean up will have freed any regions completely full of garbage.
2152 2152 // Update the soft reference policy with the new heap occupancy.
2153 2153 Universe::update_heap_info_at_gc();
2154 2154
2155 2155 if (VerifyDuringGC) {
2156 2156 HandleMark hm; // handle scope
2157 2157 Universe::heap()->prepare_for_verify();
2158 2158 Universe::verify(VerifyOption_G1UsePrevMarking,
2159 2159 " VerifyDuringGC:(after)");
2160 2160 }
2161 2161 g1h->check_bitmaps("Cleanup End");
2162 2162
2163 2163 g1h->verify_region_sets_optional();
2164 2164
2165 2165 // We need to make this be a "collection" so any collection pause that
2166 2166 // races with it goes around and waits for completeCleanup to finish.
2167 2167 g1h->increment_total_collections();
2168 2168
2169 2169 // Clean out dead classes and update Metaspace sizes.
2170 2170 if (ClassUnloadingWithConcurrentMark) {
2171 2171 ClassLoaderDataGraph::purge();
2172 2172 }
2173 2173 MetaspaceGC::compute_new_size();
2174 2174
2175 2175 // We reclaimed old regions so we should calculate the sizes to make
2176 2176 // sure we update the old gen/space data.
2177 2177 g1h->g1mm()->update_sizes();
2178 + g1h->allocation_context_stats().update_after_mark();
2178 2179
2179 2180 g1h->trace_heap_after_concurrent_cycle();
2180 2181 }
2181 2182
2182 2183 void ConcurrentMark::completeCleanup() {
2183 2184 if (has_aborted()) return;
2184 2185
2185 2186 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2186 2187
2187 2188 _cleanup_list.verify_optional();
2188 2189 FreeRegionList tmp_free_list("Tmp Free List");
2189 2190
2190 2191 if (G1ConcRegionFreeingVerbose) {
2191 2192 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2192 2193 "cleanup list has %u entries",
2193 2194 _cleanup_list.length());
2194 2195 }
2195 2196
2196 2197 // No one else should be accessing the _cleanup_list at this point,
2197 2198 // so it is not necessary to take any locks
2198 2199 while (!_cleanup_list.is_empty()) {
2199 2200 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2200 2201 assert(hr != NULL, "Got NULL from a non-empty list");
2201 2202 hr->par_clear();
2202 2203 tmp_free_list.add_ordered(hr);
2203 2204
2204 2205 // Instead of adding one region at a time to the secondary_free_list,
2205 2206 // we accumulate them in the local list and move them a few at a
2206 2207 // time. This also cuts down on the number of notify_all() calls
2207 2208 // we do during this process. We'll also append the local list when
2208 2209 // _cleanup_list is empty (which means we just removed the last
2209 2210 // region from the _cleanup_list).
2210 2211 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2211 2212 _cleanup_list.is_empty()) {
2212 2213 if (G1ConcRegionFreeingVerbose) {
2213 2214 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2214 2215 "appending %u entries to the secondary_free_list, "
2215 2216 "cleanup list still has %u entries",
2216 2217 tmp_free_list.length(),
2217 2218 _cleanup_list.length());
2218 2219 }
2219 2220
2220 2221 {
2221 2222 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2222 2223 g1h->secondary_free_list_add(&tmp_free_list);
2223 2224 SecondaryFreeList_lock->notify_all();
2224 2225 }
2225 2226
2226 2227 if (G1StressConcRegionFreeing) {
2227 2228 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2228 2229 os::sleep(Thread::current(), (jlong) 1, false);
2229 2230 }
2230 2231 }
2231 2232 }
2232 2233 }
2233 2234 assert(tmp_free_list.is_empty(), "post-condition");
2234 2235 }
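The batching in completeCleanup() trades a little latency for far fewer lock acquisitions and notify_all() calls. A minimal sketch of the same hand-off loop (invented names; the constant stands in for G1SecondaryFreeListAppendLength):

#include <condition_variable>
#include <cstdio>
#include <list>
#include <mutex>

static const size_t kAppendLength = 5;   // batch size (assumption)

int main() {
  std::list<int> cleanup_list = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  std::list<int> secondary_free_list;
  std::mutex m;
  std::condition_variable cv;

  std::list<int> tmp;
  while (!cleanup_list.empty()) {
    tmp.push_back(cleanup_list.front());
    cleanup_list.pop_front();
    // Append a full batch, or whatever is left once the source is empty.
    if (tmp.size() % kAppendLength == 0 || cleanup_list.empty()) {
      std::lock_guard<std::mutex> g(m);
      secondary_free_list.splice(secondary_free_list.end(), tmp);
      cv.notify_all();                   // one wake-up per batch, not per item
      std::printf("appended a batch, free list now %zu\n",
                  secondary_free_list.size());
    }
  }
  return 0;
}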
2235 2236
2236 2237 // Supporting Object and Oop closures for reference discovery
2237 2238 // and processing during marking
2238 2239
2239 2240 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2240 2241 HeapWord* addr = (HeapWord*)obj;
2241 2242 return addr != NULL &&
2242 2243 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2243 2244 }
2244 2245
2245 2246 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2246 2247 // Uses the CMTask associated with a worker thread (for serial reference
2247 2248 // processing the CMTask for worker 0 is used) to preserve (mark) and
2248 2249 // trace referent objects.
2249 2250 //
2250 2251 // Using the CMTask and embedded local queues avoids having the worker
2251 2252 // threads operating on the global mark stack. This reduces the risk
2252 2253 // of overflowing the stack - which we would rather avoid at this late
2253 2254 // stage. Also, using the tasks' local queues removes the potential
2254 2255 // for the workers to interfere with each other, as they could if
2255 2256 // operating on the global stack.
2256 2257
2257 2258 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2258 2259 ConcurrentMark* _cm;
2259 2260 CMTask* _task;
2260 2261 int _ref_counter_limit;
2261 2262 int _ref_counter;
2262 2263 bool _is_serial;
2263 2264 public:
2264 2265 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2265 2266 _cm(cm), _task(task), _is_serial(is_serial),
2266 2267 _ref_counter_limit(G1RefProcDrainInterval) {
2267 2268 assert(_ref_counter_limit > 0, "sanity");
2268 2269 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2269 2270 _ref_counter = _ref_counter_limit;
2270 2271 }
2271 2272
2272 2273 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2273 2274 virtual void do_oop( oop* p) { do_oop_work(p); }
2274 2275
2275 2276 template <class T> void do_oop_work(T* p) {
2276 2277 if (!_cm->has_overflown()) {
2277 2278 oop obj = oopDesc::load_decode_heap_oop(p);
2278 2279 if (_cm->verbose_high()) {
2279 2280 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2280 2281 "*"PTR_FORMAT" = "PTR_FORMAT,
2281 2282 _task->worker_id(), p2i(p), p2i((void*) obj));
2282 2283 }
2283 2284
2284 2285 _task->deal_with_reference(obj);
2285 2286 _ref_counter--;
2286 2287
2287 2288 if (_ref_counter == 0) {
2288 2289 // We have dealt with _ref_counter_limit references, pushing them
2289 2290 // and objects reachable from them on to the local stack (and
2290 2291 // possibly the global stack). Call CMTask::do_marking_step() to
2291 2292 // process these entries.
2292 2293 //
2293 2294 // We call CMTask::do_marking_step() in a loop, which we'll exit if
2294 2295 // there's nothing more to do (i.e. we're done with the entries that
2295 2296 // were pushed as a result of the CMTask::deal_with_reference() calls
2296 2297 // above) or we overflow.
2297 2298 //
2298 2299 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2299 2300 // flag while there may still be some work to do. (See the comment at
2300 2301 // the beginning of CMTask::do_marking_step() for those conditions -
2301 2302 // one of which is reaching the specified time target.) It is only
2302 2303 // when CMTask::do_marking_step() returns without setting the
2303 2304 // has_aborted() flag that the marking step has completed.
2304 2305 do {
2305 2306 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2306 2307 _task->do_marking_step(mark_step_duration_ms,
2307 2308 false /* do_termination */,
2308 2309 _is_serial);
2309 2310 } while (_task->has_aborted() && !_cm->has_overflown());
2310 2311 _ref_counter = _ref_counter_limit;
2311 2312 }
2312 2313 } else {
2313 2314 if (_cm->verbose_high()) {
2314 2315 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2315 2316 }
2316 2317 }
2317 2318 }
2318 2319 };
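The _ref_counter logic above bounds how much the local queues can grow between drains. A toy model of that drain interval (invented types; the constant plays the role of G1RefProcDrainInterval):

#include <cstdio>
#include <queue>

static const int kDrainInterval = 3;   // drain after every N references

int main() {
  std::queue<int> local_queue;
  int ref_counter = kDrainInterval;

  for (int ref = 0; ref < 8; ++ref) {
    local_queue.push(ref);                     // deal_with_reference() stand-in
    if (--ref_counter == 0) {
      while (!local_queue.empty()) {           // do_marking_step() stand-in
        local_queue.pop();
      }
      std::printf("drained after ref %d\n", ref);
      ref_counter = kDrainInterval;
    }
  }
  std::printf("%zu entries left for the final drain\n", local_queue.size());
  return 0;
}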
2319 2320
2320 2321 // 'Drain' oop closure used by both serial and parallel reference processing.
2321 2322 // Uses the CMTask associated with a given worker thread (for serial
2322 2323 // reference processing the CMTask for worker 0 is used). Calls the
2323 2324 // do_marking_step routine, with an unbelievably large timeout value,
2324 2325 // to drain the marking data structures of the remaining entries
2325 2326 // added by the 'keep alive' oop closure above.
2326 2327
2327 2328 class G1CMDrainMarkingStackClosure: public VoidClosure {
2328 2329 ConcurrentMark* _cm;
2329 2330 CMTask* _task;
2330 2331 bool _is_serial;
2331 2332 public:
2332 2333 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2333 2334 _cm(cm), _task(task), _is_serial(is_serial) {
2334 2335 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2335 2336 }
2336 2337
2337 2338 void do_void() {
2338 2339 do {
2339 2340 if (_cm->verbose_high()) {
2340 2341 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2341 2342 _task->worker_id(), BOOL_TO_STR(_is_serial));
2342 2343 }
2343 2344
2344 2345 // We call CMTask::do_marking_step() to completely drain the local
2345 2346 // and global marking stacks of entries pushed by the 'keep alive'
2346 2347 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2347 2348 //
2348 2349 // CMTask::do_marking_step() is called in a loop, which we'll exit
2349 2350       // if there's nothing more to do (i.e. we've completely drained the
2350 2351       // entries that were pushed as a result of applying the 'keep alive'
2351 2352 // closure to the entries on the discovered ref lists) or we overflow
2352 2353 // the global marking stack.
2353 2354 //
2354 2355 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2355 2356 // flag while there may still be some work to do. (See the comment at
2356 2357 // the beginning of CMTask::do_marking_step() for those conditions -
2357 2358 // one of which is reaching the specified time target.) It is only
2358 2359 // when CMTask::do_marking_step() returns without setting the
2359 2360 // has_aborted() flag that the marking step has completed.
2360 2361
2361 2362 _task->do_marking_step(1000000000.0 /* something very large */,
2362 2363 true /* do_termination */,
2363 2364 _is_serial);
2364 2365 } while (_task->has_aborted() && !_cm->has_overflown());
2365 2366 }
2366 2367 };
2367 2368
2368 2369 // Implementation of AbstractRefProcTaskExecutor for parallel
2369 2370 // reference processing at the end of G1 concurrent marking
2370 2371
2371 2372 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2372 2373 private:
2373 2374 G1CollectedHeap* _g1h;
2374 2375 ConcurrentMark* _cm;
2375 2376 WorkGang* _workers;
2376 2377 int _active_workers;
2377 2378
2378 2379 public:
2379 2380 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2380 2381 ConcurrentMark* cm,
2381 2382 WorkGang* workers,
2382 2383 int n_workers) :
2383 2384 _g1h(g1h), _cm(cm),
2384 2385 _workers(workers), _active_workers(n_workers) { }
2385 2386
2386 2387 // Executes the given task using concurrent marking worker threads.
2387 2388 virtual void execute(ProcessTask& task);
2388 2389 virtual void execute(EnqueueTask& task);
2389 2390 };
2390 2391
2391 2392 class G1CMRefProcTaskProxy: public AbstractGangTask {
2392 2393 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2393 2394 ProcessTask& _proc_task;
2394 2395 G1CollectedHeap* _g1h;
2395 2396 ConcurrentMark* _cm;
2396 2397
2397 2398 public:
2398 2399 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2399 2400 G1CollectedHeap* g1h,
2400 2401 ConcurrentMark* cm) :
2401 2402 AbstractGangTask("Process reference objects in parallel"),
2402 2403 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2403 2404 ReferenceProcessor* rp = _g1h->ref_processor_cm();
2404 2405 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2405 2406 }
2406 2407
2407 2408 virtual void work(uint worker_id) {
2408 2409 ResourceMark rm;
2409 2410 HandleMark hm;
2410 2411 CMTask* task = _cm->task(worker_id);
2411 2412 G1CMIsAliveClosure g1_is_alive(_g1h);
2412 2413 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2413 2414 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2414 2415
2415 2416 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2416 2417 }
2417 2418 };
2418 2419
2419 2420 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2420 2421 assert(_workers != NULL, "Need parallel worker threads.");
2421 2422 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2422 2423
2423 2424 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2424 2425
2425 2426 // We need to reset the concurrency level before each
2426 2427 // proxy task execution, so that the termination protocol
2427 2428 // and overflow handling in CMTask::do_marking_step() knows
2428 2429 // how many workers to wait for.
2429 2430 _cm->set_concurrency(_active_workers);
2430 2431 _g1h->set_par_threads(_active_workers);
2431 2432 _workers->run_task(&proc_task_proxy);
2432 2433 _g1h->set_par_threads(0);
2433 2434 }
2434 2435
2435 2436 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2436 2437 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2437 2438 EnqueueTask& _enq_task;
2438 2439
2439 2440 public:
2440 2441 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2441 2442 AbstractGangTask("Enqueue reference objects in parallel"),
2442 2443 _enq_task(enq_task) { }
2443 2444
2444 2445 virtual void work(uint worker_id) {
2445 2446 _enq_task.work(worker_id);
2446 2447 }
2447 2448 };
2448 2449
2449 2450 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2450 2451 assert(_workers != NULL, "Need parallel worker threads.");
2451 2452 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2452 2453
2453 2454 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2454 2455
2455 2456 // Not strictly necessary but...
2456 2457 //
2457 2458 // We need to reset the concurrency level before each
2458 2459 // proxy task execution, so that the termination protocol
2459 2460 // and overflow handling in CMTask::do_marking_step() knows
2460 2461 // how many workers to wait for.
2461 2462 _cm->set_concurrency(_active_workers);
2462 2463 _g1h->set_par_threads(_active_workers);
2463 2464 _workers->run_task(&enq_task_proxy);
2464 2465 _g1h->set_par_threads(0);
2465 2466 }
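Both execute() overloads reset the concurrency level before running the proxy task because the termination protocol must know how many parties to wait for. A sketch of that per-phase re-arming, using C++20's std::barrier as a stand-in (this is not how HotSpot implements it):

#include <barrier>
#include <cstdio>
#include <thread>
#include <vector>

// Re-arm the termination barrier for exactly this phase's worker count,
// mirroring set_concurrency(active_workers) before run_task().
static void run_phase(int n_workers) {
  std::barrier sync(n_workers);
  std::vector<std::thread> ts;
  for (int i = 0; i < n_workers; ++i) {
    ts.emplace_back([&sync, i] {
      std::printf("worker %d working\n", i);
      sync.arrive_and_wait();            // termination protocol stand-in
    });
  }
  for (auto& t : ts) t.join();
}

int main() {
  run_phase(2);   // "process" phase with two workers
  run_phase(3);   // "enqueue" phase with a different worker count
  return 0;
}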
2466 2467
2467 2468 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2468 2469 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2469 2470 }
2470 2471
2471 2472 // Helper class to get rid of some boilerplate code.
2472 2473 class G1RemarkGCTraceTime : public GCTraceTime {
2473 2474 static bool doit_and_prepend(bool doit) {
2474 2475 if (doit) {
2475 2476 gclog_or_tty->put(' ');
2476 2477 }
2477 2478 return doit;
2478 2479 }
2479 2480
2480 2481 public:
2481 2482 G1RemarkGCTraceTime(const char* title, bool doit)
2482 2483 : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
2483 2484 G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
2484 2485 }
2485 2486 };
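doit_and_prepend() exists because the side effect (the leading space) has to run before the base-class constructor consumes the flag, and a member initializer fires too late. The trick in miniature, with simplified stand-in types:

#include <cstdio>

struct TraceTime {
  explicit TraceTime(bool doit) {
    if (doit) std::printf("[timed scope]\n");
  }
};

struct RemarkTraceTime : TraceTime {
  // Static helper evaluated inside the mem-initializer list: its side
  // effect runs before the TraceTime base constructor sees the flag.
  static bool doit_and_prepend(bool doit) {
    if (doit) std::printf(" ");
    return doit;
  }
  explicit RemarkTraceTime(bool doit) : TraceTime(doit_and_prepend(doit)) {}
};

int main() {
  RemarkTraceTime t(true);   // prints the space, then the scope header
  return 0;
}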
2486 2487
2487 2488 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2488 2489 if (has_overflown()) {
2489 2490 // Skip processing the discovered references if we have
2490 2491 // overflown the global marking stack. Reference objects
2491 2492 // only get discovered once so it is OK to not
2492 2493 // de-populate the discovered reference lists. We could have,
2493 2494 // but the only benefit would be that, when marking restarts,
2494 2495 // less reference objects are discovered.
2495 2496 return;
2496 2497 }
2497 2498
2498 2499 ResourceMark rm;
2499 2500 HandleMark hm;
2500 2501
2501 2502 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2502 2503
2503 2504 // Is alive closure.
2504 2505 G1CMIsAliveClosure g1_is_alive(g1h);
2505 2506
2506 2507 // Inner scope to exclude the cleaning of the string and symbol
2507 2508 // tables from the displayed time.
2508 2509 {
2509 2510 if (G1Log::finer()) {
2510 2511 gclog_or_tty->put(' ');
2511 2512 }
2512 2513 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
2513 2514
2514 2515 ReferenceProcessor* rp = g1h->ref_processor_cm();
2515 2516
2516 2517 // See the comment in G1CollectedHeap::ref_processing_init()
2517 2518 // about how reference processing currently works in G1.
2518 2519
2519 2520 // Set the soft reference policy
2520 2521 rp->setup_policy(clear_all_soft_refs);
2521 2522 assert(_markStack.isEmpty(), "mark stack should be empty");
2522 2523
2523 2524 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2524 2525 // in serial reference processing. Note these closures are also
2525 2526     // used for serially processing (by the current thread) the
2526 2527 // JNI references during parallel reference processing.
2527 2528 //
2528 2529 // These closures do not need to synchronize with the worker
2529 2530 // threads involved in parallel reference processing as these
2530 2531 // instances are executed serially by the current thread (e.g.
2531 2532 // reference processing is not multi-threaded and is thus
2532 2533 // performed by the current thread instead of a gang worker).
2533 2534 //
2534 2535     // The gang tasks involved in parallel reference processing create
2535 2536 // their own instances of these closures, which do their own
2536 2537 // synchronization among themselves.
2537 2538 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2538 2539 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2539 2540
2540 2541 // We need at least one active thread. If reference processing
2541 2542 // is not multi-threaded we use the current (VMThread) thread,
2542 2543 // otherwise we use the work gang from the G1CollectedHeap and
2543 2544 // we utilize all the worker threads we can.
2544 2545 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2545 2546 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2546 2547 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2547 2548
2548 2549 // Parallel processing task executor.
2549 2550 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2550 2551 g1h->workers(), active_workers);
2551 2552 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2552 2553
2553 2554 // Set the concurrency level. The phase was already set prior to
2554 2555 // executing the remark task.
2555 2556 set_concurrency(active_workers);
2556 2557
2557 2558 // Set the degree of MT processing here. If the discovery was done MT,
2558 2559 // the number of threads involved during discovery could differ from
2559 2560 // the number of active workers. This is OK as long as the discovered
2560 2561 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2561 2562 rp->set_active_mt_degree(active_workers);
2562 2563
2563 2564 // Process the weak references.
2564 2565 const ReferenceProcessorStats& stats =
2565 2566 rp->process_discovered_references(&g1_is_alive,
2566 2567 &g1_keep_alive,
2567 2568 &g1_drain_mark_stack,
2568 2569 executor,
2569 2570 g1h->gc_timer_cm(),
2570 2571 concurrent_gc_id());
2571 2572 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2572 2573
2573 2574 // The do_oop work routines of the keep_alive and drain_marking_stack
2574 2575 // oop closures will set the has_overflown flag if we overflow the
2575 2576 // global marking stack.
2576 2577
2577 2578 assert(_markStack.overflow() || _markStack.isEmpty(),
2578 2579 "mark stack should be empty (unless it overflowed)");
2579 2580
2580 2581 if (_markStack.overflow()) {
2581 2582 // This should have been done already when we tried to push an
2582 2583 // entry on to the global mark stack. But let's do it again.
2583 2584 set_has_overflown();
2584 2585 }
2585 2586
2586 2587 assert(rp->num_q() == active_workers, "why not");
2587 2588
2588 2589 rp->enqueue_discovered_references(executor);
2589 2590
2590 2591 rp->verify_no_references_recorded();
2591 2592 assert(!rp->discovery_enabled(), "Post condition");
2592 2593 }
2593 2594
2594 2595 if (has_overflown()) {
2595 2596     // We cannot trust g1_is_alive if the marking stack overflowed
2596 2597 return;
2597 2598 }
2598 2599
2599 2600 assert(_markStack.isEmpty(), "Marking should have completed");
2600 2601
2601 2602 // Unload Klasses, String, Symbols, Code Cache, etc.
2602 2603 {
2603 2604 G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
2604 2605
2605 2606 if (ClassUnloadingWithConcurrentMark) {
2606 2607 // Cleaning of klasses depends on correct information from MetadataMarkOnStack. The CodeCache::mark_on_stack
2607 2608 // part is too slow to be done serially, so it is handled during the weakRefsWorkParallelPart phase.
2608 2609 // Defer the cleaning until we have complete on_stack data.
2609 2610 MetadataOnStackMark md_on_stack(false /* Don't visit the code cache at this point */);
2610 2611
2611 2612 bool purged_classes;
2612 2613
2613 2614 {
2614 2615 G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
2615 2616 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2616 2617 }
2617 2618
2618 2619 {
2619 2620 G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
2620 2621 weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2621 2622 }
2622 2623
2623 2624 {
2624 2625 G1RemarkGCTraceTime trace("Deallocate Metadata", G1Log::finest());
2625 2626 ClassLoaderDataGraph::free_deallocate_lists();
2626 2627 }
2627 2628 }
2628 2629
2629 2630 if (G1StringDedup::is_enabled()) {
2630 2631 G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
2631 2632 G1StringDedup::unlink(&g1_is_alive);
2632 2633 }
2633 2634 }
2634 2635 }
2635 2636
2636 2637 void ConcurrentMark::swapMarkBitMaps() {
2637 2638 CMBitMapRO* temp = _prevMarkBitMap;
2638 2639 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2639 2640 _nextMarkBitMap = (CMBitMap*) temp;
2640 2641 }
2641 2642
2642 2643 class CMObjectClosure;
2643 2644
2644 2645 // Closure for iterating over objects, currently only used for
2645 2646 // processing SATB buffers.
2646 2647 class CMObjectClosure : public ObjectClosure {
2647 2648 private:
2648 2649 CMTask* _task;
2649 2650
2650 2651 public:
2651 2652 void do_object(oop obj) {
2652 2653 _task->deal_with_reference(obj);
2653 2654 }
2654 2655
2655 2656 CMObjectClosure(CMTask* task) : _task(task) { }
2656 2657 };
2657 2658
2658 2659 class G1RemarkThreadsClosure : public ThreadClosure {
2659 2660 CMObjectClosure _cm_obj;
2660 2661 G1CMOopClosure _cm_cl;
2661 2662 MarkingCodeBlobClosure _code_cl;
2662 2663 int _thread_parity;
2663 2664 bool _is_par;
2664 2665
2665 2666 public:
2666 2667 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
2667 2668 _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2668 2669 _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
2669 2670
2670 2671 void do_thread(Thread* thread) {
2671 2672 if (thread->is_Java_thread()) {
2672 2673 if (thread->claim_oops_do(_is_par, _thread_parity)) {
2673 2674 JavaThread* jt = (JavaThread*)thread;
2674 2675
2675 2676         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2676 2677         // however, oops reachable from nmethods have very complex lifecycles:
2677 2678 // * Alive if on the stack of an executing method
2678 2679 // * Weakly reachable otherwise
2679 2680         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2680 2681 // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2681 2682 jt->nmethods_do(&_code_cl);
2682 2683
2683 2684 jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
2684 2685 }
2685 2686 } else if (thread->is_VM_thread()) {
2686 2687 if (thread->claim_oops_do(_is_par, _thread_parity)) {
2687 2688 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
2688 2689 }
2689 2690 }
2690 2691 }
2691 2692 };
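claim_oops_do() relies on a per-thread parity word so that each thread is processed exactly once per pass, no matter how many workers iterate over the thread list. A standalone sketch of parity-based claiming (not the HotSpot implementation; names invented):

#include <atomic>
#include <cstdio>

struct FakeThread {
  std::atomic<int> oops_do_parity{0};
  // The first claimer swings the thread's parity to the current global
  // parity and wins; later claimers in the same pass see it already set.
  bool claim(int global_parity) {
    int seen = oops_do_parity.load();
    return seen != global_parity &&
           oops_do_parity.compare_exchange_strong(seen, global_parity);
  }
};

int main() {
  FakeThread t;
  int global_parity = 1;                  // flipped once per strong-roots pass
  std::printf("worker 0 claims: %d\n", (int)t.claim(global_parity));  // 1
  std::printf("worker 1 claims: %d\n", (int)t.claim(global_parity));  // 0
  return 0;
}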
2692 2693
2693 2694 class CMRemarkTask: public AbstractGangTask {
2694 2695 private:
2695 2696 ConcurrentMark* _cm;
2696 2697 bool _is_serial;
2697 2698 public:
2698 2699 void work(uint worker_id) {
2699 2700 // Since all available tasks are actually started, we should
2700 2701     // only proceed if we're supposed to be active.
2701 2702 if (worker_id < _cm->active_tasks()) {
2702 2703 CMTask* task = _cm->task(worker_id);
2703 2704 task->record_start_time();
2704 2705 {
2705 2706 ResourceMark rm;
2706 2707 HandleMark hm;
2707 2708
2708 2709 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
2709 2710 Threads::threads_do(&threads_f);
2710 2711 }
2711 2712
2712 2713 do {
2713 2714 task->do_marking_step(1000000000.0 /* something very large */,
2714 2715 true /* do_termination */,
2715 2716 _is_serial);
2716 2717 } while (task->has_aborted() && !_cm->has_overflown());
2717 2718 // If we overflow, then we do not want to restart. We instead
2718 2719 // want to abort remark and do concurrent marking again.
2719 2720 task->record_end_time();
2720 2721 }
2721 2722 }
2722 2723
2723 2724 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2724 2725 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2725 2726 _cm->terminator()->reset_for_reuse(active_workers);
2726 2727 }
2727 2728 };
2728 2729
2729 2730 void ConcurrentMark::checkpointRootsFinalWork() {
2730 2731 ResourceMark rm;
2731 2732 HandleMark hm;
2732 2733 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2733 2734
2734 2735 G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
2735 2736
2736 2737 g1h->ensure_parsability(false);
2737 2738
2738 2739 if (G1CollectedHeap::use_parallel_gc_threads()) {
2739 2740 G1CollectedHeap::StrongRootsScope srs(g1h);
2740 2741 // this is remark, so we'll use up all active threads
2741 2742 uint active_workers = g1h->workers()->active_workers();
2742 2743 if (active_workers == 0) {
2743 2744 assert(active_workers > 0, "Should have been set earlier");
2744 2745 active_workers = (uint) ParallelGCThreads;
2745 2746 g1h->workers()->set_active_workers(active_workers);
2746 2747 }
2747 2748 set_concurrency_and_phase(active_workers, false /* concurrent */);
2748 2749     // Leave _parallel_marking_threads at its
2749 2750 // value originally calculated in the ConcurrentMark
2750 2751 // constructor and pass values of the active workers
2751 2752 // through the gang in the task.
2752 2753
2753 2754 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2754 2755 // We will start all available threads, even if we decide that the
2755 2756 // active_workers will be fewer. The extra ones will just bail out
2756 2757 // immediately.
2757 2758 g1h->set_par_threads(active_workers);
2758 2759 g1h->workers()->run_task(&remarkTask);
2759 2760 g1h->set_par_threads(0);
2760 2761 } else {
2761 2762 G1CollectedHeap::StrongRootsScope srs(g1h);
2762 2763 uint active_workers = 1;
2763 2764 set_concurrency_and_phase(active_workers, false /* concurrent */);
2764 2765
2765 2766 // Note - if there's no work gang then the VMThread will be
2766 2767 // the thread to execute the remark - serially. We have
2767 2768 // to pass true for the is_serial parameter so that
2768 2769 // CMTask::do_marking_step() doesn't enter the sync
2769 2770 // barriers in the event of an overflow. Doing so will
2770 2771 // cause an assert that the current thread is not a
2771 2772 // concurrent GC thread.
2772 2773 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2773 2774 remarkTask.work(0);
2774 2775 }
2775 2776 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2776 2777 guarantee(has_overflown() ||
2777 2778 satb_mq_set.completed_buffers_num() == 0,
2778 2779 err_msg("Invariant: has_overflown = %s, num buffers = %d",
2779 2780 BOOL_TO_STR(has_overflown()),
2780 2781 satb_mq_set.completed_buffers_num()));
2781 2782
2782 2783 print_stats();
2783 2784 }
2784 2785
2785 2786 #ifndef PRODUCT
2786 2787
2787 2788 class PrintReachableOopClosure: public OopClosure {
2788 2789 private:
2789 2790 G1CollectedHeap* _g1h;
2790 2791 outputStream* _out;
2791 2792 VerifyOption _vo;
2792 2793 bool _all;
2793 2794
2794 2795 public:
2795 2796 PrintReachableOopClosure(outputStream* out,
2796 2797 VerifyOption vo,
2797 2798 bool all) :
2798 2799 _g1h(G1CollectedHeap::heap()),
2799 2800 _out(out), _vo(vo), _all(all) { }
2800 2801
2801 2802 void do_oop(narrowOop* p) { do_oop_work(p); }
2802 2803 void do_oop( oop* p) { do_oop_work(p); }
2803 2804
2804 2805 template <class T> void do_oop_work(T* p) {
2805 2806 oop obj = oopDesc::load_decode_heap_oop(p);
2806 2807 const char* str = NULL;
2807 2808 const char* str2 = "";
2808 2809
2809 2810 if (obj == NULL) {
2810 2811 str = "";
2811 2812 } else if (!_g1h->is_in_g1_reserved(obj)) {
2812 2813 str = " O";
2813 2814 } else {
2814 2815 HeapRegion* hr = _g1h->heap_region_containing(obj);
2815 2816 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2816 2817 bool marked = _g1h->is_marked(obj, _vo);
2817 2818
2818 2819 if (over_tams) {
2819 2820 str = " >";
2820 2821 if (marked) {
2821 2822 str2 = " AND MARKED";
2822 2823 }
2823 2824 } else if (marked) {
2824 2825 str = " M";
2825 2826 } else {
2826 2827 str = " NOT";
2827 2828 }
2828 2829 }
2829 2830
2830 2831 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2831 2832 p2i(p), p2i((void*) obj), str, str2);
2832 2833 }
2833 2834 };
2834 2835
2835 2836 class PrintReachableObjectClosure : public ObjectClosure {
2836 2837 private:
2837 2838 G1CollectedHeap* _g1h;
2838 2839 outputStream* _out;
2839 2840 VerifyOption _vo;
2840 2841 bool _all;
2841 2842 HeapRegion* _hr;
2842 2843
2843 2844 public:
2844 2845 PrintReachableObjectClosure(outputStream* out,
2845 2846 VerifyOption vo,
2846 2847 bool all,
2847 2848 HeapRegion* hr) :
2848 2849 _g1h(G1CollectedHeap::heap()),
2849 2850 _out(out), _vo(vo), _all(all), _hr(hr) { }
2850 2851
2851 2852 void do_object(oop o) {
2852 2853 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2853 2854 bool marked = _g1h->is_marked(o, _vo);
2854 2855 bool print_it = _all || over_tams || marked;
2855 2856
2856 2857 if (print_it) {
2857 2858 _out->print_cr(" "PTR_FORMAT"%s",
2858 2859 p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2859 2860 PrintReachableOopClosure oopCl(_out, _vo, _all);
2860 2861 o->oop_iterate_no_header(&oopCl);
2861 2862 }
2862 2863 }
2863 2864 };
2864 2865
2865 2866 class PrintReachableRegionClosure : public HeapRegionClosure {
2866 2867 private:
2867 2868 G1CollectedHeap* _g1h;
2868 2869 outputStream* _out;
2869 2870 VerifyOption _vo;
2870 2871 bool _all;
2871 2872
2872 2873 public:
2873 2874 bool doHeapRegion(HeapRegion* hr) {
2874 2875 HeapWord* b = hr->bottom();
2875 2876 HeapWord* e = hr->end();
2876 2877 HeapWord* t = hr->top();
2877 2878 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2878 2879 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2879 2880 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2880 2881 _out->cr();
2881 2882
2882 2883 HeapWord* from = b;
2883 2884 HeapWord* to = t;
2884 2885
2885 2886 if (to > from) {
2886 2887 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2887 2888 _out->cr();
2888 2889 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2889 2890 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2890 2891 _out->cr();
2891 2892 }
2892 2893
2893 2894 return false;
2894 2895 }
2895 2896
2896 2897 PrintReachableRegionClosure(outputStream* out,
2897 2898 VerifyOption vo,
2898 2899 bool all) :
2899 2900 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2900 2901 };
2901 2902
2902 2903 void ConcurrentMark::print_reachable(const char* str,
2903 2904 VerifyOption vo,
2904 2905 bool all) {
2905 2906 gclog_or_tty->cr();
2906 2907 gclog_or_tty->print_cr("== Doing heap dump... ");
2907 2908
2908 2909 if (G1PrintReachableBaseFile == NULL) {
2909 2910 gclog_or_tty->print_cr(" #### error: no base file defined");
2910 2911 return;
2911 2912 }
2912 2913
2913 2914 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2914 2915 (JVM_MAXPATHLEN - 1)) {
2915 2916 gclog_or_tty->print_cr(" #### error: file name too long");
2916 2917 return;
2917 2918 }
2918 2919
2919 2920 char file_name[JVM_MAXPATHLEN];
2920 2921 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
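// For example (illustrative values only): with G1PrintReachableBaseFile set
// to "/tmp/reachable" and str == "cleanup", the sprintf above produces the
// dump file name "/tmp/reachable.cleanup".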
2921 2922 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2922 2923
2923 2924 fileStream fout(file_name);
2924 2925 if (!fout.is_open()) {
2925 2926 gclog_or_tty->print_cr(" #### error: could not open file");
2926 2927 return;
2927 2928 }
2928 2929
2929 2930 outputStream* out = &fout;
2930 2931 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2931 2932 out->cr();
2932 2933
2933 2934 out->print_cr("--- ITERATING OVER REGIONS");
2934 2935 out->cr();
2935 2936 PrintReachableRegionClosure rcl(out, vo, all);
2936 2937 _g1h->heap_region_iterate(&rcl);
2937 2938 out->cr();
2938 2939
2939 2940 gclog_or_tty->print_cr(" done");
2940 2941 gclog_or_tty->flush();
2941 2942 }
2942 2943
2943 2944 #endif // PRODUCT
2944 2945
2945 2946 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2946 2947 // Note we are overriding the read-only view of the prev map here, via
2947 2948 // the cast.
2948 2949 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2949 2950 }
2950 2951
2951 2952 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2952 2953 _nextMarkBitMap->clearRange(mr);
2953 2954 }
2954 2955
2955 2956 HeapRegion*
2956 2957 ConcurrentMark::claim_region(uint worker_id) {
2957 2958 // "checkpoint" the finger
2958 2959 HeapWord* finger = _finger;
2959 2960
2960 2961 // _heap_end will not change underneath our feet; it only changes at
2961 2962 // yield points.
2962 2963 while (finger < _heap_end) {
2963 2964 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2964 2965
2965 2966 // Note on how this code handles humongous regions. In the
2966 2967 // normal case the finger will reach the start of a "starts
2967 2968 // humongous" (SH) region. Its end will either be the end of the
2968 2969 // last "continues humongous" (CH) region in the sequence, or the
2969 2970 // standard end of the SH region (if the SH is the only region in
2970 2971 // the sequence). That way claim_region() will skip over the CH
2971 2972 // regions. However, there is a subtle race between a CM thread
2972 2973 // executing this method and a mutator thread doing a humongous
2973 2974 // object allocation. The two are not mutually exclusive as the CM
2974 2975 // thread does not need to hold the Heap_lock when it gets
2975 2976 // here. So there is a chance that claim_region() will come across
2976 2977 // a free region that's in the process of becoming a SH or a CH
2977 2978 // region. In the former case, it will either
2978 2979 // a) Miss the update to the region's end, in which case it will
2979 2980 // visit every subsequent CH region, will find their bitmaps
2980 2981 // empty, and do nothing, or
2981 2982 // b) Will observe the update of the region's end (in which case
2982 2983 // it will skip the subsequent CH regions).
2983 2984 // If it comes across a region that suddenly becomes CH, the
2984 2985 // scenario will be similar to b). So, the race between
2985 2986 // claim_region() and a humongous object allocation might force us
2986 2987 // to do a bit of unnecessary work (due to some unnecessary bitmap
2987 2988 // iterations) but it should not introduce any correctness issues.
2988 2989 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2989 2990
2990 2991 // Above, heap_region_containing_raw may return NULL as we always scan and
2991 2992 // claim up to the end of the heap. In this case, just jump to the next region.
2992 2993 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2993 2994
2994 2995 // Is the gap between reading the finger and doing the CAS too long?
2995 2996 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2996 2997 if (res == finger && curr_region != NULL) {
2997 2998 // we succeeded
2998 2999 HeapWord* bottom = curr_region->bottom();
2999 3000 HeapWord* limit = curr_region->next_top_at_mark_start();
3000 3001
3001 3002 if (verbose_low()) {
3002 3003 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
3003 3004 "["PTR_FORMAT", "PTR_FORMAT"), "
3004 3005 "limit = "PTR_FORMAT,
3005 3006 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
3006 3007 }
3007 3008
3008 3009 // notice that _finger == end cannot be guaranteed here since
3009 3010 // someone else might have moved the finger even further
3010 3011 assert(_finger >= end, "the finger should have moved forward");
3011 3012
3012 3013 if (verbose_low()) {
3013 3014 gclog_or_tty->print_cr("[%u] we were successful with region = "
3014 3015 PTR_FORMAT, worker_id, p2i(curr_region));
3015 3016 }
3016 3017
3017 3018 if (limit > bottom) {
3018 3019 if (verbose_low()) {
3019 3020 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
3020 3021 "returning it ", worker_id, p2i(curr_region));
3021 3022 }
3022 3023 return curr_region;
3023 3024 } else {
3024 3025 assert(limit == bottom,
3025 3026 "the region limit should be at bottom");
3026 3027 if (verbose_low()) {
3027 3028 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
3028 3029 "returning NULL", worker_id, p2i(curr_region));
3029 3030 }
3030 3031 // we return NULL and the caller should try calling
3031 3032 // claim_region() again.
3032 3033 return NULL;
3033 3034 }
3034 3035 } else {
3035 3036 assert(_finger > finger, "the finger should have moved forward");
3036 3037 if (verbose_low()) {
3037 3038 if (curr_region == NULL) {
3038 3039 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
3039 3040 "global finger = "PTR_FORMAT", "
3040 3041 "our finger = "PTR_FORMAT,
3041 3042 worker_id, p2i(_finger), p2i(finger));
3042 3043 } else {
3043 3044 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
3044 3045 "global finger = "PTR_FORMAT", "
3045 3046 "our finger = "PTR_FORMAT,
3046 3047 worker_id, p2i(_finger), p2i(finger));
3047 3048 }
3048 3049 }
3049 3050
3050 3051 // read it again
3051 3052 finger = _finger;
3052 3053 }
3053 3054 }
3054 3055
3055 3056 return NULL;
3056 3057 }
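// Illustrative caller pattern (a sketch, not code from this change): per the
// comments above, a NULL result means either that an empty region was claimed
// (retry) or that the finger has reached the end of the heap (done). A
// hypothetical driver would therefore loop along these lines:
//
//   HeapRegion* hr = NULL;
//   while (hr == NULL && cm->finger() < heap_end) {
//     hr = cm->claim_region(worker_id);
//   }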
3057 3058
3058 3059 #ifndef PRODUCT
3059 3060 enum VerifyNoCSetOopsPhase {
3060 3061 VerifyNoCSetOopsStack,
3061 3062 VerifyNoCSetOopsQueues,
3062 3063 VerifyNoCSetOopsSATBCompleted,
3063 3064 VerifyNoCSetOopsSATBThread
3064 3065 };
3065 3066
3066 3067 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
3067 3068 private:
3068 3069 G1CollectedHeap* _g1h;
3069 3070 VerifyNoCSetOopsPhase _phase;
3070 3071 int _info;
3071 3072
3072 3073 const char* phase_str() {
3073 3074 switch (_phase) {
3074 3075 case VerifyNoCSetOopsStack: return "Stack";
3075 3076 case VerifyNoCSetOopsQueues: return "Queue";
3076 3077 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
3077 3078 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
3078 3079 default: ShouldNotReachHere();
3079 3080 }
3080 3081 return NULL;
3081 3082 }
3082 3083
3083 3084 void do_object_work(oop obj) {
3084 3085 guarantee(!_g1h->obj_in_cs(obj),
3085 3086 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
3086 3087 p2i((void*) obj), phase_str(), _info));
3087 3088 }
3088 3089
3089 3090 public:
3090 3091 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
3091 3092
3092 3093 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
3093 3094 _phase = phase;
3094 3095 _info = info;
3095 3096 }
3096 3097
3097 3098 virtual void do_oop(oop* p) {
3098 3099 oop obj = oopDesc::load_decode_heap_oop(p);
3099 3100 do_object_work(obj);
3100 3101 }
3101 3102
3102 3103 virtual void do_oop(narrowOop* p) {
3103 3104 // We should not come across narrow oops while scanning marking
3104 3105 // stacks and SATB buffers.
3105 3106 ShouldNotReachHere();
3106 3107 }
3107 3108
3108 3109 virtual void do_object(oop obj) {
3109 3110 do_object_work(obj);
3110 3111 }
3111 3112 };
3112 3113
3113 3114 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
3114 3115 bool verify_enqueued_buffers,
3115 3116 bool verify_thread_buffers,
3116 3117 bool verify_fingers) {
3117 3118 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
3118 3119 if (!G1CollectedHeap::heap()->mark_in_progress()) {
3119 3120 return;
3120 3121 }
3121 3122
3122 3123 VerifyNoCSetOopsClosure cl;
3123 3124
3124 3125 if (verify_stacks) {
3125 3126 // Verify entries on the global mark stack
3126 3127 cl.set_phase(VerifyNoCSetOopsStack);
3127 3128 _markStack.oops_do(&cl);
3128 3129
3129 3130 // Verify entries on the task queues
3130 3131 for (uint i = 0; i < _max_worker_id; i += 1) {
3131 3132 cl.set_phase(VerifyNoCSetOopsQueues, i);
3132 3133 CMTaskQueue* queue = _task_queues->queue(i);
3133 3134 queue->oops_do(&cl);
3134 3135 }
3135 3136 }
3136 3137
3137 3138 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
3138 3139
3139 3140 // Verify entries on the enqueued SATB buffers
3140 3141 if (verify_enqueued_buffers) {
3141 3142 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3142 3143 satb_qs.iterate_completed_buffers_read_only(&cl);
3143 3144 }
3144 3145
3145 3146 // Verify entries on the per-thread SATB buffers
3146 3147 if (verify_thread_buffers) {
3147 3148 cl.set_phase(VerifyNoCSetOopsSATBThread);
3148 3149 satb_qs.iterate_thread_buffers_read_only(&cl);
3149 3150 }
3150 3151
3151 3152 if (verify_fingers) {
3152 3153 // Verify the global finger
3153 3154 HeapWord* global_finger = finger();
3154 3155 if (global_finger != NULL && global_finger < _heap_end) {
3155 3156 // The global finger always points to a heap region boundary. We
3156 3157 // use heap_region_containing_raw() to get the containing region
3157 3158 // given that the global finger could be pointing to a free region
3158 3159 // which subsequently becomes continues humongous. If that
3159 3160 // happens, heap_region_containing() will return the bottom of the
3160 3161 // corresponding starts humongous region and the check below will
3161 3162 // not hold any more.
3162 3163 // Since we always iterate over all regions, we might get a NULL HeapRegion
3163 3164 // here.
3164 3165 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3165 3166 guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
3166 3167 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3167 3168 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3168 3169 }
3169 3170
3170 3171 // Verify the task fingers
3171 3172 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3172 3173 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3173 3174 CMTask* task = _tasks[i];
3174 3175 HeapWord* task_finger = task->finger();
3175 3176 if (task_finger != NULL && task_finger < _heap_end) {
3176 3177 // See above note on the global finger verification.
3177 3178 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3178 3179 guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
3179 3180 !task_hr->in_collection_set(),
3180 3181 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3181 3182 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3182 3183 }
3183 3184 }
3184 3185 }
3185 3186 }
3186 3187 #endif // PRODUCT
3187 3188
3188 3189 // Aggregate the counting data that was constructed concurrently
3189 3190 // with marking.
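// Sketch of the data involved (as used by the closure below): each worker i
// owns a marked-bytes array indexed by region (count_marked_bytes_array_for(i))
// and a private card bitmap (count_card_bitmap_for(i)); aggregation sums the
// former into hr->marked_bytes() and ORs the latter into the global card
// bitmap _cm_card_bm.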
3190 3191 class AggregateCountDataHRClosure: public HeapRegionClosure {
3191 3192 G1CollectedHeap* _g1h;
3192 3193 ConcurrentMark* _cm;
3193 3194 CardTableModRefBS* _ct_bs;
3194 3195 BitMap* _cm_card_bm;
3195 3196 uint _max_worker_id;
3196 3197
3197 3198 public:
3198 3199 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3199 3200 BitMap* cm_card_bm,
3200 3201 uint max_worker_id) :
3201 3202 _g1h(g1h), _cm(g1h->concurrent_mark()),
3202 3203 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3203 3204 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3204 3205
3205 3206 bool doHeapRegion(HeapRegion* hr) {
3206 3207 if (hr->continuesHumongous()) {
3207 3208 // We will ignore these here and process them when their
3208 3209 // associated "starts humongous" region is processed.
3209 3210 // Note that we cannot rely on their associated
3210 3211 // "starts humongous" region to have their bit set to 1
3211 3212 // since, due to the region chunking in the parallel region
3212 3213 // iteration, a "continues humongous" region might be visited
3213 3214 // before its associated "starts humongous".
3214 3215 return false;
3215 3216 }
3216 3217
3217 3218 HeapWord* start = hr->bottom();
3218 3219 HeapWord* limit = hr->next_top_at_mark_start();
3219 3220 HeapWord* end = hr->end();
3220 3221
3221 3222 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3222 3223 err_msg("Preconditions not met - "
3223 3224 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3224 3225 "top: "PTR_FORMAT", end: "PTR_FORMAT,
3225 3226 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3226 3227
3227 3228 assert(hr->next_marked_bytes() == 0, "Precondition");
3228 3229
3229 3230 if (start == limit) {
3230 3231 // NTAMS of this region has not been set so nothing to do.
3231 3232 return false;
3232 3233 }
3233 3234
3234 3235 // 'start' should be in the heap.
3235 3236 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3236 3237 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3237 3238 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3238 3239
3239 3240 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3240 3241 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3241 3242 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3242 3243
3243 3244 // If ntams is not card aligned then we bump card bitmap index
3244 3245 // for limit so that we get all the cards spanned by
3245 3246 // the object ending at ntams.
3246 3247 // Note: if this is the last region in the heap then ntams
3247 3248 // could actually be just beyond the end of the heap;
3248 3249 // limit_idx will then correspond to a (non-existent) card
3249 3250 // that is also outside the heap.
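// Worked example (illustrative): if an object ends mid-card at ntams,
// card_bitmap_index_for(limit) yields the index of the card containing
// ntams; bumping limit_idx by one makes the half-open range
// [start_idx, limit_idx) cover that final, partially filled card too.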
3250 3251 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3251 3252 limit_idx += 1;
3252 3253 }
3253 3254
3254 3255 assert(limit_idx <= end_idx, "or else use atomics");
3255 3256
3256 3257 // Aggregate the "stripe" in the count data associated with hr.
3257 3258 uint hrm_index = hr->hrm_index();
3258 3259 size_t marked_bytes = 0;
3259 3260
3260 3261 for (uint i = 0; i < _max_worker_id; i += 1) {
3261 3262 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3262 3263 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3263 3264
3264 3265 // Fetch the marked_bytes in this region for task i and
3265 3266 // add it to the running total for this region.
3266 3267 marked_bytes += marked_bytes_array[hrm_index];
3267 3268
3268 3269 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3269 3270 // into the global card bitmap.
3270 3271 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3271 3272
3272 3273 while (scan_idx < limit_idx) {
3273 3274 assert(task_card_bm->at(scan_idx) == true, "should be");
3274 3275 _cm_card_bm->set_bit(scan_idx);
3275 3276 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3276 3277
3277 3278 // BitMap::get_next_one_offset() can handle the case when
3278 3279 // its left_offset parameter is greater than its right_offset
3279 3280 // parameter. It does, however, have an early exit if
3280 3281 // left_offset == right_offset. So let's limit the value
3281 3282 // passed in for left offset here.
3282 3283 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3283 3284 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3284 3285 }
3285 3286 }
3286 3287
3287 3288 // Update the marked bytes for this region.
3288 3289 hr->add_to_marked_bytes(marked_bytes);
3289 3290
3290 3291 // Next heap region
3291 3292 return false;
3292 3293 }
3293 3294 };
3294 3295
3295 3296 class G1AggregateCountDataTask: public AbstractGangTask {
3296 3297 protected:
3297 3298 G1CollectedHeap* _g1h;
3298 3299 ConcurrentMark* _cm;
3299 3300 BitMap* _cm_card_bm;
3300 3301 uint _max_worker_id;
3301 3302 int _active_workers;
3302 3303
3303 3304 public:
3304 3305 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3305 3306 ConcurrentMark* cm,
3306 3307 BitMap* cm_card_bm,
3307 3308 uint max_worker_id,
3308 3309 int n_workers) :
3309 3310 AbstractGangTask("Count Aggregation"),
3310 3311 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3311 3312 _max_worker_id(max_worker_id),
3312 3313 _active_workers(n_workers) { }
3313 3314
3314 3315 void work(uint worker_id) {
3315 3316 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3316 3317
3317 3318 if (G1CollectedHeap::use_parallel_gc_threads()) {
3318 3319 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3319 3320 _active_workers,
3320 3321 HeapRegion::AggregateCountClaimValue);
3321 3322 } else {
3322 3323 _g1h->heap_region_iterate(&cl);
3323 3324 }
3324 3325 }
3325 3326 };
3326 3327
3327 3328
3328 3329 void ConcurrentMark::aggregate_count_data() {
3329 3330 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3330 3331 _g1h->workers()->active_workers() :
3331 3332 1);
3332 3333
3333 3334 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3334 3335 _max_worker_id, n_workers);
3335 3336
3336 3337 if (G1CollectedHeap::use_parallel_gc_threads()) {
3337 3338 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3338 3339 "sanity check");
3339 3340 _g1h->set_par_threads(n_workers);
3340 3341 _g1h->workers()->run_task(&g1_par_agg_task);
3341 3342 _g1h->set_par_threads(0);
3342 3343
3343 3344 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3344 3345 "sanity check");
3345 3346 _g1h->reset_heap_region_claim_values();
3346 3347 } else {
3347 3348 g1_par_agg_task.work(0);
3348 3349 }
3349 - _g1h->allocation_context_stats().update_at_remark();
3350 3350 }
3351 3351
3352 3352 // Clear the per-worker arrays used to store the per-region counting data
3353 3353 void ConcurrentMark::clear_all_count_data() {
3354 3354 // Clear the global card bitmap - it will be filled during
3355 3355 // liveness count aggregation (during remark) and the
3356 3356 // final counting task.
3357 3357 _card_bm.clear();
3358 3358
3359 3359 // Clear the global region bitmap - it will be filled as part
3360 3360 // of the final counting task.
3361 3361 _region_bm.clear();
3362 3362
3363 3363 uint max_regions = _g1h->max_regions();
3364 3364 assert(_max_worker_id > 0, "uninitialized");
3365 3365
3366 3366 for (uint i = 0; i < _max_worker_id; i += 1) {
3367 3367 BitMap* task_card_bm = count_card_bitmap_for(i);
3368 3368 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3369 3369
3370 3370 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3371 3371 assert(marked_bytes_array != NULL, "uninitialized");
3372 3372
3373 3373 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3374 3374 task_card_bm->clear();
3375 3375 }
3376 3376 }
3377 3377
3378 3378 void ConcurrentMark::print_stats() {
3379 3379 if (verbose_stats()) {
3380 3380 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3381 3381 for (size_t i = 0; i < _active_tasks; ++i) {
3382 3382 _tasks[i]->print_stats();
3383 3383 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3384 3384 }
3385 3385 }
3386 3386 }
3387 3387
3388 3388 // abandon current marking iteration due to a Full GC
3389 3389 void ConcurrentMark::abort() {
3390 3390 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3391 3391 // concurrent bitmap clearing.
3392 3392 _nextMarkBitMap->clearAll();
3393 3393
3394 3394 // Note we cannot clear the previous marking bitmap here
3395 3395 // since VerifyDuringGC verifies the objects marked during
3396 3396 // a full GC against the previous bitmap.
3397 3397
3398 3398 // Clear the liveness counting data
3399 3399 clear_all_count_data();
3400 3400 // Empty mark stack
3401 3401 reset_marking_state();
3402 3402 for (uint i = 0; i < _max_worker_id; ++i) {
3403 3403 _tasks[i]->clear_region_fields();
3404 3404 }
3405 3405 _first_overflow_barrier_sync.abort();
3406 3406 _second_overflow_barrier_sync.abort();
3407 3407 const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3408 3408 if (!gc_id.is_undefined()) {
3409 3409 // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3410 3410 // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3411 3411 _aborted_gc_id = gc_id;
3412 3412 }
3413 3413 _has_aborted = true;
3414 3414
3415 3415 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3416 3416 satb_mq_set.abandon_partial_marking();
3417 3417 // This can be called either during or outside marking, we'll read
3418 3418 // the expected_active value from the SATB queue set.
3419 3419 satb_mq_set.set_active_all_threads(
3420 3420 false, /* new active value */
3421 3421 satb_mq_set.is_active() /* expected_active */);
3422 3422
3423 3423 _g1h->trace_heap_after_concurrent_cycle();
3424 3424 _g1h->register_concurrent_cycle_end();
3425 3425 }
3426 3426
3427 3427 const GCId& ConcurrentMark::concurrent_gc_id() {
3428 3428 if (has_aborted()) {
3429 3429 return _aborted_gc_id;
3430 3430 }
3431 3431 return _g1h->gc_tracer_cm()->gc_id();
3432 3432 }
3433 3433
3434 3434 static void print_ms_time_info(const char* prefix, const char* name,
3435 3435 NumberSeq& ns) {
3436 3436 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3437 3437 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3438 3438 if (ns.num() > 0) {
3439 3439 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3440 3440 prefix, ns.sd(), ns.maximum());
3441 3441 }
3442 3442 }
3443 3443
3444 3444 void ConcurrentMark::print_summary_info() {
3445 3445 gclog_or_tty->print_cr(" Concurrent marking:");
3446 3446 print_ms_time_info(" ", "init marks", _init_times);
3447 3447 print_ms_time_info(" ", "remarks", _remark_times);
3448 3448 {
3449 3449 print_ms_time_info(" ", "final marks", _remark_mark_times);
3450 3450 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3451 3451
3452 3452 }
3453 3453 print_ms_time_info(" ", "cleanups", _cleanup_times);
3454 3454 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3455 3455 _total_counting_time,
3456 3456 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3457 3457 (double)_cleanup_times.num()
3458 3458 : 0.0));
3459 3459 if (G1ScrubRemSets) {
3460 3460 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3461 3461 _total_rs_scrub_time,
3462 3462 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3463 3463 (double)_cleanup_times.num()
3464 3464 : 0.0));
3465 3465 }
3466 3466 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3467 3467 (_init_times.sum() + _remark_times.sum() +
3468 3468 _cleanup_times.sum())/1000.0);
3469 3469 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3470 3470 "(%8.2f s marking).",
3471 3471 cmThread()->vtime_accum(),
3472 3472 cmThread()->vtime_mark_accum());
3473 3473 }
3474 3474
3475 3475 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3476 3476 if (use_parallel_marking_threads()) {
3477 3477 _parallel_workers->print_worker_threads_on(st);
3478 3478 }
3479 3479 }
3480 3480
3481 3481 void ConcurrentMark::print_on_error(outputStream* st) const {
3482 3482 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3483 3483 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3484 3484 _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3485 3485 _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3486 3486 }
3487 3487
3488 3488 // We take a break if someone is trying to stop the world.
3489 3489 bool ConcurrentMark::do_yield_check(uint worker_id) {
3490 3490 if (SuspendibleThreadSet::should_yield()) {
3491 3491 if (worker_id == 0) {
3492 3492 _g1h->g1_policy()->record_concurrent_pause();
3493 3493 }
3494 3494 SuspendibleThreadSet::yield();
3495 3495 return true;
3496 3496 } else {
3497 3497 return false;
3498 3498 }
3499 3499 }
3500 3500
3501 3501 #ifndef PRODUCT
3502 3502 // for debugging purposes
3503 3503 void ConcurrentMark::print_finger() {
3504 3504 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3505 3505 p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3506 3506 for (uint i = 0; i < _max_worker_id; ++i) {
3507 3507 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3508 3508 }
3509 3509 gclog_or_tty->cr();
3510 3510 }
3511 3511 #endif
3512 3512
3513 3513 void CMTask::scan_object(oop obj) {
3514 3514 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3515 3515
3516 3516 if (_cm->verbose_high()) {
3517 3517 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3518 3518 _worker_id, p2i((void*) obj));
3519 3519 }
3520 3520
3521 3521 size_t obj_size = obj->size();
3522 3522 _words_scanned += obj_size;
3523 3523
3524 3524 obj->oop_iterate(_cm_oop_closure);
3525 3525 statsOnly( ++_objs_scanned );
3526 3526 check_limits();
3527 3527 }
3528 3528
3529 3529 // Closure for iteration over bitmaps
3530 3530 class CMBitMapClosure : public BitMapClosure {
3531 3531 private:
3532 3532 // the bitmap that is being iterated over
3533 3533 CMBitMap* _nextMarkBitMap;
3534 3534 ConcurrentMark* _cm;
3535 3535 CMTask* _task;
3536 3536
3537 3537 public:
3538 3538 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3539 3539 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3540 3540
3541 3541 bool do_bit(size_t offset) {
3542 3542 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3543 3543 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3544 3544 assert( addr < _cm->finger(), "invariant");
3545 3545
3546 3546 statsOnly( _task->increase_objs_found_on_bitmap() );
3547 3547 assert(addr >= _task->finger(), "invariant");
3548 3548
3549 3549 // We move that task's local finger along.
3550 3550 _task->move_finger_to(addr);
3551 3551
3552 3552 _task->scan_object(oop(addr));
3553 3553 // we only partially drain the local queue and global stack
3554 3554 _task->drain_local_queue(true);
3555 3555 _task->drain_global_stack(true);
3556 3556
3557 3557 // if the has_aborted flag has been raised, we need to bail out of
3558 3558 // the iteration
3559 3559 return !_task->has_aborted();
3560 3560 }
3561 3561 };
3562 3562
3563 3563 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3564 3564 ConcurrentMark* cm,
3565 3565 CMTask* task)
3566 3566 : _g1h(g1h), _cm(cm), _task(task) {
3567 3567 assert(_ref_processor == NULL, "should be initialized to NULL");
3568 3568
3569 3569 if (G1UseConcMarkReferenceProcessing) {
3570 3570 _ref_processor = g1h->ref_processor_cm();
3571 3571 assert(_ref_processor != NULL, "should not be NULL");
3572 3572 }
3573 3573 }
3574 3574
3575 3575 void CMTask::setup_for_region(HeapRegion* hr) {
3576 3576 assert(hr != NULL,
3577 3577 "claim_region() should have filtered out NULL regions");
3578 3578 assert(!hr->continuesHumongous(),
3579 3579 "claim_region() should have filtered out continues humongous regions");
3580 3580
3581 3581 if (_cm->verbose_low()) {
3582 3582 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3583 3583 _worker_id, p2i(hr));
3584 3584 }
3585 3585
3586 3586 _curr_region = hr;
3587 3587 _finger = hr->bottom();
3588 3588 update_region_limit();
3589 3589 }
3590 3590
3591 3591 void CMTask::update_region_limit() {
3592 3592 HeapRegion* hr = _curr_region;
3593 3593 HeapWord* bottom = hr->bottom();
3594 3594 HeapWord* limit = hr->next_top_at_mark_start();
3595 3595
3596 3596 if (limit == bottom) {
3597 3597 if (_cm->verbose_low()) {
3598 3598 gclog_or_tty->print_cr("[%u] found an empty region "
3599 3599 "["PTR_FORMAT", "PTR_FORMAT")",
3600 3600 _worker_id, p2i(bottom), p2i(limit));
3601 3601 }
3602 3602 // The region was collected underneath our feet.
3603 3603 // We set the finger to bottom to ensure that the bitmap
3604 3604 // iteration that will follow this will not do anything.
3605 3605 // (this is not a condition that holds when we set the region up,
3606 3606 // as the region is not supposed to be empty in the first place)
3607 3607 _finger = bottom;
3608 3608 } else if (limit >= _region_limit) {
3609 3609 assert(limit >= _finger, "peace of mind");
3610 3610 } else {
3611 3611 assert(limit < _region_limit, "only way to get here");
3612 3612 // This can happen under some pretty unusual circumstances. An
3613 3613 // evacuation pause empties the region underneath our feet (NTAMS
3614 3614 // at bottom). We then do some allocation in the region (NTAMS
3615 3615 // stays at bottom), followed by the region being used as a GC
3616 3616 // alloc region (NTAMS will move to top() and the objects
3617 3617 // originally below it will be grayed). All objects now marked in
3618 3618 // the region are explicitly grayed, if below the global finger,
3619 3619 // and we do not need in fact to scan anything else. So, we simply
3620 3620 // set _finger to be limit to ensure that the bitmap iteration
3621 3621 // doesn't do anything.
3622 3622 _finger = limit;
3623 3623 }
3624 3624
3625 3625 _region_limit = limit;
3626 3626 }
3627 3627
3628 3628 void CMTask::giveup_current_region() {
3629 3629 assert(_curr_region != NULL, "invariant");
3630 3630 if (_cm->verbose_low()) {
3631 3631 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3632 3632 _worker_id, p2i(_curr_region));
3633 3633 }
3634 3634 clear_region_fields();
3635 3635 }
3636 3636
3637 3637 void CMTask::clear_region_fields() {
3638 3638 // Values for these three fields that indicate that we're not
3639 3639 // holding on to a region.
3640 3640 _curr_region = NULL;
3641 3641 _finger = NULL;
3642 3642 _region_limit = NULL;
3643 3643 }
3644 3644
3645 3645 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3646 3646 if (cm_oop_closure == NULL) {
3647 3647 assert(_cm_oop_closure != NULL, "invariant");
3648 3648 } else {
3649 3649 assert(_cm_oop_closure == NULL, "invariant");
3650 3650 }
3651 3651 _cm_oop_closure = cm_oop_closure;
3652 3652 }
3653 3653
3654 3654 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3655 3655 guarantee(nextMarkBitMap != NULL, "invariant");
3656 3656
3657 3657 if (_cm->verbose_low()) {
3658 3658 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3659 3659 }
3660 3660
3661 3661 _nextMarkBitMap = nextMarkBitMap;
3662 3662 clear_region_fields();
3663 3663
3664 3664 _calls = 0;
3665 3665 _elapsed_time_ms = 0.0;
3666 3666 _termination_time_ms = 0.0;
3667 3667 _termination_start_time_ms = 0.0;
3668 3668
3669 3669 #if _MARKING_STATS_
3670 3670 _local_pushes = 0;
3671 3671 _local_pops = 0;
3672 3672 _local_max_size = 0;
3673 3673 _objs_scanned = 0;
3674 3674 _global_pushes = 0;
3675 3675 _global_pops = 0;
3676 3676 _global_max_size = 0;
3677 3677 _global_transfers_to = 0;
3678 3678 _global_transfers_from = 0;
3679 3679 _regions_claimed = 0;
3680 3680 _objs_found_on_bitmap = 0;
3681 3681 _satb_buffers_processed = 0;
3682 3682 _steal_attempts = 0;
3683 3683 _steals = 0;
3684 3684 _aborted = 0;
3685 3685 _aborted_overflow = 0;
3686 3686 _aborted_cm_aborted = 0;
3687 3687 _aborted_yield = 0;
3688 3688 _aborted_timed_out = 0;
3689 3689 _aborted_satb = 0;
3690 3690 _aborted_termination = 0;
3691 3691 #endif // _MARKING_STATS_
3692 3692 }
3693 3693
3694 3694 bool CMTask::should_exit_termination() {
3695 3695 regular_clock_call();
3696 3696 // This is called when we are in the termination protocol. We should
3697 3697 // quit if, for some reason, this task wants to abort or the global
3698 3698 // stack is not empty (this means that we can get work from it).
3699 3699 return !_cm->mark_stack_empty() || has_aborted();
3700 3700 }
3701 3701
3702 3702 void CMTask::reached_limit() {
3703 3703 assert(_words_scanned >= _words_scanned_limit ||
3704 3704 _refs_reached >= _refs_reached_limit ,
3705 3705 "shouldn't have been called otherwise");
3706 3706 regular_clock_call();
3707 3707 }
3708 3708
3709 3709 void CMTask::regular_clock_call() {
3710 3710 if (has_aborted()) return;
3711 3711
3712 3712 // First, we need to recalculate the words scanned and refs reached
3713 3713 // limits for the next clock call.
3714 3714 recalculate_limits();
3715 3715
3716 3716 // During the regular clock call we do the following
3717 3717
3718 3718 // (1) If an overflow has been flagged, then we abort.
3719 3719 if (_cm->has_overflown()) {
3720 3720 set_has_aborted();
3721 3721 return;
3722 3722 }
3723 3723
3724 3724 // If we are not concurrent (i.e. we're doing remark) we don't need
3725 3725 // to check anything else. The other steps are only needed during
3726 3726 // the concurrent marking phase.
3727 3727 if (!concurrent()) return;
3728 3728
3729 3729 // (2) If marking has been aborted for Full GC, then we also abort.
3730 3730 if (_cm->has_aborted()) {
3731 3731 set_has_aborted();
3732 3732 statsOnly( ++_aborted_cm_aborted );
3733 3733 return;
3734 3734 }
3735 3735
3736 3736 double curr_time_ms = os::elapsedVTime() * 1000.0;
3737 3737
3738 3738 // (3) If marking stats are enabled, then we update the step history.
3739 3739 #if _MARKING_STATS_
3740 3740 if (_words_scanned >= _words_scanned_limit) {
3741 3741 ++_clock_due_to_scanning;
3742 3742 }
3743 3743 if (_refs_reached >= _refs_reached_limit) {
3744 3744 ++_clock_due_to_marking;
3745 3745 }
3746 3746
3747 3747 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3748 3748 _interval_start_time_ms = curr_time_ms;
3749 3749 _all_clock_intervals_ms.add(last_interval_ms);
3750 3750
3751 3751 if (_cm->verbose_medium()) {
3752 3752 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3753 3753 "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
3754 3754 _worker_id, last_interval_ms,
3755 3755 _words_scanned,
3756 3756 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3757 3757 _refs_reached,
3758 3758 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3759 3759 }
3760 3760 #endif // _MARKING_STATS_
3761 3761
3762 3762 // (4) We check whether we should yield. If we have to, then we abort.
3763 3763 if (SuspendibleThreadSet::should_yield()) {
3764 3764 // We should yield. To do this we abort the task. The caller is
3765 3765 // responsible for yielding.
3766 3766 set_has_aborted();
3767 3767 statsOnly( ++_aborted_yield );
3768 3768 return;
3769 3769 }
3770 3770
3771 3771 // (5) We check whether we've reached our time quota. If we have,
3772 3772 // then we abort.
3773 3773 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3774 3774 if (elapsed_time_ms > _time_target_ms) {
3775 3775 set_has_aborted();
3776 3776 _has_timed_out = true;
3777 3777 statsOnly( ++_aborted_timed_out );
3778 3778 return;
3779 3779 }
3780 3780
3781 3781 // (6) Finally, we check whether there are enough completed SATB
3782 3782 // buffers available for processing. If there are, we abort.
3783 3783 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3784 3784 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3785 3785 if (_cm->verbose_low()) {
3786 3786 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3787 3787 _worker_id);
3788 3788 }
3789 3789 // we do need to process SATB buffers, we'll abort and restart
3790 3790 // the marking task to do so
3791 3791 set_has_aborted();
3792 3792 statsOnly( ++_aborted_satb );
3793 3793 return;
3794 3794 }
3795 3795 }
3796 3796
3797 3797 void CMTask::recalculate_limits() {
3798 3798 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3799 3799 _words_scanned_limit = _real_words_scanned_limit;
3800 3800
3801 3801 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3802 3802 _refs_reached_limit = _real_refs_reached_limit;
3803 3803 }
3804 3804
3805 3805 void CMTask::decrease_limits() {
3806 3806 // This is called when we believe that we're going to do an infrequent
3807 3807 // operation which will increase the per byte scanned cost (i.e. move
3808 3808 // entries to/from the global stack). It basically tries to decrease the
3809 3809 // scanning limit so that the clock is called earlier.
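// Concretely (mirroring the arithmetic below): the new limit becomes
// real_limit - 3 * period / 4, so the next regular_clock_call() fires
// after only about a quarter of the usual scanning period.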
3810 3810
3811 3811 if (_cm->verbose_medium()) {
3812 3812 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3813 3813 }
3814 3814
3815 3815 _words_scanned_limit = _real_words_scanned_limit -
3816 3816 3 * words_scanned_period / 4;
3817 3817 _refs_reached_limit = _real_refs_reached_limit -
3818 3818 3 * refs_reached_period / 4;
3819 3819 }
3820 3820
3821 3821 void CMTask::move_entries_to_global_stack() {
3822 3822 // local array where we'll store the entries that will be popped
3823 3823 // from the local queue
3824 3824 oop buffer[global_stack_transfer_size];
3825 3825
3826 3826 int n = 0;
3827 3827 oop obj;
3828 3828 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3829 3829 buffer[n] = obj;
3830 3830 ++n;
3831 3831 }
3832 3832
3833 3833 if (n > 0) {
3834 3834 // we popped at least one entry from the local queue
3835 3835
3836 3836 statsOnly( ++_global_transfers_to; _local_pops += n );
3837 3837
3838 3838 if (!_cm->mark_stack_push(buffer, n)) {
3839 3839 if (_cm->verbose_low()) {
3840 3840 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3841 3841 _worker_id);
3842 3842 }
3843 3843 set_has_aborted();
3844 3844 } else {
3845 3845 // the transfer was successful
3846 3846
3847 3847 if (_cm->verbose_medium()) {
3848 3848 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3849 3849 _worker_id, n);
3850 3850 }
3851 3851 statsOnly( int tmp_size = _cm->mark_stack_size();
3852 3852 if (tmp_size > _global_max_size) {
3853 3853 _global_max_size = tmp_size;
3854 3854 }
3855 3855 _global_pushes += n );
3856 3856 }
3857 3857 }
3858 3858
3859 3859 // this operation was quite expensive, so decrease the limits
3860 3860 decrease_limits();
3861 3861 }
3862 3862
3863 3863 void CMTask::get_entries_from_global_stack() {
3864 3864 // local array where we'll store the entries that will be popped
3865 3865 // from the global stack.
3866 3866 oop buffer[global_stack_transfer_size];
3867 3867 int n;
3868 3868 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3869 3869 assert(n <= global_stack_transfer_size,
3870 3870 "we should not pop more than the given limit");
3871 3871 if (n > 0) {
3872 3872 // yes, we did actually pop at least one entry
3873 3873
3874 3874 statsOnly( ++_global_transfers_from; _global_pops += n );
3875 3875 if (_cm->verbose_medium()) {
3876 3876 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3877 3877 _worker_id, n);
3878 3878 }
3879 3879 for (int i = 0; i < n; ++i) {
3880 3880 bool success = _task_queue->push(buffer[i]);
3881 3881 // We only call this when the local queue is empty or under a
3882 3882 // given target limit. So, we do not expect this push to fail.
3883 3883 assert(success, "invariant");
3884 3884 }
3885 3885
3886 3886 statsOnly( int tmp_size = _task_queue->size();
3887 3887 if (tmp_size > _local_max_size) {
3888 3888 _local_max_size = tmp_size;
3889 3889 }
3890 3890 _local_pushes += n );
3891 3891 }
3892 3892
3893 3893 // this operation was quite expensive, so decrease the limits
3894 3894 decrease_limits();
3895 3895 }
3896 3896
3897 3897 void CMTask::drain_local_queue(bool partially) {
3898 3898 if (has_aborted()) return;
3899 3899
3900 3900 // Decide what the target size is, depending on whether we're going to
3901 3901 // drain it partially (so that other tasks can steal if they run out
3902 3902 // of things to do) or totally (at the very end).
3903 3903 size_t target_size;
3904 3904 if (partially) {
3905 3905 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3906 3906 } else {
3907 3907 target_size = 0;
3908 3908 }
3909 3909
3910 3910 if (_task_queue->size() > target_size) {
3911 3911 if (_cm->verbose_high()) {
3912 3912 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3913 3913 _worker_id, target_size);
3914 3914 }
3915 3915
3916 3916 oop obj;
3917 3917 bool ret = _task_queue->pop_local(obj);
3918 3918 while (ret) {
3919 3919 statsOnly( ++_local_pops );
3920 3920
3921 3921 if (_cm->verbose_high()) {
3922 3922 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3923 3923 p2i((void*) obj));
3924 3924 }
3925 3925
3926 3926 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3927 3927 assert(!_g1h->is_on_master_free_list(
3928 3928 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3929 3929
3930 3930 scan_object(obj);
3931 3931
3932 3932 if (_task_queue->size() <= target_size || has_aborted()) {
3933 3933 ret = false;
3934 3934 } else {
3935 3935 ret = _task_queue->pop_local(obj);
3936 3936 }
3937 3937 }
3938 3938
3939 3939 if (_cm->verbose_high()) {
3940 3940 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3941 3941 _worker_id, _task_queue->size());
3942 3942 }
3943 3943 }
3944 3944 }
3945 3945
3946 3946 void CMTask::drain_global_stack(bool partially) {
3947 3947 if (has_aborted()) return;
3948 3948
3949 3949 // We have a policy to drain the local queue before we attempt to
3950 3950 // drain the global stack.
3951 3951 assert(partially || _task_queue->size() == 0, "invariant");
3952 3952
3953 3953 // Decide what the target size is, depending on whether we're going to
3954 3954 // drain it partially (so that other tasks can steal if they run out
3955 3955 // of things to do) or totally (at the very end). Notice that,
3956 3956 // because we move entries from the global stack in chunks or
3957 3957 // because another task might be doing the same, we might in fact
3958 3958 // drop below the target. But, this is not a problem.
3959 3959 size_t target_size;
3960 3960 if (partially) {
3961 3961 target_size = _cm->partial_mark_stack_size_target();
3962 3962 } else {
3963 3963 target_size = 0;
3964 3964 }
3965 3965
3966 3966 if (_cm->mark_stack_size() > target_size) {
3967 3967 if (_cm->verbose_low()) {
3968 3968 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3969 3969 _worker_id, target_size);
3970 3970 }
3971 3971
3972 3972 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3973 3973 get_entries_from_global_stack();
3974 3974 drain_local_queue(partially);
3975 3975 }
3976 3976
3977 3977 if (_cm->verbose_low()) {
3978 3978 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3979 3979 _worker_id, _cm->mark_stack_size());
3980 3980 }
3981 3981 }
3982 3982 }
3983 3983
3984 3984 // SATB Queue has several assumptions on whether to call the par or
3985 3985 // non-par versions of the methods. This is why some of the code is
3986 3986 // replicated. We should really get rid of the single-threaded version
3987 3987 // of the code to simplify things.
3988 3988 void CMTask::drain_satb_buffers() {
3989 3989 if (has_aborted()) return;
3990 3990
3991 3991 // We set this so that the regular clock knows that we're in the
3992 3992 // middle of draining buffers and doesn't set the abort flag when it
3993 3993 // notices that SATB buffers are available for draining. It'd be
3994 3994 // very counterproductive if it did that. :-)
3995 3995 _draining_satb_buffers = true;
3996 3996
3997 3997 CMObjectClosure oc(this);
3998 3998 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3999 3999 if (G1CollectedHeap::use_parallel_gc_threads()) {
4000 4000 satb_mq_set.set_par_closure(_worker_id, &oc);
4001 4001 } else {
4002 4002 satb_mq_set.set_closure(&oc);
4003 4003 }
4004 4004
4005 4005 // This keeps claiming and applying the closure to completed buffers
4006 4006 // until we run out of buffers or we need to abort.
4007 4007 if (G1CollectedHeap::use_parallel_gc_threads()) {
4008 4008 while (!has_aborted() &&
4009 4009 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
4010 4010 if (_cm->verbose_medium()) {
4011 4011 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
4012 4012 }
4013 4013 statsOnly( ++_satb_buffers_processed );
4014 4014 regular_clock_call();
4015 4015 }
4016 4016 } else {
4017 4017 while (!has_aborted() &&
4018 4018 satb_mq_set.apply_closure_to_completed_buffer()) {
4019 4019 if (_cm->verbose_medium()) {
4020 4020 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
4021 4021 }
4022 4022 statsOnly( ++_satb_buffers_processed );
4023 4023 regular_clock_call();
4024 4024 }
4025 4025 }
4026 4026
4027 4027 _draining_satb_buffers = false;
4028 4028
4029 4029 assert(has_aborted() ||
4030 4030 concurrent() ||
4031 4031 satb_mq_set.completed_buffers_num() == 0, "invariant");
4032 4032
4033 4033 if (G1CollectedHeap::use_parallel_gc_threads()) {
4034 4034 satb_mq_set.set_par_closure(_worker_id, NULL);
4035 4035 } else {
4036 4036 satb_mq_set.set_closure(NULL);
4037 4037 }
4038 4038
4039 4039 // again, this was a potentially expensive operation, decrease the
4040 4040 // limits to get the regular clock call early
4041 4041 decrease_limits();
4042 4042 }
4043 4043
4044 4044 void CMTask::print_stats() {
4045 4045 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
4046 4046 _worker_id, _calls);
4047 4047 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4048 4048 _elapsed_time_ms, _termination_time_ms);
4049 4049 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4050 4050 _step_times_ms.num(), _step_times_ms.avg(),
4051 4051 _step_times_ms.sd());
4052 4052 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4053 4053 _step_times_ms.maximum(), _step_times_ms.sum());
4054 4054
4055 4055 #if _MARKING_STATS_
4056 4056 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4057 4057 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4058 4058 _all_clock_intervals_ms.sd());
4059 4059 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4060 4060 _all_clock_intervals_ms.maximum(),
4061 4061 _all_clock_intervals_ms.sum());
4062 4062 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
4063 4063 _clock_due_to_scanning, _clock_due_to_marking);
4064 4064 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
4065 4065 _objs_scanned, _objs_found_on_bitmap);
4066 4066 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
4067 4067 _local_pushes, _local_pops, _local_max_size);
4068 4068 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
4069 4069 _global_pushes, _global_pops, _global_max_size);
4070 4070 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
4071 4071 _global_transfers_to,_global_transfers_from);
4072 4072 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
4073 4073 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
4074 4074 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
4075 4075 _steal_attempts, _steals);
4076 4076 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
4077 4077 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
4078 4078 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4079 4079 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
4080 4080 _aborted_timed_out, _aborted_satb, _aborted_termination);
4081 4081 #endif // _MARKING_STATS_
4082 4082 }
4083 4083
4084 4084 /*****************************************************************************
4085 4085
4086 4086 The do_marking_step(time_target_ms, ...) method is the building
4087 4087 block of the parallel marking framework. It can be called in parallel
4088 4088 with other invocations of do_marking_step() on different tasks
4089 4089 (but only one per task, obviously) and concurrently with the
4090 4090 mutator threads, or during remark, hence it eliminates the need
4091 4091 for two versions of the code. When called during remark, it will
4092 4092 pick up from where the task left off during the concurrent marking
4093 4093 phase. Interestingly, tasks are also claimable during evacuation
4094 4094 pauses, since do_marking_step() ensures that it aborts before
4095 4095 it needs to yield.
4096 4096
4097 4097 The data structures that it uses to do marking work are the
4098 4098 following:
4099 4099
4100 4100 (1) Marking Bitmap. If there are gray objects that appear only
4101 4101 on the bitmap (this happens either when dealing with an overflow
4102 4102 or when the initial marking phase has simply marked the roots
4103 4103 and didn't push them on the stack), then tasks claim heap
4104 4104 regions whose bitmap they then scan to find gray objects. A
4105 4105 global finger indicates where the end of the last claimed region
4106 4106 is. A local finger indicates how far into the region a task has
4107 4107 scanned. The two fingers are used to determine how to gray an
4108 4108 object (i.e. whether simply marking it is OK, as it will be
4109 4109 visited by a task in the future, or whether it needs to be also
4110 4110 pushed on a stack).
4111 4111
4112 4112 (2) Local Queue. The local queue of the task which is accessed
4113 4113 reasonably efficiently by the task. Other tasks can steal from
4114 4114 it when they run out of work. Throughout the marking phase, a
4115 4115 task attempts to keep its local queue short but not totally
4116 4116 empty, so that entries are available for stealing by other
4117 4117 tasks. Only when there is no more work, a task will totally
4118 4118 drain its local queue.
4119 4119
4120 4120 (3) Global Mark Stack. This handles local queue overflow. During
4121 4121 marking only sets of entries are moved between it and the local
4122 4122 queues, as access to it requires a mutex and more fine-grain
4123 4123 interaction with it which might cause contention. If it
4124 4124 overflows, then the marking phase should restart and iterate
4125 4125 over the bitmap to identify gray objects. Throughout the marking
4126 4126 phase, tasks attempt to keep the global mark stack at a small
4127 4127 length but not totally empty, so that entries are available for
4128 4128 popping by other tasks. Only when there is no more work, tasks
4129 4129 will totally drain the global mark stack.
4130 4130
4131 4131 (4) SATB Buffer Queue. This is where completed SATB buffers are
4132 4132 made available. Buffers are regularly removed from this queue
4133 4133 and scanned for roots, so that the queue doesn't get too
4134 4134 long. During remark, all completed buffers are processed, as
4135 4135 well as the filled-in parts of any uncompleted buffers.
4136 4136
4137 4137 The do_marking_step() method tries to abort when the time target
4138 4138 has been reached. There are a few other cases when the
4139 4139 do_marking_step() method also aborts:
4140 4140
4141 4141 (1) When the marking phase has been aborted (after a Full GC).
4142 4142
4143 4143 (2) When a global overflow (on the global stack) has been
4144 4144 triggered. Before the task aborts, it will actually sync up with
4145 4145 the other tasks to ensure that all the marking data structures
4146 4146 (local queues, stacks, fingers etc.) are re-initialized so that
4147 4147 when do_marking_step() completes, the marking phase can
4148 4148 immediately restart.
4149 4149
4150 4150 (3) When enough completed SATB buffers are available. The
4151 4151 do_marking_step() method only tries to drain SATB buffers right
4152 4152 at the beginning. So, if enough buffers are available, the
4153 4153 marking step aborts and the SATB buffers are processed at
4154 4154 the beginning of the next invocation.
4155 4155
4156 4156 (4) To yield. When we have to yield, we abort and do the yield
4157 4157 right at the end of do_marking_step(). This saves us a lot
4158 4158 of hassle as, by yielding, we might allow a Full GC. If this
4159 4159 happens then objects will be compacted underneath our feet, the
4160 4160 heap might shrink, etc. We avoid having to check for all this by
4161 4161 just aborting and doing the yield right at the end.
4162 4162
4163 4163 From the above it follows that the do_marking_step() method should
4164 4164 be called in a loop (or, otherwise, regularly) until it completes.
4165 4165
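For example, the concurrent driver (CMConcurrentMarkingTask) does,
in essence, the following (a simplified sketch; yield handling and
timing details elided):

  do {
    the_task->do_marking_step(G1ConcMarkStepDurationMillis,
                              true  /* do_termination */,
                              false /* is_serial */);
    // ... check for yield requests, sleep briefly if we
    // overshot the time target, etc. ...
  } while (!_cm->has_aborted() && the_task->has_aborted());
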
4166 4166 If a marking step completes without its has_aborted() flag being
4167 4167 true, it means it has completed the current marking phase (and
4168 4168 also all other marking tasks have done so and have all synced up).
4169 4169
4170 4170 A method called regular_clock_call() is invoked "regularly" (at
4171 4171 sub-ms intervals) throughout marking. It is this clock method that
4172 4172 checks all the abort conditions mentioned above and
4173 4173 decides when the task should abort. A work-based scheme is used to
4174 4174 trigger this clock method: it is called when the number of object
4175 4175 words the marking phase has scanned or the number of references it
4176 4176 has visited reaches a given limit. Additional invocations of the
4177 4177 clock method have been planted in a few other strategic places
4178 4178 too. The initial reason for the clock method was to avoid calling
4179 4179 vtime too frequently, as it is quite expensive. So, once it was in
4180 4180 place, it was natural to piggy-back all the other conditions on it
4181 4181 too, rather than check them constantly throughout the code.
4182 4182
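In sketch form, the work-based trigger looks like this (condensed
from the bookkeeping in CMTask; _words_scanned, _refs_reached and
their *_limit counterparts are the real fields, the surrounding
code is simplified):

  // after scanning an object / visiting a reference:
  _words_scanned += obj->size();
  _refs_reached  += 1;
  if (_words_scanned >= _words_scanned_limit ||
      _refs_reached  >= _refs_reached_limit) {
    regular_clock_call();  // may set the has_aborted() flag
  }
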
4183 4183 If do_termination is true then do_marking_step will enter its
4184 4184 termination protocol.
4185 4185
4186 4186 The value of is_serial must be true when do_marking_step is being
4187 4187 called serially (i.e. by the VMThread) and do_marking_step should
4188 4188 skip any synchronization in the termination and overflow code.
4189 4189 Examples include the serial remark code and the serial reference
4190 4190 processing closures.
4191 4191
4192 4192 The value of is_serial must be false when do_marking_step is
4193 4193 being called by any of the worker threads in a work gang.
4194 4194 Examples include the concurrent marking code (CMMarkingTask),
4195 4195 the MT remark code, and the MT reference processing closures.
4196 4196
4197 4197 *****************************************************************************/
4198 4198
4199 4199 void CMTask::do_marking_step(double time_target_ms,
4200 4200 bool do_termination,
4201 4201 bool is_serial) {
4202 4202 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4203 4203 assert(concurrent() == _cm->concurrent(), "they should be the same");
4204 4204
4205 4205 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4206 4206 assert(_task_queues != NULL, "invariant");
4207 4207 assert(_task_queue != NULL, "invariant");
4208 4208 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4209 4209
4210 4210 assert(!_claimed,
4211 4211 "only one thread should claim this task at any one time");
4212 4212
4213 4213 // OK, this doesn't safeguard against all possible scenarios, as it is
4214 4214 // possible for two threads to set the _claimed flag at the same
4215 4215 // time. But it is only for debugging purposes anyway and it will
4216 4216 // catch most problems.
4217 4217 _claimed = true;
4218 4218
4219 4219 _start_time_ms = os::elapsedVTime() * 1000.0;
4220 4220 statsOnly( _interval_start_time_ms = _start_time_ms );
4221 4221
4222 4222 // If do_stealing is true then do_marking_step will attempt to
4223 4223 // steal work from the other CMTasks. It only makes sense to
4224 4224 // enable stealing when the termination protocol is enabled
4225 4225 // and do_marking_step() is not being called serially.
4226 4226 bool do_stealing = do_termination && !is_serial;
4227 4227
4228 4228 double diff_prediction_ms =
4229 4229 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4230 4230 _time_target_ms = time_target_ms - diff_prediction_ms;
4231 4231
4232 4232 // set up the variables that are used in the work-based scheme to
4233 4233 // call the regular clock method
4234 4234 _words_scanned = 0;
4235 4235 _refs_reached = 0;
4236 4236 recalculate_limits();
4237 4237
4238 4238 // clear all flags
4239 4239 clear_has_aborted();
4240 4240 _has_timed_out = false;
4241 4241 _draining_satb_buffers = false;
4242 4242
4243 4243 ++_calls;
4244 4244
4245 4245 if (_cm->verbose_low()) {
4246 4246 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4247 4247 "target = %1.2lfms >>>>>>>>>>",
4248 4248 _worker_id, _calls, _time_target_ms);
4249 4249 }
4250 4250
4251 4251 // Set up the bitmap and oop closures. Anything that uses them is
4252 4252 // eventually called from this method, so it is OK to allocate these
4253 4253 // statically.
4254 4254 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4255 4255 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4256 4256 set_cm_oop_closure(&cm_oop_closure);
4257 4257
4258 4258 if (_cm->has_overflown()) {
4259 4259 // This can happen if the mark stack overflows during a GC pause
4260 4260 // and this task, after a yield point, restarts. We have to abort
4261 4261 // as we need to get into the overflow protocol which happens
4262 4262 // right at the end of this task.
4263 4263 set_has_aborted();
4264 4264 }
4265 4265
4266 4266 // First drain any available SATB buffers. After this, we will not
4267 4267 // look at SATB buffers before the next invocation of this method.
4268 4268 // If enough completed SATB buffers are queued up, the regular clock
4269 4269 // will abort this task so that it restarts.
4270 4270 drain_satb_buffers();
4271 4271 // ...then partially drain the local queue and the global stack
4272 4272 drain_local_queue(true);
4273 4273 drain_global_stack(true);
4274 4274
4275 4275 do {
4276 4276 if (!has_aborted() && _curr_region != NULL) {
4277 4277 // This means that we're already holding on to a region.
4278 4278 assert(_finger != NULL, "if region is not NULL, then the finger "
4279 4279 "should not be NULL either");
4280 4280
4281 4281 // We might have restarted this task after an evacuation pause
4282 4282 // which might have evacuated the region we're holding on to
4283 4283 // underneath our feet. Let's read its limit again to make sure
4284 4284 // that we do not iterate over a region of the heap that
4285 4285 // contains garbage (update_region_limit() will also move
4286 4286 // _finger to the start of the region if it is found empty).
4287 4287 update_region_limit();
4288 4288 // We will start from _finger not from the start of the region,
4289 4289 // as we might be restarting this task after aborting half-way
4290 4290 // through scanning this region. In this case, _finger points to
4291 4291 // the address where we last found a marked object. If this is a
4292 4292 // fresh region, _finger points to start().
4293 4293 MemRegion mr = MemRegion(_finger, _region_limit);
4294 4294
4295 4295 if (_cm->verbose_low()) {
4296 4296 gclog_or_tty->print_cr("[%u] we're scanning part "
4297 4297 "["PTR_FORMAT", "PTR_FORMAT") "
4298 4298 "of region "HR_FORMAT,
4299 4299 _worker_id, p2i(_finger), p2i(_region_limit),
4300 4300 HR_FORMAT_PARAMS(_curr_region));
4301 4301 }
4302 4302
4303 4303 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4304 4304 "humongous regions should go around loop once only");
4305 4305
4306 4306 // Some special cases:
4307 4307 // If the memory region is empty, we can just give up the region.
4308 4308 // If the current region is humongous then we only need to check
4309 4309 // the bitmap for the bit associated with the start of the object,
4310 4310 // scan the object if it's live, and give up the region.
4311 4311 // Otherwise, let's iterate over the bitmap of the part of the region
4312 4312 // that is left.
4313 4313 // If the iteration is successful, give up the region.
4314 4314 if (mr.is_empty()) {
4315 4315 giveup_current_region();
4316 4316 regular_clock_call();
4317 4317 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4318 4318 if (_nextMarkBitMap->isMarked(mr.start())) {
4319 4319 // The object is marked - apply the closure
4320 4320 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4321 4321 bitmap_closure.do_bit(offset);
4322 4322 }
4323 4323 // Even if this task aborted while scanning the humongous object
4324 4324 // we can (and should) give up the current region.
4325 4325 giveup_current_region();
4326 4326 regular_clock_call();
4327 4327 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4328 4328 giveup_current_region();
4329 4329 regular_clock_call();
4330 4330 } else {
4331 4331 assert(has_aborted(), "currently the only way to do so");
4332 4332 // The only way to abort the bitmap iteration is to return
4333 4333 // false from the do_bit() method. However, inside the
4334 4334 // do_bit() method we move the _finger to point to the
4335 4335 // object currently being looked at. So, if we bail out, we
4336 4336 // have definitely set _finger to something non-null.
4337 4337 assert(_finger != NULL, "invariant");
4338 4338
4339 4339 // Region iteration was actually aborted. So now _finger
4340 4340 // points to the address of the object we last scanned. If we
4341 4341 // leave it there, when we restart this task, we will rescan
4342 4342 // the object. It is easy to avoid this. We move the finger by
4343 4343 // enough to point to the next possible object header (the
4344 4344 // bitmap knows by how much we need to move it as it knows its
4345 4345 // granularity).
4346 4346 assert(_finger < _region_limit, "invariant");
4347 4347 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4348 4348 // Check if bitmap iteration was aborted while scanning the last object
4349 4349 if (new_finger >= _region_limit) {
4350 4350 giveup_current_region();
4351 4351 } else {
4352 4352 move_finger_to(new_finger);
4353 4353 }
4354 4354 }
4355 4355 }
4356 4356 // At this point we have either completed iterating over the
4357 4357 // region we were holding on to, or we have aborted.
4358 4358
4359 4359 // We then partially drain the local queue and the global stack.
4360 4360 // (Do we really need this?)
4361 4361 drain_local_queue(true);
4362 4362 drain_global_stack(true);
4363 4363
4364 4364 // Read the note on the claim_region() method on why it might
4365 4365 // return NULL with potentially more regions available for
4366 4366 // claiming and why we have to check out_of_regions() to determine
4367 4367 // whether we're done or not.
4368 4368 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4369 4369 // We are going to try to claim a new region. We should have
4370 4370 // given up on the previous one.
4371 4371 // Separated the asserts so that we know which one fires.
4372 4372 assert(_curr_region == NULL, "invariant");
4373 4373 assert(_finger == NULL, "invariant");
4374 4374 assert(_region_limit == NULL, "invariant");
4375 4375 if (_cm->verbose_low()) {
4376 4376 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4377 4377 }
4378 4378 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4379 4379 if (claimed_region != NULL) {
4380 4380 // Yes, we managed to claim one
4381 4381 statsOnly( ++_regions_claimed );
4382 4382
4383 4383 if (_cm->verbose_low()) {
4384 4384 gclog_or_tty->print_cr("[%u] we successfully claimed "
4385 4385 "region "PTR_FORMAT,
4386 4386 _worker_id, p2i(claimed_region));
4387 4387 }
4388 4388
4389 4389 setup_for_region(claimed_region);
4390 4390 assert(_curr_region == claimed_region, "invariant");
4391 4391 }
4392 4392 // It is important to call the regular clock here. It might take
4393 4393 // a while to claim a region if, for example, we hit a large
4394 4394 // block of empty regions. So we need to call the regular clock
4395 4395 // method once round the loop to make sure it's called
4396 4396 // frequently enough.
4397 4397 regular_clock_call();
4398 4398 }
4399 4399
4400 4400 if (!has_aborted() && _curr_region == NULL) {
4401 4401 assert(_cm->out_of_regions(),
4402 4402 "at this point we should be out of regions");
4403 4403 }
4404 4404 } while ( _curr_region != NULL && !has_aborted());
4405 4405
4406 4406 if (!has_aborted()) {
4407 4407 // We cannot check whether the global stack is empty, since other
4408 4408 // tasks might be pushing objects to it concurrently.
4409 4409 assert(_cm->out_of_regions(),
4410 4410 "at this point we should be out of regions");
4411 4411
4412 4412 if (_cm->verbose_low()) {
4413 4413 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4414 4414 }
4415 4415
4416 4416 // Try to reduce the number of available SATB buffers so that
4417 4417 // remark has less work to do.
4418 4418 drain_satb_buffers();
4419 4419 }
4420 4420
4421 4421 // Since we've done everything else, we can now totally drain the
4422 4422 // local queue and global stack.
4423 4423 drain_local_queue(false);
4424 4424 drain_global_stack(false);
4425 4425
4426 4426 // Attempt at work stealing from other tasks' queues.
4427 4427 if (do_stealing && !has_aborted()) {
4428 4428 // We have not aborted. This means that we have finished all that
4429 4429 // we could. Let's try to do some stealing...
4430 4430
4431 4431 // We cannot check whether the global stack is empty, since other
4432 4432 // tasks might be pushing objects to it concurrently.
4433 4433 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4434 4434 "only way to reach here");
4435 4435
4436 4436 if (_cm->verbose_low()) {
4437 4437 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4438 4438 }
4439 4439
4440 4440 while (!has_aborted()) {
4441 4441 oop obj;
4442 4442 statsOnly( ++_steal_attempts );
4443 4443
4444 4444 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4445 4445 if (_cm->verbose_medium()) {
4446 4446 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4447 4447 _worker_id, p2i((void*) obj));
4448 4448 }
4449 4449
4450 4450 statsOnly( ++_steals );
4451 4451
4452 4452 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4453 4453 "any stolen object should be marked");
4454 4454 scan_object(obj);
4455 4455
4456 4456 // And since we're towards the end, let's totally drain the
4457 4457 // local queue and global stack.
4458 4458 drain_local_queue(false);
4459 4459 drain_global_stack(false);
4460 4460 } else {
4461 4461 break;
4462 4462 }
4463 4463 }
4464 4464 }
4465 4465
4466 4466 // If we are about to wrap up and go into termination, check if we
4467 4467 // should raise the overflow flag.
4468 4468 if (do_termination && !has_aborted()) {
4469 4469 if (_cm->force_overflow()->should_force()) {
4470 4470 _cm->set_has_overflown();
4471 4471 regular_clock_call();
4472 4472 }
4473 4473 }
4474 4474
4475 4475 // We still haven't aborted. Now, let's try to get into the
4476 4476 // termination protocol.
4477 4477 if (do_termination && !has_aborted()) {
4478 4478 // We cannot check whether the global stack is empty, since other
4479 4479 // tasks might be concurrently pushing objects on it.
4480 4480 // Separated the asserts so that we know which one fires.
4481 4481 assert(_cm->out_of_regions(), "only way to reach here");
4482 4482 assert(_task_queue->size() == 0, "only way to reach here");
4483 4483
4484 4484 if (_cm->verbose_low()) {
4485 4485 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4486 4486 }
4487 4487
4488 4488 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4489 4489
4490 4490 // The CMTask class also extends the TerminatorTerminator class,
4491 4491 // hence its should_exit_termination() method will also decide
4492 4492 // whether to exit the termination protocol or not.
4493 4493 bool finished = (is_serial ||
4494 4494 _cm->terminator()->offer_termination(this));
4495 4495 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4496 4496 _termination_time_ms +=
4497 4497 termination_end_time_ms - _termination_start_time_ms;
4498 4498
4499 4499 if (finished) {
4500 4500 // We're all done.
4501 4501
4502 4502 if (_worker_id == 0) {
4503 4503 // let's allow task 0 to do this
4504 4504 if (concurrent()) {
4505 4505 assert(_cm->concurrent_marking_in_progress(), "invariant");
4506 4506 // we need to set this to false before the next
4507 4507 // safepoint. This way we ensure that the marking phase
4508 4508 // doesn't observe any more heap expansions.
4509 4509 _cm->clear_concurrent_marking_in_progress();
4510 4510 }
4511 4511 }
4512 4512
4513 4513 // We can now guarantee that the global stack is empty, since
4514 4514 // all other tasks have finished. We separated the guarantees so
4515 4515 // that, if a condition is false, we can immediately find out
4516 4516 // which one.
4517 4517 guarantee(_cm->out_of_regions(), "only way to reach here");
4518 4518 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4519 4519 guarantee(_task_queue->size() == 0, "only way to reach here");
4520 4520 guarantee(!_cm->has_overflown(), "only way to reach here");
4521 4521 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4522 4522
4523 4523 if (_cm->verbose_low()) {
4524 4524 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4525 4525 }
4526 4526 } else {
4527 4527 // Apparently there's more work to do. Let's abort this task. Its
4528 4528 // caller will restart it and we can hopefully find more things to do.
4529 4529
4530 4530 if (_cm->verbose_low()) {
4531 4531 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4532 4532 _worker_id);
4533 4533 }
4534 4534
4535 4535 set_has_aborted();
4536 4536 statsOnly( ++_aborted_termination );
4537 4537 }
4538 4538 }
4539 4539
4540 4540 // Mainly for debugging purposes to make sure that a pointer to the
4541 4541 // closure which was statically allocated in this frame doesn't
4542 4542 // escape it by accident.
4543 4543 set_cm_oop_closure(NULL);
4544 4544 double end_time_ms = os::elapsedVTime() * 1000.0;
4545 4545 double elapsed_time_ms = end_time_ms - _start_time_ms;
4546 4546 // Update the step history.
4547 4547 _step_times_ms.add(elapsed_time_ms);
4548 4548
4549 4549 if (has_aborted()) {
4550 4550 // The task was aborted for some reason.
4551 4551
4552 4552 statsOnly( ++_aborted );
4553 4553
4554 4554 if (_has_timed_out) {
4555 4555 double diff_ms = elapsed_time_ms - _time_target_ms;
4556 4556 // Keep statistics of how well we did with respect to hitting
4557 4557 // our target only if we actually timed out (if we aborted for
4558 4558 // other reasons, then the results might get skewed).
4559 4559 _marking_step_diffs_ms.add(diff_ms);
4560 4560 }
4561 4561
4562 4562 if (_cm->has_overflown()) {
4563 4563 // This is the interesting one. We aborted because a global
4564 4564 // overflow was raised. This means we have to restart the
4565 4565 // marking phase and start iterating over regions. However, in
4566 4566 // order to do this we have to make sure that all tasks stop
4567 4567 // what they are doing and re-initialise in a safe manner. We
4568 4568 // will achieve this with the use of two barrier sync points.
4569 4569
4570 4570 if (_cm->verbose_low()) {
4571 4571 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4572 4572 }
4573 4573
4574 4574 if (!is_serial) {
4575 4575 // We only need to enter the sync barrier if being called
4576 4576 // from a parallel context
4577 4577 _cm->enter_first_sync_barrier(_worker_id);
4578 4578
4579 4579 // When we exit this sync barrier we know that all tasks have
4580 4580 // stopped doing marking work. So, it's now safe to
4581 4581 // re-initialise our data structures. At the end of this method,
4582 4582 // task 0 will clear the global data structures.
4583 4583 }
4584 4584
4585 4585 statsOnly( ++_aborted_overflow );
4586 4586
4587 4587 // We clear the local state of this task...
4588 4588 clear_region_fields();
4589 4589
4590 4590 if (!is_serial) {
4591 4591 // ...and enter the second barrier.
4592 4592 _cm->enter_second_sync_barrier(_worker_id);
4593 4593 }
4594 4594 // At this point, if we're in the concurrent phase of
4595 4595 // marking, everything has been re-initialized and we're
4596 4596 // ready to restart.
4597 4597 }
4598 4598
4599 4599 if (_cm->verbose_low()) {
4600 4600 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4601 4601 "elapsed = %1.2lfms <<<<<<<<<<",
4602 4602 _worker_id, _time_target_ms, elapsed_time_ms);
4603 4603 if (_cm->has_aborted()) {
4604 4604 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4605 4605 _worker_id);
4606 4606 }
4607 4607 }
4608 4608 } else {
4609 4609 if (_cm->verbose_low()) {
4610 4610 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4611 4611 "elapsed = %1.2lfms <<<<<<<<<<",
4612 4612 _worker_id, _time_target_ms, elapsed_time_ms);
4613 4613 }
4614 4614 }
4615 4615
4616 4616 _claimed = false;
4617 4617 }
4618 4618
4619 4619 CMTask::CMTask(uint worker_id,
4620 4620 ConcurrentMark* cm,
4621 4621 size_t* marked_bytes,
4622 4622 BitMap* card_bm,
4623 4623 CMTaskQueue* task_queue,
4624 4624 CMTaskQueueSet* task_queues)
4625 4625 : _g1h(G1CollectedHeap::heap()),
4626 4626 _worker_id(worker_id), _cm(cm),
4627 4627 _claimed(false),
4628 4628 _nextMarkBitMap(NULL), _hash_seed(17),
4629 4629 _task_queue(task_queue),
4630 4630 _task_queues(task_queues),
4631 4631 _cm_oop_closure(NULL),
4632 4632 _marked_bytes_array(marked_bytes),
4633 4633 _card_bm(card_bm) {
4634 4634 guarantee(task_queue != NULL, "invariant");
4635 4635 guarantee(task_queues != NULL, "invariant");
4636 4636
4637 4637 statsOnly( _clock_due_to_scanning = 0;
4638 4638 _clock_due_to_marking = 0 );
4639 4639
4640 4640 _marking_step_diffs_ms.add(0.5);
4641 4641 }
4642 4642
4643 4643 // These are formatting macros that are used below to ensure
4644 4644 // consistent formatting. The *_H_* versions are used to format the
4645 4645 // header for a particular value and they should be kept consistent
4646 4646 // with the corresponding macro. Also note that most of the macros add
4647 4647 // the necessary white space (as a prefix) which makes them a bit
4648 4648 // easier to compose.
4649 4649
4650 4650 // All the output lines are prefixed with this string to be able to
4651 4651 // identify them easily in a large log file.
4652 4652 #define G1PPRL_LINE_PREFIX "###"
4653 4653
4654 4654 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4655 4655 #ifdef _LP64
4656 4656 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4657 4657 #else // _LP64
4658 4658 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4659 4659 #endif // _LP64
4660 4660
4661 4661 // For per-region info
4662 4662 #define G1PPRL_TYPE_FORMAT " %-4s"
4663 4663 #define G1PPRL_TYPE_H_FORMAT " %4s"
4664 4664 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4665 4665 #define G1PPRL_BYTE_H_FORMAT " %9s"
4666 4666 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4667 4667 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4668 4668
4669 4669 // For summary info
4670 4670 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4671 4671 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4672 4672 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4673 4673 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4674 4674
4675 4675 G1PrintRegionLivenessInfoClosure::
4676 4676 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4677 4677 : _out(out),
4678 4678 _total_used_bytes(0), _total_capacity_bytes(0),
4679 4679 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4680 4680 _hum_used_bytes(0), _hum_capacity_bytes(0),
4681 4681 _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4682 4682 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4683 4683 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4684 4684 MemRegion g1_reserved = g1h->g1_reserved();
4685 4685 double now = os::elapsedTime();
4686 4686
4687 4687 // Print the header of the output.
4688 4688 _out->cr();
4689 4689 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4690 4690 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4691 4691 G1PPRL_SUM_ADDR_FORMAT("reserved")
4692 4692 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4693 4693 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4694 4694 HeapRegion::GrainBytes);
4695 4695 _out->print_cr(G1PPRL_LINE_PREFIX);
4696 4696 _out->print_cr(G1PPRL_LINE_PREFIX
4697 4697 G1PPRL_TYPE_H_FORMAT
4698 4698 G1PPRL_ADDR_BASE_H_FORMAT
4699 4699 G1PPRL_BYTE_H_FORMAT
4700 4700 G1PPRL_BYTE_H_FORMAT
4701 4701 G1PPRL_BYTE_H_FORMAT
4702 4702 G1PPRL_DOUBLE_H_FORMAT
4703 4703 G1PPRL_BYTE_H_FORMAT
4704 4704 G1PPRL_BYTE_H_FORMAT,
4705 4705 "type", "address-range",
4706 4706 "used", "prev-live", "next-live", "gc-eff",
4707 4707 "remset", "code-roots");
4708 4708 _out->print_cr(G1PPRL_LINE_PREFIX
4709 4709 G1PPRL_TYPE_H_FORMAT
4710 4710 G1PPRL_ADDR_BASE_H_FORMAT
4711 4711 G1PPRL_BYTE_H_FORMAT
4712 4712 G1PPRL_BYTE_H_FORMAT
4713 4713 G1PPRL_BYTE_H_FORMAT
4714 4714 G1PPRL_DOUBLE_H_FORMAT
4715 4715 G1PPRL_BYTE_H_FORMAT
4716 4716 G1PPRL_BYTE_H_FORMAT,
4717 4717 "", "",
4718 4718 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4719 4719 "(bytes)", "(bytes)");
4720 4720 }
4721 4721
4722 4722 // It takes as a parameter a pointer to one of the _hum_* fields; it
4723 4723 // deduces the corresponding value for a region in a humongous region
4724 4724 // series (either the region size, or what's left if the _hum_* field
4725 4725 // is < the region size), and updates the _hum_* field accordingly.
4726 4726 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4727 4727 size_t bytes = 0;
4728 4728 // The > 0 check is to deal with the prev and next live bytes which
4729 4729 // could be 0.
4730 4730 if (*hum_bytes > 0) {
4731 4731 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4732 4732 *hum_bytes -= bytes;
4733 4733 }
4734 4734 return bytes;
4735 4735 }
4736 4736
4737 4737 // It deduces the values for a region in a humongous region series
4738 4738 // from the _hum_* fields and updates those accordingly. It assumes
4739 4739 // that the _hum_* fields have already been set up from the "starts
4740 4740 // humongous" region and we visit the regions in address order.
4741 4741 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4742 4742 size_t* capacity_bytes,
4743 4743 size_t* prev_live_bytes,
4744 4744 size_t* next_live_bytes) {
4745 4745 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4746 4746 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4747 4747 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4748 4748 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4749 4749 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4750 4750 }
4751 4751
4752 4752 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4753 4753 const char* type = r->get_type_str();
4754 4754 HeapWord* bottom = r->bottom();
4755 4755 HeapWord* end = r->end();
4756 4756 size_t capacity_bytes = r->capacity();
4757 4757 size_t used_bytes = r->used();
4758 4758 size_t prev_live_bytes = r->live_bytes();
4759 4759 size_t next_live_bytes = r->next_live_bytes();
4760 4760 double gc_eff = r->gc_efficiency();
4761 4761 size_t remset_bytes = r->rem_set()->mem_size();
4762 4762 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4763 4763
4764 4764 if (r->startsHumongous()) {
4765 4765 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4766 4766 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4767 4767 "they should have been zeroed after the last time we used them");
4768 4768 // Set up the _hum_* fields.
4769 4769 _hum_capacity_bytes = capacity_bytes;
4770 4770 _hum_used_bytes = used_bytes;
4771 4771 _hum_prev_live_bytes = prev_live_bytes;
4772 4772 _hum_next_live_bytes = next_live_bytes;
4773 4773 get_hum_bytes(&used_bytes, &capacity_bytes,
4774 4774 &prev_live_bytes, &next_live_bytes);
4775 4775 end = bottom + HeapRegion::GrainWords;
4776 4776 } else if (r->continuesHumongous()) {
4777 4777 get_hum_bytes(&used_bytes, &capacity_bytes,
4778 4778 &prev_live_bytes, &next_live_bytes);
4779 4779 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4780 4780 }
4781 4781
4782 4782 _total_used_bytes += used_bytes;
4783 4783 _total_capacity_bytes += capacity_bytes;
4784 4784 _total_prev_live_bytes += prev_live_bytes;
4785 4785 _total_next_live_bytes += next_live_bytes;
4786 4786 _total_remset_bytes += remset_bytes;
4787 4787 _total_strong_code_roots_bytes += strong_code_roots_bytes;
4788 4788
4789 4789 // Print a line for this particular region.
4790 4790 _out->print_cr(G1PPRL_LINE_PREFIX
4791 4791 G1PPRL_TYPE_FORMAT
4792 4792 G1PPRL_ADDR_BASE_FORMAT
4793 4793 G1PPRL_BYTE_FORMAT
4794 4794 G1PPRL_BYTE_FORMAT
4795 4795 G1PPRL_BYTE_FORMAT
4796 4796 G1PPRL_DOUBLE_FORMAT
4797 4797 G1PPRL_BYTE_FORMAT
4798 4798 G1PPRL_BYTE_FORMAT,
4799 4799 type, p2i(bottom), p2i(end),
4800 4800 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4801 4801 remset_bytes, strong_code_roots_bytes);
4802 4802
4803 4803 return false;
4804 4804 }
4805 4805
4806 4806 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4807 4807 // add static memory usages to remembered set sizes
4808 4808 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4809 4809 // Print the footer of the output.
4810 4810 _out->print_cr(G1PPRL_LINE_PREFIX);
4811 4811 _out->print_cr(G1PPRL_LINE_PREFIX
4812 4812 " SUMMARY"
4813 4813 G1PPRL_SUM_MB_FORMAT("capacity")
4814 4814 G1PPRL_SUM_MB_PERC_FORMAT("used")
4815 4815 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4816 4816 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4817 4817 G1PPRL_SUM_MB_FORMAT("remset")
4818 4818 G1PPRL_SUM_MB_FORMAT("code-roots"),
4819 4819 bytes_to_mb(_total_capacity_bytes),
4820 4820 bytes_to_mb(_total_used_bytes),
4821 4821 perc(_total_used_bytes, _total_capacity_bytes),
4822 4822 bytes_to_mb(_total_prev_live_bytes),
4823 4823 perc(_total_prev_live_bytes, _total_capacity_bytes),
4824 4824 bytes_to_mb(_total_next_live_bytes),
4825 4825 perc(_total_next_live_bytes, _total_capacity_bytes),
4826 4826 bytes_to_mb(_total_remset_bytes),
4827 4827 bytes_to_mb(_total_strong_code_roots_bytes));
4828 4828 _out->cr();
4829 4829 }
[1470 lines elided]