rev 6875 : 8056240: Investigate increased GC remark time after class unloading changes in CRM Fuse
Reviewed-by: mgerdin, coleenp, bdelsart
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 +#include "classfile/metadataOnStackMark.hpp"
26 27 #include "classfile/symbolTable.hpp"
27 28 #include "code/codeCache.hpp"
28 29 #include "gc_implementation/g1/concurrentMark.inline.hpp"
29 30 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
30 31 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
31 32 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
32 33 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
33 34 #include "gc_implementation/g1/g1Log.hpp"
34 35 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
35 36 #include "gc_implementation/g1/g1RemSet.hpp"
36 37 #include "gc_implementation/g1/heapRegion.inline.hpp"
37 38 #include "gc_implementation/g1/heapRegionManager.inline.hpp"
38 39 #include "gc_implementation/g1/heapRegionRemSet.hpp"
39 40 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
40 41 #include "gc_implementation/shared/vmGCOperations.hpp"
41 42 #include "gc_implementation/shared/gcTimer.hpp"
42 43 #include "gc_implementation/shared/gcTrace.hpp"
43 44 #include "gc_implementation/shared/gcTraceTime.hpp"
44 45 #include "memory/allocation.hpp"
45 46 #include "memory/genOopClosures.inline.hpp"
46 47 #include "memory/referencePolicy.hpp"
47 48 #include "memory/resourceArea.hpp"
48 49 #include "oops/oop.inline.hpp"
49 50 #include "runtime/handles.inline.hpp"
50 51 #include "runtime/java.hpp"
51 52 #include "runtime/prefetch.inline.hpp"
52 53 #include "services/memTracker.hpp"
53 54
54 55 // Concurrent marking bit map wrapper
55 56
56 57 CMBitMapRO::CMBitMapRO(int shifter) :
57 58 _bm(),
58 59 _shifter(shifter) {
59 60 _bmStartWord = 0;
60 61 _bmWordSize = 0;
61 62 }
62 63
63 64 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
64 65 const HeapWord* limit) const {
65 66 // First we must round addr *up* to a possible object boundary.
66 67 addr = (HeapWord*)align_size_up((intptr_t)addr,
67 68 HeapWordSize << _shifter);
68 69 size_t addrOffset = heapWordToOffset(addr);
69 70 if (limit == NULL) {
70 71 limit = _bmStartWord + _bmWordSize;
71 72 }
72 73 size_t limitOffset = heapWordToOffset(limit);
73 74 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
74 75 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
75 76 assert(nextAddr >= addr, "get_next_one postcondition");
76 77 assert(nextAddr == limit || isMarked(nextAddr),
77 78 "get_next_one postcondition");
78 79 return nextAddr;
79 80 }
80 81
81 82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
82 83 const HeapWord* limit) const {
83 84 size_t addrOffset = heapWordToOffset(addr);
84 85 if (limit == NULL) {
85 86 limit = _bmStartWord + _bmWordSize;
86 87 }
87 88 size_t limitOffset = heapWordToOffset(limit);
88 89 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
89 90 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
90 91 assert(nextAddr >= addr, "get_next_one postcondition");
91 92 assert(nextAddr == limit || !isMarked(nextAddr),
92 93 "get_next_one postcondition");
93 94 return nextAddr;
94 95 }
95 96
96 97 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
97 98 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
98 99 return (int) (diff >> _shifter);
99 100 }
100 101
101 102 #ifndef PRODUCT
102 103 bool CMBitMapRO::covers(MemRegion heap_rs) const {
103 104 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
104 105 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
105 106 "size inconsistency");
106 107 return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
107 108 _bmWordSize == heap_rs.word_size();
108 109 }
109 110 #endif
110 111
111 112 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
112 113 _bm.print_on_error(st, prefix);
113 114 }
114 115
115 116 size_t CMBitMap::compute_size(size_t heap_size) {
116 117 return heap_size / mark_distance();
117 118 }
118 119
119 120 size_t CMBitMap::mark_distance() {
120 121 return MinObjAlignmentInBytes * BitsPerByte;
121 122 }
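
A quick check of the sizing above, as a sketch (the 8-byte minimum object
alignment is the usual 64-bit default here, an assumption rather than
something this change sets):

    // Each bit of the mark bitmap covers mark_distance() bytes of heap:
    //   mark_distance() = MinObjAlignmentInBytes * BitsPerByte = 8 * 8 = 64
    // so compute_size() comes out to heap_size / 64, e.g. for a 32 GB heap:
    size_t heap_size    = 32ull * 1024 * 1024 * 1024;  // assumed heap size
    size_t bitmap_bytes = heap_size / 64;              // 512 MB per bitmap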
122 123
123 124 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
124 125 _bmStartWord = heap.start();
125 126 _bmWordSize = heap.word_size();
126 127
127 128 _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
128 129 _bm.set_size(_bmWordSize >> _shifter);
129 130
130 131 storage->set_mapping_changed_listener(&_listener);
131 132 }
132 133
133 134 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
134 135 if (zero_filled) {
135 136 return;
136 137 }
137 138 // We need to clear the bitmap on commit, removing any existing information.
138 139 MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
139 140 _bm->clearRange(mr);
140 141 }
141 142
142 143 // Closure used for clearing the given mark bitmap.
143 144 class ClearBitmapHRClosure : public HeapRegionClosure {
144 145 private:
145 146 ConcurrentMark* _cm;
146 147 CMBitMap* _bitmap;
147 148 bool _may_yield; // The closure may yield during iteration. If yielded, abort the iteration.
148 149 public:
149 150 ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
150 151 assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
151 152 }
152 153
153 154 virtual bool doHeapRegion(HeapRegion* r) {
154 155 size_t const chunk_size_in_words = M / HeapWordSize;
155 156
156 157 HeapWord* cur = r->bottom();
157 158 HeapWord* const end = r->end();
158 159
159 160 while (cur < end) {
160 161 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
161 162 _bitmap->clearRange(mr);
162 163
163 164 cur += chunk_size_in_words;
164 165
165 166 // Abort iteration if after yielding the marking has been aborted.
166 167 if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
167 168 return true;
168 169 }
169 170 // Repeat the asserts from before the start of the closure. We will do them
170 171 // as asserts here to minimize their overhead on the product. However, we
171 172 // will have them as guarantees at the beginning / end of the bitmap
172 173 // clearing to get some checking in the product.
173 174 assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
174 175 assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
175 176 }
176 177
177 178 return false;
178 179 }
179 180 };
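
The closure above clears a region in 1 MB chunks so that a concurrent caller
can yield to safepoints between chunks. Both call sites appear later in this
file; note that `this` is the CMBitMap in the first call and the
ConcurrentMark in the second, matching their respective call sites:

    // STW clearing, as done by CMBitMap::clearAll() below (never yields):
    ClearBitmapHRClosure stw_cl(NULL, this, false /* may_yield */);
    // Concurrent clearing, as done by ConcurrentMark::clearNextBitmap()
    // (may yield to safepoints between chunks):
    ClearBitmapHRClosure conc_cl(this, _nextMarkBitMap, true /* may_yield */);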
180 181
181 182 void CMBitMap::clearAll() {
182 183 ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
183 184 G1CollectedHeap::heap()->heap_region_iterate(&cl);
184 185 guarantee(cl.complete(), "Must have completed iteration.");
185 186 return;
186 187 }
187 188
188 189 void CMBitMap::markRange(MemRegion mr) {
189 190 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
190 191 assert(!mr.is_empty(), "unexpected empty region");
191 192 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
192 193 ((HeapWord *) mr.end())),
193 194 "markRange memory region end is not card aligned");
194 195 // convert address range into offset range
195 196 _bm.at_put_range(heapWordToOffset(mr.start()),
196 197 heapWordToOffset(mr.end()), true);
197 198 }
198 199
199 200 void CMBitMap::clearRange(MemRegion mr) {
200 201 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
201 202 assert(!mr.is_empty(), "unexpected empty region");
202 203 // convert address range into offset range
203 204 _bm.at_put_range(heapWordToOffset(mr.start()),
204 205 heapWordToOffset(mr.end()), false);
205 206 }
206 207
207 208 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
208 209 HeapWord* end_addr) {
209 210 HeapWord* start = getNextMarkedWordAddress(addr);
210 211 start = MIN2(start, end_addr);
211 212 HeapWord* end = getNextUnmarkedWordAddress(start);
212 213 end = MIN2(end, end_addr);
213 214 assert(start <= end, "Consistency check");
214 215 MemRegion mr(start, end);
215 216 if (!mr.is_empty()) {
216 217 clearRange(mr);
217 218 }
218 219 return mr;
219 220 }
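
The helper above returns the next maximal run of marked bits within
[addr, end_addr) and clears it, so a caller can consume marked ranges one at
a time. A small usage sketch (the loop shape is assumed, not taken from a
call site in this file):

    // Walk and consume every marked run in [bottom, end):
    MemRegion mr = bm->getAndClearMarkedRegion(bottom, end);
    while (!mr.is_empty()) {
      // ... process [mr.start(), mr.end()) ...
      mr = bm->getAndClearMarkedRegion(mr.end(), end);
    }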
220 221
221 222 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
222 223 _base(NULL), _cm(cm)
223 224 #ifdef ASSERT
224 225 , _drain_in_progress(false)
225 226 , _drain_in_progress_yields(false)
226 227 #endif
227 228 {}
228 229
229 230 bool CMMarkStack::allocate(size_t capacity) {
230 231 // allocate a stack of the requisite depth
231 232 ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
232 233 if (!rs.is_reserved()) {
233 234 warning("ConcurrentMark MarkStack allocation failure");
234 235 return false;
235 236 }
236 237 MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
237 238 if (!_virtual_space.initialize(rs, rs.size())) {
238 239 warning("ConcurrentMark MarkStack backing store failure");
239 240 // Release the virtual memory reserved for the marking stack
240 241 rs.release();
241 242 return false;
242 243 }
243 244 assert(_virtual_space.committed_size() == rs.size(),
244 245 "Didn't reserve backing store for all of ConcurrentMark stack?");
245 246 _base = (oop*) _virtual_space.low();
246 247 setEmpty();
247 248 _capacity = (jint) capacity;
248 249 _saved_index = -1;
249 250 _should_expand = false;
250 251 NOT_PRODUCT(_max_depth = 0);
251 252 return true;
252 253 }
253 254
254 255 void CMMarkStack::expand() {
255 256 // Called during remark if we've overflown the marking stack during marking.
256 257 assert(isEmpty(), "stack should have been emptied while handling overflow");
257 258 assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
258 259 // Clear expansion flag
259 260 _should_expand = false;
260 261 if (_capacity == (jint) MarkStackSizeMax) {
261 262 if (PrintGCDetails && Verbose) {
262 263 gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
263 264 }
264 265 return;
265 266 }
266 267 // Double capacity if possible
267 268 jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
268 269 // Do not give up existing stack until we have managed to
269 270 // get the double capacity that we desired.
270 271 ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
271 272 sizeof(oop)));
272 273 if (rs.is_reserved()) {
273 274 // Release the backing store associated with old stack
274 275 _virtual_space.release();
275 276 // Reinitialize virtual space for new stack
276 277 if (!_virtual_space.initialize(rs, rs.size())) {
277 278 fatal("Not enough swap for expanded marking stack capacity");
278 279 }
279 280 _base = (oop*)(_virtual_space.low());
280 281 _index = 0;
281 282 _capacity = new_capacity;
282 283 } else {
283 284 if (PrintGCDetails && Verbose) {
284 285 // Failed to double capacity, continue;
285 286 gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
286 287 SIZE_FORMAT"K to " SIZE_FORMAT"K",
287 288 _capacity / K, new_capacity / K);
288 289 }
289 290 }
290 291 }
291 292
292 293 void CMMarkStack::set_should_expand() {
292 293 // If we're resetting the marking state because of a
293 294 // marking stack overflow, record that we should, if
295 296 // possible, expand the stack.
296 297 _should_expand = _cm->has_overflown();
297 298 }
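
How overflow and expansion tie together, pieced from the methods above and
the remark code later in this file:

    // 1. par_push()/par_adjoin_arr() find the stack full -> _overflow = true
    // 2. ConcurrentMark sees has_overflown() and aborts/restarts marking
    // 3. reset_marking_state() calls set_should_expand() to latch the request
    // 4. once the stack is empty again, checkpointRootsFinal() does:
    if (_markStack.should_expand()) {
      _markStack.expand();   // doubles _capacity, up to MarkStackSizeMax
    }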
298 299
299 300 CMMarkStack::~CMMarkStack() {
300 301 if (_base != NULL) {
301 302 _base = NULL;
302 303 _virtual_space.release();
303 304 }
304 305 }
305 306
306 307 void CMMarkStack::par_push(oop ptr) {
307 308 while (true) {
308 309 if (isFull()) {
309 310 _overflow = true;
310 311 return;
311 312 }
312 313 // Otherwise...
313 314 jint index = _index;
314 315 jint next_index = index+1;
315 316 jint res = Atomic::cmpxchg(next_index, &_index, index);
316 317 if (res == index) {
317 318 _base[index] = ptr;
318 319 // Note that we don't maintain this atomically. We could, but it
319 320 // doesn't seem necessary.
320 321 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
321 322 return;
322 323 }
323 324 // Otherwise, we need to try again.
324 325 }
325 326 }
326 327
327 328 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
328 329 while (true) {
329 330 if (isFull()) {
330 331 _overflow = true;
331 332 return;
332 333 }
333 334 // Otherwise...
334 335 jint index = _index;
335 336 jint next_index = index + n;
336 337 if (next_index > _capacity) {
337 338 _overflow = true;
338 339 return;
339 340 }
340 341 jint res = Atomic::cmpxchg(next_index, &_index, index);
341 342 if (res == index) {
342 343 for (int i = 0; i < n; i++) {
343 344 int ind = index + i;
344 345 assert(ind < _capacity, "By overflow test above.");
345 346 _base[ind] = ptr_arr[i];
346 347 }
347 348 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
348 349 return;
349 350 }
350 351 // Otherwise, we need to try again.
351 352 }
352 353 }
353 354
354 355 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
355 356 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
356 357 jint start = _index;
357 358 jint next_index = start + n;
358 359 if (next_index > _capacity) {
359 360 _overflow = true;
360 361 return;
361 362 }
362 363 // Otherwise.
363 364 _index = next_index;
364 365 for (int i = 0; i < n; i++) {
365 366 int ind = start + i;
366 367 assert(ind < _capacity, "By overflow test above.");
367 368 _base[ind] = ptr_arr[i];
368 369 }
369 370 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
370 371 }
371 372
372 373 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
373 374 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
374 375 jint index = _index;
375 376 if (index == 0) {
376 377 *n = 0;
377 378 return false;
378 379 } else {
379 380 int k = MIN2(max, index);
380 381 jint new_ind = index - k;
381 382 for (int j = 0; j < k; j++) {
382 383 ptr_arr[j] = _base[new_ind + j];
383 384 }
384 385 _index = new_ind;
385 386 *n = k;
386 387 return true;
387 388 }
388 389 }
389 390
390 391 template<class OopClosureClass>
391 392 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
392 393 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
393 394 || SafepointSynchronize::is_at_safepoint(),
394 395 "Drain recursion must be yield-safe.");
395 396 bool res = true;
396 397 debug_only(_drain_in_progress = true);
397 398 debug_only(_drain_in_progress_yields = yield_after);
398 399 while (!isEmpty()) {
399 400 oop newOop = pop();
400 401 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
401 402 assert(newOop->is_oop(), "Expected an oop");
402 403 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
403 404 "only grey objects on this stack");
404 405 newOop->oop_iterate(cl);
405 406 if (yield_after && _cm->do_yield_check()) {
406 407 res = false;
407 408 break;
408 409 }
409 410 }
410 411 debug_only(_drain_in_progress = false);
411 412 return res;
412 413 }
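
drain() is a template over the closure type, so each instantiation binds the
closure statically. A hedged usage sketch (the closure name is hypothetical,
standing in for any oop closure accepted by oop_iterate()):

    // Pop and process grey objects until the stack is empty, yielding
    // between objects when requested:
    MyMarkingClosure cl(this);   // hypothetical oop closure
    bool finished = _markStack.drain(&cl, _nextMarkBitMap, true /* yield_after */);
    // finished == false means the drain stopped early at a yield point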
413 414
414 415 void CMMarkStack::note_start_of_gc() {
415 416 assert(_saved_index == -1,
416 417 "note_start_of_gc()/end_of_gc() bracketed incorrectly");
417 418 _saved_index = _index;
418 419 }
419 420
420 421 void CMMarkStack::note_end_of_gc() {
421 422 // This is intentionally a guarantee, instead of an assert. If we
422 423 // accidentally add something to the mark stack during GC, it
423 424 // will be a correctness issue so it's better if we crash. We'll
424 425 // only check this once per GC anyway, so it won't be a performance
425 426 // issue in any way.
426 427 guarantee(_saved_index == _index,
427 428 err_msg("saved index: %d index: %d", _saved_index, _index));
428 429 _saved_index = -1;
429 430 }
430 431
431 432 void CMMarkStack::oops_do(OopClosure* f) {
432 433 assert(_saved_index == _index,
433 434 err_msg("saved index: %d index: %d", _saved_index, _index));
434 435 for (int i = 0; i < _index; i += 1) {
435 436 f->do_oop(&_base[i]);
436 437 }
437 438 }
438 439
439 440 CMRootRegions::CMRootRegions() :
440 441 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
441 442 _should_abort(false), _next_survivor(NULL) { }
442 443
443 444 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
444 445 _young_list = g1h->young_list();
445 446 _cm = cm;
446 447 }
447 448
448 449 void CMRootRegions::prepare_for_scan() {
449 450 assert(!scan_in_progress(), "pre-condition");
450 451
451 452 // Currently, only survivors can be root regions.
452 453 assert(_next_survivor == NULL, "pre-condition");
453 454 _next_survivor = _young_list->first_survivor_region();
454 455 _scan_in_progress = (_next_survivor != NULL);
455 456 _should_abort = false;
456 457 }
457 458
458 459 HeapRegion* CMRootRegions::claim_next() {
459 460 if (_should_abort) {
460 461 // If someone has set the should_abort flag, we return NULL to
461 462 // force the caller to bail out of their loop.
462 463 return NULL;
463 464 }
464 465
465 466 // Currently, only survivors can be root regions.
466 467 HeapRegion* res = _next_survivor;
467 468 if (res != NULL) {
468 469 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
469 470 // Read it again in case it changed while we were waiting for the lock.
470 471 res = _next_survivor;
471 472 if (res != NULL) {
472 473 if (res == _young_list->last_survivor_region()) {
473 474 // We just claimed the last survivor so store NULL to indicate
474 475 // that we're done.
475 476 _next_survivor = NULL;
476 477 } else {
477 478 _next_survivor = res->get_next_young_region();
478 479 }
479 480 } else {
480 481 // Someone else claimed the last survivor while we were trying
481 482 // to take the lock so nothing else to do.
482 483 }
483 484 }
484 485 assert(res == NULL || res->is_survivor(), "post-condition");
485 486
486 487 return res;
487 488 }
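
claim_next() pairs a lock-free NULL fast path with a re-read of
_next_survivor under RootRegionScan_lock, so each region is handed to exactly
one worker. The claiming loop used by CMRootRegionScanTask::work() later in
this file:

    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }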
488 489
489 490 void CMRootRegions::scan_finished() {
490 491 assert(scan_in_progress(), "pre-condition");
491 492
492 493 // Currently, only survivors can be root regions.
493 494 if (!_should_abort) {
494 495 assert(_next_survivor == NULL, "we should have claimed all survivors");
495 496 }
496 497 _next_survivor = NULL;
497 498
498 499 {
499 500 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
500 501 _scan_in_progress = false;
501 502 RootRegionScan_lock->notify_all();
502 503 }
503 504 }
504 505
505 506 bool CMRootRegions::wait_until_scan_finished() {
506 507 if (!scan_in_progress()) return false;
507 508
508 509 {
509 510 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
510 511 while (scan_in_progress()) {
511 512 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
512 513 }
513 514 }
514 515 return true;
515 516 }
516 517
517 518 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
518 519 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
519 520 #endif // _MSC_VER
520 521
521 522 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
522 523 return MAX2((n_par_threads + 2) / 4, 1U);
523 524 }
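
Worked examples of the (n + 2) / 4 scaling above (plain integer arithmetic):

    //   n_par_threads =  1 -> (1 + 2) / 4  = 0 -> MAX2(0, 1U) = 1
    //   n_par_threads =  8 -> (8 + 2) / 4  = 2 marking threads
    //   n_par_threads = 13 -> (13 + 2) / 4 = 3 marking threads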
524 525
525 526 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
526 527 _g1h(g1h),
527 528 _markBitMap1(),
528 529 _markBitMap2(),
529 530 _parallel_marking_threads(0),
530 531 _max_parallel_marking_threads(0),
531 532 _sleep_factor(0.0),
532 533 _marking_task_overhead(1.0),
533 534 _cleanup_sleep_factor(0.0),
534 535 _cleanup_task_overhead(1.0),
535 536 _cleanup_list("Cleanup List"),
536 537 _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
537 538 _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
538 539 CardTableModRefBS::card_shift,
539 540 false /* in_resource_area*/),
540 541
541 542 _prevMarkBitMap(&_markBitMap1),
542 543 _nextMarkBitMap(&_markBitMap2),
543 544
544 545 _markStack(this),
545 546 // _finger set in set_non_marking_state
546 547
547 548 _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
548 549 // _active_tasks set in set_non_marking_state
549 550 // _tasks set inside the constructor
550 551 _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
551 552 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
552 553
553 554 _has_overflown(false),
554 555 _concurrent(false),
555 556 _has_aborted(false),
556 557 _aborted_gc_id(GCId::undefined()),
557 558 _restart_for_overflow(false),
558 559 _concurrent_marking_in_progress(false),
559 560
560 561 // _verbose_level set below
561 562
562 563 _init_times(),
563 564 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
564 565 _cleanup_times(),
565 566 _total_counting_time(0.0),
566 567 _total_rs_scrub_time(0.0),
567 568
568 569 _parallel_workers(NULL),
569 570
570 571 _count_card_bitmaps(NULL),
571 572 _count_marked_bytes(NULL),
572 573 _completed_initialization(false) {
573 574 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
574 575 if (verbose_level < no_verbose) {
575 576 verbose_level = no_verbose;
576 577 }
577 578 if (verbose_level > high_verbose) {
578 579 verbose_level = high_verbose;
579 580 }
580 581 _verbose_level = verbose_level;
581 582
582 583 if (verbose_low()) {
583 584 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
584 585 "heap end = " INTPTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
585 586 }
586 587
587 588 _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
588 589 _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
589 590
590 591 // Create & start a ConcurrentMark thread.
591 592 _cmThread = new ConcurrentMarkThread(this);
592 593 assert(cmThread() != NULL, "CM Thread should have been created");
593 594 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
594 595 if (_cmThread->osthread() == NULL) {
595 596 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
596 597 }
597 598
598 599 assert(CGC_lock != NULL, "Where's the CGC_lock?");
599 600 assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
600 601 assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
601 602
602 603 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
603 604 satb_qs.set_buffer_size(G1SATBBufferSize);
604 605
605 606 _root_regions.init(_g1h, this);
606 607
607 608 if (ConcGCThreads > ParallelGCThreads) {
608 609 warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
609 610 "than ParallelGCThreads (" UINTX_FORMAT ").",
610 611 ConcGCThreads, ParallelGCThreads);
611 612 return;
612 613 }
613 614 if (ParallelGCThreads == 0) {
614 615 // if we are not running with any parallel GC threads we will not
615 616 // spawn any marking threads either
616 617 _parallel_marking_threads = 0;
617 618 _max_parallel_marking_threads = 0;
618 619 _sleep_factor = 0.0;
619 620 _marking_task_overhead = 1.0;
620 621 } else {
621 622 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
622 623 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
623 624 // if both are set
624 625 _sleep_factor = 0.0;
625 626 _marking_task_overhead = 1.0;
626 627 } else if (G1MarkingOverheadPercent > 0) {
627 628 // We will calculate the number of parallel marking threads based
628 629 // on a target overhead with respect to the soft real-time goal
629 630 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
630 631 double overall_cm_overhead =
631 632 (double) MaxGCPauseMillis * marking_overhead /
632 633 (double) GCPauseIntervalMillis;
633 634 double cpu_ratio = 1.0 / (double) os::processor_count();
634 635 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
635 636 double marking_task_overhead =
636 637 overall_cm_overhead / marking_thread_num *
637 638 (double) os::processor_count();
638 639 double sleep_factor =
639 640 (1.0 - marking_task_overhead) / marking_task_overhead;
640 641
641 642 FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
642 643 _sleep_factor = sleep_factor;
643 644 _marking_task_overhead = marking_task_overhead;
644 645 } else {
645 646 // Calculate the number of parallel marking threads by scaling
646 647 // the number of parallel GC threads.
647 648 uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
648 649 FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
649 650 _sleep_factor = 0.0;
650 651 _marking_task_overhead = 1.0;
651 652 }
652 653
653 654 assert(ConcGCThreads > 0, "Should have been set");
654 655 _parallel_marking_threads = (uint) ConcGCThreads;
655 656 _max_parallel_marking_threads = _parallel_marking_threads;
656 657
657 658 if (parallel_marking_threads() > 1) {
658 659 _cleanup_task_overhead = 1.0;
659 660 } else {
660 661 _cleanup_task_overhead = marking_task_overhead();
661 662 }
662 663 _cleanup_sleep_factor =
663 664 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
664 665
665 666 #if 0
666 667 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
667 668 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
668 669 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
669 670 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
670 671 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
671 672 #endif
672 673
673 674 guarantee(parallel_marking_threads() > 0, "peace of mind");
674 675 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
675 676 _max_parallel_marking_threads, false, true);
676 677 if (_parallel_workers == NULL) {
677 678 vm_exit_during_initialization("Failed necessary allocation.");
678 679 } else {
679 680 _parallel_workers->initialize_workers();
680 681 }
681 682 }
682 683
683 684 if (FLAG_IS_DEFAULT(MarkStackSize)) {
684 685 uintx mark_stack_size =
685 686 MIN2(MarkStackSizeMax,
686 687 MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
687 688 // Verify that the calculated value for MarkStackSize is in range.
688 689 // It would be nice to use the private utility routine from Arguments.
689 690 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
690 691 warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
691 692 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
692 693 mark_stack_size, (uintx) 1, MarkStackSizeMax);
693 694 return;
694 695 }
695 696 FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
696 697 } else {
697 698 // Verify MarkStackSize is in range.
698 699 if (FLAG_IS_CMDLINE(MarkStackSize)) {
699 700 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
700 701 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
701 702 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
702 703 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
703 704 MarkStackSize, (uintx) 1, MarkStackSizeMax);
704 705 return;
705 706 }
706 707 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
707 708 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
708 709 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
709 710 " or for MarkStackSizeMax (" UINTX_FORMAT ")",
710 711 MarkStackSize, MarkStackSizeMax);
711 712 return;
712 713 }
713 714 }
714 715 }
715 716 }
716 717
717 718 if (!_markStack.allocate(MarkStackSize)) {
718 719 warning("Failed to allocate CM marking stack");
719 720 return;
720 721 }
721 722
722 723 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
723 724 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
724 725
725 726 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
726 727 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
727 728
728 729 BitMap::idx_t card_bm_size = _card_bm.size();
729 730
730 731 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
731 732 _active_tasks = _max_worker_id;
732 733
733 734 size_t max_regions = (size_t) _g1h->max_regions();
734 735 for (uint i = 0; i < _max_worker_id; ++i) {
735 736 CMTaskQueue* task_queue = new CMTaskQueue();
736 737 task_queue->initialize();
737 738 _task_queues->register_queue(i, task_queue);
738 739
739 740 _count_card_bitmaps[i] = BitMap(card_bm_size, false);
740 741 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
741 742
742 743 _tasks[i] = new CMTask(i, this,
743 744 _count_marked_bytes[i],
744 745 &_count_card_bitmaps[i],
745 746 task_queue, _task_queues);
746 747
747 748 _accum_task_vtime[i] = 0.0;
748 749 }
749 750
750 751 // Calculate the card number for the bottom of the heap. Used
751 752 // in biasing indexes into the accounting card bitmaps.
752 753 _heap_bottom_card_num =
753 754 intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
754 755 CardTableModRefBS::card_shift);
755 756
756 757 // Clear all the liveness counting data
757 758 clear_all_count_data();
758 759
759 760 // so that the call below can read a sensible value
760 761 _heap_start = g1h->reserved_region().start();
761 762 set_non_marking_state();
762 763 _completed_initialization = true;
763 764 }
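
A worked pass through the G1MarkingOverheadPercent arm of the constructor
above, with assumed inputs (none of these values are defaults asserted by
this change):

    // Assume: G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
    //         GCPauseIntervalMillis = 1000, os::processor_count() = 8.
    double marking_overhead      = 10 / 100.0;          // 0.10
    double overall_cm_overhead   = 200 * 0.10 / 1000;   // 0.02
    double cpu_ratio             = 1.0 / 8;             // 0.125
    double marking_thread_num    = ceil(0.02 / 0.125);  // 1 -> ConcGCThreads = 1
    double marking_task_overhead = 0.02 / 1 * 8;        // 0.16
    double sleep_factor          = (1.0 - 0.16) / 0.16; // 5.25
    // i.e. one marking thread that sleeps ~5.25x as long as it runs.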
764 765
765 766 void ConcurrentMark::reset() {
766 767 // Starting values for these two. This should be called in a STW
767 768 // phase.
768 769 MemRegion reserved = _g1h->g1_reserved();
769 770 _heap_start = reserved.start();
770 771 _heap_end = reserved.end();
771 772
772 773 // Separated the asserts so that we know which one fires.
773 774 assert(_heap_start != NULL, "heap bounds should look ok");
774 775 assert(_heap_end != NULL, "heap bounds should look ok");
775 776 assert(_heap_start < _heap_end, "heap bounds should look ok");
776 777
777 778 // Reset all the marking data structures and any necessary flags
778 779 reset_marking_state();
779 780
780 781 if (verbose_low()) {
781 782 gclog_or_tty->print_cr("[global] resetting");
782 783 }
783 784
784 785 // We do reset all of them, since different phases will use
785 786 // different number of active threads. So, it's easiest to have all
786 787 // of them ready.
787 788 for (uint i = 0; i < _max_worker_id; ++i) {
788 789 _tasks[i]->reset(_nextMarkBitMap);
789 790 }
790 791
791 792 // we need this to make sure that the flag is on during the evac
792 793 // pause with initial mark piggy-backed
793 794 set_concurrent_marking_in_progress();
794 795 }
795 796
796 797
797 798 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
798 799 _markStack.set_should_expand();
799 800 _markStack.setEmpty(); // Also clears the _markStack overflow flag
800 801 if (clear_overflow) {
801 802 clear_has_overflown();
802 803 } else {
803 804 assert(has_overflown(), "pre-condition");
804 805 }
805 806 _finger = _heap_start;
806 807
807 808 for (uint i = 0; i < _max_worker_id; ++i) {
808 809 CMTaskQueue* queue = _task_queues->queue(i);
809 810 queue->set_empty();
810 811 }
811 812 }
812 813
813 814 void ConcurrentMark::set_concurrency(uint active_tasks) {
814 815 assert(active_tasks <= _max_worker_id, "we should not have more");
815 816
816 817 _active_tasks = active_tasks;
817 818 // Need to update the three data structures below according to the
818 819 // number of active threads for this phase.
819 820 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
820 821 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
821 822 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
822 823 }
823 824
824 825 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
825 826 set_concurrency(active_tasks);
826 827
827 828 _concurrent = concurrent;
828 829 // We propagate this to all tasks, not just the active ones.
829 830 for (uint i = 0; i < _max_worker_id; ++i)
830 831 _tasks[i]->set_concurrent(concurrent);
831 832
832 833 if (concurrent) {
833 834 set_concurrent_marking_in_progress();
834 835 } else {
835 836 // We currently assume that the concurrent flag has been set to
836 837 // false before we start remark. At this point we should also be
837 838 // in a STW phase.
838 839 assert(!concurrent_marking_in_progress(), "invariant");
839 840 assert(out_of_regions(),
840 841 err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
841 842 p2i(_finger), p2i(_heap_end)));
842 843 }
843 844 }
844 845
845 846 void ConcurrentMark::set_non_marking_state() {
846 847 // We set the global marking state to some default values when we're
847 848 // not doing marking.
848 849 reset_marking_state();
849 850 _active_tasks = 0;
850 851 clear_concurrent_marking_in_progress();
851 852 }
852 853
853 854 ConcurrentMark::~ConcurrentMark() {
854 855 // The ConcurrentMark instance is never freed.
855 856 ShouldNotReachHere();
856 857 }
857 858
858 859 void ConcurrentMark::clearNextBitmap() {
859 860 G1CollectedHeap* g1h = G1CollectedHeap::heap();
860 861
861 862 // Make sure that the concurrent mark thread still appears to be in
862 863 // the current cycle.
863 864 guarantee(cmThread()->during_cycle(), "invariant");
864 865
865 866 // We are finishing up the current cycle by clearing the next
866 867 // marking bitmap and getting it ready for the next cycle. During
867 868 // this time no other cycle can start. So, let's make sure that this
868 869 // is the case.
869 870 guarantee(!g1h->mark_in_progress(), "invariant");
870 871
871 872 ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
872 873 g1h->heap_region_iterate(&cl);
873 874
874 875 // Clear the liveness counting data. If the marking has been aborted, the abort()
875 876 // call already did that.
876 877 if (cl.complete()) {
877 878 clear_all_count_data();
878 879 }
879 880
880 881 // Repeat the asserts from above.
881 882 guarantee(cmThread()->during_cycle(), "invariant");
882 883 guarantee(!g1h->mark_in_progress(), "invariant");
883 884 }
884 885
885 886 class CheckBitmapClearHRClosure : public HeapRegionClosure {
886 887 CMBitMap* _bitmap;
887 888 bool _error;
888 889 public:
889 890 CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
890 891 }
891 892
892 893 virtual bool doHeapRegion(HeapRegion* r) {
893 894 // This closure can be called concurrently to the mutator, so we must make sure
894 895 // that the result of the getNextMarkedWordAddress() call is compared to the
895 896 // value passed to it as limit to detect any found bits.
896 897 // We can use the region's orig_end() for the limit and the comparison value
897 898 // as it always contains the "real" end of the region that never changes and
898 899 // has no side effects.
899 900 // Due to the latter, there can also be no problem with the compiler generating
900 901 // reloads of the orig_end() call.
901 902 HeapWord* end = r->orig_end();
902 903 return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
903 904 }
904 905 };
905 906
906 907 bool ConcurrentMark::nextMarkBitmapIsClear() {
907 908 CheckBitmapClearHRClosure cl(_nextMarkBitMap);
908 909 _g1h->heap_region_iterate(&cl);
909 910 return cl.complete();
910 911 }
911 912
912 913 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
913 914 public:
914 915 bool doHeapRegion(HeapRegion* r) {
915 916 if (!r->continuesHumongous()) {
916 917 r->note_start_of_marking();
917 918 }
918 919 return false;
919 920 }
920 921 };
921 922
922 923 void ConcurrentMark::checkpointRootsInitialPre() {
923 924 G1CollectedHeap* g1h = G1CollectedHeap::heap();
924 925 G1CollectorPolicy* g1p = g1h->g1_policy();
925 926
926 927 _has_aborted = false;
927 928
928 929 #ifndef PRODUCT
929 930 if (G1PrintReachableAtInitialMark) {
930 931 print_reachable("at-cycle-start",
931 932 VerifyOption_G1UsePrevMarking, true /* all */);
932 933 }
933 934 #endif
934 935
935 936 // Initialise marking structures. This has to be done in a STW phase.
936 937 reset();
937 938
938 939 // For each region note start of marking.
939 940 NoteStartOfMarkHRClosure startcl;
940 941 g1h->heap_region_iterate(&startcl);
941 942 }
942 943
943 944
944 945 void ConcurrentMark::checkpointRootsInitialPost() {
945 946 G1CollectedHeap* g1h = G1CollectedHeap::heap();
946 947
947 948 // If we force an overflow during remark, the remark operation will
948 949 // actually abort and we'll restart concurrent marking. If we always
949 950 // force an overflow during remark we'll never actually complete the
950 951 // marking phase. So, we initialize this here, at the start of the
951 952 // cycle, so that the remaining overflow count will decrease at
952 953 // every remark and we'll eventually not need to cause one.
953 954 force_overflow_stw()->init();
954 955
955 956 // Start Concurrent Marking weak-reference discovery.
956 957 ReferenceProcessor* rp = g1h->ref_processor_cm();
957 958 // enable ("weak") refs discovery
958 959 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
959 960 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
960 961
961 962 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
962 963 // This is the start of the marking cycle; we expect all
963 964 // threads to have SATB queues with active set to false.
964 965 satb_mq_set.set_active_all_threads(true, /* new active value */
965 966 false /* expected_active */);
966 967
967 968 _root_regions.prepare_for_scan();
968 969
969 970 // update_g1_committed() will be called at the end of an evac pause
970 971 // when marking is on. So, it's also called at the end of the
971 972 // initial-mark pause to update the heap end, if the heap expands
972 973 // during it. No need to call it here.
973 974 }
974 975
975 976 /*
976 977 * Notice that in the next two methods, we actually leave the STS
977 978 * during the barrier sync and join it immediately afterwards. If we
978 979 * do not do this, the following deadlock can occur: one thread could
979 980 * be in the barrier sync code, waiting for the other thread to also
980 981 * sync up, whereas another one could be trying to yield, while also
981 982 * waiting for the other threads to sync up too.
982 983 *
983 984 * Note, however, that this code is also used during remark and in
984 985 * this case we should not attempt to leave / enter the STS, otherwise
985 986 * we'll either hit an assert (debug / fastdebug) or deadlock
986 987 * (product). So we should only leave / enter the STS if we are
987 988 * operating concurrently.
988 989 *
989 990 * Because the thread that does the sync barrier has left the STS, it
990 991 * is possible for it to be suspended for a Full GC, or for an
991 992 * evacuation pause to occur. This is actually safe, since entering
992 993 * the sync barrier is one of the last things do_marking_step() does, and it
993 994 * doesn't manipulate any data structures afterwards.
994 995 */
995 996
996 997 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
997 998 if (verbose_low()) {
998 999 gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
999 1000 }
1000 1001
1001 1002 if (concurrent()) {
1002 1003 SuspendibleThreadSet::leave();
1003 1004 }
1004 1005
1005 1006 bool barrier_aborted = !_first_overflow_barrier_sync.enter();
1006 1007
1007 1008 if (concurrent()) {
1008 1009 SuspendibleThreadSet::join();
1009 1010 }
1010 1011 // at this point everyone should have synced up and not be doing any
1011 1012 // more work
1012 1013
1013 1014 if (verbose_low()) {
1014 1015 if (barrier_aborted) {
1015 1016 gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
1016 1017 } else {
1017 1018 gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
1018 1019 }
1019 1020 }
1020 1021
1021 1022 if (barrier_aborted) {
1022 1023 // If the barrier aborted we ignore the overflow condition and
1023 1024 // just abort the whole marking phase as quickly as possible.
1024 1025 return;
1025 1026 }
1026 1027
1027 1028 // If we're executing the concurrent phase of marking, reset the marking
1028 1029 // state; otherwise the marking state is reset after reference processing,
1029 1030 // during the remark pause.
1030 1031 // If we reset here as a result of an overflow during the remark we will
1031 1032 // see assertion failures from any subsequent set_concurrency_and_phase()
1032 1033 // calls.
1033 1034 if (concurrent()) {
1034 1035 // let the task associated with worker 0 do this
1035 1036 if (worker_id == 0) {
1036 1037 // task 0 is responsible for clearing the global data structures
1037 1038 // We should be here because of an overflow. During STW we should
1038 1039 // not clear the overflow flag since we rely on it being true when
1039 1040 // we exit this method to abort the pause and restart concurrent
1040 1041 // marking.
1041 1042 reset_marking_state(true /* clear_overflow */);
1042 1043 force_overflow()->update();
1043 1044
1044 1045 if (G1Log::fine()) {
1045 1046 gclog_or_tty->gclog_stamp(concurrent_gc_id());
1046 1047 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1047 1048 }
1048 1049 }
1049 1050 }
1050 1051
1051 1052 // after this, each task should reset its own data structures and
1052 1053 // then go into the second barrier
1053 1054 }
1054 1055
1055 1056 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1056 1057 if (verbose_low()) {
1057 1058 gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1058 1059 }
1059 1060
1060 1061 if (concurrent()) {
1061 1062 SuspendibleThreadSet::leave();
1062 1063 }
1063 1064
1064 1065 bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1065 1066
1066 1067 if (concurrent()) {
1067 1068 SuspendibleThreadSet::join();
1068 1069 }
1069 1070 // at this point everything should be re-initialized and ready to go
1070 1071
1071 1072 if (verbose_low()) {
1072 1073 if (barrier_aborted) {
1073 1074 gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1074 1075 } else {
1075 1076 gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1076 1077 }
1077 1078 }
1078 1079 }
1079 1080
1080 1081 #ifndef PRODUCT
1081 1082 void ForceOverflowSettings::init() {
1082 1083 _num_remaining = G1ConcMarkForceOverflow;
1083 1084 _force = false;
1084 1085 update();
1085 1086 }
1086 1087
1087 1088 void ForceOverflowSettings::update() {
1088 1089 if (_num_remaining > 0) {
1089 1090 _num_remaining -= 1;
1090 1091 _force = true;
1091 1092 } else {
1092 1093 _force = false;
1093 1094 }
1094 1095 }
1095 1096
1096 1097 bool ForceOverflowSettings::should_force() {
1097 1098 if (_force) {
1098 1099 _force = false;
1099 1100 return true;
1100 1101 } else {
1101 1102 return false;
1102 1103 }
1103 1104 }
1104 1105 #endif // !PRODUCT
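
The countdown above is a debug-only test hook. A sketch of its behavior,
assuming -XX:G1ConcMarkForceOverflow=2:

    // init():         _num_remaining = 2; update() arms the first force
    // should_force(): returns true exactly once per update()
    // update() after each forced restart arms the next force while
    // _num_remaining > 0, so marking is forced to overflow (and restart)
    // twice in total before proceeding normally.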
1105 1106
1106 1107 class CMConcurrentMarkingTask: public AbstractGangTask {
1107 1108 private:
1108 1109 ConcurrentMark* _cm;
1109 1110 ConcurrentMarkThread* _cmt;
1110 1111
1111 1112 public:
1112 1113 void work(uint worker_id) {
1113 1114 assert(Thread::current()->is_ConcurrentGC_thread(),
1114 1115 "this should only be done by a conc GC thread");
1115 1116 ResourceMark rm;
1116 1117
1117 1118 double start_vtime = os::elapsedVTime();
1118 1119
1119 1120 SuspendibleThreadSet::join();
1120 1121
1121 1122 assert(worker_id < _cm->active_tasks(), "invariant");
1122 1123 CMTask* the_task = _cm->task(worker_id);
1123 1124 the_task->record_start_time();
1124 1125 if (!_cm->has_aborted()) {
1125 1126 do {
1126 1127 double start_vtime_sec = os::elapsedVTime();
1127 1128 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1128 1129
1129 1130 the_task->do_marking_step(mark_step_duration_ms,
1130 1131 true /* do_termination */,
1131 1132 false /* is_serial*/);
1132 1133
1133 1134 double end_vtime_sec = os::elapsedVTime();
1134 1135 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1135 1136 _cm->clear_has_overflown();
1136 1137
1137 1138 _cm->do_yield_check(worker_id);
1138 1139
1139 1140 jlong sleep_time_ms;
1140 1141 if (!_cm->has_aborted() && the_task->has_aborted()) {
1141 1142 sleep_time_ms =
1142 1143 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1143 1144 SuspendibleThreadSet::leave();
1144 1145 os::sleep(Thread::current(), sleep_time_ms, false);
1145 1146 SuspendibleThreadSet::join();
1146 1147 }
1147 1148 } while (!_cm->has_aborted() && the_task->has_aborted());
1148 1149 }
1149 1150 the_task->record_end_time();
1150 1151 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1151 1152
1152 1153 SuspendibleThreadSet::leave();
1153 1154
1154 1155 double end_vtime = os::elapsedVTime();
1155 1156 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1156 1157 }
1157 1158
1158 1159 CMConcurrentMarkingTask(ConcurrentMark* cm,
1159 1160 ConcurrentMarkThread* cmt) :
1160 1161 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1161 1162
1162 1163 ~CMConcurrentMarkingTask() { }
1163 1164 };
1164 1165
1165 1166 // Calculates the number of active workers for a concurrent
1166 1167 // phase.
1167 1168 uint ConcurrentMark::calc_parallel_marking_threads() {
1168 1169 if (G1CollectedHeap::use_parallel_gc_threads()) {
1169 1170 uint n_conc_workers = 0;
1170 1171 if (!UseDynamicNumberOfGCThreads ||
1171 1172 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1172 1173 !ForceDynamicNumberOfGCThreads)) {
1173 1174 n_conc_workers = max_parallel_marking_threads();
1174 1175 } else {
1175 1176 n_conc_workers =
1176 1177 AdaptiveSizePolicy::calc_default_active_workers(
1177 1178 max_parallel_marking_threads(),
1178 1179 1, /* Minimum workers */
1179 1180 parallel_marking_threads(),
1180 1181 Threads::number_of_non_daemon_threads());
1181 1182 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1182 1183 // that scaling has already gone into "_max_parallel_marking_threads".
1183 1184 }
1184 1185 assert(n_conc_workers > 0, "Always need at least 1");
1185 1186 return n_conc_workers;
1186 1187 }
1187 1188 // If we are not running with any parallel GC threads we will not
1188 1189 // have spawned any marking threads either. Hence the number of
1189 1190 // concurrent workers should be 0.
1190 1191 return 0;
1191 1192 }
1192 1193
1193 1194 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1194 1195 // Currently, only survivors can be root regions.
1195 1196 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1196 1197 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1197 1198
1198 1199 const uintx interval = PrefetchScanIntervalInBytes;
1199 1200 HeapWord* curr = hr->bottom();
1200 1201 const HeapWord* end = hr->top();
1201 1202 while (curr < end) {
1202 1203 Prefetch::read(curr, interval);
1203 1204 oop obj = oop(curr);
1204 1205 int size = obj->oop_iterate(&cl);
1205 1206 assert(size == obj->size(), "sanity");
1206 1207 curr += size;
1207 1208 }
1208 1209 }
1209 1210
1210 1211 class CMRootRegionScanTask : public AbstractGangTask {
1211 1212 private:
1212 1213 ConcurrentMark* _cm;
1213 1214
1214 1215 public:
1215 1216 CMRootRegionScanTask(ConcurrentMark* cm) :
1216 1217 AbstractGangTask("Root Region Scan"), _cm(cm) { }
1217 1218
1218 1219 void work(uint worker_id) {
1219 1220 assert(Thread::current()->is_ConcurrentGC_thread(),
1220 1221 "this should only be done by a conc GC thread");
1221 1222
1222 1223 CMRootRegions* root_regions = _cm->root_regions();
1223 1224 HeapRegion* hr = root_regions->claim_next();
1224 1225 while (hr != NULL) {
1225 1226 _cm->scanRootRegion(hr, worker_id);
1226 1227 hr = root_regions->claim_next();
1227 1228 }
1228 1229 }
1229 1230 };
1230 1231
1231 1232 void ConcurrentMark::scanRootRegions() {
1232 1233 // Start of concurrent marking.
1233 1234 ClassLoaderDataGraph::clear_claimed_marks();
1234 1235
1235 1236 // scan_in_progress() will have been set to true only if there was
1236 1237 // at least one root region to scan. So, if it's false, we
1237 1238 // should not attempt to do any further work.
1238 1239 if (root_regions()->scan_in_progress()) {
1239 1240 _parallel_marking_threads = calc_parallel_marking_threads();
1240 1241 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1241 1242 "Maximum number of marking threads exceeded");
1242 1243 uint active_workers = MAX2(1U, parallel_marking_threads());
1243 1244
1244 1245 CMRootRegionScanTask task(this);
1245 1246 if (use_parallel_marking_threads()) {
1246 1247 _parallel_workers->set_active_workers((int) active_workers);
1247 1248 _parallel_workers->run_task(&task);
1248 1249 } else {
1249 1250 task.work(0);
1250 1251 }
1251 1252
1252 1253 // It's possible that has_aborted() is true here without actually
1253 1254 // aborting the survivor scan earlier. This is OK as it's
1254 1255 // mainly used for sanity checking.
1255 1256 root_regions()->scan_finished();
1256 1257 }
1257 1258 }
1258 1259
1259 1260 void ConcurrentMark::markFromRoots() {
1260 1261 // we might be tempted to assert that:
1261 1262 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1262 1263 // "inconsistent argument?");
1263 1264 // However that wouldn't be right, because it's possible that
1264 1265 // a safepoint is indeed in progress as a younger generation
1265 1266 // stop-the-world GC happens even as we mark in this generation.
1266 1267
1267 1268 _restart_for_overflow = false;
1268 1269 force_overflow_conc()->init();
1269 1270
1270 1271 // _g1h has _n_par_threads
1271 1272 _parallel_marking_threads = calc_parallel_marking_threads();
1272 1273 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1273 1274 "Maximum number of marking threads exceeded");
1274 1275
1275 1276 uint active_workers = MAX2(1U, parallel_marking_threads());
1276 1277
1277 1278 // Parallel task terminator is set in "set_concurrency_and_phase()"
1278 1279 set_concurrency_and_phase(active_workers, true /* concurrent */);
1279 1280
1280 1281 CMConcurrentMarkingTask markingTask(this, cmThread());
1281 1282 if (use_parallel_marking_threads()) {
1282 1283 _parallel_workers->set_active_workers((int)active_workers);
1283 1284 // Don't set _n_par_threads because it affects MT in process_roots()
1284 1285 // and the decisions on that MT processing is made elsewhere.
1285 1286 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1286 1287 _parallel_workers->run_task(&markingTask);
1287 1288 } else {
1288 1289 markingTask.work(0);
1289 1290 }
1290 1291 print_stats();
1291 1292 }
1292 1293
1293 1294 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1294 1295 // world is stopped at this checkpoint
1295 1296 assert(SafepointSynchronize::is_at_safepoint(),
1296 1297 "world should be stopped");
1297 1298
1298 1299 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1299 1300
1300 1301 // If a full collection has happened, we shouldn't do this.
1301 1302 if (has_aborted()) {
1302 1303 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1303 1304 return;
1304 1305 }
1305 1306
1306 1307 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1307 1308
1308 1309 if (VerifyDuringGC) {
1309 1310 HandleMark hm; // handle scope
1310 1311 Universe::heap()->prepare_for_verify();
1311 1312 Universe::verify(VerifyOption_G1UsePrevMarking,
1312 1313 " VerifyDuringGC:(before)");
1313 1314 }
1314 1315 g1h->check_bitmaps("Remark Start");
1315 1316
1316 1317 G1CollectorPolicy* g1p = g1h->g1_policy();
1317 1318 g1p->record_concurrent_mark_remark_start();
1318 1319
1319 1320 double start = os::elapsedTime();
1320 1321
1321 1322 checkpointRootsFinalWork();
1322 1323
1323 1324 double mark_work_end = os::elapsedTime();
1324 1325
1325 1326 weakRefsWork(clear_all_soft_refs);
1326 1327
1327 1328 if (has_overflown()) {
1328 1329 // Oops. We overflowed. Restart concurrent marking.
1329 1330 _restart_for_overflow = true;
1330 1331 if (G1TraceMarkStackOverflow) {
1331 1332 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1332 1333 }
1333 1334
1334 1335 // Verify the heap w.r.t. the previous marking bitmap.
1335 1336 if (VerifyDuringGC) {
1336 1337 HandleMark hm; // handle scope
1337 1338 Universe::heap()->prepare_for_verify();
1338 1339 Universe::verify(VerifyOption_G1UsePrevMarking,
1339 1340 " VerifyDuringGC:(overflow)");
1340 1341 }
1341 1342
1342 1343 // Clear the marking state because we will be restarting
1343 1344 // marking due to overflowing the global mark stack.
1344 1345 reset_marking_state();
1345 1346 } else {
1346 1347 // Aggregate the per-task counting data that we have accumulated
1347 1348 // while marking.
1348 1349 aggregate_count_data();
1349 1350
1350 1351 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1351 1352 // We're done with marking.
1352 1353 // This is the end of the marking cycle; we expect all
1353 1354 // threads to have SATB queues with active set to true.
1354 1355 satb_mq_set.set_active_all_threads(false, /* new active value */
1355 1356 true /* expected_active */);
1356 1357
1357 1358 if (VerifyDuringGC) {
1358 1359 HandleMark hm; // handle scope
1359 1360 Universe::heap()->prepare_for_verify();
1360 1361 Universe::verify(VerifyOption_G1UseNextMarking,
1361 1362 " VerifyDuringGC:(after)");
1362 1363 }
1363 1364 g1h->check_bitmaps("Remark End");
1364 1365 assert(!restart_for_overflow(), "sanity");
1365 1366 // Completely reset the marking state since marking completed
1366 1367 set_non_marking_state();
1367 1368 }
1368 1369
1369 1370 // Expand the marking stack, if we have to and if we can.
1370 1371 if (_markStack.should_expand()) {
1371 1372 _markStack.expand();
1372 1373 }
1373 1374
1374 1375 // Statistics
1375 1376 double now = os::elapsedTime();
1376 1377 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1377 1378 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1378 1379 _remark_times.add((now - start) * 1000.0);
1379 1380
1380 1381 g1p->record_concurrent_mark_remark_end();
1381 1382
1382 1383 G1CMIsAliveClosure is_alive(g1h);
1383 1384 g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1384 1385 }
1385 1386
1386 1387 // Base class of the closures that finalize and verify the
1387 1388 // liveness counting data.
1388 1389 class CMCountDataClosureBase: public HeapRegionClosure {
1389 1390 protected:
1390 1391 G1CollectedHeap* _g1h;
1391 1392 ConcurrentMark* _cm;
1392 1393 CardTableModRefBS* _ct_bs;
1393 1394
1394 1395 BitMap* _region_bm;
1395 1396 BitMap* _card_bm;
1396 1397
1397 1398 // Takes a region that's not empty (i.e., it has at least one
1398 1399 // live object in it) and sets its corresponding bit on the region
1399 1400 // bitmap to 1. If the region is "starts humongous" it will also set
1400 1401 // to 1 the bits on the region bitmap that correspond to its
1401 1402 // associated "continues humongous" regions.
1402 1403 void set_bit_for_region(HeapRegion* hr) {
1403 1404 assert(!hr->continuesHumongous(), "should have filtered those out");
1404 1405
1405 1406 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1406 1407 if (!hr->startsHumongous()) {
1407 1408 // Normal (non-humongous) case: just set the bit.
1408 1409 _region_bm->par_at_put(index, true);
1409 1410 } else {
1410 1411 // Starts humongous case: calculate how many regions are part of
1411 1412 // this humongous region and then set the bit range.
1412 1413 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1413 1414 _region_bm->par_at_put_range(index, end_index, true);
1414 1415 }
1415 1416 }
1416 1417
1417 1418 public:
1418 1419 CMCountDataClosureBase(G1CollectedHeap* g1h,
1419 1420 BitMap* region_bm, BitMap* card_bm):
1420 1421 _g1h(g1h), _cm(g1h->concurrent_mark()),
1421 1422 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1422 1423 _region_bm(region_bm), _card_bm(card_bm) { }
1423 1424 };
1424 1425
1425 1426 // Closure that calculates the # live (marked) bytes per region. Used
1426 1427 // for verification purposes during the cleanup pause.
1427 1428 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1428 1429 CMBitMapRO* _bm;
1429 1430 size_t _region_marked_bytes;
1430 1431
1431 1432 public:
1432 1433 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1433 1434 BitMap* region_bm, BitMap* card_bm) :
1434 1435 CMCountDataClosureBase(g1h, region_bm, card_bm),
1435 1436 _bm(bm), _region_marked_bytes(0) { }
1436 1437
1437 1438 bool doHeapRegion(HeapRegion* hr) {
1438 1439
1439 1440 if (hr->continuesHumongous()) {
1440 1441 // We will ignore these here and process them when their
1441 1442 // associated "starts humongous" region is processed (see
1442 1443     // set_bit_for_region()). Note that we cannot rely on their
1443 1444     // associated "starts humongous" region to have its bit set to
1444 1445 // 1 since, due to the region chunking in the parallel region
1445 1446 // iteration, a "continues humongous" region might be visited
1446 1447 // before its associated "starts humongous".
1447 1448 return false;
1448 1449 }
1449 1450
1450 1451 HeapWord* ntams = hr->next_top_at_mark_start();
1451 1452 HeapWord* start = hr->bottom();
1452 1453
1453 1454 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1454 1455 err_msg("Preconditions not met - "
1455 1456 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1456 1457 p2i(start), p2i(ntams), p2i(hr->end())));
1457 1458
1458 1459 // Find the first marked object at or after "start".
1459 1460 start = _bm->getNextMarkedWordAddress(start, ntams);
1460 1461
1461 1462 size_t marked_bytes = 0;
1462 1463
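     +     // Walk the objects marked on the bitmap in [start, ntams): accumulate
     +     // each object's size into marked_bytes and mark the cards it spans
     +     // in the card bitmap.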
1463 1464 while (start < ntams) {
1464 1465 oop obj = oop(start);
1465 1466 int obj_sz = obj->size();
1466 1467 HeapWord* obj_end = start + obj_sz;
1467 1468
1468 1469 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1469 1470 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1470 1471
1471 1472       // Note: if we're looking at the last region in the heap, obj_end
1472 1473       // could actually be just beyond the end of the heap; end_idx
1473 1474 // will then correspond to a (non-existent) card that is also
1474 1475 // just beyond the heap.
1475 1476 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1476 1477 // end of object is not card aligned - increment to cover
1477 1478 // all the cards spanned by the object
1478 1479 end_idx += 1;
1479 1480 }
1480 1481
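     +       // Worked example (assuming the usual 512 byte card size): an object
     +       // spanning heap-relative bytes [0x0, 0x300) gives start_idx 0 and
     +       // end_idx 1; since 0x300 is not card aligned, end_idx is bumped to 2
     +       // and the bits for cards 0 and 1 are set below.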
1481 1482 // Set the bits in the card BM for the cards spanned by this object.
1482 1483 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1483 1484
1484 1485 // Add the size of this object to the number of marked bytes.
1485 1486 marked_bytes += (size_t)obj_sz * HeapWordSize;
1486 1487
1487 1488 // Find the next marked object after this one.
1488 1489 start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1489 1490 }
1490 1491
1491 1492 // Mark the allocated-since-marking portion...
1492 1493 HeapWord* top = hr->top();
1493 1494 if (ntams < top) {
1494 1495 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1495 1496 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1496 1497
1497 1498       // Note: if we're looking at the last region in the heap, top
1498 1499       // could actually be just beyond the end of the heap; end_idx
1499 1500 // will then correspond to a (non-existent) card that is also
1500 1501 // just beyond the heap.
1501 1502 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1502 1503 // end of object is not card aligned - increment to cover
1503 1504 // all the cards spanned by the object
1504 1505 end_idx += 1;
1505 1506 }
1506 1507 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1507 1508
1508 1509 // This definitely means the region has live objects.
1509 1510 set_bit_for_region(hr);
1510 1511 }
1511 1512
1512 1513 // Update the live region bitmap.
1513 1514 if (marked_bytes > 0) {
1514 1515 set_bit_for_region(hr);
1515 1516 }
1516 1517
1517 1518 // Set the marked bytes for the current region so that
1518 1519     // it can be queried by a calling verification routine
1519 1520 _region_marked_bytes = marked_bytes;
1520 1521
1521 1522 return false;
1522 1523 }
1523 1524
1524 1525 size_t region_marked_bytes() const { return _region_marked_bytes; }
1525 1526 };
1526 1527
1527 1528 // Heap region closure used for verifying the counting data
1528 1529 // that was accumulated concurrently and aggregated during
1529 1530 // the remark pause. This closure is applied to the heap
1530 1531 // regions during the STW cleanup pause.
1531 1532
1532 1533 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1533 1534 G1CollectedHeap* _g1h;
1534 1535 ConcurrentMark* _cm;
1535 1536 CalcLiveObjectsClosure _calc_cl;
1536 1537 BitMap* _region_bm; // Region BM to be verified
1537 1538 BitMap* _card_bm; // Card BM to be verified
1538 1539 bool _verbose; // verbose output?
1539 1540
1540 1541 BitMap* _exp_region_bm; // Expected Region BM values
1541 1542 BitMap* _exp_card_bm; // Expected card BM values
1542 1543
1543 1544 int _failures;
1544 1545
1545 1546 public:
1546 1547 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1547 1548 BitMap* region_bm,
1548 1549 BitMap* card_bm,
1549 1550 BitMap* exp_region_bm,
1550 1551 BitMap* exp_card_bm,
1551 1552 bool verbose) :
1552 1553 _g1h(g1h), _cm(g1h->concurrent_mark()),
1553 1554 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1554 1555 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1555 1556 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1556 1557 _failures(0) { }
1557 1558
1558 1559 int failures() const { return _failures; }
1559 1560
1560 1561 bool doHeapRegion(HeapRegion* hr) {
1561 1562 if (hr->continuesHumongous()) {
1562 1563 // We will ignore these here and process them when their
1563 1564 // associated "starts humongous" region is processed (see
1564 1565       // set_bit_for_region()). Note that we cannot rely on their
1565 1566       // associated "starts humongous" region to have its bit set to
1566 1567 // 1 since, due to the region chunking in the parallel region
1567 1568 // iteration, a "continues humongous" region might be visited
1568 1569 // before its associated "starts humongous".
1569 1570 return false;
1570 1571 }
1571 1572
1572 1573 int failures = 0;
1573 1574
1574 1575 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1575 1576 // this region and set the corresponding bits in the expected region
1576 1577 // and card bitmaps.
1577 1578 bool res = _calc_cl.doHeapRegion(hr);
1578 1579 assert(res == false, "should be continuing");
1579 1580
1580 1581 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1581 1582 Mutex::_no_safepoint_check_flag);
1582 1583
1583 1584 // Verify the marked bytes for this region.
1584 1585 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1585 1586 size_t act_marked_bytes = hr->next_marked_bytes();
1586 1587
1587 1588 // We're not OK if expected marked bytes > actual marked bytes. It means
1588 1589     // we have missed accounting for some objects during the actual marking.
1589 1590 if (exp_marked_bytes > act_marked_bytes) {
1590 1591 if (_verbose) {
1591 1592 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1592 1593 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1593 1594 hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1594 1595 }
1595 1596 failures += 1;
1596 1597 }
1597 1598
1598 1599     // Verify the bit for this region in the actual and expected
1599 1600     // (just calculated) region bitmaps.
1600 1601 // We're not OK if the bit in the calculated expected region
1601 1602 // bitmap is set and the bit in the actual region bitmap is not.
1602 1603 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1603 1604
1604 1605 bool expected = _exp_region_bm->at(index);
1605 1606 bool actual = _region_bm->at(index);
1606 1607 if (expected && !actual) {
1607 1608 if (_verbose) {
1608 1609 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1609 1610 "expected: %s, actual: %s",
1610 1611 hr->hrm_index(),
1611 1612 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1612 1613 }
1613 1614 failures += 1;
1614 1615 }
1615 1616
1616 1617 // Verify that the card bit maps for the cards spanned by the current
1617 1618 // region match. We have an error if we have a set bit in the expected
1618 1619 // bit map and the corresponding bit in the actual bitmap is not set.
1619 1620
1620 1621 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1621 1622 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1622 1623
1623 1624 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1624 1625 expected = _exp_card_bm->at(i);
1625 1626 actual = _card_bm->at(i);
1626 1627
1627 1628 if (expected && !actual) {
1628 1629 if (_verbose) {
1629 1630 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1630 1631 "expected: %s, actual: %s",
1631 1632 hr->hrm_index(), i,
1632 1633 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1633 1634 }
1634 1635 failures += 1;
1635 1636 }
1636 1637 }
1637 1638
1638 1639 if (failures > 0 && _verbose) {
1639 1640 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1640 1641 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1641 1642 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1642 1643 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1643 1644 }
1644 1645
1645 1646 _failures += failures;
1646 1647
1647 1648 // We could stop iteration over the heap when we
1648 1649 // find the first violating region by returning true.
1649 1650 return false;
1650 1651 }
1651 1652 };
1652 1653
1653 1654 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1654 1655 protected:
1655 1656 G1CollectedHeap* _g1h;
1656 1657 ConcurrentMark* _cm;
1657 1658 BitMap* _actual_region_bm;
1658 1659 BitMap* _actual_card_bm;
1659 1660
1660 1661 uint _n_workers;
1661 1662
1662 1663 BitMap* _expected_region_bm;
1663 1664 BitMap* _expected_card_bm;
1664 1665
1665 1666 int _failures;
1666 1667 bool _verbose;
1667 1668
1668 1669 public:
1669 1670 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1670 1671 BitMap* region_bm, BitMap* card_bm,
1671 1672 BitMap* expected_region_bm, BitMap* expected_card_bm)
1672 1673 : AbstractGangTask("G1 verify final counting"),
1673 1674 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1674 1675 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1675 1676 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1676 1677 _failures(0), _verbose(false),
1677 1678 _n_workers(0) {
1678 1679 assert(VerifyDuringGC, "don't call this otherwise");
1679 1680
1680 1681 // Use the value already set as the number of active threads
1681 1682 // in the call to run_task().
1682 1683 if (G1CollectedHeap::use_parallel_gc_threads()) {
1683 1684 assert( _g1h->workers()->active_workers() > 0,
1684 1685 "Should have been previously set");
1685 1686 _n_workers = _g1h->workers()->active_workers();
1686 1687 } else {
1687 1688 _n_workers = 1;
1688 1689 }
1689 1690
1690 1691 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1691 1692 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1692 1693
1693 1694 _verbose = _cm->verbose_medium();
1694 1695 }
1695 1696
1696 1697 void work(uint worker_id) {
1697 1698 assert(worker_id < _n_workers, "invariant");
1698 1699
1699 1700 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1700 1701 _actual_region_bm, _actual_card_bm,
1701 1702 _expected_region_bm,
1702 1703 _expected_card_bm,
1703 1704 _verbose);
1704 1705
1705 1706 if (G1CollectedHeap::use_parallel_gc_threads()) {
1706 1707 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1707 1708 worker_id,
1708 1709 _n_workers,
1709 1710 HeapRegion::VerifyCountClaimValue);
1710 1711 } else {
1711 1712 _g1h->heap_region_iterate(&verify_cl);
1712 1713 }
1713 1714
1714 1715 Atomic::add(verify_cl.failures(), &_failures);
1715 1716 }
1716 1717
1717 1718 int failures() const { return _failures; }
1718 1719 };
1719 1720
1720 1721 // Closure that finalizes the liveness counting data.
1721 1722 // Used during the cleanup pause.
1722 1723 // Sets the bits corresponding to the interval [NTAMS, top)
1723 1724 // (which contains the implicitly live objects) in the
1724 1725 // card liveness bitmap. Also sets the bit for each region,
1725 1726 // containing live data, in the region liveness bitmap.
1726 1727
1727 1728 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1728 1729 public:
1729 1730 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1730 1731 BitMap* region_bm,
1731 1732 BitMap* card_bm) :
1732 1733 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1733 1734
1734 1735 bool doHeapRegion(HeapRegion* hr) {
1735 1736
1736 1737 if (hr->continuesHumongous()) {
1737 1738 // We will ignore these here and process them when their
1738 1739 // associated "starts humongous" region is processed (see
1739 1740       // set_bit_for_region()). Note that we cannot rely on their
1740 1741       // associated "starts humongous" region to have its bit set to
1741 1742 // 1 since, due to the region chunking in the parallel region
1742 1743 // iteration, a "continues humongous" region might be visited
1743 1744 // before its associated "starts humongous".
1744 1745 return false;
1745 1746 }
1746 1747
1747 1748 HeapWord* ntams = hr->next_top_at_mark_start();
1748 1749 HeapWord* top = hr->top();
1749 1750
1750 1751 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1751 1752
1752 1753 // Mark the allocated-since-marking portion...
1753 1754 if (ntams < top) {
1754 1755 // This definitely means the region has live objects.
1755 1756 set_bit_for_region(hr);
1756 1757
1757 1758 // Now set the bits in the card bitmap for [ntams, top)
1758 1759 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1759 1760 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1760 1761
1761 1762       // Note: if we're looking at the last region in the heap, top
1762 1763       // could actually be just beyond the end of the heap; end_idx
1763 1764 // will then correspond to a (non-existent) card that is also
1764 1765 // just beyond the heap.
1765 1766 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1766 1767 // end of object is not card aligned - increment to cover
1767 1768 // all the cards spanned by the object
1768 1769 end_idx += 1;
1769 1770 }
1770 1771
1771 1772 assert(end_idx <= _card_bm->size(),
1772 1773 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1773 1774 end_idx, _card_bm->size()));
1774 1775 assert(start_idx < _card_bm->size(),
1775 1776 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1776 1777 start_idx, _card_bm->size()));
1777 1778
1778 1779 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1779 1780 }
1780 1781
1781 1782 // Set the bit for the region if it contains live data
1782 1783 if (hr->next_marked_bytes() > 0) {
1783 1784 set_bit_for_region(hr);
1784 1785 }
1785 1786
1786 1787 return false;
1787 1788 }
1788 1789 };
1789 1790
1790 1791 class G1ParFinalCountTask: public AbstractGangTask {
1791 1792 protected:
1792 1793 G1CollectedHeap* _g1h;
1793 1794 ConcurrentMark* _cm;
1794 1795 BitMap* _actual_region_bm;
1795 1796 BitMap* _actual_card_bm;
1796 1797
1797 1798 uint _n_workers;
1798 1799
1799 1800 public:
1800 1801 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1801 1802 : AbstractGangTask("G1 final counting"),
1802 1803 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1803 1804 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1804 1805 _n_workers(0) {
1805 1806 // Use the value already set as the number of active threads
1806 1807 // in the call to run_task().
1807 1808 if (G1CollectedHeap::use_parallel_gc_threads()) {
1808 1809 assert( _g1h->workers()->active_workers() > 0,
1809 1810 "Should have been previously set");
1810 1811 _n_workers = _g1h->workers()->active_workers();
1811 1812 } else {
1812 1813 _n_workers = 1;
1813 1814 }
1814 1815 }
1815 1816
1816 1817 void work(uint worker_id) {
1817 1818 assert(worker_id < _n_workers, "invariant");
1818 1819
1819 1820 FinalCountDataUpdateClosure final_update_cl(_g1h,
1820 1821 _actual_region_bm,
1821 1822 _actual_card_bm);
1822 1823
1823 1824 if (G1CollectedHeap::use_parallel_gc_threads()) {
1824 1825 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1825 1826 worker_id,
1826 1827 _n_workers,
1827 1828 HeapRegion::FinalCountClaimValue);
1828 1829 } else {
1829 1830 _g1h->heap_region_iterate(&final_update_cl);
1830 1831 }
1831 1832 }
1832 1833 };
1833 1834
1834 1835 class G1ParNoteEndTask;
1835 1836
1836 1837 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1837 1838 G1CollectedHeap* _g1;
1838 1839 size_t _max_live_bytes;
1839 1840 uint _regions_claimed;
1840 1841 size_t _freed_bytes;
1841 1842 FreeRegionList* _local_cleanup_list;
1842 1843 HeapRegionSetCount _old_regions_removed;
1843 1844 HeapRegionSetCount _humongous_regions_removed;
1844 1845 HRRSCleanupTask* _hrrs_cleanup_task;
1845 1846 double _claimed_region_time;
1846 1847 double _max_region_time;
1847 1848
1848 1849 public:
1849 1850 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1850 1851 FreeRegionList* local_cleanup_list,
1851 1852 HRRSCleanupTask* hrrs_cleanup_task) :
1852 1853 _g1(g1),
1853 1854 _max_live_bytes(0), _regions_claimed(0),
1854 1855 _freed_bytes(0),
1855 1856 _claimed_region_time(0.0), _max_region_time(0.0),
1856 1857 _local_cleanup_list(local_cleanup_list),
1857 1858 _old_regions_removed(),
1858 1859 _humongous_regions_removed(),
1859 1860 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1860 1861
1861 1862 size_t freed_bytes() { return _freed_bytes; }
1862 1863 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1863 1864 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1864 1865
1865 1866 bool doHeapRegion(HeapRegion *hr) {
1866 1867 if (hr->continuesHumongous()) {
1867 1868 return false;
1868 1869 }
1869 1870 // We use a claim value of zero here because all regions
1870 1871 // were claimed with value 1 in the FinalCount task.
1871 1872 _g1->reset_gc_time_stamps(hr);
1872 1873 double start = os::elapsedTime();
1873 1874 _regions_claimed++;
1874 1875 hr->note_end_of_marking();
1875 1876 _max_live_bytes += hr->max_live_bytes();
1876 1877
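     +     // A non-young region that has allocated space but no live data is
     +     // completely garbage and can be freed right away.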
1877 1878 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1878 1879 _freed_bytes += hr->used();
1879 1880 hr->set_containing_set(NULL);
1880 1881 if (hr->isHumongous()) {
1881 1882 assert(hr->startsHumongous(), "we should only see starts humongous");
1882 1883 _humongous_regions_removed.increment(1u, hr->capacity());
1883 1884 _g1->free_humongous_region(hr, _local_cleanup_list, true);
1884 1885 } else {
1885 1886 _old_regions_removed.increment(1u, hr->capacity());
1886 1887 _g1->free_region(hr, _local_cleanup_list, true);
1887 1888 }
1888 1889 } else {
1889 1890 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1890 1891 }
1891 1892
1892 1893 double region_time = (os::elapsedTime() - start);
1893 1894 _claimed_region_time += region_time;
1894 1895 if (region_time > _max_region_time) {
1895 1896 _max_region_time = region_time;
1896 1897 }
1897 1898 return false;
1898 1899 }
1899 1900
1900 1901 size_t max_live_bytes() { return _max_live_bytes; }
1901 1902 uint regions_claimed() { return _regions_claimed; }
1902 1903 double claimed_region_time_sec() { return _claimed_region_time; }
1903 1904 double max_region_time_sec() { return _max_region_time; }
1904 1905 };
1905 1906
1906 1907 class G1ParNoteEndTask: public AbstractGangTask {
1907 1908 friend class G1NoteEndOfConcMarkClosure;
1908 1909
1909 1910 protected:
1910 1911 G1CollectedHeap* _g1h;
1911 1912 size_t _max_live_bytes;
1912 1913 size_t _freed_bytes;
1913 1914 FreeRegionList* _cleanup_list;
1914 1915
1915 1916 public:
1916 1917 G1ParNoteEndTask(G1CollectedHeap* g1h,
1917 1918 FreeRegionList* cleanup_list) :
1918 1919 AbstractGangTask("G1 note end"), _g1h(g1h),
1919 1920 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1920 1921
1921 1922 void work(uint worker_id) {
1922 1923 double start = os::elapsedTime();
1923 1924 FreeRegionList local_cleanup_list("Local Cleanup List");
1924 1925 HRRSCleanupTask hrrs_cleanup_task;
1925 1926 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1926 1927 &hrrs_cleanup_task);
1927 1928 if (G1CollectedHeap::use_parallel_gc_threads()) {
1928 1929 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1929 1930 _g1h->workers()->active_workers(),
1930 1931 HeapRegion::NoteEndClaimValue);
1931 1932 } else {
1932 1933 _g1h->heap_region_iterate(&g1_note_end);
1933 1934 }
1934 1935 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1935 1936
1936 1937 // Now update the lists
1937 1938 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1938 1939 {
1939 1940 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1940 1941 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1941 1942 _max_live_bytes += g1_note_end.max_live_bytes();
1942 1943 _freed_bytes += g1_note_end.freed_bytes();
1943 1944
1944 1945 // If we iterate over the global cleanup list at the end of
1945 1946       // cleanup to do this printing we cannot guarantee that we only
1946 1947       // generate output for the newly-reclaimed regions (the list
1947 1948 // might not be empty at the beginning of cleanup; we might
1948 1949 // still be working on its previous contents). So we do the
1949 1950 // printing here, before we append the new regions to the global
1950 1951 // cleanup list.
1951 1952
1952 1953 G1HRPrinter* hr_printer = _g1h->hr_printer();
1953 1954 if (hr_printer->is_active()) {
1954 1955 FreeRegionListIterator iter(&local_cleanup_list);
1955 1956 while (iter.more_available()) {
1956 1957 HeapRegion* hr = iter.get_next();
1957 1958 hr_printer->cleanup(hr);
1958 1959 }
1959 1960 }
1960 1961
1961 1962 _cleanup_list->add_ordered(&local_cleanup_list);
1962 1963 assert(local_cleanup_list.is_empty(), "post-condition");
1963 1964
1964 1965 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1965 1966 }
1966 1967 }
1967 1968 size_t max_live_bytes() { return _max_live_bytes; }
1968 1969 size_t freed_bytes() { return _freed_bytes; }
1969 1970 };
1970 1971
1971 1972 class G1ParScrubRemSetTask: public AbstractGangTask {
1972 1973 protected:
1973 1974 G1RemSet* _g1rs;
1974 1975 BitMap* _region_bm;
1975 1976 BitMap* _card_bm;
1976 1977 public:
1977 1978 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1978 1979 BitMap* region_bm, BitMap* card_bm) :
1979 1980 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1980 1981 _region_bm(region_bm), _card_bm(card_bm) { }
1981 1982
1982 1983 void work(uint worker_id) {
1983 1984 if (G1CollectedHeap::use_parallel_gc_threads()) {
1984 1985 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1985 1986 HeapRegion::ScrubRemSetClaimValue);
1986 1987 } else {
1987 1988 _g1rs->scrub(_region_bm, _card_bm);
1988 1989 }
1989 1990 }
1990 1991
1991 1992 };
1992 1993
1993 1994 void ConcurrentMark::cleanup() {
1994 1995 // world is stopped at this checkpoint
1995 1996 assert(SafepointSynchronize::is_at_safepoint(),
1996 1997 "world should be stopped");
1997 1998 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1998 1999
1999 2000 // If a full collection has happened, we shouldn't do this.
2000 2001 if (has_aborted()) {
2001 2002 g1h->set_marking_complete(); // So bitmap clearing isn't confused
2002 2003 return;
2003 2004 }
2004 2005
2005 2006 g1h->verify_region_sets_optional();
2006 2007
2007 2008 if (VerifyDuringGC) {
2008 2009 HandleMark hm; // handle scope
2009 2010 Universe::heap()->prepare_for_verify();
2010 2011 Universe::verify(VerifyOption_G1UsePrevMarking,
2011 2012 " VerifyDuringGC:(before)");
2012 2013 }
2013 2014 g1h->check_bitmaps("Cleanup Start");
2014 2015
2015 2016 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2016 2017 g1p->record_concurrent_mark_cleanup_start();
2017 2018
2018 2019 double start = os::elapsedTime();
2019 2020
2020 2021 HeapRegionRemSet::reset_for_cleanup_tasks();
2021 2022
2022 2023 uint n_workers;
2023 2024
2024 2025 // Do counting once more with the world stopped for good measure.
2025 2026 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2026 2027
2027 2028 if (G1CollectedHeap::use_parallel_gc_threads()) {
2028 2029 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2029 2030 "sanity check");
2030 2031
2031 2032 g1h->set_par_threads();
2032 2033 n_workers = g1h->n_par_threads();
2033 2034 assert(g1h->n_par_threads() == n_workers,
2034 2035 "Should not have been reset");
2035 2036 g1h->workers()->run_task(&g1_par_count_task);
2036 2037 // Done with the parallel phase so reset to 0.
2037 2038 g1h->set_par_threads(0);
2038 2039
2039 2040 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2040 2041 "sanity check");
2041 2042 } else {
2042 2043 n_workers = 1;
2043 2044 g1_par_count_task.work(0);
2044 2045 }
2045 2046
2046 2047 if (VerifyDuringGC) {
2047 2048 // Verify that the counting data accumulated during marking matches
2048 2049 // that calculated by walking the marking bitmap.
2049 2050
2050 2051 // Bitmaps to hold expected values
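     +     // (the 'true' argument requests allocation in the current resource area)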
2051 2052 BitMap expected_region_bm(_region_bm.size(), true);
2052 2053 BitMap expected_card_bm(_card_bm.size(), true);
2053 2054
2054 2055 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2055 2056 &_region_bm,
2056 2057 &_card_bm,
2057 2058 &expected_region_bm,
2058 2059 &expected_card_bm);
2059 2060
2060 2061 if (G1CollectedHeap::use_parallel_gc_threads()) {
2061 2062 g1h->set_par_threads((int)n_workers);
2062 2063 g1h->workers()->run_task(&g1_par_verify_task);
2063 2064 // Done with the parallel phase so reset to 0.
2064 2065 g1h->set_par_threads(0);
2065 2066
2066 2067 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2067 2068 "sanity check");
2068 2069 } else {
2069 2070 g1_par_verify_task.work(0);
2070 2071 }
2071 2072
2072 2073 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2073 2074 }
2074 2075
2075 2076 size_t start_used_bytes = g1h->used();
2076 2077 g1h->set_marking_complete();
2077 2078
2078 2079 double count_end = os::elapsedTime();
2079 2080 double this_final_counting_time = (count_end - start);
2080 2081 _total_counting_time += this_final_counting_time;
2081 2082
2082 2083 if (G1PrintRegionLivenessInfo) {
2083 2084 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2084 2085 _g1h->heap_region_iterate(&cl);
2085 2086 }
2086 2087
2087 2088 // Install newly created mark bitMap as "prev".
2088 2089 swapMarkBitMaps();
2089 2090
2090 2091 g1h->reset_gc_time_stamp();
2091 2092
2092 2093 // Note end of marking in all heap regions.
2093 2094 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2094 2095 if (G1CollectedHeap::use_parallel_gc_threads()) {
2095 2096 g1h->set_par_threads((int)n_workers);
2096 2097 g1h->workers()->run_task(&g1_par_note_end_task);
2097 2098 g1h->set_par_threads(0);
2098 2099
2099 2100 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2100 2101 "sanity check");
2101 2102 } else {
2102 2103 g1_par_note_end_task.work(0);
2103 2104 }
2104 2105 g1h->check_gc_time_stamps();
2105 2106
2106 2107 if (!cleanup_list_is_empty()) {
2107 2108 // The cleanup list is not empty, so we'll have to process it
2108 2109 // concurrently. Notify anyone else that might be wanting free
2109 2110 // regions that there will be more free regions coming soon.
2110 2111 g1h->set_free_regions_coming();
2111 2112 }
2112 2113
2113 2114   // Scrub the remembered sets (if enabled) before the record_concurrent_mark_cleanup_end()
2114 2115   // call below, since it affects the metric by which we sort the heap regions.
2115 2116 if (G1ScrubRemSets) {
2116 2117 double rs_scrub_start = os::elapsedTime();
2117 2118 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2118 2119 if (G1CollectedHeap::use_parallel_gc_threads()) {
2119 2120 g1h->set_par_threads((int)n_workers);
2120 2121 g1h->workers()->run_task(&g1_par_scrub_rs_task);
2121 2122 g1h->set_par_threads(0);
2122 2123
2123 2124 assert(g1h->check_heap_region_claim_values(
2124 2125 HeapRegion::ScrubRemSetClaimValue),
2125 2126 "sanity check");
2126 2127 } else {
2127 2128 g1_par_scrub_rs_task.work(0);
2128 2129 }
2129 2130
2130 2131 double rs_scrub_end = os::elapsedTime();
2131 2132 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2132 2133 _total_rs_scrub_time += this_rs_scrub_time;
2133 2134 }
2134 2135
2135 2136 // this will also free any regions totally full of garbage objects,
2136 2137 // and sort the regions.
2137 2138 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2138 2139
2139 2140 // Statistics.
2140 2141 double end = os::elapsedTime();
2141 2142 _cleanup_times.add((end - start) * 1000.0);
2142 2143
2143 2144 if (G1Log::fine()) {
2144 2145 g1h->print_size_transition(gclog_or_tty,
2145 2146 start_used_bytes,
2146 2147 g1h->used(),
2147 2148 g1h->capacity());
2148 2149 }
2149 2150
2150 2151   // Cleanup will have freed any regions completely full of garbage.
2151 2152 // Update the soft reference policy with the new heap occupancy.
2152 2153 Universe::update_heap_info_at_gc();
2153 2154
2154 2155 if (VerifyDuringGC) {
2155 2156 HandleMark hm; // handle scope
2156 2157 Universe::heap()->prepare_for_verify();
2157 2158 Universe::verify(VerifyOption_G1UsePrevMarking,
2158 2159 " VerifyDuringGC:(after)");
2159 2160 }
2160 2161 g1h->check_bitmaps("Cleanup End");
2161 2162
2162 2163 g1h->verify_region_sets_optional();
2163 2164
2164 2165 // We need to make this be a "collection" so any collection pause that
2165 2166 // races with it goes around and waits for completeCleanup to finish.
2166 2167 g1h->increment_total_collections();
2167 2168
2168 2169 // Clean out dead classes and update Metaspace sizes.
2169 2170 if (ClassUnloadingWithConcurrentMark) {
2170 2171 ClassLoaderDataGraph::purge();
2171 2172 }
2172 2173 MetaspaceGC::compute_new_size();
2173 2174
2174 2175 // We reclaimed old regions so we should calculate the sizes to make
2175 2176 // sure we update the old gen/space data.
2176 2177 g1h->g1mm()->update_sizes();
2177 2178
2178 2179 g1h->trace_heap_after_concurrent_cycle();
2179 2180 }
2180 2181
2181 2182 void ConcurrentMark::completeCleanup() {
2182 2183 if (has_aborted()) return;
2183 2184
2184 2185 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2185 2186
2186 2187 _cleanup_list.verify_optional();
2187 2188 FreeRegionList tmp_free_list("Tmp Free List");
2188 2189
2189 2190 if (G1ConcRegionFreeingVerbose) {
2190 2191 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2191 2192 "cleanup list has %u entries",
2192 2193 _cleanup_list.length());
2193 2194 }
2194 2195
2195 2196 // No one else should be accessing the _cleanup_list at this point,
2196 2197 // so it is not necessary to take any locks
2197 2198 while (!_cleanup_list.is_empty()) {
2198 2199 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2199 2200 assert(hr != NULL, "Got NULL from a non-empty list");
2200 2201 hr->par_clear();
2201 2202 tmp_free_list.add_ordered(hr);
2202 2203
2203 2204 // Instead of adding one region at a time to the secondary_free_list,
2204 2205 // we accumulate them in the local list and move them a few at a
2205 2206 // time. This also cuts down on the number of notify_all() calls
2206 2207 // we do during this process. We'll also append the local list when
2207 2208 // _cleanup_list is empty (which means we just removed the last
2208 2209 // region from the _cleanup_list).
2209 2210 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2210 2211 _cleanup_list.is_empty()) {
2211 2212 if (G1ConcRegionFreeingVerbose) {
2212 2213 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2213 2214 "appending %u entries to the secondary_free_list, "
2214 2215 "cleanup list still has %u entries",
2215 2216 tmp_free_list.length(),
2216 2217 _cleanup_list.length());
2217 2218 }
2218 2219
2219 2220 {
2220 2221 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2221 2222 g1h->secondary_free_list_add(&tmp_free_list);
2222 2223 SecondaryFreeList_lock->notify_all();
2223 2224 }
2224 2225
2225 2226 if (G1StressConcRegionFreeing) {
2226 2227 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2227 2228 os::sleep(Thread::current(), (jlong) 1, false);
2228 2229 }
2229 2230 }
2230 2231 }
2231 2232 }
2232 2233 assert(tmp_free_list.is_empty(), "post-condition");
2233 2234 }
2234 2235
2235 2236 // Supporting Object and Oop closures for reference discovery
2236 2237 // and processing during marking
2237 2238
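     +// A reference is alive if it is non-NULL and either points outside the
     +// G1 reserved heap or refers to an object that the marking information
     +// has not shown to be dead (see is_obj_ill()).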
2238 2239 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2239 2240 HeapWord* addr = (HeapWord*)obj;
2240 2241 return addr != NULL &&
2241 2242 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2242 2243 }
2243 2244
2244 2245 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2245 2246 // Uses the CMTask associated with a worker thread (for serial reference
2246 2247 // processing the CMTask for worker 0 is used) to preserve (mark) and
2247 2248 // trace referent objects.
2248 2249 //
2249 2250 // Using the CMTask and embedded local queues avoids having the worker
2250 2251 // threads operating on the global mark stack. This reduces the risk
2251 2252 // of overflowing the stack - which we would rather avoid at this late
2252 2253 // stage. Also using the tasks' local queues removes the potential
2253 2254 // for the workers to interfere with each other, which could occur if
2254 2255 // they operated on the global stack.
2255 2256
2256 2257 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2257 2258 ConcurrentMark* _cm;
2258 2259 CMTask* _task;
2259 2260 int _ref_counter_limit;
2260 2261 int _ref_counter;
2261 2262 bool _is_serial;
2262 2263 public:
2263 2264 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2264 2265 _cm(cm), _task(task), _is_serial(is_serial),
2265 2266 _ref_counter_limit(G1RefProcDrainInterval) {
2266 2267 assert(_ref_counter_limit > 0, "sanity");
2267 2268 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2268 2269 _ref_counter = _ref_counter_limit;
2269 2270 }
2270 2271
2271 2272 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2272 2273 virtual void do_oop( oop* p) { do_oop_work(p); }
2273 2274
2274 2275 template <class T> void do_oop_work(T* p) {
2275 2276 if (!_cm->has_overflown()) {
2276 2277 oop obj = oopDesc::load_decode_heap_oop(p);
2277 2278 if (_cm->verbose_high()) {
2278 2279 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2279 2280 "*"PTR_FORMAT" = "PTR_FORMAT,
2280 2281 _task->worker_id(), p2i(p), p2i((void*) obj));
2281 2282 }
2282 2283
2283 2284 _task->deal_with_reference(obj);
2284 2285 _ref_counter--;
2285 2286
2286 2287 if (_ref_counter == 0) {
2287 2288 // We have dealt with _ref_counter_limit references, pushing them
2288 2289 // and objects reachable from them on to the local stack (and
2289 2290 // possibly the global stack). Call CMTask::do_marking_step() to
2290 2291 // process these entries.
2291 2292 //
2292 2293 // We call CMTask::do_marking_step() in a loop, which we'll exit if
2293 2294 // there's nothing more to do (i.e. we're done with the entries that
2294 2295 // were pushed as a result of the CMTask::deal_with_reference() calls
2295 2296 // above) or we overflow.
2296 2297 //
2297 2298 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2298 2299 // flag while there may still be some work to do. (See the comment at
2299 2300 // the beginning of CMTask::do_marking_step() for those conditions -
2300 2301 // one of which is reaching the specified time target.) It is only
2301 2302 // when CMTask::do_marking_step() returns without setting the
2302 2303 // has_aborted() flag that the marking step has completed.
2303 2304 do {
2304 2305 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2305 2306 _task->do_marking_step(mark_step_duration_ms,
2306 2307 false /* do_termination */,
2307 2308 _is_serial);
2308 2309 } while (_task->has_aborted() && !_cm->has_overflown());
2309 2310 _ref_counter = _ref_counter_limit;
2310 2311 }
2311 2312 } else {
2312 2313 if (_cm->verbose_high()) {
2313 2314 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2314 2315 }
2315 2316 }
2316 2317 }
2317 2318 };
2318 2319
2319 2320 // 'Drain' oop closure used by both serial and parallel reference processing.
2320 2321 // Uses the CMTask associated with a given worker thread (for serial
2321 2322 // reference processing the CMTask for worker 0 is used). Calls the
2322 2323 // do_marking_step routine, with an unbelievably large timeout value,
2323 2324 // to drain the marking data structures of the remaining entries
2324 2325 // added by the 'keep alive' oop closure above.
2325 2326
2326 2327 class G1CMDrainMarkingStackClosure: public VoidClosure {
2327 2328 ConcurrentMark* _cm;
2328 2329 CMTask* _task;
2329 2330 bool _is_serial;
2330 2331 public:
2331 2332 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2332 2333 _cm(cm), _task(task), _is_serial(is_serial) {
2333 2334 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2334 2335 }
2335 2336
2336 2337 void do_void() {
2337 2338 do {
2338 2339 if (_cm->verbose_high()) {
2339 2340 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2340 2341 _task->worker_id(), BOOL_TO_STR(_is_serial));
2341 2342 }
2342 2343
2343 2344 // We call CMTask::do_marking_step() to completely drain the local
2344 2345 // and global marking stacks of entries pushed by the 'keep alive'
2345 2346 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2346 2347 //
2347 2348 // CMTask::do_marking_step() is called in a loop, which we'll exit
2348 2349       // if there's nothing more to do (i.e. we've completely drained the
2349 2350       // entries that were pushed as a result of applying the 'keep alive'
2350 2351 // closure to the entries on the discovered ref lists) or we overflow
2351 2352 // the global marking stack.
2352 2353 //
2353 2354 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2354 2355 // flag while there may still be some work to do. (See the comment at
2355 2356 // the beginning of CMTask::do_marking_step() for those conditions -
2356 2357 // one of which is reaching the specified time target.) It is only
2357 2358 // when CMTask::do_marking_step() returns without setting the
2358 2359 // has_aborted() flag that the marking step has completed.
2359 2360
2360 2361 _task->do_marking_step(1000000000.0 /* something very large */,
2361 2362 true /* do_termination */,
2362 2363 _is_serial);
2363 2364 } while (_task->has_aborted() && !_cm->has_overflown());
2364 2365 }
2365 2366 };
2366 2367
2367 2368 // Implementation of AbstractRefProcTaskExecutor for parallel
2368 2369 // reference processing at the end of G1 concurrent marking
2369 2370
2370 2371 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2371 2372 private:
2372 2373 G1CollectedHeap* _g1h;
2373 2374 ConcurrentMark* _cm;
2374 2375 WorkGang* _workers;
2375 2376 int _active_workers;
2376 2377
2377 2378 public:
2378 2379 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2379 2380 ConcurrentMark* cm,
2380 2381 WorkGang* workers,
2381 2382 int n_workers) :
2382 2383 _g1h(g1h), _cm(cm),
2383 2384 _workers(workers), _active_workers(n_workers) { }
2384 2385
2385 2386 // Executes the given task using concurrent marking worker threads.
2386 2387 virtual void execute(ProcessTask& task);
2387 2388 virtual void execute(EnqueueTask& task);
2388 2389 };
2389 2390
2390 2391 class G1CMRefProcTaskProxy: public AbstractGangTask {
2391 2392 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2392 2393 ProcessTask& _proc_task;
2393 2394 G1CollectedHeap* _g1h;
2394 2395 ConcurrentMark* _cm;
2395 2396
2396 2397 public:
2397 2398 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2398 2399 G1CollectedHeap* g1h,
2399 2400 ConcurrentMark* cm) :
2400 2401 AbstractGangTask("Process reference objects in parallel"),
2401 2402 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2402 2403 ReferenceProcessor* rp = _g1h->ref_processor_cm();
2403 2404 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2404 2405 }
2405 2406
2406 2407 virtual void work(uint worker_id) {
2407 2408 ResourceMark rm;
2408 2409 HandleMark hm;
2409 2410 CMTask* task = _cm->task(worker_id);
2410 2411 G1CMIsAliveClosure g1_is_alive(_g1h);
2411 2412 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2412 2413 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2413 2414
2414 2415 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2415 2416 }
2416 2417 };
2417 2418
2418 2419 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2419 2420 assert(_workers != NULL, "Need parallel worker threads.");
2420 2421 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2421 2422
2422 2423 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2423 2424
2424 2425 // We need to reset the concurrency level before each
2425 2426 // proxy task execution, so that the termination protocol
2426 2427   // and overflow handling in CMTask::do_marking_step() know
2427 2428 // how many workers to wait for.
2428 2429 _cm->set_concurrency(_active_workers);
2429 2430 _g1h->set_par_threads(_active_workers);
2430 2431 _workers->run_task(&proc_task_proxy);
2431 2432 _g1h->set_par_threads(0);
2432 2433 }
2433 2434
2434 2435 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2435 2436 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2436 2437 EnqueueTask& _enq_task;
2437 2438
2438 2439 public:
2439 2440 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2440 2441 AbstractGangTask("Enqueue reference objects in parallel"),
2441 2442 _enq_task(enq_task) { }
2442 2443
2443 2444 virtual void work(uint worker_id) {
2444 2445 _enq_task.work(worker_id);
2445 2446 }
2446 2447 };
2447 2448
2448 2449 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2449 2450 assert(_workers != NULL, "Need parallel worker threads.");
2450 2451 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2451 2452
2452 2453 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2453 2454
2454 2455 // Not strictly necessary but...
2455 2456 //
2456 2457 // We need to reset the concurrency level before each
2457 2458 // proxy task execution, so that the termination protocol
2458 2459   // and overflow handling in CMTask::do_marking_step() know
2459 2460 // how many workers to wait for.
2460 2461 _cm->set_concurrency(_active_workers);
2461 2462 _g1h->set_par_threads(_active_workers);
2462 2463 _workers->run_task(&enq_task_proxy);
2463 2464 _g1h->set_par_threads(0);
2464 2465 }
2465 2466
2466 2467 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2467 2468 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2468 2469 }
2469 2470
2470 2471 // Helper class to get rid of some boilerplate code: a GCTraceTime that,
2471 2472 // when enabled, first prints a separating space so that consecutive
2472 2473 // remark sub-phases line up in the log output.
2471 2472 class G1RemarkGCTraceTime : public GCTraceTime {
2472 2473 static bool doit_and_prepend(bool doit) {
2473 2474 if (doit) {
2474 2475 gclog_or_tty->put(' ');
2475 2476 }
2476 2477 return doit;
2477 2478 }
2478 2479
2479 2480 public:
2480 2481 G1RemarkGCTraceTime(const char* title, bool doit)
2481 2482 : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
2482 2483 G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
2483 2484 }
2484 2485 };
2485 2486
2486 2487 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2487 2488 if (has_overflown()) {
2488 2489 // Skip processing the discovered references if we have
2489 2490 // overflown the global marking stack. Reference objects
2490 2491 // only get discovered once so it is OK to not
2491 2492 // de-populate the discovered reference lists. We could have,
2492 2493 // but the only benefit would be that, when marking restarts,
2493 2494     // fewer reference objects are discovered.
2494 2495 return;
2495 2496 }
2496 2497
2497 2498 ResourceMark rm;
2498 2499 HandleMark hm;
2499 2500
2500 2501 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2501 2502
2502 2503 // Is alive closure.
2503 2504 G1CMIsAliveClosure g1_is_alive(g1h);
2504 2505
2505 2506 // Inner scope to exclude the cleaning of the string and symbol
2506 2507 // tables from the displayed time.
2507 2508 {
2508 2509 if (G1Log::finer()) {
2509 2510 gclog_or_tty->put(' ');
2510 2511 }
2511 2512 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
2512 2513
2513 2514 ReferenceProcessor* rp = g1h->ref_processor_cm();
2514 2515
2515 2516 // See the comment in G1CollectedHeap::ref_processing_init()
2516 2517 // about how reference processing currently works in G1.
2517 2518
2518 2519 // Set the soft reference policy
2519 2520 rp->setup_policy(clear_all_soft_refs);
2520 2521 assert(_markStack.isEmpty(), "mark stack should be empty");
2521 2522
2522 2523 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2523 2524 // in serial reference processing. Note these closures are also
2524 2525     // used for serially processing (by the current thread) the
2525 2526 // JNI references during parallel reference processing.
2526 2527 //
2527 2528 // These closures do not need to synchronize with the worker
2528 2529 // threads involved in parallel reference processing as these
2529 2530 // instances are executed serially by the current thread (e.g.
2530 2531 // reference processing is not multi-threaded and is thus
2531 2532 // performed by the current thread instead of a gang worker).
2532 2533 //
2533 2534     // The gang tasks involved in parallel reference processing create
2534 2535 // their own instances of these closures, which do their own
2535 2536 // synchronization among themselves.
2536 2537 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2537 2538 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2538 2539
2539 2540 // We need at least one active thread. If reference processing
2540 2541 // is not multi-threaded we use the current (VMThread) thread,
2541 2542 // otherwise we use the work gang from the G1CollectedHeap and
2542 2543 // we utilize all the worker threads we can.
2543 2544 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2544 2545 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
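     +     // Clamp the number of workers to the range [1, _max_worker_id].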
2545 2546 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2546 2547
2547 2548 // Parallel processing task executor.
2548 2549 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2549 2550 g1h->workers(), active_workers);
2550 2551 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2551 2552
2552 2553 // Set the concurrency level. The phase was already set prior to
2553 2554 // executing the remark task.
2554 2555 set_concurrency(active_workers);
2555 2556
2556 2557 // Set the degree of MT processing here. If the discovery was done MT,
2557 2558 // the number of threads involved during discovery could differ from
2558 2559 // the number of active workers. This is OK as long as the discovered
2559 2560 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2560 2561 rp->set_active_mt_degree(active_workers);
2561 2562
2562 2563 // Process the weak references.
2563 2564 const ReferenceProcessorStats& stats =
2564 2565 rp->process_discovered_references(&g1_is_alive,
2565 2566 &g1_keep_alive,
2566 2567 &g1_drain_mark_stack,
2567 2568 executor,
2568 2569 g1h->gc_timer_cm(),
2569 2570 concurrent_gc_id());
2570 2571 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2571 2572
2572 2573 // The do_oop work routines of the keep_alive and drain_marking_stack
2573 2574 // oop closures will set the has_overflown flag if we overflow the
2574 2575 // global marking stack.
2575 2576
2576 2577 assert(_markStack.overflow() || _markStack.isEmpty(),
2577 2578 "mark stack should be empty (unless it overflowed)");
2578 2579
2579 2580 if (_markStack.overflow()) {
2580 2581 // This should have been done already when we tried to push an
2581 2582 // entry on to the global mark stack. But let's do it again.
2582 2583 set_has_overflown();
2583 2584 }
2584 2585
2585 2586 assert(rp->num_q() == active_workers, "why not");
2586 2587
2587 2588 rp->enqueue_discovered_references(executor);
2588 2589
2589 2590 rp->verify_no_references_recorded();
2590 2591 assert(!rp->discovery_enabled(), "Post condition");
2591 2592 }
2592 2593
2593 2594 if (has_overflown()) {
2594 2595     // We cannot trust g1_is_alive if the marking stack overflowed
2595 2596 return;
2596 2597 }
2597 2598
2598 2599 assert(_markStack.isEmpty(), "Marking should have completed");
2599 2600
2600 2601 // Unload Klasses, String, Symbols, Code Cache, etc.
2601 2602 {
2602 2603 G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
2603 2604
2604 2605 if (ClassUnloadingWithConcurrentMark) {
2606 +     // Cleaning of klasses depends on correct information from MetadataOnStackMark. The CodeCache::mark_on_stack
2607 + // part is too slow to be done serially, so it is handled during the weakRefsWorkParallelPart phase.
2608 + // Defer the cleaning until we have complete on_stack data.
2609 + MetadataOnStackMark md_on_stack(false /* Don't visit the code cache at this point */);
2610 +
2605 2611 bool purged_classes;
2606 2612
2607 2613 {
2608 2614 G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
2609 - purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
2615 + purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2610 2616 }
2611 2617
2612 2618 {
2613 2619 G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
2614 2620 weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2615 2621 }
2622 +
2623 + {
2624 + G1RemarkGCTraceTime trace("Deallocate Metadata", G1Log::finest());
2625 + ClassLoaderDataGraph::free_deallocate_lists();
2626 + }
2616 2627 }
2617 2628
2618 2629 if (G1StringDedup::is_enabled()) {
2619 2630 G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
2620 2631 G1StringDedup::unlink(&g1_is_alive);
2621 2632 }
2622 2633 }
2623 2634 }
2624 2635
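     +// Exchange the two marking bitmaps: at the end of a successful marking
     +// cycle the "next" bitmap holds the complete marking results and becomes
     +// the new "prev" bitmap, while the old "prev" bitmap is recycled as the
     +// "next" bitmap for the following cycle.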
2625 2636 void ConcurrentMark::swapMarkBitMaps() {
2626 2637 CMBitMapRO* temp = _prevMarkBitMap;
2627 2638 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2628 2639 _nextMarkBitMap = (CMBitMap*) temp;
2629 2640 }
2630 2641
2631 2642 class CMObjectClosure;
2632 2643
2633 2644 // Closure for iterating over objects, currently only used for
2634 2645 // processing SATB buffers.
2635 2646 class CMObjectClosure : public ObjectClosure {
2636 2647 private:
2637 2648 CMTask* _task;
2638 2649
2639 2650 public:
2640 2651 void do_object(oop obj) {
2641 2652 _task->deal_with_reference(obj);
2642 2653 }
2643 2654
2644 2655 CMObjectClosure(CMTask* task) : _task(task) { }
2645 2656 };
2646 2657
2647 2658 class G1RemarkThreadsClosure : public ThreadClosure {
2648 2659 CMObjectClosure _cm_obj;
2649 2660 G1CMOopClosure _cm_cl;
2650 2661 MarkingCodeBlobClosure _code_cl;
2651 2662 int _thread_parity;
2652 2663 bool _is_par;
2653 2664
2654 2665 public:
2655 2666 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
2656 2667 _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2657 2668 _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
2658 2669
2659 2670 void do_thread(Thread* thread) {
2660 2671 if (thread->is_Java_thread()) {
2661 2672 if (thread->claim_oops_do(_is_par, _thread_parity)) {
2662 2673 JavaThread* jt = (JavaThread*)thread;
2663 2674
2664 2675         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2665 2676         // however, oops reachable from nmethods have very complex lifecycles:
2666 2677 // * Alive if on the stack of an executing method
2667 2678 // * Weakly reachable otherwise
2668 2679         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2669 2680 // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2670 2681 jt->nmethods_do(&_code_cl);
2671 2682
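     +         // Also drain this thread's private SATB buffer while we hold the
     +         // claim on the thread.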
2672 2683 jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
2673 2684 }
2674 2685 } else if (thread->is_VM_thread()) {
2675 2686 if (thread->claim_oops_do(_is_par, _thread_parity)) {
2676 2687 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
2677 2688 }
2678 2689 }
2679 2690 }
2680 2691 };
2681 2692
2682 2693 class CMRemarkTask: public AbstractGangTask {
2683 2694 private:
2684 2695 ConcurrentMark* _cm;
2685 2696 bool _is_serial;
2686 2697 public:
2687 2698 void work(uint worker_id) {
2688 2699 // Since all available tasks are actually started, we should
2689 2700     // only proceed if we're supposed to be active.
2690 2701 if (worker_id < _cm->active_tasks()) {
2691 2702 CMTask* task = _cm->task(worker_id);
2692 2703 task->record_start_time();
2693 2704 {
2694 2705 ResourceMark rm;
2695 2706 HandleMark hm;
2696 2707
2697 2708 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
2698 2709 Threads::threads_do(&threads_f);
2699 2710 }
2700 2711
2701 2712 do {
2702 2713 task->do_marking_step(1000000000.0 /* something very large */,
2703 2714 true /* do_termination */,
2704 2715 _is_serial);
2705 2716 } while (task->has_aborted() && !_cm->has_overflown());
2706 2717 // If we overflow, then we do not want to restart. We instead
2707 2718 // want to abort remark and do concurrent marking again.
2708 2719 task->record_end_time();
2709 2720 }
2710 2721 }
2711 2722
2712 2723 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2713 2724 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2714 2725 _cm->terminator()->reset_for_reuse(active_workers);
2715 2726 }
2716 2727 };
2717 2728
2718 2729 void ConcurrentMark::checkpointRootsFinalWork() {
2719 2730 ResourceMark rm;
2720 2731 HandleMark hm;
2721 2732 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2722 2733
2723 2734 G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
2724 2735
2725 2736 g1h->ensure_parsability(false);
2726 2737
2727 2738 if (G1CollectedHeap::use_parallel_gc_threads()) {
2728 2739 G1CollectedHeap::StrongRootsScope srs(g1h);
2729 2740 // this is remark, so we'll use up all active threads
2730 2741 uint active_workers = g1h->workers()->active_workers();
2731 2742 if (active_workers == 0) {
2732 2743 assert(active_workers > 0, "Should have been set earlier");
2733 2744 active_workers = (uint) ParallelGCThreads;
2734 2745 g1h->workers()->set_active_workers(active_workers);
2735 2746 }
2736 2747 set_concurrency_and_phase(active_workers, false /* concurrent */);
2737 2748     // Leave _parallel_marking_threads at its
2738 2749 // value originally calculated in the ConcurrentMark
2739 2750 // constructor and pass values of the active workers
2740 2751 // through the gang in the task.
2741 2752
2742 2753 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2743 2754 // We will start all available threads, even if we decide that the
2744 2755 // active_workers will be fewer. The extra ones will just bail out
2745 2756 // immediately.
2746 2757 g1h->set_par_threads(active_workers);
2747 2758 g1h->workers()->run_task(&remarkTask);
2748 2759 g1h->set_par_threads(0);
2749 2760 } else {
2750 2761 G1CollectedHeap::StrongRootsScope srs(g1h);
2751 2762 uint active_workers = 1;
2752 2763 set_concurrency_and_phase(active_workers, false /* concurrent */);
2753 2764
2754 2765 // Note - if there's no work gang then the VMThread will be
2755 2766 // the thread to execute the remark - serially. We have
2756 2767 // to pass true for the is_serial parameter so that
2757 2768 // CMTask::do_marking_step() doesn't enter the sync
2758 2769 // barriers in the event of an overflow. Doing so will
2759 2770 // cause an assert that the current thread is not a
2760 2771 // concurrent GC thread.
2761 2772 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2762 2773 remarkTask.work(0);
2763 2774 }
2764 2775 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2765 2776 guarantee(has_overflown() ||
2766 2777 satb_mq_set.completed_buffers_num() == 0,
2767 2778 err_msg("Invariant: has_overflown = %s, num buffers = %d",
2768 2779 BOOL_TO_STR(has_overflown()),
2769 2780 satb_mq_set.completed_buffers_num()));
2770 2781
2771 2782 print_stats();
2772 2783 }
2773 2784
2774 2785 #ifndef PRODUCT
2775 2786
2776 2787 class PrintReachableOopClosure: public OopClosure {
2777 2788 private:
2778 2789 G1CollectedHeap* _g1h;
2779 2790 outputStream* _out;
2780 2791 VerifyOption _vo;
2781 2792 bool _all;
2782 2793
2783 2794 public:
2784 2795 PrintReachableOopClosure(outputStream* out,
2785 2796 VerifyOption vo,
2786 2797 bool all) :
2787 2798 _g1h(G1CollectedHeap::heap()),
2788 2799 _out(out), _vo(vo), _all(all) { }
2789 2800
2790 2801 void do_oop(narrowOop* p) { do_oop_work(p); }
2791 2802 void do_oop( oop* p) { do_oop_work(p); }
2792 2803
2793 2804 template <class T> void do_oop_work(T* p) {
2794 2805 oop obj = oopDesc::load_decode_heap_oop(p);
2795 2806 const char* str = NULL;
2796 2807 const char* str2 = "";
2797 2808
2798 2809 if (obj == NULL) {
2799 2810 str = "";
2800 2811 } else if (!_g1h->is_in_g1_reserved(obj)) {
2801 2812 str = " O";
2802 2813 } else {
2803 2814 HeapRegion* hr = _g1h->heap_region_containing(obj);
2804 2815 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2805 2816 bool marked = _g1h->is_marked(obj, _vo);
2806 2817
2807 2818 if (over_tams) {
2808 2819 str = " >";
2809 2820 if (marked) {
2810 2821 str2 = " AND MARKED";
2811 2822 }
2812 2823 } else if (marked) {
2813 2824 str = " M";
2814 2825 } else {
2815 2826 str = " NOT";
2816 2827 }
2817 2828 }
2818 2829
2819 2830 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2820 2831 p2i(p), p2i((void*) obj), str, str2);
2821 2832 }
2822 2833 };
2823 2834
2824 2835 class PrintReachableObjectClosure : public ObjectClosure {
2825 2836 private:
2826 2837 G1CollectedHeap* _g1h;
2827 2838 outputStream* _out;
2828 2839 VerifyOption _vo;
2829 2840 bool _all;
2830 2841 HeapRegion* _hr;
2831 2842
2832 2843 public:
2833 2844 PrintReachableObjectClosure(outputStream* out,
2834 2845 VerifyOption vo,
2835 2846 bool all,
2836 2847 HeapRegion* hr) :
2837 2848 _g1h(G1CollectedHeap::heap()),
2838 2849 _out(out), _vo(vo), _all(all), _hr(hr) { }
2839 2850
2840 2851 void do_object(oop o) {
2841 2852 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2842 2853 bool marked = _g1h->is_marked(o, _vo);
2843 2854 bool print_it = _all || over_tams || marked;
2844 2855
2845 2856 if (print_it) {
2846 2857 _out->print_cr(" "PTR_FORMAT"%s",
2847 2858 p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2848 2859 PrintReachableOopClosure oopCl(_out, _vo, _all);
2849 2860 o->oop_iterate_no_header(&oopCl);
2850 2861 }
2851 2862 }
2852 2863 };
2853 2864
2854 2865 class PrintReachableRegionClosure : public HeapRegionClosure {
2855 2866 private:
2856 2867 G1CollectedHeap* _g1h;
2857 2868 outputStream* _out;
2858 2869 VerifyOption _vo;
2859 2870 bool _all;
2860 2871
2861 2872 public:
2862 2873 bool doHeapRegion(HeapRegion* hr) {
2863 2874 HeapWord* b = hr->bottom();
2864 2875 HeapWord* e = hr->end();
2865 2876 HeapWord* t = hr->top();
2866 2877 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2867 2878 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2868 2879 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2869 2880 _out->cr();
2870 2881
2871 2882 HeapWord* from = b;
2872 2883 HeapWord* to = t;
2873 2884
2874 2885 if (to > from) {
2875 2886 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2876 2887 _out->cr();
2877 2888 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2878 2889 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2879 2890 _out->cr();
2880 2891 }
2881 2892
2882 2893 return false;
2883 2894 }
2884 2895
2885 2896 PrintReachableRegionClosure(outputStream* out,
2886 2897 VerifyOption vo,
2887 2898 bool all) :
2888 2899 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2889 2900 };
2890 2901
2891 2902 void ConcurrentMark::print_reachable(const char* str,
2892 2903 VerifyOption vo,
2893 2904 bool all) {
2894 2905 gclog_or_tty->cr();
2895 2906 gclog_or_tty->print_cr("== Doing heap dump... ");
2896 2907
2897 2908 if (G1PrintReachableBaseFile == NULL) {
2898 2909 gclog_or_tty->print_cr(" #### error: no base file defined");
2899 2910 return;
2900 2911 }
2901 2912
2902 2913 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2903 2914 (JVM_MAXPATHLEN - 1)) {
2904 2915 gclog_or_tty->print_cr(" #### error: file name too long");
2905 2916 return;
2906 2917 }
2907 2918
2908 2919 char file_name[JVM_MAXPATHLEN];
2909 2920 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2910 2921 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2911 2922
2912 2923 fileStream fout(file_name);
2913 2924 if (!fout.is_open()) {
2914 2925 gclog_or_tty->print_cr(" #### error: could not open file");
2915 2926 return;
2916 2927 }
2917 2928
2918 2929 outputStream* out = &fout;
2919 2930 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2920 2931 out->cr();
2921 2932
2922 2933 out->print_cr("--- ITERATING OVER REGIONS");
2923 2934 out->cr();
2924 2935 PrintReachableRegionClosure rcl(out, vo, all);
2925 2936 _g1h->heap_region_iterate(&rcl);
2926 2937 out->cr();
2927 2938
2928 2939 gclog_or_tty->print_cr(" done");
2929 2940 gclog_or_tty->flush();
2930 2941 }
2931 2942
2932 2943 #endif // PRODUCT
2933 2944
2934 2945 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2935 2946 // Note we are overriding the read-only view of the prev map here, via
2936 2947 // the cast.
2937 2948 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2938 2949 }
2939 2950
2940 2951 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2941 2952 _nextMarkBitMap->clearRange(mr);
2942 2953 }
2943 2954
2944 2955 HeapRegion*
2945 2956 ConcurrentMark::claim_region(uint worker_id) {
2946 2957 // "checkpoint" the finger
2947 2958 HeapWord* finger = _finger;
2948 2959
2949 2960 // _heap_end will not change underneath our feet; it only changes at
2950 2961 // yield points.
2951 2962 while (finger < _heap_end) {
2952 2963 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2953 2964
2954 2965 // Note on how this code handles humongous regions. In the
2955 2966 // normal case the finger will reach the start of a "starts
2956 2967 // humongous" (SH) region. Its end will either be the end of the
2957 2968 // last "continues humongous" (CH) region in the sequence, or the
2958 2969 // standard end of the SH region (if the SH is the only region in
2959 2970 // the sequence). That way claim_region() will skip over the CH
2960 2971 // regions. However, there is a subtle race between a CM thread
2961 2972 // executing this method and a mutator thread doing a humongous
2962 2973 // object allocation. The two are not mutually exclusive as the CM
2963 2974 // thread does not need to hold the Heap_lock when it gets
2964 2975 // here. So there is a chance that claim_region() will come across
2965 2976 // a free region that's in the process of becoming an SH or a CH
2966 2977 // region. In the former case, it will either
2967 2978 // a) Miss the update to the region's end, in which case it will
2968 2979 // visit every subsequent CH region, will find their bitmaps
2969 2980 // empty, and do nothing, or
2970 2981 // b) Observe the update of the region's end (in which case
2971 2982 // it will skip the subsequent CH regions).
2972 2983 // If it comes across a region that suddenly becomes CH, the
2973 2984 // scenario will be similar to b). So, the race between
2974 2985 // claim_region() and a humongous object allocation might force us
2975 2986 // to do a bit of unnecessary work (due to some unnecessary bitmap
2976 2987 // iterations) but it should not introduce any correctness issues.
2977 2988 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2978 2989
2979 2990 // The call to heap_region_containing_raw above may return NULL, as we
2980 2991 // always claim until the end of the heap. In this case, just jump to the next region.
2981 2992 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2982 2993
2983 2994 // Is the gap between reading the finger and doing the CAS too long?
2984 2995 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2985 2996 if (res == finger && curr_region != NULL) {
2986 2997 // we succeeded
2987 2998 HeapWord* bottom = curr_region->bottom();
2988 2999 HeapWord* limit = curr_region->next_top_at_mark_start();
2989 3000
2990 3001 if (verbose_low()) {
2991 3002 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2992 3003 "["PTR_FORMAT", "PTR_FORMAT"), "
2993 3004 "limit = "PTR_FORMAT,
2994 3005 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2995 3006 }
2996 3007
2997 3008 // notice that _finger == end cannot be guaranteed here since
2998 3009 // someone else might have moved the finger even further
2999 3010 assert(_finger >= end, "the finger should have moved forward");
3000 3011
3001 3012 if (verbose_low()) {
3002 3013 gclog_or_tty->print_cr("[%u] we were successful with region = "
3003 3014 PTR_FORMAT, worker_id, p2i(curr_region));
3004 3015 }
3005 3016
3006 3017 if (limit > bottom) {
3007 3018 if (verbose_low()) {
3008 3019 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
3009 3020 "returning it ", worker_id, p2i(curr_region));
3010 3021 }
3011 3022 return curr_region;
3012 3023 } else {
3013 3024 assert(limit == bottom,
3014 3025 "the region limit should be at bottom");
3015 3026 if (verbose_low()) {
3016 3027 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
3017 3028 "returning NULL", worker_id, p2i(curr_region));
3018 3029 }
3019 3030 // we return NULL and the caller should try calling
3020 3031 // claim_region() again.
3021 3032 return NULL;
3022 3033 }
3023 3034 } else {
3024 3035 assert(_finger > finger, "the finger should have moved forward");
3025 3036 if (verbose_low()) {
3026 3037 if (curr_region == NULL) {
3027 3038 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
3028 3039 "global finger = "PTR_FORMAT", "
3029 3040 "our finger = "PTR_FORMAT,
3030 3041 worker_id, p2i(_finger), p2i(finger));
3031 3042 } else {
3032 3043 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
3033 3044 "global finger = "PTR_FORMAT", "
3034 3045 "our finger = "PTR_FORMAT,
3035 3046 worker_id, p2i(_finger), p2i(finger));
3036 3047 }
3037 3048 }
3038 3049
3039 3050 // read it again
3040 3051 finger = _finger;
3041 3052 }
3042 3053 }
3043 3054
3044 3055 return NULL;
3045 3056 }
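// The claim protocol above reduces to a single CAS on the global finger.
// A minimal sketch of the idea, with the humongous handling, logging and
// NULL-region cases stripped out (region_end_for is a hypothetical helper
// standing in for the end computation above):
//
//   HeapWord* finger = _finger;                          // read the finger
//   HeapWord* end    = region_end_for(finger);           // extent to claim
//   HeapWord* res    = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
//   if (res == finger) {
//     // CAS succeeded: this worker owns [finger, end) and may scan it.
//   } else {
//     // Another worker advanced the finger first: re-read and retry.
//   }
//
// Losing the race costs only a re-read; no lock is taken, which is why
// claim_region() can safely run concurrently with humongous allocation,
// as discussed above.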
3046 3057
3047 3058 #ifndef PRODUCT
3048 3059 enum VerifyNoCSetOopsPhase {
3049 3060 VerifyNoCSetOopsStack,
3050 3061 VerifyNoCSetOopsQueues,
3051 3062 VerifyNoCSetOopsSATBCompleted,
3052 3063 VerifyNoCSetOopsSATBThread
3053 3064 };
3054 3065
3055 3066 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
3056 3067 private:
3057 3068 G1CollectedHeap* _g1h;
3058 3069 VerifyNoCSetOopsPhase _phase;
3059 3070 int _info;
3060 3071
3061 3072 const char* phase_str() {
3062 3073 switch (_phase) {
3063 3074 case VerifyNoCSetOopsStack: return "Stack";
3064 3075 case VerifyNoCSetOopsQueues: return "Queue";
3065 3076 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
3066 3077 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
3067 3078 default: ShouldNotReachHere();
3068 3079 }
3069 3080 return NULL;
3070 3081 }
3071 3082
3072 3083 void do_object_work(oop obj) {
3073 3084 guarantee(!_g1h->obj_in_cs(obj),
3074 3085 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
3075 3086 p2i((void*) obj), phase_str(), _info));
3076 3087 }
3077 3088
3078 3089 public:
3079 3090 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
3080 3091
3081 3092 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
3082 3093 _phase = phase;
3083 3094 _info = info;
3084 3095 }
3085 3096
3086 3097 virtual void do_oop(oop* p) {
3087 3098 oop obj = oopDesc::load_decode_heap_oop(p);
3088 3099 do_object_work(obj);
3089 3100 }
3090 3101
3091 3102 virtual void do_oop(narrowOop* p) {
3092 3103 // We should not come across narrow oops while scanning marking
3093 3104 // stacks and SATB buffers.
3094 3105 ShouldNotReachHere();
3095 3106 }
3096 3107
3097 3108 virtual void do_object(oop obj) {
3098 3109 do_object_work(obj);
3099 3110 }
3100 3111 };
3101 3112
3102 3113 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
3103 3114 bool verify_enqueued_buffers,
3104 3115 bool verify_thread_buffers,
3105 3116 bool verify_fingers) {
3106 3117 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
3107 3118 if (!G1CollectedHeap::heap()->mark_in_progress()) {
3108 3119 return;
3109 3120 }
3110 3121
3111 3122 VerifyNoCSetOopsClosure cl;
3112 3123
3113 3124 if (verify_stacks) {
3114 3125 // Verify entries on the global mark stack
3115 3126 cl.set_phase(VerifyNoCSetOopsStack);
3116 3127 _markStack.oops_do(&cl);
3117 3128
3118 3129 // Verify entries on the task queues
3119 3130 for (uint i = 0; i < _max_worker_id; i += 1) {
3120 3131 cl.set_phase(VerifyNoCSetOopsQueues, i);
3121 3132 CMTaskQueue* queue = _task_queues->queue(i);
3122 3133 queue->oops_do(&cl);
3123 3134 }
3124 3135 }
3125 3136
3126 3137 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
3127 3138
3128 3139 // Verify entries on the enqueued SATB buffers
3129 3140 if (verify_enqueued_buffers) {
3130 3141 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3131 3142 satb_qs.iterate_completed_buffers_read_only(&cl);
3132 3143 }
3133 3144
3134 3145 // Verify entries on the per-thread SATB buffers
3135 3146 if (verify_thread_buffers) {
3136 3147 cl.set_phase(VerifyNoCSetOopsSATBThread);
3137 3148 satb_qs.iterate_thread_buffers_read_only(&cl);
3138 3149 }
3139 3150
3140 3151 if (verify_fingers) {
3141 3152 // Verify the global finger
3142 3153 HeapWord* global_finger = finger();
3143 3154 if (global_finger != NULL && global_finger < _heap_end) {
3144 3155 // The global finger always points to a heap region boundary. We
3145 3156 // use heap_region_containing_raw() to get the containing region
3146 3157 // given that the global finger could be pointing to a free region
3147 3158 // which subsequently becomes continues humongous. If that
3148 3159 // happens, heap_region_containing() will return the bottom of the
3149 3160 // corresponding starts humongous region and the check below will
3150 3161 // not hold any more.
3151 3162 // Since we always iterate over all regions, we might get a NULL HeapRegion
3152 3163 // here.
3153 3164 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3154 3165 guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
3155 3166 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3156 3167 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3157 3168 }
3158 3169
3159 3170 // Verify the task fingers
3160 3171 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3161 3172 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3162 3173 CMTask* task = _tasks[i];
3163 3174 HeapWord* task_finger = task->finger();
3164 3175 if (task_finger != NULL && task_finger < _heap_end) {
3165 3176 // See above note on the global finger verification.
3166 3177 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3167 3178 guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
3168 3179 !task_hr->in_collection_set(),
3169 3180 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3170 3181 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3171 3182 }
3172 3183 }
3173 3184 }
3174 3185 }
3175 3186 #endif // PRODUCT
3176 3187
3177 3188 // Aggregate the counting data that was constructed concurrently
3178 3189 // with marking.
3179 3190 class AggregateCountDataHRClosure: public HeapRegionClosure {
3180 3191 G1CollectedHeap* _g1h;
3181 3192 ConcurrentMark* _cm;
3182 3193 CardTableModRefBS* _ct_bs;
3183 3194 BitMap* _cm_card_bm;
3184 3195 uint _max_worker_id;
3185 3196
3186 3197 public:
3187 3198 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3188 3199 BitMap* cm_card_bm,
3189 3200 uint max_worker_id) :
3190 3201 _g1h(g1h), _cm(g1h->concurrent_mark()),
3191 3202 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3192 3203 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3193 3204
3194 3205 bool doHeapRegion(HeapRegion* hr) {
3195 3206 if (hr->continuesHumongous()) {
3196 3207 // We will ignore these here and process them when their
3197 3208 // associated "starts humongous" region is processed.
3198 3209 // Note that we cannot rely on their associated
3199 3210 // "starts humongous" region to have their bit set to 1
3200 3211 // since, due to the region chunking in the parallel region
3201 3212 // iteration, a "continues humongous" region might be visited
3202 3213 // before its associated "starts humongous".
3203 3214 return false;
3204 3215 }
3205 3216
3206 3217 HeapWord* start = hr->bottom();
3207 3218 HeapWord* limit = hr->next_top_at_mark_start();
3208 3219 HeapWord* end = hr->end();
3209 3220
3210 3221 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3211 3222 err_msg("Preconditions not met - "
3212 3223 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3213 3224 "top: "PTR_FORMAT", end: "PTR_FORMAT,
3214 3225 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3215 3226
3216 3227 assert(hr->next_marked_bytes() == 0, "Precondition");
3217 3228
3218 3229 if (start == limit) {
3219 3230 // NTAMS of this region has not been set so nothing to do.
3220 3231 return false;
3221 3232 }
3222 3233
3223 3234 // 'start' should be in the heap.
3224 3235 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3225 3236 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3226 3237 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3227 3238
3228 3239 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3229 3240 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3230 3241 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3231 3242
3232 3243 // If ntams is not card aligned then we bump the card bitmap index
3233 3244 // for limit so that we get all the cards spanned by
3234 3245 // the object ending at ntams.
3235 3246 // Note: if this is the last region in the heap then ntams
3236 3247 // could actually be just beyond the end of the heap;
3237 3248 // limit_idx will then correspond to a (non-existent) card
3238 3249 // that is also outside the heap.
3239 3250 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3240 3251 limit_idx += 1;
3241 3252 }
3242 3253
3243 3254 assert(limit_idx <= end_idx, "or else use atomics");
3244 3255
3245 3256 // Aggregate the "stripe" in the count data associated with hr.
3246 3257 uint hrm_index = hr->hrm_index();
3247 3258 size_t marked_bytes = 0;
3248 3259
3249 3260 for (uint i = 0; i < _max_worker_id; i += 1) {
3250 3261 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3251 3262 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3252 3263
3253 3264 // Fetch the marked_bytes in this region for task i and
3254 3265 // add it to the running total for this region.
3255 3266 marked_bytes += marked_bytes_array[hrm_index];
3256 3267
3257 3268 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3258 3269 // into the global card bitmap.
3259 3270 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3260 3271
3261 3272 while (scan_idx < limit_idx) {
3262 3273 assert(task_card_bm->at(scan_idx) == true, "should be");
3263 3274 _cm_card_bm->set_bit(scan_idx);
3264 3275 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3265 3276
3266 3277 // BitMap::get_next_one_offset() can handle the case when
3267 3278 // its left_offset parameter is greater than its right_offset
3268 3279 // parameter. It does, however, have an early exit if
3269 3280 // left_offset == right_offset. So let's limit the value
3270 3281 // passed in for left offset here.
3271 3282 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3272 3283 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3273 3284 }
3274 3285 }
3275 3286
3276 3287 // Update the marked bytes for this region.
3277 3288 hr->add_to_marked_bytes(marked_bytes);
3278 3289
3279 3290 // Next heap region
3280 3291 return false;
3281 3292 }
3282 3293 };
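// In essence, doHeapRegion() above performs a per-region reduction over the
// per-worker counting data: sum the marked-bytes entries and OR each
// worker's card bits into the global card bitmap. A condensed sketch of
// that inner loop (illustrative only; the bitmap-union details are above):
//
//   size_t marked_bytes = 0;
//   for (uint i = 0; i < _max_worker_id; i += 1) {
//     marked_bytes += _cm->count_marked_bytes_array_for(i)[hrm_index];
//     // ... union worker i's cards in [start_idx, limit_idx) into _cm_card_bm
//   }
//   hr->add_to_marked_bytes(marked_bytes);
//
// Each region is visited by exactly one worker of the aggregation task, so
// plain (non-atomic) updates suffice - hence "or else use atomics" above.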
3283 3294
3284 3295 class G1AggregateCountDataTask: public AbstractGangTask {
3285 3296 protected:
3286 3297 G1CollectedHeap* _g1h;
3287 3298 ConcurrentMark* _cm;
3288 3299 BitMap* _cm_card_bm;
3289 3300 uint _max_worker_id;
3290 3301 int _active_workers;
3291 3302
3292 3303 public:
3293 3304 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3294 3305 ConcurrentMark* cm,
3295 3306 BitMap* cm_card_bm,
3296 3307 uint max_worker_id,
3297 3308 int n_workers) :
3298 3309 AbstractGangTask("Count Aggregation"),
3299 3310 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3300 3311 _max_worker_id(max_worker_id),
3301 3312 _active_workers(n_workers) { }
3302 3313
3303 3314 void work(uint worker_id) {
3304 3315 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3305 3316
3306 3317 if (G1CollectedHeap::use_parallel_gc_threads()) {
3307 3318 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3308 3319 _active_workers,
3309 3320 HeapRegion::AggregateCountClaimValue);
3310 3321 } else {
3311 3322 _g1h->heap_region_iterate(&cl);
3312 3323 }
3313 3324 }
3314 3325 };
3315 3326
3316 3327
3317 3328 void ConcurrentMark::aggregate_count_data() {
3318 3329 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3319 3330 _g1h->workers()->active_workers() :
3320 3331 1);
3321 3332
3322 3333 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3323 3334 _max_worker_id, n_workers);
3324 3335
3325 3336 if (G1CollectedHeap::use_parallel_gc_threads()) {
3326 3337 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3327 3338 "sanity check");
3328 3339 _g1h->set_par_threads(n_workers);
3329 3340 _g1h->workers()->run_task(&g1_par_agg_task);
3330 3341 _g1h->set_par_threads(0);
3331 3342
3332 3343 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3333 3344 "sanity check");
3334 3345 _g1h->reset_heap_region_claim_values();
3335 3346 } else {
3336 3347 g1_par_agg_task.work(0);
3337 3348 }
3338 3349 _g1h->allocation_context_stats().update_at_remark();
3339 3350 }
3340 3351
3341 3352 // Clear the per-worker arrays used to store the per-region counting data
3342 3353 void ConcurrentMark::clear_all_count_data() {
3343 3354 // Clear the global card bitmap - it will be filled during
3344 3355 // liveness count aggregation (during remark) and the
3345 3356 // final counting task.
3346 3357 _card_bm.clear();
3347 3358
3348 3359 // Clear the global region bitmap - it will be filled as part
3349 3360 // of the final counting task.
3350 3361 _region_bm.clear();
3351 3362
3352 3363 uint max_regions = _g1h->max_regions();
3353 3364 assert(_max_worker_id > 0, "uninitialized");
3354 3365
3355 3366 for (uint i = 0; i < _max_worker_id; i += 1) {
3356 3367 BitMap* task_card_bm = count_card_bitmap_for(i);
3357 3368 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3358 3369
3359 3370 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3360 3371 assert(marked_bytes_array != NULL, "uninitialized");
3361 3372
3362 3373 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3363 3374 task_card_bm->clear();
3364 3375 }
3365 3376 }
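// For a sense of scale (a hedged example, not measured data): the counting
// state cleared here is one size_t array of max_regions entries plus one
// card bitmap per worker. With, say, 8 workers and 2048 regions, the
// marked-bytes arrays alone occupy 8 * 2048 * sizeof(size_t) = 128 KB;
// the per-worker card bitmaps dominate, each matching _card_bm in size.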
3366 3377
3367 3378 void ConcurrentMark::print_stats() {
3368 3379 if (verbose_stats()) {
3369 3380 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3370 3381 for (size_t i = 0; i < _active_tasks; ++i) {
3371 3382 _tasks[i]->print_stats();
3372 3383 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3373 3384 }
3374 3385 }
3375 3386 }
3376 3387
3377 3388 // abandon current marking iteration due to a Full GC
3378 3389 void ConcurrentMark::abort() {
3379 3390 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3380 3391 // concurrent bitmap clearing.
3381 3392 _nextMarkBitMap->clearAll();
3382 3393
3383 3394 // Note we cannot clear the previous marking bitmap here
3384 3395 // since VerifyDuringGC verifies the objects marked during
3385 3396 // a full GC against the previous bitmap.
3386 3397
3387 3398 // Clear the liveness counting data
3388 3399 clear_all_count_data();
3389 3400 // Empty mark stack
3390 3401 reset_marking_state();
3391 3402 for (uint i = 0; i < _max_worker_id; ++i) {
3392 3403 _tasks[i]->clear_region_fields();
3393 3404 }
3394 3405 _first_overflow_barrier_sync.abort();
3395 3406 _second_overflow_barrier_sync.abort();
3396 3407 const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3397 3408 if (!gc_id.is_undefined()) {
3398 3409 // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3399 3410 // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3400 3411 _aborted_gc_id = gc_id;
3401 3412 }
3402 3413 _has_aborted = true;
3403 3414
3404 3415 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3405 3416 satb_mq_set.abandon_partial_marking();
3406 3417 // This can be called either during or outside marking; we'll read
3407 3418 // the expected_active value from the SATB queue set.
3408 3419 satb_mq_set.set_active_all_threads(
3409 3420 false, /* new active value */
3410 3421 satb_mq_set.is_active() /* expected_active */);
3411 3422
3412 3423 _g1h->trace_heap_after_concurrent_cycle();
3413 3424 _g1h->register_concurrent_cycle_end();
3414 3425 }
3415 3426
3416 3427 const GCId& ConcurrentMark::concurrent_gc_id() {
3417 3428 if (has_aborted()) {
3418 3429 return _aborted_gc_id;
3419 3430 }
3420 3431 return _g1h->gc_tracer_cm()->gc_id();
3421 3432 }
3422 3433
3423 3434 static void print_ms_time_info(const char* prefix, const char* name,
3424 3435 NumberSeq& ns) {
3425 3436 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3426 3437 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3427 3438 if (ns.num() > 0) {
3428 3439 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3429 3440 prefix, ns.sd(), ns.maximum());
3430 3441 }
3431 3442 }
3432 3443
3433 3444 void ConcurrentMark::print_summary_info() {
3434 3445 gclog_or_tty->print_cr(" Concurrent marking:");
3435 3446 print_ms_time_info(" ", "init marks", _init_times);
3436 3447 print_ms_time_info(" ", "remarks", _remark_times);
3437 3448 {
3438 3449 print_ms_time_info(" ", "final marks", _remark_mark_times);
3439 3450 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3440 3451
3441 3452 }
3442 3453 print_ms_time_info(" ", "cleanups", _cleanup_times);
3443 3454 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3444 3455 _total_counting_time,
3445 3456 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3446 3457 (double)_cleanup_times.num()
3447 3458 : 0.0));
3448 3459 if (G1ScrubRemSets) {
3449 3460 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3450 3461 _total_rs_scrub_time,
3451 3462 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3452 3463 (double)_cleanup_times.num()
3453 3464 : 0.0));
3454 3465 }
3455 3466 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3456 3467 (_init_times.sum() + _remark_times.sum() +
3457 3468 _cleanup_times.sum())/1000.0);
3458 3469 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3459 3470 "(%8.2f s marking).",
3460 3471 cmThread()->vtime_accum(),
3461 3472 cmThread()->vtime_mark_accum());
3462 3473 }
3463 3474
3464 3475 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3465 3476 if (use_parallel_marking_threads()) {
3466 3477 _parallel_workers->print_worker_threads_on(st);
3467 3478 }
3468 3479 }
3469 3480
3470 3481 void ConcurrentMark::print_on_error(outputStream* st) const {
3471 3482 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3472 3483 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3473 3484 _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3474 3485 _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3475 3486 }
3476 3487
3477 3488 // We take a break if someone is trying to stop the world.
3478 3489 bool ConcurrentMark::do_yield_check(uint worker_id) {
3479 3490 if (SuspendibleThreadSet::should_yield()) {
3480 3491 if (worker_id == 0) {
3481 3492 _g1h->g1_policy()->record_concurrent_pause();
3482 3493 }
3483 3494 SuspendibleThreadSet::yield();
3484 3495 return true;
3485 3496 } else {
3486 3497 return false;
3487 3498 }
3488 3499 }
3489 3500
3490 3501 #ifndef PRODUCT
3491 3502 // for debugging purposes
3492 3503 void ConcurrentMark::print_finger() {
3493 3504 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3494 3505 p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3495 3506 for (uint i = 0; i < _max_worker_id; ++i) {
3496 3507 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3497 3508 }
3498 3509 gclog_or_tty->cr();
3499 3510 }
3500 3511 #endif
3501 3512
3502 3513 void CMTask::scan_object(oop obj) {
3503 3514 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3504 3515
3505 3516 if (_cm->verbose_high()) {
3506 3517 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3507 3518 _worker_id, p2i((void*) obj));
3508 3519 }
3509 3520
3510 3521 size_t obj_size = obj->size();
3511 3522 _words_scanned += obj_size;
3512 3523
3513 3524 obj->oop_iterate(_cm_oop_closure);
3514 3525 statsOnly( ++_objs_scanned );
3515 3526 check_limits();
3516 3527 }
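// scan_object() is also what drives the work-based clock: each scanned
// object adds obj->size() to _words_scanned, and check_limits() calls
// reached_limit() - and hence regular_clock_call() - once _words_scanned
// or _refs_reached crosses its current limit (see recalculate_limits()).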
3517 3528
3518 3529 // Closure for iteration over bitmaps
3519 3530 class CMBitMapClosure : public BitMapClosure {
3520 3531 private:
3521 3532 // the bitmap that is being iterated over
3522 3533 CMBitMap* _nextMarkBitMap;
3523 3534 ConcurrentMark* _cm;
3524 3535 CMTask* _task;
3525 3536
3526 3537 public:
3527 3538 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3528 3539 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3529 3540
3530 3541 bool do_bit(size_t offset) {
3531 3542 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3532 3543 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3533 3544 assert( addr < _cm->finger(), "invariant");
3534 3545
3535 3546 statsOnly( _task->increase_objs_found_on_bitmap() );
3536 3547 assert(addr >= _task->finger(), "invariant");
3537 3548
3538 3549 // We move that task's local finger along.
3539 3550 _task->move_finger_to(addr);
3540 3551
3541 3552 _task->scan_object(oop(addr));
3542 3553 // we only partially drain the local queue and global stack
3543 3554 _task->drain_local_queue(true);
3544 3555 _task->drain_global_stack(true);
3545 3556
3546 3557 // if the has_aborted flag has been raised, we need to bail out of
3547 3558 // the iteration
3548 3559 return !_task->has_aborted();
3549 3560 }
3550 3561 };
3551 3562
3552 3563 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3553 3564 ConcurrentMark* cm,
3554 3565 CMTask* task)
3555 3566 : _g1h(g1h), _cm(cm), _task(task) {
3556 3567 assert(_ref_processor == NULL, "should be initialized to NULL");
3557 3568
3558 3569 if (G1UseConcMarkReferenceProcessing) {
3559 3570 _ref_processor = g1h->ref_processor_cm();
3560 3571 assert(_ref_processor != NULL, "should not be NULL");
3561 3572 }
3562 3573 }
3563 3574
3564 3575 void CMTask::setup_for_region(HeapRegion* hr) {
3565 3576 assert(hr != NULL,
3566 3577 "claim_region() should have filtered out NULL regions");
3567 3578 assert(!hr->continuesHumongous(),
3568 3579 "claim_region() should have filtered out continues humongous regions");
3569 3580
3570 3581 if (_cm->verbose_low()) {
3571 3582 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3572 3583 _worker_id, p2i(hr));
3573 3584 }
3574 3585
3575 3586 _curr_region = hr;
3576 3587 _finger = hr->bottom();
3577 3588 update_region_limit();
3578 3589 }
3579 3590
3580 3591 void CMTask::update_region_limit() {
3581 3592 HeapRegion* hr = _curr_region;
3582 3593 HeapWord* bottom = hr->bottom();
3583 3594 HeapWord* limit = hr->next_top_at_mark_start();
3584 3595
3585 3596 if (limit == bottom) {
3586 3597 if (_cm->verbose_low()) {
3587 3598 gclog_or_tty->print_cr("[%u] found an empty region "
3588 3599 "["PTR_FORMAT", "PTR_FORMAT")",
3589 3600 _worker_id, p2i(bottom), p2i(limit));
3590 3601 }
3591 3602 // The region was collected underneath our feet.
3592 3603 // We set the finger to bottom to ensure that the bitmap
3593 3604 // iteration that will follow this will not do anything.
3594 3605 // (this is not a condition that holds when we set the region up,
3595 3606 // as the region is not supposed to be empty in the first place)
3596 3607 _finger = bottom;
3597 3608 } else if (limit >= _region_limit) {
3598 3609 assert(limit >= _finger, "peace of mind");
3599 3610 } else {
3600 3611 assert(limit < _region_limit, "only way to get here");
3601 3612 // This can happen under some pretty unusual circumstances. An
3602 3613 // evacuation pause empties the region underneath our feet (NTAMS
3603 3614 // at bottom). We then do some allocation in the region (NTAMS
3604 3615 // stays at bottom), followed by the region being used as a GC
3605 3616 // alloc region (NTAMS will move to top() and the objects
3606 3617 // originally below it will be grayed). All objects now marked in
3607 3618 // the region are explicitly grayed, if below the global finger,
3608 3619 // and in fact we do not need to scan anything else. So, we simply
3609 3620 // set _finger to be limit to ensure that the bitmap iteration
3610 3621 // doesn't do anything.
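// As a timeline, the scenario above reads roughly as:
//   t0: evacuation pause empties the region    (NTAMS == bottom)
//   t1: mutator allocation in the region       (NTAMS stays at bottom)
//   t2: region reused as a GC alloc region     (NTAMS moves to top(),
//                                               objects below it grayed)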
3611 3622 _finger = limit;
3612 3623 }
3613 3624
3614 3625 _region_limit = limit;
3615 3626 }
3616 3627
3617 3628 void CMTask::giveup_current_region() {
3618 3629 assert(_curr_region != NULL, "invariant");
3619 3630 if (_cm->verbose_low()) {
3620 3631 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3621 3632 _worker_id, p2i(_curr_region));
3622 3633 }
3623 3634 clear_region_fields();
3624 3635 }
3625 3636
3626 3637 void CMTask::clear_region_fields() {
3627 3638 // Values for these three fields that indicate that we're not
3628 3639 // holding on to a region.
3629 3640 _curr_region = NULL;
3630 3641 _finger = NULL;
3631 3642 _region_limit = NULL;
3632 3643 }
3633 3644
3634 3645 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3635 3646 if (cm_oop_closure == NULL) {
3636 3647 assert(_cm_oop_closure != NULL, "invariant");
3637 3648 } else {
3638 3649 assert(_cm_oop_closure == NULL, "invariant");
3639 3650 }
3640 3651 _cm_oop_closure = cm_oop_closure;
3641 3652 }
3642 3653
3643 3654 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3644 3655 guarantee(nextMarkBitMap != NULL, "invariant");
3645 3656
3646 3657 if (_cm->verbose_low()) {
3647 3658 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3648 3659 }
3649 3660
3650 3661 _nextMarkBitMap = nextMarkBitMap;
3651 3662 clear_region_fields();
3652 3663
3653 3664 _calls = 0;
3654 3665 _elapsed_time_ms = 0.0;
3655 3666 _termination_time_ms = 0.0;
3656 3667 _termination_start_time_ms = 0.0;
3657 3668
3658 3669 #if _MARKING_STATS_
3659 3670 _local_pushes = 0;
3660 3671 _local_pops = 0;
3661 3672 _local_max_size = 0;
3662 3673 _objs_scanned = 0;
3663 3674 _global_pushes = 0;
3664 3675 _global_pops = 0;
3665 3676 _global_max_size = 0;
3666 3677 _global_transfers_to = 0;
3667 3678 _global_transfers_from = 0;
3668 3679 _regions_claimed = 0;
3669 3680 _objs_found_on_bitmap = 0;
3670 3681 _satb_buffers_processed = 0;
3671 3682 _steal_attempts = 0;
3672 3683 _steals = 0;
3673 3684 _aborted = 0;
3674 3685 _aborted_overflow = 0;
3675 3686 _aborted_cm_aborted = 0;
3676 3687 _aborted_yield = 0;
3677 3688 _aborted_timed_out = 0;
3678 3689 _aborted_satb = 0;
3679 3690 _aborted_termination = 0;
3680 3691 #endif // _MARKING_STATS_
3681 3692 }
3682 3693
3683 3694 bool CMTask::should_exit_termination() {
3684 3695 regular_clock_call();
3685 3696 // This is called when we are in the termination protocol. We should
3686 3697 // quit if, for some reason, this task wants to abort or the global
3687 3698 // stack is not empty (this means that we can get work from it).
3688 3699 return !_cm->mark_stack_empty() || has_aborted();
3689 3700 }
3690 3701
3691 3702 void CMTask::reached_limit() {
3692 3703 assert(_words_scanned >= _words_scanned_limit ||
3693 3704 _refs_reached >= _refs_reached_limit ,
3694 3705 "shouldn't have been called otherwise");
3695 3706 regular_clock_call();
3696 3707 }
3697 3708
3698 3709 void CMTask::regular_clock_call() {
3699 3710 if (has_aborted()) return;
3700 3711
3701 3712 // First, we need to recalculate the words scanned and refs reached
3702 3713 // limits for the next clock call.
3703 3714 recalculate_limits();
3704 3715
3705 3716 // During the regular clock call we do the following
3706 3717
3707 3718 // (1) If an overflow has been flagged, then we abort.
3708 3719 if (_cm->has_overflown()) {
3709 3720 set_has_aborted();
3710 3721 return;
3711 3722 }
3712 3723
3713 3724 // If we are not concurrent (i.e. we're doing remark) we don't need
3714 3725 // to check anything else. The other steps are only needed during
3715 3726 // the concurrent marking phase.
3716 3727 if (!concurrent()) return;
3717 3728
3718 3729 // (2) If marking has been aborted for Full GC, then we also abort.
3719 3730 if (_cm->has_aborted()) {
3720 3731 set_has_aborted();
3721 3732 statsOnly( ++_aborted_cm_aborted );
3722 3733 return;
3723 3734 }
3724 3735
3725 3736 double curr_time_ms = os::elapsedVTime() * 1000.0;
3726 3737
3727 3738 // (3) If marking stats are enabled, then we update the step history.
3728 3739 #if _MARKING_STATS_
3729 3740 if (_words_scanned >= _words_scanned_limit) {
3730 3741 ++_clock_due_to_scanning;
3731 3742 }
3732 3743 if (_refs_reached >= _refs_reached_limit) {
3733 3744 ++_clock_due_to_marking;
3734 3745 }
3735 3746
3736 3747 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3737 3748 _interval_start_time_ms = curr_time_ms;
3738 3749 _all_clock_intervals_ms.add(last_interval_ms);
3739 3750
3740 3751 if (_cm->verbose_medium()) {
3741 3752 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3742 3753 "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
3743 3754 _worker_id, last_interval_ms,
3744 3755 _words_scanned,
3745 3756 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3746 3757 _refs_reached,
3747 3758 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3748 3759 }
3749 3760 #endif // _MARKING_STATS_
3750 3761
3751 3762 // (4) We check whether we should yield. If we have to, then we abort.
3752 3763 if (SuspendibleThreadSet::should_yield()) {
3753 3764 // We should yield. To do this we abort the task. The caller is
3754 3765 // responsible for yielding.
3755 3766 set_has_aborted();
3756 3767 statsOnly( ++_aborted_yield );
3757 3768 return;
3758 3769 }
3759 3770
3760 3771 // (5) We check whether we've reached our time quota. If we have,
3761 3772 // then we abort.
3762 3773 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3763 3774 if (elapsed_time_ms > _time_target_ms) {
3764 3775 set_has_aborted();
3765 3776 _has_timed_out = true;
3766 3777 statsOnly( ++_aborted_timed_out );
3767 3778 return;
3768 3779 }
3769 3780
3770 3781 // (6) Finally, we check whether there are enough completed SATB
3771 3782 // buffers available for processing. If there are, we abort.
3772 3783 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3773 3784 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3774 3785 if (_cm->verbose_low()) {
3775 3786 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3776 3787 _worker_id);
3777 3788 }
3778 3789 // we do need to process SATB buffers; we'll abort and restart
3779 3790 // the marking task to do so
3780 3791 set_has_aborted();
3781 3792 statsOnly( ++_aborted_satb );
3782 3793 return;
3783 3794 }
3784 3795 }
3785 3796
3786 3797 void CMTask::recalculate_limits() {
3787 3798 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3788 3799 _words_scanned_limit = _real_words_scanned_limit;
3789 3800
3790 3801 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3791 3802 _refs_reached_limit = _real_refs_reached_limit;
3792 3803 }
3793 3804
3794 3805 void CMTask::decrease_limits() {
3795 3806 // This is called when we believe that we're going to do an infrequent
3796 3807 // operation which will increase the per byte scanned cost (i.e. move
3797 3808 // entries to/from the global stack). It basically tries to decrease the
3798 3809 // scanning limit so that the clock is called earlier.
3799 3810
3800 3811 if (_cm->verbose_medium()) {
3801 3812 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3802 3813 }
3803 3814
3804 3815 _words_scanned_limit = _real_words_scanned_limit -
3805 3816 3 * words_scanned_period / 4;
3806 3817 _refs_reached_limit = _real_refs_reached_limit -
3807 3818 3 * refs_reached_period / 4;
3808 3819 }
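// Worked through: the normal budget between clock calls is one full
// words_scanned_period (respectively refs_reached_period). Subtracting
// 3/4 of a period from the real limit leaves at most a quarter period of
// work before the next regular_clock_call(), i.e. after an expensive
// transfer the clock fires roughly four times sooner than usual.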
3809 3820
3810 3821 void CMTask::move_entries_to_global_stack() {
3811 3822 // local array where we'll store the entries that will be popped
3812 3823 // from the local queue
3813 3824 oop buffer[global_stack_transfer_size];
3814 3825
3815 3826 int n = 0;
3816 3827 oop obj;
3817 3828 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3818 3829 buffer[n] = obj;
3819 3830 ++n;
3820 3831 }
3821 3832
3822 3833 if (n > 0) {
3823 3834 // we popped at least one entry from the local queue
3824 3835
3825 3836 statsOnly( ++_global_transfers_to; _local_pops += n );
3826 3837
3827 3838 if (!_cm->mark_stack_push(buffer, n)) {
3828 3839 if (_cm->verbose_low()) {
3829 3840 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3830 3841 _worker_id);
3831 3842 }
3832 3843 set_has_aborted();
3833 3844 } else {
3834 3845 // the transfer was successful
3835 3846
3836 3847 if (_cm->verbose_medium()) {
3837 3848 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3838 3849 _worker_id, n);
3839 3850 }
3840 3851 statsOnly( int tmp_size = _cm->mark_stack_size();
3841 3852 if (tmp_size > _global_max_size) {
3842 3853 _global_max_size = tmp_size;
3843 3854 }
3844 3855 _global_pushes += n );
3845 3856 }
3846 3857 }
3847 3858
3848 3859 // this operation was quite expensive, so decrease the limits
3849 3860 decrease_limits();
3850 3861 }
3851 3862
3852 3863 void CMTask::get_entries_from_global_stack() {
3853 3864 // local array where we'll store the entries that will be popped
3854 3865 // from the global stack.
3855 3866 oop buffer[global_stack_transfer_size];
3856 3867 int n;
3857 3868 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3858 3869 assert(n <= global_stack_transfer_size,
3859 3870 "we should not pop more than the given limit");
3860 3871 if (n > 0) {
3861 3872 // yes, we did actually pop at least one entry
3862 3873
3863 3874 statsOnly( ++_global_transfers_from; _global_pops += n );
3864 3875 if (_cm->verbose_medium()) {
3865 3876 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3866 3877 _worker_id, n);
3867 3878 }
3868 3879 for (int i = 0; i < n; ++i) {
3869 3880 bool success = _task_queue->push(buffer[i]);
3870 3881 // We only call this when the local queue is empty or under a
3871 3882 // given target limit. So, we do not expect this push to fail.
3872 3883 assert(success, "invariant");
3873 3884 }
3874 3885
3875 3886 statsOnly( int tmp_size = _task_queue->size();
3876 3887 if (tmp_size > _local_max_size) {
3877 3888 _local_max_size = tmp_size;
3878 3889 }
3879 3890 _local_pushes += n );
3880 3891 }
3881 3892
3882 3893 // this operation was quite expensive, so decrease the limits
3883 3894 decrease_limits();
3884 3895 }
3885 3896
3886 3897 void CMTask::drain_local_queue(bool partially) {
3887 3898 if (has_aborted()) return;
3888 3899
3889 3900 // Decide what the target size is, depending on whether we're going to
3890 3901 // drain it partially (so that other tasks can steal if they run out
3891 3902 // of things to do) or totally (at the very end).
3892 3903 size_t target_size;
3893 3904 if (partially) {
3894 3905 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3895 3906 } else {
3896 3907 target_size = 0;
3897 3908 }
3898 3909
3899 3910 if (_task_queue->size() > target_size) {
3900 3911 if (_cm->verbose_high()) {
3901 3912 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3902 3913 _worker_id, target_size);
3903 3914 }
3904 3915
3905 3916 oop obj;
3906 3917 bool ret = _task_queue->pop_local(obj);
3907 3918 while (ret) {
3908 3919 statsOnly( ++_local_pops );
3909 3920
3910 3921 if (_cm->verbose_high()) {
3911 3922 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3912 3923 p2i((void*) obj));
3913 3924 }
3914 3925
3915 3926 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3916 3927 assert(!_g1h->is_on_master_free_list(
3917 3928 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3918 3929
3919 3930 scan_object(obj);
3920 3931
3921 3932 if (_task_queue->size() <= target_size || has_aborted()) {
3922 3933 ret = false;
3923 3934 } else {
3924 3935 ret = _task_queue->pop_local(obj);
3925 3936 }
3926 3937 }
3927 3938
3928 3939 if (_cm->verbose_high()) {
3929 3940 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3930 3941 _worker_id, _task_queue->size());
3931 3942 }
3932 3943 }
3933 3944 }
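// The partial-drain target above is MIN2(max_elems/3, GCDrainStackTargetSize):
// drain down to at most a third of the queue's capacity, but never keep
// more than the flag allows. As a hedged example, assuming the default
// GCDrainStackTargetSize of 64, a queue with max_elems == 16384 is drained
// down to 64 entries, keeping work available for other tasks to steal.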
3934 3945
3935 3946 void CMTask::drain_global_stack(bool partially) {
3936 3947 if (has_aborted()) return;
3937 3948
3938 3949 // We have a policy to drain the local queue before we attempt to
3939 3950 // drain the global stack.
3940 3951 assert(partially || _task_queue->size() == 0, "invariant");
3941 3952
3942 3953 // Decide what the target size is, depending on whether we're going to
3943 3954 // drain it partially (so that other tasks can steal if they run out
3944 3955 // of things to do) or totally (at the very end). Notice that,
3945 3956 // because we move entries from the global stack in chunks or
3946 3957 // because another task might be doing the same, we might in fact
3947 3958 // drop below the target. But this is not a problem.
3948 3959 size_t target_size;
3949 3960 if (partially) {
3950 3961 target_size = _cm->partial_mark_stack_size_target();
3951 3962 } else {
3952 3963 target_size = 0;
3953 3964 }
3954 3965
3955 3966 if (_cm->mark_stack_size() > target_size) {
3956 3967 if (_cm->verbose_low()) {
3957 3968 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3958 3969 _worker_id, target_size);
3959 3970 }
3960 3971
3961 3972 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3962 3973 get_entries_from_global_stack();
3963 3974 drain_local_queue(partially);
3964 3975 }
3965 3976
3966 3977 if (_cm->verbose_low()) {
3967 3978 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3968 3979 _worker_id, _cm->mark_stack_size());
3969 3980 }
3970 3981 }
3971 3982 }
3972 3983
3973 3984 // SATB Queue has several assumptions on whether to call the par or
3974 3985 // non-par versions of the methods. This is why some of the code is
3975 3986 // replicated. We should really get rid of the single-threaded version
3976 3987 // of the code to simplify things.
3977 3988 void CMTask::drain_satb_buffers() {
3978 3989 if (has_aborted()) return;
3979 3990
3980 3991 // We set this so that the regular clock knows that we're in the
3981 3992 // middle of draining buffers and doesn't set the abort flag when it
3982 3993 // notices that SATB buffers are available for draining. It'd be
3983 3994 // very counterproductive if it did that. :-)
3984 3995 _draining_satb_buffers = true;
3985 3996
3986 3997 CMObjectClosure oc(this);
3987 3998 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3988 3999 if (G1CollectedHeap::use_parallel_gc_threads()) {
3989 4000 satb_mq_set.set_par_closure(_worker_id, &oc);
3990 4001 } else {
3991 4002 satb_mq_set.set_closure(&oc);
3992 4003 }
3993 4004
3994 4005 // This keeps claiming and applying the closure to completed buffers
3995 4006 // until we run out of buffers or we need to abort.
3996 4007 if (G1CollectedHeap::use_parallel_gc_threads()) {
3997 4008 while (!has_aborted() &&
3998 4009 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3999 4010 if (_cm->verbose_medium()) {
4000 4011 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
4001 4012 }
4002 4013 statsOnly( ++_satb_buffers_processed );
4003 4014 regular_clock_call();
4004 4015 }
4005 4016 } else {
4006 4017 while (!has_aborted() &&
4007 4018 satb_mq_set.apply_closure_to_completed_buffer()) {
4008 4019 if (_cm->verbose_medium()) {
4009 4020 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
4010 4021 }
4011 4022 statsOnly( ++_satb_buffers_processed );
4012 4023 regular_clock_call();
4013 4024 }
4014 4025 }
4015 4026
4016 4027 _draining_satb_buffers = false;
4017 4028
4018 4029 assert(has_aborted() ||
4019 4030 concurrent() ||
4020 4031 satb_mq_set.completed_buffers_num() == 0, "invariant");
4021 4032
4022 4033 if (G1CollectedHeap::use_parallel_gc_threads()) {
4023 4034 satb_mq_set.set_par_closure(_worker_id, NULL);
4024 4035 } else {
4025 4036 satb_mq_set.set_closure(NULL);
4026 4037 }
4027 4038
4028 4039 // again, this was a potentially expensive operation, decrease the
4029 4040 // limits to get the regular clock call early
4030 4041 decrease_limits();
4031 4042 }
4032 4043
4033 4044 void CMTask::print_stats() {
4034 4045 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
4035 4046 _worker_id, _calls);
4036 4047 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4037 4048 _elapsed_time_ms, _termination_time_ms);
4038 4049 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4039 4050 _step_times_ms.num(), _step_times_ms.avg(),
4040 4051 _step_times_ms.sd());
4041 4052 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4042 4053 _step_times_ms.maximum(), _step_times_ms.sum());
4043 4054
4044 4055 #if _MARKING_STATS_
4045 4056 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4046 4057 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4047 4058 _all_clock_intervals_ms.sd());
4048 4059 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4049 4060 _all_clock_intervals_ms.maximum(),
4050 4061 _all_clock_intervals_ms.sum());
4051 4062 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
4052 4063 _clock_due_to_scanning, _clock_due_to_marking);
4053 4064 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
4054 4065 _objs_scanned, _objs_found_on_bitmap);
4055 4066 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
4056 4067 _local_pushes, _local_pops, _local_max_size);
4057 4068 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
4058 4069 _global_pushes, _global_pops, _global_max_size);
4059 4070 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
4060 4071 _global_transfers_to,_global_transfers_from);
4061 4072 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
4062 4073 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
4063 4074 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
4064 4075 _steal_attempts, _steals);
4065 4076 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
4066 4077 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
4067 4078 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4068 4079 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
4069 4080 _aborted_timed_out, _aborted_satb, _aborted_termination);
4070 4081 #endif // _MARKING_STATS_
4071 4082 }
4072 4083
4073 4084 /*****************************************************************************
4074 4085
4075 4086 The do_marking_step(time_target_ms, ...) method is the building
4076 4087 block of the parallel marking framework. It can be called in parallel
4077 4088 with other invocations of do_marking_step() on different tasks
4078 4089 (but only one per task, obviously) and concurrently with the
4079 4090 mutator threads, or during remark, hence it eliminates the need
4080 4091 for two versions of the code. When called during remark, it will
4081 4092 pick up from where the task left off during the concurrent marking
4082 4093 phase. Interestingly, tasks are also claimable during evacuation
4083 4094 pauses, since do_marking_step() ensures that it aborts before
4084 4095 it needs to yield.
4085 4096
4086 4097 The data structures that it uses to do marking work are the
4087 4098 following:
4088 4099
4089 4100 (1) Marking Bitmap. If there are gray objects that appear only
4090 4101 on the bitmap (this happens either when dealing with an overflow
4091 4102 or when the initial marking phase has simply marked the roots
4092 4103 and didn't push them on the stack), then tasks claim heap
4093 4104 regions whose bitmap they then scan to find gray objects. A
4094 4105 global finger indicates where the end of the last claimed region
4095 4106 is. A local finger indicates how far into the region a task has
4096 4107 scanned. The two fingers are used to determine how to gray an
4097 4108 object (i.e. whether simply marking it is OK, as it will be
4098 4109 visited by a task in the future, or whether it needs to be also
4099 4110 pushed on a stack).
4100 4111
4101 4112 (2) Local Queue. The local queue of the task which is accessed
4102 4113 reasonably efficiently by the task. Other tasks can steal from
4103 4114 it when they run out of work. Throughout the marking phase, a
4104 4115 task attempts to keep its local queue short but not totally
4105 4116 empty, so that entries are available for stealing by other
4106 4117 tasks. Only when there is no more work will a task totally
4107 4118 drain its local queue.
4108 4119
4109 4120 (3) Global Mark Stack. This handles local queue overflow. During
4110 4121 marking only sets of entries are moved between it and the local
4111 4122 queues, as access to it requires a mutex and more fine-grained
4112 4123 interaction with it which might cause contention. If it
4113 4124 overflows, then the marking phase should restart and iterate
4114 4125 over the bitmap to identify gray objects. Throughout the marking
4115 4126 phase, tasks attempt to keep the global mark stack at a small
4116 4127 length but not totally empty, so that entries are available for
4117 4128 popping by other tasks. Only when there is no more work will tasks
4118 4129 will totally drain the global mark stack.
4119 4130
4120 4131 (4) SATB Buffer Queue. This is where completed SATB buffers are
4121 4132 made available. Buffers are regularly removed from this queue
4122 4133 and scanned for roots, so that the queue doesn't get too
4123 4134 long. During remark, all completed buffers are processed, as
4124 4135 well as the filled-in parts of any uncompleted buffers.
4125 4136
4126 4137 The do_marking_step() method tries to abort when the time target
4127 4138 has been reached. There are a few other cases when the
4128 4139 do_marking_step() method also aborts:
4129 4140
4130 4141 (1) When the marking phase has been aborted (after a Full GC).
4131 4142
4132 4143 (2) When a global overflow (on the global stack) has been
4133 4144 triggered. Before the task aborts, it will actually sync up with
4134 4145 the other tasks to ensure that all the marking data structures
4135 4146 (local queues, stacks, fingers etc.) are re-initialized so that
4136 4147 when do_marking_step() completes, the marking phase can
4137 4148 immediately restart.
4138 4149
4139 4150 (3) When enough completed SATB buffers are available. The
4140 4151 do_marking_step() method only tries to drain SATB buffers right
4141 4152 at the beginning. So, if enough buffers are available, the
4142 4153 marking step aborts and the SATB buffers are processed at
4143 4154 the beginning of the next invocation.
4144 4155
4145 4156 (4) To yield. When we have to yield, we abort and yield
4146 4157 right at the end of do_marking_step(). This saves us from a lot
4147 4158 of hassle as, by yielding we might allow a Full GC. If this
4148 4159 happens then objects will be compacted underneath our feet, the
4149 4160 heap might shrink, etc. We save checking for this by just
4150 4161 aborting and doing the yield right at the end.
4151 4162
4152 4163 From the above it follows that the do_marking_step() method should
4153 4164 be called in a loop (or, otherwise, regularly) until it completes.
4154 4165
4155 4166 If a marking step completes without its has_aborted() flag being
4156 4167 true, it means it has completed the current marking phase (and
4157 4168 also all other marking tasks have done so and have all synced up).
4158 4169
4159 4170     A method called regular_clock_call() is invoked "regularly" (at
4160 4171     sub-ms intervals) throughout marking. It is this clock method that
4161 4172 checks all the abort conditions which were mentioned above and
4162 4173 decides when the task should abort. A work-based scheme is used to
4163 4174 trigger this clock method: when the number of object words the
4164 4175 marking phase has scanned or the number of references the marking
4165 4176     phase has visited reach a given limit. Additional invocations of
4166 4177     the clock method have been planted in a few other strategic places
4167 4178     too. The initial reason for the clock method was to avoid calling
4168 4179 vtime too regularly, as it is quite expensive. So, once it was in
4169 4180 place, it was natural to piggy-back all the other conditions on it
4170 4181 too and not constantly check them throughout the code.
4171 4182
4172 4183 If do_termination is true then do_marking_step will enter its
4173 4184 termination protocol.
4174 4185
4175 4186 The value of is_serial must be true when do_marking_step is being
4176 4187 called serially (i.e. by the VMThread) and do_marking_step should
4177 4188 skip any synchronization in the termination and overflow code.
4178 4189 Examples include the serial remark code and the serial reference
4179 4190 processing closures.
4180 4191
4181 4192 The value of is_serial must be false when do_marking_step is
4182 4193 being called by any of the worker threads in a work gang.
4183 4194 Examples include the concurrent marking code (CMMarkingTask),
4184 4195 the MT remark code, and the MT reference processing closures.
4185 4196
4186 4197 *****************************************************************************/
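
// The calling contract above, as a minimal illustrative sketch (this
// is NOT HotSpot code: the helper name and the 10.0ms target are made
// up for the example; only the call-until-completed pattern mirrors
// what the real callers elsewhere in this file do):
//
//   static void drive_marking_step_example(CMTask* task) {
//     do {
//       task->do_marking_step(10.0  /* time target in ms        */,
//                             true  /* do_termination           */,
//                             false /* is_serial: worker thread */);
//       // An aborted step (time-out, global overflow, SATB buffer
//       // pressure, or a yield) is simply retried; only a step that
//       // returns with has_aborted() == false ends the marking phase.
//     } while (task->has_aborted());
//   }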
4187 4198
4188 4199 void CMTask::do_marking_step(double time_target_ms,
4189 4200 bool do_termination,
4190 4201 bool is_serial) {
4191 4202 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4192 4203 assert(concurrent() == _cm->concurrent(), "they should be the same");
4193 4204
4194 4205 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4195 4206 assert(_task_queues != NULL, "invariant");
4196 4207 assert(_task_queue != NULL, "invariant");
4197 4208 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4198 4209
4199 4210 assert(!_claimed,
4200 4211 "only one thread should claim this task at any one time");
4201 4212
4202 4213   // OK, this doesn't safeguard against all possible scenarios, as it is
4203 4214 // possible for two threads to set the _claimed flag at the same
4204 4215 // time. But it is only for debugging purposes anyway and it will
4205 4216 // catch most problems.
4206 4217 _claimed = true;
4207 4218
4208 4219 _start_time_ms = os::elapsedVTime() * 1000.0;
4209 4220 statsOnly( _interval_start_time_ms = _start_time_ms );
4210 4221
4211 4222 // If do_stealing is true then do_marking_step will attempt to
4212 4223 // steal work from the other CMTasks. It only makes sense to
4213 4224 // enable stealing when the termination protocol is enabled
4214 4225 // and do_marking_step() is not being called serially.
4215 4226 bool do_stealing = do_termination && !is_serial;
4216 4227
4217 4228 double diff_prediction_ms =
4218 4229 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4219 4230 _time_target_ms = time_target_ms - diff_prediction_ms;
4220 4231
4221 4232 // set up the variables that are used in the work-based scheme to
4222 4233 // call the regular clock method
4223 4234 _words_scanned = 0;
4224 4235 _refs_reached = 0;
4225 4236 recalculate_limits();
4226 4237
4227 4238 // clear all flags
4228 4239 clear_has_aborted();
4229 4240 _has_timed_out = false;
4230 4241 _draining_satb_buffers = false;
4231 4242
4232 4243 ++_calls;
4233 4244
4234 4245 if (_cm->verbose_low()) {
4235 4246 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4236 4247 "target = %1.2lfms >>>>>>>>>>",
4237 4248 _worker_id, _calls, _time_target_ms);
4238 4249 }
4239 4250
4240 4251 // Set up the bitmap and oop closures. Anything that uses them is
4241 4252   // eventually called from this method, so it is OK to allocate them
4242 4253   // locally, on this frame's stack.
4243 4254 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4244 4255 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4245 4256 set_cm_oop_closure(&cm_oop_closure);
4246 4257
4247 4258 if (_cm->has_overflown()) {
4248 4259 // This can happen if the mark stack overflows during a GC pause
4249 4260 // and this task, after a yield point, restarts. We have to abort
4250 4261 // as we need to get into the overflow protocol which happens
4251 4262 // right at the end of this task.
4252 4263 set_has_aborted();
4253 4264 }
4254 4265
4255 4266 // First drain any available SATB buffers. After this, we will not
4256 4267 // look at SATB buffers before the next invocation of this method.
4257 4268 // If enough completed SATB buffers are queued up, the regular clock
4258 4269 // will abort this task so that it restarts.
4259 4270 drain_satb_buffers();
4260 4271 // ...then partially drain the local queue and the global stack
4261 4272 drain_local_queue(true);
4262 4273 drain_global_stack(true);
4263 4274
4264 4275 do {
4265 4276 if (!has_aborted() && _curr_region != NULL) {
4266 4277 // This means that we're already holding on to a region.
4267 4278 assert(_finger != NULL, "if region is not NULL, then the finger "
4268 4279 "should not be NULL either");
4269 4280
4270 4281 // We might have restarted this task after an evacuation pause
4271 4282 // which might have evacuated the region we're holding on to
4272 4283 // underneath our feet. Let's read its limit again to make sure
4273 4284 // that we do not iterate over a region of the heap that
4274 4285 // contains garbage (update_region_limit() will also move
4275 4286 // _finger to the start of the region if it is found empty).
4276 4287 update_region_limit();
4277 4288 // We will start from _finger not from the start of the region,
4278 4289 // as we might be restarting this task after aborting half-way
4279 4290 // through scanning this region. In this case, _finger points to
4280 4291 // the address where we last found a marked object. If this is a
4281 4292 // fresh region, _finger points to start().
4282 4293 MemRegion mr = MemRegion(_finger, _region_limit);
4283 4294
4284 4295 if (_cm->verbose_low()) {
4285 4296 gclog_or_tty->print_cr("[%u] we're scanning part "
4286 4297 "["PTR_FORMAT", "PTR_FORMAT") "
4287 4298 "of region "HR_FORMAT,
4288 4299 _worker_id, p2i(_finger), p2i(_region_limit),
4289 4300 HR_FORMAT_PARAMS(_curr_region));
4290 4301 }
4291 4302
4292 4303 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4293 4304 "humongous regions should go around loop once only");
4294 4305
4295 4306 // Some special cases:
4296 4307 // If the memory region is empty, we can just give up the region.
4297 4308 // If the current region is humongous then we only need to check
4298 4309 // the bitmap for the bit associated with the start of the object,
4299 4310 // scan the object if it's live, and give up the region.
4300 4311 // Otherwise, let's iterate over the bitmap of the part of the region
4301 4312 // that is left.
4302 4313 // If the iteration is successful, give up the region.
4303 4314 if (mr.is_empty()) {
4304 4315 giveup_current_region();
4305 4316 regular_clock_call();
4306 4317 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4307 4318 if (_nextMarkBitMap->isMarked(mr.start())) {
4308 4319 // The object is marked - apply the closure
4309 4320 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4310 4321 bitmap_closure.do_bit(offset);
4311 4322 }
4312 4323 // Even if this task aborted while scanning the humongous object
4313 4324 // we can (and should) give up the current region.
4314 4325 giveup_current_region();
4315 4326 regular_clock_call();
4316 4327 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4317 4328 giveup_current_region();
4318 4329 regular_clock_call();
4319 4330 } else {
4320 4331 assert(has_aborted(), "currently the only way to do so");
4321 4332 // The only way to abort the bitmap iteration is to return
4322 4333 // false from the do_bit() method. However, inside the
4323 4334 // do_bit() method we move the _finger to point to the
4324 4335 // object currently being looked at. So, if we bail out, we
4325 4336 // have definitely set _finger to something non-null.
4326 4337 assert(_finger != NULL, "invariant");
4327 4338
4328 4339 // Region iteration was actually aborted. So now _finger
4329 4340 // points to the address of the object we last scanned. If we
4330 4341 // leave it there, when we restart this task, we will rescan
4331 4342 // the object. It is easy to avoid this. We move the finger by
4332 4343 // enough to point to the next possible object header (the
4333 4344 // bitmap knows by how much we need to move it as it knows its
4334 4345 // granularity).
4335 4346 assert(_finger < _region_limit, "invariant");
4336 4347 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4337 4348 // Check if bitmap iteration was aborted while scanning the last object
4338 4349 if (new_finger >= _region_limit) {
4339 4350 giveup_current_region();
4340 4351 } else {
4341 4352 move_finger_to(new_finger);
4342 4353 }
4343 4354 }
4344 4355 }
4345 4356 // At this point we have either completed iterating over the
4346 4357 // region we were holding on to, or we have aborted.
4347 4358
4348 4359 // We then partially drain the local queue and the global stack.
4349 4360 // (Do we really need this?)
4350 4361 drain_local_queue(true);
4351 4362 drain_global_stack(true);
4352 4363
4353 4364 // Read the note on the claim_region() method on why it might
4354 4365 // return NULL with potentially more regions available for
4355 4366 // claiming and why we have to check out_of_regions() to determine
4356 4367 // whether we're done or not.
4357 4368 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4358 4369 // We are going to try to claim a new region. We should have
4359 4370 // given up on the previous one.
4360 4371 // Separated the asserts so that we know which one fires.
4361 4372 assert(_curr_region == NULL, "invariant");
4362 4373 assert(_finger == NULL, "invariant");
4363 4374 assert(_region_limit == NULL, "invariant");
4364 4375 if (_cm->verbose_low()) {
4365 4376 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4366 4377 }
4367 4378 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4368 4379 if (claimed_region != NULL) {
4369 4380 // Yes, we managed to claim one
4370 4381 statsOnly( ++_regions_claimed );
4371 4382
4372 4383 if (_cm->verbose_low()) {
4373 4384 gclog_or_tty->print_cr("[%u] we successfully claimed "
4374 4385 "region "PTR_FORMAT,
4375 4386 _worker_id, p2i(claimed_region));
4376 4387 }
4377 4388
4378 4389 setup_for_region(claimed_region);
4379 4390 assert(_curr_region == claimed_region, "invariant");
4380 4391 }
4381 4392 // It is important to call the regular clock here. It might take
4382 4393 // a while to claim a region if, for example, we hit a large
4383 4394 // block of empty regions. So we need to call the regular clock
4384 4395 // method once round the loop to make sure it's called
4385 4396 // frequently enough.
4386 4397 regular_clock_call();
4387 4398 }
4388 4399
4389 4400 if (!has_aborted() && _curr_region == NULL) {
4390 4401 assert(_cm->out_of_regions(),
4391 4402 "at this point we should be out of regions");
4392 4403 }
4393 4404 } while ( _curr_region != NULL && !has_aborted());
4394 4405
4395 4406 if (!has_aborted()) {
4396 4407 // We cannot check whether the global stack is empty, since other
4397 4408 // tasks might be pushing objects to it concurrently.
4398 4409 assert(_cm->out_of_regions(),
4399 4410 "at this point we should be out of regions");
4400 4411
4401 4412 if (_cm->verbose_low()) {
4402 4413 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4403 4414 }
4404 4415
4405 4416 // Try to reduce the number of available SATB buffers so that
4406 4417 // remark has less work to do.
4407 4418 drain_satb_buffers();
4408 4419 }
4409 4420
4410 4421 // Since we've done everything else, we can now totally drain the
4411 4422 // local queue and global stack.
4412 4423 drain_local_queue(false);
4413 4424 drain_global_stack(false);
4414 4425
4415 4426   // Attempt at work stealing from other tasks' queues.
4416 4427 if (do_stealing && !has_aborted()) {
4417 4428 // We have not aborted. This means that we have finished all that
4418 4429 // we could. Let's try to do some stealing...
4419 4430
4420 4431 // We cannot check whether the global stack is empty, since other
4421 4432 // tasks might be pushing objects to it concurrently.
4422 4433 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4423 4434 "only way to reach here");
4424 4435
4425 4436 if (_cm->verbose_low()) {
4426 4437 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4427 4438 }
4428 4439
4429 4440 while (!has_aborted()) {
4430 4441 oop obj;
4431 4442 statsOnly( ++_steal_attempts );
4432 4443
4433 4444 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4434 4445 if (_cm->verbose_medium()) {
4435 4446 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4436 4447 _worker_id, p2i((void*) obj));
4437 4448 }
4438 4449
4439 4450 statsOnly( ++_steals );
4440 4451
4441 4452 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4442 4453 "any stolen object should be marked");
4443 4454 scan_object(obj);
4444 4455
4445 4456 // And since we're towards the end, let's totally drain the
4446 4457 // local queue and global stack.
4447 4458 drain_local_queue(false);
4448 4459 drain_global_stack(false);
4449 4460 } else {
4450 4461 break;
4451 4462 }
4452 4463 }
4453 4464 }
4454 4465
4455 4466 // If we are about to wrap up and go into termination, check if we
4456 4467 // should raise the overflow flag.
4457 4468 if (do_termination && !has_aborted()) {
4458 4469 if (_cm->force_overflow()->should_force()) {
4459 4470 _cm->set_has_overflown();
4460 4471 regular_clock_call();
4461 4472 }
4462 4473 }
4463 4474
4464 4475 // We still haven't aborted. Now, let's try to get into the
4465 4476 // termination protocol.
4466 4477 if (do_termination && !has_aborted()) {
4467 4478 // We cannot check whether the global stack is empty, since other
4468 4479 // tasks might be concurrently pushing objects on it.
4469 4480 // Separated the asserts so that we know which one fires.
4470 4481 assert(_cm->out_of_regions(), "only way to reach here");
4471 4482 assert(_task_queue->size() == 0, "only way to reach here");
4472 4483
4473 4484 if (_cm->verbose_low()) {
4474 4485 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4475 4486 }
4476 4487
4477 4488 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4478 4489
4479 4490 // The CMTask class also extends the TerminatorTerminator class,
4480 4491 // hence its should_exit_termination() method will also decide
4481 4492 // whether to exit the termination protocol or not.
4482 4493 bool finished = (is_serial ||
4483 4494 _cm->terminator()->offer_termination(this));
4484 4495 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4485 4496 _termination_time_ms +=
4486 4497 termination_end_time_ms - _termination_start_time_ms;
4487 4498
4488 4499 if (finished) {
4489 4500 // We're all done.
4490 4501
4491 4502 if (_worker_id == 0) {
4492 4503 // let's allow task 0 to do this
4493 4504 if (concurrent()) {
4494 4505 assert(_cm->concurrent_marking_in_progress(), "invariant");
4495 4506 // we need to set this to false before the next
4496 4507 // safepoint. This way we ensure that the marking phase
4497 4508 // doesn't observe any more heap expansions.
4498 4509 _cm->clear_concurrent_marking_in_progress();
4499 4510 }
4500 4511 }
4501 4512
4502 4513 // We can now guarantee that the global stack is empty, since
4503 4514 // all other tasks have finished. We separated the guarantees so
4504 4515 // that, if a condition is false, we can immediately find out
4505 4516 // which one.
4506 4517 guarantee(_cm->out_of_regions(), "only way to reach here");
4507 4518 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4508 4519 guarantee(_task_queue->size() == 0, "only way to reach here");
4509 4520 guarantee(!_cm->has_overflown(), "only way to reach here");
4510 4521 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4511 4522
4512 4523 if (_cm->verbose_low()) {
4513 4524 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4514 4525 }
4515 4526 } else {
4516 4527       // Apparently there's more work to do. Let's abort this task. The
4517 4528       // caller will restart it and we can hopefully find more things to do.
4518 4529
4519 4530 if (_cm->verbose_low()) {
4520 4531 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4521 4532 _worker_id);
4522 4533 }
4523 4534
4524 4535 set_has_aborted();
4525 4536 statsOnly( ++_aborted_termination );
4526 4537 }
4527 4538 }
4528 4539
4529 4540 // Mainly for debugging purposes to make sure that a pointer to the
4530 4541   // closure which was stack-allocated in this frame doesn't
4531 4542 // escape it by accident.
4532 4543 set_cm_oop_closure(NULL);
4533 4544 double end_time_ms = os::elapsedVTime() * 1000.0;
4534 4545 double elapsed_time_ms = end_time_ms - _start_time_ms;
4535 4546 // Update the step history.
4536 4547 _step_times_ms.add(elapsed_time_ms);
4537 4548
4538 4549 if (has_aborted()) {
4539 4550 // The task was aborted for some reason.
4540 4551
4541 4552 statsOnly( ++_aborted );
4542 4553
4543 4554 if (_has_timed_out) {
4544 4555 double diff_ms = elapsed_time_ms - _time_target_ms;
4545 4556 // Keep statistics of how well we did with respect to hitting
4546 4557 // our target only if we actually timed out (if we aborted for
4547 4558 // other reasons, then the results might get skewed).
4548 4559 _marking_step_diffs_ms.add(diff_ms);
4549 4560 }
4550 4561
4551 4562 if (_cm->has_overflown()) {
4552 4563 // This is the interesting one. We aborted because a global
4553 4564 // overflow was raised. This means we have to restart the
4554 4565 // marking phase and start iterating over regions. However, in
4555 4566 // order to do this we have to make sure that all tasks stop
4556 4567 // what they are doing and re-initialise in a safe manner. We
4557 4568 // will achieve this with the use of two barrier sync points.
4558 4569
4559 4570 if (_cm->verbose_low()) {
4560 4571 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4561 4572 }
4562 4573
4563 4574 if (!is_serial) {
4564 4575 // We only need to enter the sync barrier if being called
4565 4576 // from a parallel context
4566 4577 _cm->enter_first_sync_barrier(_worker_id);
4567 4578
4568 4579 // When we exit this sync barrier we know that all tasks have
4569 4580 // stopped doing marking work. So, it's now safe to
4570 4581 // re-initialise our data structures. At the end of this method,
4571 4582 // task 0 will clear the global data structures.
4572 4583 }
4573 4584
4574 4585 statsOnly( ++_aborted_overflow );
4575 4586
4576 4587 // We clear the local state of this task...
4577 4588 clear_region_fields();
4578 4589
4579 4590 if (!is_serial) {
4580 4591 // ...and enter the second barrier.
4581 4592 _cm->enter_second_sync_barrier(_worker_id);
4582 4593 }
4583 4594 // At this point, if we're during the concurrent phase of
4584 4595 // marking, everything has been re-initialized and we're
4585 4596 // ready to restart.
4586 4597 }
4587 4598
4588 4599 if (_cm->verbose_low()) {
4589 4600 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4590 4601 "elapsed = %1.2lfms <<<<<<<<<<",
4591 4602 _worker_id, _time_target_ms, elapsed_time_ms);
4592 4603 if (_cm->has_aborted()) {
4593 4604 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4594 4605 _worker_id);
4595 4606 }
4596 4607 }
4597 4608 } else {
4598 4609 if (_cm->verbose_low()) {
4599 4610 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4600 4611 "elapsed = %1.2lfms <<<<<<<<<<",
4601 4612 _worker_id, _time_target_ms, elapsed_time_ms);
4602 4613 }
4603 4614 }
4604 4615
4605 4616 _claimed = false;
4606 4617 }
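
// Illustrative invocation shapes for the two modes discussed in the
// comment above do_marking_step() (a hedged sketch; the literal time
// targets used at the real call sites may differ):
//
//   // Concurrent/MT case: a worker thread in a work gang.
//   task->do_marking_step(target_ms,
//                         true  /* do_termination */,
//                         false /* is_serial      */);
//
//   // Serial case: remark or reference processing on the VMThread,
//   // with an effectively unbounded time target.
//   task->do_marking_step(1000000000.0 /* ~unbounded */,
//                         true  /* do_termination */,
//                         true  /* is_serial      */);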
4607 4618
4608 4619 CMTask::CMTask(uint worker_id,
4609 4620 ConcurrentMark* cm,
4610 4621 size_t* marked_bytes,
4611 4622 BitMap* card_bm,
4612 4623 CMTaskQueue* task_queue,
4613 4624 CMTaskQueueSet* task_queues)
4614 4625 : _g1h(G1CollectedHeap::heap()),
4615 4626 _worker_id(worker_id), _cm(cm),
4616 4627 _claimed(false),
4617 4628 _nextMarkBitMap(NULL), _hash_seed(17),
4618 4629 _task_queue(task_queue),
4619 4630 _task_queues(task_queues),
4620 4631 _cm_oop_closure(NULL),
4621 4632 _marked_bytes_array(marked_bytes),
4622 4633 _card_bm(card_bm) {
4623 4634 guarantee(task_queue != NULL, "invariant");
4624 4635 guarantee(task_queues != NULL, "invariant");
4625 4636
4626 4637 statsOnly( _clock_due_to_scanning = 0;
4627 4638 _clock_due_to_marking = 0 );
4628 4639
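// Seed the step-time prediction history with a 0.5ms sample so that
// the very first do_marking_step() call already subtracts a small
// safety margin from its time target (see get_new_prediction() there).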
4629 4640 _marking_step_diffs_ms.add(0.5);
4630 4641 }
4631 4642
4632 4643 // These are formatting macros that are used below to ensure
4633 4644 // consistent formatting. The *_H_* versions are used to format the
4634 4645 // header for a particular value and they should be kept consistent
4635 4646 // with the corresponding macro. Also note that most of the macros add
4636 4647 // the necessary white space (as a prefix) which makes them a bit
4637 4648 // easier to compose.
4638 4649
4639 4650 // All the output lines are prefixed with this string to be able to
4640 4651 // identify them easily in a large log file.
4641 4652 #define G1PPRL_LINE_PREFIX "###"
4642 4653
4643 4654 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4644 4655 #ifdef _LP64
4645 4656 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4646 4657 #else // _LP64
4647 4658 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4648 4659 #endif // _LP64
4649 4660
4650 4661 // For per-region info
4651 4662 #define G1PPRL_TYPE_FORMAT " %-4s"
4652 4663 #define G1PPRL_TYPE_H_FORMAT " %4s"
4653 4664 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4654 4665 #define G1PPRL_BYTE_H_FORMAT " %9s"
4655 4666 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4656 4667 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4657 4668
4658 4669 // For summary info
4659 4670 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4660 4671 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4661 4672 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4662 4673 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
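
// For example (an illustrative expansion, not part of the original
// source): G1PPRL_SUM_MB_PERC_FORMAT("used") concatenates, via
// adjacent string literals, into " used: %1.2f MB / %1.2f %%".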
4663 4674
4664 4675 G1PrintRegionLivenessInfoClosure::
4665 4676 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4666 4677 : _out(out),
4667 4678 _total_used_bytes(0), _total_capacity_bytes(0),
4668 4679 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4669 4680 _hum_used_bytes(0), _hum_capacity_bytes(0),
4670 4681 _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4671 4682 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4672 4683 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4673 4684 MemRegion g1_reserved = g1h->g1_reserved();
4674 4685 double now = os::elapsedTime();
4675 4686
4676 4687 // Print the header of the output.
4677 4688 _out->cr();
4678 4689 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4679 4690 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4680 4691 G1PPRL_SUM_ADDR_FORMAT("reserved")
4681 4692 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4682 4693 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4683 4694 HeapRegion::GrainBytes);
4684 4695 _out->print_cr(G1PPRL_LINE_PREFIX);
4685 4696 _out->print_cr(G1PPRL_LINE_PREFIX
4686 4697 G1PPRL_TYPE_H_FORMAT
4687 4698 G1PPRL_ADDR_BASE_H_FORMAT
4688 4699 G1PPRL_BYTE_H_FORMAT
4689 4700 G1PPRL_BYTE_H_FORMAT
4690 4701 G1PPRL_BYTE_H_FORMAT
4691 4702 G1PPRL_DOUBLE_H_FORMAT
4692 4703 G1PPRL_BYTE_H_FORMAT
4693 4704 G1PPRL_BYTE_H_FORMAT,
4694 4705 "type", "address-range",
4695 4706 "used", "prev-live", "next-live", "gc-eff",
4696 4707 "remset", "code-roots");
4697 4708 _out->print_cr(G1PPRL_LINE_PREFIX
4698 4709 G1PPRL_TYPE_H_FORMAT
4699 4710 G1PPRL_ADDR_BASE_H_FORMAT
4700 4711 G1PPRL_BYTE_H_FORMAT
4701 4712 G1PPRL_BYTE_H_FORMAT
4702 4713 G1PPRL_BYTE_H_FORMAT
4703 4714 G1PPRL_DOUBLE_H_FORMAT
4704 4715 G1PPRL_BYTE_H_FORMAT
4705 4716 G1PPRL_BYTE_H_FORMAT,
4706 4717 "", "",
4707 4718 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4708 4719 "(bytes)", "(bytes)");
4709 4720 }
4710 4721
4711 4722 // It takes as a parameter a reference to one of the _hum_* fields,
4712 4723 // deduces the corresponding value for a region in a humongous region
4713 4724 // series (either the region size, or what's left if the _hum_* field
4714 4725 // is < the region size), and updates the _hum_* field accordingly.
4715 4726 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4716 4727 size_t bytes = 0;
4717 4728 // The > 0 check is to deal with the prev and next live bytes which
4718 4729 // could be 0.
4719 4730 if (*hum_bytes > 0) {
4720 4731 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4721 4732 *hum_bytes -= bytes;
4722 4733 }
4723 4734 return bytes;
4724 4735 }
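
// Worked example (illustrative; assumes a 1 MB region size): for a
// humongous series whose "starts humongous" region recorded
// _hum_used_bytes = 2.5 MB, three successive calls return 1 MB, 1 MB
// and 0.5 MB, leaving the field zeroed for the next series' regions.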
4725 4736
4726 4737 // It deduces the values for a region in a humongous region series
4727 4738 // from the _hum_* fields and updates those accordingly. It assumes
4728 4739 // that that _hum_* fields have already been set up from the "starts
4729 4740 // humongous" region and we visit the regions in address order.
4730 4741 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4731 4742 size_t* capacity_bytes,
4732 4743 size_t* prev_live_bytes,
4733 4744 size_t* next_live_bytes) {
4734 4745 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4735 4746 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4736 4747 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4737 4748 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4738 4749 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4739 4750 }
4740 4751
4741 4752 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4742 4753 const char* type = r->get_type_str();
4743 4754 HeapWord* bottom = r->bottom();
4744 4755 HeapWord* end = r->end();
4745 4756 size_t capacity_bytes = r->capacity();
4746 4757 size_t used_bytes = r->used();
4747 4758 size_t prev_live_bytes = r->live_bytes();
4748 4759 size_t next_live_bytes = r->next_live_bytes();
4749 4760 double gc_eff = r->gc_efficiency();
4750 4761 size_t remset_bytes = r->rem_set()->mem_size();
4751 4762 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4752 4763
4753 4764 if (r->startsHumongous()) {
4754 4765 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4755 4766 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4756 4767 "they should have been zeroed after the last time we used them");
4757 4768 // Set up the _hum_* fields.
4758 4769 _hum_capacity_bytes = capacity_bytes;
4759 4770 _hum_used_bytes = used_bytes;
4760 4771 _hum_prev_live_bytes = prev_live_bytes;
4761 4772 _hum_next_live_bytes = next_live_bytes;
4762 4773 get_hum_bytes(&used_bytes, &capacity_bytes,
4763 4774 &prev_live_bytes, &next_live_bytes);
4764 4775 end = bottom + HeapRegion::GrainWords;
4765 4776 } else if (r->continuesHumongous()) {
4766 4777 get_hum_bytes(&used_bytes, &capacity_bytes,
4767 4778 &prev_live_bytes, &next_live_bytes);
4768 4779 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4769 4780 }
4770 4781
4771 4782 _total_used_bytes += used_bytes;
4772 4783 _total_capacity_bytes += capacity_bytes;
4773 4784 _total_prev_live_bytes += prev_live_bytes;
4774 4785 _total_next_live_bytes += next_live_bytes;
4775 4786 _total_remset_bytes += remset_bytes;
4776 4787 _total_strong_code_roots_bytes += strong_code_roots_bytes;
4777 4788
4778 4789 // Print a line for this particular region.
4779 4790 _out->print_cr(G1PPRL_LINE_PREFIX
4780 4791 G1PPRL_TYPE_FORMAT
4781 4792 G1PPRL_ADDR_BASE_FORMAT
4782 4793 G1PPRL_BYTE_FORMAT
4783 4794 G1PPRL_BYTE_FORMAT
4784 4795 G1PPRL_BYTE_FORMAT
4785 4796 G1PPRL_DOUBLE_FORMAT
4786 4797 G1PPRL_BYTE_FORMAT
4787 4798 G1PPRL_BYTE_FORMAT,
4788 4799 type, p2i(bottom), p2i(end),
4789 4800 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4790 4801 remset_bytes, strong_code_roots_bytes);
4791 4802
4792 4803 return false;
4793 4804 }
4794 4805
4795 4806 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4796 4807   // Add static memory usage to the remembered set sizes.
4797 4808 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4798 4809 // Print the footer of the output.
4799 4810 _out->print_cr(G1PPRL_LINE_PREFIX);
4800 4811 _out->print_cr(G1PPRL_LINE_PREFIX
4801 4812 " SUMMARY"
4802 4813 G1PPRL_SUM_MB_FORMAT("capacity")
4803 4814 G1PPRL_SUM_MB_PERC_FORMAT("used")
4804 4815 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4805 4816 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4806 4817 G1PPRL_SUM_MB_FORMAT("remset")
4807 4818 G1PPRL_SUM_MB_FORMAT("code-roots"),
4808 4819 bytes_to_mb(_total_capacity_bytes),
4809 4820 bytes_to_mb(_total_used_bytes),
4810 4821 perc(_total_used_bytes, _total_capacity_bytes),
4811 4822 bytes_to_mb(_total_prev_live_bytes),
4812 4823 perc(_total_prev_live_bytes, _total_capacity_bytes),
4813 4824 bytes_to_mb(_total_next_live_bytes),
4814 4825 perc(_total_next_live_bytes, _total_capacity_bytes),
4815 4826 bytes_to_mb(_total_remset_bytes),
4816 4827 bytes_to_mb(_total_strong_code_roots_bytes));
4817 4828 _out->cr();
4818 4829 }
[ 2193 lines elided ]