rev 3708 : 8000244: G1: Ergonomically set MarkStackSize and use virtual space for global marking stack
Summary: Set MarkStackSize ergonomically, based on the number of parallel marking threads and subject to a reasonable minimum. If marking has to be restarted due to an overflow, expand the marking stack up to a reasonable maximum. Allocate the underlying space for the marking stack from virtual memory.
Reviewed-by: jmasa
rev 3709 : imported patch reuse-old-marking-stack
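
[Editor's note] For orientation, the ergonomic sizing this patch introduces in the ConcurrentMark constructor (see the diff below) amounts to the following calculation; a minimal stand-alone sketch, where the two constants are illustrative stand-ins for the real MarkStackSizeMax and TASKQUEUE_SIZE values:

#include <algorithm>
#include <cstddef>

// Hypothetical stand-ins for the HotSpot flag/constant values.
static const size_t kMarkStackSizeMax = 512 * 1024;   // assumption
static const size_t kTaskQueueSize    = 128 * 1024;   // assumption

// If MarkStackSize is not set on the command line, scale the stack with
// the number of parallel marking threads, clamped to the permitted range.
size_t ergonomic_mark_stack_size(size_t mark_stack_size,
                                 size_t parallel_marking_threads) {
  return std::min(kMarkStackSizeMax,
                  std::max(mark_stack_size,
                           parallel_marking_threads * kTaskQueueSize));
}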
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 32 #include "gc_implementation/g1/g1Log.hpp"
33 33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
34 34 #include "gc_implementation/g1/g1RemSet.hpp"
35 35 #include "gc_implementation/g1/heapRegion.inline.hpp"
36 36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
37 37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
38 38 #include "gc_implementation/shared/vmGCOperations.hpp"
39 39 #include "memory/genOopClosures.inline.hpp"
40 40 #include "memory/referencePolicy.hpp"
41 41 #include "memory/resourceArea.hpp"
42 42 #include "oops/oop.inline.hpp"
43 43 #include "runtime/handles.inline.hpp"
44 44 #include "runtime/java.hpp"
45 45 #include "services/memTracker.hpp"
46 46
47 47 // Concurrent marking bit map wrapper
48 48
49 49 CMBitMapRO::CMBitMapRO(int shifter) :
50 50 _bm(),
51 51 _shifter(shifter) {
52 52 _bmStartWord = 0;
53 53 _bmWordSize = 0;
54 54 }
55 55
56 56 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
57 57 HeapWord* limit) const {
58 58 // First we must round addr *up* to a possible object boundary.
59 59 addr = (HeapWord*)align_size_up((intptr_t)addr,
60 60 HeapWordSize << _shifter);
61 61 size_t addrOffset = heapWordToOffset(addr);
62 62 if (limit == NULL) {
63 63 limit = _bmStartWord + _bmWordSize;
64 64 }
65 65 size_t limitOffset = heapWordToOffset(limit);
66 66 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
67 67 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
68 68 assert(nextAddr >= addr, "get_next_one postcondition");
69 69 assert(nextAddr == limit || isMarked(nextAddr),
70 70 "get_next_one postcondition");
71 71 return nextAddr;
72 72 }
73 73
74 74 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
75 75 HeapWord* limit) const {
76 76 size_t addrOffset = heapWordToOffset(addr);
77 77 if (limit == NULL) {
78 78 limit = _bmStartWord + _bmWordSize;
79 79 }
80 80 size_t limitOffset = heapWordToOffset(limit);
81 81 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
82 82 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
83 83 assert(nextAddr >= addr, "get_next_one postcondition");
84 84 assert(nextAddr == limit || !isMarked(nextAddr),
85 85 "get_next_one postcondition");
86 86 return nextAddr;
87 87 }
88 88
89 89 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
90 90 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
91 91 return (int) (diff >> _shifter);
92 92 }
93 93
94 94 #ifndef PRODUCT
95 95 bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
96 96 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
97 97 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
98 98 "size inconsistency");
99 99 return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
100 100 _bmWordSize == heap_rs.size()>>LogHeapWordSize;
101 101 }
102 102 #endif
103 103
104 104 bool CMBitMap::allocate(ReservedSpace heap_rs) {
105 105 _bmStartWord = (HeapWord*)(heap_rs.base());
106 106 _bmWordSize = heap_rs.size()/HeapWordSize; // heap_rs.size() is in bytes
107 107 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
108 108 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
109 109 if (!brs.is_reserved()) {
110 110 warning("ConcurrentMark marking bit map allocation failure");
111 111 return false;
112 112 }
113 113 MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
114 114 // For now we'll just commit all of the bit map up front.
115 115 // Later on we'll try to be more parsimonious with swap.
116 116 if (!_virtual_space.initialize(brs, brs.size())) {
117 117 warning("ConcurrentMark marking bit map backing store failure");
118 118 return false;
119 119 }
120 120 assert(_virtual_space.committed_size() == brs.size(),
121 121 "didn't reserve backing store for all of concurrent marking bit map?");
122 122 _bm.set_map((uintptr_t*)_virtual_space.low());
123 123 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
124 124 _bmWordSize, "inconsistency in bit map sizing");
125 125 _bm.set_size(_bmWordSize >> _shifter);
126 126 return true;
127 127 }
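
[Editor's note] A note on cost: with _shifter = MinObjAlignment - 1 (zero on a typical 64-bit build with 8-byte object alignment), the bitmap keeps one mark bit per heap word:

// bitmap_bytes = heap_bytes / HeapWordSize / BitsPerByte
//              = heap_bytes / 64        (64-bit, 8-byte alignment)
// Example: a 4 GB heap needs ~64 MB per bitmap, and G1 maintains two
// of them (the previous and next marking bitmaps).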
128 128
129 129 void CMBitMap::clearAll() {
130 130 _bm.clear();
131 131 return;
132 132 }
133 133
134 134 void CMBitMap::markRange(MemRegion mr) {
135 135 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
136 136 assert(!mr.is_empty(), "unexpected empty region");
137 137 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
138 138 ((HeapWord *) mr.end())),
139 139 "markRange memory region end is not card aligned");
140 140 // convert address range into offset range
141 141 _bm.at_put_range(heapWordToOffset(mr.start()),
142 142 heapWordToOffset(mr.end()), true);
143 143 }
144 144
145 145 void CMBitMap::clearRange(MemRegion mr) {
146 146 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
147 147 assert(!mr.is_empty(), "unexpected empty region");
148 148 // convert address range into offset range
149 149 _bm.at_put_range(heapWordToOffset(mr.start()),
150 150 heapWordToOffset(mr.end()), false);
151 151 }
152 152
153 153 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
154 154 HeapWord* end_addr) {
155 155 HeapWord* start = getNextMarkedWordAddress(addr);
156 156 start = MIN2(start, end_addr);
157 157 HeapWord* end = getNextUnmarkedWordAddress(start);
158 158 end = MIN2(end, end_addr);
159 159 assert(start <= end, "Consistency check");
160 160 MemRegion mr(start, end);
161 161 if (!mr.is_empty()) {
162 162 clearRange(mr);
163 163 }
164 164 return mr;
165 165 }
166 166
167 167 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
168 168 _base(NULL), _cm(cm)
169 169 #ifdef ASSERT
170 170 , _drain_in_progress(false)
171 171 , _drain_in_progress_yields(false)
172 172 #endif
173 173 {}
174 174
175 175 bool CMMarkStack::allocate(size_t capacity) {
176 176 // allocate a stack of the requisite depth
177 177 ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
178 178 if (!rs.is_reserved()) {
179 179 warning("ConcurrentMark MarkStack allocation failure");
180 180 return false;
181 181 }
182 182 MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
183 183 if (!_virtual_space.initialize(rs, rs.size())) {
184 184 warning("ConcurrentMark MarkStack backing store failure");
185 185 // Release the virtual memory reserved for the marking stack
186 186 rs.release();
187 187 return false;
188 188 }
189 189 assert(_virtual_space.committed_size() == rs.size(),
190 190 "Didn't reserve backing store for all of ConcurrentMark stack?");
191 + _rs = rs;
191 192 _base = (oop*) _virtual_space.low();
192 193 setEmpty();
193 194 _capacity = (jint) capacity;
194 195 _saved_index = -1;
195 196 NOT_PRODUCT(_max_depth = 0);
196 197 return true;
197 198 }
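
[Editor's note] allocate() follows the usual reserve-then-commit pattern (ReservedSpace reserves address space, VirtualSpace commits it), and the patch now also saves the reservation in _rs so expand() can later release it or fall back to it. For readers unfamiliar with the pattern, an illustration in terms of POSIX mmap — not the HotSpot API:

#include <cstddef>
#include <sys/mman.h>

// Reserve-then-commit, as CMMarkStack::allocate() does with
// ReservedSpace/VirtualSpace (illustration only).
void* reserve_and_commit(size_t bytes) {
  // Reserve address space with no access and no backing store.
  void* base = mmap(NULL, bytes, PROT_NONE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (base == MAP_FAILED) {
    return NULL;                       // reservation failed
  }
  // Commit: grant access so the pages may be backed on first touch.
  if (mprotect(base, bytes, PROT_READ | PROT_WRITE) != 0) {
    munmap(base, bytes);               // release the reservation
    return NULL;
  }
  return base;
}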
198 199
199 200 void CMMarkStack::expand() {
200 201 // Called, during remark, if we've overflown the marking stack during marking.
 201 202   assert(isEmpty(), "stack should have been emptied while handling overflow");
202 203 assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
203 204 // Clear expansion flag
204 205 _should_expand = false;
205 206 if (_capacity == (jint) MarkStackSizeMax) {
206 207 if (PrintGCDetails && Verbose) {
207 208 gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
208 209 }
209 210 return;
210 211 }
211 212 // Double capacity if possible
212 213 jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
213 214 // Do not give up existing stack until we have managed to
214 215 // get the double capacity that we desired.
215 216 ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
216 217 sizeof(oop)));
217 - if (rs.is_reserved()) {
218 - // Release the backing store associated with old stack
219 - _virtual_space.release();
220 - // Reinitialize virtual space for new stack
221 - if (!_virtual_space.initialize(rs, rs.size())) {
222 - fatal("Not enough swap for expanded marking stack capacity");
223 - }
224 - _base = (oop*)(_virtual_space.low());
225 - _index = 0;
226 - _capacity = new_capacity;
227 - } else {
218 + if (!rs.is_reserved()) {
228 219 if (PrintGCDetails && Verbose) {
229 220 // Failed to double capacity, continue;
230 221 gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
231 - SIZE_FORMAT"K to " SIZE_FORMAT"K",
222 + SIZE_FORMAT "K to " SIZE_FORMAT "K",
232 223 _capacity / K, new_capacity / K);
233 224 }
225 + return;
226 + }
227 +
228 + // Clear the backing store fields associated with the space for the
 229 +   // old marking stack. Note this doesn't actually release the space.
230 + _virtual_space.release();
231 +
232 + // Reinitialize virtual space for the expanded stack.
233 + if (!_virtual_space.initialize(rs, rs.size())) {
 234 +     // We failed to commit the space for the expanded marking stack.
235 + // Release the expanded reserved space...
236 + rs.release();
237 + // ... and reinitialize with the previous un-expanded space.
238 + if (_virtual_space.initialize(_rs, _rs.size())) {
239 + if (PrintGCDetails && Verbose) {
240 + gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
241 + SIZE_FORMAT "K to " SIZE_FORMAT "K",
242 + _capacity / K, new_capacity / K);
243 + }
244 + } else {
245 + // The previous backing store space should have been already
246 + // committed but we failed to initialize the virtual space
247 + // for some reason.
248 + fatal("Error re-initializing marking stack with old capacity");
249 + }
250 + } else {
251 + // We successfully committed the space for the expanded marking stack.
252 + if (PrintGCDetails && Verbose) {
253 + gclog_or_tty->print(" Successfully expanded marking stack capacity from "
254 + SIZE_FORMAT "K to " SIZE_FORMAT "K",
255 + _capacity / K, new_capacity / K);
256 + }
257 + // Release the previous (unexpanded) space.
258 + _rs.release();
259 + // Record the new (expanded) space.
260 + _rs = rs;
261 + // Record the new capacity
262 + _capacity = new_capacity;
234 263 }
264 + assert(_virtual_space.committed_size() == _rs.size(),
265 + "Didn't reserve backing store for all of ConcurrentMark stack?");
266 + _base = (oop*)(_virtual_space.low());
267 + _index = 0;
235 268 }
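
[Editor's note] The rewritten expand() has three outcomes; condensed, using the same names as the patch:

// 1. Reserving space for double the capacity fails:
//      log (if verbose) and keep using the current stack.
// 2. The reservation succeeds but committing it fails:
//      release the new reservation and re-commit the saved old
//      reservation (_rs); if even that fails, fatal().
// 3. Reservation and commit both succeed:
//      release the old reservation, record the new one in _rs, and
//      double _capacity (capped at MarkStackSizeMax).
// In the two surviving cases, _base and _index are then refreshed from
// the (re)initialized virtual space.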
236 269
237 270 void CMMarkStack::set_should_expand() {
 238 271 // If we're resetting the marking state because of a
239 272 // marking stack overflow, record that we should, if
240 273 // possible, expand the stack.
241 274 _should_expand = _cm->has_overflown();
242 275 }
243 276
244 277 CMMarkStack::~CMMarkStack() {
245 278 if (_base != NULL) {
246 279 _base = NULL;
247 280 _virtual_space.release();
248 281 }
249 282 }
250 283
251 284 void CMMarkStack::par_push(oop ptr) {
252 285 while (true) {
253 286 if (isFull()) {
254 287 _overflow = true;
255 288 return;
256 289 }
257 290 // Otherwise...
258 291 jint index = _index;
259 292 jint next_index = index+1;
260 293 jint res = Atomic::cmpxchg(next_index, &_index, index);
261 294 if (res == index) {
262 295 _base[index] = ptr;
263 296 // Note that we don't maintain this atomically. We could, but it
264 297 // doesn't seem necessary.
265 298 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
266 299 return;
267 300 }
268 301 // Otherwise, we need to try again.
269 302 }
270 303 }
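
[Editor's note] par_push() claims a slot with a compare-and-swap on _index: Atomic::cmpxchg returns the value it observed, so the push succeeds only when that equals the index just read. The same loop restated with standard C++11 atomics — a sketch, not HotSpot's Atomic API:

#include <atomic>

std::atomic<int> g_index(0);   // stands in for CMMarkStack::_index

// Returns true and the claimed slot on success; false once full.
bool try_claim_slot(int capacity, int* out_slot) {
  while (true) {
    int index = g_index.load();
    if (index >= capacity) {
      return false;                            // overflow
    }
    if (g_index.compare_exchange_weak(index, index + 1)) {
      *out_slot = index;                       // we own slot 'index'
      return true;
    }
    // Lost the race to another pusher; retry with a fresh index.
  }
}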
271 304
272 305 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
273 306 while (true) {
274 307 if (isFull()) {
275 308 _overflow = true;
276 309 return;
277 310 }
278 311 // Otherwise...
279 312 jint index = _index;
280 313 jint next_index = index + n;
281 314 if (next_index > _capacity) {
282 315 _overflow = true;
283 316 return;
284 317 }
285 318 jint res = Atomic::cmpxchg(next_index, &_index, index);
286 319 if (res == index) {
287 320 for (int i = 0; i < n; i++) {
288 321 int ind = index + i;
289 322 assert(ind < _capacity, "By overflow test above.");
290 323 _base[ind] = ptr_arr[i];
291 324 }
292 325 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
293 326 return;
294 327 }
295 328 // Otherwise, we need to try again.
296 329 }
297 330 }
298 331
299 332 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
300 333 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
301 334 jint start = _index;
302 335 jint next_index = start + n;
303 336 if (next_index > _capacity) {
304 337 _overflow = true;
305 338 return;
306 339 }
307 340 // Otherwise.
308 341 _index = next_index;
309 342 for (int i = 0; i < n; i++) {
310 343 int ind = start + i;
311 344 assert(ind < _capacity, "By overflow test above.");
312 345 _base[ind] = ptr_arr[i];
313 346 }
314 347 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
315 348 }
316 349
317 350 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
318 351 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
319 352 jint index = _index;
320 353 if (index == 0) {
321 354 *n = 0;
322 355 return false;
323 356 } else {
324 357 int k = MIN2(max, index);
325 358 jint new_ind = index - k;
326 359 for (int j = 0; j < k; j++) {
327 360 ptr_arr[j] = _base[new_ind + j];
328 361 }
329 362 _index = new_ind;
330 363 *n = k;
331 364 return true;
332 365 }
333 366 }
334 367
335 368 template<class OopClosureClass>
336 369 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
337 370 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
338 371 || SafepointSynchronize::is_at_safepoint(),
339 372 "Drain recursion must be yield-safe.");
340 373 bool res = true;
341 374 debug_only(_drain_in_progress = true);
342 375 debug_only(_drain_in_progress_yields = yield_after);
343 376 while (!isEmpty()) {
344 377 oop newOop = pop();
345 378 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
346 379 assert(newOop->is_oop(), "Expected an oop");
347 380 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
348 381 "only grey objects on this stack");
349 382 newOop->oop_iterate(cl);
350 383 if (yield_after && _cm->do_yield_check()) {
351 384 res = false;
352 385 break;
353 386 }
354 387 }
355 388 debug_only(_drain_in_progress = false);
356 389 return res;
357 390 }
358 391
359 392 void CMMarkStack::note_start_of_gc() {
360 393 assert(_saved_index == -1,
361 394 "note_start_of_gc()/end_of_gc() bracketed incorrectly");
362 395 _saved_index = _index;
363 396 }
364 397
365 398 void CMMarkStack::note_end_of_gc() {
366 399 // This is intentionally a guarantee, instead of an assert. If we
367 400 // accidentally add something to the mark stack during GC, it
368 401 // will be a correctness issue so it's better if we crash. we'll
 369 402 // will be a correctness issue so it's better if we crash. We'll
370 403 // issue in any way.
371 404 guarantee(_saved_index == _index,
372 405 err_msg("saved index: %d index: %d", _saved_index, _index));
373 406 _saved_index = -1;
374 407 }
375 408
376 409 void CMMarkStack::oops_do(OopClosure* f) {
377 410 assert(_saved_index == _index,
378 411 err_msg("saved index: %d index: %d", _saved_index, _index));
379 412 for (int i = 0; i < _index; i += 1) {
380 413 f->do_oop(&_base[i]);
381 414 }
382 415 }
383 416
384 417 bool ConcurrentMark::not_yet_marked(oop obj) const {
385 418 return _g1h->is_obj_ill(obj);
386 419 }
387 420
388 421 CMRootRegions::CMRootRegions() :
389 422 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
390 423 _should_abort(false), _next_survivor(NULL) { }
391 424
392 425 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
393 426 _young_list = g1h->young_list();
394 427 _cm = cm;
395 428 }
396 429
397 430 void CMRootRegions::prepare_for_scan() {
398 431 assert(!scan_in_progress(), "pre-condition");
399 432
400 433 // Currently, only survivors can be root regions.
401 434 assert(_next_survivor == NULL, "pre-condition");
402 435 _next_survivor = _young_list->first_survivor_region();
403 436 _scan_in_progress = (_next_survivor != NULL);
404 437 _should_abort = false;
405 438 }
406 439
407 440 HeapRegion* CMRootRegions::claim_next() {
408 441 if (_should_abort) {
409 442 // If someone has set the should_abort flag, we return NULL to
410 443 // force the caller to bail out of their loop.
411 444 return NULL;
412 445 }
413 446
414 447 // Currently, only survivors can be root regions.
415 448 HeapRegion* res = _next_survivor;
416 449 if (res != NULL) {
417 450 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
418 451 // Read it again in case it changed while we were waiting for the lock.
419 452 res = _next_survivor;
420 453 if (res != NULL) {
421 454 if (res == _young_list->last_survivor_region()) {
422 455 // We just claimed the last survivor so store NULL to indicate
423 456 // that we're done.
424 457 _next_survivor = NULL;
425 458 } else {
426 459 _next_survivor = res->get_next_young_region();
427 460 }
428 461 } else {
429 462 // Someone else claimed the last survivor while we were trying
430 463 // to take the lock so nothing else to do.
431 464 }
432 465 }
433 466 assert(res == NULL || res->is_survivor(), "post-condition");
434 467
435 468 return res;
436 469 }
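
[Editor's note] claim_next() is a double-checked claim: the racy first read lets workers bail out cheaply once the survivor list is exhausted, while the re-read under the lock makes the actual claim safe. The shape of it in a generic sketch (Region and its next field are illustrative stand-ins):

#include <cstddef>
#include <mutex>

struct Region { Region* next; };

Region* claim_next(Region* volatile& cursor, std::mutex& lock) {
  Region* res = cursor;              // racy read: cheap NULL bail-out
  if (res != NULL) {
    std::lock_guard<std::mutex> x(lock);
    res = cursor;                    // re-read under the lock
    if (res != NULL) {
      cursor = res->next;            // advance; NULL once exhausted
    }
  }
  return res;
}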
437 470
438 471 void CMRootRegions::scan_finished() {
439 472 assert(scan_in_progress(), "pre-condition");
440 473
441 474 // Currently, only survivors can be root regions.
442 475 if (!_should_abort) {
443 476 assert(_next_survivor == NULL, "we should have claimed all survivors");
444 477 }
445 478 _next_survivor = NULL;
446 479
447 480 {
448 481 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
449 482 _scan_in_progress = false;
450 483 RootRegionScan_lock->notify_all();
451 484 }
452 485 }
453 486
454 487 bool CMRootRegions::wait_until_scan_finished() {
455 488 if (!scan_in_progress()) return false;
456 489
457 490 {
458 491 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
459 492 while (scan_in_progress()) {
460 493 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
461 494 }
462 495 }
463 496 return true;
464 497 }
465 498
466 499 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
467 500 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
468 501 #endif // _MSC_VER
469 502
470 503 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
471 504 return MAX2((n_par_threads + 2) / 4, 1U);
472 505 }
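
[Editor's note] scale_parallel_threads() gives roughly one concurrent marking thread per four parallel GC threads, never fewer than one. With integer division:

// MAX2((n_par_threads + 2) / 4, 1U):
//   n =  1 ..  5  ->  1 marking thread
//   n =  6 ..  9  ->  2 marking threads
//   n = 10 .. 13  ->  3 marking threads
//   n = 14 .. 17  ->  4 marking threads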
473 506
474 507 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
475 508 _g1h(g1h),
476 509 _markBitMap1(MinObjAlignment - 1),
477 510 _markBitMap2(MinObjAlignment - 1),
478 511
479 512 _parallel_marking_threads(0),
480 513 _max_parallel_marking_threads(0),
481 514 _sleep_factor(0.0),
482 515 _marking_task_overhead(1.0),
483 516 _cleanup_sleep_factor(0.0),
484 517 _cleanup_task_overhead(1.0),
485 518 _cleanup_list("Cleanup List"),
486 519 _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
487 520 _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
488 521 CardTableModRefBS::card_shift,
489 522 false /* in_resource_area*/),
490 523
491 524 _prevMarkBitMap(&_markBitMap1),
492 525 _nextMarkBitMap(&_markBitMap2),
493 526
494 527 _markStack(this),
495 528 // _finger set in set_non_marking_state
496 529
497 530 _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
498 531 // _active_tasks set in set_non_marking_state
499 532 // _tasks set inside the constructor
500 533 _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
501 534 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
502 535
503 536 _has_overflown(false),
504 537 _concurrent(false),
505 538 _has_aborted(false),
506 539 _restart_for_overflow(false),
507 540 _concurrent_marking_in_progress(false),
508 541
509 542 // _verbose_level set below
510 543
511 544 _init_times(),
512 545 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
513 546 _cleanup_times(),
514 547 _total_counting_time(0.0),
515 548 _total_rs_scrub_time(0.0),
516 549
517 550 _parallel_workers(NULL),
518 551
519 552 _count_card_bitmaps(NULL),
520 553 _count_marked_bytes(NULL),
521 554 _completed_initialization(false) {
522 555 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
523 556 if (verbose_level < no_verbose) {
524 557 verbose_level = no_verbose;
525 558 }
526 559 if (verbose_level > high_verbose) {
527 560 verbose_level = high_verbose;
528 561 }
529 562 _verbose_level = verbose_level;
530 563
531 564 if (verbose_low()) {
532 565 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
533 566 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
534 567 }
535 568
536 569 if (!_markBitMap1.allocate(heap_rs)) {
537 570 warning("Failed to allocate first CM bit map");
538 571 return;
539 572 }
540 573 if (!_markBitMap2.allocate(heap_rs)) {
541 574 warning("Failed to allocate second CM bit map");
542 575 return;
543 576 }
544 577
545 578 // Create & start a ConcurrentMark thread.
546 579 _cmThread = new ConcurrentMarkThread(this);
547 580 assert(cmThread() != NULL, "CM Thread should have been created");
548 581 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
549 582
550 583 assert(CGC_lock != NULL, "Where's the CGC_lock?");
551 584 assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
552 585 assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
553 586
554 587 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
555 588 satb_qs.set_buffer_size(G1SATBBufferSize);
556 589
557 590 _root_regions.init(_g1h, this);
558 591
559 592 if (ConcGCThreads > ParallelGCThreads) {
560 593 warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
561 594 "than ParallelGCThreads (" UINT32_FORMAT ").",
562 595 ConcGCThreads, ParallelGCThreads);
563 596 return;
564 597 }
565 598 if (ParallelGCThreads == 0) {
566 599 // if we are not running with any parallel GC threads we will not
567 600 // spawn any marking threads either
568 601 _parallel_marking_threads = 0;
569 602 _max_parallel_marking_threads = 0;
570 603 _sleep_factor = 0.0;
571 604 _marking_task_overhead = 1.0;
572 605 } else {
573 606 if (ConcGCThreads > 0) {
574 607 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
575 608 // if both are set
576 609
577 610 _parallel_marking_threads = (uint) ConcGCThreads;
578 611 _max_parallel_marking_threads = _parallel_marking_threads;
579 612 _sleep_factor = 0.0;
580 613 _marking_task_overhead = 1.0;
581 614 } else if (G1MarkingOverheadPercent > 0) {
582 615 // we will calculate the number of parallel marking threads
583 616 // based on a target overhead with respect to the soft real-time
584 617 // goal
585 618
586 619 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
587 620 double overall_cm_overhead =
588 621 (double) MaxGCPauseMillis * marking_overhead /
589 622 (double) GCPauseIntervalMillis;
590 623 double cpu_ratio = 1.0 / (double) os::processor_count();
591 624 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
592 625 double marking_task_overhead =
593 626 overall_cm_overhead / marking_thread_num *
594 627 (double) os::processor_count();
595 628 double sleep_factor =
596 629 (1.0 - marking_task_overhead) / marking_task_overhead;
597 630
598 631 _parallel_marking_threads = (uint) marking_thread_num;
599 632 _max_parallel_marking_threads = _parallel_marking_threads;
600 633 _sleep_factor = sleep_factor;
601 634 _marking_task_overhead = marking_task_overhead;
602 635 } else {
603 636 _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
604 637 _max_parallel_marking_threads = _parallel_marking_threads;
605 638 _sleep_factor = 0.0;
606 639 _marking_task_overhead = 1.0;
607 640 }
608 641
609 642 if (parallel_marking_threads() > 1) {
610 643 _cleanup_task_overhead = 1.0;
611 644 } else {
612 645 _cleanup_task_overhead = marking_task_overhead();
613 646 }
614 647 _cleanup_sleep_factor =
615 648 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
616 649
617 650 #if 0
618 651 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
619 652 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
620 653 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
621 654 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
622 655 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
623 656 #endif
624 657
625 658 guarantee(parallel_marking_threads() > 0, "peace of mind");
626 659 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
627 660 _max_parallel_marking_threads, false, true);
628 661 if (_parallel_workers == NULL) {
629 662 vm_exit_during_initialization("Failed necessary allocation.");
630 663 } else {
631 664 _parallel_workers->initialize_workers();
632 665 }
633 666 }
634 667
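
[Editor's note] The G1MarkingOverheadPercent branch above is easiest to follow with numbers; a worked example under illustrative (not default) assumptions:

// Assume 8 CPUs, MaxGCPauseMillis=200, GCPauseIntervalMillis=1000,
// G1MarkingOverheadPercent=20:
//   marking_overhead      = 20 / 100           = 0.20
//   overall_cm_overhead   = 200 * 0.20 / 1000  = 0.04
//   cpu_ratio             = 1 / 8              = 0.125
//   marking_thread_num    = ceil(0.04 / 0.125) = 1
//   marking_task_overhead = 0.04 / 1 * 8       = 0.32
//   sleep_factor          = (1 - 0.32) / 0.32  = 2.125
// i.e. one marking thread that, after each do_marking_step() call,
// sleeps for about 2.1x the vtime it just spent marking (see
// CMConcurrentMarkingTask::work() below).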
635 668 if (FLAG_IS_DEFAULT(MarkStackSize)) {
636 669 uintx mark_stack_size =
637 670 MIN2(MarkStackSizeMax,
638 671 MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
639 672 // Verify that the calculated value for MarkStackSize is in range.
640 673 // It would be nice to use the private utility routine from Arguments.
641 674 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
642 675 warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
643 676 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
644 677 mark_stack_size, 1, MarkStackSizeMax);
645 678 return;
646 679 }
647 680 FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
648 681 } else {
649 682 // Verify MarkStackSize is in range.
650 683 if (FLAG_IS_CMDLINE(MarkStackSize)) {
651 684 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
652 685 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
653 686 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
654 687 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
655 688 MarkStackSize, 1, MarkStackSizeMax);
656 689 return;
657 690 }
658 691 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
659 692 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
660 693 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
661 694 " or for MarkStackSizeMax (" UINTX_FORMAT ")",
662 695 MarkStackSize, MarkStackSizeMax);
663 696 return;
664 697 }
665 698 }
666 699 }
667 700 }
668 701
669 702 if (!_markStack.allocate(MarkStackSize)) {
670 703 warning("Failed to allocate CM marking stack");
671 704 return;
672 705 }
673 706
674 707 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
675 708 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
676 709
677 710 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
678 711 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
679 712
680 713 BitMap::idx_t card_bm_size = _card_bm.size();
681 714
682 715 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
683 716 _active_tasks = _max_worker_id;
684 717
685 718 size_t max_regions = (size_t) _g1h->max_regions();
686 719 for (uint i = 0; i < _max_worker_id; ++i) {
687 720 CMTaskQueue* task_queue = new CMTaskQueue();
688 721 task_queue->initialize();
689 722 _task_queues->register_queue(i, task_queue);
690 723
691 724 _count_card_bitmaps[i] = BitMap(card_bm_size, false);
692 725 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
693 726
694 727 _tasks[i] = new CMTask(i, this,
695 728 _count_marked_bytes[i],
696 729 &_count_card_bitmaps[i],
697 730 task_queue, _task_queues);
698 731
699 732 _accum_task_vtime[i] = 0.0;
700 733 }
701 734
702 735 // Calculate the card number for the bottom of the heap. Used
703 736 // in biasing indexes into the accounting card bitmaps.
704 737 _heap_bottom_card_num =
705 738 intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
706 739 CardTableModRefBS::card_shift);
707 740
708 741 // Clear all the liveness counting data
709 742 clear_all_count_data();
710 743
711 744 // so that the call below can read a sensible value
712 745 _heap_start = (HeapWord*) heap_rs.base();
713 746 set_non_marking_state();
714 747 _completed_initialization = true;
715 748 }
716 749
717 750 void ConcurrentMark::update_g1_committed(bool force) {
718 751 // If concurrent marking is not in progress, then we do not need to
719 752 // update _heap_end.
720 753 if (!concurrent_marking_in_progress() && !force) return;
721 754
722 755 MemRegion committed = _g1h->g1_committed();
723 756 assert(committed.start() == _heap_start, "start shouldn't change");
724 757 HeapWord* new_end = committed.end();
725 758 if (new_end > _heap_end) {
726 759 // The heap has been expanded.
727 760
728 761 _heap_end = new_end;
729 762 }
730 763 // Notice that the heap can also shrink. However, this only happens
731 764 // during a Full GC (at least currently) and the entire marking
732 765 // phase will bail out and the task will not be restarted. So, let's
733 766 // do nothing.
734 767 }
735 768
736 769 void ConcurrentMark::reset() {
737 770 // Starting values for these two. This should be called in a STW
738 771 // phase. CM will be notified of any future g1_committed expansions
739 772 // will be at the end of evacuation pauses, when tasks are
740 773 // inactive.
741 774 MemRegion committed = _g1h->g1_committed();
742 775 _heap_start = committed.start();
743 776 _heap_end = committed.end();
744 777
745 778 // Separated the asserts so that we know which one fires.
746 779 assert(_heap_start != NULL, "heap bounds should look ok");
747 780 assert(_heap_end != NULL, "heap bounds should look ok");
748 781 assert(_heap_start < _heap_end, "heap bounds should look ok");
749 782
750 783 // reset all the marking data structures and any necessary flags
751 784 clear_marking_state();
752 785
753 786 if (verbose_low()) {
754 787 gclog_or_tty->print_cr("[global] resetting");
755 788 }
756 789
757 790 // We do reset all of them, since different phases will use
758 791 // different number of active threads. So, it's easiest to have all
759 792 // of them ready.
760 793 for (uint i = 0; i < _max_worker_id; ++i) {
761 794 _tasks[i]->reset(_nextMarkBitMap);
762 795 }
763 796
764 797 // we need this to make sure that the flag is on during the evac
765 798 // pause with initial mark piggy-backed
766 799 set_concurrent_marking_in_progress();
767 800 }
768 801
769 802 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
770 803 assert(active_tasks <= _max_worker_id, "we should not have more");
771 804
772 805 _active_tasks = active_tasks;
773 806 // Need to update the three data structures below according to the
774 807 // number of active threads for this phase.
775 808 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
776 809 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
777 810 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
778 811
779 812 _concurrent = concurrent;
780 813 // We propagate this to all tasks, not just the active ones.
781 814 for (uint i = 0; i < _max_worker_id; ++i)
782 815 _tasks[i]->set_concurrent(concurrent);
783 816
784 817 if (concurrent) {
785 818 set_concurrent_marking_in_progress();
786 819 } else {
787 820 // We currently assume that the concurrent flag has been set to
788 821 // false before we start remark. At this point we should also be
789 822 // in a STW phase.
790 823 assert(!concurrent_marking_in_progress(), "invariant");
791 824 assert(_finger == _heap_end, "only way to get here");
792 825 update_g1_committed(true);
793 826 }
794 827 }
795 828
796 829 void ConcurrentMark::set_non_marking_state() {
797 830 // We set the global marking state to some default values when we're
798 831 // not doing marking.
799 832 clear_marking_state();
800 833 _active_tasks = 0;
801 834 clear_concurrent_marking_in_progress();
802 835 }
803 836
804 837 ConcurrentMark::~ConcurrentMark() {
805 838 // The ConcurrentMark instance is never freed.
806 839 ShouldNotReachHere();
807 840 }
808 841
809 842 void ConcurrentMark::clearNextBitmap() {
810 843 G1CollectedHeap* g1h = G1CollectedHeap::heap();
811 844 G1CollectorPolicy* g1p = g1h->g1_policy();
812 845
813 846 // Make sure that the concurrent mark thread looks to still be in
814 847 // the current cycle.
815 848 guarantee(cmThread()->during_cycle(), "invariant");
816 849
817 850 // We are finishing up the current cycle by clearing the next
818 851 // marking bitmap and getting it ready for the next cycle. During
819 852 // this time no other cycle can start. So, let's make sure that this
820 853 // is the case.
821 854 guarantee(!g1h->mark_in_progress(), "invariant");
822 855
823 856 // clear the mark bitmap (no grey objects to start with).
824 857 // We need to do this in chunks and offer to yield in between
825 858 // each chunk.
826 859 HeapWord* start = _nextMarkBitMap->startWord();
827 860 HeapWord* end = _nextMarkBitMap->endWord();
828 861 HeapWord* cur = start;
829 862 size_t chunkSize = M;
830 863 while (cur < end) {
831 864 HeapWord* next = cur + chunkSize;
832 865 if (next > end) {
833 866 next = end;
834 867 }
835 868 MemRegion mr(cur,next);
836 869 _nextMarkBitMap->clearRange(mr);
837 870 cur = next;
838 871 do_yield_check();
839 872
840 873 // Repeat the asserts from above. We'll do them as asserts here to
841 874 // minimize their overhead on the product. However, we'll have
842 875 // them as guarantees at the beginning / end of the bitmap
843 876 // clearing to get some checking in the product.
844 877 assert(cmThread()->during_cycle(), "invariant");
845 878 assert(!g1h->mark_in_progress(), "invariant");
846 879 }
847 880
848 881 // Clear the liveness counting data
849 882 clear_all_count_data();
850 883
851 884 // Repeat the asserts from above.
852 885 guarantee(cmThread()->during_cycle(), "invariant");
853 886 guarantee(!g1h->mark_in_progress(), "invariant");
854 887 }
855 888
856 889 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
857 890 public:
858 891 bool doHeapRegion(HeapRegion* r) {
859 892 if (!r->continuesHumongous()) {
860 893 r->note_start_of_marking();
861 894 }
862 895 return false;
863 896 }
864 897 };
865 898
866 899 void ConcurrentMark::checkpointRootsInitialPre() {
867 900 G1CollectedHeap* g1h = G1CollectedHeap::heap();
868 901 G1CollectorPolicy* g1p = g1h->g1_policy();
869 902
870 903 _has_aborted = false;
871 904
872 905 #ifndef PRODUCT
873 906 if (G1PrintReachableAtInitialMark) {
874 907 print_reachable("at-cycle-start",
875 908 VerifyOption_G1UsePrevMarking, true /* all */);
876 909 }
877 910 #endif
878 911
879 912 // Initialise marking structures. This has to be done in a STW phase.
880 913 reset();
881 914
882 915 // For each region note start of marking.
883 916 NoteStartOfMarkHRClosure startcl;
884 917 g1h->heap_region_iterate(&startcl);
885 918 }
886 919
887 920
888 921 void ConcurrentMark::checkpointRootsInitialPost() {
889 922 G1CollectedHeap* g1h = G1CollectedHeap::heap();
890 923
891 924 // If we force an overflow during remark, the remark operation will
892 925 // actually abort and we'll restart concurrent marking. If we always
 893 926   // force an overflow during remark we'll never actually complete the
 894 927   // marking phase. So, we initialize this here, at the start of the
 895 928   // cycle, so that the remaining overflow count will decrease at
896 929 // every remark and we'll eventually not need to cause one.
897 930 force_overflow_stw()->init();
898 931
899 932 // Start Concurrent Marking weak-reference discovery.
900 933 ReferenceProcessor* rp = g1h->ref_processor_cm();
901 934 // enable ("weak") refs discovery
902 935 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
903 936 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
904 937
905 938 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 906 939   // This is the start of the marking cycle. We expect all
907 940 // threads to have SATB queues with active set to false.
908 941 satb_mq_set.set_active_all_threads(true, /* new active value */
909 942 false /* expected_active */);
910 943
911 944 _root_regions.prepare_for_scan();
912 945
913 946 // update_g1_committed() will be called at the end of an evac pause
914 947 // when marking is on. So, it's also called at the end of the
915 948 // initial-mark pause to update the heap end, if the heap expands
916 949 // during it. No need to call it here.
917 950 }
918 951
919 952 /*
920 953 * Notice that in the next two methods, we actually leave the STS
921 954 * during the barrier sync and join it immediately afterwards. If we
922 955 * do not do this, the following deadlock can occur: one thread could
923 956 * be in the barrier sync code, waiting for the other thread to also
924 957 * sync up, whereas another one could be trying to yield, while also
925 958 * waiting for the other threads to sync up too.
926 959 *
927 960 * Note, however, that this code is also used during remark and in
928 961 * this case we should not attempt to leave / enter the STS, otherwise
 929 962  * we'll either hit an assert (debug / fastdebug) or deadlock
930 963 * (product). So we should only leave / enter the STS if we are
931 964 * operating concurrently.
932 965 *
933 966 * Because the thread that does the sync barrier has left the STS, it
 934 967  * is possible for it to be suspended for a Full GC, or an evacuation pause
 935 968  * could occur. This is actually safe, since entering the sync
936 969 * barrier is one of the last things do_marking_step() does, and it
937 970 * doesn't manipulate any data structures afterwards.
938 971 */
939 972
940 973 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
941 974 if (verbose_low()) {
942 975 gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
943 976 }
944 977
945 978 if (concurrent()) {
946 979 ConcurrentGCThread::stsLeave();
947 980 }
948 981 _first_overflow_barrier_sync.enter();
949 982 if (concurrent()) {
950 983 ConcurrentGCThread::stsJoin();
951 984 }
952 985 // at this point everyone should have synced up and not be doing any
953 986 // more work
954 987
955 988 if (verbose_low()) {
956 989 gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
957 990 }
958 991
 959 992   // let the task associated with worker 0 do this
960 993 if (worker_id == 0) {
961 994 // task 0 is responsible for clearing the global data structures
962 995 // We should be here because of an overflow. During STW we should
963 996 // not clear the overflow flag since we rely on it being true when
 964 997     // we exit this method to abort the pause and restart concurrent
965 998 // marking.
966 999 clear_marking_state(concurrent() /* clear_overflow */);
967 1000 force_overflow()->update();
968 1001
969 1002 if (G1Log::fine()) {
970 1003 gclog_or_tty->date_stamp(PrintGCDateStamps);
971 1004 gclog_or_tty->stamp(PrintGCTimeStamps);
972 1005 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
973 1006 }
974 1007 }
975 1008
 976 1009   // after this, each task should reset its own data structures and
977 1010 // then go into the second barrier
978 1011 }
979 1012
980 1013 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
981 1014 if (verbose_low()) {
982 1015 gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
983 1016 }
984 1017
985 1018 if (concurrent()) {
986 1019 ConcurrentGCThread::stsLeave();
987 1020 }
988 1021 _second_overflow_barrier_sync.enter();
989 1022 if (concurrent()) {
990 1023 ConcurrentGCThread::stsJoin();
991 1024 }
992 1025 // at this point everything should be re-initialised and ready to go
993 1026
994 1027 if (verbose_low()) {
995 1028 gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
996 1029 }
997 1030 }
998 1031
999 1032 #ifndef PRODUCT
1000 1033 void ForceOverflowSettings::init() {
1001 1034 _num_remaining = G1ConcMarkForceOverflow;
1002 1035 _force = false;
1003 1036 update();
1004 1037 }
1005 1038
1006 1039 void ForceOverflowSettings::update() {
1007 1040 if (_num_remaining > 0) {
1008 1041 _num_remaining -= 1;
1009 1042 _force = true;
1010 1043 } else {
1011 1044 _force = false;
1012 1045 }
1013 1046 }
1014 1047
1015 1048 bool ForceOverflowSettings::should_force() {
1016 1049 if (_force) {
1017 1050 _force = false;
1018 1051 return true;
1019 1052 } else {
1020 1053 return false;
1021 1054 }
1022 1055 }
1023 1056 #endif // !PRODUCT
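
[Editor's note] These hooks exist purely for testing: in non-product builds, the develop flag G1ConcMarkForceOverflow=N makes the first N marking rounds each force a synthetic overflow, which exercises the restart and stack-expansion paths this patch touches. The countdown, in brief:

// init()         : _num_remaining = G1ConcMarkForceOverflow; update()
// update()       : arms _force while _num_remaining > 0, decrementing it
// should_force() : one-shot; consumes _force, returning true at most
//                  once per update()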
1024 1057
1025 1058 class CMConcurrentMarkingTask: public AbstractGangTask {
1026 1059 private:
1027 1060 ConcurrentMark* _cm;
1028 1061 ConcurrentMarkThread* _cmt;
1029 1062
1030 1063 public:
1031 1064 void work(uint worker_id) {
1032 1065 assert(Thread::current()->is_ConcurrentGC_thread(),
1033 1066 "this should only be done by a conc GC thread");
1034 1067 ResourceMark rm;
1035 1068
1036 1069 double start_vtime = os::elapsedVTime();
1037 1070
1038 1071 ConcurrentGCThread::stsJoin();
1039 1072
1040 1073 assert(worker_id < _cm->active_tasks(), "invariant");
1041 1074 CMTask* the_task = _cm->task(worker_id);
1042 1075 the_task->record_start_time();
1043 1076 if (!_cm->has_aborted()) {
1044 1077 do {
1045 1078 double start_vtime_sec = os::elapsedVTime();
1046 1079 double start_time_sec = os::elapsedTime();
1047 1080 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1048 1081
1049 1082 the_task->do_marking_step(mark_step_duration_ms,
1050 1083 true /* do_stealing */,
1051 1084 true /* do_termination */);
1052 1085
1053 1086 double end_time_sec = os::elapsedTime();
1054 1087 double end_vtime_sec = os::elapsedVTime();
1055 1088 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1056 1089 double elapsed_time_sec = end_time_sec - start_time_sec;
1057 1090 _cm->clear_has_overflown();
1058 1091
1059 1092 bool ret = _cm->do_yield_check(worker_id);
1060 1093
1061 1094 jlong sleep_time_ms;
1062 1095 if (!_cm->has_aborted() && the_task->has_aborted()) {
1063 1096 sleep_time_ms =
1064 1097 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1065 1098 ConcurrentGCThread::stsLeave();
1066 1099 os::sleep(Thread::current(), sleep_time_ms, false);
1067 1100 ConcurrentGCThread::stsJoin();
1068 1101 }
1069 1102 double end_time2_sec = os::elapsedTime();
1070 1103 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1071 1104
1072 1105 #if 0
1073 1106 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1074 1107 "overhead %1.4lf",
1075 1108 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1076 1109 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1077 1110 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1078 1111 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1079 1112 #endif
1080 1113 } while (!_cm->has_aborted() && the_task->has_aborted());
1081 1114 }
1082 1115 the_task->record_end_time();
1083 1116 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1084 1117
1085 1118 ConcurrentGCThread::stsLeave();
1086 1119
1087 1120 double end_vtime = os::elapsedVTime();
1088 1121 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1089 1122 }
1090 1123
1091 1124 CMConcurrentMarkingTask(ConcurrentMark* cm,
1092 1125 ConcurrentMarkThread* cmt) :
1093 1126 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1094 1127
1095 1128 ~CMConcurrentMarkingTask() { }
1096 1129 };
1097 1130
1098 1131 // Calculates the number of active workers for a concurrent
1099 1132 // phase.
1100 1133 uint ConcurrentMark::calc_parallel_marking_threads() {
1101 1134 if (G1CollectedHeap::use_parallel_gc_threads()) {
1102 1135 uint n_conc_workers = 0;
1103 1136 if (!UseDynamicNumberOfGCThreads ||
1104 1137 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1105 1138 !ForceDynamicNumberOfGCThreads)) {
1106 1139 n_conc_workers = max_parallel_marking_threads();
1107 1140 } else {
1108 1141 n_conc_workers =
1109 1142 AdaptiveSizePolicy::calc_default_active_workers(
1110 1143 max_parallel_marking_threads(),
1111 1144 1, /* Minimum workers */
1112 1145 parallel_marking_threads(),
1113 1146 Threads::number_of_non_daemon_threads());
1114 1147 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1115 1148 // that scaling has already gone into "_max_parallel_marking_threads".
1116 1149 }
1117 1150 assert(n_conc_workers > 0, "Always need at least 1");
1118 1151 return n_conc_workers;
1119 1152 }
1120 1153 // If we are not running with any parallel GC threads we will not
1121 1154 // have spawned any marking threads either. Hence the number of
1122 1155 // concurrent workers should be 0.
1123 1156 return 0;
1124 1157 }
1125 1158
1126 1159 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1127 1160 // Currently, only survivors can be root regions.
1128 1161 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1129 1162 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1130 1163
1131 1164 const uintx interval = PrefetchScanIntervalInBytes;
1132 1165 HeapWord* curr = hr->bottom();
1133 1166 const HeapWord* end = hr->top();
1134 1167 while (curr < end) {
1135 1168 Prefetch::read(curr, interval);
1136 1169 oop obj = oop(curr);
1137 1170 int size = obj->oop_iterate(&cl);
1138 1171 assert(size == obj->size(), "sanity");
1139 1172 curr += size;
1140 1173 }
1141 1174 }
1142 1175
1143 1176 class CMRootRegionScanTask : public AbstractGangTask {
1144 1177 private:
1145 1178 ConcurrentMark* _cm;
1146 1179
1147 1180 public:
1148 1181 CMRootRegionScanTask(ConcurrentMark* cm) :
1149 1182 AbstractGangTask("Root Region Scan"), _cm(cm) { }
1150 1183
1151 1184 void work(uint worker_id) {
1152 1185 assert(Thread::current()->is_ConcurrentGC_thread(),
1153 1186 "this should only be done by a conc GC thread");
1154 1187
1155 1188 CMRootRegions* root_regions = _cm->root_regions();
1156 1189 HeapRegion* hr = root_regions->claim_next();
1157 1190 while (hr != NULL) {
1158 1191 _cm->scanRootRegion(hr, worker_id);
1159 1192 hr = root_regions->claim_next();
1160 1193 }
1161 1194 }
1162 1195 };
1163 1196
1164 1197 void ConcurrentMark::scanRootRegions() {
1165 1198 // scan_in_progress() will have been set to true only if there was
1166 1199 // at least one root region to scan. So, if it's false, we
1167 1200 // should not attempt to do any further work.
1168 1201 if (root_regions()->scan_in_progress()) {
1169 1202 _parallel_marking_threads = calc_parallel_marking_threads();
1170 1203 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1171 1204 "Maximum number of marking threads exceeded");
1172 1205 uint active_workers = MAX2(1U, parallel_marking_threads());
1173 1206
1174 1207 CMRootRegionScanTask task(this);
1175 1208 if (parallel_marking_threads() > 0) {
1176 1209 _parallel_workers->set_active_workers((int) active_workers);
1177 1210 _parallel_workers->run_task(&task);
1178 1211 } else {
1179 1212 task.work(0);
1180 1213 }
1181 1214
1182 1215 // It's possible that has_aborted() is true here without actually
1183 1216 // aborting the survivor scan earlier. This is OK as it's
1184 1217 // mainly used for sanity checking.
1185 1218 root_regions()->scan_finished();
1186 1219 }
1187 1220 }
1188 1221
1189 1222 void ConcurrentMark::markFromRoots() {
1190 1223 // we might be tempted to assert that:
1191 1224 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1192 1225 // "inconsistent argument?");
1193 1226 // However that wouldn't be right, because it's possible that
1194 1227 // a safepoint is indeed in progress as a younger generation
1195 1228 // stop-the-world GC happens even as we mark in this generation.
1196 1229
1197 1230 _restart_for_overflow = false;
1198 1231 force_overflow_conc()->init();
1199 1232
1200 1233 // _g1h has _n_par_threads
1201 1234 _parallel_marking_threads = calc_parallel_marking_threads();
1202 1235 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1203 1236 "Maximum number of marking threads exceeded");
1204 1237
1205 1238 uint active_workers = MAX2(1U, parallel_marking_threads());
1206 1239
1207 1240 // Parallel task terminator is set in "set_phase()"
1208 1241 set_phase(active_workers, true /* concurrent */);
1209 1242
1210 1243 CMConcurrentMarkingTask markingTask(this, cmThread());
1211 1244 if (parallel_marking_threads() > 0) {
1212 1245 _parallel_workers->set_active_workers((int)active_workers);
 1213 1246     // Don't set _n_par_threads because it affects MT in process_strong_roots()
 1214 1247     // and the decisions on that MT processing are made elsewhere.
1215 1248 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1216 1249 _parallel_workers->run_task(&markingTask);
1217 1250 } else {
1218 1251 markingTask.work(0);
1219 1252 }
1220 1253 print_stats();
1221 1254 }
1222 1255
1223 1256 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1224 1257 // world is stopped at this checkpoint
1225 1258 assert(SafepointSynchronize::is_at_safepoint(),
1226 1259 "world should be stopped");
1227 1260
1228 1261 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1229 1262
1230 1263 // If a full collection has happened, we shouldn't do this.
1231 1264 if (has_aborted()) {
1232 1265 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1233 1266 return;
1234 1267 }
1235 1268
1236 1269 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1237 1270
1238 1271 if (VerifyDuringGC) {
1239 1272 HandleMark hm; // handle scope
1240 1273 gclog_or_tty->print(" VerifyDuringGC:(before)");
1241 1274 Universe::heap()->prepare_for_verify();
1242 1275 Universe::verify(/* silent */ false,
1243 1276 /* option */ VerifyOption_G1UsePrevMarking);
1244 1277 }
1245 1278
1246 1279 G1CollectorPolicy* g1p = g1h->g1_policy();
1247 1280 g1p->record_concurrent_mark_remark_start();
1248 1281
1249 1282 double start = os::elapsedTime();
1250 1283
1251 1284 checkpointRootsFinalWork();
1252 1285
1253 1286 double mark_work_end = os::elapsedTime();
1254 1287
1255 1288 weakRefsWork(clear_all_soft_refs);
1256 1289
1257 1290 if (has_overflown()) {
1258 1291 // Oops. We overflowed. Restart concurrent marking.
1259 1292 _restart_for_overflow = true;
1260 1293 // Clear the flag. We do not need it any more.
1261 1294 clear_has_overflown();
1262 1295 if (G1TraceMarkStackOverflow) {
1263 1296 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1264 1297 }
1265 1298 } else {
1266 1299 // Aggregate the per-task counting data that we have accumulated
1267 1300 // while marking.
1268 1301 aggregate_count_data();
1269 1302
1270 1303 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1271 1304 // We're done with marking.
1272 1305 // This is the end of the marking cycle, we're expected all
 1273 1306     // This is the end of the marking cycle. We expect all
1274 1307 satb_mq_set.set_active_all_threads(false, /* new active value */
1275 1308 true /* expected_active */);
1276 1309
1277 1310 if (VerifyDuringGC) {
1278 1311 HandleMark hm; // handle scope
1279 1312 gclog_or_tty->print(" VerifyDuringGC:(after)");
1280 1313 Universe::heap()->prepare_for_verify();
1281 1314 Universe::verify(/* silent */ false,
1282 1315 /* option */ VerifyOption_G1UseNextMarking);
1283 1316 }
1284 1317 assert(!restart_for_overflow(), "sanity");
1285 1318 }
1286 1319
1287 1320 // Expand the marking stack, if we have to and if we can.
1288 1321 if (_markStack.should_expand()) {
1289 1322 _markStack.expand();
1290 1323 }
1291 1324
1292 1325 // Reset the marking state if marking completed
1293 1326 if (!restart_for_overflow()) {
1294 1327 set_non_marking_state();
1295 1328 }
1296 1329
1297 1330 #if VERIFY_OBJS_PROCESSED
1298 1331 _scan_obj_cl.objs_processed = 0;
1299 1332 ThreadLocalObjQueue::objs_enqueued = 0;
1300 1333 #endif
1301 1334
1302 1335 // Statistics
1303 1336 double now = os::elapsedTime();
1304 1337 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1305 1338 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1306 1339 _remark_times.add((now - start) * 1000.0);
1307 1340
1308 1341 g1p->record_concurrent_mark_remark_end();
1309 1342 }
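
[Editor's note] To summarize checkpointRootsFinal(): a marking round ends in one of two ways, and the new expansion hook runs in both:

// overflow during remark -> set _restart_for_overflow; marking will be
//                           restarted concurrently (counting data is
//                           not aggregated yet).
// no overflow            -> aggregate the per-task counting data,
//                           deactivate the SATB queues, and reset to
//                           the non-marking state.
// either way             -> if the stack overflowed earlier,
//                           _markStack.expand() doubles its capacity,
//                           up to MarkStackSizeMax.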
1310 1343
1311 1344 // Base class of the closures that finalize and verify the
1312 1345 // liveness counting data.
1313 1346 class CMCountDataClosureBase: public HeapRegionClosure {
1314 1347 protected:
1315 1348 G1CollectedHeap* _g1h;
1316 1349 ConcurrentMark* _cm;
1317 1350 CardTableModRefBS* _ct_bs;
1318 1351
1319 1352 BitMap* _region_bm;
1320 1353 BitMap* _card_bm;
1321 1354
1322 1355 // Takes a region that's not empty (i.e., it has at least one
 1323 1356   // live object in it) and sets its corresponding bit on the region
1324 1357 // bitmap to 1. If the region is "starts humongous" it will also set
1325 1358 // to 1 the bits on the region bitmap that correspond to its
1326 1359 // associated "continues humongous" regions.
1327 1360 void set_bit_for_region(HeapRegion* hr) {
1328 1361 assert(!hr->continuesHumongous(), "should have filtered those out");
1329 1362
1330 1363 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1331 1364 if (!hr->startsHumongous()) {
1332 1365 // Normal (non-humongous) case: just set the bit.
1333 1366 _region_bm->par_at_put(index, true);
1334 1367 } else {
1335 1368 // Starts humongous case: calculate how many regions are part of
1336 1369 // this humongous region and then set the bit range.
1337 1370 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1338 1371 _region_bm->par_at_put_range(index, end_index, true);
1339 1372 }
1340 1373 }
1341 1374
1342 1375 public:
1343 1376 CMCountDataClosureBase(G1CollectedHeap* g1h,
1344 1377 BitMap* region_bm, BitMap* card_bm):
1345 1378 _g1h(g1h), _cm(g1h->concurrent_mark()),
1346 1379 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1347 1380 _region_bm(region_bm), _card_bm(card_bm) { }
1348 1381 };
1349 1382
1350 1383 // Closure that calculates the # live objects per region. Used
1351 1384 // for verification purposes during the cleanup pause.
1352 1385 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1353 1386 CMBitMapRO* _bm;
1354 1387 size_t _region_marked_bytes;
1355 1388
1356 1389 public:
1357 1390 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1358 1391 BitMap* region_bm, BitMap* card_bm) :
1359 1392 CMCountDataClosureBase(g1h, region_bm, card_bm),
1360 1393 _bm(bm), _region_marked_bytes(0) { }
1361 1394
1362 1395 bool doHeapRegion(HeapRegion* hr) {
1363 1396
1364 1397 if (hr->continuesHumongous()) {
1365 1398 // We will ignore these here and process them when their
1366 1399 // associated "starts humongous" region is processed (see
1367 1400 // set_bit_for_heap_region()). Note that we cannot rely on their
1368 1401 // associated "starts humongous" region to have their bit set to
1369 1402 // 1 since, due to the region chunking in the parallel region
1370 1403 // iteration, a "continues humongous" region might be visited
1371 1404 // before its associated "starts humongous".
1372 1405 return false;
1373 1406 }
1374 1407
1375 1408 HeapWord* ntams = hr->next_top_at_mark_start();
1376 1409 HeapWord* start = hr->bottom();
1377 1410
1378 1411 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1379 1412 err_msg("Preconditions not met - "
1380 1413 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1381 1414 start, ntams, hr->end()));
1382 1415
1383 1416 // Find the first marked object at or after "start".
1384 1417 start = _bm->getNextMarkedWordAddress(start, ntams);
1385 1418
1386 1419 size_t marked_bytes = 0;
1387 1420
1388 1421 while (start < ntams) {
1389 1422 oop obj = oop(start);
1390 1423 int obj_sz = obj->size();
1391 1424 HeapWord* obj_end = start + obj_sz;
1392 1425
1393 1426 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1394 1427 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1395 1428
1396 1429 // Note: if we're looking at the last region in the heap, obj_end
1397 1430 // could actually be just beyond the end of the heap; end_idx
1398 1431 // will then correspond to a (non-existent) card that is also
1399 1432 // just beyond the heap.
1400 1433 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1401 1434 // end of object is not card aligned - increment to cover
1402 1435 // all the cards spanned by the object
1403 1436 end_idx += 1;
1404 1437 }
1405 1438
1406 1439 // Set the bits in the card BM for the cards spanned by this object.
1407 1440 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1408 1441
1409 1442 // Add the size of this object to the number of marked bytes.
1410 1443 marked_bytes += (size_t)obj_sz * HeapWordSize;
1411 1444
1412 1445 // Find the next marked object after this one.
1413 1446 start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1414 1447 }
1415 1448
1416 1449 // Mark the allocated-since-marking portion...
1417 1450 HeapWord* top = hr->top();
1418 1451 if (ntams < top) {
1419 1452 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1420 1453 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1421 1454
1422 1455 // Note: if we're looking at the last region in the heap, top
1423 1456 // could actually be just beyond the end of the heap; end_idx
1424 1457 // will then correspond to a (non-existent) card that is also
1425 1458 // just beyond the heap.
1426 1459 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1427 1460 // top is not card aligned - increment to cover
1428 1461 // all the cards spanned by [ntams, top)
1429 1462 end_idx += 1;
1430 1463 }
1431 1464 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1432 1465
1433 1466 // This definitely means the region has live objects.
1434 1467 set_bit_for_region(hr);
1435 1468 }
1436 1469
1437 1470 // Update the live region bitmap.
1438 1471 if (marked_bytes > 0) {
1439 1472 set_bit_for_region(hr);
1440 1473 }
1441 1474
1442 1475 // Set the marked bytes for the current region so that
1443 1476 // it can be queried by a calling verification routine.
1444 1477 _region_marked_bytes = marked_bytes;
1445 1478
1446 1479 return false;
1447 1480 }
1448 1481
1449 1482 size_t region_marked_bytes() const { return _region_marked_bytes; }
1450 1483 };
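// [Editorial sketch - not part of this webrev] The card-index rounding used
// twice in the closure above, in isolation. Assumes G1's default 512-byte
// cards; all names are illustrative, not HotSpot APIs:
#if 0
#include <cstdint>
#include <cstddef>

static const size_t kCardSizeBytes = 512;  // assumption: 2^9-byte cards

static size_t card_index_for(uintptr_t addr) {
  return addr / kCardSizeBytes;
}

// Half-open card range [*start_idx, *end_idx) covering [start, end).
// When `end` is not card aligned the final, partial card must still be
// covered - the same reason as the `end_idx += 1` bumps above.
static void card_range_for(uintptr_t start, uintptr_t end,
                           size_t* start_idx, size_t* end_idx) {
  *start_idx = card_index_for(start);
  *end_idx   = card_index_for(end);
  if (end % kCardSizeBytes != 0) {
    *end_idx += 1;
  }
}
#endif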
1451 1484
1452 1485 // Heap region closure used for verifying the counting data
1453 1486 // that was accumulated concurrently and aggregated during
1454 1487 // the remark pause. This closure is applied to the heap
1455 1488 // regions during the STW cleanup pause.
1456 1489
1457 1490 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1458 1491 G1CollectedHeap* _g1h;
1459 1492 ConcurrentMark* _cm;
1460 1493 CalcLiveObjectsClosure _calc_cl;
1461 1494 BitMap* _region_bm; // Region BM to be verified
1462 1495 BitMap* _card_bm; // Card BM to be verified
1463 1496 bool _verbose; // verbose output?
1464 1497
1465 1498 BitMap* _exp_region_bm; // Expected Region BM values
1466 1499 BitMap* _exp_card_bm; // Expected card BM values
1467 1500
1468 1501 int _failures;
1469 1502
1470 1503 public:
1471 1504 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1472 1505 BitMap* region_bm,
1473 1506 BitMap* card_bm,
1474 1507 BitMap* exp_region_bm,
1475 1508 BitMap* exp_card_bm,
1476 1509 bool verbose) :
1477 1510 _g1h(g1h), _cm(g1h->concurrent_mark()),
1478 1511 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1479 1512 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1480 1513 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1481 1514 _failures(0) { }
1482 1515
1483 1516 int failures() const { return _failures; }
1484 1517
1485 1518 bool doHeapRegion(HeapRegion* hr) {
1486 1519 if (hr->continuesHumongous()) {
1487 1520 // We will ignore these here and process them when their
1488 1521 // associated "starts humongous" region is processed (see
1489 1522 // set_bit_for_region()). Note that we cannot rely on their
1490 1523 // associated "starts humongous" region to have its bit set to
1491 1524 // 1 since, due to the region chunking in the parallel region
1492 1525 // iteration, a "continues humongous" region might be visited
1493 1526 // before its associated "starts humongous".
1494 1527 return false;
1495 1528 }
1496 1529
1497 1530 int failures = 0;
1498 1531
1499 1532 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1500 1533 // this region and set the corresponding bits in the expected region
1501 1534 // and card bitmaps.
1502 1535 bool res = _calc_cl.doHeapRegion(hr);
1503 1536 assert(res == false, "should be continuing");
1504 1537
1505 1538 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1506 1539 Mutex::_no_safepoint_check_flag);
1507 1540
1508 1541 // Verify the marked bytes for this region.
1509 1542 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1510 1543 size_t act_marked_bytes = hr->next_marked_bytes();
1511 1544
1512 1545 // We're not OK if expected marked bytes > actual marked bytes. It means
1513 1546 // we have failed to account for some objects during the actual marking.
1514 1547 if (exp_marked_bytes > act_marked_bytes) {
1515 1548 if (_verbose) {
1516 1549 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1517 1550 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1518 1551 hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1519 1552 }
1520 1553 failures += 1;
1521 1554 }
1522 1555
1523 1556 // Verify the bit, for this region, in the actual and expected
1524 1557 // (which was just calculated) region bit maps.
1525 1558 // We're not OK if the bit in the calculated expected region
1526 1559 // bitmap is set and the bit in the actual region bitmap is not.
1527 1560 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1528 1561
1529 1562 bool expected = _exp_region_bm->at(index);
1530 1563 bool actual = _region_bm->at(index);
1531 1564 if (expected && !actual) {
1532 1565 if (_verbose) {
1533 1566 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1534 1567 "expected: %s, actual: %s",
1535 1568 hr->hrs_index(),
1536 1569 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1537 1570 }
1538 1571 failures += 1;
1539 1572 }
1540 1573
1541 1574 // Verify that the card bit maps for the cards spanned by the current
1542 1575 // region match. We have an error if we have a set bit in the expected
1543 1576 // bit map and the corresponding bit in the actual bitmap is not set.
1544 1577
1545 1578 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1546 1579 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1547 1580
1548 1581 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1549 1582 expected = _exp_card_bm->at(i);
1550 1583 actual = _card_bm->at(i);
1551 1584
1552 1585 if (expected && !actual) {
1553 1586 if (_verbose) {
1554 1587 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1555 1588 "expected: %s, actual: %s",
1556 1589 hr->hrs_index(), i,
1557 1590 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1558 1591 }
1559 1592 failures += 1;
1560 1593 }
1561 1594 }
1562 1595
1563 1596 if (failures > 0 && _verbose) {
1564 1597 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1565 1598 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1566 1599 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1567 1600 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1568 1601 }
1569 1602
1570 1603 _failures += failures;
1571 1604
1572 1605 // We could stop iteration over the heap when we
1573 1606 // find the first violating region by returning true.
1574 1607 return false;
1575 1608 }
1576 1609 };
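// [Editorial note - not part of this webrev] The comparisons above are
// deliberately one-sided: a bit set in the expected (recalculated) bitmaps
// but clear in the actual ones is an error, while the converse is allowed -
// the data accumulated during marking may legitimately be a superset of
// what a fresh walk of the final mark bitmap produces.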
1577 1610
1578 1611
1579 1612 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1580 1613 protected:
1581 1614 G1CollectedHeap* _g1h;
1582 1615 ConcurrentMark* _cm;
1583 1616 BitMap* _actual_region_bm;
1584 1617 BitMap* _actual_card_bm;
1585 1618
1586 1619 uint _n_workers;
1587 1620
1588 1621 BitMap* _expected_region_bm;
1589 1622 BitMap* _expected_card_bm;
1590 1623
1591 1624 int _failures;
1592 1625 bool _verbose;
1593 1626
1594 1627 public:
1595 1628 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1596 1629 BitMap* region_bm, BitMap* card_bm,
1597 1630 BitMap* expected_region_bm, BitMap* expected_card_bm)
1598 1631 : AbstractGangTask("G1 verify final counting"),
1599 1632 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1600 1633 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1601 1634 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1602 1635 _failures(0), _verbose(false),
1603 1636 _n_workers(0) {
1604 1637 assert(VerifyDuringGC, "don't call this otherwise");
1605 1638
1606 1639 // Use the value already set as the number of active threads
1607 1640 // in the call to run_task().
1608 1641 if (G1CollectedHeap::use_parallel_gc_threads()) {
1609 1642 assert(_g1h->workers()->active_workers() > 0,
1610 1643 "Should have been previously set");
1611 1644 _n_workers = _g1h->workers()->active_workers();
1612 1645 } else {
1613 1646 _n_workers = 1;
1614 1647 }
1615 1648
1616 1649 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1617 1650 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1618 1651
1619 1652 _verbose = _cm->verbose_medium();
1620 1653 }
1621 1654
1622 1655 void work(uint worker_id) {
1623 1656 assert(worker_id < _n_workers, "invariant");
1624 1657
1625 1658 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1626 1659 _actual_region_bm, _actual_card_bm,
1627 1660 _expected_region_bm,
1628 1661 _expected_card_bm,
1629 1662 _verbose);
1630 1663
1631 1664 if (G1CollectedHeap::use_parallel_gc_threads()) {
1632 1665 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1633 1666 worker_id,
1634 1667 _n_workers,
1635 1668 HeapRegion::VerifyCountClaimValue);
1636 1669 } else {
1637 1670 _g1h->heap_region_iterate(&verify_cl);
1638 1671 }
1639 1672
1640 1673 Atomic::add(verify_cl.failures(), &_failures);
1641 1674 }
1642 1675
1643 1676 int failures() const { return _failures; }
1644 1677 };
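// [Editorial note - not part of this webrev] Each worker folds its
// closure-local failure count into the shared _failures via Atomic::add,
// so the verify task needs no lock even when run by several GC workers.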
1645 1678
1646 1679 // Closure that finalizes the liveness counting data.
1647 1680 // Used during the cleanup pause.
1648 1681 // Sets the bits corresponding to the interval [NTAMS, top)
1649 1682 // (which contains the implicitly live objects) in the
1650 1683 // card liveness bitmap. Also sets, in the region liveness
1651 1684 // bitmap, the bit for each region that contains live data.
1652 1685
1653 1686 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1654 1687 public:
1655 1688 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1656 1689 BitMap* region_bm,
1657 1690 BitMap* card_bm) :
1658 1691 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1659 1692
1660 1693 bool doHeapRegion(HeapRegion* hr) {
1661 1694
1662 1695 if (hr->continuesHumongous()) {
1663 1696 // We will ignore these here and process them when their
1664 1697 // associated "starts humongous" region is processed (see
1665 1698 // set_bit_for_region()). Note that we cannot rely on their
1666 1699 // associated "starts humongous" region to have its bit set to
1667 1700 // 1 since, due to the region chunking in the parallel region
1668 1701 // iteration, a "continues humongous" region might be visited
1669 1702 // before its associated "starts humongous".
1670 1703 return false;
1671 1704 }
1672 1705
1673 1706 HeapWord* ntams = hr->next_top_at_mark_start();
1674 1707 HeapWord* top = hr->top();
1675 1708
1676 1709 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1677 1710
1678 1711 // Mark the allocated-since-marking portion...
1679 1712 if (ntams < top) {
1680 1713 // This definitely means the region has live objects.
1681 1714 set_bit_for_region(hr);
1682 1715
1683 1716 // Now set the bits in the card bitmap for [ntams, top)
1684 1717 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1685 1718 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1686 1719
1687 1720 // Note: if we're looking at the last region in the heap, top
1688 1721 // could actually be just beyond the end of the heap; end_idx
1689 1722 // will then correspond to a (non-existent) card that is also
1690 1723 // just beyond the heap.
1691 1724 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1692 1725 // top is not card aligned - increment to cover
1693 1726 // all the cards spanned by [ntams, top)
1694 1727 end_idx += 1;
1695 1728 }
1696 1729
1697 1730 assert(end_idx <= _card_bm->size(),
1698 1731 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1699 1732 end_idx, _card_bm->size()));
1700 1733 assert(start_idx < _card_bm->size(),
1701 1734 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1702 1735 start_idx, _card_bm->size()));
1703 1736
1704 1737 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1705 1738 }
1706 1739
1707 1740 // Set the bit for the region if it contains live data
1708 1741 if (hr->next_marked_bytes() > 0) {
1709 1742 set_bit_for_region(hr);
1710 1743 }
1711 1744
1712 1745 return false;
1713 1746 }
1714 1747 };
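// [Editorial note - not part of this webrev] Objects in [NTAMS, top) were
// allocated while marking was in progress and are implicitly live, which is
// why the closure above sets their cards without consulting the mark
// bitmap. In bytes, that implicitly live portion is simply:
//
//   size_t implicit_live_bytes = pointer_delta(top, ntams) * HeapWordSize;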
1715 1748
1716 1749 class G1ParFinalCountTask: public AbstractGangTask {
1717 1750 protected:
1718 1751 G1CollectedHeap* _g1h;
1719 1752 ConcurrentMark* _cm;
1720 1753 BitMap* _actual_region_bm;
1721 1754 BitMap* _actual_card_bm;
1722 1755
1723 1756 uint _n_workers;
1724 1757
1725 1758 public:
1726 1759 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1727 1760 : AbstractGangTask("G1 final counting"),
1728 1761 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1729 1762 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1730 1763 _n_workers(0) {
1731 1764 // Use the value already set as the number of active threads
1732 1765 // in the call to run_task().
1733 1766 if (G1CollectedHeap::use_parallel_gc_threads()) {
1734 1767 assert(_g1h->workers()->active_workers() > 0,
1735 1768 "Should have been previously set");
1736 1769 _n_workers = _g1h->workers()->active_workers();
1737 1770 } else {
1738 1771 _n_workers = 1;
1739 1772 }
1740 1773 }
1741 1774
1742 1775 void work(uint worker_id) {
1743 1776 assert(worker_id < _n_workers, "invariant");
1744 1777
1745 1778 FinalCountDataUpdateClosure final_update_cl(_g1h,
1746 1779 _actual_region_bm,
1747 1780 _actual_card_bm);
1748 1781
1749 1782 if (G1CollectedHeap::use_parallel_gc_threads()) {
1750 1783 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1751 1784 worker_id,
1752 1785 _n_workers,
1753 1786 HeapRegion::FinalCountClaimValue);
1754 1787 } else {
1755 1788 _g1h->heap_region_iterate(&final_update_cl);
1756 1789 }
1757 1790 }
1758 1791 };
1759 1792
1760 1793 class G1ParNoteEndTask;
1761 1794
1762 1795 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1763 1796 G1CollectedHeap* _g1;
1764 1797 int _worker_num;
1765 1798 size_t _max_live_bytes;
1766 1799 uint _regions_claimed;
1767 1800 size_t _freed_bytes;
1768 1801 FreeRegionList* _local_cleanup_list;
1769 1802 OldRegionSet* _old_proxy_set;
1770 1803 HumongousRegionSet* _humongous_proxy_set;
1771 1804 HRRSCleanupTask* _hrrs_cleanup_task;
1772 1805 double _claimed_region_time;
1773 1806 double _max_region_time;
1774 1807
1775 1808 public:
1776 1809 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1777 1810 int worker_num,
1778 1811 FreeRegionList* local_cleanup_list,
1779 1812 OldRegionSet* old_proxy_set,
1780 1813 HumongousRegionSet* humongous_proxy_set,
1781 1814 HRRSCleanupTask* hrrs_cleanup_task) :
1782 1815 _g1(g1), _worker_num(worker_num),
1783 1816 _max_live_bytes(0), _regions_claimed(0),
1784 1817 _freed_bytes(0),
1785 1818 _claimed_region_time(0.0), _max_region_time(0.0),
1786 1819 _local_cleanup_list(local_cleanup_list),
1787 1820 _old_proxy_set(old_proxy_set),
1788 1821 _humongous_proxy_set(humongous_proxy_set),
1789 1822 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1790 1823
1791 1824 size_t freed_bytes() { return _freed_bytes; }
1792 1825
1793 1826 bool doHeapRegion(HeapRegion *hr) {
1794 1827 if (hr->continuesHumongous()) {
1795 1828 return false;
1796 1829 }
1797 1830 // We use a claim value of zero here because all regions
1798 1831 // were claimed with value 1 in the FinalCount task.
1799 1832 _g1->reset_gc_time_stamps(hr);
1800 1833 double start = os::elapsedTime();
1801 1834 _regions_claimed++;
1802 1835 hr->note_end_of_marking();
1803 1836 _max_live_bytes += hr->max_live_bytes();
1804 1837 _g1->free_region_if_empty(hr,
1805 1838 &_freed_bytes,
1806 1839 _local_cleanup_list,
1807 1840 _old_proxy_set,
1808 1841 _humongous_proxy_set,
1809 1842 _hrrs_cleanup_task,
1810 1843 true /* par */);
1811 1844 double region_time = (os::elapsedTime() - start);
1812 1845 _claimed_region_time += region_time;
1813 1846 if (region_time > _max_region_time) {
1814 1847 _max_region_time = region_time;
1815 1848 }
1816 1849 return false;
1817 1850 }
1818 1851
1819 1852 size_t max_live_bytes() { return _max_live_bytes; }
1820 1853 uint regions_claimed() { return _regions_claimed; }
1821 1854 double claimed_region_time_sec() { return _claimed_region_time; }
1822 1855 double max_region_time_sec() { return _max_region_time; }
1823 1856 };
1824 1857
1825 1858 class G1ParNoteEndTask: public AbstractGangTask {
1826 1859 friend class G1NoteEndOfConcMarkClosure;
1827 1860
1828 1861 protected:
1829 1862 G1CollectedHeap* _g1h;
1830 1863 size_t _max_live_bytes;
1831 1864 size_t _freed_bytes;
1832 1865 FreeRegionList* _cleanup_list;
1833 1866
1834 1867 public:
1835 1868 G1ParNoteEndTask(G1CollectedHeap* g1h,
1836 1869 FreeRegionList* cleanup_list) :
1837 1870 AbstractGangTask("G1 note end"), _g1h(g1h),
1838 1871 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1839 1872
1840 1873 void work(uint worker_id) {
1841 1874 double start = os::elapsedTime();
1842 1875 FreeRegionList local_cleanup_list("Local Cleanup List");
1843 1876 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1844 1877 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1845 1878 HRRSCleanupTask hrrs_cleanup_task;
1846 1879 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1847 1880 &old_proxy_set,
1848 1881 &humongous_proxy_set,
1849 1882 &hrrs_cleanup_task);
1850 1883 if (G1CollectedHeap::use_parallel_gc_threads()) {
1851 1884 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1852 1885 _g1h->workers()->active_workers(),
1853 1886 HeapRegion::NoteEndClaimValue);
1854 1887 } else {
1855 1888 _g1h->heap_region_iterate(&g1_note_end);
1856 1889 }
1857 1890 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1858 1891
1859 1892 // Now update the lists
1860 1893 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1861 1894 NULL /* free_list */,
1862 1895 &old_proxy_set,
1863 1896 &humongous_proxy_set,
1864 1897 true /* par */);
1865 1898 {
1866 1899 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1867 1900 _max_live_bytes += g1_note_end.max_live_bytes();
1868 1901 _freed_bytes += g1_note_end.freed_bytes();
1869 1902
1870 1903 // If we iterate over the global cleanup list at the end of
1871 1904 // cleanup to do this printing we will not guarantee to only
1872 1905 // generate output for the newly-reclaimed regions (the list
1873 1906 // might not be empty at the beginning of cleanup; we might
1874 1907 // still be working on its previous contents). So we do the
1875 1908 // printing here, before we append the new regions to the global
1876 1909 // cleanup list.
1877 1910
1878 1911 G1HRPrinter* hr_printer = _g1h->hr_printer();
1879 1912 if (hr_printer->is_active()) {
1880 1913 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1881 1914 while (iter.more_available()) {
1882 1915 HeapRegion* hr = iter.get_next();
1883 1916 hr_printer->cleanup(hr);
1884 1917 }
1885 1918 }
1886 1919
1887 1920 _cleanup_list->add_as_tail(&local_cleanup_list);
1888 1921 assert(local_cleanup_list.is_empty(), "post-condition");
1889 1922
1890 1923 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1891 1924 }
1892 1925 }
1893 1926 size_t max_live_bytes() { return _max_live_bytes; }
1894 1927 size_t freed_bytes() { return _freed_bytes; }
1895 1928 };
1896 1929
1897 1930 class G1ParScrubRemSetTask: public AbstractGangTask {
1898 1931 protected:
1899 1932 G1RemSet* _g1rs;
1900 1933 BitMap* _region_bm;
1901 1934 BitMap* _card_bm;
1902 1935 public:
1903 1936 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1904 1937 BitMap* region_bm, BitMap* card_bm) :
1905 1938 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1906 1939 _region_bm(region_bm), _card_bm(card_bm) { }
1907 1940
1908 1941 void work(uint worker_id) {
1909 1942 if (G1CollectedHeap::use_parallel_gc_threads()) {
1910 1943 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1911 1944 HeapRegion::ScrubRemSetClaimValue);
1912 1945 } else {
1913 1946 _g1rs->scrub(_region_bm, _card_bm);
1914 1947 }
1915 1948 }
1916 1949
1917 1950 };
1918 1951
1919 1952 void ConcurrentMark::cleanup() {
1920 1953 // world is stopped at this checkpoint
1921 1954 assert(SafepointSynchronize::is_at_safepoint(),
1922 1955 "world should be stopped");
1923 1956 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1924 1957
1925 1958 // If a full collection has happened, we shouldn't do this.
1926 1959 if (has_aborted()) {
1927 1960 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1928 1961 return;
1929 1962 }
1930 1963
1931 1964 HRSPhaseSetter x(HRSPhaseCleanup);
1932 1965 g1h->verify_region_sets_optional();
1933 1966
1934 1967 if (VerifyDuringGC) {
1935 1968 HandleMark hm; // handle scope
1936 1969 gclog_or_tty->print(" VerifyDuringGC:(before)");
1937 1970 Universe::heap()->prepare_for_verify();
1938 1971 Universe::verify(/* silent */ false,
1939 1972 /* option */ VerifyOption_G1UsePrevMarking);
1940 1973 }
1941 1974
1942 1975 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1943 1976 g1p->record_concurrent_mark_cleanup_start();
1944 1977
1945 1978 double start = os::elapsedTime();
1946 1979
1947 1980 HeapRegionRemSet::reset_for_cleanup_tasks();
1948 1981
1949 1982 uint n_workers;
1950 1983
1951 1984 // Do counting once more with the world stopped for good measure.
1952 1985 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1953 1986
1954 1987 if (G1CollectedHeap::use_parallel_gc_threads()) {
1955 1988 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1956 1989 "sanity check");
1957 1990
1958 1991 g1h->set_par_threads();
1959 1992 n_workers = g1h->n_par_threads();
1960 1993 assert(g1h->n_par_threads() == n_workers,
1961 1994 "Should not have been reset");
1962 1995 g1h->workers()->run_task(&g1_par_count_task);
1963 1996 // Done with the parallel phase so reset to 0.
1964 1997 g1h->set_par_threads(0);
1965 1998
1966 1999 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1967 2000 "sanity check");
1968 2001 } else {
1969 2002 n_workers = 1;
1970 2003 g1_par_count_task.work(0);
1971 2004 }
1972 2005
1973 2006 if (VerifyDuringGC) {
1974 2007 // Verify that the counting data accumulated during marking matches
1975 2008 // that calculated by walking the marking bitmap.
1976 2009
1977 2010 // Bitmaps to hold expected values
1978 2011 BitMap expected_region_bm(_region_bm.size(), false);
1979 2012 BitMap expected_card_bm(_card_bm.size(), false);
1980 2013
1981 2014 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1982 2015 &_region_bm,
1983 2016 &_card_bm,
1984 2017 &expected_region_bm,
1985 2018 &expected_card_bm);
1986 2019
1987 2020 if (G1CollectedHeap::use_parallel_gc_threads()) {
1988 2021 g1h->set_par_threads((int)n_workers);
1989 2022 g1h->workers()->run_task(&g1_par_verify_task);
1990 2023 // Done with the parallel phase so reset to 0.
1991 2024 g1h->set_par_threads(0);
1992 2025
1993 2026 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1994 2027 "sanity check");
1995 2028 } else {
1996 2029 g1_par_verify_task.work(0);
1997 2030 }
1998 2031
1999 2032 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2000 2033 }
2001 2034
2002 2035 size_t start_used_bytes = g1h->used();
2003 2036 g1h->set_marking_complete();
2004 2037
2005 2038 double count_end = os::elapsedTime();
2006 2039 double this_final_counting_time = (count_end - start);
2007 2040 _total_counting_time += this_final_counting_time;
2008 2041
2009 2042 if (G1PrintRegionLivenessInfo) {
2010 2043 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2011 2044 _g1h->heap_region_iterate(&cl);
2012 2045 }
2013 2046
2014 2047 // Install the newly completed next mark bitmap as "prev".
2015 2048 swapMarkBitMaps();
2016 2049
2017 2050 g1h->reset_gc_time_stamp();
2018 2051
2019 2052 // Note end of marking in all heap regions.
2020 2053 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2021 2054 if (G1CollectedHeap::use_parallel_gc_threads()) {
2022 2055 g1h->set_par_threads((int)n_workers);
2023 2056 g1h->workers()->run_task(&g1_par_note_end_task);
2024 2057 g1h->set_par_threads(0);
2025 2058
2026 2059 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2027 2060 "sanity check");
2028 2061 } else {
2029 2062 g1_par_note_end_task.work(0);
2030 2063 }
2031 2064 g1h->check_gc_time_stamps();
2032 2065
2033 2066 if (!cleanup_list_is_empty()) {
2034 2067 // The cleanup list is not empty, so we'll have to process it
2035 2068 // concurrently. Notify anyone else that might be wanting free
2036 2069 // regions that there will be more free regions coming soon.
2037 2070 g1h->set_free_regions_coming();
2038 2071 }
2039 2072
2040 2073 // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
2041 2074 // call below, since it affects the metric by which we sort the heap regions.
2042 2075 if (G1ScrubRemSets) {
2043 2076 double rs_scrub_start = os::elapsedTime();
2044 2077 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2045 2078 if (G1CollectedHeap::use_parallel_gc_threads()) {
2046 2079 g1h->set_par_threads((int)n_workers);
2047 2080 g1h->workers()->run_task(&g1_par_scrub_rs_task);
2048 2081 g1h->set_par_threads(0);
2049 2082
2050 2083 assert(g1h->check_heap_region_claim_values(
2051 2084 HeapRegion::ScrubRemSetClaimValue),
2052 2085 "sanity check");
2053 2086 } else {
2054 2087 g1_par_scrub_rs_task.work(0);
2055 2088 }
2056 2089
2057 2090 double rs_scrub_end = os::elapsedTime();
2058 2091 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2059 2092 _total_rs_scrub_time += this_rs_scrub_time;
2060 2093 }
2061 2094
2062 2095 // this will also free any regions totally full of garbage objects,
2063 2096 // and sort the regions.
2064 2097 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2065 2098
2066 2099 // Statistics.
2067 2100 double end = os::elapsedTime();
2068 2101 _cleanup_times.add((end - start) * 1000.0);
2069 2102
2070 2103 if (G1Log::fine()) {
2071 2104 g1h->print_size_transition(gclog_or_tty,
2072 2105 start_used_bytes,
2073 2106 g1h->used(),
2074 2107 g1h->capacity());
2075 2108 }
2076 2109
2077 2110 // Clean up will have freed any regions completely full of garbage.
2078 2111 // Update the soft reference policy with the new heap occupancy.
2079 2112 Universe::update_heap_info_at_gc();
2080 2113
2081 2114 // We need to make this be a "collection" so any collection pause that
2082 2115 // races with it goes around and waits for completeCleanup to finish.
2083 2116 g1h->increment_total_collections();
2084 2117
2085 2118 // We reclaimed old regions so we should calculate the sizes to make
2086 2119 // sure we update the old gen/space data.
2087 2120 g1h->g1mm()->update_sizes();
2088 2121
2089 2122 if (VerifyDuringGC) {
2090 2123 HandleMark hm; // handle scope
2091 2124 gclog_or_tty->print(" VerifyDuringGC:(after)");
2092 2125 Universe::heap()->prepare_for_verify();
2093 2126 Universe::verify(/* silent */ false,
2094 2127 /* option */ VerifyOption_G1UsePrevMarking);
2095 2128 }
2096 2129
2097 2130 g1h->verify_region_sets_optional();
2098 2131 }
2099 2132
2100 2133 void ConcurrentMark::completeCleanup() {
2101 2134 if (has_aborted()) return;
2102 2135
2103 2136 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2104 2137
2105 2138 _cleanup_list.verify_optional();
2106 2139 FreeRegionList tmp_free_list("Tmp Free List");
2107 2140
2108 2141 if (G1ConcRegionFreeingVerbose) {
2109 2142 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2110 2143 "cleanup list has %u entries",
2111 2144 _cleanup_list.length());
2112 2145 }
2113 2146
2114 2147 // No one else should be accessing the _cleanup_list at this point,
2115 2148 // so it's not necessary to take any locks
2116 2149 while (!_cleanup_list.is_empty()) {
2117 2150 HeapRegion* hr = _cleanup_list.remove_head();
2118 2151 assert(hr != NULL, "the list was not empty");
2119 2152 hr->par_clear();
2120 2153 tmp_free_list.add_as_tail(hr);
2121 2154
2122 2155 // Instead of adding one region at a time to the secondary_free_list,
2123 2156 // we accumulate them in the local list and move them a few at a
2124 2157 // time. This also cuts down on the number of notify_all() calls
2125 2158 // we do during this process. We'll also append the local list when
2126 2159 // _cleanup_list is empty (which means we just removed the last
2127 2160 // region from the _cleanup_list).
2128 2161 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2129 2162 _cleanup_list.is_empty()) {
2130 2163 if (G1ConcRegionFreeingVerbose) {
2131 2164 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2132 2165 "appending %u entries to the secondary_free_list, "
2133 2166 "cleanup list still has %u entries",
2134 2167 tmp_free_list.length(),
2135 2168 _cleanup_list.length());
2136 2169 }
2137 2170
2138 2171 {
2139 2172 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2140 2173 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2141 2174 SecondaryFreeList_lock->notify_all();
2142 2175 }
2143 2176
2144 2177 if (G1StressConcRegionFreeing) {
2145 2178 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2146 2179 os::sleep(Thread::current(), (jlong) 1, false);
2147 2180 }
2148 2181 }
2149 2182 }
2150 2183 }
2151 2184 assert(tmp_free_list.is_empty(), "post-condition");
2152 2185 }
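// [Editorial sketch - not part of this webrev] The batching pattern used by
// completeCleanup() above - drain a shared list through a local one and
// publish every N entries to limit lock traffic and notify_all() calls -
// reduced to a standalone stand-in. All names are illustrative:
#if 0
#include <list>
#include <mutex>
#include <condition_variable>

static std::list<int> g_secondary;   // stands in for the secondary free list
static std::mutex g_secondary_lock;
static std::condition_variable g_secondary_cv;

static void drain_in_batches(std::list<int>& cleanup, size_t batch_len) {
  std::list<int> local;
  while (!cleanup.empty()) {
    local.splice(local.end(), cleanup, cleanup.begin());
    // Publish when the local batch is full, or when we have just taken the
    // last entry (mirrors the % G1SecondaryFreeListAppendLength test above).
    if (local.size() % batch_len == 0 || cleanup.empty()) {
      std::lock_guard<std::mutex> g(g_secondary_lock);
      g_secondary.splice(g_secondary.end(), local);  // empties `local`
      g_secondary_cv.notify_all();
    }
  }
}
#endif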
2153 2186
2154 2187 // Support closures for reference processing in G1
2155 2188
2156 2189 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2157 2190 HeapWord* addr = (HeapWord*)obj;
2158 2191 return addr != NULL &&
2159 2192 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2160 2193 }
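// [Editorial note - not part of this webrev] An oop passes the is-alive
// test above if it lies outside the G1-reserved space, or if it is inside
// and not "ill" - i.e. it is either marked on the next bitmap or sits at
// or above NTAMS (allocated during marking, hence implicitly live).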
2161 2194
2162 2195 class G1CMKeepAliveClosure: public ExtendedOopClosure {
2163 2196 G1CollectedHeap* _g1;
2164 2197 ConcurrentMark* _cm;
2165 2198 public:
2166 2199 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2167 2200 _g1(g1), _cm(cm) {
2168 2201 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2169 2202 }
2170 2203
2171 2204 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2172 2205 virtual void do_oop( oop* p) { do_oop_work(p); }
2173 2206
2174 2207 template <class T> void do_oop_work(T* p) {
2175 2208 oop obj = oopDesc::load_decode_heap_oop(p);
2176 2209 HeapWord* addr = (HeapWord*)obj;
2177 2210
2178 2211 if (_cm->verbose_high()) {
2179 2212 gclog_or_tty->print_cr("\t[0] we're looking at location "
2180 2213 "*"PTR_FORMAT" = "PTR_FORMAT,
2181 2214 p, (void*) obj);
2182 2215 }
2183 2216
2184 2217 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2185 2218 _cm->mark_and_count(obj);
2186 2219 _cm->mark_stack_push(obj);
2187 2220 }
2188 2221 }
2189 2222 };
2190 2223
2191 2224 class G1CMDrainMarkingStackClosure: public VoidClosure {
2192 2225 ConcurrentMark* _cm;
2193 2226 CMMarkStack* _markStack;
2194 2227 G1CMKeepAliveClosure* _oopClosure;
2195 2228 public:
2196 2229 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2197 2230 G1CMKeepAliveClosure* oopClosure) :
2198 2231 _cm(cm),
2199 2232 _markStack(markStack),
2200 2233 _oopClosure(oopClosure) { }
2201 2234
2202 2235 void do_void() {
2203 2236 _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false);
2204 2237 }
2205 2238 };
2206 2239
2207 2240 // 'Keep Alive' closure used by parallel reference processing.
2208 2241 // An instance of this closure is used in the parallel reference processing
2209 2242 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2210 2243 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2211 2244 // placed onto discovered ref lists only once, so we can mark and push with no
2212 2245 // need to check whether the object has already been marked. Using the
2213 2246 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2214 2247 // operating on the global mark stack. This means that an individual
2215 2248 // worker would be doing lock-free pushes while it processes its own
2216 2249 // discovered ref list, followed by a drain call. If the discovered ref lists
2217 2250 // are unbalanced then this could cause interference with the other
2218 2251 // workers. Using a CMTask (and its embedded local data structures)
2219 2252 // avoids that potential interference.
2220 2253 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2221 2254 ConcurrentMark* _cm;
2222 2255 CMTask* _task;
2223 2256 int _ref_counter_limit;
2224 2257 int _ref_counter;
2225 2258 public:
2226 2259 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2227 2260 _cm(cm), _task(task),
2228 2261 _ref_counter_limit(G1RefProcDrainInterval) {
2229 2262 assert(_ref_counter_limit > 0, "sanity");
2230 2263 _ref_counter = _ref_counter_limit;
2231 2264 }
2232 2265
2233 2266 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2234 2267 virtual void do_oop( oop* p) { do_oop_work(p); }
2235 2268
2236 2269 template <class T> void do_oop_work(T* p) {
2237 2270 if (!_cm->has_overflown()) {
2238 2271 oop obj = oopDesc::load_decode_heap_oop(p);
2239 2272 if (_cm->verbose_high()) {
2240 2273 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2241 2274 "*"PTR_FORMAT" = "PTR_FORMAT,
2242 2275 _task->worker_id(), p, (void*) obj);
2243 2276 }
2244 2277
2245 2278 _task->deal_with_reference(obj);
2246 2279 _ref_counter--;
2247 2280
2248 2281 if (_ref_counter == 0) {
2249 2282 // We have dealt with _ref_counter_limit references, pushing them and objects
2250 2283 // reachable from them onto the local stack (and possibly the global stack).
2251 2284 // Call do_marking_step() to process these entries. We call the routine in a
2252 2285 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2253 2286 // with the entries that we've pushed as a result of the deal_with_reference
2254 2287 // calls above) or we overflow.
2255 2288 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2256 2289 // while there may still be some work to do. (See the comment at the
2257 2290 // beginning of CMTask::do_marking_step() for those conditions - one of which
2258 2291 // is reaching the specified time target.) It is only when
2259 2292 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2260 2293 // that the marking has completed.
2261 2294 do {
2262 2295 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2263 2296 _task->do_marking_step(mark_step_duration_ms,
2264 2297 false /* do_stealing */,
2265 2298 false /* do_termination */);
2266 2299 } while (_task->has_aborted() && !_cm->has_overflown());
2267 2300 _ref_counter = _ref_counter_limit;
2268 2301 }
2269 2302 } else {
2270 2303 if (_cm->verbose_high()) {
2271 2304 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2272 2305 }
2273 2306 }
2274 2307 }
2275 2308 };
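// [Editorial sketch - not part of this webrev] The drain-every-N cadence of
// the closure above, in isolation: process references, and after every N of
// them stop to drain what was pushed before accepting more. The helpers
// below are hypothetical stand-ins, not CMTask's real interface:
#if 0
// Hypothetical stand-ins for the CMTask operations used above:
static void push_reference(void* ref);   // push ref + newly reachable objs
static bool marking_step_aborted();      // run one bounded marking step
static bool overflowed();                // did the global stack overflow?

static const int kDrainInterval = 1000;  // stands in for G1RefProcDrainInterval
static int ref_counter = kDrainInterval;

static void keep_alive_one(void* ref) {
  push_reference(ref);
  if (--ref_counter == 0) {
    // Drain what we just pushed before taking more references; retry while
    // the step aborts for a reason other than stack overflow.
    while (marking_step_aborted() && !overflowed()) { /* retry */ }
    ref_counter = kDrainInterval;
  }
}
#endif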
2276 2309
2277 2310 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2278 2311 ConcurrentMark* _cm;
2279 2312 CMTask* _task;
2280 2313 public:
2281 2314 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2282 2315 _cm(cm), _task(task) { }
2283 2316
2284 2317 void do_void() {
2285 2318 do {
2286 2319 if (_cm->verbose_high()) {
2287 2320 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step",
2288 2321 _task->worker_id());
2289 2322 }
2290 2323
2291 2324 // We call CMTask::do_marking_step() to completely drain the local and
2292 2325 // global marking stacks. The routine is called in a loop, which we'll
2293 2326 // exit if there's nothing more to do (i.e. we've completely drained the
2294 2327 // entries that were pushed as a result of applying the
2295 2328 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2296 2329 // lists above) or we overflow the global marking stack.
2297 2330 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2298 2331 // while there may still be some work to do. (See the comment at the
2299 2332 // beginning of CMTask::do_marking_step() for those conditions - one of which
2300 2333 // is reaching the specified time target.) It is only when
2301 2334 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2302 2335 // that the marking has completed.
2303 2336
2304 2337 _task->do_marking_step(1000000000.0 /* something very large */,
2305 2338 true /* do_stealing */,
2306 2339 true /* do_termination */);
2307 2340 } while (_task->has_aborted() && !_cm->has_overflown());
2308 2341 }
2309 2342 };
2310 2343
2311 2344 // Implementation of AbstractRefProcTaskExecutor for parallel
2312 2345 // reference processing at the end of G1 concurrent marking
2313 2346
2314 2347 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2315 2348 private:
2316 2349 G1CollectedHeap* _g1h;
2317 2350 ConcurrentMark* _cm;
2318 2351 WorkGang* _workers;
2319 2352 int _active_workers;
2320 2353
2321 2354 public:
2322 2355 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2323 2356 ConcurrentMark* cm,
2324 2357 WorkGang* workers,
2325 2358 int n_workers) :
2326 2359 _g1h(g1h), _cm(cm),
2327 2360 _workers(workers), _active_workers(n_workers) { }
2328 2361
2329 2362 // Executes the given task using concurrent marking worker threads.
2330 2363 virtual void execute(ProcessTask& task);
2331 2364 virtual void execute(EnqueueTask& task);
2332 2365 };
2333 2366
2334 2367 class G1CMRefProcTaskProxy: public AbstractGangTask {
2335 2368 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2336 2369 ProcessTask& _proc_task;
2337 2370 G1CollectedHeap* _g1h;
2338 2371 ConcurrentMark* _cm;
2339 2372
2340 2373 public:
2341 2374 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2342 2375 G1CollectedHeap* g1h,
2343 2376 ConcurrentMark* cm) :
2344 2377 AbstractGangTask("Process reference objects in parallel"),
2345 2378 _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2346 2379
2347 2380 virtual void work(uint worker_id) {
2348 2381 CMTask* marking_task = _cm->task(worker_id);
2349 2382 G1CMIsAliveClosure g1_is_alive(_g1h);
2350 2383 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2351 2384 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2352 2385
2353 2386 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2354 2387 }
2355 2388 };
2356 2389
2357 2390 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2358 2391 assert(_workers != NULL, "Need parallel worker threads.");
2359 2392
2360 2393 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2361 2394
2362 2395 // We need to reset the phase for each task execution so that
2363 2396 // the termination protocol of CMTask::do_marking_step works.
2364 2397 _cm->set_phase(_active_workers, false /* concurrent */);
2365 2398 _g1h->set_par_threads(_active_workers);
2366 2399 _workers->run_task(&proc_task_proxy);
2367 2400 _g1h->set_par_threads(0);
2368 2401 }
2369 2402
2370 2403 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2371 2404 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2372 2405 EnqueueTask& _enq_task;
2373 2406
2374 2407 public:
2375 2408 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2376 2409 AbstractGangTask("Enqueue reference objects in parallel"),
2377 2410 _enq_task(enq_task) { }
2378 2411
2379 2412 virtual void work(uint worker_id) {
2380 2413 _enq_task.work(worker_id);
2381 2414 }
2382 2415 };
2383 2416
2384 2417 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2385 2418 assert(_workers != NULL, "Need parallel worker threads.");
2386 2419
2387 2420 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2388 2421
2389 2422 _g1h->set_par_threads(_active_workers);
2390 2423 _workers->run_task(&enq_task_proxy);
2391 2424 _g1h->set_par_threads(0);
2392 2425 }
2393 2426
2394 2427 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2395 2428 ResourceMark rm;
2396 2429 HandleMark hm;
2397 2430
2398 2431 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2399 2432
2400 2433 // Is alive closure.
2401 2434 G1CMIsAliveClosure g1_is_alive(g1h);
2402 2435
2403 2436 // Inner scope to exclude the cleaning of the string and symbol
2404 2437 // tables from the displayed time.
2405 2438 {
2406 2439 if (G1Log::finer()) {
2407 2440 gclog_or_tty->put(' ');
2408 2441 }
2409 2442 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2410 2443
2411 2444 ReferenceProcessor* rp = g1h->ref_processor_cm();
2412 2445
2413 2446 // See the comment in G1CollectedHeap::ref_processing_init()
2414 2447 // about how reference processing currently works in G1.
2415 2448
2416 2449 // Process weak references.
2417 2450 rp->setup_policy(clear_all_soft_refs);
2418 2451 assert(_markStack.isEmpty(), "mark stack should be empty");
2419 2452
2420 2453 G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2421 2454 G1CMDrainMarkingStackClosure
2422 2455 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2423 2456
2424 2457 // We use the work gang from the G1CollectedHeap and we utilize all
2425 2458 // the worker threads.
2426 2459 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2427 2460 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2428 2461
2429 2462 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2430 2463 g1h->workers(), active_workers);
2431 2464
2432 2465 if (rp->processing_is_mt()) {
2433 2466 // Set the degree of MT here. If the discovery is done MT, there
2434 2467 // may have been a different number of threads doing the discovery
2435 2468 // and a different number of discovered lists may have Ref objects.
2436 2469 // That is OK as long as the Reference lists are balanced (see
2437 2470 // balance_all_queues() and balance_queues()).
2438 2471 rp->set_active_mt_degree(active_workers);
2439 2472
2440 2473 rp->process_discovered_references(&g1_is_alive,
2441 2474 &g1_keep_alive,
2442 2475 &g1_drain_mark_stack,
2443 2476 &par_task_executor);
2444 2477
2445 2478 // The work routines of the parallel keep_alive and drain_marking_stack
2446 2479 // will set the has_overflown flag if we overflow the global marking
2447 2480 // stack.
2448 2481 } else {
2449 2482 rp->process_discovered_references(&g1_is_alive,
2450 2483 &g1_keep_alive,
2451 2484 &g1_drain_mark_stack,
2452 2485 NULL);
2453 2486 }
2454 2487
2455 2488 assert(_markStack.overflow() || _markStack.isEmpty(),
2456 2489 "mark stack should be empty (unless it overflowed)");
2457 2490 if (_markStack.overflow()) {
2458 2491 // Should have been done already when we tried to push an
2459 2492 // entry onto the global mark stack. But let's do it again.
2460 2493 set_has_overflown();
2461 2494 }
2462 2495
2463 2496 if (rp->processing_is_mt()) {
2464 2497 assert(rp->num_q() == active_workers, "why not");
2465 2498 rp->enqueue_discovered_references(&par_task_executor);
2466 2499 } else {
2467 2500 rp->enqueue_discovered_references();
2468 2501 }
2469 2502
2470 2503 rp->verify_no_references_recorded();
2471 2504 assert(!rp->discovery_enabled(), "Post condition");
2472 2505 }
2473 2506
2474 2507 // Now clean up stale oops in StringTable
2475 2508 StringTable::unlink(&g1_is_alive);
2476 2509 // Clean up unreferenced symbols in symbol table.
2477 2510 SymbolTable::unlink();
2478 2511 }
2479 2512
2480 2513 void ConcurrentMark::swapMarkBitMaps() {
2481 2514 CMBitMapRO* temp = _prevMarkBitMap;
2482 2515 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2483 2516 _nextMarkBitMap = (CMBitMap*) temp;
2484 2517 }
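// [Editorial note - not part of this webrev] Swapping the two pointers is
// how the just-completed "next" marking information becomes the new "prev"
// view at cleanup: an O(1) exchange instead of a copy. The old prev bitmap
// is then recycled as the next bitmap, to be cleared before the next cycle.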
2485 2518
2486 2519 class CMRemarkTask: public AbstractGangTask {
2487 2520 private:
2488 2521 ConcurrentMark *_cm;
2489 2522
2490 2523 public:
2491 2524 void work(uint worker_id) {
2492 2525 // Since all available tasks are actually started, we should
2493 2526 // only proceed if we're supposed to be active.
2494 2527 if (worker_id < _cm->active_tasks()) {
2495 2528 CMTask* task = _cm->task(worker_id);
2496 2529 task->record_start_time();
2497 2530 do {
2498 2531 task->do_marking_step(1000000000.0 /* something very large */,
2499 2532 true /* do_stealing */,
2500 2533 true /* do_termination */);
2501 2534 } while (task->has_aborted() && !_cm->has_overflown());
2502 2535 // If we overflow, then we do not want to restart. We instead
2503 2536 // want to abort remark and do concurrent marking again.
2504 2537 task->record_end_time();
2505 2538 }
2506 2539 }
2507 2540
2508 2541 CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2509 2542 AbstractGangTask("Par Remark"), _cm(cm) {
2510 2543 _cm->terminator()->reset_for_reuse(active_workers);
2511 2544 }
2512 2545 };
2513 2546
2514 2547 void ConcurrentMark::checkpointRootsFinalWork() {
2515 2548 ResourceMark rm;
2516 2549 HandleMark hm;
2517 2550 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2518 2551
2519 2552 g1h->ensure_parsability(false);
2520 2553
2521 2554 if (G1CollectedHeap::use_parallel_gc_threads()) {
2522 2555 G1CollectedHeap::StrongRootsScope srs(g1h);
2523 2556 // this is remark, so we'll use up all active threads
2524 2557 uint active_workers = g1h->workers()->active_workers();
2525 2558 if (active_workers == 0) {
2526 2559 assert(active_workers > 0, "Should have been set earlier");
2527 2560 active_workers = (uint) ParallelGCThreads;
2528 2561 g1h->workers()->set_active_workers(active_workers);
2529 2562 }
2530 2563 set_phase(active_workers, false /* concurrent */);
2531 2564 // Leave _parallel_marking_threads at its
2532 2565 // value originally calculated in the ConcurrentMark
2533 2566 // constructor and pass values of the active workers
2534 2567 // through the gang in the task.
2535 2568
2536 2569 CMRemarkTask remarkTask(this, active_workers);
2537 2570 g1h->set_par_threads(active_workers);
2538 2571 g1h->workers()->run_task(&remarkTask);
2539 2572 g1h->set_par_threads(0);
2540 2573 } else {
2541 2574 G1CollectedHeap::StrongRootsScope srs(g1h);
2542 2575 // this is remark, so we'll use up all available threads
2543 2576 uint active_workers = 1;
2544 2577 set_phase(active_workers, false /* concurrent */);
2545 2578
2546 2579 CMRemarkTask remarkTask(this, active_workers);
2547 2580 // We will start all available threads, even if we decide that the
2548 2581 // active_workers will be fewer. The extra ones will just bail out
2549 2582 // immediately.
2550 2583 remarkTask.work(0);
2551 2584 }
2552 2585 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2553 2586 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2554 2587
2555 2588 print_stats();
2556 2589
2557 2590 #if VERIFY_OBJS_PROCESSED
2558 2591 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2559 2592 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2560 2593 _scan_obj_cl.objs_processed,
2561 2594 ThreadLocalObjQueue::objs_enqueued);
2562 2595 guarantee(_scan_obj_cl.objs_processed ==
2563 2596 ThreadLocalObjQueue::objs_enqueued,
2564 2597 "Different number of objs processed and enqueued.");
2565 2598 }
2566 2599 #endif
2567 2600 }
2568 2601
2569 2602 #ifndef PRODUCT
2570 2603
2571 2604 class PrintReachableOopClosure: public OopClosure {
2572 2605 private:
2573 2606 G1CollectedHeap* _g1h;
2574 2607 outputStream* _out;
2575 2608 VerifyOption _vo;
2576 2609 bool _all;
2577 2610
2578 2611 public:
2579 2612 PrintReachableOopClosure(outputStream* out,
2580 2613 VerifyOption vo,
2581 2614 bool all) :
2582 2615 _g1h(G1CollectedHeap::heap()),
2583 2616 _out(out), _vo(vo), _all(all) { }
2584 2617
2585 2618 void do_oop(narrowOop* p) { do_oop_work(p); }
2586 2619 void do_oop( oop* p) { do_oop_work(p); }
2587 2620
2588 2621 template <class T> void do_oop_work(T* p) {
2589 2622 oop obj = oopDesc::load_decode_heap_oop(p);
2590 2623 const char* str = NULL;
2591 2624 const char* str2 = "";
2592 2625
2593 2626 if (obj == NULL) {
2594 2627 str = "";
2595 2628 } else if (!_g1h->is_in_g1_reserved(obj)) {
2596 2629 str = " O";
2597 2630 } else {
2598 2631 HeapRegion* hr = _g1h->heap_region_containing(obj);
2599 2632 guarantee(hr != NULL, "invariant");
2600 2633 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2601 2634 bool marked = _g1h->is_marked(obj, _vo);
2602 2635
2603 2636 if (over_tams) {
2604 2637 str = " >";
2605 2638 if (marked) {
2606 2639 str2 = " AND MARKED";
2607 2640 }
2608 2641 } else if (marked) {
2609 2642 str = " M";
2610 2643 } else {
2611 2644 str = " NOT";
2612 2645 }
2613 2646 }
2614 2647
2615 2648 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2616 2649 p, (void*) obj, str, str2);
2617 2650 }
2618 2651 };
2619 2652
2620 2653 class PrintReachableObjectClosure : public ObjectClosure {
2621 2654 private:
2622 2655 G1CollectedHeap* _g1h;
2623 2656 outputStream* _out;
2624 2657 VerifyOption _vo;
2625 2658 bool _all;
2626 2659 HeapRegion* _hr;
2627 2660
2628 2661 public:
2629 2662 PrintReachableObjectClosure(outputStream* out,
2630 2663 VerifyOption vo,
2631 2664 bool all,
2632 2665 HeapRegion* hr) :
2633 2666 _g1h(G1CollectedHeap::heap()),
2634 2667 _out(out), _vo(vo), _all(all), _hr(hr) { }
2635 2668
2636 2669 void do_object(oop o) {
2637 2670 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2638 2671 bool marked = _g1h->is_marked(o, _vo);
2639 2672 bool print_it = _all || over_tams || marked;
2640 2673
2641 2674 if (print_it) {
2642 2675 _out->print_cr(" "PTR_FORMAT"%s",
2643 2676 o, (over_tams) ? " >" : (marked) ? " M" : "");
2644 2677 PrintReachableOopClosure oopCl(_out, _vo, _all);
2645 2678 o->oop_iterate_no_header(&oopCl);
2646 2679 }
2647 2680 }
2648 2681 };
2649 2682
2650 2683 class PrintReachableRegionClosure : public HeapRegionClosure {
2651 2684 private:
2652 2685 G1CollectedHeap* _g1h;
2653 2686 outputStream* _out;
2654 2687 VerifyOption _vo;
2655 2688 bool _all;
2656 2689
2657 2690 public:
2658 2691 bool doHeapRegion(HeapRegion* hr) {
2659 2692 HeapWord* b = hr->bottom();
2660 2693 HeapWord* e = hr->end();
2661 2694 HeapWord* t = hr->top();
2662 2695 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2663 2696 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2664 2697 "TAMS: "PTR_FORMAT, b, e, t, p);
2665 2698 _out->cr();
2666 2699
2667 2700 HeapWord* from = b;
2668 2701 HeapWord* to = t;
2669 2702
2670 2703 if (to > from) {
2671 2704 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2672 2705 _out->cr();
2673 2706 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2674 2707 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2675 2708 _out->cr();
2676 2709 }
2677 2710
2678 2711 return false;
2679 2712 }
2680 2713
2681 2714 PrintReachableRegionClosure(outputStream* out,
2682 2715 VerifyOption vo,
2683 2716 bool all) :
2684 2717 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2685 2718 };
2686 2719
2687 2720 void ConcurrentMark::print_reachable(const char* str,
2688 2721 VerifyOption vo,
2689 2722 bool all) {
2690 2723 gclog_or_tty->cr();
2691 2724 gclog_or_tty->print_cr("== Doing heap dump... ");
2692 2725
2693 2726 if (G1PrintReachableBaseFile == NULL) {
2694 2727 gclog_or_tty->print_cr(" #### error: no base file defined");
2695 2728 return;
2696 2729 }
2697 2730
2698 2731 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2699 2732 (JVM_MAXPATHLEN - 1)) {
2700 2733 gclog_or_tty->print_cr(" #### error: file name too long");
2701 2734 return;
2702 2735 }
2703 2736
2704 2737 char file_name[JVM_MAXPATHLEN];
2705 2738 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2706 2739 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2707 2740
2708 2741 fileStream fout(file_name);
2709 2742 if (!fout.is_open()) {
2710 2743 gclog_or_tty->print_cr(" #### error: could not open file");
2711 2744 return;
2712 2745 }
2713 2746
2714 2747 outputStream* out = &fout;
2715 2748 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2716 2749 out->cr();
2717 2750
2718 2751 out->print_cr("--- ITERATING OVER REGIONS");
2719 2752 out->cr();
2720 2753 PrintReachableRegionClosure rcl(out, vo, all);
2721 2754 _g1h->heap_region_iterate(&rcl);
2722 2755 out->cr();
2723 2756
2724 2757 gclog_or_tty->print_cr(" done");
2725 2758 gclog_or_tty->flush();
2726 2759 }
2727 2760
2728 2761 #endif // PRODUCT
2729 2762
2730 2763 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2731 2764 // Note we are overriding the read-only view of the prev map here, via
2732 2765 // the cast.
2733 2766 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2734 2767 }
2735 2768
2736 2769 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2737 2770 _nextMarkBitMap->clearRange(mr);
2738 2771 }
2739 2772
2740 2773 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2741 2774 clearRangePrevBitmap(mr);
2742 2775 clearRangeNextBitmap(mr);
2743 2776 }
2744 2777
2745 2778 HeapRegion*
2746 2779 ConcurrentMark::claim_region(uint worker_id) {
2747 2780 // "checkpoint" the finger
2748 2781 HeapWord* finger = _finger;
2749 2782
2750 2783 // _heap_end will not change underneath our feet; it only changes at
2751 2784 // yield points.
2752 2785 while (finger < _heap_end) {
2753 2786 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2754 2787
2755 2788 // Note on how this code handles humongous regions. In the
2756 2789 // normal case the finger will reach the start of a "starts
2757 2790 // humongous" (SH) region. Its end will either be the end of the
2758 2791 // last "continues humongous" (CH) region in the sequence, or the
2759 2792 // standard end of the SH region (if the SH is the only region in
2760 2793 // the sequence). That way claim_region() will skip over the CH
2761 2794 // regions. However, there is a subtle race between a CM thread
2762 2795 // executing this method and a mutator thread doing a humongous
2763 2796 // object allocation. The two are not mutually exclusive as the CM
2764 2797 // thread does not need to hold the Heap_lock when it gets
2765 2798 // here. So there is a chance that claim_region() will come across
2766 2799 // a free region that's in the process of becoming a SH or a CH
2767 2800 // region. In the former case, it will either
2768 2801 // a) Miss the update to the region's end, in which case it will
2769 2802 // visit every subsequent CH region, will find their bitmaps
2770 2803 // empty, and do nothing, or
2771 2804 // b) Will observe the update of the region's end (in which case
2772 2805 // it will skip the subsequent CH regions).
2773 2806 // If it comes across a region that suddenly becomes CH, the
2774 2807 // scenario will be similar to b). So, the race between
2775 2808 // claim_region() and a humongous object allocation might force us
2776 2809 // to do a bit of unnecessary work (due to some unnecessary bitmap
2777 2810 // iterations) but it should not introduce any correctness issues.
2778 2811 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2779 2812 HeapWord* bottom = curr_region->bottom();
2780 2813 HeapWord* end = curr_region->end();
2781 2814 HeapWord* limit = curr_region->next_top_at_mark_start();
2782 2815
2783 2816 if (verbose_low()) {
2784 2817 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2785 2818 "["PTR_FORMAT", "PTR_FORMAT"), "
2786 2819 "limit = "PTR_FORMAT,
2787 2820 worker_id, curr_region, bottom, end, limit);
2788 2821 }
2789 2822
2790 2823 // Is the gap between reading the finger and doing the CAS too long?
2791 2824 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2792 2825 if (res == finger) {
2793 2826 // we succeeded
2794 2827
2795 2828 // notice that _finger == end cannot be guaranteed here since
2796 2829 // someone else might have moved the finger even further
2797 2830 assert(_finger >= end, "the finger should have moved forward");
2798 2831
2799 2832 if (verbose_low()) {
2800 2833 gclog_or_tty->print_cr("[%u] we were successful with region = "
2801 2834 PTR_FORMAT, worker_id, curr_region);
2802 2835 }
2803 2836
2804 2837 if (limit > bottom) {
2805 2838 if (verbose_low()) {
2806 2839 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2807 2840 "returning it ", worker_id, curr_region);
2808 2841 }
2809 2842 return curr_region;
2810 2843 } else {
2811 2844 assert(limit == bottom,
2812 2845 "the region limit should be at bottom");
2813 2846 if (verbose_low()) {
2814 2847 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2815 2848 "returning NULL", worker_id, curr_region);
2816 2849 }
2817 2850 // we return NULL and the caller should try calling
2818 2851 // claim_region() again.
2819 2852 return NULL;
2820 2853 }
2821 2854 } else {
2822 2855 assert(_finger > finger, "the finger should have moved forward");
2823 2856 if (verbose_low()) {
2824 2857 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2825 2858 "global finger = "PTR_FORMAT", "
2826 2859 "our finger = "PTR_FORMAT,
2827 2860 worker_id, _finger, finger);
2828 2861 }
2829 2862
2830 2863 // read it again
2831 2864 finger = _finger;
2832 2865 }
2833 2866 }
2834 2867
2835 2868 return NULL;
2836 2869 }
2837 2870
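// [Editorial sketch -- not part of this patch] The claiming protocol
// used by claim_region() above (checkpoint the shared finger, compute
// the claim's end, CAS the finger forward, retry if somebody else
// moved it first) can be illustrated with the following minimal,
// self-contained analogue. All names are hypothetical and std::atomic
// stands in for HotSpot's Atomic::cmpxchg_ptr():

#include <atomic>
#include <cstddef>
#include <utility>

struct ClaimCursor {
  std::atomic<char*> finger;   // next unclaimed address
  char*              heap_end; // fixed while claiming is in progress
};

// Returns the claimed [start, end) chunk, or (NULL, NULL) once the
// cursor has reached heap_end. 'chunk' plays the role of the region
// size; in claim_region() the end is the containing region's end().
static std::pair<char*, char*> claim_chunk(ClaimCursor& c, size_t chunk) {
  char* start = c.finger.load();
  while (start < c.heap_end) {
    char* end = start + chunk;
    if (c.finger.compare_exchange_strong(start, end)) {
      return std::make_pair(start, end);  // we own [start, end)
    }
    // CAS failed: 'start' now holds the finger some other thread
    // installed ("read it again" above); loop and retry from there.
  }
  return std::make_pair(static_cast<char*>(NULL), static_cast<char*>(NULL));
}
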
2838 2871 #ifndef PRODUCT
2839 2872 enum VerifyNoCSetOopsPhase {
2840 2873 VerifyNoCSetOopsStack,
2841 2874 VerifyNoCSetOopsQueues,
2842 2875 VerifyNoCSetOopsSATBCompleted,
2843 2876 VerifyNoCSetOopsSATBThread
2844 2877 };
2845 2878
2846 2879 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
2847 2880 private:
2848 2881 G1CollectedHeap* _g1h;
2849 2882 VerifyNoCSetOopsPhase _phase;
2850 2883 int _info;
2851 2884
2852 2885 const char* phase_str() {
2853 2886 switch (_phase) {
2854 2887 case VerifyNoCSetOopsStack: return "Stack";
2855 2888 case VerifyNoCSetOopsQueues: return "Queue";
2856 2889 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2857 2890 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
2858 2891 default: ShouldNotReachHere();
2859 2892 }
2860 2893 return NULL;
2861 2894 }
2862 2895
2863 2896 void do_object_work(oop obj) {
2864 2897 guarantee(!_g1h->obj_in_cs(obj),
2865 2898 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2866 2899 (void*) obj, phase_str(), _info));
2867 2900 }
2868 2901
2869 2902 public:
2870 2903 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2871 2904
2872 2905 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2873 2906 _phase = phase;
2874 2907 _info = info;
2875 2908 }
2876 2909
2877 2910 virtual void do_oop(oop* p) {
2878 2911 oop obj = oopDesc::load_decode_heap_oop(p);
2879 2912 do_object_work(obj);
2880 2913 }
2881 2914
2882 2915 virtual void do_oop(narrowOop* p) {
2883 2916 // We should not come across narrow oops while scanning marking
2884 2917 // stacks and SATB buffers.
2885 2918 ShouldNotReachHere();
2886 2919 }
2887 2920
2888 2921 virtual void do_object(oop obj) {
2889 2922 do_object_work(obj);
2890 2923 }
2891 2924 };
2892 2925
2893 2926 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2894 2927 bool verify_enqueued_buffers,
2895 2928 bool verify_thread_buffers,
2896 2929 bool verify_fingers) {
2897 2930 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2898 2931 if (!G1CollectedHeap::heap()->mark_in_progress()) {
2899 2932 return;
2900 2933 }
2901 2934
2902 2935 VerifyNoCSetOopsClosure cl;
2903 2936
2904 2937 if (verify_stacks) {
2905 2938 // Verify entries on the global mark stack
2906 2939 cl.set_phase(VerifyNoCSetOopsStack);
2907 2940 _markStack.oops_do(&cl);
2908 2941
2909 2942 // Verify entries on the task queues
2910 2943 for (uint i = 0; i < _max_worker_id; i += 1) {
2911 2944 cl.set_phase(VerifyNoCSetOopsQueues, i);
2912 2945 CMTaskQueue* queue = _task_queues->queue(i);
2913 2946 queue->oops_do(&cl);
2914 2947 }
2915 2948 }
2916 2949
2917 2950 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2918 2951
2919 2952 // Verify entries on the enqueued SATB buffers
2920 2953 if (verify_enqueued_buffers) {
2921 2954 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2922 2955 satb_qs.iterate_completed_buffers_read_only(&cl);
2923 2956 }
2924 2957
2925 2958 // Verify entries on the per-thread SATB buffers
2926 2959 if (verify_thread_buffers) {
2927 2960 cl.set_phase(VerifyNoCSetOopsSATBThread);
2928 2961 satb_qs.iterate_thread_buffers_read_only(&cl);
2929 2962 }
2930 2963
2931 2964 if (verify_fingers) {
2932 2965 // Verify the global finger
2933 2966 HeapWord* global_finger = finger();
2934 2967 if (global_finger != NULL && global_finger < _heap_end) {
2935 2968 // The global finger always points to a heap region boundary. We
2936 2969 // use heap_region_containing_raw() to get the containing region
2937 2970 // given that the global finger could be pointing to a free region
2938 2971 // which subsequently becomes a continues humongous region. If that
2939 2972 // happens, heap_region_containing() will return the bottom of the
2940 2973 // corresponding starts humongous region and the check below will
2941 2974 // not hold any more.
2942 2975 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2943 2976 guarantee(global_finger == global_hr->bottom(),
2944 2977 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2945 2978 global_finger, HR_FORMAT_PARAMS(global_hr)));
2946 2979 }
2947 2980
2948 2981 // Verify the task fingers
2949 2982 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2950 2983 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2951 2984 CMTask* task = _tasks[i];
2952 2985 HeapWord* task_finger = task->finger();
2953 2986 if (task_finger != NULL && task_finger < _heap_end) {
2954 2987 // See above note on the global finger verification.
2955 2988 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2956 2989 guarantee(task_finger == task_hr->bottom() ||
2957 2990 !task_hr->in_collection_set(),
2958 2991 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2959 2992 task_finger, HR_FORMAT_PARAMS(task_hr)));
2960 2993 }
2961 2994 }
2962 2995 }
2963 2996 }
2964 2997 #endif // PRODUCT
2965 2998
2966 2999 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2967 3000 _markStack.set_should_expand();
2968 3001 _markStack.setEmpty(); // Also clears the _markStack overflow flag
2969 3002 if (clear_overflow) {
2970 3003 clear_has_overflown();
2971 3004 } else {
2972 3005 assert(has_overflown(), "pre-condition");
2973 3006 }
2974 3007 _finger = _heap_start;
2975 3008
2976 3009 for (uint i = 0; i < _max_worker_id; ++i) {
2977 3010 CMTaskQueue* queue = _task_queues->queue(i);
2978 3011 queue->set_empty();
2979 3012 }
2980 3013 }
2981 3014
2982 3015 // Aggregate the counting data that was constructed concurrently
2983 3016 // with marking.
2984 3017 class AggregateCountDataHRClosure: public HeapRegionClosure {
2985 3018 G1CollectedHeap* _g1h;
2986 3019 ConcurrentMark* _cm;
2987 3020 CardTableModRefBS* _ct_bs;
2988 3021 BitMap* _cm_card_bm;
2989 3022 uint _max_worker_id;
2990 3023
2991 3024 public:
2992 3025 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2993 3026 BitMap* cm_card_bm,
2994 3027 uint max_worker_id) :
2995 3028 _g1h(g1h), _cm(g1h->concurrent_mark()),
2996 3029 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
2997 3030 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2998 3031
2999 3032 bool doHeapRegion(HeapRegion* hr) {
3000 3033 if (hr->continuesHumongous()) {
3001 3034 // We will ignore these here and process them when their
3002 3035 // associated "starts humongous" region is processed.
3003 3036 // Note that we cannot rely on their associated
3004 3037 // "starts humongous" region to have their bit set to 1
3005 3038 // since, due to the region chunking in the parallel region
3006 3039 // iteration, a "continues humongous" region might be visited
3007 3040 // before its associated "starts humongous".
3008 3041 return false;
3009 3042 }
3010 3043
3011 3044 HeapWord* start = hr->bottom();
3012 3045 HeapWord* limit = hr->next_top_at_mark_start();
3013 3046 HeapWord* end = hr->end();
3014 3047
3015 3048 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3016 3049 err_msg("Preconditions not met - "
3017 3050 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3018 3051 "top: "PTR_FORMAT", end: "PTR_FORMAT,
3019 3052 start, limit, hr->top(), hr->end()));
3020 3053
3021 3054 assert(hr->next_marked_bytes() == 0, "Precondition");
3022 3055
3023 3056 if (start == limit) {
3024 3057 // NTAMS of this region has not been set so nothing to do.
3025 3058 return false;
3026 3059 }
3027 3060
3028 3061 // 'start' should be in the heap.
3029 3062 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3030 3063 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3031 3064 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3032 3065
3033 3066 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3034 3067 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3035 3068 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3036 3069
3037 3070 // If ntams is not card aligned then we bump the card bitmap index
3038 3071 // for limit so that we get all the cards spanned by
3039 3072 // the object ending at ntams.
3040 3073 // Note: if this is the last region in the heap then ntams
3041 3074 // could actually be just beyond the end of the heap;
3042 3075 // limit_idx will then correspond to a (non-existent) card
3043 3076 // that is also outside the heap.
3044 3077 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3045 3078 limit_idx += 1;
3046 3079 }
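// Worked example (editorial; assumes the usual 512-byte cards, i.e.
// CardTableModRefBS::card_shift == 9, and, for simplicity, a heap
// based at address 0): with start == 0x10000 and limit (ntams) ==
// 0x10300, start_idx == 0x10000 >> 9 == 0x80 and limit_idx ==
// 0x10300 >> 9 == 0x81. Since 0x10300 is not card aligned, limit_idx
// is bumped to 0x82, so the half-open range [start_idx, limit_idx)
// == [0x80, 0x82) includes card 0x81 -- the card covering
// 0x10200..0x103ff and hence the tail of the object ending at ntams.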
3047 3080
3048 3081 assert(limit_idx <= end_idx, "or else use atomics");
3049 3082
3050 3083 // Aggregate the "stripe" in the count data associated with hr.
3051 3084 uint hrs_index = hr->hrs_index();
3052 3085 size_t marked_bytes = 0;
3053 3086
3054 3087 for (uint i = 0; i < _max_worker_id; i += 1) {
3055 3088 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3056 3089 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3057 3090
3058 3091 // Fetch the marked_bytes in this region for task i and
3059 3092 // add it to the running total for this region.
3060 3093 marked_bytes += marked_bytes_array[hrs_index];
3061 3094
3062 3095 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3063 3096 // into the global card bitmap.
3064 3097 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3065 3098
3066 3099 while (scan_idx < limit_idx) {
3067 3100 assert(task_card_bm->at(scan_idx) == true, "should be");
3068 3101 _cm_card_bm->set_bit(scan_idx);
3069 3102 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3070 3103
3071 3104 // BitMap::get_next_one_offset() can handle the case when
3072 3105 // its left_offset parameter is greater than its right_offset
3073 3106 // parameter. It does, however, have an early exit if
3074 3107 // left_offset == right_offset. So let's limit the value
3075 3108 // passed in for left offset here.
3076 3109 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3077 3110 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3078 3111 }
3079 3112 }
3080 3113
3081 3114 // Update the marked bytes for this region.
3082 3115 hr->add_to_marked_bytes(marked_bytes);
3083 3116
3084 3117 // Next heap region
3085 3118 return false;
3086 3119 }
3087 3120 };
3088 3121
3089 3122 class G1AggregateCountDataTask: public AbstractGangTask {
3090 3123 protected:
3091 3124 G1CollectedHeap* _g1h;
3092 3125 ConcurrentMark* _cm;
3093 3126 BitMap* _cm_card_bm;
3094 3127 uint _max_worker_id;
3095 3128 int _active_workers;
3096 3129
3097 3130 public:
3098 3131 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3099 3132 ConcurrentMark* cm,
3100 3133 BitMap* cm_card_bm,
3101 3134 uint max_worker_id,
3102 3135 int n_workers) :
3103 3136 AbstractGangTask("Count Aggregation"),
3104 3137 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3105 3138 _max_worker_id(max_worker_id),
3106 3139 _active_workers(n_workers) { }
3107 3140
3108 3141 void work(uint worker_id) {
3109 3142 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3110 3143
3111 3144 if (G1CollectedHeap::use_parallel_gc_threads()) {
3112 3145 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3113 3146 _active_workers,
3114 3147 HeapRegion::AggregateCountClaimValue);
3115 3148 } else {
3116 3149 _g1h->heap_region_iterate(&cl);
3117 3150 }
3118 3151 }
3119 3152 };
3120 3153
3121 3154
3122 3155 void ConcurrentMark::aggregate_count_data() {
3123 3156 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3124 3157 _g1h->workers()->active_workers() :
3125 3158 1);
3126 3159
3127 3160 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3128 3161 _max_worker_id, n_workers);
3129 3162
3130 3163 if (G1CollectedHeap::use_parallel_gc_threads()) {
3131 3164 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3132 3165 "sanity check");
3133 3166 _g1h->set_par_threads(n_workers);
3134 3167 _g1h->workers()->run_task(&g1_par_agg_task);
3135 3168 _g1h->set_par_threads(0);
3136 3169
3137 3170 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3138 3171 "sanity check");
3139 3172 _g1h->reset_heap_region_claim_values();
3140 3173 } else {
3141 3174 g1_par_agg_task.work(0);
3142 3175 }
3143 3176 }
3144 3177
3145 3178 // Clear the per-worker arrays used to store the per-region counting data
3146 3179 void ConcurrentMark::clear_all_count_data() {
3147 3180 // Clear the global card bitmap - it will be filled during
3148 3181 // liveness count aggregation (during remark) and the
3149 3182 // final counting task.
3150 3183 _card_bm.clear();
3151 3184
3152 3185 // Clear the global region bitmap - it will be filled as part
3153 3186 // of the final counting task.
3154 3187 _region_bm.clear();
3155 3188
3156 3189 uint max_regions = _g1h->max_regions();
3157 3190 assert(_max_worker_id > 0, "uninitialized");
3158 3191
3159 3192 for (uint i = 0; i < _max_worker_id; i += 1) {
3160 3193 BitMap* task_card_bm = count_card_bitmap_for(i);
3161 3194 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3162 3195
3163 3196 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3164 3197 assert(marked_bytes_array != NULL, "uninitialized");
3165 3198
3166 3199 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3167 3200 task_card_bm->clear();
3168 3201 }
3169 3202 }
3170 3203
3171 3204 void ConcurrentMark::print_stats() {
3172 3205 if (verbose_stats()) {
3173 3206 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3174 3207 for (size_t i = 0; i < _active_tasks; ++i) {
3175 3208 _tasks[i]->print_stats();
3176 3209 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3177 3210 }
3178 3211 }
3179 3212 }
3180 3213
3181 3214 // abandon current marking iteration due to a Full GC
3182 3215 void ConcurrentMark::abort() {
3183 3216 // Clear all marks to force marking thread to do nothing
3184 3217 _nextMarkBitMap->clearAll();
3185 3218 // Clear the liveness counting data
3186 3219 clear_all_count_data();
3187 3220 // Empty mark stack
3188 3221 clear_marking_state();
3189 3222 for (uint i = 0; i < _max_worker_id; ++i) {
3190 3223 _tasks[i]->clear_region_fields();
3191 3224 }
3192 3225 _has_aborted = true;
3193 3226
3194 3227 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3195 3228 satb_mq_set.abandon_partial_marking();
3196 3229 // This can be called either during or outside marking; we'll read
3197 3230 // the expected_active value from the SATB queue set.
3198 3231 satb_mq_set.set_active_all_threads(
3199 3232 false, /* new active value */
3200 3233 satb_mq_set.is_active() /* expected_active */);
3201 3234 }
3202 3235
3203 3236 static void print_ms_time_info(const char* prefix, const char* name,
3204 3237 NumberSeq& ns) {
3205 3238 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3206 3239 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3207 3240 if (ns.num() > 0) {
3208 3241 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3209 3242 prefix, ns.sd(), ns.maximum());
3210 3243 }
3211 3244 }
3212 3245
3213 3246 void ConcurrentMark::print_summary_info() {
3214 3247 gclog_or_tty->print_cr(" Concurrent marking:");
3215 3248 print_ms_time_info(" ", "init marks", _init_times);
3216 3249 print_ms_time_info(" ", "remarks", _remark_times);
3217 3250 {
3218 3251 print_ms_time_info(" ", "final marks", _remark_mark_times);
3219 3252 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3220 3253
3221 3254 }
3222 3255 print_ms_time_info(" ", "cleanups", _cleanup_times);
3223 3256 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3224 3257 _total_counting_time,
3225 3258 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3226 3259 (double)_cleanup_times.num()
3227 3260 : 0.0));
3228 3261 if (G1ScrubRemSets) {
3229 3262 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3230 3263 _total_rs_scrub_time,
3231 3264 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3232 3265 (double)_cleanup_times.num()
3233 3266 : 0.0));
3234 3267 }
3235 3268 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3236 3269 (_init_times.sum() + _remark_times.sum() +
3237 3270 _cleanup_times.sum())/1000.0);
3238 3271 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3239 3272 "(%8.2f s marking).",
3240 3273 cmThread()->vtime_accum(),
3241 3274 cmThread()->vtime_mark_accum());
3242 3275 }
3243 3276
3244 3277 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3245 3278 _parallel_workers->print_worker_threads_on(st);
3246 3279 }
3247 3280
3248 3281 // We take a break if someone is trying to stop the world.
3249 3282 bool ConcurrentMark::do_yield_check(uint worker_id) {
3250 3283 if (should_yield()) {
3251 3284 if (worker_id == 0) {
3252 3285 _g1h->g1_policy()->record_concurrent_pause();
3253 3286 }
3254 3287 cmThread()->yield();
3255 3288 return true;
3256 3289 } else {
3257 3290 return false;
3258 3291 }
3259 3292 }
3260 3293
3261 3294 bool ConcurrentMark::should_yield() {
3262 3295 return cmThread()->should_yield();
3263 3296 }
3264 3297
3265 3298 bool ConcurrentMark::containing_card_is_marked(void* p) {
3266 3299 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3267 3300 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3268 3301 }
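// Example (editorial; same 512-byte-card assumption as in the
// aggregation code above): for a pointer p at byte offset 0x2a33 from
// reserved_region().start(), offset >> card_shift == 0x2a33 >> 9 ==
// 0x15, so this tests bit 0x15 of _card_bm, i.e. the card covering
// offsets 0x2a00..0x2bff.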
3269 3302
3270 3303 bool ConcurrentMark::containing_cards_are_marked(void* start,
3271 3304 void* last) {
3272 3305 return containing_card_is_marked(start) &&
3273 3306 containing_card_is_marked(last);
3274 3307 }
3275 3308
3276 3309 #ifndef PRODUCT
3277 3310 // for debugging purposes
3278 3311 void ConcurrentMark::print_finger() {
3279 3312 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3280 3313 _heap_start, _heap_end, _finger);
3281 3314 for (uint i = 0; i < _max_worker_id; ++i) {
3282 3315 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger());
3283 3316 }
3284 3317 gclog_or_tty->print_cr("");
3285 3318 }
3286 3319 #endif
3287 3320
3288 3321 void CMTask::scan_object(oop obj) {
3289 3322 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3290 3323
3291 3324 if (_cm->verbose_high()) {
3292 3325 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3293 3326 _worker_id, (void*) obj);
3294 3327 }
3295 3328
3296 3329 size_t obj_size = obj->size();
3297 3330 _words_scanned += obj_size;
3298 3331
3299 3332 obj->oop_iterate(_cm_oop_closure);
3300 3333 statsOnly( ++_objs_scanned );
3301 3334 check_limits();
3302 3335 }
3303 3336
3304 3337 // Closure for iteration over bitmaps
3305 3338 class CMBitMapClosure : public BitMapClosure {
3306 3339 private:
3307 3340 // the bitmap that is being iterated over
3308 3341 CMBitMap* _nextMarkBitMap;
3309 3342 ConcurrentMark* _cm;
3310 3343 CMTask* _task;
3311 3344
3312 3345 public:
3313 3346 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3314 3347 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3315 3348
3316 3349 bool do_bit(size_t offset) {
3317 3350 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3318 3351 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3319 3352 assert( addr < _cm->finger(), "invariant");
3320 3353
3321 3354 statsOnly( _task->increase_objs_found_on_bitmap() );
3322 3355 assert(addr >= _task->finger(), "invariant");
3323 3356
3324 3357 // We move that task's local finger along.
3325 3358 _task->move_finger_to(addr);
3326 3359
3327 3360 _task->scan_object(oop(addr));
3328 3361 // we only partially drain the local queue and global stack
3329 3362 _task->drain_local_queue(true);
3330 3363 _task->drain_global_stack(true);
3331 3364
3332 3365 // if the has_aborted flag has been raised, we need to bail out of
3333 3366 // the iteration
3334 3367 return !_task->has_aborted();
3335 3368 }
3336 3369 };
3337 3370
3338 3371 // Closure for iterating over objects, currently only used for
3339 3372 // processing SATB buffers.
3340 3373 class CMObjectClosure : public ObjectClosure {
3341 3374 private:
3342 3375 CMTask* _task;
3343 3376
3344 3377 public:
3345 3378 void do_object(oop obj) {
3346 3379 _task->deal_with_reference(obj);
3347 3380 }
3348 3381
3349 3382 CMObjectClosure(CMTask* task) : _task(task) { }
3350 3383 };
3351 3384
3352 3385 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3353 3386 ConcurrentMark* cm,
3354 3387 CMTask* task)
3355 3388 : _g1h(g1h), _cm(cm), _task(task) {
3356 3389 assert(_ref_processor == NULL, "should be initialized to NULL");
3357 3390
3358 3391 if (G1UseConcMarkReferenceProcessing) {
3359 3392 _ref_processor = g1h->ref_processor_cm();
3360 3393 assert(_ref_processor != NULL, "should not be NULL");
3361 3394 }
3362 3395 }
3363 3396
3364 3397 void CMTask::setup_for_region(HeapRegion* hr) {
3365 3398 // Separated the asserts so that we know which one fires.
3366 3399 assert(hr != NULL,
3367 3400 "claim_region() should have filtered out continues humongous regions");
3368 3401 assert(!hr->continuesHumongous(),
3369 3402 "claim_region() should have filtered out continues humongous regions");
3370 3403
3371 3404 if (_cm->verbose_low()) {
3372 3405 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3373 3406 _worker_id, hr);
3374 3407 }
3375 3408
3376 3409 _curr_region = hr;
3377 3410 _finger = hr->bottom();
3378 3411 update_region_limit();
3379 3412 }
3380 3413
3381 3414 void CMTask::update_region_limit() {
3382 3415 HeapRegion* hr = _curr_region;
3383 3416 HeapWord* bottom = hr->bottom();
3384 3417 HeapWord* limit = hr->next_top_at_mark_start();
3385 3418
3386 3419 if (limit == bottom) {
3387 3420 if (_cm->verbose_low()) {
3388 3421 gclog_or_tty->print_cr("[%u] found an empty region "
3389 3422 "["PTR_FORMAT", "PTR_FORMAT")",
3390 3423 _worker_id, bottom, limit);
3391 3424 }
3392 3425 // The region was collected underneath our feet.
3393 3426 // We set the finger to bottom to ensure that the bitmap
3394 3427 // iteration that will follow this will not do anything.
3395 3428 // (this is not a condition that holds when we set the region up,
3396 3429 // as the region is not supposed to be empty in the first place)
3397 3430 _finger = bottom;
3398 3431 } else if (limit >= _region_limit) {
3399 3432 assert(limit >= _finger, "peace of mind");
3400 3433 } else {
3401 3434 assert(limit < _region_limit, "only way to get here");
3402 3435 // This can happen under some pretty unusual circumstances. An
3403 3436 // evacuation pause empties the region underneath our feet (NTAMS
3404 3437 // at bottom). We then do some allocation in the region (NTAMS
3405 3438 // stays at bottom), followed by the region being used as a GC
3406 3439 // alloc region (NTAMS will move to top() and the objects
3407 3440 // originally below it will be grayed). All objects now marked in
3408 3441 // the region are explicitly grayed, if below the global finger,
3409 3442 // and in fact we do not need to scan anything else. So, we simply
3410 3443 // set _finger to be limit to ensure that the bitmap iteration
3411 3444 // doesn't do anything.
3412 3445 _finger = limit;
3413 3446 }
3414 3447
3415 3448 _region_limit = limit;
3416 3449 }
3417 3450
3418 3451 void CMTask::giveup_current_region() {
3419 3452 assert(_curr_region != NULL, "invariant");
3420 3453 if (_cm->verbose_low()) {
3421 3454 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3422 3455 _worker_id, _curr_region);
3423 3456 }
3424 3457 clear_region_fields();
3425 3458 }
3426 3459
3427 3460 void CMTask::clear_region_fields() {
3428 3461 // Values for these three fields that indicate that we're not
3429 3462 // holding on to a region.
3430 3463 _curr_region = NULL;
3431 3464 _finger = NULL;
3432 3465 _region_limit = NULL;
3433 3466 }
3434 3467
3435 3468 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3436 3469 if (cm_oop_closure == NULL) {
3437 3470 assert(_cm_oop_closure != NULL, "invariant");
3438 3471 } else {
3439 3472 assert(_cm_oop_closure == NULL, "invariant");
3440 3473 }
3441 3474 _cm_oop_closure = cm_oop_closure;
3442 3475 }
3443 3476
3444 3477 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3445 3478 guarantee(nextMarkBitMap != NULL, "invariant");
3446 3479
3447 3480 if (_cm->verbose_low()) {
3448 3481 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3449 3482 }
3450 3483
3451 3484 _nextMarkBitMap = nextMarkBitMap;
3452 3485 clear_region_fields();
3453 3486
3454 3487 _calls = 0;
3455 3488 _elapsed_time_ms = 0.0;
3456 3489 _termination_time_ms = 0.0;
3457 3490 _termination_start_time_ms = 0.0;
3458 3491
3459 3492 #if _MARKING_STATS_
3460 3493 _local_pushes = 0;
3461 3494 _local_pops = 0;
3462 3495 _local_max_size = 0;
3463 3496 _objs_scanned = 0;
3464 3497 _global_pushes = 0;
3465 3498 _global_pops = 0;
3466 3499 _global_max_size = 0;
3467 3500 _global_transfers_to = 0;
3468 3501 _global_transfers_from = 0;
3469 3502 _regions_claimed = 0;
3470 3503 _objs_found_on_bitmap = 0;
3471 3504 _satb_buffers_processed = 0;
3472 3505 _steal_attempts = 0;
3473 3506 _steals = 0;
3474 3507 _aborted = 0;
3475 3508 _aborted_overflow = 0;
3476 3509 _aborted_cm_aborted = 0;
3477 3510 _aborted_yield = 0;
3478 3511 _aborted_timed_out = 0;
3479 3512 _aborted_satb = 0;
3480 3513 _aborted_termination = 0;
3481 3514 #endif // _MARKING_STATS_
3482 3515 }
3483 3516
3484 3517 bool CMTask::should_exit_termination() {
3485 3518 regular_clock_call();
3486 3519 // This is called when we are in the termination protocol. We should
3487 3520 // quit if, for some reason, this task wants to abort or the global
3488 3521 // stack is not empty (this means that we can get work from it).
3489 3522 return !_cm->mark_stack_empty() || has_aborted();
3490 3523 }
3491 3524
3492 3525 void CMTask::reached_limit() {
3493 3526 assert(_words_scanned >= _words_scanned_limit ||
3494 3527 _refs_reached >= _refs_reached_limit ,
3495 3528 "shouldn't have been called otherwise");
3496 3529 regular_clock_call();
3497 3530 }
3498 3531
3499 3532 void CMTask::regular_clock_call() {
3500 3533 if (has_aborted()) return;
3501 3534
3502 3535 // First, we need to recalculate the words scanned and refs reached
3503 3536 // limits for the next clock call.
3504 3537 recalculate_limits();
3505 3538
3506 3539 // During the regular clock call we do the following:
3507 3540
3508 3541 // (1) If an overflow has been flagged, then we abort.
3509 3542 if (_cm->has_overflown()) {
3510 3543 set_has_aborted();
3511 3544 return;
3512 3545 }
3513 3546
3514 3547 // If we are not concurrent (i.e. we're doing remark) we don't need
3515 3548 // to check anything else. The other steps are only needed during
3516 3549 // the concurrent marking phase.
3517 3550 if (!concurrent()) return;
3518 3551
3519 3552 // (2) If marking has been aborted for Full GC, then we also abort.
3520 3553 if (_cm->has_aborted()) {
3521 3554 set_has_aborted();
3522 3555 statsOnly( ++_aborted_cm_aborted );
3523 3556 return;
3524 3557 }
3525 3558
3526 3559 double curr_time_ms = os::elapsedVTime() * 1000.0;
3527 3560
3528 3561 // (3) If marking stats are enabled, then we update the step history.
3529 3562 #if _MARKING_STATS_
3530 3563 if (_words_scanned >= _words_scanned_limit) {
3531 3564 ++_clock_due_to_scanning;
3532 3565 }
3533 3566 if (_refs_reached >= _refs_reached_limit) {
3534 3567 ++_clock_due_to_marking;
3535 3568 }
3536 3569
3537 3570 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3538 3571 _interval_start_time_ms = curr_time_ms;
3539 3572 _all_clock_intervals_ms.add(last_interval_ms);
3540 3573
3541 3574 if (_cm->verbose_medium()) {
3542 3575 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3543 3576 "scanned = %d%s, refs reached = %d%s",
3544 3577 _worker_id, last_interval_ms,
3545 3578 _words_scanned,
3546 3579 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3547 3580 _refs_reached,
3548 3581 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3549 3582 }
3550 3583 #endif // _MARKING_STATS_
3551 3584
3552 3585 // (4) We check whether we should yield. If we have to, then we abort.
3553 3586 if (_cm->should_yield()) {
3554 3587 // We should yield. To do this we abort the task. The caller is
3555 3588 // responsible for yielding.
3556 3589 set_has_aborted();
3557 3590 statsOnly( ++_aborted_yield );
3558 3591 return;
3559 3592 }
3560 3593
3561 3594 // (5) We check whether we've reached our time quota. If we have,
3562 3595 // then we abort.
3563 3596 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3564 3597 if (elapsed_time_ms > _time_target_ms) {
3565 3598 set_has_aborted();
3566 3599 _has_timed_out = true;
3567 3600 statsOnly( ++_aborted_timed_out );
3568 3601 return;
3569 3602 }
3570 3603
3571 3604 // (6) Finally, we check whether there are enough completed SATB
3572 3605 // buffers available for processing. If there are, we abort.
3573 3606 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3574 3607 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3575 3608 if (_cm->verbose_low()) {
3576 3609 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3577 3610 _worker_id);
3578 3611 }
3579 3612 // we do need to process SATB buffers, we'll abort and restart
3580 3613 // the marking task to do so
3581 3614 set_has_aborted();
3582 3615 statsOnly( ++_aborted_satb );
3583 3616 return;
3584 3617 }
3585 3618 }
3586 3619
3587 3620 void CMTask::recalculate_limits() {
3588 3621 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3589 3622 _words_scanned_limit = _real_words_scanned_limit;
3590 3623
3591 3624 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3592 3625 _refs_reached_limit = _real_refs_reached_limit;
3593 3626 }
3594 3627
3595 3628 void CMTask::decrease_limits() {
3596 3629 // This is called when we believe that we're going to do an infrequent
3597 3630 // operation which will increase the per-byte-scanned cost (i.e. move
3598 3631 // entries to/from the global stack). It basically tries to decrease the
3599 3632 // scanning limit so that the clock is called earlier.
3600 3633
3601 3634 if (_cm->verbose_medium()) {
3602 3635 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3603 3636 }
3604 3637
3605 3638 _words_scanned_limit = _real_words_scanned_limit -
3606 3639 3 * words_scanned_period / 4;
3607 3640 _refs_reached_limit = _real_refs_reached_limit -
3608 3641 3 * refs_reached_period / 4;
3609 3642 }
3610 3643
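// [Editorial sketch -- not part of this patch] The limits maintained
// by recalculate_limits()/decrease_limits() feed a work-based trigger:
// cheap counters are bumped per unit of work, and the comparatively
// expensive clock is consulted only when a limit is crossed (see the
// check_limits() call in scan_object() above). A hypothetical
// stand-alone version showing the shape of the scheme:

#include <cstddef>

class WorkBasedClock {
  size_t _words_scanned;
  size_t _words_scanned_limit;
  size_t _refs_reached;
  size_t _refs_reached_limit;

  void reached_limit() {
    // In CMTask this runs the abort checks and recalculates the limits.
  }

public:
  WorkBasedClock(size_t words_limit, size_t refs_limit)
    : _words_scanned(0), _words_scanned_limit(words_limit),
      _refs_reached(0), _refs_reached_limit(refs_limit) { }

  // Bump the counters as work is done; consult the clock only when a
  // limit is crossed.
  void note_words_scanned(size_t n) { _words_scanned += n; check_limits(); }
  void note_ref_reached()           { _refs_reached  += 1; check_limits(); }

  void check_limits() {
    if (_words_scanned >= _words_scanned_limit ||
        _refs_reached >= _refs_reached_limit) {
      reached_limit();
    }
  }
};
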
3611 3644 void CMTask::move_entries_to_global_stack() {
3612 3645 // local array where we'll store the entries that will be popped
3613 3646 // from the local queue
3614 3647 oop buffer[global_stack_transfer_size];
3615 3648
3616 3649 int n = 0;
3617 3650 oop obj;
3618 3651 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3619 3652 buffer[n] = obj;
3620 3653 ++n;
3621 3654 }
3622 3655
3623 3656 if (n > 0) {
3624 3657 // we popped at least one entry from the local queue
3625 3658
3626 3659 statsOnly( ++_global_transfers_to; _local_pops += n );
3627 3660
3628 3661 if (!_cm->mark_stack_push(buffer, n)) {
3629 3662 if (_cm->verbose_low()) {
3630 3663 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3631 3664 _worker_id);
3632 3665 }
3633 3666 set_has_aborted();
3634 3667 } else {
3635 3668 // the transfer was successful
3636 3669
3637 3670 if (_cm->verbose_medium()) {
3638 3671 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3639 3672 _worker_id, n);
3640 3673 }
3641 3674 statsOnly( int tmp_size = _cm->mark_stack_size();
3642 3675 if (tmp_size > _global_max_size) {
3643 3676 _global_max_size = tmp_size;
3644 3677 }
3645 3678 _global_pushes += n );
3646 3679 }
3647 3680 }
3648 3681
3649 3682 // this operation was quite expensive, so decrease the limits
3650 3683 decrease_limits();
3651 3684 }
3652 3685
3653 3686 void CMTask::get_entries_from_global_stack() {
3654 3687 // local array where we'll store the entries that will be popped
3655 3688 // from the global stack.
3656 3689 oop buffer[global_stack_transfer_size];
3657 3690 int n;
3658 3691 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3659 3692 assert(n <= global_stack_transfer_size,
3660 3693 "we should not pop more than the given limit");
3661 3694 if (n > 0) {
3662 3695 // yes, we did actually pop at least one entry
3663 3696
3664 3697 statsOnly( ++_global_transfers_from; _global_pops += n );
3665 3698 if (_cm->verbose_medium()) {
3666 3699 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3667 3700 _worker_id, n);
3668 3701 }
3669 3702 for (int i = 0; i < n; ++i) {
3670 3703 bool success = _task_queue->push(buffer[i]);
3671 3704 // We only call this when the local queue is empty or under a
3672 3705 // given target limit. So, we do not expect this push to fail.
3673 3706 assert(success, "invariant");
3674 3707 }
3675 3708
3676 3709 statsOnly( int tmp_size = _task_queue->size();
3677 3710 if (tmp_size > _local_max_size) {
3678 3711 _local_max_size = tmp_size;
3679 3712 }
3680 3713 _local_pushes += n );
3681 3714 }
3682 3715
3683 3716 // this operation was quite expensive, so decrease the limits
3684 3717 decrease_limits();
3685 3718 }
3686 3719
3687 3720 void CMTask::drain_local_queue(bool partially) {
3688 3721 if (has_aborted()) return;
3689 3722
3690 3723 // Decide what the target size is, depending on whether we're going to
3691 3724 // drain it partially (so that other tasks can steal if they run out
3692 3725 // of things to do) or totally (at the very end).
3693 3726 size_t target_size;
3694 3727 if (partially) {
3695 3728 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3696 3729 } else {
3697 3730 target_size = 0;
3698 3731 }
3699 3732
3700 3733 if (_task_queue->size() > target_size) {
3701 3734 if (_cm->verbose_high()) {
3702 3735 gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
3703 3736 _worker_id, target_size);
3704 3737 }
3705 3738
3706 3739 oop obj;
3707 3740 bool ret = _task_queue->pop_local(obj);
3708 3741 while (ret) {
3709 3742 statsOnly( ++_local_pops );
3710 3743
3711 3744 if (_cm->verbose_high()) {
3712 3745 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3713 3746 (void*) obj);
3714 3747 }
3715 3748
3716 3749 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3717 3750 assert(!_g1h->is_on_master_free_list(
3718 3751 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3719 3752
3720 3753 scan_object(obj);
3721 3754
3722 3755 if (_task_queue->size() <= target_size || has_aborted()) {
3723 3756 ret = false;
3724 3757 } else {
3725 3758 ret = _task_queue->pop_local(obj);
3726 3759 }
3727 3760 }
3728 3761
3729 3762 if (_cm->verbose_high()) {
3730 3763 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3731 3764 _worker_id, _task_queue->size());
3732 3765 }
3733 3766 }
3734 3767 }
3735 3768
3736 3769 void CMTask::drain_global_stack(bool partially) {
3737 3770 if (has_aborted()) return;
3738 3771
3739 3772 // We have a policy to drain the local queue before we attempt to
3740 3773 // drain the global stack.
3741 3774 assert(partially || _task_queue->size() == 0, "invariant");
3742 3775
3743 3776 // Decide what the target size is, depending on whether we're going to
3744 3777 // drain it partially (so that other tasks can steal if they run out
3745 3778 // of things to do) or totally (at the very end). Notice that,
3746 3779 // because we move entries from the global stack in chunks or
3747 3780 // because another task might be doing the same, we might in fact
3748 3781 // drop below the target. But this is not a problem.
3749 3782 size_t target_size;
3750 3783 if (partially) {
3751 3784 target_size = _cm->partial_mark_stack_size_target();
3752 3785 } else {
3753 3786 target_size = 0;
3754 3787 }
3755 3788
3756 3789 if (_cm->mark_stack_size() > target_size) {
3757 3790 if (_cm->verbose_low()) {
3758 3791 gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
3759 3792 _worker_id, target_size);
3760 3793 }
3761 3794
3762 3795 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3763 3796 get_entries_from_global_stack();
3764 3797 drain_local_queue(partially);
3765 3798 }
3766 3799
3767 3800 if (_cm->verbose_low()) {
3768 3801 gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3769 3802 _worker_id, _cm->mark_stack_size());
3770 3803 }
3771 3804 }
3772 3805 }
3773 3806
3774 3807 // The SATB queue code makes several assumptions about whether to call
3775 3808 // the par or non-par versions of its methods. This is why some of the
3776 3809 // code is replicated. We should really get rid of the single-threaded
3777 3810 // version of the code to simplify things.
3778 3811 void CMTask::drain_satb_buffers() {
3779 3812 if (has_aborted()) return;
3780 3813
3781 3814 // We set this so that the regular clock knows that we're in the
3782 3815 // middle of draining buffers and doesn't set the abort flag when it
3783 3816 // notices that SATB buffers are available for draining. It'd be
3784 3817 // very counterproductive if it did that. :-)
3785 3818 _draining_satb_buffers = true;
3786 3819
3787 3820 CMObjectClosure oc(this);
3788 3821 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3789 3822 if (G1CollectedHeap::use_parallel_gc_threads()) {
3790 3823 satb_mq_set.set_par_closure(_worker_id, &oc);
3791 3824 } else {
3792 3825 satb_mq_set.set_closure(&oc);
3793 3826 }
3794 3827
3795 3828 // This keeps claiming and applying the closure to completed buffers
3796 3829 // until we run out of buffers or we need to abort.
3797 3830 if (G1CollectedHeap::use_parallel_gc_threads()) {
3798 3831 while (!has_aborted() &&
3799 3832 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3800 3833 if (_cm->verbose_medium()) {
3801 3834 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3802 3835 }
3803 3836 statsOnly( ++_satb_buffers_processed );
3804 3837 regular_clock_call();
3805 3838 }
3806 3839 } else {
3807 3840 while (!has_aborted() &&
3808 3841 satb_mq_set.apply_closure_to_completed_buffer()) {
3809 3842 if (_cm->verbose_medium()) {
3810 3843 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3811 3844 }
3812 3845 statsOnly( ++_satb_buffers_processed );
3813 3846 regular_clock_call();
3814 3847 }
3815 3848 }
3816 3849
3817 3850 if (!concurrent() && !has_aborted()) {
3818 3851 // We should only do this during remark.
3819 3852 if (G1CollectedHeap::use_parallel_gc_threads()) {
3820 3853 satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3821 3854 } else {
3822 3855 satb_mq_set.iterate_closure_all_threads();
3823 3856 }
3824 3857 }
3825 3858
3826 3859 _draining_satb_buffers = false;
3827 3860
3828 3861 assert(has_aborted() ||
3829 3862 concurrent() ||
3830 3863 satb_mq_set.completed_buffers_num() == 0, "invariant");
3831 3864
3832 3865 if (G1CollectedHeap::use_parallel_gc_threads()) {
3833 3866 satb_mq_set.set_par_closure(_worker_id, NULL);
3834 3867 } else {
3835 3868 satb_mq_set.set_closure(NULL);
3836 3869 }
3837 3870
3838 3871 // again, this was a potentially expensive operation; decrease the
3839 3872 // limits to get the regular clock call early
3840 3873 decrease_limits();
3841 3874 }
3842 3875
3843 3876 void CMTask::print_stats() {
3844 3877 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3845 3878 _worker_id, _calls);
3846 3879 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3847 3880 _elapsed_time_ms, _termination_time_ms);
3848 3881 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3849 3882 _step_times_ms.num(), _step_times_ms.avg(),
3850 3883 _step_times_ms.sd());
3851 3884 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3852 3885 _step_times_ms.maximum(), _step_times_ms.sum());
3853 3886
3854 3887 #if _MARKING_STATS_
3855 3888 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3856 3889 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3857 3890 _all_clock_intervals_ms.sd());
3858 3891 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3859 3892 _all_clock_intervals_ms.maximum(),
3860 3893 _all_clock_intervals_ms.sum());
3861 3894 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3862 3895 _clock_due_to_scanning, _clock_due_to_marking);
3863 3896 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3864 3897 _objs_scanned, _objs_found_on_bitmap);
3865 3898 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3866 3899 _local_pushes, _local_pops, _local_max_size);
3867 3900 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3868 3901 _global_pushes, _global_pops, _global_max_size);
3869 3902 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3870 3903 _global_transfers_to,_global_transfers_from);
3871 3904 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
3872 3905 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3873 3906 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3874 3907 _steal_attempts, _steals);
3875 3908 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3876 3909 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3877 3910 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3878 3911 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3879 3912 _aborted_timed_out, _aborted_satb, _aborted_termination);
3880 3913 #endif // _MARKING_STATS_
3881 3914 }
3882 3915
3883 3916 /*****************************************************************************
3884 3917
3885 3918 The do_marking_step(time_target_ms) method is the building block
3886 3919 of the parallel marking framework. It can be called in parallel
3887 3920 with other invocations of do_marking_step() on different tasks
3888 3921 (but only one per task, obviously) and concurrently with the
3889 3922 mutator threads, or during remark, hence it eliminates the need
3890 3923 for two versions of the code. When called during remark, it will
3891 3924 pick up from where the task left off during the concurrent marking
3892 3925 phase. Interestingly, tasks are also claimable during evacuation
3893 3926 pauses, since do_marking_step() ensures that it aborts before
3894 3927 it needs to yield.
3895 3928
3896 3929 The data structures that it uses to do marking work are the
3897 3930 following:
3898 3931
3899 3932 (1) Marking Bitmap. If there are gray objects that appear only
3900 3933 on the bitmap (this happens either when dealing with an overflow
3901 3934 or when the initial marking phase has simply marked the roots
3902 3935 and didn't push them on the stack), then tasks claim heap
3903 3936 regions whose bitmap they then scan to find gray objects. A
3904 3937 global finger indicates where the end of the last claimed region
3905 3938 is. A local finger indicates how far into the region a task has
3906 3939 scanned. The two fingers are used to determine how to gray an
3907 3940 object (i.e. whether simply marking it is OK, as it will be
3908 3941 visited by a task in the future, or whether it needs to be also
3909 3942 pushed on a stack).
3910 3943
3911 3944 (2) Local Queue. The local queue of the task which is accessed
3912 3945 reasonably efficiently by the task. Other tasks can steal from
3913 3946 it when they run out of work. Throughout the marking phase, a
3914 3947 task attempts to keep its local queue short but not totally
3915 3948 empty, so that entries are available for stealing by other
3916 3949 tasks. Only when there is no more work will a task totally
3917 3950 drain its local queue.
3918 3951
3919 3952 (3) Global Mark Stack. This handles local queue overflow. During
3920 3953 marking only sets of entries are moved between it and the local
3921 3954 queues, as access to it requires a mutex, and more fine-grained
3922 3955 interaction with it might cause contention. If it
3923 3956 overflows, then the marking phase should restart and iterate
3924 3957 over the bitmap to identify gray objects. Throughout the marking
3925 3958 phase, tasks attempt to keep the global mark stack at a small
3926 3959 length but not totally empty, so that entries are available for
3927 3960 popping by other tasks. Only when there is no more work will
3928 3961 tasks totally drain the global mark stack.
3929 3962
3930 3963 (4) SATB Buffer Queue. This is where completed SATB buffers are
3931 3964 made available. Buffers are regularly removed from this queue
3932 3965 and scanned for roots, so that the queue doesn't get too
3933 3966 long. During remark, all completed buffers are processed, as
3934 3967 well as the filled-in parts of any uncompleted buffers.
3935 3968
3936 3969 The do_marking_step() method tries to abort when the time target
3937 3970 has been reached. There are a few other cases when the
3938 3971 do_marking_step() method also aborts:
3939 3972
3940 3973 (1) When the marking phase has been aborted (after a Full GC).
3941 3974
3942 3975 (2) When a global overflow (on the global stack) has been
3943 3976 triggered. Before the task aborts, it will actually sync up with
3944 3977 the other tasks to ensure that all the marking data structures
3945 3978 (local queues, stacks, fingers etc.) are re-initialised so that
3946 3979 when do_marking_step() completes, the marking phase can
3947 3980 immediately restart.
3948 3981
3949 3982 (3) When enough completed SATB buffers are available. The
3950 3983 do_marking_step() method only tries to drain SATB buffers right
3951 3984 at the beginning. So, if enough buffers are available, the
3952 3985 marking step aborts and the SATB buffers are processed at
3953 3986 the beginning of the next invocation.
3954 3987
3955 3988 (4) To yield. When we have to yield, we abort and yield
3956 3989 right at the end of do_marking_step(). This saves us from a lot
3957 3990 of hassle as, by yielding, we might allow a Full GC. If this
3958 3991 happens then objects will be compacted underneath our feet, the
3959 3992 heap might shrink, etc. We save checking for this by just
3960 3993 aborting and doing the yield right at the end.
3961 3994
3962 3995 From the above it follows that the do_marking_step() method should
3963 3996 be called in a loop (or, otherwise, regularly) until it completes.
3964 3997
3965 3998 If a marking step completes without its has_aborted() flag being
3966 3999 true, it means it has completed the current marking phase (and
3967 4000 also all other marking tasks have done so and have all synced up).
3968 4001
3969 4002 A method called regular_clock_call() is invoked "regularly" (in
3970 4003 sub-ms intervals) throughout marking. It is this clock method that
3971 4004 checks all the abort conditions which were mentioned above and
3972 4005 decides when the task should abort. A work-based scheme is used to
3973 4006 trigger this clock method: when the number of object words the
3974 4007 marking phase has scanned or the number of references the marking
3975 4008 phase has visited reach a given limit. Additional invocations of
3976 4009 the clock method have been planted in a few other strategic places
3977 4010 too. The initial reason for the clock method was to avoid calling
3978 4011 vtime too regularly, as it is quite expensive. So, once it was in
3979 4012 place, it was natural to piggy-back all the other conditions on it
3980 4013 too and not constantly check them throughout the code.
3981 4014
3982 4015 *****************************************************************************/
3983 4016
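// [Editorial sketch -- not part of this patch] As the comment above
// notes, do_marking_step() is meant to be driven in a loop until it
// completes without aborting. A hypothetical driver, loosely shaped
// like the concurrent marking worker's loop (the function name and
// the 10ms step target are illustrative only):

static void drive_marking_task(ConcurrentMark* cm, CMTask* task) {
  do {
    task->do_marking_step(10.0 /* time_target_ms */,
                          true /* do_stealing */,
                          true /* do_termination */);
    // A step that aborted in order to yield expects the caller to
    // yield here before retrying; a global abort (Full GC) or a clean
    // completion ends the loop.
  } while (!cm->has_aborted() && task->has_aborted());
}
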
3984 4017 void CMTask::do_marking_step(double time_target_ms,
3985 4018 bool do_stealing,
3986 4019 bool do_termination) {
3987 4020 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3988 4021 assert(concurrent() == _cm->concurrent(), "they should be the same");
3989 4022
3990 4023 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3991 4024 assert(_task_queues != NULL, "invariant");
3992 4025 assert(_task_queue != NULL, "invariant");
3993 4026 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3994 4027
3995 4028 assert(!_claimed,
3996 4029 "only one thread should claim this task at any one time");
3997 4030
3998 4031 // OK, this doesn't safeguard against all possible scenarios, as it is
3999 4032 // possible for two threads to set the _claimed flag at the same
4000 4033 // time. But it is only for debugging purposes anyway and it will
4001 4034 // catch most problems.
4002 4035 _claimed = true;
4003 4036
4004 4037 _start_time_ms = os::elapsedVTime() * 1000.0;
4005 4038 statsOnly( _interval_start_time_ms = _start_time_ms );
4006 4039
4007 4040 double diff_prediction_ms =
4008 4041 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4009 4042 _time_target_ms = time_target_ms - diff_prediction_ms;
4010 4043
4011 4044 // set up the variables that are used in the work-based scheme to
4012 4045 // call the regular clock method
4013 4046 _words_scanned = 0;
4014 4047 _refs_reached = 0;
4015 4048 recalculate_limits();
4016 4049
4017 4050 // clear all flags
4018 4051 clear_has_aborted();
4019 4052 _has_timed_out = false;
4020 4053 _draining_satb_buffers = false;
4021 4054
4022 4055 ++_calls;
4023 4056
4024 4057 if (_cm->verbose_low()) {
4025 4058 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4026 4059 "target = %1.2lfms >>>>>>>>>>",
4027 4060 _worker_id, _calls, _time_target_ms);
4028 4061 }
4029 4062
4030 4063 // Set up the bitmap and oop closures. Anything that uses them is
4031 4064 // eventually called from this method, so it is OK to allocate these
4032 4065 // statically.
4033 4066 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4034 4067 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4035 4068 set_cm_oop_closure(&cm_oop_closure);
4036 4069
4037 4070 if (_cm->has_overflown()) {
4038 4071 // This can happen if the mark stack overflows during a GC pause
4039 4072 // and this task, after a yield point, restarts. We have to abort
4040 4073 // as we need to get into the overflow protocol which happens
4041 4074 // right at the end of this task.
4042 4075 set_has_aborted();
4043 4076 }
4044 4077
4045 4078 // First drain any available SATB buffers. After this, we will not
4046 4079 // look at SATB buffers before the next invocation of this method.
4047 4080 // If enough completed SATB buffers are queued up, the regular clock
4048 4081 // will abort this task so that it restarts.
4049 4082 drain_satb_buffers();
4050 4083 // ...then partially drain the local queue and the global stack
4051 4084 drain_local_queue(true);
4052 4085 drain_global_stack(true);
4053 4086
4054 4087 do {
4055 4088 if (!has_aborted() && _curr_region != NULL) {
4056 4089 // This means that we're already holding on to a region.
4057 4090 assert(_finger != NULL, "if region is not NULL, then the finger "
4058 4091 "should not be NULL either");
4059 4092
4060 4093 // We might have restarted this task after an evacuation pause
4061 4094 // which might have evacuated the region we're holding on to
4062 4095 // underneath our feet. Let's read its limit again to make sure
4063 4096 // that we do not iterate over a region of the heap that
4064 4097 // contains garbage (update_region_limit() will also move
4065 4098 // _finger to the start of the region if it is found empty).
4066 4099 update_region_limit();
4067 4100 // We will start from _finger not from the start of the region,
4068 4101 // as we might be restarting this task after aborting half-way
4069 4102 // through scanning this region. In this case, _finger points to
4070 4103 // the address where we last found a marked object. If this is a
4071 4104 // fresh region, _finger points to start().
4072 4105 MemRegion mr = MemRegion(_finger, _region_limit);
4073 4106
4074 4107 if (_cm->verbose_low()) {
4075 4108 gclog_or_tty->print_cr("[%u] we're scanning part "
4076 4109 "["PTR_FORMAT", "PTR_FORMAT") "
4077 4110 "of region "PTR_FORMAT,
4078 4111 _worker_id, _finger, _region_limit, _curr_region);
4079 4112 }
4080 4113
4081 4114 // Let's iterate over the bitmap of the part of the
4082 4115 // region that is left.
4083 4116 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4084 4117 // We successfully completed iterating over the region. Now,
4085 4118 // let's give up the region.
4086 4119 giveup_current_region();
4087 4120 regular_clock_call();
4088 4121 } else {
4089 4122 assert(has_aborted(), "currently the only way to do so");
4090 4123 // The only way to abort the bitmap iteration is to return
4091 4124 // false from the do_bit() method. However, inside the
4092 4125 // do_bit() method we move the _finger to point to the
4093 4126 // object currently being looked at. So, if we bail out, we
4094 4127 // have definitely set _finger to something non-null.
4095 4128 assert(_finger != NULL, "invariant");
4096 4129
4097 4130 // Region iteration was actually aborted. So now _finger
4098 4131 // points to the address of the object we last scanned. If we
4099 4132 // leave it there, when we restart this task, we will rescan
4100 4133 // the object. It is easy to avoid this. We move the finger by
4101 4134 // enough to point to the next possible object header (the
4102 4135 // bitmap knows by how much we need to move it as it knows its
4103 4136 // granularity).
4104 4137 assert(_finger < _region_limit, "invariant");
4105 4138 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4106 4139 // Check if bitmap iteration was aborted while scanning the last object
4107 4140 if (new_finger >= _region_limit) {
4108 4141 giveup_current_region();
4109 4142 } else {
4110 4143 move_finger_to(new_finger);
4111 4144 }
4112 4145 }
4113 4146 }
4114 4147 // At this point we have either completed iterating over the
4115 4148 // region we were holding on to, or we have aborted.
4116 4149
4117 4150 // We then partially drain the local queue and the global stack.
4118 4151 // (Do we really need this?)
4119 4152 drain_local_queue(true);
4120 4153 drain_global_stack(true);
4121 4154
4122 4155 // Read the note on the claim_region() method on why it might
4123 4156 // return NULL with potentially more regions available for
4124 4157 // claiming and why we have to check out_of_regions() to determine
4125 4158 // whether we're done or not.
4126 4159 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4127 4160 // We are going to try to claim a new region. We should have
4128 4161 // given up on the previous one.
4129 4162 // Separated the asserts so that we know which one fires.
4130 4163 assert(_curr_region == NULL, "invariant");
4131 4164 assert(_finger == NULL, "invariant");
4132 4165 assert(_region_limit == NULL, "invariant");
4133 4166 if (_cm->verbose_low()) {
4134 4167 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4135 4168 }
4136 4169 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4137 4170 if (claimed_region != NULL) {
4138 4171 // Yes, we managed to claim one
4139 4172 statsOnly( ++_regions_claimed );
4140 4173
4141 4174 if (_cm->verbose_low()) {
4142 4175 gclog_or_tty->print_cr("[%u] we successfully claimed "
4143 4176 "region "PTR_FORMAT,
4144 4177 _worker_id, claimed_region);
4145 4178 }
4146 4179
4147 4180 setup_for_region(claimed_region);
4148 4181 assert(_curr_region == claimed_region, "invariant");
4149 4182 }
4150 4183 // It is important to call the regular clock here. It might take
4151 4184 // a while to claim a region if, for example, we hit a large
4152 4185 // block of empty regions. So we need to call the regular clock
4153 4186 // method once round the loop to make sure it's called
4154 4187 // frequently enough.
4155 4188 regular_clock_call();
4156 4189 }
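// The note on claim_region() referred to above comes down to a
// lock-free scan over the global finger; a greatly simplified sketch
// (the real version also deals with empty and humongous regions),
// using Atomic::cmpxchg_ptr as elsewhere in HotSpot:
//
//   HeapRegion* ConcurrentMark::claim_region(uint worker_id) {
//     while (_finger < _heap_end) {
//       HeapWord* finger = _finger;
//       HeapRegion* curr_region = _g1h->heap_region_containing(finger);
//       HeapWord* end = curr_region->end();
//       if (Atomic::cmpxchg_ptr(end, &_finger, finger) == finger) {
//         return curr_region;   // we advanced the finger: region is ours
//       }
//       // another task moved the finger first; retry from its new value
//     }
//     // The finger is past the heap, but a racing task may still be
//     // holding an unscanned region: hence the out_of_regions() check.
//     return NULL;
//   }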
4157 4190
4158 4191 if (!has_aborted() && _curr_region == NULL) {
4159 4192 assert(_cm->out_of_regions(),
4160 4193 "at this point we should be out of regions");
4161 4194 }
4162 4195 } while ( _curr_region != NULL && !has_aborted());
4163 4196
4164 4197 if (!has_aborted()) {
4165 4198 // We cannot check whether the global stack is empty, since other
4166 4199 // tasks might be pushing objects to it concurrently.
4167 4200 assert(_cm->out_of_regions(),
4168 4201 "at this point we should be out of regions");
4169 4202
4170 4203 if (_cm->verbose_low()) {
4171 4204 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4172 4205 }
4173 4206
4174 4207 // Try to reduce the number of available SATB buffers so that
4175 4208 // remark has less work to do.
4176 4209 drain_satb_buffers();
4177 4210 }
4178 4211
4179 4212 // Since we've done everything else, we can now totally drain the
4180 4213 // local queue and global stack.
4181 4214 drain_local_queue(false);
4182 4215 drain_global_stack(false);
4183 4216
4184 4217 // Attempt to steal work from other tasks' queues.
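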
4185 4218 if (do_stealing && !has_aborted()) {
4186 4219 // We have not aborted. This means that we have finished all that
4187 4220 // we could. Let's try to do some stealing...
4188 4221
4189 4222 // We cannot check whether the global stack is empty, since other
4190 4223 // tasks might be pushing objects to it concurrently.
4191 4224 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4192 4225 "only way to reach here");
4193 4226
4194 4227 if (_cm->verbose_low()) {
4195 4228 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4196 4229 }
4197 4230
4198 4231 while (!has_aborted()) {
4199 4232 oop obj;
4200 4233 statsOnly( ++_steal_attempts );
4201 4234
4202 4235 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4203 4236 if (_cm->verbose_medium()) {
4204 4237 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4205 4238 _worker_id, (void*) obj);
4206 4239 }
4207 4240
4208 4241 statsOnly( ++_steals );
4209 4242
4210 4243 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4211 4244 "any stolen object should be marked");
4212 4245 scan_object(obj);
4213 4246
4214 4247 // And since we're towards the end, let's totally drain the
4215 4248 // local queue and global stack.
4216 4249 drain_local_queue(false);
4217 4250 drain_global_stack(false);
4218 4251 } else {
4219 4252 break;
4220 4253 }
4221 4254 }
4222 4255 }
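// try_stealing() is, plausibly, just a thin wrapper over the shared
// GenericTaskQueueSet::steal() machinery; a sketch (the wrapper body
// is assumed, not quoted):
//
//   bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed,
//                                     oop& obj) {
//     return _task_queues->steal(worker_id, hash_seed, obj);
//   }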
4223 4256
4224 4257 // If we are about to wrap up and go into termination, check if we
4225 4258 // should raise the overflow flag.
4226 4259 if (do_termination && !has_aborted()) {
4227 4260 if (_cm->force_overflow()->should_force()) {
4228 4261 _cm->set_has_overflown();
4229 4262 regular_clock_call();
4230 4263 }
4231 4264 }
4232 4265
4233 4266 // We still haven't aborted. Now, let's try to get into the
4234 4267 // termination protocol.
4235 4268 if (do_termination && !has_aborted()) {
4236 4269 // We cannot check whether the global stack is empty, since other
4237 4270 // tasks might be concurrently pushing objects on it.
4238 4271 // Separated the asserts so that we know which one fires.
4239 4272 assert(_cm->out_of_regions(), "only way to reach here");
4240 4273 assert(_task_queue->size() == 0, "only way to reach here");
4241 4274
4242 4275 if (_cm->verbose_low()) {
4243 4276 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4244 4277 }
4245 4278
4246 4279 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4247 4280 // The CMTask class also extends the TerminatorTerminator class,
4248 4281 // hence its should_exit_termination() method will also decide
4249 4282 // whether to exit the termination protocol or not.
4250 4283 bool finished = _cm->terminator()->offer_termination(this);
4251 4284 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4252 4285 _termination_time_ms +=
4253 4286 termination_end_time_ms - _termination_start_time_ms;
4254 4287
4255 4288 if (finished) {
4256 4289 // We're all done.
4257 4290
4258 4291 if (_worker_id == 0) {
4259 4292 // let's allow task 0 to do this
4260 4293 if (concurrent()) {
4261 4294 assert(_cm->concurrent_marking_in_progress(), "invariant");
4262 4295 // we need to set this to false before the next
4263 4296 // safepoint. This way we ensure that the marking phase
4264 4297 // doesn't observe any more heap expansions.
4265 4298 _cm->clear_concurrent_marking_in_progress();
4266 4299 }
4267 4300 }
4268 4301
4269 4302 // We can now guarantee that the global stack is empty, since
4270 4303 // all other tasks have finished. We separated the guarantees so
4271 4304 // that, if a condition is false, we can immediately find out
4272 4305 // which one.
4273 4306 guarantee(_cm->out_of_regions(), "only way to reach here");
4274 4307 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4275 4308 guarantee(_task_queue->size() == 0, "only way to reach here");
4276 4309 guarantee(!_cm->has_overflown(), "only way to reach here");
4277 4310 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4278 4311
4279 4312 if (_cm->verbose_low()) {
4280 4313 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4281 4314 }
4282 4315 } else {
4283 4316 // Apparently there's more work to do. Let's abort this task;
4284 4317 // its caller will restart it and we can hopefully find more to do.
4285 4318
4286 4319 if (_cm->verbose_low()) {
4287 4320 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4288 4321 _worker_id);
4289 4322 }
4290 4323
4291 4324 set_has_aborted();
4292 4325 statsOnly( ++_aborted_termination );
4293 4326 }
4294 4327 }
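// should_exit_termination(), mentioned above, is what lets a task
// leave the termination protocol early; a minimal sketch of the idea
// (the exact condition is assumed, not quoted):
//
//   bool CMTask::should_exit_termination() {
//     regular_clock_call();
//     // leave termination if we have been aborted (e.g. by a global
//     // overflow) or if new local work has appeared
//     return !_task_queue->is_empty() || has_aborted();
//   }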
4295 4328
4296 4329 // Mainly for debugging purposes to make sure that a pointer to the
4297 4330 // closure which was statically allocated in this frame doesn't
4298 4331 // escape it by accident.
4299 4332 set_cm_oop_closure(NULL);
4300 4333 double end_time_ms = os::elapsedVTime() * 1000.0;
4301 4334 double elapsed_time_ms = end_time_ms - _start_time_ms;
4302 4335 // Update the step history.
4303 4336 _step_times_ms.add(elapsed_time_ms);
4304 4337
4305 4338 if (has_aborted()) {
4306 4339 // The task was aborted for some reason.
4307 4340
4308 4341 statsOnly( ++_aborted );
4309 4342
4310 4343 if (_has_timed_out) {
4311 4344 double diff_ms = elapsed_time_ms - _time_target_ms;
4312 4345 // Keep statistics of how well we did with respect to hitting
4313 4346 // our target only if we actually timed out (if we aborted for
4314 4347 // other reasons, then the results might get skewed).
4315 4348 _marking_step_diffs_ms.add(diff_ms);
4316 4349 }
4317 4350
4318 4351 if (_cm->has_overflown()) {
4319 4352 // This is the interesting one. We aborted because a global
4320 4353 // overflow was raised. This means we have to restart the
4321 4354 // marking phase and start iterating over regions. However, in
4322 4355 // order to do this we have to make sure that all tasks stop
4323 4356 // what they are doing and re-initialise in a safe manner. We
4324 4357 // will achieve this with the use of two barrier sync points.
4325 4358
4326 4359 if (_cm->verbose_low()) {
4327 4360 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4328 4361 }
4329 4362
4330 4363 _cm->enter_first_sync_barrier(_worker_id);
4331 4364 // When we exit this sync barrier we know that all tasks have
4332 4365 // stopped doing marking work. So, it's now safe to
4333 4366 // re-initialise our data structures. At the end of this method,
4334 4367 // task 0 will clear the global data structures.
4335 4368
4336 4369 statsOnly( ++_aborted_overflow );
4337 4370
4338 4371 // We clear the local state of this task...
4339 4372 clear_region_fields();
4340 4373
4341 4374 // ...and enter the second barrier.
4342 4375 _cm->enter_second_sync_barrier(_worker_id);
4343 4376 // At this point everything has been re-initialised and we're
4344 4377 // ready to restart.
4345 4378 }
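// A sketch of the two-barrier protocol from the ConcurrentMark side;
// the barrier fields are assumed stand-ins and reset_marking_state()
// is a hypothetical name for whatever clears the global state:
//
//   void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
//     _first_overflow_barrier_sync.enter();
//     // every task is now parked: safe to touch global state
//     if (worker_id == 0) {
//       reset_marking_state();   // hypothetical: clears the global
//                                // stack, finger, region fields, etc.
//     }
//   }
//
//   void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
//     _second_overflow_barrier_sync.enter();
//     // from here on, every task sees the re-initialised state
//   }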
4346 4379
4347 4380 if (_cm->verbose_low()) {
4348 4381 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4349 4382 "elapsed = %1.2lfms <<<<<<<<<<",
4350 4383 _worker_id, _time_target_ms, elapsed_time_ms);
4351 4384 if (_cm->has_aborted()) {
4352 4385 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4353 4386 _worker_id);
4354 4387 }
4355 4388 }
4356 4389 } else {
4357 4390 if (_cm->verbose_low()) {
4358 4391 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4359 4392 "elapsed = %1.2lfms <<<<<<<<<<",
4360 4393 _worker_id, _time_target_ms, elapsed_time_ms);
4361 4394 }
4362 4395 }
4363 4396
4364 4397 _claimed = false;
4365 4398 }
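// A caller-side sketch of how a marking step is driven; the loop
// shape and the 10.0ms time target are illustrative (the real
// drivers are the concurrent marking and remark worker tasks):
//
//   CMTask* task = _cm->task(worker_id);
//   do {
//     task->do_marking_step(10.0,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//   } while (!_cm->has_aborted() && task->has_aborted());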
4366 4399
4367 4400 CMTask::CMTask(uint worker_id,
4368 4401 ConcurrentMark* cm,
4369 4402 size_t* marked_bytes,
4370 4403 BitMap* card_bm,
4371 4404 CMTaskQueue* task_queue,
4372 4405 CMTaskQueueSet* task_queues)
4373 4406 : _g1h(G1CollectedHeap::heap()),
4374 4407 _worker_id(worker_id), _cm(cm),
4375 4408 _claimed(false),
4376 4409 _nextMarkBitMap(NULL), _hash_seed(17),
4377 4410 _task_queue(task_queue),
4378 4411 _task_queues(task_queues),
4379 4412 _cm_oop_closure(NULL),
4380 4413 _marked_bytes_array(marked_bytes),
4381 4414 _card_bm(card_bm) {
4382 4415 guarantee(task_queue != NULL, "invariant");
4383 4416 guarantee(task_queues != NULL, "invariant");
4384 4417
4385 4418 statsOnly( _clock_due_to_scanning = 0;
4386 4419 _clock_due_to_marking = 0 );
4387 4420
4388 4421 _marking_step_diffs_ms.add(0.5);
4389 4422 }
4390 4423
4391 4424 // These are formatting macros that are used below to ensure
4392 4425 // consistent formatting. The *_H_* versions are used to format the
4393 4426 // header for a particular value and they should be kept consistent
4394 4427 // with the corresponding macro. Also note that most of the macros add
4395 4428 // the necessary white space (as a prefix) which makes them a bit
4396 4429 // easier to compose.
4397 4430
4398 4431 // All the output lines are prefixed with this string to be able to
4399 4432 // identify them easily in a large log file.
4400 4433 #define G1PPRL_LINE_PREFIX "###"
4401 4434
4402 4435 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4403 4436 #ifdef _LP64
4404 4437 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4405 4438 #else // _LP64
4406 4439 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4407 4440 #endif // _LP64
4408 4441
4409 4442 // For per-region info
4410 4443 #define G1PPRL_TYPE_FORMAT " %-4s"
4411 4444 #define G1PPRL_TYPE_H_FORMAT " %4s"
4412 4445 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4413 4446 #define G1PPRL_BYTE_H_FORMAT " %9s"
4414 4447 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4415 4448 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4416 4449
4417 4450 // For summary info
4418 4451 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4419 4452 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4420 4453 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4421 4454 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4422 4455
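// Because these macros are plain string literals, they compose by
// juxtaposition; for instance, a made-up two-column line such as
//
//   _out->print_cr(G1PPRL_LINE_PREFIX
//                  G1PPRL_TYPE_FORMAT
//                  G1PPRL_BYTE_FORMAT,
//                  "OLD", (size_t) 12345);
//
// would print something like "### OLD      12345".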
4423 4456 G1PrintRegionLivenessInfoClosure::
4424 4457 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4425 4458 : _out(out),
4426 4459 _total_used_bytes(0), _total_capacity_bytes(0),
4427 4460 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4428 4461 _hum_used_bytes(0), _hum_capacity_bytes(0),
4429 4462 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4430 4463 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4431 4464 MemRegion g1_committed = g1h->g1_committed();
4432 4465 MemRegion g1_reserved = g1h->g1_reserved();
4433 4466 double now = os::elapsedTime();
4434 4467
4435 4468 // Print the header of the output.
4436 4469 _out->cr();
4437 4470 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4438 4471 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4439 4472 G1PPRL_SUM_ADDR_FORMAT("committed")
4440 4473 G1PPRL_SUM_ADDR_FORMAT("reserved")
4441 4474 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4442 4475 g1_committed.start(), g1_committed.end(),
4443 4476 g1_reserved.start(), g1_reserved.end(),
4444 4477 HeapRegion::GrainBytes);
4445 4478 _out->print_cr(G1PPRL_LINE_PREFIX);
4446 4479 _out->print_cr(G1PPRL_LINE_PREFIX
4447 4480 G1PPRL_TYPE_H_FORMAT
4448 4481 G1PPRL_ADDR_BASE_H_FORMAT
4449 4482 G1PPRL_BYTE_H_FORMAT
4450 4483 G1PPRL_BYTE_H_FORMAT
4451 4484 G1PPRL_BYTE_H_FORMAT
4452 4485 G1PPRL_DOUBLE_H_FORMAT,
4453 4486 "type", "address-range",
4454 4487 "used", "prev-live", "next-live", "gc-eff");
4455 4488 _out->print_cr(G1PPRL_LINE_PREFIX
4456 4489 G1PPRL_TYPE_H_FORMAT
4457 4490 G1PPRL_ADDR_BASE_H_FORMAT
4458 4491 G1PPRL_BYTE_H_FORMAT
4459 4492 G1PPRL_BYTE_H_FORMAT
4460 4493 G1PPRL_BYTE_H_FORMAT
4461 4494 G1PPRL_DOUBLE_H_FORMAT,
4462 4495 "", "",
4463 4496 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4464 4497 }
4465 4498
4466 4499 // It takes a pointer to one of the _hum_* fields, deduces
4467 4500 // the corresponding value for a region in a humongous region
4468 4501 // series (either the region size, or what's left if the _hum_* field
4469 4502 // is < the region size), and updates the _hum_* field accordingly.
4470 4503 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4471 4504 size_t bytes = 0;
4472 4505 // The > 0 check is to deal with the prev and next live bytes which
4473 4506 // could be 0.
4474 4507 if (*hum_bytes > 0) {
4475 4508 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4476 4509 *hum_bytes -= bytes;
4477 4510 }
4478 4511 return bytes;
4479 4512 }
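// A worked example with 1 MB regions (numbers hypothetical): a
// 2.5 MB (2621440-byte) humongous object seeds _hum_used_bytes at
// its "starts humongous" region; three successive calls then yield
// 1048576, 1048576 and 524288 bytes, leaving the field at zero.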
4480 4513
4481 4514 // It deduces the values for a region in a humongous region series
4482 4515 // from the _hum_* fields and updates those accordingly. It assumes
4483 4516 // that the _hum_* fields have already been set up from the "starts
4484 4517 // humongous" region and that we visit the regions in address order.
4485 4518 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4486 4519 size_t* capacity_bytes,
4487 4520 size_t* prev_live_bytes,
4488 4521 size_t* next_live_bytes) {
4489 4522 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4490 4523 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4491 4524 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4492 4525 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4493 4526 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4494 4527 }
4495 4528
4496 4529 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4497 4530 const char* type = "";
4498 4531 HeapWord* bottom = r->bottom();
4499 4532 HeapWord* end = r->end();
4500 4533 size_t capacity_bytes = r->capacity();
4501 4534 size_t used_bytes = r->used();
4502 4535 size_t prev_live_bytes = r->live_bytes();
4503 4536 size_t next_live_bytes = r->next_live_bytes();
4504 4537 double gc_eff = r->gc_efficiency();
4505 4538 if (r->used() == 0) {
4506 4539 type = "FREE";
4507 4540 } else if (r->is_survivor()) {
4508 4541 type = "SURV";
4509 4542 } else if (r->is_young()) {
4510 4543 type = "EDEN";
4511 4544 } else if (r->startsHumongous()) {
4512 4545 type = "HUMS";
4513 4546
4514 4547 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4515 4548 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4516 4549 "they should have been zeroed after the last time we used them");
4517 4550 // Set up the _hum_* fields.
4518 4551 _hum_capacity_bytes = capacity_bytes;
4519 4552 _hum_used_bytes = used_bytes;
4520 4553 _hum_prev_live_bytes = prev_live_bytes;
4521 4554 _hum_next_live_bytes = next_live_bytes;
4522 4555 get_hum_bytes(&used_bytes, &capacity_bytes,
4523 4556 &prev_live_bytes, &next_live_bytes);
4524 4557 end = bottom + HeapRegion::GrainWords;
4525 4558 } else if (r->continuesHumongous()) {
4526 4559 type = "HUMC";
4527 4560 get_hum_bytes(&used_bytes, &capacity_bytes,
4528 4561 &prev_live_bytes, &next_live_bytes);
4529 4562 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4530 4563 } else {
4531 4564 type = "OLD";
4532 4565 }
4533 4566
4534 4567 _total_used_bytes += used_bytes;
4535 4568 _total_capacity_bytes += capacity_bytes;
4536 4569 _total_prev_live_bytes += prev_live_bytes;
4537 4570 _total_next_live_bytes += next_live_bytes;
4538 4571
4539 4572 // Print a line for this particular region.
4540 4573 _out->print_cr(G1PPRL_LINE_PREFIX
4541 4574 G1PPRL_TYPE_FORMAT
4542 4575 G1PPRL_ADDR_BASE_FORMAT
4543 4576 G1PPRL_BYTE_FORMAT
4544 4577 G1PPRL_BYTE_FORMAT
4545 4578 G1PPRL_BYTE_FORMAT
4546 4579 G1PPRL_DOUBLE_FORMAT,
4547 4580 type, bottom, end,
4548 4581 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4549 4582
4550 4583 return false;
4551 4584 }
4552 4585
4553 4586 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4554 4587 // Print the footer of the output.
4555 4588 _out->print_cr(G1PPRL_LINE_PREFIX);
4556 4589 _out->print_cr(G1PPRL_LINE_PREFIX
4557 4590 " SUMMARY"
4558 4591 G1PPRL_SUM_MB_FORMAT("capacity")
4559 4592 G1PPRL_SUM_MB_PERC_FORMAT("used")
4560 4593 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4561 4594 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4562 4595 bytes_to_mb(_total_capacity_bytes),
4563 4596 bytes_to_mb(_total_used_bytes),
4564 4597 perc(_total_used_bytes, _total_capacity_bytes),
4565 4598 bytes_to_mb(_total_prev_live_bytes),
4566 4599 perc(_total_prev_live_bytes, _total_capacity_bytes),
4567 4600 bytes_to_mb(_total_next_live_bytes),
4568 4601 perc(_total_next_live_bytes, _total_capacity_bytes));
4569 4602 _out->cr();
4570 4603 }
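// Taken together, the closure emits a listing of roughly this shape
// (all values illustrative, columns compressed for readability):
//
//   ### PHASE Post-Marking @ 2.417
//   ### HEAP committed: 0xf0000000-0x100000000 region-size: 1048576
//   ###
//   ###  type     address-range        used prev-live next-live gc-eff
//   ###  OLD  0xf0000000-0xf0100000 1048576    917504    524288  312.5
//   ###
//   ###  SUMMARY capacity: 256.00 MB used: 128.00 MB / 50.00 %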
(4326 lines elided)