rev 3708 : 8000244: G1: Ergonomically set MarkStackSize and use virtual space for global marking stack
Summary: Set the value of MarkStackSize to a value based on the number of parallel marking threads with a reasonable minimum. Expand the marking stack if we have to restart marking due to an overflow up to a reasonable maximum. Allocate the underlying space for the marking stack from virtual memory.
Reviewed-by: jmasa
rev 3709 : imported patch reuse-old-marking-stack
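
In outline, the ergonomic sizing budgets one task queue's worth of entries per parallel marking thread and clamps the result to [1, MarkStackSizeMax]. A standalone sketch of that arithmetic (the constants below are illustrative stand-ins for the real HotSpot flags and TASKQUEUE_SIZE; the clamp itself mirrors the ConcurrentMark constructor change in this patch):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    int main() {
      // Hypothetical values; in HotSpot these are globals/flags.
      const uint64_t TASKQUEUE_SIZE           = 128 * 1024;
      const uint64_t MarkStackSize            = 32 * 1024;       // default flag value
      const uint64_t MarkStackSizeMax         = 4 * 1024 * 1024;
      const uint64_t parallel_marking_threads = 4;

      // Same MIN2(max, MAX2(flag, threads * queue)) clamp as the patch.
      uint64_t mark_stack_size =
          std::min(MarkStackSizeMax,
                   std::max(MarkStackSize, parallel_marking_threads * TASKQUEUE_SIZE));
      std::cout << "ergonomic MarkStackSize = " << mark_stack_size << " entries\n";
      return 0;
    }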
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 32 #include "gc_implementation/g1/g1Log.hpp"
33 33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
34 34 #include "gc_implementation/g1/g1RemSet.hpp"
35 35 #include "gc_implementation/g1/heapRegion.inline.hpp"
36 36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
37 37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
38 38 #include "gc_implementation/shared/vmGCOperations.hpp"
39 39 #include "memory/genOopClosures.inline.hpp"
40 40 #include "memory/referencePolicy.hpp"
41 41 #include "memory/resourceArea.hpp"
42 42 #include "oops/oop.inline.hpp"
43 43 #include "runtime/handles.inline.hpp"
44 44 #include "runtime/java.hpp"
45 45 #include "services/memTracker.hpp"
46 46
47 47 // Concurrent marking bit map wrapper
48 48
49 -CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
50 - _bm((uintptr_t*)NULL,0),
49 +CMBitMapRO::CMBitMapRO(int shifter) :
50 + _bm(),
51 51 _shifter(shifter) {
52 - _bmStartWord = (HeapWord*)(rs.base());
53 - _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
54 - ReservedSpace brs(ReservedSpace::allocation_align_size_up(
55 - (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
56 -
57 - MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
58 -
59 - guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
60 - // For now we'll just commit all of the bit map up fromt.
61 - // Later on we'll try to be more parsimonious with swap.
62 - guarantee(_virtual_space.initialize(brs, brs.size()),
63 - "couldn't reseve backing store for concurrent marking bit map");
64 - assert(_virtual_space.committed_size() == brs.size(),
65 - "didn't reserve backing store for all of concurrent marking bit map?");
66 - _bm.set_map((uintptr_t*)_virtual_space.low());
67 - assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
68 - _bmWordSize, "inconsistency in bit map sizing");
69 - _bm.set_size(_bmWordSize >> _shifter);
52 + _bmStartWord = 0;
53 + _bmWordSize = 0;
70 54 }
71 55
72 56 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
73 57 HeapWord* limit) const {
74 58 // First we must round addr *up* to a possible object boundary.
75 59 addr = (HeapWord*)align_size_up((intptr_t)addr,
76 60 HeapWordSize << _shifter);
77 61 size_t addrOffset = heapWordToOffset(addr);
78 62 if (limit == NULL) {
79 63 limit = _bmStartWord + _bmWordSize;
80 64 }
81 65 size_t limitOffset = heapWordToOffset(limit);
82 66 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
83 67 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
84 68 assert(nextAddr >= addr, "get_next_one postcondition");
85 69 assert(nextAddr == limit || isMarked(nextAddr),
86 70 "get_next_one postcondition");
87 71 return nextAddr;
88 72 }
89 73
90 74 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
91 75 HeapWord* limit) const {
92 76 size_t addrOffset = heapWordToOffset(addr);
93 77 if (limit == NULL) {
94 78 limit = _bmStartWord + _bmWordSize;
95 79 }
96 80 size_t limitOffset = heapWordToOffset(limit);
97 81 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
98 82 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
99 83 assert(nextAddr >= addr, "get_next_one postcondition");
100 84 assert(nextAddr == limit || !isMarked(nextAddr),
101 85 "get_next_one postcondition");
102 86 return nextAddr;
103 87 }
104 88
105 89 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
106 90 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
107 91 return (int) (diff >> _shifter);
108 92 }
109 93
110 94 #ifndef PRODUCT
111 -bool CMBitMapRO::covers(ReservedSpace rs) const {
95 +bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
112 96 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
113 97 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
114 98 "size inconsistency");
115 - return _bmStartWord == (HeapWord*)(rs.base()) &&
116 - _bmWordSize == rs.size()>>LogHeapWordSize;
99 + return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
100 + _bmWordSize == heap_rs.size()>>LogHeapWordSize;
117 101 }
118 102 #endif
119 103
104 +bool CMBitMap::allocate(ReservedSpace heap_rs) {
105 + _bmStartWord = (HeapWord*)(heap_rs.base());
106 + _bmWordSize = heap_rs.size()/HeapWordSize; // heap_rs.size() is in bytes
107 + ReservedSpace brs(ReservedSpace::allocation_align_size_up(
108 + (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
109 + if (!brs.is_reserved()) {
110 + warning("ConcurrentMark marking bit map allocation failure");
111 + return false;
112 + }
113 + MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
114 + // For now we'll just commit all of the bit map up front.
115 + // Later on we'll try to be more parsimonious with swap.
116 + if (!_virtual_space.initialize(brs, brs.size())) {
117 + warning("ConcurrentMark marking bit map backing store failure");
118 + return false;
119 + }
120 + assert(_virtual_space.committed_size() == brs.size(),
121 + "didn't reserve backing store for all of concurrent marking bit map?");
122 + _bm.set_map((uintptr_t*)_virtual_space.low());
123 + assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
124 + _bmWordSize, "inconsistency in bit map sizing");
125 + _bm.set_size(_bmWordSize >> _shifter);
126 + return true;
127 +}
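
With shifter == MinObjAlignment - 1, the bit map above keeps one bit per possible object start, so the backing store is roughly heap-words >> LogBitsPerByte bytes, plus one byte for rounding. A worked example under assumed 64-bit values (HeapWordSize == 8, hence shifter == 0, one bit per heap word):

    #include <cstddef>
    #include <cstdio>

    int main() {
      const size_t heap_bytes     = 1024UL * 1024 * 1024; // 1 GB heap, illustrative
      const size_t HeapWordSize   = 8;                    // 64-bit VM
      const int    shifter        = 0;                    // MinObjAlignment - 1
      const int    LogBitsPerByte = 3;

      size_t bm_word_size = heap_bytes / HeapWordSize;    // heap words covered
      size_t bitmap_bytes = (bm_word_size >> (shifter + LogBitsPerByte)) + 1;
      printf("bit map backing store: %zu bytes (~%zu MB)\n",
             bitmap_bytes, bitmap_bytes / (1024 * 1024));
      return 0;
    }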
128 +
120 129 void CMBitMap::clearAll() {
121 130 _bm.clear();
122 131 return;
123 132 }
124 133
125 134 void CMBitMap::markRange(MemRegion mr) {
126 135 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
127 136 assert(!mr.is_empty(), "unexpected empty region");
128 137 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
129 138 ((HeapWord *) mr.end())),
130 139 "markRange memory region end is not card aligned");
131 140 // convert address range into offset range
132 141 _bm.at_put_range(heapWordToOffset(mr.start()),
133 142 heapWordToOffset(mr.end()), true);
134 143 }
135 144
136 145 void CMBitMap::clearRange(MemRegion mr) {
137 146 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
138 147 assert(!mr.is_empty(), "unexpected empty region");
139 148 // convert address range into offset range
140 149 _bm.at_put_range(heapWordToOffset(mr.start()),
141 150 heapWordToOffset(mr.end()), false);
142 151 }
143 152
144 153 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
145 154 HeapWord* end_addr) {
146 155 HeapWord* start = getNextMarkedWordAddress(addr);
147 156 start = MIN2(start, end_addr);
148 157 HeapWord* end = getNextUnmarkedWordAddress(start);
149 158 end = MIN2(end, end_addr);
150 159 assert(start <= end, "Consistency check");
151 160 MemRegion mr(start, end);
152 161 if (!mr.is_empty()) {
153 162 clearRange(mr);
154 163 }
155 164 return mr;
156 165 }
157 166
158 167 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
159 168 _base(NULL), _cm(cm)
160 169 #ifdef ASSERT
161 170 , _drain_in_progress(false)
162 171 , _drain_in_progress_yields(false)
163 172 #endif
164 173 {}
165 174
166 -void CMMarkStack::allocate(size_t size) {
167 - _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
168 - if (_base == NULL) {
169 - vm_exit_during_initialization("Failed to allocate CM region mark stack");
175 +bool CMMarkStack::allocate(size_t capacity) {
176 + // allocate a stack of the requisite depth
177 + ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
178 + if (!rs.is_reserved()) {
179 + warning("ConcurrentMark MarkStack allocation failure");
180 + return false;
170 181 }
171 - _index = 0;
172 - _capacity = (jint) size;
182 + MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
183 + if (!_virtual_space.initialize(rs, rs.size())) {
184 + warning("ConcurrentMark MarkStack backing store failure");
185 + // Release the virtual memory reserved for the marking stack
186 + rs.release();
187 + return false;
188 + }
189 + assert(_virtual_space.committed_size() == rs.size(),
190 + "Didn't reserve backing store for all of ConcurrentMark stack?");
191 + _rs = rs;
192 + _base = (oop*) _virtual_space.low();
193 + setEmpty();
194 + _capacity = (jint) capacity;
173 195 _saved_index = -1;
174 196 NOT_PRODUCT(_max_depth = 0);
197 + return true;
198 +}
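
The switch from NEW_C_HEAP_ARRAY to a ReservedSpace-backed stack means an allocation failure now surfaces as a warning plus a false return rather than vm_exit_during_initialization, and the memory comes from reserved-and-committed virtual space. A rough POSIX analogue of that reserve-and-commit step (the real code uses HotSpot's ReservedSpace/VirtualSpace, not mmap directly):

    #include <sys/mman.h>
    #include <stdio.h>

    int main() {
      size_t capacity = 32 * 1024;               // entries; illustrative
      size_t bytes    = capacity * sizeof(void*);
      void* base = mmap(NULL, bytes, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (base == MAP_FAILED) {
        perror("mmap");                          // failure is non-fatal:
        return 1;                                // caller warns and bails out
      }
      munmap(base, bytes);                       // released in the destructor
      return 0;
    }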
199 +
200 +void CMMarkStack::expand() {
 201 +  // Called during remark if we've overflowed the marking stack during marking.
 202 +  assert(isEmpty(), "stack should have been emptied while handling overflow");
203 + assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
204 + // Clear expansion flag
205 + _should_expand = false;
206 + if (_capacity == (jint) MarkStackSizeMax) {
207 + if (PrintGCDetails && Verbose) {
208 + gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
209 + }
210 + return;
211 + }
212 + // Double capacity if possible
213 + jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
214 + // Do not give up existing stack until we have managed to
215 + // get the double capacity that we desired.
216 + ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
217 + sizeof(oop)));
218 + if (!rs.is_reserved()) {
219 + if (PrintGCDetails && Verbose) {
220 + // Failed to double capacity, continue;
221 + gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
222 + SIZE_FORMAT "K to " SIZE_FORMAT "K",
223 + _capacity / K, new_capacity / K);
224 + }
225 + return;
226 + }
227 +
228 + // Clear the backing store fields associated with the space for the
 229 +  // old marking stack. Note this doesn't actually release the space.
230 + _virtual_space.release();
231 +
232 + // Reinitialize virtual space for the expanded stack.
233 + if (!_virtual_space.initialize(rs, rs.size())) {
 234 +    // We failed to commit the space for the expanded marking stack.
235 + // Release the expanded reserved space...
236 + rs.release();
237 + // ... and reinitialize with the previous un-expanded space.
238 + if (_virtual_space.initialize(_rs, _rs.size())) {
239 + if (PrintGCDetails && Verbose) {
240 + gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
241 + SIZE_FORMAT "K to " SIZE_FORMAT "K",
242 + _capacity / K, new_capacity / K);
243 + }
244 + } else {
245 + // The previous backing store space should have been already
246 + // committed but we failed to initialize the virtual space
247 + // for some reason.
248 + fatal("Error re-initializing marking stack with old capacity");
249 + }
250 + } else {
251 + // We successfully committed the space for the expanded marking stack.
252 + if (PrintGCDetails && Verbose) {
253 + gclog_or_tty->print(" Successfully expanded marking stack capacity from "
254 + SIZE_FORMAT "K to " SIZE_FORMAT "K",
255 + _capacity / K, new_capacity / K);
256 + }
257 + // Release the previous (unexpanded) space.
258 + _rs.release();
259 + // Record the new (expanded) space.
260 + _rs = rs;
261 + // Record the new capacity
262 + _capacity = new_capacity;
263 + }
264 + assert(_virtual_space.committed_size() == _rs.size(),
265 + "Didn't reserve backing store for all of ConcurrentMark stack?");
266 + _base = (oop*)(_virtual_space.low());
267 + _index = 0;
268 +}
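
The key ordering in expand() is that the doubled reservation is obtained before the old one is given up, so a failed expansion leaves the original stack usable. A simplified sketch of that policy (plain malloc/free standing in for ReservedSpace/VirtualSpace, and without the re-initialize fallback the real code performs):

    #include <stdlib.h>

    // Returns true if the stack now has a larger capacity; on failure the
    // old backing store is untouched and marking continues with it.
    static bool expand_stack(void** base, size_t* capacity, size_t max_capacity) {
      if (*capacity == max_capacity) {
        return false;                        // already at the permitted maximum
      }
      size_t new_capacity = *capacity * 2;   // double if possible
      if (new_capacity > max_capacity) {
        new_capacity = max_capacity;
      }
      void* new_base = malloc(new_capacity); // stand-in for the new ReservedSpace
      if (new_base == NULL) {
        return false;                        // keep the old, un-expanded stack
      }
      free(*base);                           // real code releases old virtual space
      *base = new_base;
      *capacity = new_capacity;
      return true;
    }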
269 +
270 +void CMMarkStack::set_should_expand() {
 271 +  // If we're resetting the marking state because of a
 272 +  // marking stack overflow, record that we should, if
273 + // possible, expand the stack.
274 + _should_expand = _cm->has_overflown();
175 275 }
176 276
177 277 CMMarkStack::~CMMarkStack() {
178 278 if (_base != NULL) {
179 - FREE_C_HEAP_ARRAY(oop, _base, mtGC);
279 + _base = NULL;
280 + _virtual_space.release();
180 281 }
181 282 }
182 283
183 284 void CMMarkStack::par_push(oop ptr) {
184 285 while (true) {
185 286 if (isFull()) {
186 287 _overflow = true;
187 288 return;
188 289 }
189 290 // Otherwise...
190 291 jint index = _index;
191 292 jint next_index = index+1;
192 293 jint res = Atomic::cmpxchg(next_index, &_index, index);
193 294 if (res == index) {
194 295 _base[index] = ptr;
195 296 // Note that we don't maintain this atomically. We could, but it
196 297 // doesn't seem necessary.
197 298 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
198 299 return;
199 300 }
200 301 // Otherwise, we need to try again.
201 302 }
202 303 }
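
par_push() claims a slot by compare-and-swapping the shared index and only then stores into it; a lost race simply retries. The same shape with std::atomic in place of HotSpot's Atomic::cmpxchg (a sketch; the real stack also sets _overflow and tracks _max_depth):

    #include <atomic>
    #include <cstddef>

    template <typename T, size_t N>
    struct ParStack {
      T data[N];
      std::atomic<int> index{0};

      bool par_push(T v) {
        while (true) {
          int idx = index.load();
          if (idx >= (int) N) {
            return false;                       // full: caller records overflow
          }
          if (index.compare_exchange_weak(idx, idx + 1)) {
            data[idx] = v;                      // this thread owns slot idx
            return true;
          }
          // CAS lost the race; re-read the index and try again.
        }
      }
    };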
203 304
204 305 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
205 306 while (true) {
206 307 if (isFull()) {
207 308 _overflow = true;
208 309 return;
209 310 }
210 311 // Otherwise...
211 312 jint index = _index;
212 313 jint next_index = index + n;
213 314 if (next_index > _capacity) {
214 315 _overflow = true;
215 316 return;
216 317 }
217 318 jint res = Atomic::cmpxchg(next_index, &_index, index);
218 319 if (res == index) {
219 320 for (int i = 0; i < n; i++) {
220 - int ind = index + i;
321 + int ind = index + i;
221 322 assert(ind < _capacity, "By overflow test above.");
222 323 _base[ind] = ptr_arr[i];
223 324 }
224 325 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
225 326 return;
226 327 }
227 328 // Otherwise, we need to try again.
228 329 }
229 330 }
230 331
231 -
232 332 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
233 333 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
234 334 jint start = _index;
235 335 jint next_index = start + n;
236 336 if (next_index > _capacity) {
237 337 _overflow = true;
238 338 return;
239 339 }
240 340 // Otherwise.
241 341 _index = next_index;
242 342 for (int i = 0; i < n; i++) {
243 343 int ind = start + i;
244 344 assert(ind < _capacity, "By overflow test above.");
245 345 _base[ind] = ptr_arr[i];
246 346 }
347 + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
247 348 }
248 349
249 -
250 350 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
251 351 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
252 352 jint index = _index;
253 353 if (index == 0) {
254 354 *n = 0;
255 355 return false;
256 356 } else {
257 357 int k = MIN2(max, index);
258 - jint new_ind = index - k;
358 + jint new_ind = index - k;
259 359 for (int j = 0; j < k; j++) {
260 360 ptr_arr[j] = _base[new_ind + j];
261 361 }
262 362 _index = new_ind;
263 363 *n = k;
264 364 return true;
265 365 }
266 366 }
267 367
268 368 template<class OopClosureClass>
269 369 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
270 370 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
271 371 || SafepointSynchronize::is_at_safepoint(),
272 372 "Drain recursion must be yield-safe.");
273 373 bool res = true;
274 374 debug_only(_drain_in_progress = true);
275 375 debug_only(_drain_in_progress_yields = yield_after);
276 376 while (!isEmpty()) {
277 377 oop newOop = pop();
278 378 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
279 379 assert(newOop->is_oop(), "Expected an oop");
280 380 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
281 381 "only grey objects on this stack");
282 382 newOop->oop_iterate(cl);
283 383 if (yield_after && _cm->do_yield_check()) {
284 384 res = false;
285 385 break;
286 386 }
287 387 }
288 388 debug_only(_drain_in_progress = false);
289 389 return res;
290 390 }
291 391
292 392 void CMMarkStack::note_start_of_gc() {
293 393 assert(_saved_index == -1,
294 394 "note_start_of_gc()/end_of_gc() bracketed incorrectly");
295 395 _saved_index = _index;
296 396 }
297 397
298 398 void CMMarkStack::note_end_of_gc() {
299 399 // This is intentionally a guarantee, instead of an assert. If we
300 400 // accidentally add something to the mark stack during GC, it
 301  401   // will be a correctness issue so it's better if we crash. We'll
302 402 // only check this once per GC anyway, so it won't be a performance
303 403 // issue in any way.
304 404 guarantee(_saved_index == _index,
305 405 err_msg("saved index: %d index: %d", _saved_index, _index));
306 406 _saved_index = -1;
307 407 }
308 408
309 409 void CMMarkStack::oops_do(OopClosure* f) {
310 410 assert(_saved_index == _index,
311 411 err_msg("saved index: %d index: %d", _saved_index, _index));
312 412 for (int i = 0; i < _index; i += 1) {
313 413 f->do_oop(&_base[i]);
314 414 }
315 415 }
316 416
317 417 bool ConcurrentMark::not_yet_marked(oop obj) const {
318 418 return _g1h->is_obj_ill(obj);
319 419 }
320 420
321 421 CMRootRegions::CMRootRegions() :
322 422 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
323 423 _should_abort(false), _next_survivor(NULL) { }
324 424
325 425 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
326 426 _young_list = g1h->young_list();
327 427 _cm = cm;
328 428 }
329 429
330 430 void CMRootRegions::prepare_for_scan() {
331 431 assert(!scan_in_progress(), "pre-condition");
332 432
333 433 // Currently, only survivors can be root regions.
334 434 assert(_next_survivor == NULL, "pre-condition");
335 435 _next_survivor = _young_list->first_survivor_region();
336 436 _scan_in_progress = (_next_survivor != NULL);
337 437 _should_abort = false;
338 438 }
339 439
340 440 HeapRegion* CMRootRegions::claim_next() {
341 441 if (_should_abort) {
342 442 // If someone has set the should_abort flag, we return NULL to
343 443 // force the caller to bail out of their loop.
344 444 return NULL;
345 445 }
346 446
347 447 // Currently, only survivors can be root regions.
348 448 HeapRegion* res = _next_survivor;
349 449 if (res != NULL) {
350 450 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
351 451 // Read it again in case it changed while we were waiting for the lock.
352 452 res = _next_survivor;
353 453 if (res != NULL) {
354 454 if (res == _young_list->last_survivor_region()) {
355 455 // We just claimed the last survivor so store NULL to indicate
356 456 // that we're done.
357 457 _next_survivor = NULL;
358 458 } else {
359 459 _next_survivor = res->get_next_young_region();
360 460 }
361 461 } else {
362 462 // Someone else claimed the last survivor while we were trying
363 463 // to take the lock so nothing else to do.
364 464 }
365 465 }
366 466 assert(res == NULL || res->is_survivor(), "post-condition");
367 467
368 468 return res;
369 469 }
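
claim_next() does a racy read first and only takes RootRegionScan_lock when there appears to be work, re-reading the cursor under the lock so two claimers never hand out the same survivor. The pattern in isolation (std::mutex standing in for the HotSpot lock, with a hypothetical Region type):

    #include <mutex>
    #include <cstddef>

    struct Region { Region* next; };

    Region* claim_next(Region** cursor, Region* last, std::mutex& lock) {
      Region* res = *cursor;              // unlocked peek; NULL means "done"
      if (res != NULL) {
        std::lock_guard<std::mutex> x(lock);
        res = *cursor;                    // re-read now that we hold the lock
        if (res != NULL) {
          *cursor = (res == last) ? NULL : res->next;
        }
      }
      return res;
    }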
370 470
371 471 void CMRootRegions::scan_finished() {
372 472 assert(scan_in_progress(), "pre-condition");
373 473
374 474 // Currently, only survivors can be root regions.
375 475 if (!_should_abort) {
376 476 assert(_next_survivor == NULL, "we should have claimed all survivors");
377 477 }
378 478 _next_survivor = NULL;
379 479
380 480 {
381 481 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
382 482 _scan_in_progress = false;
383 483 RootRegionScan_lock->notify_all();
384 484 }
385 485 }
386 486
387 487 bool CMRootRegions::wait_until_scan_finished() {
388 488 if (!scan_in_progress()) return false;
389 489
390 490 {
391 491 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
392 492 while (scan_in_progress()) {
393 493 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
394 494 }
395 495 }
396 496 return true;
397 497 }
398 498
399 499 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
400 500 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
401 501 #endif // _MSC_VER
402 502
403 503 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
404 504 return MAX2((n_par_threads + 2) / 4, 1U);
405 505 }
406 506
407 -ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
408 - _markBitMap1(rs, MinObjAlignment - 1),
409 - _markBitMap2(rs, MinObjAlignment - 1),
507 +ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
508 + _g1h(g1h),
509 + _markBitMap1(MinObjAlignment - 1),
510 + _markBitMap2(MinObjAlignment - 1),
410 511
411 512 _parallel_marking_threads(0),
412 513 _max_parallel_marking_threads(0),
413 514 _sleep_factor(0.0),
414 515 _marking_task_overhead(1.0),
415 516 _cleanup_sleep_factor(0.0),
416 517 _cleanup_task_overhead(1.0),
417 518 _cleanup_list("Cleanup List"),
418 - _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
419 - _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
420 - CardTableModRefBS::card_shift,
421 - false /* in_resource_area*/),
519 + _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
520 + _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
521 + CardTableModRefBS::card_shift,
522 + false /* in_resource_area*/),
422 523
423 524 _prevMarkBitMap(&_markBitMap1),
424 525 _nextMarkBitMap(&_markBitMap2),
425 526
426 527 _markStack(this),
427 528 // _finger set in set_non_marking_state
428 529
429 530 _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
430 531 // _active_tasks set in set_non_marking_state
431 532 // _tasks set inside the constructor
432 533 _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
433 534 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
434 535
435 536 _has_overflown(false),
436 537 _concurrent(false),
437 538 _has_aborted(false),
438 539 _restart_for_overflow(false),
439 540 _concurrent_marking_in_progress(false),
440 541
441 542 // _verbose_level set below
442 543
443 544 _init_times(),
444 545 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
445 546 _cleanup_times(),
446 547 _total_counting_time(0.0),
447 548 _total_rs_scrub_time(0.0),
448 549
449 550 _parallel_workers(NULL),
450 551
451 552 _count_card_bitmaps(NULL),
452 - _count_marked_bytes(NULL) {
553 + _count_marked_bytes(NULL),
554 + _completed_initialization(false) {
453 555 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
454 556 if (verbose_level < no_verbose) {
455 557 verbose_level = no_verbose;
456 558 }
457 559 if (verbose_level > high_verbose) {
458 560 verbose_level = high_verbose;
459 561 }
460 562 _verbose_level = verbose_level;
461 563
462 564 if (verbose_low()) {
463 565 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
464 566 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
465 567 }
466 568
467 - _markStack.allocate(MarkStackSize);
569 + if (!_markBitMap1.allocate(heap_rs)) {
570 + warning("Failed to allocate first CM bit map");
571 + return;
572 + }
573 + if (!_markBitMap2.allocate(heap_rs)) {
574 + warning("Failed to allocate second CM bit map");
575 + return;
576 + }
468 577
469 578 // Create & start a ConcurrentMark thread.
470 579 _cmThread = new ConcurrentMarkThread(this);
471 580 assert(cmThread() != NULL, "CM Thread should have been created");
472 581 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
473 582
474 - _g1h = G1CollectedHeap::heap();
475 583 assert(CGC_lock != NULL, "Where's the CGC_lock?");
476 - assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
477 - assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
584 + assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
585 + assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
478 586
479 587 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
480 588 satb_qs.set_buffer_size(G1SATBBufferSize);
481 589
482 590 _root_regions.init(_g1h, this);
483 591
484 - _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
485 - _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
486 -
487 - _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
488 - _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
489 -
490 - BitMap::idx_t card_bm_size = _card_bm.size();
491 -
492 - // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
493 - _active_tasks = _max_worker_id;
494 - for (uint i = 0; i < _max_worker_id; ++i) {
495 - CMTaskQueue* task_queue = new CMTaskQueue();
496 - task_queue->initialize();
497 - _task_queues->register_queue(i, task_queue);
498 -
499 - _count_card_bitmaps[i] = BitMap(card_bm_size, false);
500 - _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
501 -
502 - _tasks[i] = new CMTask(i, this,
503 - _count_marked_bytes[i],
504 - &_count_card_bitmaps[i],
505 - task_queue, _task_queues);
506 -
507 - _accum_task_vtime[i] = 0.0;
508 - }
509 -
510 - // Calculate the card number for the bottom of the heap. Used
511 - // in biasing indexes into the accounting card bitmaps.
512 - _heap_bottom_card_num =
513 - intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
514 - CardTableModRefBS::card_shift);
515 -
516 - // Clear all the liveness counting data
517 - clear_all_count_data();
518 -
519 592 if (ConcGCThreads > ParallelGCThreads) {
520 - vm_exit_during_initialization("Can't have more ConcGCThreads "
521 - "than ParallelGCThreads.");
593 + warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
594 + "than ParallelGCThreads (" UINT32_FORMAT ").",
595 + ConcGCThreads, ParallelGCThreads);
596 + return;
522 597 }
523 598 if (ParallelGCThreads == 0) {
524 599 // if we are not running with any parallel GC threads we will not
525 600 // spawn any marking threads either
526 601 _parallel_marking_threads = 0;
527 602 _max_parallel_marking_threads = 0;
528 603 _sleep_factor = 0.0;
529 604 _marking_task_overhead = 1.0;
530 605 } else {
531 606 if (ConcGCThreads > 0) {
532 607 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
533 608 // if both are set
534 609
535 610 _parallel_marking_threads = (uint) ConcGCThreads;
536 611 _max_parallel_marking_threads = _parallel_marking_threads;
537 612 _sleep_factor = 0.0;
538 613 _marking_task_overhead = 1.0;
539 614 } else if (G1MarkingOverheadPercent > 0) {
540 615 // we will calculate the number of parallel marking threads
541 616 // based on a target overhead with respect to the soft real-time
542 617 // goal
543 618
544 619 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
545 620 double overall_cm_overhead =
546 621 (double) MaxGCPauseMillis * marking_overhead /
547 622 (double) GCPauseIntervalMillis;
548 623 double cpu_ratio = 1.0 / (double) os::processor_count();
549 624 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
550 625 double marking_task_overhead =
551 626 overall_cm_overhead / marking_thread_num *
552 627 (double) os::processor_count();
553 628 double sleep_factor =
554 629 (1.0 - marking_task_overhead) / marking_task_overhead;
555 630
556 631 _parallel_marking_threads = (uint) marking_thread_num;
557 632 _max_parallel_marking_threads = _parallel_marking_threads;
558 633 _sleep_factor = sleep_factor;
559 634 _marking_task_overhead = marking_task_overhead;
560 635 } else {
561 636 _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
562 637 _max_parallel_marking_threads = _parallel_marking_threads;
563 638 _sleep_factor = 0.0;
564 639 _marking_task_overhead = 1.0;
565 640 }
566 641
567 642 if (parallel_marking_threads() > 1) {
568 643 _cleanup_task_overhead = 1.0;
569 644 } else {
570 645 _cleanup_task_overhead = marking_task_overhead();
571 646 }
572 647 _cleanup_sleep_factor =
573 648 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
574 649
575 650 #if 0
576 651 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
577 652 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
578 653 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
579 654 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
580 655 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
581 656 #endif
582 657
583 658 guarantee(parallel_marking_threads() > 0, "peace of mind");
584 659 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
585 660 _max_parallel_marking_threads, false, true);
586 661 if (_parallel_workers == NULL) {
587 662 vm_exit_during_initialization("Failed necessary allocation.");
588 663 } else {
589 664 _parallel_workers->initialize_workers();
590 665 }
591 666 }
592 667
668 + if (FLAG_IS_DEFAULT(MarkStackSize)) {
669 + uintx mark_stack_size =
670 + MIN2(MarkStackSizeMax,
671 + MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
672 + // Verify that the calculated value for MarkStackSize is in range.
673 + // It would be nice to use the private utility routine from Arguments.
674 + if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
675 + warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
676 + "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 677 +              mark_stack_size, (uintx) 1, MarkStackSizeMax);
678 + return;
679 + }
680 + FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
681 + } else {
682 + // Verify MarkStackSize is in range.
683 + if (FLAG_IS_CMDLINE(MarkStackSize)) {
684 + if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
685 + if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
686 + warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
687 + "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 688 +                MarkStackSize, (uintx) 1, MarkStackSizeMax);
689 + return;
690 + }
691 + } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
692 + if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
693 + warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
694 + " or for MarkStackSizeMax (" UINTX_FORMAT ")",
695 + MarkStackSize, MarkStackSizeMax);
696 + return;
697 + }
698 + }
699 + }
700 + }
701 +
702 + if (!_markStack.allocate(MarkStackSize)) {
703 + warning("Failed to allocate CM marking stack");
704 + return;
705 + }
706 +
707 + _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
708 + _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
709 +
710 + _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
711 + _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
712 +
713 + BitMap::idx_t card_bm_size = _card_bm.size();
714 +
715 + // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
716 + _active_tasks = _max_worker_id;
717 +
718 + size_t max_regions = (size_t) _g1h->max_regions();
719 + for (uint i = 0; i < _max_worker_id; ++i) {
720 + CMTaskQueue* task_queue = new CMTaskQueue();
721 + task_queue->initialize();
722 + _task_queues->register_queue(i, task_queue);
723 +
724 + _count_card_bitmaps[i] = BitMap(card_bm_size, false);
725 + _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
726 +
727 + _tasks[i] = new CMTask(i, this,
728 + _count_marked_bytes[i],
729 + &_count_card_bitmaps[i],
730 + task_queue, _task_queues);
731 +
732 + _accum_task_vtime[i] = 0.0;
733 + }
734 +
735 + // Calculate the card number for the bottom of the heap. Used
736 + // in biasing indexes into the accounting card bitmaps.
737 + _heap_bottom_card_num =
738 + intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
739 + CardTableModRefBS::card_shift);
740 +
741 + // Clear all the liveness counting data
742 + clear_all_count_data();
743 +
593 744 // so that the call below can read a sensible value
594 - _heap_start = (HeapWord*) rs.base();
745 + _heap_start = (HeapWord*) heap_rs.base();
595 746 set_non_marking_state();
747 + _completed_initialization = true;
596 748 }
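
For the G1MarkingOverheadPercent branch above, a worked example with hypothetical flag values (8 CPUs, a 10% overhead target, 200ms pauses every 1000ms) yields one marking thread running about 16% of the time and sleeping 5.25x as long as it works:

    #include <cmath>
    #include <cstdio>

    int main() {
      const double G1MarkingOverheadPercent = 10.0;   // hypothetical flag values
      const double MaxGCPauseMillis         = 200.0;
      const double GCPauseIntervalMillis    = 1000.0;
      const int    processor_count          = 8;

      double marking_overhead    = G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
          MaxGCPauseMillis * marking_overhead / GCPauseIntervalMillis;    // 0.02
      double cpu_ratio           = 1.0 / processor_count;                 // 0.125
      double marking_thread_num  = ceil(overall_cm_overhead / cpu_ratio); // 1
      double marking_task_overhead =
          overall_cm_overhead / marking_thread_num * processor_count;     // 0.16
      double sleep_factor = (1.0 - marking_task_overhead) / marking_task_overhead;

      printf("threads=%.0f task_overhead=%.2f sleep_factor=%.2f\n",
             marking_thread_num, marking_task_overhead, sleep_factor);
      return 0;
    }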
597 749
598 750 void ConcurrentMark::update_g1_committed(bool force) {
599 751 // If concurrent marking is not in progress, then we do not need to
600 752 // update _heap_end.
601 753 if (!concurrent_marking_in_progress() && !force) return;
602 754
603 755 MemRegion committed = _g1h->g1_committed();
604 756 assert(committed.start() == _heap_start, "start shouldn't change");
605 757 HeapWord* new_end = committed.end();
606 758 if (new_end > _heap_end) {
607 759 // The heap has been expanded.
608 760
609 761 _heap_end = new_end;
610 762 }
611 763 // Notice that the heap can also shrink. However, this only happens
612 764 // during a Full GC (at least currently) and the entire marking
613 765 // phase will bail out and the task will not be restarted. So, let's
614 766 // do nothing.
615 767 }
616 768
617 769 void ConcurrentMark::reset() {
618 770 // Starting values for these two. This should be called in a STW
 619  771   // phase. CM will be notified of any future g1_committed expansions;
 620  772   // these will happen at the end of evacuation pauses, when tasks are
621 773 // inactive.
622 774 MemRegion committed = _g1h->g1_committed();
623 775 _heap_start = committed.start();
624 776 _heap_end = committed.end();
625 777
626 778 // Separated the asserts so that we know which one fires.
627 779 assert(_heap_start != NULL, "heap bounds should look ok");
628 780 assert(_heap_end != NULL, "heap bounds should look ok");
629 781 assert(_heap_start < _heap_end, "heap bounds should look ok");
630 782
631 783 // reset all the marking data structures and any necessary flags
632 784 clear_marking_state();
633 785
634 786 if (verbose_low()) {
635 787 gclog_or_tty->print_cr("[global] resetting");
636 788 }
637 789
638 790 // We do reset all of them, since different phases will use
639 791 // different number of active threads. So, it's easiest to have all
640 792 // of them ready.
641 793 for (uint i = 0; i < _max_worker_id; ++i) {
642 794 _tasks[i]->reset(_nextMarkBitMap);
643 795 }
644 796
645 797 // we need this to make sure that the flag is on during the evac
646 798 // pause with initial mark piggy-backed
647 799 set_concurrent_marking_in_progress();
648 800 }
649 801
650 802 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
651 803 assert(active_tasks <= _max_worker_id, "we should not have more");
652 804
653 805 _active_tasks = active_tasks;
654 806 // Need to update the three data structures below according to the
655 807 // number of active threads for this phase.
656 808 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
657 809 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
658 810 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
659 811
660 812 _concurrent = concurrent;
661 813 // We propagate this to all tasks, not just the active ones.
662 814 for (uint i = 0; i < _max_worker_id; ++i)
663 815 _tasks[i]->set_concurrent(concurrent);
664 816
665 817 if (concurrent) {
666 818 set_concurrent_marking_in_progress();
667 819 } else {
668 820 // We currently assume that the concurrent flag has been set to
669 821 // false before we start remark. At this point we should also be
670 822 // in a STW phase.
671 823 assert(!concurrent_marking_in_progress(), "invariant");
672 824 assert(_finger == _heap_end, "only way to get here");
673 825 update_g1_committed(true);
674 826 }
675 827 }
676 828
677 829 void ConcurrentMark::set_non_marking_state() {
678 830 // We set the global marking state to some default values when we're
679 831 // not doing marking.
680 832 clear_marking_state();
681 833 _active_tasks = 0;
682 834 clear_concurrent_marking_in_progress();
683 835 }
684 836
685 837 ConcurrentMark::~ConcurrentMark() {
686 838 // The ConcurrentMark instance is never freed.
687 839 ShouldNotReachHere();
688 840 }
689 841
690 842 void ConcurrentMark::clearNextBitmap() {
691 843 G1CollectedHeap* g1h = G1CollectedHeap::heap();
692 844 G1CollectorPolicy* g1p = g1h->g1_policy();
693 845
694 846 // Make sure that the concurrent mark thread looks to still be in
695 847 // the current cycle.
696 848 guarantee(cmThread()->during_cycle(), "invariant");
697 849
698 850 // We are finishing up the current cycle by clearing the next
699 851 // marking bitmap and getting it ready for the next cycle. During
700 852 // this time no other cycle can start. So, let's make sure that this
701 853 // is the case.
702 854 guarantee(!g1h->mark_in_progress(), "invariant");
703 855
704 856 // clear the mark bitmap (no grey objects to start with).
705 857 // We need to do this in chunks and offer to yield in between
706 858 // each chunk.
707 859 HeapWord* start = _nextMarkBitMap->startWord();
708 860 HeapWord* end = _nextMarkBitMap->endWord();
709 861 HeapWord* cur = start;
710 862 size_t chunkSize = M;
711 863 while (cur < end) {
712 864 HeapWord* next = cur + chunkSize;
713 865 if (next > end) {
714 866 next = end;
715 867 }
716 868 MemRegion mr(cur,next);
717 869 _nextMarkBitMap->clearRange(mr);
718 870 cur = next;
719 871 do_yield_check();
720 872
721 873 // Repeat the asserts from above. We'll do them as asserts here to
722 874 // minimize their overhead on the product. However, we'll have
723 875 // them as guarantees at the beginning / end of the bitmap
724 876 // clearing to get some checking in the product.
725 877 assert(cmThread()->during_cycle(), "invariant");
726 878 assert(!g1h->mark_in_progress(), "invariant");
727 879 }
728 880
729 881 // Clear the liveness counting data
730 882 clear_all_count_data();
731 883
732 884 // Repeat the asserts from above.
733 885 guarantee(cmThread()->during_cycle(), "invariant");
734 886 guarantee(!g1h->mark_in_progress(), "invariant");
735 887 }
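
The loop above clears M heap words at a time and calls do_yield_check() between chunks, so the concurrent thread can be stopped for a safepoint mid-clear. The shape of that loop in isolation (clear_range and yield_check are hypothetical stand-ins):

    #include <cstddef>

    void clear_in_chunks(char* start, char* end, size_t chunk,
                         void (*clear_range)(char*, char*),
                         void (*yield_check)()) {
      for (char* cur = start; cur < end; ) {
        char* next = cur + chunk;
        if (next > end) {
          next = end;                // clamp the final partial chunk
        }
        clear_range(cur, next);
        cur = next;
        yield_check();               // lets a safepoint interrupt the clear
      }
    }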
736 888
737 889 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
738 890 public:
739 891 bool doHeapRegion(HeapRegion* r) {
740 892 if (!r->continuesHumongous()) {
741 893 r->note_start_of_marking();
742 894 }
743 895 return false;
744 896 }
745 897 };
746 898
747 899 void ConcurrentMark::checkpointRootsInitialPre() {
748 900 G1CollectedHeap* g1h = G1CollectedHeap::heap();
749 901 G1CollectorPolicy* g1p = g1h->g1_policy();
750 902
751 903 _has_aborted = false;
752 904
753 905 #ifndef PRODUCT
754 906 if (G1PrintReachableAtInitialMark) {
755 907 print_reachable("at-cycle-start",
756 908 VerifyOption_G1UsePrevMarking, true /* all */);
757 909 }
758 910 #endif
759 911
760 912 // Initialise marking structures. This has to be done in a STW phase.
761 913 reset();
762 914
763 915 // For each region note start of marking.
764 916 NoteStartOfMarkHRClosure startcl;
765 917 g1h->heap_region_iterate(&startcl);
766 918 }
767 919
768 920
769 921 void ConcurrentMark::checkpointRootsInitialPost() {
770 922 G1CollectedHeap* g1h = G1CollectedHeap::heap();
771 923
772 924 // If we force an overflow during remark, the remark operation will
773 925 // actually abort and we'll restart concurrent marking. If we always
 774  926   // force an overflow during remark we'll never actually complete the
 775  927   // marking phase. So, we initialize this here, at the start of the
 776  928   // cycle, so that the remaining overflow number will decrease at
777 929 // every remark and we'll eventually not need to cause one.
778 930 force_overflow_stw()->init();
779 931
780 932 // Start Concurrent Marking weak-reference discovery.
781 933 ReferenceProcessor* rp = g1h->ref_processor_cm();
782 934 // enable ("weak") refs discovery
783 935 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
784 936 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
785 937
786 938 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 787  939   // This is the start of the marking cycle; we expect all
788 940 // threads to have SATB queues with active set to false.
789 941 satb_mq_set.set_active_all_threads(true, /* new active value */
790 942 false /* expected_active */);
791 943
792 944 _root_regions.prepare_for_scan();
793 945
794 946 // update_g1_committed() will be called at the end of an evac pause
795 947 // when marking is on. So, it's also called at the end of the
796 948 // initial-mark pause to update the heap end, if the heap expands
797 949 // during it. No need to call it here.
798 950 }
799 951
800 952 /*
801 953 * Notice that in the next two methods, we actually leave the STS
802 954 * during the barrier sync and join it immediately afterwards. If we
803 955 * do not do this, the following deadlock can occur: one thread could
804 956 * be in the barrier sync code, waiting for the other thread to also
805 957 * sync up, whereas another one could be trying to yield, while also
806 958 * waiting for the other threads to sync up too.
807 959 *
808 960 * Note, however, that this code is also used during remark and in
809 961 * this case we should not attempt to leave / enter the STS, otherwise
 810  962  * we'll either hit an assert (debug / fastdebug) or deadlock
811 963 * (product). So we should only leave / enter the STS if we are
812 964 * operating concurrently.
813 965 *
814 966 * Because the thread that does the sync barrier has left the STS, it
 815  967  * is possible that it will be suspended for a Full GC, or that an
 816  968  * evacuation pause will occur. This is actually safe, since entering the sync
817 969 * barrier is one of the last things do_marking_step() does, and it
818 970 * doesn't manipulate any data structures afterwards.
819 971 */
820 972
821 973 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
822 974 if (verbose_low()) {
823 975 gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
824 976 }
825 977
826 978 if (concurrent()) {
827 979 ConcurrentGCThread::stsLeave();
828 980 }
829 981 _first_overflow_barrier_sync.enter();
830 982 if (concurrent()) {
831 983 ConcurrentGCThread::stsJoin();
832 984 }
833 985 // at this point everyone should have synced up and not be doing any
834 986 // more work
835 987
836 988 if (verbose_low()) {
837 989 gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
838 990 }
839 991
 840  992   // let the task associated with worker 0 do this
841 993 if (worker_id == 0) {
842 994 // task 0 is responsible for clearing the global data structures
843 995 // We should be here because of an overflow. During STW we should
844 996 // not clear the overflow flag since we rely on it being true when
 845  997     // we exit this method to abort the pause and restart concurrent
846 998 // marking.
847 999 clear_marking_state(concurrent() /* clear_overflow */);
848 1000 force_overflow()->update();
849 1001
850 1002 if (G1Log::fine()) {
851 1003 gclog_or_tty->date_stamp(PrintGCDateStamps);
852 1004 gclog_or_tty->stamp(PrintGCTimeStamps);
853 1005 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
854 1006 }
855 1007 }
856 1008
 857 1009   // after this, each task should reset its own data structures
 858 1010   // then go into the second barrier
859 1011 }
860 1012
861 1013 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
862 1014 if (verbose_low()) {
863 1015 gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
864 1016 }
865 1017
866 1018 if (concurrent()) {
867 1019 ConcurrentGCThread::stsLeave();
868 1020 }
869 1021 _second_overflow_barrier_sync.enter();
870 1022 if (concurrent()) {
871 1023 ConcurrentGCThread::stsJoin();
872 1024 }
873 1025 // at this point everything should be re-initialised and ready to go
874 1026
875 1027 if (verbose_low()) {
876 1028 gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
877 1029 }
878 1030 }
879 1031
880 1032 #ifndef PRODUCT
881 1033 void ForceOverflowSettings::init() {
882 1034 _num_remaining = G1ConcMarkForceOverflow;
883 1035 _force = false;
884 1036 update();
885 1037 }
886 1038
887 1039 void ForceOverflowSettings::update() {
888 1040 if (_num_remaining > 0) {
889 1041 _num_remaining -= 1;
890 1042 _force = true;
891 1043 } else {
892 1044 _force = false;
893 1045 }
894 1046 }
895 1047
896 1048 bool ForceOverflowSettings::should_force() {
897 1049 if (_force) {
898 1050 _force = false;
899 1051 return true;
900 1052 } else {
901 1053 return false;
902 1054 }
903 1055 }
904 1056 #endif // !PRODUCT
905 1057
906 1058 class CMConcurrentMarkingTask: public AbstractGangTask {
907 1059 private:
908 1060 ConcurrentMark* _cm;
909 1061 ConcurrentMarkThread* _cmt;
910 1062
911 1063 public:
912 1064 void work(uint worker_id) {
913 1065 assert(Thread::current()->is_ConcurrentGC_thread(),
914 1066 "this should only be done by a conc GC thread");
915 1067 ResourceMark rm;
916 1068
917 1069 double start_vtime = os::elapsedVTime();
918 1070
919 1071 ConcurrentGCThread::stsJoin();
920 1072
921 1073 assert(worker_id < _cm->active_tasks(), "invariant");
922 1074 CMTask* the_task = _cm->task(worker_id);
923 1075 the_task->record_start_time();
924 1076 if (!_cm->has_aborted()) {
925 1077 do {
926 1078 double start_vtime_sec = os::elapsedVTime();
927 1079 double start_time_sec = os::elapsedTime();
928 1080 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
929 1081
930 1082 the_task->do_marking_step(mark_step_duration_ms,
931 1083 true /* do_stealing */,
932 1084 true /* do_termination */);
933 1085
934 1086 double end_time_sec = os::elapsedTime();
935 1087 double end_vtime_sec = os::elapsedVTime();
936 1088 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
937 1089 double elapsed_time_sec = end_time_sec - start_time_sec;
938 1090 _cm->clear_has_overflown();
939 1091
940 1092 bool ret = _cm->do_yield_check(worker_id);
941 1093
942 1094 jlong sleep_time_ms;
943 1095 if (!_cm->has_aborted() && the_task->has_aborted()) {
944 1096 sleep_time_ms =
945 1097 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
946 1098 ConcurrentGCThread::stsLeave();
947 1099 os::sleep(Thread::current(), sleep_time_ms, false);
948 1100 ConcurrentGCThread::stsJoin();
949 1101 }
950 1102 double end_time2_sec = os::elapsedTime();
951 1103 double elapsed_time2_sec = end_time2_sec - start_time_sec;
952 1104
953 1105 #if 0
954 1106 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
955 1107 "overhead %1.4lf",
956 1108 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
957 1109 the_task->conc_overhead(os::elapsedTime()) * 8.0);
958 1110 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
959 1111 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
960 1112 #endif
961 1113 } while (!_cm->has_aborted() && the_task->has_aborted());
962 1114 }
963 1115 the_task->record_end_time();
964 1116 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
965 1117
966 1118 ConcurrentGCThread::stsLeave();
967 1119
968 1120 double end_vtime = os::elapsedVTime();
969 1121 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
970 1122 }
971 1123
972 1124 CMConcurrentMarkingTask(ConcurrentMark* cm,
973 1125 ConcurrentMarkThread* cmt) :
974 1126 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
975 1127
976 1128 ~CMConcurrentMarkingTask() { }
977 1129 };
978 1130
979 1131 // Calculates the number of active workers for a concurrent
980 1132 // phase.
981 1133 uint ConcurrentMark::calc_parallel_marking_threads() {
982 1134 if (G1CollectedHeap::use_parallel_gc_threads()) {
983 1135 uint n_conc_workers = 0;
984 1136 if (!UseDynamicNumberOfGCThreads ||
985 1137 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
986 1138 !ForceDynamicNumberOfGCThreads)) {
987 1139 n_conc_workers = max_parallel_marking_threads();
988 1140 } else {
989 1141 n_conc_workers =
990 1142 AdaptiveSizePolicy::calc_default_active_workers(
991 1143 max_parallel_marking_threads(),
992 1144 1, /* Minimum workers */
993 1145 parallel_marking_threads(),
994 1146 Threads::number_of_non_daemon_threads());
995 1147 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
996 1148 // that scaling has already gone into "_max_parallel_marking_threads".
997 1149 }
998 1150 assert(n_conc_workers > 0, "Always need at least 1");
999 1151 return n_conc_workers;
1000 1152 }
1001 1153 // If we are not running with any parallel GC threads we will not
1002 1154 // have spawned any marking threads either. Hence the number of
1003 1155 // concurrent workers should be 0.
1004 1156 return 0;
1005 1157 }
1006 1158
1007 1159 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1008 1160 // Currently, only survivors can be root regions.
1009 1161 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1010 1162 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1011 1163
1012 1164 const uintx interval = PrefetchScanIntervalInBytes;
1013 1165 HeapWord* curr = hr->bottom();
1014 1166 const HeapWord* end = hr->top();
1015 1167 while (curr < end) {
1016 1168 Prefetch::read(curr, interval);
1017 1169 oop obj = oop(curr);
1018 1170 int size = obj->oop_iterate(&cl);
1019 1171 assert(size == obj->size(), "sanity");
1020 1172 curr += size;
1021 1173 }
1022 1174 }
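
scanRootRegion() walks the region linearly and prefetches PrefetchScanIntervalInBytes ahead of the cursor before touching each object. The same idea with GCC's __builtin_prefetch standing in for Prefetch::read (object_size() is a hypothetical stand-in for the oop_iterate call, which both visits the object and returns its size):

    #include <cstddef>

    // Hypothetical stand-in for obj->oop_iterate(&cl): visit obj, return size.
    static size_t object_size(char* obj) { (void) obj; return 16; }

    void scan(char* bottom, char* top, size_t interval) {
      for (char* curr = bottom; curr < top; ) {
        __builtin_prefetch(curr + interval, 0 /* read */);
        curr += object_size(curr);
      }
    }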
1023 1175
1024 1176 class CMRootRegionScanTask : public AbstractGangTask {
1025 1177 private:
1026 1178 ConcurrentMark* _cm;
1027 1179
1028 1180 public:
1029 1181 CMRootRegionScanTask(ConcurrentMark* cm) :
1030 1182 AbstractGangTask("Root Region Scan"), _cm(cm) { }
1031 1183
1032 1184 void work(uint worker_id) {
1033 1185 assert(Thread::current()->is_ConcurrentGC_thread(),
1034 1186 "this should only be done by a conc GC thread");
1035 1187
1036 1188 CMRootRegions* root_regions = _cm->root_regions();
1037 1189 HeapRegion* hr = root_regions->claim_next();
1038 1190 while (hr != NULL) {
1039 1191 _cm->scanRootRegion(hr, worker_id);
1040 1192 hr = root_regions->claim_next();
1041 1193 }
1042 1194 }
1043 1195 };
1044 1196
1045 1197 void ConcurrentMark::scanRootRegions() {
1046 1198 // scan_in_progress() will have been set to true only if there was
1047 1199 // at least one root region to scan. So, if it's false, we
1048 1200 // should not attempt to do any further work.
1049 1201 if (root_regions()->scan_in_progress()) {
1050 1202 _parallel_marking_threads = calc_parallel_marking_threads();
1051 1203 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1052 1204 "Maximum number of marking threads exceeded");
1053 1205 uint active_workers = MAX2(1U, parallel_marking_threads());
1054 1206
1055 1207 CMRootRegionScanTask task(this);
1056 1208 if (parallel_marking_threads() > 0) {
1057 1209 _parallel_workers->set_active_workers((int) active_workers);
1058 1210 _parallel_workers->run_task(&task);
1059 1211 } else {
1060 1212 task.work(0);
1061 1213 }
1062 1214
1063 1215 // It's possible that has_aborted() is true here without actually
1064 1216 // aborting the survivor scan earlier. This is OK as it's
1065 1217 // mainly used for sanity checking.
1066 1218 root_regions()->scan_finished();
1067 1219 }
1068 1220 }
1069 1221
1070 1222 void ConcurrentMark::markFromRoots() {
1071 1223 // we might be tempted to assert that:
1072 1224 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1073 1225 // "inconsistent argument?");
1074 1226 // However that wouldn't be right, because it's possible that
1075 1227 // a safepoint is indeed in progress as a younger generation
1076 1228 // stop-the-world GC happens even as we mark in this generation.
1077 1229
1078 1230 _restart_for_overflow = false;
1079 1231 force_overflow_conc()->init();
1080 1232
1081 1233 // _g1h has _n_par_threads
1082 1234 _parallel_marking_threads = calc_parallel_marking_threads();
1083 1235 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1084 1236 "Maximum number of marking threads exceeded");
1085 1237
1086 1238 uint active_workers = MAX2(1U, parallel_marking_threads());
1087 1239
1088 1240 // Parallel task terminator is set in "set_phase()"
1089 1241 set_phase(active_workers, true /* concurrent */);
1090 1242
1091 1243 CMConcurrentMarkingTask markingTask(this, cmThread());
1092 1244 if (parallel_marking_threads() > 0) {
1093 1245 _parallel_workers->set_active_workers((int)active_workers);
1094 1246     // Don't set _n_par_threads because it affects MT in process_strong_roots()
1095 1247     // and the decisions on that MT processing are made elsewhere.
1096 1248 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1097 1249 _parallel_workers->run_task(&markingTask);
1098 1250 } else {
1099 1251 markingTask.work(0);
1100 1252 }
1101 1253 print_stats();
1102 1254 }
1103 1255
1104 1256 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1105 1257 // world is stopped at this checkpoint
1106 1258 assert(SafepointSynchronize::is_at_safepoint(),
1107 1259 "world should be stopped");
1108 1260
1109 1261 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1110 1262
1111 1263 // If a full collection has happened, we shouldn't do this.
1112 1264 if (has_aborted()) {
1113 1265 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1114 1266 return;
1115 1267 }
1116 1268
1117 1269 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1118 1270
1119 1271 if (VerifyDuringGC) {
1120 1272 HandleMark hm; // handle scope
1121 1273 gclog_or_tty->print(" VerifyDuringGC:(before)");
1122 1274 Universe::heap()->prepare_for_verify();
1123 1275 Universe::verify(/* silent */ false,
1124 1276 /* option */ VerifyOption_G1UsePrevMarking);
1125 1277 }
1126 1278
1127 1279 G1CollectorPolicy* g1p = g1h->g1_policy();
1128 1280 g1p->record_concurrent_mark_remark_start();
1129 1281
1130 1282 double start = os::elapsedTime();
1131 1283
1132 1284 checkpointRootsFinalWork();
1133 1285
1134 1286 double mark_work_end = os::elapsedTime();
1135 1287
1136 1288 weakRefsWork(clear_all_soft_refs);
1137 1289
1138 1290 if (has_overflown()) {
1139 1291 // Oops. We overflowed. Restart concurrent marking.
1140 1292 _restart_for_overflow = true;
1141 1293 // Clear the flag. We do not need it any more.
1142 1294 clear_has_overflown();
1143 1295 if (G1TraceMarkStackOverflow) {
1144 1296 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1145 1297 }
1146 1298 } else {
1147 1299 // Aggregate the per-task counting data that we have accumulated
1148 1300 // while marking.
1149 1301 aggregate_count_data();
1150 1302
1151 1303 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1152 1304 // We're done with marking.
1153 1305     // This is the end of the marking cycle; we expect all
1154 1306 // threads to have SATB queues with active set to true.
1155 1307 satb_mq_set.set_active_all_threads(false, /* new active value */
1156 1308 true /* expected_active */);
1157 1309
1158 1310 if (VerifyDuringGC) {
1159 1311 HandleMark hm; // handle scope
1160 1312 gclog_or_tty->print(" VerifyDuringGC:(after)");
1161 1313 Universe::heap()->prepare_for_verify();
1162 1314 Universe::verify(/* silent */ false,
1163 1315 /* option */ VerifyOption_G1UseNextMarking);
1164 1316 }
1165 1317 assert(!restart_for_overflow(), "sanity");
1166 1318 }
1167 1319
1320 + // Expand the marking stack, if we have to and if we can.
1321 + if (_markStack.should_expand()) {
1322 + _markStack.expand();
1323 + }
1324 +
1168 1325 // Reset the marking state if marking completed
1169 1326 if (!restart_for_overflow()) {
1170 1327 set_non_marking_state();
1171 1328 }
1172 1329
1173 1330 #if VERIFY_OBJS_PROCESSED
1174 1331 _scan_obj_cl.objs_processed = 0;
1175 1332 ThreadLocalObjQueue::objs_enqueued = 0;
1176 1333 #endif
1177 1334
1178 1335 // Statistics
1179 1336 double now = os::elapsedTime();
1180 1337 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1181 1338 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1182 1339 _remark_times.add((now - start) * 1000.0);
1183 1340
1184 1341 g1p->record_concurrent_mark_remark_end();
1185 1342 }
1186 1343
1187 1344 // Base class of the closures that finalize and verify the
1188 1345 // liveness counting data.
1189 1346 class CMCountDataClosureBase: public HeapRegionClosure {
1190 1347 protected:
1191 1348 G1CollectedHeap* _g1h;
1192 1349 ConcurrentMark* _cm;
1193 1350 CardTableModRefBS* _ct_bs;
1194 1351
1195 1352 BitMap* _region_bm;
1196 1353 BitMap* _card_bm;
1197 1354
1198 1355 // Takes a region that's not empty (i.e., it has at least one
1199 1356   // live object in it) and sets its corresponding bit on the region
1200 1357 // bitmap to 1. If the region is "starts humongous" it will also set
1201 1358 // to 1 the bits on the region bitmap that correspond to its
1202 1359 // associated "continues humongous" regions.
1203 1360 void set_bit_for_region(HeapRegion* hr) {
1204 1361 assert(!hr->continuesHumongous(), "should have filtered those out");
1205 1362
1206 1363 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1207 1364 if (!hr->startsHumongous()) {
1208 1365 // Normal (non-humongous) case: just set the bit.
1209 1366 _region_bm->par_at_put(index, true);
1210 1367 } else {
1211 1368 // Starts humongous case: calculate how many regions are part of
1212 1369 // this humongous region and then set the bit range.
1213 1370 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1214 1371 _region_bm->par_at_put_range(index, end_index, true);
1215 1372 }
1216 1373 }
1217 1374
1218 1375 public:
1219 1376 CMCountDataClosureBase(G1CollectedHeap* g1h,
1220 1377 BitMap* region_bm, BitMap* card_bm):
1221 1378 _g1h(g1h), _cm(g1h->concurrent_mark()),
1222 1379 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1223 1380 _region_bm(region_bm), _card_bm(card_bm) { }
1224 1381 };
1225 1382
1226 1383 // Closure that calculates the # live objects per region. Used
1227 1384 // for verification purposes during the cleanup pause.
1228 1385 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1229 1386 CMBitMapRO* _bm;
1230 1387 size_t _region_marked_bytes;
1231 1388
1232 1389 public:
1233 1390 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1234 1391 BitMap* region_bm, BitMap* card_bm) :
1235 1392 CMCountDataClosureBase(g1h, region_bm, card_bm),
1236 1393 _bm(bm), _region_marked_bytes(0) { }
1237 1394
1238 1395 bool doHeapRegion(HeapRegion* hr) {
1239 1396
1240 1397 if (hr->continuesHumongous()) {
1241 1398 // We will ignore these here and process them when their
1242 1399 // associated "starts humongous" region is processed (see
1243 1400 // set_bit_for_heap_region()). Note that we cannot rely on their
1244 1401       // associated "starts humongous" region to have its bit set to
1245 1402 // 1 since, due to the region chunking in the parallel region
1246 1403 // iteration, a "continues humongous" region might be visited
1247 1404 // before its associated "starts humongous".
1248 1405 return false;
1249 1406 }
1250 1407
1251 1408 HeapWord* ntams = hr->next_top_at_mark_start();
1252 1409 HeapWord* start = hr->bottom();
1253 1410
1254 1411 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1255 1412 err_msg("Preconditions not met - "
1256 1413 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1257 1414 start, ntams, hr->end()));
1258 1415
1259 1416 // Find the first marked object at or after "start".
1260 1417 start = _bm->getNextMarkedWordAddress(start, ntams);
1261 1418
1262 1419 size_t marked_bytes = 0;
1263 1420
1264 1421 while (start < ntams) {
1265 1422 oop obj = oop(start);
1266 1423 int obj_sz = obj->size();
1267 1424 HeapWord* obj_end = start + obj_sz;
1268 1425
1269 1426 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1270 1427 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1271 1428
1272 1429 // Note: if we're looking at the last region in heap - obj_end
1273 1430 // could be actually just beyond the end of the heap; end_idx
1274 1431 // will then correspond to a (non-existent) card that is also
1275 1432 // just beyond the heap.
1276 1433 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1277 1434 // end of object is not card aligned - increment to cover
1278 1435 // all the cards spanned by the object
1279 1436 end_idx += 1;
1280 1437 }
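      // Worked example, assuming the usual 512-byte cards: for an object
      // spanning [0x8000, 0x8300), the index for 0x8300 rounds down to the
      // card starting at 0x8200, so without the increment the half-open
      // range [start_idx, end_idx) would miss that last, partially
      // covered card.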
1281 1438
1282 1439 // Set the bits in the card BM for the cards spanned by this object.
1283 1440 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1284 1441
1285 1442 // Add the size of this object to the number of marked bytes.
1286 1443 marked_bytes += (size_t)obj_sz * HeapWordSize;
1287 1444
1288 1445 // Find the next marked object after this one.
1289 1446 start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1290 1447 }
1291 1448
1292 1449 // Mark the allocated-since-marking portion...
1293 1450 HeapWord* top = hr->top();
1294 1451 if (ntams < top) {
1295 1452 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1296 1453 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1297 1454
1298 1455 // Note: if we're looking at the last region in heap - top
1299 1456 // could be actually just beyond the end of the heap; end_idx
1300 1457 // will then correspond to a (non-existent) card that is also
1301 1458 // just beyond the heap.
1302 1459 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1303 1460         // top is not card aligned - increment to cover
1304 1461         // all the cards spanned by the range [ntams, top)
1305 1462 end_idx += 1;
1306 1463 }
1307 1464 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1308 1465
1309 1466 // This definitely means the region has live objects.
1310 1467 set_bit_for_region(hr);
1311 1468 }
1312 1469
1313 1470 // Update the live region bitmap.
1314 1471 if (marked_bytes > 0) {
1315 1472 set_bit_for_region(hr);
1316 1473 }
1317 1474
1318 1475 // Set the marked bytes for the current region so that
1319 1476     // it can be queried by a calling verification routine
1320 1477 _region_marked_bytes = marked_bytes;
1321 1478
1322 1479 return false;
1323 1480 }
1324 1481
1325 1482 size_t region_marked_bytes() const { return _region_marked_bytes; }
1326 1483 };
1327 1484
1328 1485 // Heap region closure used for verifying the counting data
1329 1486 // that was accumulated concurrently and aggregated during
1330 1487 // the remark pause. This closure is applied to the heap
1331 1488 // regions during the STW cleanup pause.
1332 1489
1333 1490 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1334 1491 G1CollectedHeap* _g1h;
1335 1492 ConcurrentMark* _cm;
1336 1493 CalcLiveObjectsClosure _calc_cl;
1337 1494 BitMap* _region_bm; // Region BM to be verified
1338 1495 BitMap* _card_bm; // Card BM to be verified
1339 1496 bool _verbose; // verbose output?
1340 1497
1341 1498 BitMap* _exp_region_bm; // Expected Region BM values
1342 1499 BitMap* _exp_card_bm; // Expected card BM values
1343 1500
1344 1501 int _failures;
1345 1502
1346 1503 public:
1347 1504 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1348 1505 BitMap* region_bm,
1349 1506 BitMap* card_bm,
1350 1507 BitMap* exp_region_bm,
1351 1508 BitMap* exp_card_bm,
1352 1509 bool verbose) :
1353 1510 _g1h(g1h), _cm(g1h->concurrent_mark()),
1354 1511 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1355 1512 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1356 1513 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1357 1514 _failures(0) { }
1358 1515
1359 1516 int failures() const { return _failures; }
1360 1517
1361 1518 bool doHeapRegion(HeapRegion* hr) {
1362 1519 if (hr->continuesHumongous()) {
1363 1520 // We will ignore these here and process them when their
1364 1521 // associated "starts humongous" region is processed (see
1365 1522 // set_bit_for_heap_region()). Note that we cannot rely on their
1366 1523       // associated "starts humongous" region to have its bit set to
1367 1524 // 1 since, due to the region chunking in the parallel region
1368 1525 // iteration, a "continues humongous" region might be visited
1369 1526 // before its associated "starts humongous".
1370 1527 return false;
1371 1528 }
1372 1529
1373 1530 int failures = 0;
1374 1531
1375 1532 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1376 1533 // this region and set the corresponding bits in the expected region
1377 1534 // and card bitmaps.
1378 1535 bool res = _calc_cl.doHeapRegion(hr);
1379 1536 assert(res == false, "should be continuing");
1380 1537
1381 1538 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1382 1539 Mutex::_no_safepoint_check_flag);
1383 1540
1384 1541 // Verify the marked bytes for this region.
1385 1542 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1386 1543 size_t act_marked_bytes = hr->next_marked_bytes();
1387 1544
1388 1545 // We're not OK if expected marked bytes > actual marked bytes. It means
1389 1546     // we have missed accounting for some objects during the actual marking.
1390 1547 if (exp_marked_bytes > act_marked_bytes) {
1391 1548 if (_verbose) {
1392 1549 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1393 1550 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1394 1551 hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1395 1552 }
1396 1553 failures += 1;
1397 1554 }
1398 1555
1399 1556 // Verify the bit, for this region, in the actual and expected
1400 1557 // (which was just calculated) region bit maps.
1401 1558 // We're not OK if the bit in the calculated expected region
1402 1559 // bitmap is set and the bit in the actual region bitmap is not.
1403 1560 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1404 1561
1405 1562 bool expected = _exp_region_bm->at(index);
1406 1563 bool actual = _region_bm->at(index);
1407 1564 if (expected && !actual) {
1408 1565 if (_verbose) {
1409 1566 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1410 1567 "expected: %s, actual: %s",
1411 1568 hr->hrs_index(),
1412 1569 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1413 1570 }
1414 1571 failures += 1;
1415 1572 }
1416 1573
1417 1574 // Verify that the card bit maps for the cards spanned by the current
1418 1575 // region match. We have an error if we have a set bit in the expected
1419 1576 // bit map and the corresponding bit in the actual bitmap is not set.
1420 1577
1421 1578 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1422 1579 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1423 1580
1424 1581 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1425 1582 expected = _exp_card_bm->at(i);
1426 1583 actual = _card_bm->at(i);
1427 1584
1428 1585 if (expected && !actual) {
1429 1586 if (_verbose) {
1430 1587 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1431 1588 "expected: %s, actual: %s",
1432 1589 hr->hrs_index(), i,
1433 1590 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1434 1591 }
1435 1592 failures += 1;
1436 1593 }
1437 1594 }
1438 1595
1439 1596 if (failures > 0 && _verbose) {
1440 1597 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1441 1598 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1442 1599 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1443 1600 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1444 1601 }
1445 1602
1446 1603 _failures += failures;
1447 1604
1448 1605 // We could stop iteration over the heap when we
1449 1606 // find the first violating region by returning true.
1450 1607 return false;
1451 1608 }
1452 1609 };
1453 1610
1454 1611
1455 1612 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1456 1613 protected:
1457 1614 G1CollectedHeap* _g1h;
1458 1615 ConcurrentMark* _cm;
1459 1616 BitMap* _actual_region_bm;
1460 1617 BitMap* _actual_card_bm;
1461 1618
1462 1619 uint _n_workers;
1463 1620
1464 1621 BitMap* _expected_region_bm;
1465 1622 BitMap* _expected_card_bm;
1466 1623
1467 1624 int _failures;
1468 1625 bool _verbose;
1469 1626
1470 1627 public:
1471 1628 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1472 1629 BitMap* region_bm, BitMap* card_bm,
1473 1630 BitMap* expected_region_bm, BitMap* expected_card_bm)
1474 1631 : AbstractGangTask("G1 verify final counting"),
1475 1632 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1476 1633 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1477 1634 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1478 1635 _failures(0), _verbose(false),
1479 1636 _n_workers(0) {
1480 1637 assert(VerifyDuringGC, "don't call this otherwise");
1481 1638
1482 1639 // Use the value already set as the number of active threads
1483 1640 // in the call to run_task().
1484 1641 if (G1CollectedHeap::use_parallel_gc_threads()) {
1485 1642 assert( _g1h->workers()->active_workers() > 0,
1486 1643 "Should have been previously set");
1487 1644 _n_workers = _g1h->workers()->active_workers();
1488 1645 } else {
1489 1646 _n_workers = 1;
1490 1647 }
1491 1648
1492 1649 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1493 1650 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1494 1651
1495 1652 _verbose = _cm->verbose_medium();
1496 1653 }
1497 1654
1498 1655 void work(uint worker_id) {
1499 1656 assert(worker_id < _n_workers, "invariant");
1500 1657
1501 1658 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1502 1659 _actual_region_bm, _actual_card_bm,
1503 1660 _expected_region_bm,
1504 1661 _expected_card_bm,
1505 1662 _verbose);
1506 1663
1507 1664 if (G1CollectedHeap::use_parallel_gc_threads()) {
1508 1665 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1509 1666 worker_id,
1510 1667 _n_workers,
1511 1668 HeapRegion::VerifyCountClaimValue);
1512 1669 } else {
1513 1670 _g1h->heap_region_iterate(&verify_cl);
1514 1671 }
1515 1672
1516 1673 Atomic::add(verify_cl.failures(), &_failures);
1517 1674 }
1518 1675
1519 1676 int failures() const { return _failures; }
1520 1677 };
1521 1678
1522 1679 // Closure that finalizes the liveness counting data.
1523 1680 // Used during the cleanup pause.
1524 1681 // Sets the bits corresponding to the interval [NTAMS, top]
1525 1682 // (which contains the implicitly live objects) in the
1526 1683 // card liveness bitmap. Also sets the bit for each region,
1527 1684 // containing live data, in the region liveness bitmap.
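// For example, an object allocated in a region after its NTAMS was recorded
// (i.e., during the current marking cycle) has no mark on the next bitmap,
// yet it must be treated as live; covering [NTAMS, top) in the card bitmap
// accounts for exactly those objects.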
1528 1685
1529 1686 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1530 1687 public:
1531 1688 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1532 1689 BitMap* region_bm,
1533 1690 BitMap* card_bm) :
1534 1691 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1535 1692
1536 1693 bool doHeapRegion(HeapRegion* hr) {
1537 1694
1538 1695 if (hr->continuesHumongous()) {
1539 1696 // We will ignore these here and process them when their
1540 1697 // associated "starts humongous" region is processed (see
1541 1698 // set_bit_for_heap_region()). Note that we cannot rely on their
1542 1699       // associated "starts humongous" region to have its bit set to
1543 1700 // 1 since, due to the region chunking in the parallel region
1544 1701 // iteration, a "continues humongous" region might be visited
1545 1702 // before its associated "starts humongous".
1546 1703 return false;
1547 1704 }
1548 1705
1549 1706 HeapWord* ntams = hr->next_top_at_mark_start();
1550 1707 HeapWord* top = hr->top();
1551 1708
1552 1709 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1553 1710
1554 1711 // Mark the allocated-since-marking portion...
1555 1712 if (ntams < top) {
1556 1713 // This definitely means the region has live objects.
1557 1714 set_bit_for_region(hr);
1558 1715
1559 1716 // Now set the bits in the card bitmap for [ntams, top)
1560 1717 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1561 1718 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1562 1719
1563 1720 // Note: if we're looking at the last region in heap - top
1564 1721 // could be actually just beyond the end of the heap; end_idx
1565 1722 // will then correspond to a (non-existent) card that is also
1566 1723 // just beyond the heap.
1567 1724 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1568 1725       // top is not card aligned - increment to cover
1569 1726       // all the cards spanned by the range [ntams, top)
1570 1727 end_idx += 1;
1571 1728 }
1572 1729
1573 1730 assert(end_idx <= _card_bm->size(),
1574 1731 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1575 1732 end_idx, _card_bm->size()));
1576 1733 assert(start_idx < _card_bm->size(),
1577 1734 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1578 1735 start_idx, _card_bm->size()));
1579 1736
1580 1737 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1581 1738 }
1582 1739
1583 1740 // Set the bit for the region if it contains live data
1584 1741 if (hr->next_marked_bytes() > 0) {
1585 1742 set_bit_for_region(hr);
1586 1743 }
1587 1744
1588 1745 return false;
1589 1746 }
1590 1747 };
1591 1748
1592 1749 class G1ParFinalCountTask: public AbstractGangTask {
1593 1750 protected:
1594 1751 G1CollectedHeap* _g1h;
1595 1752 ConcurrentMark* _cm;
1596 1753 BitMap* _actual_region_bm;
1597 1754 BitMap* _actual_card_bm;
1598 1755
1599 1756 uint _n_workers;
1600 1757
1601 1758 public:
1602 1759 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1603 1760 : AbstractGangTask("G1 final counting"),
1604 1761 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1605 1762 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1606 1763 _n_workers(0) {
1607 1764 // Use the value already set as the number of active threads
1608 1765 // in the call to run_task().
1609 1766 if (G1CollectedHeap::use_parallel_gc_threads()) {
1610 1767 assert( _g1h->workers()->active_workers() > 0,
1611 1768 "Should have been previously set");
1612 1769 _n_workers = _g1h->workers()->active_workers();
1613 1770 } else {
1614 1771 _n_workers = 1;
1615 1772 }
1616 1773 }
1617 1774
1618 1775 void work(uint worker_id) {
1619 1776 assert(worker_id < _n_workers, "invariant");
1620 1777
1621 1778 FinalCountDataUpdateClosure final_update_cl(_g1h,
1622 1779 _actual_region_bm,
1623 1780 _actual_card_bm);
1624 1781
1625 1782 if (G1CollectedHeap::use_parallel_gc_threads()) {
1626 1783 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1627 1784 worker_id,
1628 1785 _n_workers,
1629 1786 HeapRegion::FinalCountClaimValue);
1630 1787 } else {
1631 1788 _g1h->heap_region_iterate(&final_update_cl);
1632 1789 }
1633 1790 }
1634 1791 };
1635 1792
1636 1793 class G1ParNoteEndTask;
1637 1794
1638 1795 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1639 1796 G1CollectedHeap* _g1;
1640 1797 int _worker_num;
1641 1798 size_t _max_live_bytes;
1642 1799 uint _regions_claimed;
1643 1800 size_t _freed_bytes;
1644 1801 FreeRegionList* _local_cleanup_list;
1645 1802 OldRegionSet* _old_proxy_set;
1646 1803 HumongousRegionSet* _humongous_proxy_set;
1647 1804 HRRSCleanupTask* _hrrs_cleanup_task;
1648 1805 double _claimed_region_time;
1649 1806 double _max_region_time;
1650 1807
1651 1808 public:
1652 1809 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1653 1810 int worker_num,
1654 1811 FreeRegionList* local_cleanup_list,
1655 1812 OldRegionSet* old_proxy_set,
1656 1813 HumongousRegionSet* humongous_proxy_set,
1657 1814 HRRSCleanupTask* hrrs_cleanup_task) :
1658 1815 _g1(g1), _worker_num(worker_num),
1659 1816 _max_live_bytes(0), _regions_claimed(0),
1660 1817 _freed_bytes(0),
1661 1818 _claimed_region_time(0.0), _max_region_time(0.0),
1662 1819 _local_cleanup_list(local_cleanup_list),
1663 1820 _old_proxy_set(old_proxy_set),
1664 1821 _humongous_proxy_set(humongous_proxy_set),
1665 1822 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1666 1823
1667 1824 size_t freed_bytes() { return _freed_bytes; }
1668 1825
1669 1826 bool doHeapRegion(HeapRegion *hr) {
1670 1827 if (hr->continuesHumongous()) {
1671 1828 return false;
1672 1829 }
1673 1830 // We use a claim value of zero here because all regions
1674 1831 // were claimed with value 1 in the FinalCount task.
1675 1832 _g1->reset_gc_time_stamps(hr);
1676 1833 double start = os::elapsedTime();
1677 1834 _regions_claimed++;
1678 1835 hr->note_end_of_marking();
1679 1836 _max_live_bytes += hr->max_live_bytes();
1680 1837 _g1->free_region_if_empty(hr,
1681 1838 &_freed_bytes,
1682 1839 _local_cleanup_list,
1683 1840 _old_proxy_set,
1684 1841 _humongous_proxy_set,
1685 1842 _hrrs_cleanup_task,
1686 1843 true /* par */);
1687 1844 double region_time = (os::elapsedTime() - start);
1688 1845 _claimed_region_time += region_time;
1689 1846 if (region_time > _max_region_time) {
1690 1847 _max_region_time = region_time;
1691 1848 }
1692 1849 return false;
1693 1850 }
1694 1851
1695 1852 size_t max_live_bytes() { return _max_live_bytes; }
1696 1853 uint regions_claimed() { return _regions_claimed; }
1697 1854 double claimed_region_time_sec() { return _claimed_region_time; }
1698 1855 double max_region_time_sec() { return _max_region_time; }
1699 1856 };
1700 1857
1701 1858 class G1ParNoteEndTask: public AbstractGangTask {
1702 1859 friend class G1NoteEndOfConcMarkClosure;
1703 1860
1704 1861 protected:
1705 1862 G1CollectedHeap* _g1h;
1706 1863 size_t _max_live_bytes;
1707 1864 size_t _freed_bytes;
1708 1865 FreeRegionList* _cleanup_list;
1709 1866
1710 1867 public:
1711 1868 G1ParNoteEndTask(G1CollectedHeap* g1h,
1712 1869 FreeRegionList* cleanup_list) :
1713 1870 AbstractGangTask("G1 note end"), _g1h(g1h),
1714 1871 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1715 1872
1716 1873 void work(uint worker_id) {
1717 1874 double start = os::elapsedTime();
1718 1875 FreeRegionList local_cleanup_list("Local Cleanup List");
1719 1876 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1720 1877 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1721 1878 HRRSCleanupTask hrrs_cleanup_task;
1722 1879 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1723 1880 &old_proxy_set,
1724 1881 &humongous_proxy_set,
1725 1882 &hrrs_cleanup_task);
1726 1883 if (G1CollectedHeap::use_parallel_gc_threads()) {
1727 1884 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1728 1885 _g1h->workers()->active_workers(),
1729 1886 HeapRegion::NoteEndClaimValue);
1730 1887 } else {
1731 1888 _g1h->heap_region_iterate(&g1_note_end);
1732 1889 }
1733 1890 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1734 1891
1735 1892 // Now update the lists
1736 1893 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1737 1894 NULL /* free_list */,
1738 1895 &old_proxy_set,
1739 1896 &humongous_proxy_set,
1740 1897 true /* par */);
1741 1898 {
1742 1899 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1743 1900 _max_live_bytes += g1_note_end.max_live_bytes();
1744 1901 _freed_bytes += g1_note_end.freed_bytes();
1745 1902
1746 1903 // If we iterate over the global cleanup list at the end of
1747 1904 // cleanup to do this printing we will not guarantee to only
1748 1905 // generate output for the newly-reclaimed regions (the list
1749 1906 // might not be empty at the beginning of cleanup; we might
1750 1907 // still be working on its previous contents). So we do the
1751 1908 // printing here, before we append the new regions to the global
1752 1909 // cleanup list.
1753 1910
1754 1911 G1HRPrinter* hr_printer = _g1h->hr_printer();
1755 1912 if (hr_printer->is_active()) {
1756 1913 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1757 1914 while (iter.more_available()) {
1758 1915 HeapRegion* hr = iter.get_next();
1759 1916 hr_printer->cleanup(hr);
1760 1917 }
1761 1918 }
1762 1919
1763 1920 _cleanup_list->add_as_tail(&local_cleanup_list);
1764 1921 assert(local_cleanup_list.is_empty(), "post-condition");
1765 1922
1766 1923 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1767 1924 }
1768 1925 }
1769 1926 size_t max_live_bytes() { return _max_live_bytes; }
1770 1927 size_t freed_bytes() { return _freed_bytes; }
1771 1928 };
1772 1929
1773 1930 class G1ParScrubRemSetTask: public AbstractGangTask {
1774 1931 protected:
1775 1932 G1RemSet* _g1rs;
1776 1933 BitMap* _region_bm;
1777 1934 BitMap* _card_bm;
1778 1935 public:
1779 1936 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1780 1937 BitMap* region_bm, BitMap* card_bm) :
1781 1938 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1782 1939 _region_bm(region_bm), _card_bm(card_bm) { }
1783 1940
1784 1941 void work(uint worker_id) {
1785 1942 if (G1CollectedHeap::use_parallel_gc_threads()) {
1786 1943 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1787 1944 HeapRegion::ScrubRemSetClaimValue);
1788 1945 } else {
1789 1946 _g1rs->scrub(_region_bm, _card_bm);
1790 1947 }
1791 1948 }
1792 1949
1793 1950 };
1794 1951
1795 1952 void ConcurrentMark::cleanup() {
1796 1953 // world is stopped at this checkpoint
1797 1954 assert(SafepointSynchronize::is_at_safepoint(),
1798 1955 "world should be stopped");
1799 1956 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1800 1957
1801 1958 // If a full collection has happened, we shouldn't do this.
1802 1959 if (has_aborted()) {
1803 1960 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1804 1961 return;
1805 1962 }
1806 1963
1807 1964 HRSPhaseSetter x(HRSPhaseCleanup);
1808 1965 g1h->verify_region_sets_optional();
1809 1966
1810 1967 if (VerifyDuringGC) {
1811 1968 HandleMark hm; // handle scope
1812 1969 gclog_or_tty->print(" VerifyDuringGC:(before)");
1813 1970 Universe::heap()->prepare_for_verify();
1814 1971 Universe::verify(/* silent */ false,
1815 1972 /* option */ VerifyOption_G1UsePrevMarking);
1816 1973 }
1817 1974
1818 1975 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1819 1976 g1p->record_concurrent_mark_cleanup_start();
1820 1977
1821 1978 double start = os::elapsedTime();
1822 1979
1823 1980 HeapRegionRemSet::reset_for_cleanup_tasks();
1824 1981
1825 1982 uint n_workers;
1826 1983
1827 1984 // Do counting once more with the world stopped for good measure.
1828 1985 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1829 1986
1830 1987 if (G1CollectedHeap::use_parallel_gc_threads()) {
1831 1988 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1832 1989 "sanity check");
1833 1990
1834 1991 g1h->set_par_threads();
1835 1992 n_workers = g1h->n_par_threads();
1836 1993 assert(g1h->n_par_threads() == n_workers,
1837 1994 "Should not have been reset");
1838 1995 g1h->workers()->run_task(&g1_par_count_task);
1839 1996 // Done with the parallel phase so reset to 0.
1840 1997 g1h->set_par_threads(0);
1841 1998
1842 1999 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1843 2000 "sanity check");
1844 2001 } else {
1845 2002 n_workers = 1;
1846 2003 g1_par_count_task.work(0);
1847 2004 }
1848 2005
1849 2006 if (VerifyDuringGC) {
1850 2007 // Verify that the counting data accumulated during marking matches
1851 2008 // that calculated by walking the marking bitmap.
1852 2009
1853 2010 // Bitmaps to hold expected values
1854 2011 BitMap expected_region_bm(_region_bm.size(), false);
1855 2012 BitMap expected_card_bm(_card_bm.size(), false);
1856 2013
1857 2014 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1858 2015 &_region_bm,
1859 2016 &_card_bm,
1860 2017 &expected_region_bm,
1861 2018 &expected_card_bm);
1862 2019
1863 2020 if (G1CollectedHeap::use_parallel_gc_threads()) {
1864 2021 g1h->set_par_threads((int)n_workers);
1865 2022 g1h->workers()->run_task(&g1_par_verify_task);
1866 2023 // Done with the parallel phase so reset to 0.
1867 2024 g1h->set_par_threads(0);
1868 2025
1869 2026 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1870 2027 "sanity check");
1871 2028 } else {
1872 2029 g1_par_verify_task.work(0);
1873 2030 }
1874 2031
1875 2032 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1876 2033 }
1877 2034
1878 2035 size_t start_used_bytes = g1h->used();
1879 2036 g1h->set_marking_complete();
1880 2037
1881 2038 double count_end = os::elapsedTime();
1882 2039 double this_final_counting_time = (count_end - start);
1883 2040 _total_counting_time += this_final_counting_time;
1884 2041
1885 2042 if (G1PrintRegionLivenessInfo) {
1886 2043 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1887 2044 _g1h->heap_region_iterate(&cl);
1888 2045 }
1889 2046
1890 2047 // Install newly created mark bitMap as "prev".
1891 2048 swapMarkBitMaps();
1892 2049
1893 2050 g1h->reset_gc_time_stamp();
1894 2051
1895 2052 // Note end of marking in all heap regions.
1896 2053 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1897 2054 if (G1CollectedHeap::use_parallel_gc_threads()) {
1898 2055 g1h->set_par_threads((int)n_workers);
1899 2056 g1h->workers()->run_task(&g1_par_note_end_task);
1900 2057 g1h->set_par_threads(0);
1901 2058
1902 2059 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1903 2060 "sanity check");
1904 2061 } else {
1905 2062 g1_par_note_end_task.work(0);
1906 2063 }
1907 2064 g1h->check_gc_time_stamps();
1908 2065
1909 2066 if (!cleanup_list_is_empty()) {
1910 2067 // The cleanup list is not empty, so we'll have to process it
1911 2068 // concurrently. Notify anyone else that might be wanting free
1912 2069 // regions that there will be more free regions coming soon.
1913 2070 g1h->set_free_regions_coming();
1914 2071 }
1915 2072
1916 2073   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1917 2074   // call below, since rem set sizes affect the metric by which we sort the heap regions.
1918 2075 if (G1ScrubRemSets) {
1919 2076 double rs_scrub_start = os::elapsedTime();
1920 2077 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1921 2078 if (G1CollectedHeap::use_parallel_gc_threads()) {
1922 2079 g1h->set_par_threads((int)n_workers);
1923 2080 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1924 2081 g1h->set_par_threads(0);
1925 2082
1926 2083 assert(g1h->check_heap_region_claim_values(
1927 2084 HeapRegion::ScrubRemSetClaimValue),
1928 2085 "sanity check");
1929 2086 } else {
1930 2087 g1_par_scrub_rs_task.work(0);
1931 2088 }
1932 2089
1933 2090 double rs_scrub_end = os::elapsedTime();
1934 2091 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1935 2092 _total_rs_scrub_time += this_rs_scrub_time;
1936 2093 }
1937 2094
1938 2095 // this will also free any regions totally full of garbage objects,
1939 2096 // and sort the regions.
1940 2097 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1941 2098
1942 2099 // Statistics.
1943 2100 double end = os::elapsedTime();
1944 2101 _cleanup_times.add((end - start) * 1000.0);
1945 2102
1946 2103 if (G1Log::fine()) {
1947 2104 g1h->print_size_transition(gclog_or_tty,
1948 2105 start_used_bytes,
1949 2106 g1h->used(),
1950 2107 g1h->capacity());
1951 2108 }
1952 2109
1953 2110 // Clean up will have freed any regions completely full of garbage.
1954 2111 // Update the soft reference policy with the new heap occupancy.
1955 2112 Universe::update_heap_info_at_gc();
1956 2113
1957 2114 // We need to make this be a "collection" so any collection pause that
1958 2115 // races with it goes around and waits for completeCleanup to finish.
1959 2116 g1h->increment_total_collections();
1960 2117
1961 2118 // We reclaimed old regions so we should calculate the sizes to make
1962 2119 // sure we update the old gen/space data.
1963 2120 g1h->g1mm()->update_sizes();
1964 2121
1965 2122 if (VerifyDuringGC) {
1966 2123 HandleMark hm; // handle scope
1967 2124 gclog_or_tty->print(" VerifyDuringGC:(after)");
1968 2125 Universe::heap()->prepare_for_verify();
1969 2126 Universe::verify(/* silent */ false,
1970 2127 /* option */ VerifyOption_G1UsePrevMarking);
1971 2128 }
1972 2129
1973 2130 g1h->verify_region_sets_optional();
1974 2131 }
1975 2132
1976 2133 void ConcurrentMark::completeCleanup() {
1977 2134 if (has_aborted()) return;
1978 2135
1979 2136 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1980 2137
1981 2138 _cleanup_list.verify_optional();
1982 2139 FreeRegionList tmp_free_list("Tmp Free List");
1983 2140
1984 2141 if (G1ConcRegionFreeingVerbose) {
1985 2142 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1986 2143 "cleanup list has %u entries",
1987 2144 _cleanup_list.length());
1988 2145 }
1989 2146
1990 2147   // No one else should be accessing the _cleanup_list at this point,
1991 2148 // so it's not necessary to take any locks
1992 2149 while (!_cleanup_list.is_empty()) {
1993 2150 HeapRegion* hr = _cleanup_list.remove_head();
1994 2151 assert(hr != NULL, "the list was not empty");
1995 2152 hr->par_clear();
1996 2153 tmp_free_list.add_as_tail(hr);
1997 2154
1998 2155 // Instead of adding one region at a time to the secondary_free_list,
1999 2156 // we accumulate them in the local list and move them a few at a
2000 2157 // time. This also cuts down on the number of notify_all() calls
2001 2158 // we do during this process. We'll also append the local list when
2002 2159 // _cleanup_list is empty (which means we just removed the last
2003 2160 // region from the _cleanup_list).
2004 2161 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2005 2162 _cleanup_list.is_empty()) {
2006 2163 if (G1ConcRegionFreeingVerbose) {
2007 2164 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2008 2165 "appending %u entries to the secondary_free_list, "
2009 2166 "cleanup list still has %u entries",
2010 2167 tmp_free_list.length(),
2011 2168 _cleanup_list.length());
2012 2169 }
2013 2170
2014 2171 {
2015 2172 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2016 2173 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2017 2174 SecondaryFreeList_lock->notify_all();
2018 2175 }
2019 2176
2020 2177 if (G1StressConcRegionFreeing) {
2021 2178 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2022 2179 os::sleep(Thread::current(), (jlong) 1, false);
2023 2180 }
2024 2181 }
2025 2182 }
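      // As a rough illustration (batch length of 5 assumed here; the real
      // default is defined in g1_globals.hpp): freeing 100 regions would
      // take the lock and call notify_all() about 20 times instead of 100,
      // at the cost of regions becoming available slightly later.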
2026 2183 }
2027 2184 assert(tmp_free_list.is_empty(), "post-condition");
2028 2185 }
2029 2186
2030 2187 // Support closures for reference processing in G1
2031 2188
2032 2189 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2033 2190 HeapWord* addr = (HeapWord*)obj;
2034 2191 return addr != NULL &&
2035 2192 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2036 2193 }
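// In other words: a reference outside the G1-reserved space is always
// treated as live by this predicate, while an object inside it is live
// unless is_obj_ill() reports it dead with respect to the current marking.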
2037 2194
2038 2195 class G1CMKeepAliveClosure: public ExtendedOopClosure {
2039 2196 G1CollectedHeap* _g1;
2040 2197 ConcurrentMark* _cm;
2041 2198 public:
2042 2199 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2043 2200 _g1(g1), _cm(cm) {
2044 2201 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2045 2202 }
2046 2203
2047 2204 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2048 2205 virtual void do_oop( oop* p) { do_oop_work(p); }
2049 2206
2050 2207 template <class T> void do_oop_work(T* p) {
2051 2208 oop obj = oopDesc::load_decode_heap_oop(p);
2052 2209 HeapWord* addr = (HeapWord*)obj;
2053 2210
2054 2211 if (_cm->verbose_high()) {
2055 2212 gclog_or_tty->print_cr("\t[0] we're looking at location "
2056 2213 "*"PTR_FORMAT" = "PTR_FORMAT,
2057 2214 p, (void*) obj);
2058 2215 }
2059 2216
2060 2217 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2061 2218 _cm->mark_and_count(obj);
2062 2219 _cm->mark_stack_push(obj);
2063 2220 }
2064 2221 }
2065 2222 };
2066 2223
2067 2224 class G1CMDrainMarkingStackClosure: public VoidClosure {
2068 2225 ConcurrentMark* _cm;
2069 2226 CMMarkStack* _markStack;
2070 2227 G1CMKeepAliveClosure* _oopClosure;
2071 2228 public:
2072 2229 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2073 2230 G1CMKeepAliveClosure* oopClosure) :
2074 2231 _cm(cm),
2075 2232 _markStack(markStack),
2076 2233 _oopClosure(oopClosure) { }
2077 2234
2078 2235 void do_void() {
2079 2236 _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false);
2080 2237 }
2081 2238 };
2082 2239
2083 2240 // 'Keep Alive' closure used by parallel reference processing.
2084 2241 // An instance of this closure is used in the parallel reference processing
2085 2242 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2086 2243 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2087 2244 // placed onto discovered ref lists only once, so we can mark and push with no
2088 2245 // need to check whether the object has already been marked. Using the
2089 2246 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2090 2247 // operating on the global mark stack. This means that an individual
2091 2248 // worker would be doing lock-free pushes while it processes its own
2092 2249 // discovered ref list followed by drain call. If the discovered ref lists
2093 2250 // are unbalanced then this could cause interference with the other
2094 2251 // workers. Using a CMTask (and its embedded local data structures)
2095 2252 // avoids that potential interference.
2096 2253 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2097 2254 ConcurrentMark* _cm;
2098 2255 CMTask* _task;
2099 2256 int _ref_counter_limit;
2100 2257 int _ref_counter;
2101 2258 public:
2102 2259 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2103 2260 _cm(cm), _task(task),
2104 2261 _ref_counter_limit(G1RefProcDrainInterval) {
2105 2262 assert(_ref_counter_limit > 0, "sanity");
2106 2263 _ref_counter = _ref_counter_limit;
2107 2264 }
2108 2265
2109 2266 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2110 2267 virtual void do_oop( oop* p) { do_oop_work(p); }
2111 2268
2112 2269 template <class T> void do_oop_work(T* p) {
2113 2270 if (!_cm->has_overflown()) {
2114 2271 oop obj = oopDesc::load_decode_heap_oop(p);
2115 2272 if (_cm->verbose_high()) {
2116 2273 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2117 2274 "*"PTR_FORMAT" = "PTR_FORMAT,
2118 2275 _task->worker_id(), p, (void*) obj);
2119 2276 }
2120 2277
2121 2278 _task->deal_with_reference(obj);
2122 2279 _ref_counter--;
2123 2280
2124 2281 if (_ref_counter == 0) {
2125 2282 // We have dealt with _ref_counter_limit references, pushing them and objects
2126 2283 // reachable from them on to the local stack (and possibly the global stack).
2127 2284 // Call do_marking_step() to process these entries. We call the routine in a
2128 2285 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2129 2286 // with the entries that we've pushed as a result of the deal_with_reference
2130 2287 // calls above) or we overflow.
2131 2288 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2132 2289 // while there may still be some work to do. (See the comment at the
2133 2290 // beginning of CMTask::do_marking_step() for those conditions - one of which
2134 2291 // is reaching the specified time target.) It is only when
2135 2292 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2136 2293 // that the marking has completed.
2137 2294 do {
2138 2295 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2139 2296 _task->do_marking_step(mark_step_duration_ms,
2140 2297 false /* do_stealing */,
2141 2298 false /* do_termination */);
2142 2299 } while (_task->has_aborted() && !_cm->has_overflown());
2143 2300 _ref_counter = _ref_counter_limit;
2144 2301 }
2145 2302 } else {
2146 2303 if (_cm->verbose_high()) {
2147 2304 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2148 2305 }
2149 2306 }
2150 2307 }
2151 2308 };
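// A hedged usage illustration (the flag value is made up for the example;
// G1RefProcDrainInterval is the flag read into _ref_counter_limit above):
//
//   java -XX:+UseG1GC -XX:G1RefProcDrainInterval=1000 ...
//
// would make each worker call do_marking_step() after every 1000 references
// it keeps alive, bounding how much marking work can pile up between
// reference-processing steps.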
2152 2309
2153 2310 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2154 2311 ConcurrentMark* _cm;
2155 2312 CMTask* _task;
2156 2313 public:
2157 2314 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2158 2315 _cm(cm), _task(task) { }
2159 2316
2160 2317 void do_void() {
2161 2318 do {
2162 2319 if (_cm->verbose_high()) {
2163 2320         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step",
2164 2321 _task->worker_id());
2165 2322 }
2166 2323
2167 2324 // We call CMTask::do_marking_step() to completely drain the local and
2168 2325 // global marking stacks. The routine is called in a loop, which we'll
2169 2326     // exit if there's nothing more to do (i.e. we've completely drained the
2170 2327 // entries that were pushed as a result of applying the
2171 2328 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2172 2329 // lists above) or we overflow the global marking stack.
2173 2330 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2174 2331 // while there may still be some work to do. (See the comment at the
2175 2332 // beginning of CMTask::do_marking_step() for those conditions - one of which
2176 2333 // is reaching the specified time target.) It is only when
2177 2334 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2178 2335 // that the marking has completed.
2179 2336
2180 2337 _task->do_marking_step(1000000000.0 /* something very large */,
2181 2338 true /* do_stealing */,
2182 2339 true /* do_termination */);
2183 2340 } while (_task->has_aborted() && !_cm->has_overflown());
2184 2341 }
2185 2342 };
2186 2343
2187 2344 // Implementation of AbstractRefProcTaskExecutor for parallel
2188 2345 // reference processing at the end of G1 concurrent marking
2189 2346
2190 2347 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2191 2348 private:
2192 2349 G1CollectedHeap* _g1h;
2193 2350 ConcurrentMark* _cm;
2194 2351 WorkGang* _workers;
2195 2352 int _active_workers;
2196 2353
2197 2354 public:
2198 2355 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2199 2356 ConcurrentMark* cm,
2200 2357 WorkGang* workers,
2201 2358 int n_workers) :
2202 2359 _g1h(g1h), _cm(cm),
2203 2360 _workers(workers), _active_workers(n_workers) { }
2204 2361
2205 2362 // Executes the given task using concurrent marking worker threads.
2206 2363 virtual void execute(ProcessTask& task);
2207 2364 virtual void execute(EnqueueTask& task);
2208 2365 };
2209 2366
2210 2367 class G1CMRefProcTaskProxy: public AbstractGangTask {
2211 2368 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2212 2369 ProcessTask& _proc_task;
2213 2370 G1CollectedHeap* _g1h;
2214 2371 ConcurrentMark* _cm;
2215 2372
2216 2373 public:
2217 2374 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2218 2375 G1CollectedHeap* g1h,
2219 2376 ConcurrentMark* cm) :
2220 2377 AbstractGangTask("Process reference objects in parallel"),
2221 2378 _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2222 2379
2223 2380 virtual void work(uint worker_id) {
2224 2381 CMTask* marking_task = _cm->task(worker_id);
2225 2382 G1CMIsAliveClosure g1_is_alive(_g1h);
2226 2383 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2227 2384 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2228 2385
2229 2386 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2230 2387 }
2231 2388 };
2232 2389
2233 2390 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2234 2391 assert(_workers != NULL, "Need parallel worker threads.");
2235 2392
2236 2393 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2237 2394
2238 2395 // We need to reset the phase for each task execution so that
2239 2396 // the termination protocol of CMTask::do_marking_step works.
2240 2397 _cm->set_phase(_active_workers, false /* concurrent */);
2241 2398 _g1h->set_par_threads(_active_workers);
2242 2399 _workers->run_task(&proc_task_proxy);
2243 2400 _g1h->set_par_threads(0);
2244 2401 }
2245 2402
2246 2403 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2247 2404 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2248 2405 EnqueueTask& _enq_task;
2249 2406
2250 2407 public:
2251 2408 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2252 2409 AbstractGangTask("Enqueue reference objects in parallel"),
2253 2410 _enq_task(enq_task) { }
2254 2411
2255 2412 virtual void work(uint worker_id) {
2256 2413 _enq_task.work(worker_id);
2257 2414 }
2258 2415 };
2259 2416
2260 2417 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2261 2418 assert(_workers != NULL, "Need parallel worker threads.");
2262 2419
2263 2420 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2264 2421
2265 2422 _g1h->set_par_threads(_active_workers);
2266 2423 _workers->run_task(&enq_task_proxy);
2267 2424 _g1h->set_par_threads(0);
2268 2425 }
2269 2426
2270 2427 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2271 2428 ResourceMark rm;
2272 2429 HandleMark hm;
2273 2430
2274 2431 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2275 2432
2276 2433 // Is alive closure.
2277 2434 G1CMIsAliveClosure g1_is_alive(g1h);
2278 2435
2279 2436 // Inner scope to exclude the cleaning of the string and symbol
2280 2437 // tables from the displayed time.
2281 2438 {
2282 2439 if (G1Log::finer()) {
2283 2440 gclog_or_tty->put(' ');
2284 2441 }
2285 2442 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2286 2443
2287 2444 ReferenceProcessor* rp = g1h->ref_processor_cm();
2288 2445
2289 2446 // See the comment in G1CollectedHeap::ref_processing_init()
2290 2447 // about how reference processing currently works in G1.
2291 2448
2292 2449 // Process weak references.
2293 2450 rp->setup_policy(clear_all_soft_refs);
2294 2451 assert(_markStack.isEmpty(), "mark stack should be empty");
2295 2452
2296 2453 G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2297 2454 G1CMDrainMarkingStackClosure
2298 2455 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2299 2456
2300 2457 // We use the work gang from the G1CollectedHeap and we utilize all
2301 2458 // the worker threads.
2302 2459 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2303 2460 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2304 2461
2305 2462 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2306 2463 g1h->workers(), active_workers);
2307 2464
2308 2465 if (rp->processing_is_mt()) {
2309 2466 // Set the degree of MT here. If the discovery is done MT, there
2310 2467 // may have been a different number of threads doing the discovery
2311 2468 // and a different number of discovered lists may have Ref objects.
2312 2469 // That is OK as long as the Reference lists are balanced (see
2313 2470 // balance_all_queues() and balance_queues()).
2314 2471 rp->set_active_mt_degree(active_workers);
2315 2472
2316 2473 rp->process_discovered_references(&g1_is_alive,
2317 2474 &g1_keep_alive,
2318 2475 &g1_drain_mark_stack,
2319 2476 &par_task_executor);
2320 2477
2321 2478 // The work routines of the parallel keep_alive and drain_marking_stack
2322 2479 // will set the has_overflown flag if we overflow the global marking
2323 2480 // stack.
2324 2481 } else {
2325 2482 rp->process_discovered_references(&g1_is_alive,
2326 2483 &g1_keep_alive,
2327 2484 &g1_drain_mark_stack,
2328 2485 NULL);
2329 2486 }
2330 2487
2331 2488 assert(_markStack.overflow() || _markStack.isEmpty(),
2332 2489 "mark stack should be empty (unless it overflowed)");
2333 2490 if (_markStack.overflow()) {
2334 2491 // Should have been done already when we tried to push an
2335 2492 // entry on to the global mark stack. But let's do it again.
2336 2493 set_has_overflown();
2337 2494 }
2338 2495
2339 2496 if (rp->processing_is_mt()) {
2340 2497 assert(rp->num_q() == active_workers, "why not");
2341 2498 rp->enqueue_discovered_references(&par_task_executor);
2342 2499 } else {
2343 2500 rp->enqueue_discovered_references();
2344 2501 }
2345 2502
2346 2503 rp->verify_no_references_recorded();
2347 2504 assert(!rp->discovery_enabled(), "Post condition");
2348 2505 }
2349 2506
2350 2507 // Now clean up stale oops in StringTable
2351 2508 StringTable::unlink(&g1_is_alive);
2352 2509 // Clean up unreferenced symbols in symbol table.
2353 2510 SymbolTable::unlink();
2354 2511 }
2355 2512
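// Called during the cleanup pause: the bitmap we have just finished marking
// on ("next") becomes the "prev" bitmap for the following cycle, and the old
// "prev" bitmap will be cleared and reused as "next". The casts strip and
// re-add the read-only (RO) view of the prev bitmap.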
2356 2513 void ConcurrentMark::swapMarkBitMaps() {
2357 2514 CMBitMapRO* temp = _prevMarkBitMap;
2358 2515 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2359 2516 _nextMarkBitMap = (CMBitMap*) temp;
2360 2517 }
2361 2518
2362 2519 class CMRemarkTask: public AbstractGangTask {
2363 2520 private:
2364 2521 ConcurrentMark *_cm;
2365 2522
2366 2523 public:
2367 2524 void work(uint worker_id) {
2368 2525 // Since all available tasks are actually started, we should
2369 2526     // only proceed if we're supposed to be active.
2370 2527 if (worker_id < _cm->active_tasks()) {
2371 2528 CMTask* task = _cm->task(worker_id);
2372 2529 task->record_start_time();
2373 2530 do {
2374 2531 task->do_marking_step(1000000000.0 /* something very large */,
2375 2532 true /* do_stealing */,
2376 2533 true /* do_termination */);
2377 2534 } while (task->has_aborted() && !_cm->has_overflown());
2378 2535 // If we overflow, then we do not want to restart. We instead
2379 2536 // want to abort remark and do concurrent marking again.
2380 2537 task->record_end_time();
2381 2538 }
2382 2539 }
2383 2540
2384 2541 CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2385 2542 AbstractGangTask("Par Remark"), _cm(cm) {
2386 2543 _cm->terminator()->reset_for_reuse(active_workers);
2387 2544 }
2388 2545 };
2389 2546
2390 2547 void ConcurrentMark::checkpointRootsFinalWork() {
2391 2548 ResourceMark rm;
2392 2549 HandleMark hm;
2393 2550 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2394 2551
2395 2552 g1h->ensure_parsability(false);
2396 2553
2397 2554 if (G1CollectedHeap::use_parallel_gc_threads()) {
2398 2555 G1CollectedHeap::StrongRootsScope srs(g1h);
2399 2556 // this is remark, so we'll use up all active threads
2400 2557 uint active_workers = g1h->workers()->active_workers();
2401 2558 if (active_workers == 0) {
2402 2559 assert(active_workers > 0, "Should have been set earlier");
2403 2560 active_workers = (uint) ParallelGCThreads;
2404 2561 g1h->workers()->set_active_workers(active_workers);
2405 2562 }
2406 2563 set_phase(active_workers, false /* concurrent */);
2407 2564     // Leave _parallel_marking_threads at its
2408 2565 // value originally calculated in the ConcurrentMark
2409 2566 // constructor and pass values of the active workers
2410 2567 // through the gang in the task.
2411 2568
2412 2569 CMRemarkTask remarkTask(this, active_workers);
2413 2570 g1h->set_par_threads(active_workers);
2414 2571 g1h->workers()->run_task(&remarkTask);
2415 2572 g1h->set_par_threads(0);
2416 2573 } else {
2417 2574 G1CollectedHeap::StrongRootsScope srs(g1h);
2418 2575     // this is remark, but with no parallel workers we run it serially
2419 2576 uint active_workers = 1;
2420 2577 set_phase(active_workers, false /* concurrent */);
2421 2578
2422 2579 CMRemarkTask remarkTask(this, active_workers);
2423 2580 // We will start all available threads, even if we decide that the
2424 2581 // active_workers will be fewer. The extra ones will just bail out
2425 2582 // immediately.
2426 2583 remarkTask.work(0);
2427 2584 }
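  // By this point the remark tasks above should have drained every SATB
  // buffer, which is what the guarantee below checks.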
2428 2585 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2429 2586 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2430 2587
2431 2588 print_stats();
2432 2589
2433 2590 #if VERIFY_OBJS_PROCESSED
2434 2591 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2435 2592 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2436 2593 _scan_obj_cl.objs_processed,
2437 2594 ThreadLocalObjQueue::objs_enqueued);
2438 2595 guarantee(_scan_obj_cl.objs_processed ==
2439 2596 ThreadLocalObjQueue::objs_enqueued,
2440 2597 "Different number of objs processed and enqueued.");
2441 2598 }
2442 2599 #endif
2443 2600 }
2444 2601
2445 2602 #ifndef PRODUCT
2446 2603
2447 2604 class PrintReachableOopClosure: public OopClosure {
2448 2605 private:
2449 2606 G1CollectedHeap* _g1h;
2450 2607 outputStream* _out;
2451 2608 VerifyOption _vo;
2452 2609 bool _all;
2453 2610
2454 2611 public:
2455 2612 PrintReachableOopClosure(outputStream* out,
2456 2613 VerifyOption vo,
2457 2614 bool all) :
2458 2615 _g1h(G1CollectedHeap::heap()),
2459 2616 _out(out), _vo(vo), _all(all) { }
2460 2617
2461 2618 void do_oop(narrowOop* p) { do_oop_work(p); }
2462 2619 void do_oop( oop* p) { do_oop_work(p); }
2463 2620
2464 2621 template <class T> void do_oop_work(T* p) {
2465 2622 oop obj = oopDesc::load_decode_heap_oop(p);
2466 2623 const char* str = NULL;
2467 2624 const char* str2 = "";
2468 2625
2469 2626 if (obj == NULL) {
2470 2627 str = "";
2471 2628 } else if (!_g1h->is_in_g1_reserved(obj)) {
2472 2629 str = " O";
2473 2630 } else {
2474 2631 HeapRegion* hr = _g1h->heap_region_containing(obj);
2475 2632 guarantee(hr != NULL, "invariant");
2476 2633 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2477 2634 bool marked = _g1h->is_marked(obj, _vo);
2478 2635
2479 2636 if (over_tams) {
2480 2637 str = " >";
2481 2638 if (marked) {
2482 2639 str2 = " AND MARKED";
2483 2640 }
2484 2641 } else if (marked) {
2485 2642 str = " M";
2486 2643 } else {
2487 2644 str = " NOT";
2488 2645 }
2489 2646 }
2490 2647
2491 2648 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2492 2649 p, (void*) obj, str, str2);
2493 2650 }
2494 2651 };
2495 2652
2496 2653 class PrintReachableObjectClosure : public ObjectClosure {
2497 2654 private:
2498 2655 G1CollectedHeap* _g1h;
2499 2656 outputStream* _out;
2500 2657 VerifyOption _vo;
2501 2658 bool _all;
2502 2659 HeapRegion* _hr;
2503 2660
2504 2661 public:
2505 2662 PrintReachableObjectClosure(outputStream* out,
2506 2663 VerifyOption vo,
2507 2664 bool all,
2508 2665 HeapRegion* hr) :
2509 2666 _g1h(G1CollectedHeap::heap()),
2510 2667 _out(out), _vo(vo), _all(all), _hr(hr) { }
2511 2668
2512 2669 void do_object(oop o) {
2513 2670 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2514 2671 bool marked = _g1h->is_marked(o, _vo);
2515 2672 bool print_it = _all || over_tams || marked;
2516 2673
2517 2674 if (print_it) {
2518 2675 _out->print_cr(" "PTR_FORMAT"%s",
2519 2676 o, (over_tams) ? " >" : (marked) ? " M" : "");
2520 2677 PrintReachableOopClosure oopCl(_out, _vo, _all);
2521 2678 o->oop_iterate_no_header(&oopCl);
2522 2679 }
2523 2680 }
2524 2681 };
2525 2682
2526 2683 class PrintReachableRegionClosure : public HeapRegionClosure {
2527 2684 private:
2528 2685 G1CollectedHeap* _g1h;
2529 2686 outputStream* _out;
2530 2687 VerifyOption _vo;
2531 2688 bool _all;
2532 2689
2533 2690 public:
2534 2691 bool doHeapRegion(HeapRegion* hr) {
2535 2692 HeapWord* b = hr->bottom();
2536 2693 HeapWord* e = hr->end();
2537 2694 HeapWord* t = hr->top();
2538 2695 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2539 2696 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2540 2697 "TAMS: "PTR_FORMAT, b, e, t, p);
2541 2698 _out->cr();
2542 2699
2543 2700 HeapWord* from = b;
2544 2701 HeapWord* to = t;
2545 2702
2546 2703 if (to > from) {
2547 2704 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2548 2705 _out->cr();
2549 2706 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2550 2707 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2551 2708 _out->cr();
2552 2709 }
2553 2710
2554 2711 return false;
2555 2712 }
2556 2713
2557 2714 PrintReachableRegionClosure(outputStream* out,
2558 2715 VerifyOption vo,
2559 2716 bool all) :
2560 2717 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2561 2718 };
2562 2719
2563 2720 void ConcurrentMark::print_reachable(const char* str,
2564 2721 VerifyOption vo,
2565 2722 bool all) {
2566 2723 gclog_or_tty->cr();
2567 2724 gclog_or_tty->print_cr("== Doing heap dump... ");
2568 2725
2569 2726 if (G1PrintReachableBaseFile == NULL) {
2570 2727 gclog_or_tty->print_cr(" #### error: no base file defined");
2571 2728 return;
2572 2729 }
2573 2730
2574 2731 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2575 2732 (JVM_MAXPATHLEN - 1)) {
2576 2733 gclog_or_tty->print_cr(" #### error: file name too long");
2577 2734 return;
2578 2735 }
2579 2736
2580 2737 char file_name[JVM_MAXPATHLEN];
2581 2738 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2582 2739 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2583 2740
2584 2741 fileStream fout(file_name);
2585 2742 if (!fout.is_open()) {
2586 2743 gclog_or_tty->print_cr(" #### error: could not open file");
2587 2744 return;
2588 2745 }
2589 2746
2590 2747 outputStream* out = &fout;
2591 2748 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2592 2749 out->cr();
2593 2750
2594 2751 out->print_cr("--- ITERATING OVER REGIONS");
2595 2752 out->cr();
2596 2753 PrintReachableRegionClosure rcl(out, vo, all);
2597 2754 _g1h->heap_region_iterate(&rcl);
2598 2755 out->cr();
2599 2756
2600 2757 gclog_or_tty->print_cr(" done");
2601 2758 gclog_or_tty->flush();
2602 2759 }
2603 2760
2604 2761 #endif // PRODUCT
2605 2762
2606 2763 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2607 2764 // Note we are overriding the read-only view of the prev map here, via
2608 2765 // the cast.
2609 2766 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2610 2767 }
2611 2768
2612 2769 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2613 2770 _nextMarkBitMap->clearRange(mr);
2614 2771 }
2615 2772
2616 2773 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2617 2774 clearRangePrevBitmap(mr);
2618 2775 clearRangeNextBitmap(mr);
2619 2776 }
2620 2777
2621 2778 HeapRegion*
2622 2779 ConcurrentMark::claim_region(uint worker_id) {
2623 2780 // "checkpoint" the finger
2624 2781 HeapWord* finger = _finger;
2625 2782
2626 2783 // _heap_end will not change underneath our feet; it only changes at
2627 2784 // yield points.
2628 2785 while (finger < _heap_end) {
2629 2786 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2630 2787
2631 2788 // Note on how this code handles humongous regions. In the
2632 2789 // normal case the finger will reach the start of a "starts
2633 2790 // humongous" (SH) region. Its end will either be the end of the
2634 2791 // last "continues humongous" (CH) region in the sequence, or the
2635 2792 // standard end of the SH region (if the SH is the only region in
2636 2793 // the sequence). That way claim_region() will skip over the CH
2637 2794 // regions. However, there is a subtle race between a CM thread
2638 2795 // executing this method and a mutator thread doing a humongous
2639 2796 // object allocation. The two are not mutually exclusive as the CM
2640 2797 // thread does not need to hold the Heap_lock when it gets
2641 2798 // here. So there is a chance that claim_region() will come across
2642 2799 // a free region that's in the process of becoming an SH or a CH
2643 2800 // region. In the former case, it will either
2644 2801 // a) Miss the update to the region's end, in which case it will
2645 2802 // visit every subsequent CH region, will find their bitmaps
2646 2803 // empty, and do nothing, or
2647 2804 // b) Will observe the update of the region's end (in which case
2648 2805 // it will skip the subsequent CH regions).
2649 2806 // If it comes across a region that suddenly becomes CH, the
2650 2807 // scenario will be similar to b). So, the race between
2651 2808 // claim_region() and a humongous object allocation might force us
2652 2809 // to do a bit of unnecessary work (due to some unnecessary bitmap
2653 2810 // iterations) but it should not introduce any correctness issues.
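// As a concrete illustration of case a): the CM thread reads the end of a
// still-free region; a mutator then allocates a humongous object there,
// turning it into an SH region followed by CH regions; the CM thread's
// stale end means it will claim and visit each CH region individually,
// find its bitmap empty, and simply move on.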
2654 2811 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2655 2812 HeapWord* bottom = curr_region->bottom();
2656 2813 HeapWord* end = curr_region->end();
2657 2814 HeapWord* limit = curr_region->next_top_at_mark_start();
2658 2815
2659 2816 if (verbose_low()) {
2660 2817 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2661 2818 "["PTR_FORMAT", "PTR_FORMAT"), "
2662 2819 "limit = "PTR_FORMAT,
2663 2820 worker_id, curr_region, bottom, end, limit);
2664 2821 }
2665 2822
2666 2823 // Is the gap between reading the finger and doing the CAS too long?
2667 2824 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2668 2825 if (res == finger) {
2669 2826 // we succeeded
2670 2827
2671 2828 // notice that _finger == end cannot be guaranteed here since
2672 2829 // someone else might have moved the finger even further
2673 2830 assert(_finger >= end, "the finger should have moved forward");
2674 2831
2675 2832 if (verbose_low()) {
2676 2833 gclog_or_tty->print_cr("[%u] we were successful with region = "
2677 2834 PTR_FORMAT, worker_id, curr_region);
2678 2835 }
2679 2836
2680 2837 if (limit > bottom) {
2681 2838 if (verbose_low()) {
2682 2839 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2683 2840 "returning it ", worker_id, curr_region);
2684 2841 }
2685 2842 return curr_region;
2686 2843 } else {
2687 2844 assert(limit == bottom,
2688 2845 "the region limit should be at bottom");
2689 2846 if (verbose_low()) {
2690 2847 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2691 2848 "returning NULL", worker_id, curr_region);
2692 2849 }
2693 2850 // we return NULL and the caller should try calling
2694 2851 // claim_region() again.
2695 2852 return NULL;
2696 2853 }
2697 2854 } else {
2698 2855 assert(_finger > finger, "the finger should have moved forward");
2699 2856 if (verbose_low()) {
2700 2857 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2701 2858 "global finger = "PTR_FORMAT", "
2702 2859 "our finger = "PTR_FORMAT,
2703 2860 worker_id, _finger, finger);
2704 2861 }
2705 2862
2706 2863 // read it again
2707 2864 finger = _finger;
2708 2865 }
2709 2866 }
2710 2867
2711 2868 return NULL;
2712 2869 }
2713 2870
2714 2871 #ifndef PRODUCT
2715 2872 enum VerifyNoCSetOopsPhase {
2716 2873 VerifyNoCSetOopsStack,
2717 2874 VerifyNoCSetOopsQueues,
2718 2875 VerifyNoCSetOopsSATBCompleted,
2719 2876 VerifyNoCSetOopsSATBThread
2720 2877 };
2721 2878
2722 2879 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
2723 2880 private:
2724 2881 G1CollectedHeap* _g1h;
2725 2882 VerifyNoCSetOopsPhase _phase;
2726 2883 int _info;
2727 2884
2728 2885 const char* phase_str() {
2729 2886 switch (_phase) {
2730 2887 case VerifyNoCSetOopsStack: return "Stack";
2731 2888 case VerifyNoCSetOopsQueues: return "Queue";
2732 2889 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2733 2890 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
2734 2891 default: ShouldNotReachHere();
2735 2892 }
2736 2893 return NULL;
2737 2894 }
2738 2895
2739 2896 void do_object_work(oop obj) {
2740 2897 guarantee(!_g1h->obj_in_cs(obj),
2741 2898 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2742 2899 (void*) obj, phase_str(), _info));
2743 2900 }
2744 2901
2745 2902 public:
2746 2903 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2747 2904
2748 2905 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2749 2906 _phase = phase;
2750 2907 _info = info;
2751 2908 }
2752 2909
2753 2910 virtual void do_oop(oop* p) {
2754 2911 oop obj = oopDesc::load_decode_heap_oop(p);
2755 2912 do_object_work(obj);
2756 2913 }
2757 2914
2758 2915 virtual void do_oop(narrowOop* p) {
2759 2916 // We should not come across narrow oops while scanning marking
2760 2917 // stacks and SATB buffers.
2761 2918 ShouldNotReachHere();
2762 2919 }
2763 2920
2764 2921 virtual void do_object(oop obj) {
2765 2922 do_object_work(obj);
2766 2923 }
2767 2924 };
2768 2925
2769 2926 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2770 2927 bool verify_enqueued_buffers,
2771 2928 bool verify_thread_buffers,
2772 2929 bool verify_fingers) {
2773 2930 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2774 2931 if (!G1CollectedHeap::heap()->mark_in_progress()) {
2775 2932 return;
2776 2933 }
2777 2934
2778 2935 VerifyNoCSetOopsClosure cl;
2779 2936
2780 2937 if (verify_stacks) {
2781 2938 // Verify entries on the global mark stack
2782 2939 cl.set_phase(VerifyNoCSetOopsStack);
2783 2940 _markStack.oops_do(&cl);
2784 2941
2785 2942 // Verify entries on the task queues
2786 2943 for (uint i = 0; i < _max_worker_id; i += 1) {
2787 2944 cl.set_phase(VerifyNoCSetOopsQueues, i);
2788 - OopTaskQueue* queue = _task_queues->queue(i);
2945 + CMTaskQueue* queue = _task_queues->queue(i);
2789 2946 queue->oops_do(&cl);
2790 2947 }
2791 2948 }
2792 2949
2793 2950 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2794 2951
2795 2952 // Verify entries on the enqueued SATB buffers
2796 2953 if (verify_enqueued_buffers) {
2797 2954 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2798 2955 satb_qs.iterate_completed_buffers_read_only(&cl);
2799 2956 }
2800 2957
2801 2958 // Verify entries on the per-thread SATB buffers
2802 2959 if (verify_thread_buffers) {
2803 2960 cl.set_phase(VerifyNoCSetOopsSATBThread);
2804 2961 satb_qs.iterate_thread_buffers_read_only(&cl);
2805 2962 }
2806 2963
2807 2964 if (verify_fingers) {
2808 2965 // Verify the global finger
2809 2966 HeapWord* global_finger = finger();
2810 2967 if (global_finger != NULL && global_finger < _heap_end) {
2811 2968 // The global finger always points to a heap region boundary. We
2812 2969 // use heap_region_containing_raw() to get the containing region
2813 2970 // given that the global finger could be pointing to a free region
2814 2971 // which subsequently becomes continues humongous. If that
2815 2972 // happens, heap_region_containing() will return the bottom of the
2816 2973 // corresponding starts humongous region and the check below will
2817 2974 // not hold any more.
2818 2975 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2819 2976 guarantee(global_finger == global_hr->bottom(),
2820 2977 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2821 2978 global_finger, HR_FORMAT_PARAMS(global_hr)));
2822 2979 }
2823 2980
2824 2981 // Verify the task fingers
2825 2982 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2826 2983 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2827 2984 CMTask* task = _tasks[i];
2828 2985 HeapWord* task_finger = task->finger();
2829 2986 if (task_finger != NULL && task_finger < _heap_end) {
2830 2987 // See above note on the global finger verification.
2831 2988 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2832 2989 guarantee(task_finger == task_hr->bottom() ||
2833 2990 !task_hr->in_collection_set(),
2834 2991 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2835 2992 task_finger, HR_FORMAT_PARAMS(task_hr)));
2836 2993 }
2837 2994 }
2838 2995 }
2839 2996 }
2840 2997 #endif // PRODUCT
2841 2998
2842 2999 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2843 - _markStack.setEmpty();
2844 - _markStack.clear_overflow();
3000 + _markStack.set_should_expand();
3001 + _markStack.setEmpty(); // Also clears the _markStack overflow flag
2845 3002 if (clear_overflow) {
2846 3003 clear_has_overflown();
2847 3004 } else {
2848 3005 assert(has_overflown(), "pre-condition");
2849 3006 }
2850 3007 _finger = _heap_start;
2851 3008
2852 3009 for (uint i = 0; i < _max_worker_id; ++i) {
2853 - OopTaskQueue* queue = _task_queues->queue(i);
3010 + CMTaskQueue* queue = _task_queues->queue(i);
2854 3011 queue->set_empty();
2855 3012 }
2856 3013 }
2857 3014
2858 3015 // Aggregate the counting data that was constructed concurrently
2859 3016 // with marking.
2860 3017 class AggregateCountDataHRClosure: public HeapRegionClosure {
2861 3018 G1CollectedHeap* _g1h;
2862 3019 ConcurrentMark* _cm;
2863 3020 CardTableModRefBS* _ct_bs;
2864 3021 BitMap* _cm_card_bm;
2865 3022 uint _max_worker_id;
2866 3023
2867 3024 public:
2868 3025 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2869 3026 BitMap* cm_card_bm,
2870 3027 uint max_worker_id) :
2871 3028 _g1h(g1h), _cm(g1h->concurrent_mark()),
2872 3029 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
2873 3030 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2874 3031
2875 3032 bool doHeapRegion(HeapRegion* hr) {
2876 3033 if (hr->continuesHumongous()) {
2877 3034 // We will ignore these here and process them when their
2878 3035 // associated "starts humongous" region is processed.
2879 3036 // Note that we cannot rely on their associated
2880 3037 // "starts humongous" region to have their bit set to 1
2881 3038 // since, due to the region chunking in the parallel region
2882 3039 // iteration, a "continues humongous" region might be visited
2883 3040 // before its associated "starts humongous".
2884 3041 return false;
2885 3042 }
2886 3043
2887 3044 HeapWord* start = hr->bottom();
2888 3045 HeapWord* limit = hr->next_top_at_mark_start();
2889 3046 HeapWord* end = hr->end();
2890 3047
2891 3048 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2892 3049 err_msg("Preconditions not met - "
2893 3050 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2894 3051 "top: "PTR_FORMAT", end: "PTR_FORMAT,
2895 3052 start, limit, hr->top(), hr->end()));
2896 3053
2897 3054 assert(hr->next_marked_bytes() == 0, "Precondition");
2898 3055
2899 3056 if (start == limit) {
2900 3057 // NTAMS of this region has not been set so nothing to do.
2901 3058 return false;
2902 3059 }
2903 3060
2904 3061 // 'start' should be in the heap.
2905 3062 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2906 3063 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2907 3064 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2908 3065
2909 3066 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2910 3067 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2911 3068 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2912 3069
2913 3070 // If ntams is not card aligned then we bump card bitmap index
2914 3071 // for limit so that we get all the cards spanned by
2915 3072 // the object ending at ntams.
2916 3073 // Note: if this is the last region in the heap then ntams
2917 3074 // could actually be just beyond the end of the heap;
2918 3075 // limit_idx will then correspond to a (non-existent) card
2919 3076 // that is also outside the heap.
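// As a worked example (assuming the usual 512-byte cards): if ntams falls
// 100 bytes into card k, card_bitmap_index_for(limit) returns k, and the
// bump below makes limit_idx == k + 1, so the [start_idx, limit_idx)
// aggregation below also covers the card on which the object straddling
// ntams ends.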
2920 3077 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2921 3078 limit_idx += 1;
2922 3079 }
2923 3080
2924 3081 assert(limit_idx <= end_idx, "or else use atomics");
2925 3082
2926 3083 // Aggregate the "stripe" in the count data associated with hr.
2927 3084 uint hrs_index = hr->hrs_index();
2928 3085 size_t marked_bytes = 0;
2929 3086
2930 3087 for (uint i = 0; i < _max_worker_id; i += 1) {
2931 3088 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2932 3089 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2933 3090
2934 3091 // Fetch the marked_bytes in this region for task i and
2935 3092 // add it to the running total for this region.
2936 3093 marked_bytes += marked_bytes_array[hrs_index];
2937 3094
2938 3095 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2939 3096 // into the global card bitmap.
2940 3097 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2941 3098
2942 3099 while (scan_idx < limit_idx) {
2943 3100 assert(task_card_bm->at(scan_idx) == true, "should be");
2944 3101 _cm_card_bm->set_bit(scan_idx);
2945 3102 assert(_cm_card_bm->at(scan_idx) == true, "should be");
2946 3103
2947 3104 // BitMap::get_next_one_offset() can handle the case when
2948 3105 // its left_offset parameter is greater than its right_offset
2949 3106 // parameter. It does, however, have an early exit if
2950 3107 // left_offset == right_offset. So let's limit the value
2951 3108 // passed in for left offset here.
2952 3109 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2953 3110 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2954 3111 }
2955 3112 }
2956 3113
2957 3114 // Update the marked bytes for this region.
2958 3115 hr->add_to_marked_bytes(marked_bytes);
2959 3116
2960 3117 // Next heap region
2961 3118 return false;
2962 3119 }
2963 3120 };
2964 3121
2965 3122 class G1AggregateCountDataTask: public AbstractGangTask {
2966 3123 protected:
2967 3124 G1CollectedHeap* _g1h;
2968 3125 ConcurrentMark* _cm;
2969 3126 BitMap* _cm_card_bm;
2970 3127 uint _max_worker_id;
2971 3128 int _active_workers;
2972 3129
2973 3130 public:
2974 3131 G1AggregateCountDataTask(G1CollectedHeap* g1h,
2975 3132 ConcurrentMark* cm,
2976 3133 BitMap* cm_card_bm,
2977 3134 uint max_worker_id,
2978 3135 int n_workers) :
2979 3136 AbstractGangTask("Count Aggregation"),
2980 3137 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2981 3138 _max_worker_id(max_worker_id),
2982 3139 _active_workers(n_workers) { }
2983 3140
2984 3141 void work(uint worker_id) {
2985 3142 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2986 3143
2987 3144 if (G1CollectedHeap::use_parallel_gc_threads()) {
2988 3145 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
2989 3146 _active_workers,
2990 3147 HeapRegion::AggregateCountClaimValue);
2991 3148 } else {
2992 3149 _g1h->heap_region_iterate(&cl);
2993 3150 }
2994 3151 }
2995 3152 };
2996 3153
2997 3154
2998 3155 void ConcurrentMark::aggregate_count_data() {
2999 3156 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3000 3157 _g1h->workers()->active_workers() :
3001 3158 1);
3002 3159
3003 3160 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3004 3161 _max_worker_id, n_workers);
3005 3162
3006 3163 if (G1CollectedHeap::use_parallel_gc_threads()) {
3007 3164 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3008 3165 "sanity check");
3009 3166 _g1h->set_par_threads(n_workers);
3010 3167 _g1h->workers()->run_task(&g1_par_agg_task);
3011 3168 _g1h->set_par_threads(0);
3012 3169
3013 3170 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3014 3171 "sanity check");
3015 3172 _g1h->reset_heap_region_claim_values();
3016 3173 } else {
3017 3174 g1_par_agg_task.work(0);
3018 3175 }
3019 3176 }
3020 3177
3021 3178 // Clear the per-worker arrays used to store the per-region counting data
3022 3179 void ConcurrentMark::clear_all_count_data() {
3023 3180 // Clear the global card bitmap - it will be filled during
3024 3181 // liveness count aggregation (during remark) and the
3025 3182 // final counting task.
3026 3183 _card_bm.clear();
3027 3184
3028 3185 // Clear the global region bitmap - it will be filled as part
3029 3186 // of the final counting task.
3030 3187 _region_bm.clear();
3031 3188
3032 3189 uint max_regions = _g1h->max_regions();
3033 3190 assert(_max_worker_id > 0, "uninitialized");
3034 3191
3035 3192 for (uint i = 0; i < _max_worker_id; i += 1) {
3036 3193 BitMap* task_card_bm = count_card_bitmap_for(i);
3037 3194 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3038 3195
3039 3196 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3040 3197 assert(marked_bytes_array != NULL, "uninitialized");
3041 3198
3042 3199 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3043 3200 task_card_bm->clear();
3044 3201 }
3045 3202 }
3046 3203
3047 3204 void ConcurrentMark::print_stats() {
3048 3205 if (verbose_stats()) {
3049 3206 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3050 3207 for (size_t i = 0; i < _active_tasks; ++i) {
3051 3208 _tasks[i]->print_stats();
3052 3209 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3053 3210 }
3054 3211 }
3055 3212 }
3056 3213
3057 3214 // abandon current marking iteration due to a Full GC
3058 3215 void ConcurrentMark::abort() {
3059 3216 // Clear all marks to force the marking thread to do nothing
3060 3217 _nextMarkBitMap->clearAll();
3061 3218 // Clear the liveness counting data
3062 3219 clear_all_count_data();
3063 3220 // Empty mark stack
3064 3221 clear_marking_state();
3065 3222 for (uint i = 0; i < _max_worker_id; ++i) {
3066 3223 _tasks[i]->clear_region_fields();
3067 3224 }
3068 3225 _has_aborted = true;
3069 3226
3070 3227 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3071 3228 satb_mq_set.abandon_partial_marking();
3072 3229 // This can be called either during or outside marking; we'll read
3073 3230 // the expected_active value from the SATB queue set.
3074 3231 satb_mq_set.set_active_all_threads(
3075 3232 false, /* new active value */
3076 3233 satb_mq_set.is_active() /* expected_active */);
3077 3234 }
3078 3235
3079 3236 static void print_ms_time_info(const char* prefix, const char* name,
3080 3237 NumberSeq& ns) {
3081 3238 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3082 3239 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3083 3240 if (ns.num() > 0) {
3084 3241 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3085 3242 prefix, ns.sd(), ns.maximum());
3086 3243 }
3087 3244 }
3088 3245
3089 3246 void ConcurrentMark::print_summary_info() {
3090 3247 gclog_or_tty->print_cr(" Concurrent marking:");
3091 3248 print_ms_time_info(" ", "init marks", _init_times);
3092 3249 print_ms_time_info(" ", "remarks", _remark_times);
3093 3250 {
3094 3251 print_ms_time_info(" ", "final marks", _remark_mark_times);
3095 3252 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3096 3253
3097 3254 }
3098 3255 print_ms_time_info(" ", "cleanups", _cleanup_times);
3099 3256 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3100 3257 _total_counting_time,
3101 3258 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3102 3259 (double)_cleanup_times.num()
3103 3260 : 0.0));
3104 3261 if (G1ScrubRemSets) {
3105 3262 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3106 3263 _total_rs_scrub_time,
3107 3264 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3108 3265 (double)_cleanup_times.num()
3109 3266 : 0.0));
3110 3267 }
3111 3268 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3112 3269 (_init_times.sum() + _remark_times.sum() +
3113 3270 _cleanup_times.sum())/1000.0);
3114 3271 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3115 3272 "(%8.2f s marking).",
3116 3273 cmThread()->vtime_accum(),
3117 3274 cmThread()->vtime_mark_accum());
3118 3275 }
3119 3276
3120 3277 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3121 3278 _parallel_workers->print_worker_threads_on(st);
3122 3279 }
3123 3280
3124 3281 // We take a break if someone is trying to stop the world.
3125 3282 bool ConcurrentMark::do_yield_check(uint worker_id) {
3126 3283 if (should_yield()) {
3127 3284 if (worker_id == 0) {
3128 3285 _g1h->g1_policy()->record_concurrent_pause();
3129 3286 }
3130 3287 cmThread()->yield();
3131 3288 return true;
3132 3289 } else {
3133 3290 return false;
3134 3291 }
3135 3292 }
3136 3293
3137 3294 bool ConcurrentMark::should_yield() {
3138 3295 return cmThread()->should_yield();
3139 3296 }
3140 3297
3141 3298 bool ConcurrentMark::containing_card_is_marked(void* p) {
3142 3299 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3143 3300 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3144 3301 }
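// For illustration (assuming the default 512-byte cards, card_shift == 9):
// with the reserved region starting at 0x10000000, p == 0x10000400 yields
// offset 0x400, and card index 0x400 >> 9 == 2, i.e. bit 2 of _card_bm.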
3145 3302
3146 3303 bool ConcurrentMark::containing_cards_are_marked(void* start,
3147 3304 void* last) {
3148 3305 return containing_card_is_marked(start) &&
3149 3306 containing_card_is_marked(last);
3150 3307 }
3151 3308
3152 3309 #ifndef PRODUCT
3153 3310 // for debugging purposes
3154 3311 void ConcurrentMark::print_finger() {
3155 3312 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3156 3313 _heap_start, _heap_end, _finger);
3157 3314 for (uint i = 0; i < _max_worker_id; ++i) {
3158 3315 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger());
3159 3316 }
3160 3317 gclog_or_tty->print_cr("");
3161 3318 }
3162 3319 #endif
3163 3320
3164 3321 void CMTask::scan_object(oop obj) {
3165 3322 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3166 3323
3167 3324 if (_cm->verbose_high()) {
3168 3325 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3169 3326 _worker_id, (void*) obj);
3170 3327 }
3171 3328
3172 3329 size_t obj_size = obj->size();
3173 3330 _words_scanned += obj_size;
3174 3331
3175 3332 obj->oop_iterate(_cm_oop_closure);
3176 3333 statsOnly( ++_objs_scanned );
3177 3334 check_limits();
3178 3335 }
3179 3336
3180 3337 // Closure for iteration over bitmaps
3181 3338 class CMBitMapClosure : public BitMapClosure {
3182 3339 private:
3183 3340 // the bitmap that is being iterated over
3184 3341 CMBitMap* _nextMarkBitMap;
3185 3342 ConcurrentMark* _cm;
3186 3343 CMTask* _task;
3187 3344
3188 3345 public:
3189 3346 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3190 3347 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3191 3348
3192 3349 bool do_bit(size_t offset) {
3193 3350 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3194 3351 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3195 3352 assert( addr < _cm->finger(), "invariant");
3196 3353
3197 3354 statsOnly( _task->increase_objs_found_on_bitmap() );
3198 3355 assert(addr >= _task->finger(), "invariant");
3199 3356
3200 3357 // We move that task's local finger along.
3201 3358 _task->move_finger_to(addr);
3202 3359
3203 3360 _task->scan_object(oop(addr));
3204 3361 // we only partially drain the local queue and global stack
3205 3362 _task->drain_local_queue(true);
3206 3363 _task->drain_global_stack(true);
3207 3364
3208 3365 // if the has_aborted flag has been raised, we need to bail out of
3209 3366 // the iteration
3210 3367 return !_task->has_aborted();
3211 3368 }
3212 3369 };
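// Note: CMBitMapClosure is instantiated in CMTask::do_marking_step() and
// driven by CMBitMap::iterate() over the [_finger, _region_limit) part of
// the claimed region; returning false from do_bit() is what aborts that
// iteration once the task's has_aborted flag has been raised.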
3213 3370
3214 3371 // Closure for iterating over objects, currently only used for
3215 3372 // processing SATB buffers.
3216 3373 class CMObjectClosure : public ObjectClosure {
3217 3374 private:
3218 3375 CMTask* _task;
3219 3376
3220 3377 public:
3221 3378 void do_object(oop obj) {
3222 3379 _task->deal_with_reference(obj);
3223 3380 }
3224 3381
3225 3382 CMObjectClosure(CMTask* task) : _task(task) { }
3226 3383 };
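// Note: CMObjectClosure is applied to SATB buffers by
// CMTask::drain_satb_buffers() below; every object in a claimed buffer is
// fed to CMTask::deal_with_reference().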
3227 3384
3228 3385 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3229 3386 ConcurrentMark* cm,
3230 3387 CMTask* task)
3231 3388 : _g1h(g1h), _cm(cm), _task(task) {
3232 3389 assert(_ref_processor == NULL, "should be initialized to NULL");
3233 3390
3234 3391 if (G1UseConcMarkReferenceProcessing) {
3235 3392 _ref_processor = g1h->ref_processor_cm();
3236 3393 assert(_ref_processor != NULL, "should not be NULL");
3237 3394 }
3238 3395 }
3239 3396
3240 3397 void CMTask::setup_for_region(HeapRegion* hr) {
3241 3398 // Separated the asserts so that we know which one fires.
3242 3399 assert(hr != NULL,
3243 3400 "claim_region() should have filtered out continues humongous regions");
3244 3401 assert(!hr->continuesHumongous(),
3245 3402 "claim_region() should have filtered out continues humongous regions");
3246 3403
3247 3404 if (_cm->verbose_low()) {
3248 3405 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3249 3406 _worker_id, hr);
3250 3407 }
3251 3408
3252 3409 _curr_region = hr;
3253 3410 _finger = hr->bottom();
3254 3411 update_region_limit();
3255 3412 }
3256 3413
3257 3414 void CMTask::update_region_limit() {
3258 3415 HeapRegion* hr = _curr_region;
3259 3416 HeapWord* bottom = hr->bottom();
3260 3417 HeapWord* limit = hr->next_top_at_mark_start();
3261 3418
3262 3419 if (limit == bottom) {
3263 3420 if (_cm->verbose_low()) {
3264 3421 gclog_or_tty->print_cr("[%u] found an empty region "
3265 3422 "["PTR_FORMAT", "PTR_FORMAT")",
3266 3423 _worker_id, bottom, limit);
3267 3424 }
3268 3425 // The region was collected underneath our feet.
3269 3426 // We set the finger to bottom to ensure that the bitmap
3270 3427 // iteration that will follow this will not do anything.
3271 3428 // (this is not a condition that holds when we set the region up,
3272 3429 // as the region is not supposed to be empty in the first place)
3273 3430 _finger = bottom;
3274 3431 } else if (limit >= _region_limit) {
3275 3432 assert(limit >= _finger, "peace of mind");
3276 3433 } else {
3277 3434 assert(limit < _region_limit, "only way to get here");
3278 3435 // This can happen under some pretty unusual circumstances. An
3279 3436 // evacuation pause empties the region underneath our feet (NTAMS
3280 3437 // at bottom). We then do some allocation in the region (NTAMS
3281 3438 // stays at bottom), followed by the region being used as a GC
3282 3439 // alloc region (NTAMS will move to top() and the objects
3283 3440 // originally below it will be grayed). All objects now marked in
3284 3441 // the region are explicitly grayed, if below the global finger,
3285 3442 // and we do not need in fact to scan anything else. So, we simply
3286 3443 // set _finger to be limit to ensure that the bitmap iteration
3287 3444 // doesn't do anything.
3288 3445 _finger = limit;
3289 3446 }
3290 3447
3291 3448 _region_limit = limit;
3292 3449 }
3293 3450
3294 3451 void CMTask::giveup_current_region() {
3295 3452 assert(_curr_region != NULL, "invariant");
3296 3453 if (_cm->verbose_low()) {
3297 3454 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3298 3455 _worker_id, _curr_region);
3299 3456 }
3300 3457 clear_region_fields();
3301 3458 }
3302 3459
3303 3460 void CMTask::clear_region_fields() {
3304 3461 // Values for these three fields that indicate that we're not
3305 3462 // holding on to a region.
3306 3463 _curr_region = NULL;
3307 3464 _finger = NULL;
3308 3465 _region_limit = NULL;
3309 3466 }
3310 3467
3311 3468 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3312 3469 if (cm_oop_closure == NULL) {
3313 3470 assert(_cm_oop_closure != NULL, "invariant");
3314 3471 } else {
3315 3472 assert(_cm_oop_closure == NULL, "invariant");
3316 3473 }
3317 3474 _cm_oop_closure = cm_oop_closure;
3318 3475 }
3319 3476
3320 3477 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3321 3478 guarantee(nextMarkBitMap != NULL, "invariant");
3322 3479
3323 3480 if (_cm->verbose_low()) {
3324 3481 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3325 3482 }
3326 3483
3327 3484 _nextMarkBitMap = nextMarkBitMap;
3328 3485 clear_region_fields();
3329 3486
3330 3487 _calls = 0;
3331 3488 _elapsed_time_ms = 0.0;
3332 3489 _termination_time_ms = 0.0;
3333 3490 _termination_start_time_ms = 0.0;
3334 3491
3335 3492 #if _MARKING_STATS_
3336 3493 _local_pushes = 0;
3337 3494 _local_pops = 0;
3338 3495 _local_max_size = 0;
3339 3496 _objs_scanned = 0;
3340 3497 _global_pushes = 0;
3341 3498 _global_pops = 0;
3342 3499 _global_max_size = 0;
3343 3500 _global_transfers_to = 0;
3344 3501 _global_transfers_from = 0;
3345 3502 _regions_claimed = 0;
3346 3503 _objs_found_on_bitmap = 0;
3347 3504 _satb_buffers_processed = 0;
3348 3505 _steal_attempts = 0;
3349 3506 _steals = 0;
3350 3507 _aborted = 0;
3351 3508 _aborted_overflow = 0;
3352 3509 _aborted_cm_aborted = 0;
3353 3510 _aborted_yield = 0;
3354 3511 _aborted_timed_out = 0;
3355 3512 _aborted_satb = 0;
3356 3513 _aborted_termination = 0;
3357 3514 #endif // _MARKING_STATS_
3358 3515 }
3359 3516
3360 3517 bool CMTask::should_exit_termination() {
3361 3518 regular_clock_call();
3362 3519 // This is called when we are in the termination protocol. We should
3363 3520 // quit if, for some reason, this task wants to abort or the global
3364 3521 // stack is not empty (this means that we can get work from it).
3365 3522 return !_cm->mark_stack_empty() || has_aborted();
3366 3523 }
3367 3524
3368 3525 void CMTask::reached_limit() {
3369 3526 assert(_words_scanned >= _words_scanned_limit ||
3370 3527 _refs_reached >= _refs_reached_limit ,
3371 3528 "shouldn't have been called otherwise");
3372 3529 regular_clock_call();
3373 3530 }
3374 3531
3375 3532 void CMTask::regular_clock_call() {
3376 3533 if (has_aborted()) return;
3377 3534
3378 3535 // First, we need to recalculate the words scanned and refs reached
3379 3536 // limits for the next clock call.
3380 3537 recalculate_limits();
3381 3538
3382 3539 // During the regular clock call we do the following
3383 3540
3384 3541 // (1) If an overflow has been flagged, then we abort.
3385 3542 if (_cm->has_overflown()) {
3386 3543 set_has_aborted();
3387 3544 return;
3388 3545 }
3389 3546
3390 3547 // If we are not concurrent (i.e. we're doing remark) we don't need
3391 3548 // to check anything else. The other steps are only needed during
3392 3549 // the concurrent marking phase.
3393 3550 if (!concurrent()) return;
3394 3551
3395 3552 // (2) If marking has been aborted for Full GC, then we also abort.
3396 3553 if (_cm->has_aborted()) {
3397 3554 set_has_aborted();
3398 3555 statsOnly( ++_aborted_cm_aborted );
3399 3556 return;
3400 3557 }
3401 3558
3402 3559 double curr_time_ms = os::elapsedVTime() * 1000.0;
3403 3560
3404 3561 // (3) If marking stats are enabled, then we update the step history.
3405 3562 #if _MARKING_STATS_
3406 3563 if (_words_scanned >= _words_scanned_limit) {
3407 3564 ++_clock_due_to_scanning;
3408 3565 }
3409 3566 if (_refs_reached >= _refs_reached_limit) {
3410 3567 ++_clock_due_to_marking;
3411 3568 }
3412 3569
3413 3570 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3414 3571 _interval_start_time_ms = curr_time_ms;
3415 3572 _all_clock_intervals_ms.add(last_interval_ms);
3416 3573
3417 3574 if (_cm->verbose_medium()) {
3418 3575 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3419 3576 "scanned = %d%s, refs reached = %d%s",
3420 3577 _worker_id, last_interval_ms,
3421 3578 _words_scanned,
3422 3579 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3423 3580 _refs_reached,
3424 3581 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3425 3582 }
3426 3583 #endif // _MARKING_STATS_
3427 3584
3428 3585 // (4) We check whether we should yield. If we have to, then we abort.
3429 3586 if (_cm->should_yield()) {
3430 3587 // We should yield. To do this we abort the task. The caller is
3431 3588 // responsible for yielding.
3432 3589 set_has_aborted();
3433 3590 statsOnly( ++_aborted_yield );
3434 3591 return;
3435 3592 }
3436 3593
3437 3594 // (5) We check whether we've reached our time quota. If we have,
3438 3595 // then we abort.
3439 3596 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3440 3597 if (elapsed_time_ms > _time_target_ms) {
3441 3598 set_has_aborted();
3442 3599 _has_timed_out = true;
3443 3600 statsOnly( ++_aborted_timed_out );
3444 3601 return;
3445 3602 }
3446 3603
3447 3604 // (6) Finally, we check whether there are enough completed SATB
3448 3605 // buffers available for processing. If there are, we abort.
3449 3606 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3450 3607 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3451 3608 if (_cm->verbose_low()) {
3452 3609 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3453 3610 _worker_id);
3454 3611 }
3455 3612 // we do need to process SATB buffers; we'll abort and restart
3456 3613 // the marking task to do so
3457 3614 set_has_aborted();
3458 3615 statsOnly( ++_aborted_satb );
3459 3616 return;
3460 3617 }
3461 3618 }
3462 3619
3463 3620 void CMTask::recalculate_limits() {
3464 3621 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3465 3622 _words_scanned_limit = _real_words_scanned_limit;
3466 3623
3467 3624 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3468 3625 _refs_reached_limit = _real_refs_reached_limit;
3469 3626 }
3470 3627
3471 3628 void CMTask::decrease_limits() {
3472 3629 // This is called when we believe that we're going to do an infrequent
3473 3630 // operation which will increase the per byte scanned cost (i.e. move
3474 3631 // entries to/from the global stack). It basically tries to decrease the
3475 3632 // scanning limit so that the clock is called earlier.
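// As a worked example (derived from the assignments below):
// recalculate_limits() sets _words_scanned_limit a full
// words_scanned_period ahead of _words_scanned, so after decrease_limits()
// at most words_scanned_period / 4 of that budget remains before
// regular_clock_call() fires; the refs limit is reduced the same way.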
3476 3633
3477 3634 if (_cm->verbose_medium()) {
3478 3635 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3479 3636 }
3480 3637
3481 3638 _words_scanned_limit = _real_words_scanned_limit -
3482 3639 3 * words_scanned_period / 4;
3483 3640 _refs_reached_limit = _real_refs_reached_limit -
3484 3641 3 * refs_reached_period / 4;
3485 3642 }
3486 3643
3487 3644 void CMTask::move_entries_to_global_stack() {
3488 3645 // local array where we'll store the entries that will be popped
3489 3646 // from the local queue
3490 3647 oop buffer[global_stack_transfer_size];
3491 3648
3492 3649 int n = 0;
3493 3650 oop obj;
3494 3651 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3495 3652 buffer[n] = obj;
3496 3653 ++n;
3497 3654 }
3498 3655
3499 3656 if (n > 0) {
3500 3657 // we popped at least one entry from the local queue
3501 3658
3502 3659 statsOnly( ++_global_transfers_to; _local_pops += n );
3503 3660
3504 3661 if (!_cm->mark_stack_push(buffer, n)) {
3505 3662 if (_cm->verbose_low()) {
3506 3663 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3507 3664 _worker_id);
3508 3665 }
3509 3666 set_has_aborted();
3510 3667 } else {
3511 3668 // the transfer was successful
3512 3669
3513 3670 if (_cm->verbose_medium()) {
3514 3671 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3515 3672 _worker_id, n);
3516 3673 }
3517 3674 statsOnly( int tmp_size = _cm->mark_stack_size();
3518 3675 if (tmp_size > _global_max_size) {
3519 3676 _global_max_size = tmp_size;
3520 3677 }
3521 3678 _global_pushes += n );
3522 3679 }
3523 3680 }
3524 3681
3525 3682 // this operation was quite expensive, so decrease the limits
3526 3683 decrease_limits();
3527 3684 }
3528 3685
3529 3686 void CMTask::get_entries_from_global_stack() {
3530 3687 // local array where we'll store the entries that will be popped
3531 3688 // from the global stack.
3532 3689 oop buffer[global_stack_transfer_size];
3533 3690 int n;
3534 3691 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3535 3692 assert(n <= global_stack_transfer_size,
3536 3693 "we should not pop more than the given limit");
3537 3694 if (n > 0) {
3538 3695 // yes, we did actually pop at least one entry
3539 3696
3540 3697 statsOnly( ++_global_transfers_from; _global_pops += n );
3541 3698 if (_cm->verbose_medium()) {
3542 3699 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3543 3700 _worker_id, n);
3544 3701 }
3545 3702 for (int i = 0; i < n; ++i) {
3546 3703 bool success = _task_queue->push(buffer[i]);
3547 3704 // We only call this when the local queue is empty or under a
3548 3705 // given target limit. So, we do not expect this push to fail.
3549 3706 assert(success, "invariant");
3550 3707 }
3551 3708
3552 3709 statsOnly( int tmp_size = _task_queue->size();
3553 3710 if (tmp_size > _local_max_size) {
3554 3711 _local_max_size = tmp_size;
3555 3712 }
3556 3713 _local_pushes += n );
3557 3714 }
3558 3715
3559 3716 // this operation was quite expensive, so decrease the limits
3560 3717 decrease_limits();
3561 3718 }
3562 3719
3563 3720 void CMTask::drain_local_queue(bool partially) {
3564 3721 if (has_aborted()) return;
3565 3722
3566 3723 // Decide what the target size is, depending on whether we're going to
3567 3724 // drain it partially (so that other tasks can steal if they run out
3568 3725 // of things to do) or totally (at the very end).
3569 3726 size_t target_size;
3570 3727 if (partially) {
3571 3728 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3572 3729 } else {
3573 3730 target_size = 0;
3574 3731 }
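// For instance (assuming the default GCDrainStackTargetSize of 64 and a
// task queue capacity well above 3 * 64): a partial drain stops once the
// queue is down to 64 entries, deliberately leaving work available for
// stealing, while a total drain (target_size == 0) empties it completely.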
3575 3732
3576 3733 if (_task_queue->size() > target_size) {
3577 3734 if (_cm->verbose_high()) {
3578 3735 gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
3579 3736 _worker_id, target_size);
3580 3737 }
3581 3738
3582 3739 oop obj;
3583 3740 bool ret = _task_queue->pop_local(obj);
3584 3741 while (ret) {
3585 3742 statsOnly( ++_local_pops );
3586 3743
3587 3744 if (_cm->verbose_high()) {
3588 3745 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3589 3746 (void*) obj);
3590 3747 }
3591 3748
3592 3749 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3593 3750 assert(!_g1h->is_on_master_free_list(
3594 3751 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3595 3752
3596 3753 scan_object(obj);
3597 3754
3598 3755 if (_task_queue->size() <= target_size || has_aborted()) {
3599 3756 ret = false;
3600 3757 } else {
3601 3758 ret = _task_queue->pop_local(obj);
3602 3759 }
3603 3760 }
3604 3761
3605 3762 if (_cm->verbose_high()) {
3606 3763 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3607 3764 _worker_id, _task_queue->size());
3608 3765 }
3609 3766 }
3610 3767 }
3611 3768
3612 3769 void CMTask::drain_global_stack(bool partially) {
3613 3770 if (has_aborted()) return;
3614 3771
3615 3772 // We have a policy to drain the local queue before we attempt to
3616 3773 // drain the global stack.
3617 3774 assert(partially || _task_queue->size() == 0, "invariant");
3618 3775
3619 3776 // Decide what the target size is, depending on whether we're going to
3620 3777 // drain it partially (so that other tasks can steal if they run out
3621 3778 // of things to do) or totally (at the very end). Notice that,
3622 3779 // because we move entries from the global stack in chunks or
3623 3780 // because another task might be doing the same, we might in fact
3624 3781 // drop below the target. But, this is not a problem.
3625 3782 size_t target_size;
3626 3783 if (partially) {
3627 3784 target_size = _cm->partial_mark_stack_size_target();
3628 3785 } else {
3629 3786 target_size = 0;
3630 3787 }
3631 3788
3632 3789 if (_cm->mark_stack_size() > target_size) {
3633 3790 if (_cm->verbose_low()) {
3634 3791 gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
3635 3792 _worker_id, target_size);
3636 3793 }
3637 3794
3638 3795 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3639 3796 get_entries_from_global_stack();
3640 3797 drain_local_queue(partially);
3641 3798 }
3642 3799
3643 3800 if (_cm->verbose_low()) {
3644 3801 gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3645 3802 _worker_id, _cm->mark_stack_size());
3646 3803 }
3647 3804 }
3648 3805 }
3649 3806
3650 3807 // SATB Queue has several assumptions on whether to call the par or
3651 3808 // non-par versions of the methods. This is why some of the code is
3652 3809 // replicated. We should really get rid of the single-threaded version
3653 3810 // of the code to simplify things.
3654 3811 void CMTask::drain_satb_buffers() {
3655 3812 if (has_aborted()) return;
3656 3813
3657 3814 // We set this so that the regular clock knows that we're in the
3658 3815 // middle of draining buffers and doesn't set the abort flag when it
3659 3816 // notices that SATB buffers are available for draining. It'd be
3660 3817 // very counterproductive if it did that. :-)
3661 3818 _draining_satb_buffers = true;
3662 3819
3663 3820 CMObjectClosure oc(this);
3664 3821 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3665 3822 if (G1CollectedHeap::use_parallel_gc_threads()) {
3666 3823 satb_mq_set.set_par_closure(_worker_id, &oc);
3667 3824 } else {
3668 3825 satb_mq_set.set_closure(&oc);
3669 3826 }
3670 3827
3671 3828 // This keeps claiming and applying the closure to completed buffers
3672 3829 // until we run out of buffers or we need to abort.
3673 3830 if (G1CollectedHeap::use_parallel_gc_threads()) {
3674 3831 while (!has_aborted() &&
3675 3832 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3676 3833 if (_cm->verbose_medium()) {
3677 3834 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3678 3835 }
3679 3836 statsOnly( ++_satb_buffers_processed );
3680 3837 regular_clock_call();
3681 3838 }
3682 3839 } else {
3683 3840 while (!has_aborted() &&
3684 3841 satb_mq_set.apply_closure_to_completed_buffer()) {
3685 3842 if (_cm->verbose_medium()) {
3686 3843 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3687 3844 }
3688 3845 statsOnly( ++_satb_buffers_processed );
3689 3846 regular_clock_call();
3690 3847 }
3691 3848 }
3692 3849
3693 3850 if (!concurrent() && !has_aborted()) {
3694 3851 // We should only do this during remark.
3695 3852 if (G1CollectedHeap::use_parallel_gc_threads()) {
3696 3853 satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3697 3854 } else {
3698 3855 satb_mq_set.iterate_closure_all_threads();
3699 3856 }
3700 3857 }
3701 3858
3702 3859 _draining_satb_buffers = false;
3703 3860
3704 3861 assert(has_aborted() ||
3705 3862 concurrent() ||
3706 3863 satb_mq_set.completed_buffers_num() == 0, "invariant");
3707 3864
3708 3865 if (G1CollectedHeap::use_parallel_gc_threads()) {
3709 3866 satb_mq_set.set_par_closure(_worker_id, NULL);
3710 3867 } else {
3711 3868 satb_mq_set.set_closure(NULL);
3712 3869 }
3713 3870
3714 3871 // again, this was a potentially expensive operation, decrease the
3715 3872 // limits to get the regular clock call early
3716 3873 decrease_limits();
3717 3874 }
3718 3875
3719 3876 void CMTask::print_stats() {
3720 3877 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3721 3878 _worker_id, _calls);
3722 3879 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3723 3880 _elapsed_time_ms, _termination_time_ms);
3724 3881 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3725 3882 _step_times_ms.num(), _step_times_ms.avg(),
3726 3883 _step_times_ms.sd());
3727 3884 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3728 3885 _step_times_ms.maximum(), _step_times_ms.sum());
3729 3886
3730 3887 #if _MARKING_STATS_
3731 3888 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3732 3889 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3733 3890 _all_clock_intervals_ms.sd());
3734 3891 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3735 3892 _all_clock_intervals_ms.maximum(),
3736 3893 _all_clock_intervals_ms.sum());
3737 3894 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3738 3895 _clock_due_to_scanning, _clock_due_to_marking);
3739 3896 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3740 3897 _objs_scanned, _objs_found_on_bitmap);
3741 3898 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3742 3899 _local_pushes, _local_pops, _local_max_size);
3743 3900 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3744 3901 _global_pushes, _global_pops, _global_max_size);
3745 3902 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3746 3903 _global_transfers_to,_global_transfers_from);
3747 3904 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
3748 3905 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3749 3906 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3750 3907 _steal_attempts, _steals);
3751 3908 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3752 3909 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3753 3910 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3754 3911 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3755 3912 _aborted_timed_out, _aborted_satb, _aborted_termination);
3756 3913 #endif // _MARKING_STATS_
3757 3914 }
3758 3915
3759 3916 /*****************************************************************************
3760 3917
3761 3918 The do_marking_step(time_target_ms) method is the building block
3762 3919 of the parallel marking framework. It can be called in parallel
3763 3920 with other invocations of do_marking_step() on different tasks
3764 3921 (but only one per task, obviously) and concurrently with the
3765 3922 mutator threads, or during remark, hence it eliminates the need
3766 3923 for two versions of the code. When called during remark, it will
3767 3924 pick up from where the task left off during the concurrent marking
3768 3925 phase. Interestingly, tasks are also claimable during evacuation
3769 3926 pauses too, since do_marking_step() ensures that it aborts before
3770 3927 it needs to yield.
3771 3928
3772 3929 The data structures that it uses to do marking work are the
3773 3930 following:
3774 3931
3775 3932 (1) Marking Bitmap. If there are gray objects that appear only
3776 3933 on the bitmap (this happens either when dealing with an overflow
3777 3934 or when the initial marking phase has simply marked the roots
3778 3935 and didn't push them on the stack), then tasks claim heap
3779 3936 regions whose bitmap they then scan to find gray objects. A
3780 3937 global finger indicates where the end of the last claimed region
3781 3938 is. A local finger indicates how far into the region a task has
3782 3939 scanned. The two fingers are used to determine how to gray an
3783 3940 object (i.e. whether simply marking it is OK, as it will be
3784 3941 visited by a task in the future, or whether it needs to be also
3785 3942 pushed on a stack).
3786 3943
3787 3944 (2) Local Queue. The local queue of the task which is accessed
3788 3945 reasonably efficiently by the task. Other tasks can steal from
3789 3946 it when they run out of work. Throughout the marking phase, a
3790 3947 task attempts to keep its local queue short but not totally
3791 3948 empty, so that entries are available for stealing by other
3792 3949 tasks. Only when there is no more work, a task will totally
3793 3950 drain its local queue.
3794 3951
3795 3952 (3) Global Mark Stack. This handles local queue overflow. During
3796 3953 marking only sets of entries are moved between it and the local
3797 3954 queues, as access to it requires a mutex, and more fine-grained
3798 3955 interaction with it might cause contention. If it
3799 3956 overflows, then the marking phase should restart and iterate
3800 3957 over the bitmap to identify gray objects. Throughout the marking
3801 3958 phase, tasks attempt to keep the global mark stack at a small
3802 3959 length but not totally empty, so that entries are available for
3803 3960 popping by other tasks. Only when there is no more work will
3804 3961 tasks totally drain the global mark stack.
3805 3962
3806 3963 (4) SATB Buffer Queue. This is where completed SATB buffers are
3807 3964 made available. Buffers are regularly removed from this queue
3808 3965 and scanned for roots, so that the queue doesn't get too
3809 3966 long. During remark, all completed buffers are processed, as
3810 3967 well as the filled-in parts of any uncompleted buffers.
3811 3968
3812 3969 The do_marking_step() method tries to abort when the time target
3813 3970 has been reached. There are a few other cases when the
3814 3971 do_marking_step() method also aborts:
3815 3972
3816 3973 (1) When the marking phase has been aborted (after a Full GC).
3817 3974
3818 3975 (2) When a global overflow (on the global stack) has been
3819 3976 triggered. Before the task aborts, it will actually sync up with
3820 3977 the other tasks to ensure that all the marking data structures
3821 3978 (local queues, stacks, fingers etc.) are re-initialised so that
3822 3979 when do_marking_step() completes, the marking phase can
3823 3980 immediately restart.
3824 3981
3825 3982 (3) When enough completed SATB buffers are available. The
3826 3983 do_marking_step() method only tries to drain SATB buffers right
3827 3984 at the beginning. So, if enough buffers are available, the
3828 3985 marking step aborts and the SATB buffers are processed at
3829 3986 the beginning of the next invocation.
3830 3987
3831 3988 (4) To yield. When we have to yield, we abort and yield
3832 3989 right at the end of do_marking_step(). This saves us from a lot
3833 3990 of hassle as, by yielding, we might allow a Full GC. If this
3834 3991 happens then objects will be compacted underneath our feet, the
3835 3992 heap might shrink, etc. We save checking for this by just
3836 3993 aborting and doing the yield right at the end.
3837 3994
3838 3995 From the above it follows that the do_marking_step() method should
3839 3996 be called in a loop (or, otherwise, regularly) until it completes.
3840 3997
3841 3998 If a marking step completes without its has_aborted() flag being
3842 3999 true, it means it has completed the current marking phase (and
3843 4000 also all other marking tasks have done so and have all synced up).
3844 4001
3845 4002 A method called regular_clock_call() is invoked "regularly" (in
3846 4003 sub-ms intervals) throughout marking. It is this clock method that
3847 4004 checks all the abort conditions which were mentioned above and
3848 4005 decides when the task should abort. A work-based scheme is used to
3849 4006 trigger this clock method: when the number of object words the
3850 4007 marking phase has scanned or the number of references the marking
3851 4008 phase has visited reach a given limit. Additional invocations of
3852 4009 the clock method have been planted in a few other strategic places
3853 4010 too. The initial reason for the clock method was to avoid calling
3854 4011 vtime too regularly, as it is quite expensive. So, once it was in
3855 4012 place, it was natural to piggy-back all the other conditions on it
3856 4013 too and not constantly check them throughout the code.
3857 4014
3858 4015 *****************************************************************************/
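// A minimal sketch of such a driver loop (simplified; the real callers in
// this file derive the time target from the policy, and the accessor and
// flag values here are illustrative only):
//
//   CMTask* task = ...; // the worker's claimed task, e.g. _cm->_tasks[id]
//   do {
//     task->do_marking_step(10.0 /* time target, ms, illustrative */,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//     // if the step aborted in order to yield, the caller yields here
//   } while (task->has_aborted() && !_cm->has_aborted());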
3859 4016
3860 4017 void CMTask::do_marking_step(double time_target_ms,
3861 4018 bool do_stealing,
3862 4019 bool do_termination) {
3863 4020 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3864 4021 assert(concurrent() == _cm->concurrent(), "they should be the same");
3865 4022
3866 4023 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3867 4024 assert(_task_queues != NULL, "invariant");
3868 4025 assert(_task_queue != NULL, "invariant");
3869 4026 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3870 4027
3871 4028 assert(!_claimed,
3872 4029 "only one thread should claim this task at any one time");
3873 4030
3874 4031 // OK, this doesn't safeguard against all possible scenarios, as it is
3875 4032 // possible for two threads to set the _claimed flag at the same
3876 4033 // time. But it is only for debugging purposes anyway and it will
3877 4034 // catch most problems.
3878 4035 _claimed = true;
3879 4036
3880 4037 _start_time_ms = os::elapsedVTime() * 1000.0;
3881 4038 statsOnly( _interval_start_time_ms = _start_time_ms );
3882 4039
3883 4040 double diff_prediction_ms =
3884 4041 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3885 4042 _time_target_ms = time_target_ms - diff_prediction_ms;
3886 4043
3887 4044 // set up the variables that are used in the work-based scheme to
3888 4045 // call the regular clock method
3889 4046 _words_scanned = 0;
3890 4047 _refs_reached = 0;
3891 4048 recalculate_limits();
3892 4049
3893 4050 // clear all flags
3894 4051 clear_has_aborted();
3895 4052 _has_timed_out = false;
3896 4053 _draining_satb_buffers = false;
3897 4054
3898 4055 ++_calls;
3899 4056
3900 4057 if (_cm->verbose_low()) {
3901 4058 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3902 4059 "target = %1.2lfms >>>>>>>>>>",
3903 4060 _worker_id, _calls, _time_target_ms);
3904 4061 }
3905 4062
3906 4063 // Set up the bitmap and oop closures. Anything that uses them is
3907 4064 // eventually called from this method, so it is OK to allocate these
3908 4065 // statically.
3909 4066 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3910 4067 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
3911 4068 set_cm_oop_closure(&cm_oop_closure);
3912 4069
3913 4070 if (_cm->has_overflown()) {
3914 4071 // This can happen if the mark stack overflows during a GC pause
3915 4072 // and this task, after a yield point, restarts. We have to abort
3916 4073 // as we need to get into the overflow protocol which happens
3917 4074 // right at the end of this task.
3918 4075 set_has_aborted();
3919 4076 }
3920 4077
3921 4078 // First drain any available SATB buffers. After this, we will not
3922 4079 // look at SATB buffers before the next invocation of this method.
3923 4080 // If enough completed SATB buffers are queued up, the regular clock
3924 4081 // will abort this task so that it restarts.
3925 4082 drain_satb_buffers();
3926 4083 // ...then partially drain the local queue and the global stack
3927 4084 drain_local_queue(true);
3928 4085 drain_global_stack(true);
3929 4086
3930 4087 do {
3931 4088 if (!has_aborted() && _curr_region != NULL) {
3932 4089 // This means that we're already holding on to a region.
3933 4090 assert(_finger != NULL, "if region is not NULL, then the finger "
3934 4091 "should not be NULL either");
3935 4092
3936 4093 // We might have restarted this task after an evacuation pause
3937 4094 // which might have evacuated the region we're holding on to
3938 4095 // underneath our feet. Let's read its limit again to make sure
3939 4096 // that we do not iterate over a region of the heap that
3940 4097 // contains garbage (update_region_limit() will also move
3941 4098 // _finger to the start of the region if it is found empty).
3942 4099 update_region_limit();
3943 4100 // We will start from _finger not from the start of the region,
3944 4101 // as we might be restarting this task after aborting half-way
3945 4102 // through scanning this region. In this case, _finger points to
3946 4103 // the address where we last found a marked object. If this is a
3947 4104 // fresh region, _finger points to start().
3948 4105 MemRegion mr = MemRegion(_finger, _region_limit);
3949 4106
3950 4107 if (_cm->verbose_low()) {
3951 4108 gclog_or_tty->print_cr("[%u] we're scanning part "
3952 4109 "["PTR_FORMAT", "PTR_FORMAT") "
3953 4110 "of region "PTR_FORMAT,
3954 4111 _worker_id, _finger, _region_limit, _curr_region);
3955 4112 }
3956 4113
3957 4114 // Let's iterate over the bitmap of the part of the
3958 4115 // region that is left.
3959 4116 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3960 4117 // We successfully completed iterating over the region. Now,
3961 4118 // let's give up the region.
3962 4119 giveup_current_region();
3963 4120 regular_clock_call();
3964 4121 } else {
3965 4122 assert(has_aborted(), "currently the only way to do so");
3966 4123 // The only way to abort the bitmap iteration is to return
3967 4124 // false from the do_bit() method. However, inside the
3968 4125 // do_bit() method we move the _finger to point to the
3969 4126 // object currently being looked at. So, if we bail out, we
3970 4127 // have definitely set _finger to something non-null.
3971 4128 assert(_finger != NULL, "invariant");
3972 4129
3973 4130 // Region iteration was actually aborted. So now _finger
3974 4131 // points to the address of the object we last scanned. If we
3975 4132 // leave it there, when we restart this task, we will rescan
3976 4133 // the object. It is easy to avoid this. We move the finger by
3977 4134 // enough to point to the next possible object header (the
3978 4135 // bitmap knows by how much we need to move it as it knows its
3979 4136 // granularity).
3980 4137 assert(_finger < _region_limit, "invariant");
3981 4138 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3982 4139 // Check if bitmap iteration was aborted while scanning the last object
3983 4140 if (new_finger >= _region_limit) {
3984 4141 giveup_current_region();
3985 4142 } else {
3986 4143 move_finger_to(new_finger);
3987 4144 }
3988 4145 }
3989 4146 }
3990 4147 // At this point we have either completed iterating over the
3991 4148 // region we were holding on to, or we have aborted.
3992 4149
3993 4150 // We then partially drain the local queue and the global stack.
3994 4151 // (Do we really need this?)
3995 4152 drain_local_queue(true);
3996 4153 drain_global_stack(true);
3997 4154
3998 4155 // Read the note on the claim_region() method on why it might
3999 4156 // return NULL with potentially more regions available for
4000 4157 // claiming and why we have to check out_of_regions() to determine
4001 4158 // whether we're done or not.
4002 4159 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4003 4160 // We are going to try to claim a new region. We should have
4004 4161 // given up on the previous one.
4005 4162 // Separated the asserts so that we know which one fires.
4006 4163 assert(_curr_region == NULL, "invariant");
4007 4164 assert(_finger == NULL, "invariant");
4008 4165 assert(_region_limit == NULL, "invariant");
4009 4166 if (_cm->verbose_low()) {
4010 4167 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4011 4168 }
4012 4169 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4013 4170 if (claimed_region != NULL) {
4014 4171 // Yes, we managed to claim one
4015 4172 statsOnly( ++_regions_claimed );
4016 4173
4017 4174 if (_cm->verbose_low()) {
4018 4175 gclog_or_tty->print_cr("[%u] we successfully claimed "
4019 4176 "region "PTR_FORMAT,
4020 4177 _worker_id, claimed_region);
4021 4178 }
4022 4179
4023 4180 setup_for_region(claimed_region);
4024 4181 assert(_curr_region == claimed_region, "invariant");
4025 4182 }
4026 4183 // It is important to call the regular clock here. It might take
4027 4184 // a while to claim a region if, for example, we hit a large
4028 4185 // block of empty regions. So we need to call the regular clock
4029 4186 // method once round the loop to make sure it's called
4030 4187 // frequently enough.
4031 4188 regular_clock_call();
4032 4189 }
4033 4190
4034 4191 if (!has_aborted() && _curr_region == NULL) {
4035 4192 assert(_cm->out_of_regions(),
4036 4193 "at this point we should be out of regions");
4037 4194 }
4038 4195   } while (_curr_region != NULL && !has_aborted());
4039 4196
4040 4197 if (!has_aborted()) {
4041 4198 // We cannot check whether the global stack is empty, since other
4042 4199 // tasks might be pushing objects to it concurrently.
4043 4200 assert(_cm->out_of_regions(),
4044 4201 "at this point we should be out of regions");
4045 4202
4046 4203 if (_cm->verbose_low()) {
4047 4204 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4048 4205 }
4049 4206
4050 4207 // Try to reduce the number of available SATB buffers so that
4051 4208 // remark has less work to do.
4052 4209 drain_satb_buffers();
4053 4210 }
4054 4211
4055 4212 // Since we've done everything else, we can now totally drain the
4056 4213 // local queue and global stack.
4057 4214 drain_local_queue(false);
4058 4215 drain_global_stack(false);
4059 4216
4060 4217   // Attempt to steal work from other tasks' queues.
4061 4218 if (do_stealing && !has_aborted()) {
4062 4219 // We have not aborted. This means that we have finished all that
4063 4220 // we could. Let's try to do some stealing...
4064 4221
4065 4222 // We cannot check whether the global stack is empty, since other
4066 4223 // tasks might be pushing objects to it concurrently.
4067 4224 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4068 4225 "only way to reach here");
4069 4226
4070 4227 if (_cm->verbose_low()) {
4071 4228 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4072 4229 }
4073 4230
4074 4231 while (!has_aborted()) {
4075 4232 oop obj;
4076 4233 statsOnly( ++_steal_attempts );
4077 4234
4078 4235 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4079 4236 if (_cm->verbose_medium()) {
4080 4237 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4081 4238 _worker_id, (void*) obj);
4082 4239 }
4083 4240
4084 4241 statsOnly( ++_steals );
4085 4242
4086 4243 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4087 4244 "any stolen object should be marked");
4088 4245 scan_object(obj);
4089 4246
4090 4247 // And since we're towards the end, let's totally drain the
4091 4248 // local queue and global stack.
4092 4249 drain_local_queue(false);
4093 4250 drain_global_stack(false);
4094 4251 } else {
4095 4252 break;
4096 4253 }
4097 4254 }
4098 4255 }
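    // [Editorial sketch, not part of the original source: the stealing
    // phase above, reduced to its essential shape with hypothetical
    // names. Each successful steal is followed by a *total* drain,
    // because scanning the stolen object may repopulate both the local
    // queue and the global stack; the first failed attempt ends the
    // phase:
    //
    //   while (!has_aborted()) {
    //     oop obj;
    //     if (!steal_from_random_victim(&obj)) break; // all queues empty
    //     scan_object(obj);
    //     drain_local_queue(false);
    //     drain_global_stack(false);
    //   }
    // ]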
4099 4256
4100 4257 // If we are about to wrap up and go into termination, check if we
4101 4258 // should raise the overflow flag.
4102 4259 if (do_termination && !has_aborted()) {
4103 4260 if (_cm->force_overflow()->should_force()) {
4104 4261 _cm->set_has_overflown();
4105 4262 regular_clock_call();
4106 4263 }
4107 4264 }
4108 4265
4109 4266 // We still haven't aborted. Now, let's try to get into the
4110 4267 // termination protocol.
4111 4268 if (do_termination && !has_aborted()) {
4112 4269 // We cannot check whether the global stack is empty, since other
4113 4270 // tasks might be concurrently pushing objects on it.
4114 4271 // Separated the asserts so that we know which one fires.
4115 4272 assert(_cm->out_of_regions(), "only way to reach here");
4116 4273 assert(_task_queue->size() == 0, "only way to reach here");
4117 4274
4118 4275 if (_cm->verbose_low()) {
4119 4276 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4120 4277 }
4121 4278
4122 4279 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4123 4280 // The CMTask class also extends the TerminatorTerminator class,
4124 4281 // hence its should_exit_termination() method will also decide
4125 4282 // whether to exit the termination protocol or not.
4126 4283 bool finished = _cm->terminator()->offer_termination(this);
4127 4284 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4128 4285 _termination_time_ms +=
4129 4286 termination_end_time_ms - _termination_start_time_ms;
4130 4287
4131 4288 if (finished) {
4132 4289 // We're all done.
4133 4290
4134 4291 if (_worker_id == 0) {
4135 4292 // let's allow task 0 to do this
4136 4293 if (concurrent()) {
4137 4294 assert(_cm->concurrent_marking_in_progress(), "invariant");
4138 4295 // we need to set this to false before the next
4139 4296 // safepoint. This way we ensure that the marking phase
4140 4297 // doesn't observe any more heap expansions.
4141 4298 _cm->clear_concurrent_marking_in_progress();
4142 4299 }
4143 4300 }
4144 4301
4145 4302 // We can now guarantee that the global stack is empty, since
4146 4303 // all other tasks have finished. We separated the guarantees so
4147 4304 // that, if a condition is false, we can immediately find out
4148 4305 // which one.
4149 4306 guarantee(_cm->out_of_regions(), "only way to reach here");
4150 4307 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4151 4308 guarantee(_task_queue->size() == 0, "only way to reach here");
4152 4309 guarantee(!_cm->has_overflown(), "only way to reach here");
4153 4310 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4154 4311
4155 4312 if (_cm->verbose_low()) {
4156 4313 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4157 4314 }
4158 4315 } else {
4159 4316     // Apparently there's more work to do. Let's abort this task; the
4160 4317     // caller will restart it and we can hopefully find more things to do.
4161 4318
4162 4319 if (_cm->verbose_low()) {
4163 4320 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4164 4321 _worker_id);
4165 4322 }
4166 4323
4167 4324 set_has_aborted();
4168 4325 statsOnly( ++_aborted_termination );
4169 4326 }
4170 4327 }
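  // [Editorial sketch, not part of the original source: the caller-side
  // contract assumed of offer_termination() above, with hypothetical
  // names. The call parks this worker until one of two things happens:
  //
  //   if (terminator->offer_termination(this)) {
  //     // every worker offered termination: marking is globally done
  //   } else {
  //     // should_exit_termination() saw new work: abort and restart
  //     set_has_aborted();
  //   }
  // ]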
4171 4328
4172 4329 // Mainly for debugging purposes to make sure that a pointer to the
4173 4330 // closure which was statically allocated in this frame doesn't
4174 4331 // escape it by accident.
4175 4332 set_cm_oop_closure(NULL);
4176 4333 double end_time_ms = os::elapsedVTime() * 1000.0;
4177 4334 double elapsed_time_ms = end_time_ms - _start_time_ms;
4178 4335 // Update the step history.
4179 4336 _step_times_ms.add(elapsed_time_ms);
4180 4337
4181 4338 if (has_aborted()) {
4182 4339 // The task was aborted for some reason.
4183 4340
4184 4341 statsOnly( ++_aborted );
4185 4342
4186 4343 if (_has_timed_out) {
4187 4344 double diff_ms = elapsed_time_ms - _time_target_ms;
4188 4345 // Keep statistics of how well we did with respect to hitting
4189 4346 // our target only if we actually timed out (if we aborted for
4190 4347 // other reasons, then the results might get skewed).
4191 4348 _marking_step_diffs_ms.add(diff_ms);
4192 4349 }
4193 4350
4194 4351 if (_cm->has_overflown()) {
4195 4352 // This is the interesting one. We aborted because a global
4196 4353 // overflow was raised. This means we have to restart the
4197 4354 // marking phase and start iterating over regions. However, in
4198 4355 // order to do this we have to make sure that all tasks stop
4199 4356 // what they are doing and re-initialise in a safe manner. We
4200 4357 // will achieve this with the use of two barrier sync points.
4201 4358
4202 4359 if (_cm->verbose_low()) {
4203 4360 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4204 4361 }
4205 4362
4206 4363 _cm->enter_first_sync_barrier(_worker_id);
4207 4364 // When we exit this sync barrier we know that all tasks have
4208 4365 // stopped doing marking work. So, it's now safe to
4209 4366 // re-initialise our data structures. At the end of this method,
4210 4367 // task 0 will clear the global data structures.
4211 4368
4212 4369 statsOnly( ++_aborted_overflow );
4213 4370
4214 4371 // We clear the local state of this task...
4215 4372 clear_region_fields();
4216 4373
4217 4374 // ...and enter the second barrier.
4218 4375 _cm->enter_second_sync_barrier(_worker_id);
4219 4376       // At this point everything has been re-initialised and we're
4220 4377 // ready to restart.
4221 4378 }
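      // [Editorial sketch, not part of the original source: the generic
      // two-barrier restart pattern used above, with hypothetical names.
      // The first barrier guarantees that no task is still producing or
      // consuming marking work before any state is reset; the second
      // guarantees that every reset has completed before any task
      // resumes:
      //
      //   barrier_one.wait();     // all tasks have stopped marking
      //   reset_local_state();    // safe: no concurrent producers
      //   // (task 0 additionally resets the global data structures)
      //   barrier_two.wait();     // all resets visible; safe to restart
      // ]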
4222 4379
4223 4380 if (_cm->verbose_low()) {
4224 4381 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4225 4382 "elapsed = %1.2lfms <<<<<<<<<<",
4226 4383 _worker_id, _time_target_ms, elapsed_time_ms);
4227 4384 if (_cm->has_aborted()) {
4228 4385 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4229 4386 _worker_id);
4230 4387 }
4231 4388 }
4232 4389 } else {
4233 4390 if (_cm->verbose_low()) {
4234 4391 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4235 4392 "elapsed = %1.2lfms <<<<<<<<<<",
4236 4393 _worker_id, _time_target_ms, elapsed_time_ms);
4237 4394 }
4238 4395 }
4239 4396
4240 4397 _claimed = false;
4241 4398 }
4242 4399
4243 4400 CMTask::CMTask(uint worker_id,
4244 4401 ConcurrentMark* cm,
4245 4402 size_t* marked_bytes,
4246 4403 BitMap* card_bm,
4247 4404 CMTaskQueue* task_queue,
4248 4405 CMTaskQueueSet* task_queues)
4249 4406 : _g1h(G1CollectedHeap::heap()),
4250 4407 _worker_id(worker_id), _cm(cm),
4251 4408 _claimed(false),
4252 4409 _nextMarkBitMap(NULL), _hash_seed(17),
4253 4410 _task_queue(task_queue),
4254 4411 _task_queues(task_queues),
4255 4412 _cm_oop_closure(NULL),
4256 4413 _marked_bytes_array(marked_bytes),
4257 4414 _card_bm(card_bm) {
4258 4415 guarantee(task_queue != NULL, "invariant");
4259 4416 guarantee(task_queues != NULL, "invariant");
4260 4417
4261 4418 statsOnly( _clock_due_to_scanning = 0;
4262 4419 _clock_due_to_marking = 0 );
4263 4420
4264 4421 _marking_step_diffs_ms.add(0.5);
4265 4422 }
4266 4423
4267 4424 // These are formatting macros that are used below to ensure
4268 4425 // consistent formatting. The *_H_* versions are used to format the
4269 4426 // header for a particular value and they should be kept consistent
4270 4427 // with the corresponding macro. Also note that most of the macros add
4271 4428 // the necessary white space (as a prefix) which makes them a bit
4272 4429 // easier to compose.
4273 4430
4274 4431 // All the output lines are prefixed with this string to be able to
4275 4432 // identify them easily in a large log file.
4276 4433 #define G1PPRL_LINE_PREFIX "###"
4277 4434
4278 4435 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4279 4436 #ifdef _LP64
4280 4437 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4281 4438 #else // _LP64
4282 4439 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4283 4440 #endif // _LP64
4284 4441
4285 4442 // For per-region info
4286 4443 #define G1PPRL_TYPE_FORMAT " %-4s"
4287 4444 #define G1PPRL_TYPE_H_FORMAT " %4s"
4288 4445 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4289 4446 #define G1PPRL_BYTE_H_FORMAT " %9s"
4290 4447 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4291 4448 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4292 4449
4293 4450 // For summary info
4294 4451 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4295 4452 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4296 4453 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4297 4454 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
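// [Editorial example, not part of the original source: adjacent string
// literals concatenate in C/C++, so the macros above compose into a
// single format string at compile time. The per-region print in
// doHeapRegion() below therefore expands to one long literal along the
// lines of
//
//   "###" " %-4s" " "PTR_FORMAT"-"PTR_FORMAT" "SIZE_FORMAT_W(9) ...
//
// and a printed row might look like (illustrative values only):
//
//   ###  OLD 0x00000000f4000000-0x00000000f4100000   1048576 ...
// ]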
4298 4455
4299 4456 G1PrintRegionLivenessInfoClosure::
4300 4457 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4301 4458 : _out(out),
4302 4459 _total_used_bytes(0), _total_capacity_bytes(0),
4303 4460 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4304 4461 _hum_used_bytes(0), _hum_capacity_bytes(0),
4305 4462 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4306 4463 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4307 4464 MemRegion g1_committed = g1h->g1_committed();
4308 4465 MemRegion g1_reserved = g1h->g1_reserved();
4309 4466 double now = os::elapsedTime();
4310 4467
4311 4468 // Print the header of the output.
4312 4469 _out->cr();
4313 4470 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4314 4471 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4315 4472 G1PPRL_SUM_ADDR_FORMAT("committed")
4316 4473 G1PPRL_SUM_ADDR_FORMAT("reserved")
4317 4474 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4318 4475 g1_committed.start(), g1_committed.end(),
4319 4476 g1_reserved.start(), g1_reserved.end(),
4320 4477 HeapRegion::GrainBytes);
4321 4478 _out->print_cr(G1PPRL_LINE_PREFIX);
4322 4479 _out->print_cr(G1PPRL_LINE_PREFIX
4323 4480 G1PPRL_TYPE_H_FORMAT
4324 4481 G1PPRL_ADDR_BASE_H_FORMAT
4325 4482 G1PPRL_BYTE_H_FORMAT
4326 4483 G1PPRL_BYTE_H_FORMAT
4327 4484 G1PPRL_BYTE_H_FORMAT
4328 4485 G1PPRL_DOUBLE_H_FORMAT,
4329 4486 "type", "address-range",
4330 4487 "used", "prev-live", "next-live", "gc-eff");
4331 4488 _out->print_cr(G1PPRL_LINE_PREFIX
4332 4489 G1PPRL_TYPE_H_FORMAT
4333 4490 G1PPRL_ADDR_BASE_H_FORMAT
4334 4491 G1PPRL_BYTE_H_FORMAT
4335 4492 G1PPRL_BYTE_H_FORMAT
4336 4493 G1PPRL_BYTE_H_FORMAT
4337 4494 G1PPRL_DOUBLE_H_FORMAT,
4338 4495 "", "",
4339 4496 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4340 4497 }
4341 4498
4342 4499 // It takes as a parameter a reference to one of the _hum_* fields. It
4343 4500 // deduces the corresponding value for a region in a humongous region
4344 4501 // series (either the region size, or what's left if the _hum_* field
4345 4502 // is < the region size), and updates the _hum_* field accordingly.
4346 4503 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4347 4504 size_t bytes = 0;
4348 4505 // The > 0 check is to deal with the prev and next live bytes which
4349 4506 // could be 0.
4350 4507 if (*hum_bytes > 0) {
4351 4508 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4352 4509 *hum_bytes -= bytes;
4353 4510 }
4354 4511 return bytes;
4355 4512 }
4356 4513
4357 4514 // It deduces the values for a region in a humongous region series
4358 4515 // from the _hum_* fields and updates those accordingly. It assumes
4359 4516 // that the _hum_* fields have already been set up from the "starts
4360 4517 // humongous" region and we visit the regions in address order.
4361 4518 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4362 4519 size_t* capacity_bytes,
4363 4520 size_t* prev_live_bytes,
4364 4521 size_t* next_live_bytes) {
4365 4522 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4366 4523 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4367 4524 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4368 4525 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4369 4526 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4370 4527 }
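// [Editorial worked example, not part of the original source: suppose a
// humongous series spans three regions with GrainBytes = 1M and the
// "starts humongous" region records 2.5M used for the whole series. The
// HUMS row reports MIN2(1M, 2.5M) = 1M, leaving 1.5M in _hum_used_bytes;
// the first HUMC row reports another 1M, leaving 0.5M; the second HUMC
// row reports the remaining 0.5M, and the field drops back to zero,
// ready for the next series.]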
4371 4528
4372 4529 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4373 4530 const char* type = "";
4374 4531 HeapWord* bottom = r->bottom();
4375 4532 HeapWord* end = r->end();
4376 4533 size_t capacity_bytes = r->capacity();
4377 4534 size_t used_bytes = r->used();
4378 4535 size_t prev_live_bytes = r->live_bytes();
4379 4536 size_t next_live_bytes = r->next_live_bytes();
4380 4537 double gc_eff = r->gc_efficiency();
4381 4538 if (r->used() == 0) {
4382 4539 type = "FREE";
4383 4540 } else if (r->is_survivor()) {
4384 4541 type = "SURV";
4385 4542 } else if (r->is_young()) {
4386 4543 type = "EDEN";
4387 4544 } else if (r->startsHumongous()) {
4388 4545 type = "HUMS";
4389 4546
4390 4547 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4391 4548 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4392 4549 "they should have been zeroed after the last time we used them");
4393 4550 // Set up the _hum_* fields.
4394 4551 _hum_capacity_bytes = capacity_bytes;
4395 4552 _hum_used_bytes = used_bytes;
4396 4553 _hum_prev_live_bytes = prev_live_bytes;
4397 4554 _hum_next_live_bytes = next_live_bytes;
4398 4555 get_hum_bytes(&used_bytes, &capacity_bytes,
4399 4556 &prev_live_bytes, &next_live_bytes);
4400 4557 end = bottom + HeapRegion::GrainWords;
4401 4558 } else if (r->continuesHumongous()) {
4402 4559 type = "HUMC";
4403 4560 get_hum_bytes(&used_bytes, &capacity_bytes,
4404 4561 &prev_live_bytes, &next_live_bytes);
4405 4562 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4406 4563 } else {
4407 4564 type = "OLD";
4408 4565 }
4409 4566
4410 4567 _total_used_bytes += used_bytes;
4411 4568 _total_capacity_bytes += capacity_bytes;
4412 4569 _total_prev_live_bytes += prev_live_bytes;
4413 4570 _total_next_live_bytes += next_live_bytes;
4414 4571
4415 4572 // Print a line for this particular region.
4416 4573 _out->print_cr(G1PPRL_LINE_PREFIX
4417 4574 G1PPRL_TYPE_FORMAT
4418 4575 G1PPRL_ADDR_BASE_FORMAT
4419 4576 G1PPRL_BYTE_FORMAT
4420 4577 G1PPRL_BYTE_FORMAT
4421 4578 G1PPRL_BYTE_FORMAT
4422 4579 G1PPRL_DOUBLE_FORMAT,
4423 4580 type, bottom, end,
4424 4581 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4425 4582
4426 4583 return false;
4427 4584 }
4428 4585
4429 4586 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4430 4587 // Print the footer of the output.
4431 4588 _out->print_cr(G1PPRL_LINE_PREFIX);
4432 4589 _out->print_cr(G1PPRL_LINE_PREFIX
4433 4590 " SUMMARY"
4434 4591 G1PPRL_SUM_MB_FORMAT("capacity")
4435 4592 G1PPRL_SUM_MB_PERC_FORMAT("used")
4436 4593 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4437 4594 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4438 4595 bytes_to_mb(_total_capacity_bytes),
4439 4596 bytes_to_mb(_total_used_bytes),
4440 4597 perc(_total_used_bytes, _total_capacity_bytes),
4441 4598 bytes_to_mb(_total_prev_live_bytes),
4442 4599 perc(_total_prev_live_bytes, _total_capacity_bytes),
4443 4600 bytes_to_mb(_total_next_live_bytes),
4444 4601 perc(_total_next_live_bytes, _total_capacity_bytes));
4445 4602 _out->cr();
4446 4603 }
(1583 lines elided)