Abort concurrent mark
--- old/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
+++ new/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/classLoaderDataGraph.hpp"
27 27 #include "code/codeCache.hpp"
28 28 #include "gc/g1/g1BarrierSet.hpp"
29 29 #include "gc/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc/g1/g1CollectorState.hpp"
31 31 #include "gc/g1/g1ConcurrentMark.inline.hpp"
32 32 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
33 33 #include "gc/g1/g1DirtyCardQueue.hpp"
34 34 #include "gc/g1/g1HeapVerifier.hpp"
35 35 #include "gc/g1/g1OopClosures.inline.hpp"
36 36 #include "gc/g1/g1Policy.hpp"
37 37 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
38 38 #include "gc/g1/g1StringDedup.hpp"
39 39 #include "gc/g1/g1ThreadLocalData.hpp"
40 40 #include "gc/g1/g1Trace.hpp"
41 41 #include "gc/g1/heapRegion.inline.hpp"
42 42 #include "gc/g1/heapRegionRemSet.hpp"
43 43 #include "gc/g1/heapRegionSet.inline.hpp"
44 44 #include "gc/shared/gcId.hpp"
45 45 #include "gc/shared/gcTimer.hpp"
46 46 #include "gc/shared/gcTraceTime.inline.hpp"
47 47 #include "gc/shared/gcVMOperations.hpp"
48 48 #include "gc/shared/genOopClosures.inline.hpp"
49 49 #include "gc/shared/referencePolicy.hpp"
50 50 #include "gc/shared/strongRootsScope.hpp"
51 51 #include "gc/shared/suspendibleThreadSet.hpp"
52 52 #include "gc/shared/taskTerminator.hpp"
53 53 #include "gc/shared/taskqueue.inline.hpp"
54 54 #include "gc/shared/weakProcessor.inline.hpp"
55 55 #include "gc/shared/workerPolicy.hpp"
56 56 #include "include/jvm.h"
57 57 #include "logging/log.hpp"
58 58 #include "memory/allocation.hpp"
59 59 #include "memory/iterator.hpp"
60 60 #include "memory/resourceArea.hpp"
61 61 #include "memory/universe.hpp"
62 62 #include "oops/access.inline.hpp"
63 63 #include "oops/oop.inline.hpp"
64 64 #include "runtime/atomic.hpp"
65 65 #include "runtime/handles.inline.hpp"
66 66 #include "runtime/java.hpp"
67 67 #include "runtime/orderAccess.hpp"
68 68 #include "runtime/prefetch.inline.hpp"
69 69 #include "services/memTracker.hpp"
70 70 #include "utilities/align.hpp"
71 71 #include "utilities/growableArray.hpp"
72 72
73 73 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
74 74 assert(addr < _cm->finger(), "invariant");
75 75 assert(addr >= _task->finger(), "invariant");
76 76
77 77 // We move that task's local finger along.
78 78 _task->move_finger_to(addr);
79 79
80 80 _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
81 81 // we only partially drain the local queue and global stack
82 82 _task->drain_local_queue(true);
83 83 _task->drain_global_stack(true);
84 84
85 85 // if the has_aborted flag has been raised, we need to bail out of
86 86 // the iteration
87 87 return !_task->has_aborted();
88 88 }
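
The two partial drains above are what keep a task's local queue and the global stack bounded while the bitmap scan is still walking: every scanned entry may push more work, so the task trims its queues before returning to the iteration. A minimal stand-alone sketch of the pattern in plain C++ (the queue type, the threshold, and process() are assumptions, not HotSpot code):

    #include <cstddef>
    #include <deque>

    struct Marker {
      std::deque<int> local_queue;                        // stands in for the task's local queue
      static const std::size_t kPartialDrainTarget = 16;  // assumed threshold

      void process(int /*obj*/) { /* visit fields; may call push_work() again */ }

      // Drain only until the queue is back under the target so the caller
      // (the bitmap iteration) regains control quickly -- cf. drain_local_queue(true).
      void drain_partially() {
        while (local_queue.size() > kPartialDrainTarget) {
          int obj = local_queue.back();
          local_queue.pop_back();                         // LIFO, like a mark stack
          process(obj);
        }
      }

      void push_work(int obj) {
        local_queue.push_back(obj);
        drain_partially();                                // bound growth after every push
      }
    };

    int main() {
      Marker m;
      for (int i = 0; i < 100; ++i) m.push_work(i);
      return 0;
    }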
89 89
90 90 G1CMMarkStack::G1CMMarkStack() :
91 91 _max_chunk_capacity(0),
92 92 _base(NULL),
93 93 _chunk_capacity(0) {
94 94 set_empty();
95 95 }
96 96
97 97 bool G1CMMarkStack::resize(size_t new_capacity) {
98 98 assert(is_empty(), "Only resize when stack is empty.");
99 99 assert(new_capacity <= _max_chunk_capacity,
100 100 "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
101 101
102 102 TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);
103 103
104 104 if (new_base == NULL) {
105 105 log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
106 106 return false;
107 107 }
108 108 // Release old mapping.
109 109 if (_base != NULL) {
110 110 MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
111 111 }
112 112
113 113 _base = new_base;
114 114 _chunk_capacity = new_capacity;
115 115 set_empty();
116 116
117 117 return true;
118 118 }
119 119
120 120 size_t G1CMMarkStack::capacity_alignment() {
121 121 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
122 122 }
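
capacity_alignment() returns an alignment in entries; rounding any requested capacity up to a multiple of lcm(allocation granularity, chunk size) guarantees the mmap'ed backing store is simultaneously a whole number of chunks and a whole number of OS allocation units. A small sketch of the arithmetic with std::lcm (C++17); the concrete sizes below are illustrative assumptions:

    #include <cstddef>
    #include <cstdio>
    #include <numeric>

    int main() {
      const std::size_t granularity = 65536;             // stand-in for os::vm_allocation_granularity()
      const std::size_t entry_size  = sizeof(void*);
      const std::size_t chunk_size  = 64 * entry_size;   // stand-in for sizeof(TaskQueueEntryChunk)

      // Alignment expressed in entries: any capacity rounded up to this value
      // maps to a whole number of chunks and a whole number of OS pages.
      std::size_t align_entries = std::lcm(granularity, chunk_size) / entry_size;
      std::printf("capacity alignment = %zu entries\n", align_entries);
      return 0;
    }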
123 123
124 124 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
125 125 guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
126 126
127 127 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
128 128
129 129 _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
130 130 size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
131 131
132 132 guarantee(initial_chunk_capacity <= _max_chunk_capacity,
133 133 "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
134 134 _max_chunk_capacity,
135 135 initial_chunk_capacity);
136 136
137 137 log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
138 138 initial_chunk_capacity, _max_chunk_capacity);
139 139
140 140 return resize(initial_chunk_capacity);
141 141 }
142 142
143 143 void G1CMMarkStack::expand() {
144 144 if (_chunk_capacity == _max_chunk_capacity) {
145 145 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
146 146 return;
147 147 }
148 148 size_t old_capacity = _chunk_capacity;
149 149 // Double capacity if possible
150 150 size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
151 151
152 152 if (resize(new_capacity)) {
153 153 log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
154 154 old_capacity, new_capacity);
155 155 } else {
156 156 log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
157 157 old_capacity, new_capacity);
158 158 }
159 159 }
160 160
161 161 G1CMMarkStack::~G1CMMarkStack() {
162 162 if (_base != NULL) {
163 163 MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
164 164 }
165 165 }
166 166
167 167 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
168 168 elem->next = *list;
169 169 *list = elem;
170 170 }
171 171
172 172 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
173 173 MutexLocker x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
174 174 add_chunk_to_list(&_chunk_list, elem);
175 175 _chunks_in_chunk_list++;
176 176 }
177 177
178 178 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
179 179 MutexLocker x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
180 180 add_chunk_to_list(&_free_list, elem);
181 181 }
182 182
183 183 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
184 184 TaskQueueEntryChunk* result = *list;
185 185 if (result != NULL) {
186 186 *list = (*list)->next;
187 187 }
188 188 return result;
189 189 }
190 190
191 191 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
192 192 MutexLocker x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
193 193 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
194 194 if (result != NULL) {
195 195 _chunks_in_chunk_list--;
196 196 }
197 197 return result;
198 198 }
199 199
200 200 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
201 201 MutexLocker x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
202 202 return remove_chunk_from_list(&_free_list);
203 203 }
204 204
205 205 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
206 206 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
207 207 // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
208 208 // wraparound of _hwm.
209 209 if (_hwm >= _chunk_capacity) {
210 210 return NULL;
211 211 }
212 212
213 213 size_t cur_idx = Atomic::fetch_and_add(&_hwm, 1u);
214 214 if (cur_idx >= _chunk_capacity) {
215 215 return NULL;
216 216 }
217 217
218 218 TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
219 219 result->next = NULL;
220 220 return result;
221 221 }
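
The relaxed pre-check on _hwm before the fetch-and-add is the wraparound guard the comment describes: once the pre-check fails, no further increments happen, so the counter can only overshoot by at most one per racing thread. The same claim pattern as a stand-alone C++ sketch (std::atomic stands in for HotSpot's Atomic; the capacity is an assumed value):

    #include <atomic>
    #include <cstddef>

    std::atomic<std::size_t> hwm{0};
    const std::size_t capacity = 1024;                   // assumed chunk capacity

    // Returns an exclusive index < capacity, or capacity on exhaustion.
    std::size_t claim_index() {
      // Dirty read: cheap early-out once the array is exhausted. Without it,
      // every failing caller would still bump the counter toward wraparound.
      if (hwm.load(std::memory_order_relaxed) >= capacity) {
        return capacity;
      }
      std::size_t idx = hwm.fetch_add(1, std::memory_order_relaxed);
      // At most one overshoot per thread can land here, so hwm stays bounded
      // by capacity + #threads.
      return (idx < capacity) ? idx : capacity;
    }

    int main() {
      return claim_index() < capacity ? 0 : 1;
    }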
222 222
223 223 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
224 224 // Get a new chunk.
225 225 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
226 226
227 227 if (new_chunk == NULL) {
228 228 // Did not get a chunk from the free list. Allocate from backing memory.
229 229 new_chunk = allocate_new_chunk();
230 230
231 231 if (new_chunk == NULL) {
232 232 return false;
233 233 }
234 234 }
235 235
236 236 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
237 237
238 238 add_chunk_to_chunk_list(new_chunk);
239 239
240 240 return true;
241 241 }
242 242
243 243 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
244 244 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
245 245
246 246 if (cur == NULL) {
247 247 return false;
248 248 }
249 249
250 250 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
251 251
252 252 add_chunk_to_free_list(cur);
253 253 return true;
254 254 }
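
par_push_chunk and par_pop_chunk make chunks cycle between two intrusive lists: full chunks live on the chunk list, emptied ones are parked on the free list for reuse, and backing memory is bump-allocated only when the free list is dry. A minimal sketch of the two-list recycling, with one mutex per list as in the original (types and names here are simplified assumptions):

    #include <mutex>

    struct Chunk { Chunk* next; /* payload elided */ };

    struct ChunkLists {
      Chunk* chunk_list = nullptr;       // full chunks: the logical mark stack
      Chunk* free_list  = nullptr;       // empty chunks kept for reuse
      std::mutex chunk_lock;             // one lock per list, as in G1CMMarkStack
      std::mutex free_lock;

      void push_full(Chunk* c) {
        std::lock_guard<std::mutex> g(chunk_lock);
        c->next = chunk_list;
        chunk_list = c;
      }
      Chunk* pop_full() {
        std::lock_guard<std::mutex> g(chunk_lock);
        Chunk* c = chunk_list;
        if (c != nullptr) chunk_list = c->next;
        return c;
      }
      void push_free(Chunk* c) {
        std::lock_guard<std::mutex> g(free_lock);
        c->next = free_list;
        free_list = c;
      }
      Chunk* pop_free() {
        std::lock_guard<std::mutex> g(free_lock);
        Chunk* c = free_list;
        if (c != nullptr) free_list = c->next;
        return c;
      }
    };

    int main() {
      ChunkLists ls;
      Chunk c{};                              // one chunk cycling between the lists
      ls.push_full(&c);                       // par_push_chunk: publish a full chunk
      Chunk* got = ls.pop_full();             // par_pop_chunk: take it back...
      if (got != nullptr) ls.push_free(got);  // ...and recycle it
      return ls.pop_free() == &c ? 0 : 1;
    }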
255 255
256 256 void G1CMMarkStack::set_empty() {
257 257 _chunks_in_chunk_list = 0;
258 258 _hwm = 0;
259 259 _chunk_list = NULL;
260 260 _free_list = NULL;
261 261 }
262 262
263 263 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
264 264 _root_regions(MemRegion::create_array(max_regions, mtGC)),
265 265 _max_regions(max_regions),
266 266 _num_root_regions(0),
267 267 _claimed_root_regions(0),
268 268 _scan_in_progress(false),
269 269 _should_abort(false) { }
270 270
271 271 G1CMRootMemRegions::~G1CMRootMemRegions() {
272 272 MemRegion::destroy_array(_root_regions, _max_regions);
273 273 }
274 274
275 275 void G1CMRootMemRegions::reset() {
276 276 _num_root_regions = 0;
277 277 }
278 278
279 279 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
280 280 assert_at_safepoint();
281 281 size_t idx = Atomic::fetch_and_add(&_num_root_regions, 1u);
282 282 assert(idx < _max_regions, "Trying to add more root MemRegions than there is space " SIZE_FORMAT, _max_regions);
283 283 assert(start != NULL && end != NULL && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
284 284 "end (" PTR_FORMAT ")", p2i(start), p2i(end));
285 285 _root_regions[idx].set_start(start);
286 286 _root_regions[idx].set_end(end);
287 287 }
288 288
289 289 void G1CMRootMemRegions::prepare_for_scan() {
290 290 assert(!scan_in_progress(), "pre-condition");
291 291
292 292 _scan_in_progress = _num_root_regions > 0;
293 293
294 294 _claimed_root_regions = 0;
295 295 _should_abort = false;
296 296 }
297 297
298 298 const MemRegion* G1CMRootMemRegions::claim_next() {
299 299 if (_should_abort) {
300 300 // If someone has set the should_abort flag, we return NULL to
301 301 // force the caller to bail out of their loop.
302 302 return NULL;
303 303 }
304 304
305 305 if (_claimed_root_regions >= _num_root_regions) {
306 306 return NULL;
307 307 }
308 308
309 309 size_t claimed_index = Atomic::fetch_and_add(&_claimed_root_regions, 1u);
310 310 if (claimed_index < _num_root_regions) {
311 311 return &_root_regions[claimed_index];
312 312 }
313 313 return NULL;
314 314 }
315 315
316 316 uint G1CMRootMemRegions::num_root_regions() const {
317 317 return (uint)_num_root_regions;
318 318 }
319 319
320 320 void G1CMRootMemRegions::notify_scan_done() {
321 321 MutexLocker x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
322 322 _scan_in_progress = false;
323 323 RootRegionScan_lock->notify_all();
324 324 }
325 325
326 326 void G1CMRootMemRegions::cancel_scan() {
327 327 notify_scan_done();
328 328 }
329 329
330 330 void G1CMRootMemRegions::scan_finished() {
331 331 assert(scan_in_progress(), "pre-condition");
332 332
333 333 if (!_should_abort) {
334 334 assert(_claimed_root_regions >= num_root_regions(),
335 335 "we should have claimed all root regions, claimed " SIZE_FORMAT ", length = %u",
336 336 _claimed_root_regions, num_root_regions());
337 337 }
338 338
339 339 notify_scan_done();
340 340 }
341 341
342 342 bool G1CMRootMemRegions::wait_until_scan_finished() {
343 343 if (!scan_in_progress()) {
344 344 return false;
345 345 }
346 346
347 347 {
348 348 MonitorLocker ml(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
349 349 while (scan_in_progress()) {
350 350 ml.wait();
351 351 }
352 352 }
353 353 return true;
354 354 }
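
notify_scan_done() clears the flag and notifies while holding the lock; wait_until_scan_finished() re-checks the flag in a loop, so spurious wakeups and late arrivals are both handled. The same monitor idiom in standard C++ (std::condition_variable stands in for RootRegionScan_lock):

    #include <condition_variable>
    #include <mutex>
    #include <thread>

    std::mutex scan_lock;
    std::condition_variable scan_cv;
    bool scan_in_progress = true;

    void notify_scan_done() {            // cf. G1CMRootMemRegions::notify_scan_done
      std::lock_guard<std::mutex> g(scan_lock);
      scan_in_progress = false;
      scan_cv.notify_all();
    }

    void wait_until_scan_finished() {
      std::unique_lock<std::mutex> lk(scan_lock);
      // The predicate loop guards against spurious wakeups, like the
      // while (scan_in_progress()) around ml.wait() above.
      scan_cv.wait(lk, [] { return !scan_in_progress; });
    }

    int main() {
      std::thread t(notify_scan_done);
      wait_until_scan_finished();        // correct whichever side runs first
      t.join();
      return 0;
    }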
355 355
356 356 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
357 357 G1RegionToSpaceMapper* prev_bitmap_storage,
358 358 G1RegionToSpaceMapper* next_bitmap_storage) :
359 359 // _cm_thread set inside the constructor
360 360 _g1h(g1h),
361 361
362 362 _mark_bitmap_1(),
363 363 _mark_bitmap_2(),
364 364 _prev_mark_bitmap(&_mark_bitmap_1),
365 365 _next_mark_bitmap(&_mark_bitmap_2),
366 366
367 367 _heap(_g1h->reserved_region()),
368 368
369 369 _root_regions(_g1h->max_regions()),
370 370
371 371 _global_mark_stack(),
372 372
373 373 // _finger set in set_non_marking_state
374 374
375 375 _worker_id_offset(G1DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
376 376 _max_num_tasks(ParallelGCThreads),
377 377 // _num_active_tasks set in set_non_marking_state()
378 378 // _tasks set inside the constructor
379 379
380 380 _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
381 381 _terminator((int) _max_num_tasks, _task_queues),
382 382
383 383 _first_overflow_barrier_sync(),
384 384 _second_overflow_barrier_sync(),
385 385
386 386 _has_overflown(false),
387 387 _concurrent(false),
388 - _has_aborted(false),
388 + _aborted_by_fullgc(false),
389 + _aborted_by_initial_mark(false),
389 390 _restart_for_overflow(false),
390 391 _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
391 392 _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
392 393
393 394 // _verbose_level set below
394 395
395 396 _init_times(),
396 397 _remark_times(),
397 398 _remark_mark_times(),
398 399 _remark_weak_ref_times(),
399 400 _cleanup_times(),
400 401 _total_cleanup_time(0.0),
401 402
402 403 _accum_task_vtime(NULL),
403 404
404 405 _concurrent_workers(NULL),
405 406 _num_concurrent_workers(0),
406 407 _max_concurrent_workers(0),
407 408
408 409 _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)),
409 410 _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC))
410 411 {
411 412 assert(CGC_lock != NULL, "CGC_lock must be initialized");
412 413
413 414 _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
414 415 _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);
415 416
416 417 // Create & start ConcurrentMark thread.
417 418 _cm_thread = new G1ConcurrentMarkThread(this);
418 419 if (_cm_thread->osthread() == NULL) {
419 420 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
420 421 }
421 422
422 423 log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
423 424 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
424 425
425 426 _num_concurrent_workers = ConcGCThreads;
426 427 _max_concurrent_workers = _num_concurrent_workers;
427 428
428 429 _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
429 430 _concurrent_workers->initialize_workers();
430 431
431 432 if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
432 433 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
433 434 }
434 435
435 436 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
436 437 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);
437 438
438 439 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
439 440 _num_active_tasks = _max_num_tasks;
440 441
441 442 for (uint i = 0; i < _max_num_tasks; ++i) {
442 443 G1CMTaskQueue* task_queue = new G1CMTaskQueue();
443 444 task_queue->initialize();
444 445 _task_queues->register_queue(i, task_queue);
445 446
446 447 _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions());
447 448
448 449 _accum_task_vtime[i] = 0.0;
449 450 }
450 451
451 452 reset_at_marking_complete();
452 453 }
453 454
454 455 void G1ConcurrentMark::reset() {
455 - _has_aborted = false;
456 + _aborted_by_fullgc = false;
457 + _aborted_by_initial_mark = false;
456 458
457 459 reset_marking_for_restart();
458 460
459 461 // Reset all tasks, since different phases will use different number of active
460 462 // threads. So, it's easiest to have all of them ready.
461 463 for (uint i = 0; i < _max_num_tasks; ++i) {
462 464 _tasks[i]->reset(_next_mark_bitmap);
463 465 }
464 466
465 467 uint max_regions = _g1h->max_regions();
466 468 for (uint i = 0; i < max_regions; i++) {
467 469 _top_at_rebuild_starts[i] = NULL;
468 470 _region_mark_stats[i].clear();
469 471 }
470 472 }
471 473
472 474 void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
473 475 for (uint j = 0; j < _max_num_tasks; ++j) {
474 476 _tasks[j]->clear_mark_stats_cache(region_idx);
475 477 }
476 478 _top_at_rebuild_starts[region_idx] = NULL;
477 479 _region_mark_stats[region_idx].clear();
478 480 }
479 481
480 482 void G1ConcurrentMark::clear_statistics(HeapRegion* r) {
481 483 uint const region_idx = r->hrm_index();
482 484 if (r->is_humongous()) {
483 485 assert(r->is_starts_humongous(), "Got humongous continues region here");
484 486 uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size());
485 487 for (uint j = region_idx; j < (region_idx + size_in_regions); j++) {
486 488 clear_statistics_in_region(j);
487 489 }
488 490 } else {
489 491 clear_statistics_in_region(region_idx);
490 492 }
491 493 }
492 494
493 495 static void clear_mark_if_set(G1CMBitMap* bitmap, HeapWord* addr) {
494 496 if (bitmap->is_marked(addr)) {
495 497 bitmap->clear(addr);
496 498 }
497 499 }
498 500
499 501 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
500 502 assert_at_safepoint_on_vm_thread();
501 503
502 504 // Need to clear all mark bits of the humongous object.
503 505 clear_mark_if_set(_prev_mark_bitmap, r->bottom());
504 506 clear_mark_if_set(_next_mark_bitmap, r->bottom());
505 507
506 508 if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
507 509 return;
508 510 }
509 511
510 512 // Clear any statistics about the region gathered so far.
511 513 clear_statistics(r);
512 514 }
513 515
514 516 void G1ConcurrentMark::reset_marking_for_restart() {
515 517 _global_mark_stack.set_empty();
516 518
517 519 // Expand the marking stack, if we have to and if we can.
518 520 if (has_overflown()) {
519 521 _global_mark_stack.expand();
520 522
521 523 uint max_regions = _g1h->max_regions();
522 524 for (uint i = 0; i < max_regions; i++) {
523 525 _region_mark_stats[i].clear_during_overflow();
524 526 }
525 527 }
526 528
527 529 clear_has_overflown();
528 530 _finger = _heap.start();
529 531
530 532 for (uint i = 0; i < _max_num_tasks; ++i) {
531 533 G1CMTaskQueue* queue = _task_queues->queue(i);
532 534 queue->set_empty();
533 535 }
534 536 }
535 537
536 538 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
537 539 assert(active_tasks <= _max_num_tasks, "we should not have more");
538 540
539 541 _num_active_tasks = active_tasks;
540 542 // Need to update the three data structures below according to the
541 543 // number of active threads for this phase.
542 544 _terminator.reset_for_reuse(active_tasks);
543 545 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
544 546 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
545 547 }
546 548
547 549 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
548 550 set_concurrency(active_tasks);
549 551
550 552 _concurrent = concurrent;
551 553
552 554 if (!concurrent) {
553 555 // At this point we should be in a STW phase, and completed marking.
554 556 assert_at_safepoint_on_vm_thread();
555 557 assert(out_of_regions(),
556 558 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
557 559 p2i(_finger), p2i(_heap.end()));
558 560 }
559 561 }
560 562
561 563 void G1ConcurrentMark::reset_at_marking_complete() {
562 564 // We set the global marking state to some default values when we're
563 565 // not doing marking.
564 566 reset_marking_for_restart();
565 567 _num_active_tasks = 0;
566 568 }
567 569
568 570 G1ConcurrentMark::~G1ConcurrentMark() {
569 571 FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
570 572 FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
571 573 // The G1ConcurrentMark instance is never freed.
572 574 ShouldNotReachHere();
573 575 }
574 576
575 577 class G1ClearBitMapTask : public AbstractGangTask {
576 578 public:
577 579 static size_t chunk_size() { return M; }
578 580
579 581 private:
580 582 // Heap region closure used for clearing the given mark bitmap.
581 583 class G1ClearBitmapHRClosure : public HeapRegionClosure {
582 584 private:
583 585 G1CMBitMap* _bitmap;
584 586 G1ConcurrentMark* _cm;
585 587 public:
586 588 G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
587 589 }
588 590
589 591 virtual bool do_heap_region(HeapRegion* r) {
590 592 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
591 593
592 594 HeapWord* cur = r->bottom();
593 595 HeapWord* const end = r->end();
594 596
595 597 while (cur < end) {
596 598 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
597 599 _bitmap->clear_range(mr);
598 600
599 601 cur += chunk_size_in_words;
600 602
601 603 // Abort iteration if after yielding the marking has been aborted.
602 604 if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
603 605 return true;
604 606 }
605 607 // Repeat the asserts from before the start of the closure. We will do them
606 608 // as asserts here to minimize their overhead on the product. However, we
607 609 // will have them as guarantees at the beginning / end of the bitmap
608 610 // clearing to get some checking in the product.
609 611 assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
610 612 assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
611 613 }
612 614 assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
613 615
614 616 return false;
615 617 }
616 618 };
617 619
618 620 G1ClearBitmapHRClosure _cl;
619 621 HeapRegionClaimer _hr_claimer;
620 622 bool _suspendible; // If the task is suspendible, workers must join the STS.
621 623
622 624 public:
623 625 G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
624 626 AbstractGangTask("G1 Clear Bitmap"),
625 627 _cl(bitmap, suspendible ? cm : NULL),
626 628 _hr_claimer(n_workers),
627 629 _suspendible(suspendible)
628 630 { }
629 631
630 632 void work(uint worker_id) {
631 633 SuspendibleThreadSetJoiner sts_join(_suspendible);
632 634 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
633 635 }
634 636
635 637 bool is_complete() {
636 638 return _cl.is_complete();
637 639 }
638 640 };
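
The closure clears each region in 1 MB chunks and checks for yield/abort between chunks, so a concurrent clear can be cancelled with bounded latency. A stand-alone sketch of chunked clearing with an abort check (the buffer and chunk size are assumptions):

    #include <algorithm>
    #include <atomic>
    #include <cstddef>
    #include <cstring>
    #include <vector>

    std::atomic<bool> aborted{false};

    // Clear a large buffer in fixed-size chunks, checking for abort between
    // chunks -- the shape of G1ClearBitmapHRClosure::do_heap_region.
    bool clear_in_chunks(std::vector<unsigned char>& bitmap, std::size_t chunk) {
      for (std::size_t cur = 0; cur < bitmap.size(); cur += chunk) {
        std::size_t n = std::min(chunk, bitmap.size() - cur);
        std::memset(bitmap.data() + cur, 0, n);
        if (aborted.load(std::memory_order_relaxed)) {
          return false;                  // abort part-way, like returning true from the closure
        }
      }
      return true;
    }

    int main() {
      std::vector<unsigned char> bm(1u << 20, 0xff);
      return clear_in_chunks(bm, 1u << 16) ? 0 : 1;
    }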
639 641
640 642 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
641 643 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
642 644
643 645 size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
644 646 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
645 647
646 648 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
647 649
648 650 G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
649 651
650 652 log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
651 653 workers->run_task(&cl, num_workers);
652 654 guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
653 655 }
654 656
655 657 void G1ConcurrentMark::cleanup_for_next_mark() {
656 658 // Make sure that the concurrent mark thread looks to still be in
657 659 // the current cycle.
658 660 guarantee(cm_thread()->during_cycle(), "invariant");
659 661
660 662 // We are finishing up the current cycle by clearing the next
661 663 // marking bitmap and getting it ready for the next cycle. During
662 664 // this time no other cycle can start. So, let's make sure that this
663 665 // is the case.
664 666 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
665 667
666 668 clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);
667 669
668 670 // Repeat the asserts from above.
669 671 guarantee(cm_thread()->during_cycle(), "invariant");
670 672 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
671 673 }
672 674
673 675 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
674 676 assert_at_safepoint_on_vm_thread();
675 677 clear_bitmap(_prev_mark_bitmap, workers, false);
676 678 }
677 679
678 680 class NoteStartOfMarkHRClosure : public HeapRegionClosure {
679 681 public:
680 682 bool do_heap_region(HeapRegion* r) {
681 683 r->note_start_of_marking();
682 684 return false;
683 685 }
684 686 };
685 687
686 688 void G1ConcurrentMark::pre_initial_mark() {
687 689 assert_at_safepoint_on_vm_thread();
688 690
689 691 // Reset marking state.
690 692 reset();
691 693
692 694 // For each region note start of marking.
693 695 NoteStartOfMarkHRClosure startcl;
694 696 _g1h->heap_region_iterate(&startcl);
695 697
696 698 _root_regions.reset();
697 699 }
698 700
699 701
700 702 void G1ConcurrentMark::post_initial_mark() {
701 703 // Start Concurrent Marking weak-reference discovery.
702 704 ReferenceProcessor* rp = _g1h->ref_processor_cm();
703 705 // enable ("weak") refs discovery
704 706 rp->enable_discovery();
705 707 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
706 708
707 709 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
708 710   // This is the start of the marking cycle; we expect all
709 711   // threads to have SATB queues with active set to false.
710 712 satb_mq_set.set_active_all_threads(true, /* new active value */
711 713 false /* expected_active */);
712 714
713 715 _root_regions.prepare_for_scan();
714 716
715 717 // update_g1_committed() will be called at the end of an evac pause
716 718 // when marking is on. So, it's also called at the end of the
717 719 // initial-mark pause to update the heap end, if the heap expands
718 720 // during it. No need to call it here.
719 721 }
720 722
721 723 /*
722 724 * Notice that in the next two methods, we actually leave the STS
723 725 * during the barrier sync and join it immediately afterwards. If we
724 726 * do not do this, the following deadlock can occur: one thread could
725 727 * be in the barrier sync code, waiting for the other thread to also
726 728 * sync up, whereas another one could be trying to yield, while also
727 729 * waiting for the other threads to sync up too.
728 730 *
729 731 * Note, however, that this code is also used during remark and in
730 732 * this case we should not attempt to leave / enter the STS, otherwise
731 733 * we'll either hit an assert (debug / fastdebug) or deadlock
732 734 * (product). So we should only leave / enter the STS if we are
733 735 * operating concurrently.
734 736 *
735 737 * Because the thread that does the sync barrier has left the STS, it
736 738  * is possible for it to be suspended for a Full GC, or an evacuation
737 739  * pause could occur. This is actually safe, since entering the sync
738 740  * barrier is one of the last things do_marking_step() does, and it
739 741 * doesn't manipulate any data structures afterwards.
740 742 */
741 743
742 744 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
743 745 bool barrier_aborted;
744 746 {
745 747 SuspendibleThreadSetLeaver sts_leave(concurrent());
746 748 barrier_aborted = !_first_overflow_barrier_sync.enter();
747 749 }
748 750
749 751 // at this point everyone should have synced up and not be doing any
750 752 // more work
751 753
752 754 if (barrier_aborted) {
753 755 // If the barrier aborted we ignore the overflow condition and
754 756 // just abort the whole marking phase as quickly as possible.
755 757 return;
756 758 }
757 759 }
758 760
759 761 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
760 762 SuspendibleThreadSetLeaver sts_leave(concurrent());
761 763 _second_overflow_barrier_sync.enter();
762 764
763 765 // at this point everything should be re-initialized and ready to go
764 766 }
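
The comment above describes why a thread must leave the suspendible thread set before blocking in the barrier: a suspend request waits for every joined thread to yield, and a thread parked in the barrier cannot yield. A toy C++20 model of that cycle and its fix (the SuspendibleSet class is an assumption that captures only the joined-count behavior, not the real STS):

    #include <barrier>
    #include <condition_variable>
    #include <mutex>
    #include <thread>

    class SuspendibleSet {               // toy model: tracks only the joined count
      std::mutex m;
      std::condition_variable cv;
      int joined = 0;
    public:
      void join()  { std::lock_guard<std::mutex> g(m); ++joined; }
      void leave() { std::lock_guard<std::mutex> g(m); --joined; cv.notify_all(); }
      void wait_until_empty() {          // what a suspend request effectively does
        std::unique_lock<std::mutex> lk(m);
        cv.wait(lk, [this] { return joined == 0; });
      }
    };

    // Blocking in the barrier while still joined would make wait_until_empty()
    // hang forever; leaving first breaks the cycle, as in enter_first_sync_barrier.
    void sync_up(SuspendibleSet& sts, std::barrier<>& b) {
      sts.leave();
      b.arrive_and_wait();
      sts.join();
    }

    int main() {
      SuspendibleSet sts;
      std::barrier<> b(2);
      auto worker = [&] { sts.join(); sync_up(sts, b); sts.leave(); };
      std::thread t1(worker), t2(worker);
      t1.join(); t2.join();
      return 0;
    }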
765 767
766 768 class G1CMConcurrentMarkingTask : public AbstractGangTask {
767 769 G1ConcurrentMark* _cm;
768 770
769 771 public:
770 772 void work(uint worker_id) {
771 773 assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
772 774 ResourceMark rm;
773 775
774 776 double start_vtime = os::elapsedVTime();
775 777
776 778 {
777 779 SuspendibleThreadSetJoiner sts_join;
778 780
779 781 assert(worker_id < _cm->active_tasks(), "invariant");
780 782
781 783 G1CMTask* task = _cm->task(worker_id);
782 784 task->record_start_time();
783 785 if (!_cm->has_aborted()) {
784 786 do {
785 787 task->do_marking_step(G1ConcMarkStepDurationMillis,
786 788 true /* do_termination */,
787 789 false /* is_serial*/);
788 790
789 791 _cm->do_yield_check();
790 792 } while (!_cm->has_aborted() && task->has_aborted());
791 793 }
792 794 task->record_end_time();
793 795 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
794 796 }
795 797
796 798 double end_vtime = os::elapsedVTime();
797 799 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
798 800 }
799 801
800 802 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
801 803 AbstractGangTask("Concurrent Mark"), _cm(cm) { }
802 804
803 805 ~G1CMConcurrentMarkingTask() { }
804 806 };
805 807
806 808 uint G1ConcurrentMark::calc_active_marking_workers() {
807 809 uint result = 0;
808 810 if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
809 811 result = _max_concurrent_workers;
810 812 } else {
811 813 result =
812 814 WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
813 815 1, /* Minimum workers */
814 816 _num_concurrent_workers,
815 817 Threads::number_of_non_daemon_threads());
816 818 // Don't scale the result down by scale_concurrent_workers() because
817 819 // that scaling has already gone into "_max_concurrent_workers".
818 820 }
819 821 assert(result > 0 && result <= _max_concurrent_workers,
820 822 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
821 823 _max_concurrent_workers, result);
822 824 return result;
823 825 }
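
calc_active_marking_workers() either takes the configured maximum or asks WorkerPolicy for a dynamic count, then asserts the result stays in (0, max]. A sketch of that clamp shape only; the heuristic body here is a made-up placeholder, not WorkerPolicy's actual formula:

    #include <algorithm>
    #include <cstdio>

    unsigned calc_active_workers(unsigned max_workers, unsigned min_workers,
                                 unsigned prev_active, unsigned app_threads) {
      // Placeholder heuristic: scale with application threads, keep history.
      unsigned wanted = std::max(prev_active, app_threads / 2);
      // The invariant that matters: 0 < result <= max_workers.
      return std::min(std::max(wanted, min_workers), max_workers);
    }

    int main() {
      std::printf("workers = %u\n", calc_active_workers(8, 1, 4, 10));
      return 0;
    }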
824 826
825 827 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
826 828 #ifdef ASSERT
827 829 HeapWord* last = region->last();
828 830 HeapRegion* hr = _g1h->heap_region_containing(last);
829 831 assert(hr->is_old() || hr->next_top_at_mark_start() == hr->bottom(),
830 832 "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
831 833 assert(hr->next_top_at_mark_start() == region->start(),
832 834 "MemRegion start should be equal to nTAMS");
833 835 #endif
834 836
835 837 G1RootRegionScanClosure cl(_g1h, this, worker_id);
836 838
837 839 const uintx interval = PrefetchScanIntervalInBytes;
838 840 HeapWord* curr = region->start();
839 841 const HeapWord* end = region->end();
840 842 while (curr < end) {
841 843 Prefetch::read(curr, interval);
842 844 oop obj = oop(curr);
843 845 int size = obj->oop_iterate_size(&cl);
844 846 assert(size == obj->size(), "sanity");
845 847 curr += size;
846 848 }
847 849 }
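
scan_root_region() walks objects from the region's start to its end by repeatedly reading an object's size and bumping the cursor, prefetching ahead to hide cache misses. A simplified stand-alone walk over size-prefixed records (the size-in-first-word layout, the interval, and visit() are assumptions; __builtin_prefetch is a GCC/Clang hint only):

    #include <cstddef>
    #include <cstdint>

    void visit(const std::uint64_t* /*obj*/, std::size_t /*words*/) { /* scan fields */ }

    // Each record's first word holds its own size in words (assumed >= 1).
    void walk(const std::uint64_t* cur, const std::uint64_t* end) {
      const std::size_t interval = 8;        // stand-in for PrefetchScanIntervalInBytes
      while (cur < end) {
    #if defined(__GNUC__)
        __builtin_prefetch(cur + interval);  // hint only; no effect on correctness
    #endif
        std::size_t size_in_words = static_cast<std::size_t>(cur[0]);
        visit(cur, size_in_words);
        cur += size_in_words;                // advance by the object's own size
      }
    }

    int main() {
      std::uint64_t buf[5] = {3, 0, 0, 2, 0};  // two records: 3 and 2 words
      walk(buf, buf + 5);
      return 0;
    }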
848 850
849 851 class G1CMRootRegionScanTask : public AbstractGangTask {
850 852 G1ConcurrentMark* _cm;
851 853 public:
852 854 G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
853 855 AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
854 856
855 857 void work(uint worker_id) {
856 858 assert(Thread::current()->is_ConcurrentGC_thread(),
857 859 "this should only be done by a conc GC thread");
858 860
859 861 G1CMRootMemRegions* root_regions = _cm->root_regions();
860 862 const MemRegion* region = root_regions->claim_next();
861 863 while (region != NULL) {
862 864 _cm->scan_root_region(region, worker_id);
863 865 region = root_regions->claim_next();
864 866 }
865 867 }
866 868 };
867 869
868 870 void G1ConcurrentMark::scan_root_regions() {
869 871 // scan_in_progress() will have been set to true only if there was
870 872 // at least one root region to scan. So, if it's false, we
871 873 // should not attempt to do any further work.
872 874 if (root_regions()->scan_in_progress()) {
873 875 assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
874 876
875 877 _num_concurrent_workers = MIN2(calc_active_marking_workers(),
876 878 // We distribute work on a per-region basis, so starting
877 879 // more threads than that is useless.
878 880 root_regions()->num_root_regions());
879 881 assert(_num_concurrent_workers <= _max_concurrent_workers,
880 882 "Maximum number of marking threads exceeded");
881 883
882 884 G1CMRootRegionScanTask task(this);
883 885 log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
884 886 task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
885 887 _concurrent_workers->run_task(&task, _num_concurrent_workers);
886 888
887 889 // It's possible that has_aborted() is true here without actually
888 890 // aborting the survivor scan earlier. This is OK as it's
889 891 // mainly used for sanity checking.
890 892 root_regions()->scan_finished();
891 893 }
892 894 }
893 895
894 896 void G1ConcurrentMark::concurrent_cycle_start() {
895 897 _gc_timer_cm->register_gc_start();
896 898
897 899 _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
898 900
899 901 _g1h->trace_heap_before_gc(_gc_tracer_cm);
900 902 }
901 903
902 904 void G1ConcurrentMark::concurrent_cycle_end() {
903 905 _g1h->collector_state()->set_clearing_next_bitmap(false);
904 906
905 907 _g1h->trace_heap_after_gc(_gc_tracer_cm);
906 908
907 - if (has_aborted()) {
908 - log_info(gc, marking)("Concurrent Mark Abort");
909 + if (aborted_by_fullgc()) {
910 + log_info(gc, marking)("Concurrent Mark Abort due to Full GC");
909 911 _gc_tracer_cm->report_concurrent_mode_failure();
912 + } else if (aborted_by_initial_mark()) {
913 + log_info(gc, marking)("Concurrent Mark Abort due to Humongous Reclaim");
910 914 }
911 915
912 916 _gc_timer_cm->register_gc_end();
913 917
914 918 _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
915 919 }
916 920
917 921 void G1ConcurrentMark::mark_from_roots() {
918 922 _restart_for_overflow = false;
919 923
920 924 _num_concurrent_workers = calc_active_marking_workers();
921 925
922 926 uint active_workers = MAX2(1U, _num_concurrent_workers);
923 927
924 928 // Setting active workers is not guaranteed since fewer
925 929 // worker threads may currently exist and more may not be
926 930 // available.
927 931 active_workers = _concurrent_workers->update_active_workers(active_workers);
928 932 log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());
929 933
930 934 // Parallel task terminator is set in "set_concurrency_and_phase()"
931 935 set_concurrency_and_phase(active_workers, true /* concurrent */);
932 936
933 937 G1CMConcurrentMarkingTask marking_task(this);
934 938 _concurrent_workers->run_task(&marking_task);
935 939 print_stats();
936 940 }
937 941
938 942 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) {
939 943 G1HeapVerifier* verifier = _g1h->verifier();
940 944
941 945 verifier->verify_region_sets_optional();
942 946
943 947 if (VerifyDuringGC) {
944 948 GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
945 949
946 950 size_t const BufLen = 512;
947 951 char buffer[BufLen];
948 952
949 953 jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
950 954 verifier->verify(type, vo, buffer);
951 955 }
952 956
953 957 verifier->check_bitmaps(caller);
954 958 }
955 959
956 960 class G1UpdateRemSetTrackingBeforeRebuildTask : public AbstractGangTask {
957 961 G1CollectedHeap* _g1h;
958 962 G1ConcurrentMark* _cm;
959 963 HeapRegionClaimer _hrclaimer;
960 964 uint volatile _total_selected_for_rebuild;
961 965
962 966 G1PrintRegionLivenessInfoClosure _cl;
963 967
964 968 class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure {
965 969 G1CollectedHeap* _g1h;
966 970 G1ConcurrentMark* _cm;
967 971
968 972 G1PrintRegionLivenessInfoClosure* _cl;
969 973
970 974 uint _num_regions_selected_for_rebuild; // The number of regions actually selected for rebuild.
971 975
972 976 void update_remset_before_rebuild(HeapRegion* hr) {
973 977 G1RemSetTrackingPolicy* tracking_policy = _g1h->policy()->remset_tracker();
974 978
975 979 bool selected_for_rebuild;
976 980 if (hr->is_humongous()) {
977 981 bool const is_live = _cm->liveness(hr->humongous_start_region()->hrm_index()) > 0;
978 982 selected_for_rebuild = tracking_policy->update_humongous_before_rebuild(hr, is_live);
979 983 } else {
980 984 size_t const live_bytes = _cm->liveness(hr->hrm_index());
981 985 selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes);
982 986 }
983 987 if (selected_for_rebuild) {
984 988 _num_regions_selected_for_rebuild++;
985 989 }
986 990 _cm->update_top_at_rebuild_start(hr);
987 991 }
988 992
989 993 // Distribute the given words across the humongous object starting with hr and
990 994 // note end of marking.
991 995 void distribute_marked_bytes(HeapRegion* hr, size_t marked_words) {
992 996 uint const region_idx = hr->hrm_index();
993 997 size_t const obj_size_in_words = (size_t)oop(hr->bottom())->size();
994 998 uint const num_regions_in_humongous = (uint)G1CollectedHeap::humongous_obj_size_in_regions(obj_size_in_words);
995 999
996 1000 // "Distributing" zero words means that we only note end of marking for these
997 1001 // regions.
998 1002 assert(marked_words == 0 || obj_size_in_words == marked_words,
999 1003 "Marked words should either be 0 or the same as humongous object (" SIZE_FORMAT ") but is " SIZE_FORMAT,
1000 1004 obj_size_in_words, marked_words);
1001 1005
1002 1006 for (uint i = region_idx; i < (region_idx + num_regions_in_humongous); i++) {
1003 1007 HeapRegion* const r = _g1h->region_at(i);
1004 1008 size_t const words_to_add = MIN2(HeapRegion::GrainWords, marked_words);
1005 1009
1006 1010 log_trace(gc, marking)("Adding " SIZE_FORMAT " words to humongous region %u (%s)",
1007 1011 words_to_add, i, r->get_type_str());
1008 1012 add_marked_bytes_and_note_end(r, words_to_add * HeapWordSize);
1009 1013 marked_words -= words_to_add;
1010 1014 }
1011 1015 assert(marked_words == 0,
1012 1016 SIZE_FORMAT " words left after distributing space across %u regions",
1013 1017 marked_words, num_regions_in_humongous);
1014 1018 }
1015 1019
1016 1020 void update_marked_bytes(HeapRegion* hr) {
1017 1021 uint const region_idx = hr->hrm_index();
1018 1022 size_t const marked_words = _cm->liveness(region_idx);
1019 1023 // The marking attributes the object's size completely to the humongous starts
1020 1024 // region. We need to distribute this value across the entire set of regions a
1021 1025 // humongous object spans.
1022 1026 if (hr->is_humongous()) {
1023 1027 assert(hr->is_starts_humongous() || marked_words == 0,
1024 1028 "Should not have marked words " SIZE_FORMAT " in non-starts humongous region %u (%s)",
1025 1029 marked_words, region_idx, hr->get_type_str());
1026 1030 if (hr->is_starts_humongous()) {
1027 1031 distribute_marked_bytes(hr, marked_words);
1028 1032 }
1029 1033 } else {
1030 1034 log_trace(gc, marking)("Adding " SIZE_FORMAT " words to region %u (%s)", marked_words, region_idx, hr->get_type_str());
1031 1035 add_marked_bytes_and_note_end(hr, marked_words * HeapWordSize);
1032 1036 }
1033 1037 }
1034 1038
1035 1039 void add_marked_bytes_and_note_end(HeapRegion* hr, size_t marked_bytes) {
1036 1040 hr->add_to_marked_bytes(marked_bytes);
1037 1041 _cl->do_heap_region(hr);
1038 1042 hr->note_end_of_marking();
1039 1043 }
1040 1044
1041 1045 public:
1042 1046 G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm, G1PrintRegionLivenessInfoClosure* cl) :
1043 1047 _g1h(g1h), _cm(cm), _cl(cl), _num_regions_selected_for_rebuild(0) { }
1044 1048
1045 1049 virtual bool do_heap_region(HeapRegion* r) {
1046 1050 update_remset_before_rebuild(r);
1047 1051 update_marked_bytes(r);
1048 1052
1049 1053 return false;
1050 1054 }
1051 1055
1052 1056 uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; }
1053 1057 };
1054 1058
1055 1059 public:
1056 1060 G1UpdateRemSetTrackingBeforeRebuildTask(G1CollectedHeap* g1h, G1ConcurrentMark* cm, uint num_workers) :
1057 1061 AbstractGangTask("G1 Update RemSet Tracking Before Rebuild"),
1058 1062 _g1h(g1h), _cm(cm), _hrclaimer(num_workers), _total_selected_for_rebuild(0), _cl("Post-Marking") { }
1059 1063
1060 1064 virtual void work(uint worker_id) {
1061 1065 G1UpdateRemSetTrackingBeforeRebuild update_cl(_g1h, _cm, &_cl);
1062 1066 _g1h->heap_region_par_iterate_from_worker_offset(&update_cl, &_hrclaimer, worker_id);
1063 1067 Atomic::add(&_total_selected_for_rebuild, update_cl.num_selected_for_rebuild());
1064 1068 }
1065 1069
1066 1070 uint total_selected_for_rebuild() const { return _total_selected_for_rebuild; }
1067 1071
1068 1072 // Number of regions for which roughly one thread should be spawned for this work.
1069 1073 static const uint RegionsPerThread = 384;
1070 1074 };
1071 1075
1072 1076 class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure {
1073 1077 G1CollectedHeap* _g1h;
1074 1078 public:
1075 1079 G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1076 1080
1077 1081 virtual bool do_heap_region(HeapRegion* r) {
1078 1082 _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1079 1083 return false;
1080 1084 }
1081 1085 };
1082 1086
1083 1087 void G1ConcurrentMark::remark() {
1084 1088 assert_at_safepoint_on_vm_thread();
1085 1089
1086 1090   // If a full collection has happened, we should not continue. However, we might
1087 1091 // have ended up here as the Remark VM operation has been scheduled already.
1088 1092 if (has_aborted()) {
1089 1093 return;
1090 1094 }
1091 1095
1092 1096 G1Policy* policy = _g1h->policy();
1093 1097 policy->record_concurrent_mark_remark_start();
1094 1098
1095 1099 double start = os::elapsedTime();
1096 1100
1097 1101 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before");
1098 1102
1099 1103 {
1100 1104 GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1101 1105 finalize_marking();
1102 1106 }
1103 1107
1104 1108 double mark_work_end = os::elapsedTime();
1105 1109
1106 1110 bool const mark_finished = !has_overflown();
1107 1111 if (mark_finished) {
1108 1112 weak_refs_work(false /* clear_all_soft_refs */);
1109 1113
1110 1114 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1111 1115 // We're done with marking.
1112 1116   // This is the end of the marking cycle; we expect all
1113 1117 // threads to have SATB queues with active set to true.
1114 1118 satb_mq_set.set_active_all_threads(false, /* new active value */
1115 1119 true /* expected_active */);
1116 1120
1117 1121 {
1118 1122 GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1119 1123 flush_all_task_caches();
1120 1124 }
1121 1125
1122 1126 // Install newly created mark bitmap as "prev".
1123 1127 swap_mark_bitmaps();
1124 1128 {
1125 1129 GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking Before Rebuild", _gc_timer_cm);
1126 1130
1127 1131 uint const workers_by_capacity = (_g1h->num_regions() + G1UpdateRemSetTrackingBeforeRebuildTask::RegionsPerThread - 1) /
1128 1132 G1UpdateRemSetTrackingBeforeRebuildTask::RegionsPerThread;
1129 1133 uint const num_workers = MIN2(_g1h->workers()->active_workers(), workers_by_capacity);
1130 1134
1131 1135 G1UpdateRemSetTrackingBeforeRebuildTask cl(_g1h, this, num_workers);
1132 1136 log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_regions());
1133 1137 _g1h->workers()->run_task(&cl, num_workers);
1134 1138
1135 1139 log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1136 1140 _g1h->num_regions(), cl.total_selected_for_rebuild());
1137 1141 }
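
The worker count above is a ceil-division: one worker per RegionsPerThread block of regions, capped by the gang's active workers. The same arithmetic in isolation (RegionsPerThread = 384 per the task class; the other numbers are examples):

    #include <algorithm>

    unsigned workers_for(unsigned num_regions, unsigned regions_per_thread,
                         unsigned active_workers) {
      // Round up: (n + d - 1) / d, then cap by the available workers.
      unsigned by_capacity = (num_regions + regions_per_thread - 1) / regions_per_thread;
      return std::min(active_workers, by_capacity);
    }

    int main() {
      // 1000 regions at 384 per thread -> ceil = 3 workers, cap 8 -> 3.
      return workers_for(1000, 384, 8) == 3 ? 0 : 1;
    }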
1138 1142 {
1139 1143 GCTraceTime(Debug, gc, phases) debug("Reclaim Empty Regions", _gc_timer_cm);
1140 1144 reclaim_empty_regions();
1141 1145 }
1142 1146
1143 1147 // Clean out dead classes
1144 1148 if (ClassUnloadingWithConcurrentMark) {
1145 1149 GCTraceTime(Debug, gc, phases) debug("Purge Metaspace", _gc_timer_cm);
1146 1150 ClassLoaderDataGraph::purge();
1147 1151 }
1148 1152
1149 1153 _g1h->resize_heap_if_necessary();
1150 1154
1151 1155 compute_new_sizes();
1152 1156
1153 1157 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark after");
1154 1158
1155 1159 assert(!restart_for_overflow(), "sanity");
1156 1160 // Completely reset the marking state since marking completed
1157 1161 reset_at_marking_complete();
1158 1162 } else {
1159 1163 // We overflowed. Restart concurrent marking.
1160 1164 _restart_for_overflow = true;
1161 1165
1162 1166 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow");
1163 1167
1164 1168 // Clear the marking state because we will be restarting
1165 1169 // marking due to overflowing the global mark stack.
1166 1170 reset_marking_for_restart();
1167 1171 }
1168 1172
1169 1173 {
1170 1174 GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1171 1175 report_object_count(mark_finished);
1172 1176 }
1173 1177
1174 1178 // Statistics
1175 1179 double now = os::elapsedTime();
1176 1180 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1177 1181 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1178 1182 _remark_times.add((now - start) * 1000.0);
1179 1183
1180 1184 policy->record_concurrent_mark_remark_end();
1181 1185 }
1182 1186
1183 1187 class G1ReclaimEmptyRegionsTask : public AbstractGangTask {
1184 1188 // Per-region work during the Cleanup pause.
1185 1189 class G1ReclaimEmptyRegionsClosure : public HeapRegionClosure {
1186 1190 G1CollectedHeap* _g1h;
1187 1191 size_t _freed_bytes;
1188 1192 FreeRegionList* _local_cleanup_list;
1189 1193 uint _old_regions_removed;
1190 1194 uint _humongous_regions_removed;
1191 1195
1192 1196 public:
1193 1197 G1ReclaimEmptyRegionsClosure(G1CollectedHeap* g1h,
1194 1198 FreeRegionList* local_cleanup_list) :
1195 1199 _g1h(g1h),
1196 1200 _freed_bytes(0),
1197 1201 _local_cleanup_list(local_cleanup_list),
1198 1202 _old_regions_removed(0),
1199 1203 _humongous_regions_removed(0) { }
1200 1204
1201 1205 size_t freed_bytes() { return _freed_bytes; }
1202 1206 const uint old_regions_removed() { return _old_regions_removed; }
1203 1207 const uint humongous_regions_removed() { return _humongous_regions_removed; }
1204 1208
1205 1209 bool do_heap_region(HeapRegion *hr) {
1206 1210 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
1207 1211 _freed_bytes += hr->used();
1208 1212 hr->set_containing_set(NULL);
1209 1213 if (hr->is_humongous()) {
1210 1214 _humongous_regions_removed++;
1211 1215 _g1h->free_humongous_region(hr, _local_cleanup_list);
1212 1216 } else {
1213 1217 _old_regions_removed++;
1214 1218 _g1h->free_region(hr, _local_cleanup_list);
1215 1219 }
1216 1220 hr->clear_cardtable();
1217 1221 _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index());
1218 1222 log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom()));
1219 1223 }
1220 1224
1221 1225 return false;
1222 1226 }
1223 1227 };
1224 1228
1225 1229 G1CollectedHeap* _g1h;
1226 1230 FreeRegionList* _cleanup_list;
1227 1231 HeapRegionClaimer _hrclaimer;
1228 1232
1229 1233 public:
1230 1234 G1ReclaimEmptyRegionsTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1231 1235 AbstractGangTask("G1 Cleanup"),
1232 1236 _g1h(g1h),
1233 1237 _cleanup_list(cleanup_list),
1234 1238 _hrclaimer(n_workers) {
1235 1239 }
1236 1240
1237 1241 void work(uint worker_id) {
1238 1242 FreeRegionList local_cleanup_list("Local Cleanup List");
1239 1243 G1ReclaimEmptyRegionsClosure cl(_g1h, &local_cleanup_list);
1240 1244 _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id);
1241 1245 assert(cl.is_complete(), "Shouldn't have aborted!");
1242 1246
1243 1247 // Now update the old/humongous region sets
1244 1248 _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed());
1245 1249 {
1246 1250 MutexLocker x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1247 1251 _g1h->decrement_summary_bytes(cl.freed_bytes());
1248 1252
1249 1253 _cleanup_list->add_ordered(&local_cleanup_list);
1250 1254 assert(local_cleanup_list.is_empty(), "post-condition");
1251 1255 }
1252 1256 }
1253 1257 };
1254 1258
1255 1259 void G1ConcurrentMark::reclaim_empty_regions() {
1256 1260 WorkGang* workers = _g1h->workers();
1257 1261 FreeRegionList empty_regions_list("Empty Regions After Mark List");
1258 1262
1259 1263 G1ReclaimEmptyRegionsTask cl(_g1h, &empty_regions_list, workers->active_workers());
1260 1264 workers->run_task(&cl);
1261 1265
1262 1266 if (!empty_regions_list.is_empty()) {
1263 1267 log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length());
1264 1268 // Now print the empty regions list.
1265 1269 G1HRPrinter* hrp = _g1h->hr_printer();
1266 1270 if (hrp->is_active()) {
1267 1271 FreeRegionListIterator iter(&empty_regions_list);
1268 1272 while (iter.more_available()) {
1269 1273 HeapRegion* hr = iter.get_next();
1270 1274 hrp->cleanup(hr);
1271 1275 }
1272 1276 }
1273 1277 // And actually make them available.
1274 1278 _g1h->prepend_to_freelist(&empty_regions_list);
1275 1279 }
1276 1280 }
1277 1281
1278 1282 void G1ConcurrentMark::compute_new_sizes() {
1279 1283 MetaspaceGC::compute_new_size();
1280 1284
1281 1285 // Cleanup will have freed any regions completely full of garbage.
1282 1286 // Update the soft reference policy with the new heap occupancy.
1283 1287 Universe::update_heap_info_at_gc();
1284 1288
1285 1289 // We reclaimed old regions so we should calculate the sizes to make
1286 1290 // sure we update the old gen/space data.
1287 1291 _g1h->g1mm()->update_sizes();
1288 1292 }
1289 1293
1290 1294 void G1ConcurrentMark::cleanup() {
1291 1295 assert_at_safepoint_on_vm_thread();
1292 1296
1293 1297 // If a full collection has happened, we shouldn't do this.
1294 1298 if (has_aborted()) {
1295 1299 return;
1296 1300 }
1297 1301
1298 1302 G1Policy* policy = _g1h->policy();
1299 1303 policy->record_concurrent_mark_cleanup_start();
1300 1304
1301 1305 double start = os::elapsedTime();
1302 1306
1303 1307 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup before");
1304 1308
1305 1309 {
1306 1310 GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1307 1311 G1UpdateRemSetTrackingAfterRebuild cl(_g1h);
1308 1312 _g1h->heap_region_iterate(&cl);
1309 1313 }
1310 1314
1311 1315 if (log_is_enabled(Trace, gc, liveness)) {
1312 1316 G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
1313 1317 _g1h->heap_region_iterate(&cl);
1314 1318 }
1315 1319
1316 1320 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after");
1317 1321
1318 1322   // We need to make this a "collection" so any collection pause that
1319 1323 // races with it goes around and waits for Cleanup to finish.
1320 1324 _g1h->increment_total_collections();
1321 1325
1322 1326 // Local statistics
1323 1327 double recent_cleanup_time = (os::elapsedTime() - start);
1324 1328 _total_cleanup_time += recent_cleanup_time;
1325 1329 _cleanup_times.add(recent_cleanup_time);
1326 1330
1327 1331 {
1328 1332 GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1329 1333 policy->record_concurrent_mark_cleanup_end();
1330 1334 }
1331 1335 }
1332 1336
1333 1337 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1334 1338 // Uses the G1CMTask associated with a worker thread (for serial reference
1335 1339 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1336 1340 // trace referent objects.
1337 1341 //
1338 1342 // Using the G1CMTask and embedded local queues avoids having the worker
1339 1343 // threads operating on the global mark stack. This reduces the risk
1340 1344 // of overflowing the stack - which we would rather avoid at this late
1341 1345 // state. Also using the tasks' local queues removes the potential
1342 1346 // of the workers interfering with each other that could occur if
1343 1347 // operating on the global stack.
1344 1348
1345 1349 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1346 1350 G1ConcurrentMark* _cm;
1347 1351 G1CMTask* _task;
1348 1352 uint _ref_counter_limit;
1349 1353 uint _ref_counter;
1350 1354 bool _is_serial;
1351 1355 public:
1352 1356 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1353 1357 _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1354 1358 _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1355 1359 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1356 1360 }
1357 1361
1358 1362 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1359 1363 virtual void do_oop( oop* p) { do_oop_work(p); }
1360 1364
1361 1365 template <class T> void do_oop_work(T* p) {
1362 1366 if (_cm->has_overflown()) {
1363 1367 return;
1364 1368 }
1365 1369 if (!_task->deal_with_reference(p)) {
1366 1370 // We did not add anything to the mark bitmap (or mark stack), so there is
1367 1371 // no point trying to drain it.
1368 1372 return;
1369 1373 }
1370 1374 _ref_counter--;
1371 1375
1372 1376 if (_ref_counter == 0) {
1373 1377 // We have dealt with _ref_counter_limit references, pushing them
1374 1378 // and objects reachable from them on to the local stack (and
1375 1379 // possibly the global stack). Call G1CMTask::do_marking_step() to
1376 1380 // process these entries.
1377 1381 //
1378 1382 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1379 1383 // there's nothing more to do (i.e. we're done with the entries that
1380 1384 // were pushed as a result of the G1CMTask::deal_with_reference() calls
1381 1385 // above) or we overflow.
1382 1386 //
1383 1387 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1384 1388 // flag while there may still be some work to do. (See the comment at
1385 1389 // the beginning of G1CMTask::do_marking_step() for those conditions -
1386 1390 // one of which is reaching the specified time target.) It is only
1387 1391 // when G1CMTask::do_marking_step() returns without setting the
1388 1392 // has_aborted() flag that the marking step has completed.
1389 1393 do {
1390 1394 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1391 1395 _task->do_marking_step(mark_step_duration_ms,
1392 1396 false /* do_termination */,
1393 1397 _is_serial);
1394 1398 } while (_task->has_aborted() && !_cm->has_overflown());
1395 1399 _ref_counter = _ref_counter_limit;
1396 1400 }
1397 1401 }
1398 1402 };
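
Stripped of GC detail, the counter scheme above reduces to the following self-contained sketch. It is illustrative only: process_one() and drain_queues() are hypothetical stand-ins for G1CMTask::deal_with_reference() and the bounded do_marking_step() loop, and the interval of 4 stands in for G1RefProcDrainInterval.

#include <cstdio>

// Hypothetical stand-ins: process_one() reports whether it marked anything
// (cf. deal_with_reference()); drain_queues() does one bounded drain
// (cf. the do_marking_step() loop above).
static bool process_one(int ref)  { return (ref % 2) == 0; }
static void drain_queues()        { std::puts("drain");    }

class PeriodicDrainer {
  unsigned _limit;    // references to process between drains
  unsigned _counter;  // countdown until the next drain
public:
  explicit PeriodicDrainer(unsigned limit) : _limit(limit), _counter(limit) {}

  void visit(int ref) {
    if (!process_one(ref)) {
      return;               // nothing newly marked, nothing to drain
    }
    if (--_counter == 0) {  // processed _limit references since last drain
      drain_queues();
      _counter = _limit;    // rearm the countdown
    }
  }
};

int main() {
  PeriodicDrainer d(4);     // drain after every 4 marked references
  for (int i = 0; i < 16; i++) {
    d.visit(i);
  }
  return 0;
}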
1399 1403
1400 1404 // 'Drain' oop closure used by both serial and parallel reference processing.
1401 1405 // Uses the G1CMTask associated with a given worker thread (for serial
1402 1406 // reference processing the G1CMTask for worker 0 is used). Calls the
1403 1407 // do_marking_step routine, with an unbelievably large timeout value,
1404 1408 // to drain the marking data structures of the remaining entries
1405 1409 // added by the 'keep alive' oop closure above.
1406 1410
1407 1411 class G1CMDrainMarkingStackClosure : public VoidClosure {
1408 1412 G1ConcurrentMark* _cm;
1409 1413 G1CMTask* _task;
1410 1414 bool _is_serial;
1411 1415 public:
1412 1416 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1413 1417 _cm(cm), _task(task), _is_serial(is_serial) {
1414 1418 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1415 1419 }
1416 1420
1417 1421 void do_void() {
1418 1422 do {
1419 1423 // We call G1CMTask::do_marking_step() to completely drain the local
1420 1424 // and global marking stacks of entries pushed by the 'keep alive'
1421 1425 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1422 1426 //
1423 1427 // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1424 1428 // if there's nothing more to do (i.e. we've completely drained the
1425 1429       // entries that were pushed as a result of applying the 'keep alive'
1426 1430 // closure to the entries on the discovered ref lists) or we overflow
1427 1431 // the global marking stack.
1428 1432 //
1429 1433 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1430 1434 // flag while there may still be some work to do. (See the comment at
1431 1435 // the beginning of G1CMTask::do_marking_step() for those conditions -
1432 1436 // one of which is reaching the specified time target.) It is only
1433 1437 // when G1CMTask::do_marking_step() returns without setting the
1434 1438 // has_aborted() flag that the marking step has completed.
1435 1439
1436 1440 _task->do_marking_step(1000000000.0 /* something very large */,
1437 1441 true /* do_termination */,
1438 1442 _is_serial);
1439 1443 } while (_task->has_aborted() && !_cm->has_overflown());
1440 1444 }
1441 1445 };
1442 1446
1443 1447 // Implementation of AbstractRefProcTaskExecutor for parallel
1444 1448 // reference processing at the end of G1 concurrent marking
1445 1449
1446 1450 class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor {
1447 1451 private:
1448 1452 G1CollectedHeap* _g1h;
1449 1453 G1ConcurrentMark* _cm;
1450 1454 WorkGang* _workers;
1451 1455 uint _active_workers;
1452 1456
1453 1457 public:
1454 1458 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1455 1459 G1ConcurrentMark* cm,
1456 1460 WorkGang* workers,
1457 1461 uint n_workers) :
1458 1462 _g1h(g1h), _cm(cm),
1459 1463 _workers(workers), _active_workers(n_workers) { }
1460 1464
1461 1465 virtual void execute(ProcessTask& task, uint ergo_workers);
1462 1466 };
1463 1467
1464 1468 class G1CMRefProcTaskProxy : public AbstractGangTask {
1465 1469 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1466 1470 ProcessTask& _proc_task;
1467 1471 G1CollectedHeap* _g1h;
1468 1472 G1ConcurrentMark* _cm;
1469 1473
1470 1474 public:
1471 1475 G1CMRefProcTaskProxy(ProcessTask& proc_task,
1472 1476 G1CollectedHeap* g1h,
1473 1477 G1ConcurrentMark* cm) :
1474 1478 AbstractGangTask("Process reference objects in parallel"),
1475 1479 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1476 1480 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1477 1481 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1478 1482 }
1479 1483
1480 1484 virtual void work(uint worker_id) {
1481 1485 ResourceMark rm;
1482 1486 HandleMark hm;
1483 1487 G1CMTask* task = _cm->task(worker_id);
1484 1488 G1CMIsAliveClosure g1_is_alive(_g1h);
1485 1489 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1486 1490 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1487 1491
1488 1492 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1489 1493 }
1490 1494 };
1491 1495
1492 1496 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task, uint ergo_workers) {
1493 1497 assert(_workers != NULL, "Need parallel worker threads.");
1494 1498 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1495 1499 assert(_workers->active_workers() >= ergo_workers,
1496 1500 "Ergonomically chosen workers(%u) should be less than or equal to active workers(%u)",
1497 1501 ergo_workers, _workers->active_workers());
1498 1502
1499 1503 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1500 1504
1501 1505 // We need to reset the concurrency level before each
1502 1506 // proxy task execution, so that the termination protocol
1503 1507   // and overflow handling in G1CMTask::do_marking_step() know
1504 1508 // how many workers to wait for.
1505 1509 _cm->set_concurrency(ergo_workers);
1506 1510 _workers->run_task(&proc_task_proxy, ergo_workers);
1507 1511 }
1508 1512
1509 1513 void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
1510 1514 ResourceMark rm;
1511 1515 HandleMark hm;
1512 1516
1513 1517 // Is alive closure.
1514 1518 G1CMIsAliveClosure g1_is_alive(_g1h);
1515 1519
1516 1520 // Inner scope to exclude the cleaning of the string table
1517 1521 // from the displayed time.
1518 1522 {
1519 1523 GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1520 1524
1521 1525 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1522 1526
1523 1527 // See the comment in G1CollectedHeap::ref_processing_init()
1524 1528 // about how reference processing currently works in G1.
1525 1529
1526 1530 // Set the soft reference policy
1527 1531 rp->setup_policy(clear_all_soft_refs);
1528 1532 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1529 1533
1530 1534 // Instances of the 'Keep Alive' and 'Complete GC' closures used
1531 1535 // in serial reference processing. Note these closures are also
1532 1536     // used for serially processing (by the current thread) the
1533 1537 // JNI references during parallel reference processing.
1534 1538 //
1535 1539 // These closures do not need to synchronize with the worker
1536 1540 // threads involved in parallel reference processing as these
1537 1541     // instances are executed serially by the current thread (i.e.
1538 1542 // reference processing is not multi-threaded and is thus
1539 1543 // performed by the current thread instead of a gang worker).
1540 1544 //
1541 1545 // The gang tasks involved in parallel reference processing create
1542 1546 // their own instances of these closures, which do their own
1543 1547 // synchronization among themselves.
1544 1548 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1545 1549 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1546 1550
1547 1551 // We need at least one active thread. If reference processing
1548 1552     // is not multi-threaded, we use the current (VMThread) thread;
1549 1553 // otherwise we use the work gang from the G1CollectedHeap and
1550 1554 // we utilize all the worker threads we can.
1551 1555 bool processing_is_mt = rp->processing_is_mt();
1552 1556 uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
1553 1557 active_workers = clamp(active_workers, 1u, _max_num_tasks);
1554 1558
1555 1559 // Parallel processing task executor.
1556 1560 G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
1557 1561 _g1h->workers(), active_workers);
1558 1562 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1559 1563
1560 1564 // Set the concurrency level. The phase was already set prior to
1561 1565 // executing the remark task.
1562 1566 set_concurrency(active_workers);
1563 1567
1564 1568 // Set the degree of MT processing here. If the discovery was done MT,
1565 1569 // the number of threads involved during discovery could differ from
1566 1570 // the number of active workers. This is OK as long as the discovered
1567 1571 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1568 1572 rp->set_active_mt_degree(active_workers);
1569 1573
1570 1574 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1571 1575
1572 1576 // Process the weak references.
1573 1577 const ReferenceProcessorStats& stats =
1574 1578 rp->process_discovered_references(&g1_is_alive,
1575 1579 &g1_keep_alive,
1576 1580 &g1_drain_mark_stack,
1577 1581 executor,
1578 1582 &pt);
1579 1583 _gc_tracer_cm->report_gc_reference_stats(stats);
1580 1584 pt.print_all_references();
1581 1585
1582 1586 // The do_oop work routines of the keep_alive and drain_marking_stack
1583 1587 // oop closures will set the has_overflown flag if we overflow the
1584 1588 // global marking stack.
1585 1589
1586 1590 assert(has_overflown() || _global_mark_stack.is_empty(),
1587 1591 "Mark stack should be empty (unless it has overflown)");
1588 1592
1589 1593 assert(rp->num_queues() == active_workers, "why not");
1590 1594
1591 1595 rp->verify_no_references_recorded();
1592 1596 assert(!rp->discovery_enabled(), "Post condition");
1593 1597 }
1594 1598
1595 1599 if (has_overflown()) {
1596 1600     // We cannot trust g1_is_alive and the contents of the heap if the marking stack
1597 1601 // overflowed while processing references. Exit the VM.
1598 1602 fatal("Overflow during reference processing, can not continue. Please "
1599 1603 "increase MarkStackSizeMax (current value: " SIZE_FORMAT ") and "
1600 1604 "restart.", MarkStackSizeMax);
1601 1605 return;
1602 1606 }
1603 1607
1604 1608 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1605 1609
1606 1610 {
1607 1611 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1608 1612 WeakProcessor::weak_oops_do(_g1h->workers(), &g1_is_alive, &do_nothing_cl, 1);
1609 1613 }
1610 1614
1611 1615 // Unload Klasses, String, Code Cache, etc.
1612 1616 if (ClassUnloadingWithConcurrentMark) {
1613 1617 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1614 1618 bool purged_classes = SystemDictionary::do_unloading(_gc_timer_cm);
1615 1619 _g1h->complete_cleaning(&g1_is_alive, purged_classes);
1616 1620 } else if (StringDedup::is_enabled()) {
1617 1621 GCTraceTime(Debug, gc, phases) debug("String Deduplication", _gc_timer_cm);
1618 1622 _g1h->string_dedup_cleaning(&g1_is_alive, NULL);
1619 1623 }
1620 1624 }
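
The serial-versus-parallel dispatch above, where a NULL executor makes process_discovered_references() run the closures on the current thread, can be sketched as follows. Task, Executor and process_references() are illustrative names, not the ReferenceProcessor API.

#include <cstdio>

// Illustrative reduction of the dispatch: Task stands in for the keep-alive/
// drain closure pair, Executor for G1CMRefProcTaskExecutor.
struct Task {
  void run(unsigned worker_id) { std::printf("worker %u\n", worker_id); }
};

struct Executor {
  unsigned _workers;
  void execute(Task& t) {
    // stands in for WorkGang::run_task() fanning out to the gang
    for (unsigned i = 0; i < _workers; i++) {
      t.run(i);
    }
  }
};

static void process_references(Task& t, Executor* executor) {
  if (executor == nullptr) {
    t.run(0);               // serial: current thread acts as worker 0
  } else {
    executor->execute(t);   // parallel: fan out to ergo_workers threads
  }
}

int main() {
  Task t;
  process_references(t, nullptr);   // processing_is_mt == false
  Executor e = { 3 };
  process_references(t, &e);        // processing_is_mt == true
  return 0;
}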
1621 1625
1622 1626 class G1PrecleanYieldClosure : public YieldClosure {
1623 1627 G1ConcurrentMark* _cm;
1624 1628
1625 1629 public:
1626 1630 G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1627 1631
1628 1632 virtual bool should_return() {
1629 1633 return _cm->has_aborted();
1630 1634 }
1631 1635
1632 1636 virtual bool should_return_fine_grain() {
1633 1637 _cm->do_yield_check();
1634 1638 return _cm->has_aborted();
1635 1639 }
1636 1640 };
1637 1641
1638 1642 void G1ConcurrentMark::preclean() {
1639 1643 assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1640 1644
1641 1645 SuspendibleThreadSetJoiner joiner;
1642 1646
1643 1647 G1CMKeepAliveAndDrainClosure keep_alive(this, task(0), true /* is_serial */);
1644 1648 G1CMDrainMarkingStackClosure drain_mark_stack(this, task(0), true /* is_serial */);
1645 1649
1646 1650 set_concurrency_and_phase(1, true);
1647 1651
1648 1652 G1PrecleanYieldClosure yield_cl(this);
1649 1653
1650 1654 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1651 1655 // Precleaning is single threaded. Temporarily disable MT discovery.
1652 1656 ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1653 1657 rp->preclean_discovered_references(rp->is_alive_non_header(),
1654 1658 &keep_alive,
1655 1659 &drain_mark_stack,
1656 1660 &yield_cl,
1657 1661 _gc_timer_cm);
1658 1662 }
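
Precleaning consults the yield closure at two granularities: a coarse should_return() check and a fine-grain should_return_fine_grain() check that may also yield to the suspendible thread set. Below is a minimal sketch of a consumer of that protocol, under the assumption of a lists-of-references shape; preclean_lists() and NeverYield are illustrative, not the ReferenceProcessor interface.

#include <cstddef>

// Illustrative yield policy; the real G1PrecleanYieldClosure answers both
// questions based on _cm->has_aborted(), after a possible yield check.
struct NeverYield {
  bool should_return()            { return false; }  // coarse: abort the phase?
  bool should_return_fine_grain() { return false; }  // fine: stop mid-list?
};

template <typename YieldT>
static void preclean_lists(std::size_t lists, std::size_t refs_per_list, YieldT& y) {
  for (std::size_t l = 0; l < lists; l++) {
    if (y.should_return()) {
      return;                                  // checked once per list
    }
    for (std::size_t r = 0; r < refs_per_list; r++) {
      if (y.should_return_fine_grain()) {
        return;                                // checked per reference
      }
      // ... preclean one discovered reference ...
    }
  }
}

int main() {
  NeverYield y;
  preclean_lists(8, 1024, y);
  return 0;
}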
1659 1663
1660 1664 // When sampling object counts, we already swapped the mark bitmaps, so we need to use
1661 1665 // the prev bitmap for determining liveness.
1662 1666 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1663 1667 G1CollectedHeap* _g1h;
1664 1668 public:
1665 1669 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) { }
1666 1670
1667 1671 bool do_object_b(oop obj) {
1668 1672 return obj != NULL &&
1669 1673 (!_g1h->is_in_g1_reserved(obj) || !_g1h->is_obj_dead(obj));
1670 1674 }
1671 1675 };
1672 1676
1673 1677 void G1ConcurrentMark::report_object_count(bool mark_completed) {
1674 1678   // Depending on the completion of the marking, liveness needs to be determined
1675 1679 // using either the next or prev bitmap.
1676 1680 if (mark_completed) {
1677 1681 G1ObjectCountIsAliveClosure is_alive(_g1h);
1678 1682 _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1679 1683 } else {
1680 1684 G1CMIsAliveClosure is_alive(_g1h);
1681 1685 _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1682 1686 }
1683 1687 }
1684 1688
1685 1689
1686 1690 void G1ConcurrentMark::swap_mark_bitmaps() {
1687 1691 G1CMBitMap* temp = _prev_mark_bitmap;
1688 1692 _prev_mark_bitmap = _next_mark_bitmap;
1689 1693 _next_mark_bitmap = temp;
1690 1694 _g1h->collector_state()->set_clearing_next_bitmap(true);
1691 1695 }
1692 1696
1693 1697 // Closure for marking entries in SATB buffers.
1694 1698 class G1CMSATBBufferClosure : public SATBBufferClosure {
1695 1699 private:
1696 1700 G1CMTask* _task;
1697 1701 G1CollectedHeap* _g1h;
1698 1702
1699 1703 // This is very similar to G1CMTask::deal_with_reference, but with
1700 1704 // more relaxed requirements for the argument, so this must be more
1701 1705 // circumspect about treating the argument as an object.
1702 1706 void do_entry(void* entry) const {
1703 1707 _task->increment_refs_reached();
1704 1708 oop const obj = static_cast<oop>(entry);
1705 1709 _task->make_reference_grey(obj);
1706 1710 }
1707 1711
1708 1712 public:
1709 1713 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1710 1714 : _task(task), _g1h(g1h) { }
1711 1715
1712 1716 virtual void do_buffer(void** buffer, size_t size) {
1713 1717 for (size_t i = 0; i < size; ++i) {
1714 1718 do_entry(buffer[i]);
1715 1719 }
1716 1720 }
1717 1721 };
1718 1722
1719 1723 class G1RemarkThreadsClosure : public ThreadClosure {
1720 1724 G1CMSATBBufferClosure _cm_satb_cl;
1721 1725 G1CMOopClosure _cm_cl;
1722 1726 MarkingCodeBlobClosure _code_cl;
1723 1727 uintx _claim_token;
1724 1728
1725 1729 public:
1726 1730 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1727 1731 _cm_satb_cl(task, g1h),
1728 1732 _cm_cl(g1h, task),
1729 1733 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1730 1734 _claim_token(Threads::thread_claim_token()) {}
1731 1735
1732 1736 void do_thread(Thread* thread) {
1733 1737 if (thread->claim_threads_do(true, _claim_token)) {
1734 1738 SATBMarkQueue& queue = G1ThreadLocalData::satb_mark_queue(thread);
1735 1739 queue.apply_closure_and_empty(&_cm_satb_cl);
1736 1740 if (thread->is_Java_thread()) {
1737 1741         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1738 1742         // however, oops reachable from nmethods have very complex lifecycles:
1739 1743 // * Alive if on the stack of an executing method
1740 1744 // * Weakly reachable otherwise
1741 1745         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1742 1746         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1743 1747 JavaThread* jt = (JavaThread*)thread;
1744 1748 jt->nmethods_do(&_code_cl);
1745 1749 }
1746 1750 }
1747 1751 }
1748 1752 };
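
The claim_threads_do() call above relies on per-thread claim tokens so that each thread is processed by exactly one worker per iteration, even though every worker walks the full thread list. A minimal model of that idea follows; FakeThread and try_claim() are illustrative names rather than the Threads API.

#include <atomic>
#include <cstdint>

// Each thread remembers the token of the last iteration that visited it;
// a worker claims a thread by CAS-ing that field to the current iteration's
// token, so at most one racing worker succeeds per iteration.
struct FakeThread {
  std::atomic<std::uintptr_t> _claim_token;

  FakeThread() : _claim_token(0) {}

  bool try_claim(std::uintptr_t token) {
    std::uintptr_t seen = _claim_token.load();
    if (seen == token) {
      return false;  // already claimed in this iteration
    }
    return _claim_token.compare_exchange_strong(seen, token);
  }
};

int main() {
  FakeThread t;
  std::uintptr_t iteration_token = 1;
  bool first  = t.try_claim(iteration_token);   // true: we claimed it
  bool second = t.try_claim(iteration_token);   // false: already claimed
  return (first && !second) ? 0 : 1;
}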
1749 1753
1750 1754 class G1CMRemarkTask : public AbstractGangTask {
1751 1755 G1ConcurrentMark* _cm;
1752 1756 public:
1753 1757 void work(uint worker_id) {
1754 1758 G1CMTask* task = _cm->task(worker_id);
1755 1759 task->record_start_time();
1756 1760 {
1757 1761 ResourceMark rm;
1758 1762 HandleMark hm;
1759 1763
1760 1764 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1761 1765 Threads::threads_do(&threads_f);
1762 1766 }
1763 1767
1764 1768 do {
1765 1769 task->do_marking_step(1000000000.0 /* something very large */,
1766 1770 true /* do_termination */,
1767 1771 false /* is_serial */);
1768 1772 } while (task->has_aborted() && !_cm->has_overflown());
1769 1773 // If we overflow, then we do not want to restart. We instead
1770 1774 // want to abort remark and do concurrent marking again.
1771 1775 task->record_end_time();
1772 1776 }
1773 1777
1774 1778 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1775 1779 AbstractGangTask("Par Remark"), _cm(cm) {
1776 1780 _cm->terminator()->reset_for_reuse(active_workers);
1777 1781 }
1778 1782 };
1779 1783
1780 1784 void G1ConcurrentMark::finalize_marking() {
1781 1785 ResourceMark rm;
1782 1786 HandleMark hm;
1783 1787
1784 1788 _g1h->ensure_parsability(false);
1785 1789
1786 1790 // this is remark, so we'll use up all active threads
1787 1791 uint active_workers = _g1h->workers()->active_workers();
1788 1792 set_concurrency_and_phase(active_workers, false /* concurrent */);
1789 1793   // Leave _parallel_marking_threads at its
1790 1794   // value originally calculated in the G1ConcurrentMark
1791 1795   // constructor and pass the number of active workers
1792 1796   // through the gang in the task.
1793 1797
1794 1798 {
1795 1799 StrongRootsScope srs(active_workers);
1796 1800
1797 1801 G1CMRemarkTask remarkTask(this, active_workers);
1798 1802 // We will start all available threads, even if we decide that the
1799 1803 // active_workers will be fewer. The extra ones will just bail out
1800 1804 // immediately.
1801 1805 _g1h->workers()->run_task(&remarkTask);
1802 1806 }
1803 1807
1804 1808 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1805 1809 guarantee(has_overflown() ||
1806 1810 satb_mq_set.completed_buffers_num() == 0,
1807 1811 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1808 1812 BOOL_TO_STR(has_overflown()),
1809 1813 satb_mq_set.completed_buffers_num());
1810 1814
1811 1815 print_stats();
1812 1816 }
1813 1817
1814 1818 void G1ConcurrentMark::flush_all_task_caches() {
1815 1819 size_t hits = 0;
1816 1820 size_t misses = 0;
1817 1821 for (uint i = 0; i < _max_num_tasks; i++) {
1818 1822 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1819 1823 hits += stats.first;
1820 1824 misses += stats.second;
1821 1825 }
1822 1826 size_t sum = hits + misses;
1823 1827 log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf",
1824 1828 hits, misses, percent_of(hits, sum));
1825 1829 }
1826 1830
1827 1831 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
1828 1832 _prev_mark_bitmap->clear_range(mr);
1829 1833 }
1830 1834
1831 1835 HeapRegion*
1832 1836 G1ConcurrentMark::claim_region(uint worker_id) {
1833 1837 // "checkpoint" the finger
1834 1838 HeapWord* finger = _finger;
1835 1839
1836 1840 while (finger < _heap.end()) {
1837 1841 assert(_g1h->is_in_g1_reserved(finger), "invariant");
1838 1842
1839 1843 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1840 1844 // Make sure that the reads below do not float before loading curr_region.
1841 1845 OrderAccess::loadload();
1842 1846     // Above, heap_region_containing may return NULL as we always scan and claim
1843 1847 // until the end of the heap. In this case, just jump to the next region.
1844 1848 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1845 1849
1846 1850 // Is the gap between reading the finger and doing the CAS too long?
1847 1851 HeapWord* res = Atomic::cmpxchg(&_finger, finger, end);
1848 1852 if (res == finger && curr_region != NULL) {
1849 1853 // we succeeded
1850 1854 HeapWord* bottom = curr_region->bottom();
1851 1855 HeapWord* limit = curr_region->next_top_at_mark_start();
1852 1856
1853 1857       // notice that _finger == end cannot be guaranteed here, since
1854 1858 // someone else might have moved the finger even further
1855 1859 assert(_finger >= end, "the finger should have moved forward");
1856 1860
1857 1861 if (limit > bottom) {
1858 1862 return curr_region;
1859 1863 } else {
1860 1864 assert(limit == bottom,
1861 1865 "the region limit should be at bottom");
1862 1866 // we return NULL and the caller should try calling
1863 1867 // claim_region() again.
1864 1868 return NULL;
1865 1869 }
1866 1870 } else {
1867 1871 assert(_finger > finger, "the finger should have moved forward");
1868 1872 // read it again
1869 1873 finger = _finger;
1870 1874 }
1871 1875 }
1872 1876
1873 1877 return NULL;
1874 1878 }
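
The claiming protocol above is a classic read-compute-CAS loop. The sketch below reduces it to fixed-size, grain-aligned pseudo-regions and omits the NULL-region and empty-region handling; kGrain is an illustrative stand-in for HeapRegion::GrainWords.

#include <atomic>
#include <cstdint>
#include <cstdio>

static const std::uintptr_t kGrain = 1024;      // illustrative region size
static std::atomic<std::uintptr_t> g_finger(0); // shared claim finger

// Returns the start of a claimed region, or heap_end when out of regions.
static std::uintptr_t claim(std::uintptr_t heap_end) {
  std::uintptr_t finger = g_finger.load();      // "checkpoint" the finger
  while (finger < heap_end) {
    std::uintptr_t end = (finger / kGrain + 1) * kGrain; // end of this region
    if (g_finger.compare_exchange_strong(finger, end)) {
      return finger;  // CAS won: this worker owns [finger, end)
    }
    // CAS lost: 'finger' was reloaded with the current value; retry.
  }
  return heap_end;    // out of regions
}

int main() {
  const std::uintptr_t heap_end = 4 * kGrain;
  for (std::uintptr_t r; (r = claim(heap_end)) < heap_end; ) {
    std::printf("claimed region starting at %ju\n", (std::uintmax_t)r);
  }
  return 0;
}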
1875 1879
1876 1880 #ifndef PRODUCT
1877 1881 class VerifyNoCSetOops {
1878 1882 G1CollectedHeap* _g1h;
1879 1883 const char* _phase;
1880 1884 int _info;
1881 1885
1882 1886 public:
1883 1887 VerifyNoCSetOops(const char* phase, int info = -1) :
1884 1888 _g1h(G1CollectedHeap::heap()),
1885 1889 _phase(phase),
1886 1890 _info(info)
1887 1891 { }
1888 1892
1889 1893 void operator()(G1TaskQueueEntry task_entry) const {
1890 1894 if (task_entry.is_array_slice()) {
1891 1895 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1892 1896 return;
1893 1897 }
1894 1898 guarantee(oopDesc::is_oop(task_entry.obj()),
1895 1899 "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1896 1900 p2i(task_entry.obj()), _phase, _info);
1897 1901 HeapRegion* r = _g1h->heap_region_containing(task_entry.obj());
1898 1902 guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1899 1903 "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1900 1904 p2i(task_entry.obj()), _phase, _info, r->hrm_index());
1901 1905 }
1902 1906 };
1903 1907
1904 1908 void G1ConcurrentMark::verify_no_collection_set_oops() {
1905 1909 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1906 1910 if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
1907 1911 return;
1908 1912 }
1909 1913
1910 1914 // Verify entries on the global mark stack
1911 1915 _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1912 1916
1913 1917 // Verify entries on the task queues
1914 1918 for (uint i = 0; i < _max_num_tasks; ++i) {
1915 1919 G1CMTaskQueue* queue = _task_queues->queue(i);
1916 1920 queue->iterate(VerifyNoCSetOops("Queue", i));
1917 1921 }
1918 1922
1919 1923 // Verify the global finger
1920 1924 HeapWord* global_finger = finger();
1921 1925 if (global_finger != NULL && global_finger < _heap.end()) {
1922 1926 // Since we always iterate over all regions, we might get a NULL HeapRegion
1923 1927 // here.
1924 1928 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
1925 1929 guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
1926 1930 "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1927 1931 p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1928 1932 }
1929 1933
1930 1934 // Verify the task fingers
1931 1935 assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1932 1936 for (uint i = 0; i < _num_concurrent_workers; ++i) {
1933 1937 G1CMTask* task = _tasks[i];
1934 1938 HeapWord* task_finger = task->finger();
1935 1939 if (task_finger != NULL && task_finger < _heap.end()) {
1936 1940 // See above note on the global finger verification.
1937 1941 HeapRegion* r = _g1h->heap_region_containing(task_finger);
1938 1942 guarantee(r == NULL || task_finger == r->bottom() ||
1939 1943 !r->in_collection_set() || !r->has_index_in_opt_cset(),
1940 1944 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1941 1945 p2i(task_finger), HR_FORMAT_PARAMS(r));
1942 1946 }
1943 1947 }
1944 1948 }
1945 1949 #endif // PRODUCT
1946 1950
1947 1951 void G1ConcurrentMark::rebuild_rem_set_concurrently() {
1948 1952 _g1h->rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset);
1949 1953 }
1950 1954
1951 1955 void G1ConcurrentMark::print_stats() {
1952 1956 if (!log_is_enabled(Debug, gc, stats)) {
1953 1957 return;
1954 1958 }
1955 1959 log_debug(gc, stats)("---------------------------------------------------------------------");
1956 1960 for (size_t i = 0; i < _num_active_tasks; ++i) {
1957 1961 _tasks[i]->print_stats();
1958 1962 log_debug(gc, stats)("---------------------------------------------------------------------");
1959 1963 }
1960 1964 }
1961 1965
1962 -void G1ConcurrentMark::concurrent_cycle_abort() {
1963 - if (!cm_thread()->during_cycle() || _has_aborted) {
1966 +void G1ConcurrentMark::concurrent_cycle_abort_by_initial_mark() {
1967 + _aborted_by_initial_mark = true;
1968 + _g1h->collector_state()->set_in_initial_mark_gc(false);
1969 +}
1970 +
1971 +void G1ConcurrentMark::concurrent_cycle_abort_by_fullgc() {
1972 + if (!cm_thread()->during_cycle() || _aborted_by_fullgc) {
1964 1973 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
1965 1974 return;
1966 1975 }
1967 1976
1968 1977 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
1969 1978 // concurrent bitmap clearing.
1970 1979 {
1971 1980 GCTraceTime(Debug, gc) debug("Clear Next Bitmap");
1972 1981 clear_bitmap(_next_mark_bitmap, _g1h->workers(), false);
1973 1982 }
1974 1983 // Note we cannot clear the previous marking bitmap here
1975 1984 // since VerifyDuringGC verifies the objects marked during
1976 1985 // a full GC against the previous bitmap.
1977 1986
1978 1987 // Empty mark stack
1979 1988 reset_marking_for_restart();
1980 1989 for (uint i = 0; i < _max_num_tasks; ++i) {
1981 1990 _tasks[i]->clear_region_fields();
1982 1991 }
1983 1992 _first_overflow_barrier_sync.abort();
1984 1993 _second_overflow_barrier_sync.abort();
1985 - _has_aborted = true;
1994 + _aborted_by_fullgc = true;
1986 1995
1987 1996 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1988 1997 satb_mq_set.abandon_partial_marking();
1989 1998   // This can be called either during or outside marking; we'll read
1990 1999 // the expected_active value from the SATB queue set.
1991 2000 satb_mq_set.set_active_all_threads(
1992 2001 false, /* new active value */
1993 2002 satb_mq_set.is_active() /* expected_active */);
1994 2003 }
1995 2004
1996 2005 static void print_ms_time_info(const char* prefix, const char* name,
1997 2006 NumberSeq& ns) {
1998 2007 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
1999 2008 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2000 2009 if (ns.num() > 0) {
2001 2010 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2002 2011 prefix, ns.sd(), ns.maximum());
2003 2012 }
2004 2013 }
2005 2014
2006 2015 void G1ConcurrentMark::print_summary_info() {
2007 2016 Log(gc, marking) log;
2008 2017 if (!log.is_trace()) {
2009 2018 return;
2010 2019 }
2011 2020
2012 2021 log.trace(" Concurrent marking:");
2013 2022 print_ms_time_info(" ", "init marks", _init_times);
2014 2023 print_ms_time_info(" ", "remarks", _remark_times);
2015 2024 {
2016 2025 print_ms_time_info(" ", "final marks", _remark_mark_times);
2017 2026 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2018 2027
2019 2028 }
2020 2029 print_ms_time_info(" ", "cleanups", _cleanup_times);
2021 2030 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2022 2031 _total_cleanup_time, (_cleanup_times.num() > 0 ? _total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2023 2032 log.trace(" Total stop_world time = %8.2f s.",
2024 2033 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2025 2034 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
2026 2035 cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum());
2027 2036 }
2028 2037
2029 2038 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2030 2039 _concurrent_workers->print_worker_threads_on(st);
2031 2040 }
2032 2041
2033 2042 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2034 2043 _concurrent_workers->threads_do(tc);
2035 2044 }
2036 2045
2037 2046 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2038 2047 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2039 2048 p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap));
2040 2049 _prev_mark_bitmap->print_on_error(st, " Prev Bits: ");
2041 2050 _next_mark_bitmap->print_on_error(st, " Next Bits: ");
2042 2051 }
2043 2052
2044 2053 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2045 2054 ReferenceProcessor* result = g1h->ref_processor_cm();
2046 2055 assert(result != NULL, "CM reference processor should not be NULL");
2047 2056 return result;
2048 2057 }
2049 2058
2050 2059 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2051 2060 G1CMTask* task)
2052 2061 : MetadataVisitingOopIterateClosure(get_cm_oop_closure_ref_processor(g1h)),
2053 2062 _g1h(g1h), _task(task)
2054 2063 { }
2055 2064
2056 2065 void G1CMTask::setup_for_region(HeapRegion* hr) {
2057 2066 assert(hr != NULL,
2058 2067 "claim_region() should have filtered out NULL regions");
2059 2068 _curr_region = hr;
2060 2069 _finger = hr->bottom();
2061 2070 update_region_limit();
2062 2071 }
2063 2072
2064 2073 void G1CMTask::update_region_limit() {
2065 2074 HeapRegion* hr = _curr_region;
2066 2075 HeapWord* bottom = hr->bottom();
2067 2076 HeapWord* limit = hr->next_top_at_mark_start();
2068 2077
2069 2078 if (limit == bottom) {
2070 2079 // The region was collected underneath our feet.
2071 2080 // We set the finger to bottom to ensure that the bitmap
2072 2081 // iteration that will follow this will not do anything.
2073 2082 // (this is not a condition that holds when we set the region up,
2074 2083 // as the region is not supposed to be empty in the first place)
2075 2084 _finger = bottom;
2076 2085 } else if (limit >= _region_limit) {
2077 2086 assert(limit >= _finger, "peace of mind");
2078 2087 } else {
2079 2088 assert(limit < _region_limit, "only way to get here");
2080 2089 // This can happen under some pretty unusual circumstances. An
2081 2090 // evacuation pause empties the region underneath our feet (NTAMS
2082 2091 // at bottom). We then do some allocation in the region (NTAMS
2083 2092 // stays at bottom), followed by the region being used as a GC
2084 2093 // alloc region (NTAMS will move to top() and the objects
2085 2094 // originally below it will be grayed). All objects now marked in
2086 2095 // the region are explicitly grayed, if below the global finger,
2087 2096 // and we do not need in fact to scan anything else. So, we simply
2088 2097 // set _finger to be limit to ensure that the bitmap iteration
2089 2098 // doesn't do anything.
2090 2099 _finger = limit;
2091 2100 }
2092 2101
2093 2102 _region_limit = limit;
2094 2103 }
2095 2104
2096 2105 void G1CMTask::giveup_current_region() {
2097 2106 assert(_curr_region != NULL, "invariant");
2098 2107 clear_region_fields();
2099 2108 }
2100 2109
2101 2110 void G1CMTask::clear_region_fields() {
2102 2111 // Values for these three fields that indicate that we're not
2103 2112 // holding on to a region.
2104 2113 _curr_region = NULL;
2105 2114 _finger = NULL;
2106 2115 _region_limit = NULL;
2107 2116 }
2108 2117
2109 2118 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2110 2119 if (cm_oop_closure == NULL) {
2111 2120 assert(_cm_oop_closure != NULL, "invariant");
2112 2121 } else {
2113 2122 assert(_cm_oop_closure == NULL, "invariant");
2114 2123 }
2115 2124 _cm_oop_closure = cm_oop_closure;
2116 2125 }
2117 2126
2118 2127 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
2119 2128 guarantee(next_mark_bitmap != NULL, "invariant");
2120 2129 _next_mark_bitmap = next_mark_bitmap;
2121 2130 clear_region_fields();
2122 2131
2123 2132 _calls = 0;
2124 2133 _elapsed_time_ms = 0.0;
2125 2134 _termination_time_ms = 0.0;
2126 2135 _termination_start_time_ms = 0.0;
2127 2136
2128 2137 _mark_stats_cache.reset();
2129 2138 }
2130 2139
2131 2140 bool G1CMTask::should_exit_termination() {
2132 2141 if (!regular_clock_call()) {
2133 2142 return true;
2134 2143 }
2135 2144
2136 2145 // This is called when we are in the termination protocol. We should
2137 2146 // quit if, for some reason, this task wants to abort or the global
2138 2147 // stack is not empty (this means that we can get work from it).
2139 2148 return !_cm->mark_stack_empty() || has_aborted();
2140 2149 }
2141 2150
2142 2151 void G1CMTask::reached_limit() {
2143 2152 assert(_words_scanned >= _words_scanned_limit ||
2144 2153          _refs_reached >= _refs_reached_limit,
2145 2154 "shouldn't have been called otherwise");
2146 2155 abort_marking_if_regular_check_fail();
2147 2156 }
2148 2157
2149 2158 bool G1CMTask::regular_clock_call() {
2150 2159 if (has_aborted()) {
2151 2160 return false;
2152 2161 }
2153 2162
2154 2163 // First, we need to recalculate the words scanned and refs reached
2155 2164 // limits for the next clock call.
2156 2165 recalculate_limits();
2157 2166
2158 2167 // During the regular clock call we do the following
2159 2168
2160 2169 // (1) If an overflow has been flagged, then we abort.
2161 2170 if (_cm->has_overflown()) {
2162 2171 return false;
2163 2172 }
2164 2173
2165 2174 // If we are not concurrent (i.e. we're doing remark) we don't need
2166 2175 // to check anything else. The other steps are only needed during
2167 2176 // the concurrent marking phase.
2168 2177 if (!_cm->concurrent()) {
2169 2178 return true;
2170 2179 }
2171 2180
2172 2181 // (2) If marking has been aborted for Full GC, then we also abort.
2173 2182 if (_cm->has_aborted()) {
2174 2183 return false;
2175 2184 }
2176 2185
2177 2186 double curr_time_ms = os::elapsedVTime() * 1000.0;
2178 2187
2179 2188   // (3) We check whether we should yield. If we have to, then we abort.
2180 2189 if (SuspendibleThreadSet::should_yield()) {
2181 2190 // We should yield. To do this we abort the task. The caller is
2182 2191 // responsible for yielding.
2183 2192 return false;
2184 2193 }
2185 2194
2186 2195   // (4) We check whether we've reached our time quota. If we have,
2187 2196 // then we abort.
2188 2197 double elapsed_time_ms = curr_time_ms - _start_time_ms;
2189 2198 if (elapsed_time_ms > _time_target_ms) {
2190 2199 _has_timed_out = true;
2191 2200 return false;
2192 2201 }
2193 2202
2194 2203   // (5) Finally, we check whether there are enough completed SATB
2195 2204 // buffers available for processing. If there are, we abort.
2196 2205 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2197 2206 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2198 2207     // we do need to process SATB buffers, so we'll abort and restart
2199 2208 // the marking task to do so
2200 2209 return false;
2201 2210 }
2202 2211 return true;
2203 2212 }
2204 2213
2205 2214 void G1CMTask::recalculate_limits() {
2206 2215 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2207 2216 _words_scanned_limit = _real_words_scanned_limit;
2208 2217
2209 2218 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2210 2219 _refs_reached_limit = _real_refs_reached_limit;
2211 2220 }
2212 2221
2213 2222 void G1CMTask::decrease_limits() {
2214 2223 // This is called when we believe that we're going to do an infrequent
2215 2224 // operation which will increase the per byte scanned cost (i.e. move
2216 2225 // entries to/from the global stack). It basically tries to decrease the
2217 2226 // scanning limit so that the clock is called earlier.
2218 2227
2219 2228 _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2220 2229 _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2221 2230 }
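
For concreteness, under a hypothetical period of 12k words: immediately after recalculate_limits() the limit is _words_scanned + 12k; decrease_limits() then subtracts 3 * 12k / 4 = 9k, leaving _words_scanned + 3k, so the next clock call fires after only a quarter of the usual scanning work.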
2222 2231
2223 2232 void G1CMTask::move_entries_to_global_stack() {
2224 2233 // Local array where we'll store the entries that will be popped
2225 2234 // from the local queue.
2226 2235 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2227 2236
2228 2237 size_t n = 0;
2229 2238 G1TaskQueueEntry task_entry;
2230 2239 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2231 2240 buffer[n] = task_entry;
2232 2241 ++n;
2233 2242 }
2234 2243 if (n < G1CMMarkStack::EntriesPerChunk) {
2235 2244 buffer[n] = G1TaskQueueEntry();
2236 2245 }
2237 2246
2238 2247 if (n > 0) {
2239 2248 if (!_cm->mark_stack_push(buffer)) {
2240 2249 set_has_aborted();
2241 2250 }
2242 2251 }
2243 2252
2244 2253 // This operation was quite expensive, so decrease the limits.
2245 2254 decrease_limits();
2246 2255 }
2247 2256
2248 2257 bool G1CMTask::get_entries_from_global_stack() {
2249 2258 // Local array where we'll store the entries that will be popped
2250 2259 // from the global stack.
2251 2260 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2252 2261
2253 2262 if (!_cm->mark_stack_pop(buffer)) {
2254 2263 return false;
2255 2264 }
2256 2265
2257 2266 // We did actually pop at least one entry.
2258 2267 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2259 2268 G1TaskQueueEntry task_entry = buffer[i];
2260 2269 if (task_entry.is_null()) {
2261 2270 break;
2262 2271 }
2263 2272 assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2264 2273 bool success = _task_queue->push(task_entry);
2265 2274 // We only call this when the local queue is empty or under a
2266 2275 // given target limit. So, we do not expect this push to fail.
2267 2276 assert(success, "invariant");
2268 2277 }
2269 2278
2270 2279 // This operation was quite expensive, so decrease the limits
2271 2280 decrease_limits();
2272 2281 return true;
2273 2282 }
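
Both transfer routines move entries in whole chunks, using a null entry as the sentinel that terminates a partially filled chunk. Below is a minimal sketch of the producer side; the chunk size of 4, the long entry type and the 0 sentinel are illustrative stand-ins for G1CMMarkStack::EntriesPerChunk and a null G1TaskQueueEntry.

#include <cstddef>
#include <deque>
#include <vector>

static const std::size_t kEntriesPerChunk = 4;

static void move_chunk(std::deque<long>& local,
                       std::deque<std::vector<long> >& global) {
  std::vector<long> buf;
  while (buf.size() < kEntriesPerChunk && !local.empty()) {
    buf.push_back(local.front());  // cf. _task_queue->pop_local()
    local.pop_front();
  }
  std::size_t n = buf.size();
  if (n < kEntriesPerChunk) {
    buf.push_back(0);              // sentinel: consumers stop at the null
  }
  if (n > 0) {
    global.push_back(buf);         // cf. _cm->mark_stack_push(buffer)
  }
}

int main() {
  std::deque<long> local;
  for (long i = 1; i <= 6; i++) local.push_back(i);
  std::deque<std::vector<long> > global;
  move_chunk(local, global);       // full chunk {1,2,3,4}
  move_chunk(local, global);       // partial chunk {5,6} + sentinel
  return (global.size() == 2) ? 0 : 1;
}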
2274 2283
2275 2284 void G1CMTask::drain_local_queue(bool partially) {
2276 2285 if (has_aborted()) {
2277 2286 return;
2278 2287 }
2279 2288
2280 2289   // Decide what the target size is, depending on whether we're going to
2281 2290 // drain it partially (so that other tasks can steal if they run out
2282 2291 // of things to do) or totally (at the very end).
2283 2292 size_t target_size;
2284 2293 if (partially) {
2285 2294 target_size = MIN2((size_t)_task_queue->max_elems()/3, (size_t)GCDrainStackTargetSize);
2286 2295 } else {
2287 2296 target_size = 0;
2288 2297 }
2289 2298
2290 2299 if (_task_queue->size() > target_size) {
2291 2300 G1TaskQueueEntry entry;
2292 2301 bool ret = _task_queue->pop_local(entry);
2293 2302 while (ret) {
2294 2303 scan_task_entry(entry);
2295 2304 if (_task_queue->size() <= target_size || has_aborted()) {
2296 2305 ret = false;
2297 2306 } else {
2298 2307 ret = _task_queue->pop_local(entry);
2299 2308 }
2300 2309 }
2301 2310 }
2302 2311 }
2303 2312
2304 2313 void G1CMTask::drain_global_stack(bool partially) {
2305 2314 if (has_aborted()) {
2306 2315 return;
2307 2316 }
2308 2317
2309 2318 // We have a policy to drain the local queue before we attempt to
2310 2319 // drain the global stack.
2311 2320 assert(partially || _task_queue->size() == 0, "invariant");
2312 2321
2313 2322   // Decide what the target size is, depending on whether we're going to
2314 2323 // drain it partially (so that other tasks can steal if they run out
2315 2324 // of things to do) or totally (at the very end).
2316 2325   // Notice that when draining the global mark stack partially, due to the raciness
2317 2326 // of the mark stack size update we might in fact drop below the target. But,
2318 2327 // this is not a problem.
2319 2328 // In case of total draining, we simply process until the global mark stack is
2320 2329 // totally empty, disregarding the size counter.
2321 2330 if (partially) {
2322 2331 size_t const target_size = _cm->partial_mark_stack_size_target();
2323 2332 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2324 2333 if (get_entries_from_global_stack()) {
2325 2334 drain_local_queue(partially);
2326 2335 }
2327 2336 }
2328 2337 } else {
2329 2338 while (!has_aborted() && get_entries_from_global_stack()) {
2330 2339 drain_local_queue(partially);
2331 2340 }
2332 2341 }
2333 2342 }
2334 2343
2335 2344 // SATB Queue has several assumptions on whether to call the par or
2336 2345 // non-par versions of the methods. This is why some of the code is
2337 2346 // replicated. We should really get rid of the single-threaded version
2338 2347 // of the code to simplify things.
2339 2348 void G1CMTask::drain_satb_buffers() {
2340 2349 if (has_aborted()) {
2341 2350 return;
2342 2351 }
2343 2352
2344 2353 // We set this so that the regular clock knows that we're in the
2345 2354 // middle of draining buffers and doesn't set the abort flag when it
2346 2355 // notices that SATB buffers are available for draining. It'd be
2347 2356   // very counterproductive if it did that. :-)
2348 2357 _draining_satb_buffers = true;
2349 2358
2350 2359 G1CMSATBBufferClosure satb_cl(this, _g1h);
2351 2360 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2352 2361
2353 2362 // This keeps claiming and applying the closure to completed buffers
2354 2363 // until we run out of buffers or we need to abort.
2355 2364 while (!has_aborted() &&
2356 2365 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2357 2366 abort_marking_if_regular_check_fail();
2358 2367 }
2359 2368
2360 2369 // Can't assert qset is empty here, even if not aborted. If concurrent,
2361 2370 // some other thread might be adding to the queue. If not concurrent,
2362 2371 // some other thread might have won the race for the last buffer, but
2363 2372 // has not yet decremented the count.
2364 2373
2365 2374 _draining_satb_buffers = false;
2366 2375
2367 2376 // again, this was a potentially expensive operation, decrease the
2368 2377 // limits to get the regular clock call early
2369 2378 decrease_limits();
2370 2379 }
2371 2380
2372 2381 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2373 2382 _mark_stats_cache.reset(region_idx);
2374 2383 }
2375 2384
2376 2385 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2377 2386 return _mark_stats_cache.evict_all();
2378 2387 }
2379 2388
2380 2389 void G1CMTask::print_stats() {
2381 2390 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2382 2391 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2383 2392 _elapsed_time_ms, _termination_time_ms);
2384 2393 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2385 2394 _step_times_ms.num(),
2386 2395 _step_times_ms.avg(),
2387 2396 _step_times_ms.sd(),
2388 2397 _step_times_ms.maximum(),
2389 2398 _step_times_ms.sum());
2390 2399 size_t const hits = _mark_stats_cache.hits();
2391 2400 size_t const misses = _mark_stats_cache.misses();
2392 2401 log_debug(gc, stats)(" Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f",
2393 2402 hits, misses, percent_of(hits, hits + misses));
2394 2403 }
2395 2404
2396 2405 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2397 2406 return _task_queues->steal(worker_id, task_entry);
2398 2407 }
2399 2408
2400 2409 /*****************************************************************************
2401 2410
2402 2411 The do_marking_step(time_target_ms, ...) method is the building
2403 2412 block of the parallel marking framework. It can be called in parallel
2404 2413 with other invocations of do_marking_step() on different tasks
2405 2414 (but only one per task, obviously) and concurrently with the
2406 2415 mutator threads, or during remark, hence it eliminates the need
2407 2416 for two versions of the code. When called during remark, it will
2408 2417 pick up from where the task left off during the concurrent marking
2409 2418 phase. Interestingly, tasks are also claimable during evacuation
2410 2419     pauses, since do_marking_step() ensures that it aborts before
2411 2420 it needs to yield.
2412 2421
2413 2422 The data structures that it uses to do marking work are the
2414 2423 following:
2415 2424
2416 2425 (1) Marking Bitmap. If there are gray objects that appear only
2417 2426 on the bitmap (this happens either when dealing with an overflow
2418 2427 or when the initial marking phase has simply marked the roots
2419 2428 and didn't push them on the stack), then tasks claim heap
2420 2429 regions whose bitmap they then scan to find gray objects. A
2421 2430 global finger indicates where the end of the last claimed region
2422 2431 is. A local finger indicates how far into the region a task has
2423 2432 scanned. The two fingers are used to determine how to gray an
2424 2433 object (i.e. whether simply marking it is OK, as it will be
2425 2434     visited by a task in the future, or whether it also needs to be
2426 2435 pushed on a stack).
2427 2436
2428 2437 (2) Local Queue. The local queue of the task which is accessed
2429 2438 reasonably efficiently by the task. Other tasks can steal from
2430 2439 it when they run out of work. Throughout the marking phase, a
2431 2440 task attempts to keep its local queue short but not totally
2432 2441 empty, so that entries are available for stealing by other
2433 2442     tasks. Only when there is no more work will a task totally
2434 2443 drain its local queue.
2435 2444
2436 2445 (3) Global Mark Stack. This handles local queue overflow. During
2437 2446 marking only sets of entries are moved between it and the local
2438 2447     queues, as access to it requires a mutex, and more fine-grain
2439 2448     interaction with it might cause contention. If it
2440 2449 overflows, then the marking phase should restart and iterate
2441 2450 over the bitmap to identify gray objects. Throughout the marking
2442 2451 phase, tasks attempt to keep the global mark stack at a small
2443 2452 length but not totally empty, so that entries are available for
2444 2453     popping by other tasks. Only when there is no more work will
2445 2454     tasks totally drain the global mark stack.
2446 2455
2447 2456 (4) SATB Buffer Queue. This is where completed SATB buffers are
2448 2457 made available. Buffers are regularly removed from this queue
2449 2458 and scanned for roots, so that the queue doesn't get too
2450 2459 long. During remark, all completed buffers are processed, as
2451 2460 well as the filled in parts of any uncompleted buffers.
2452 2461
2453 2462 The do_marking_step() method tries to abort when the time target
2454 2463 has been reached. There are a few other cases when the
2455 2464 do_marking_step() method also aborts:
2456 2465
2457 2466 (1) When the marking phase has been aborted (after a Full GC).
2458 2467
2459 2468 (2) When a global overflow (on the global stack) has been
2460 2469 triggered. Before the task aborts, it will actually sync up with
2461 2470 the other tasks to ensure that all the marking data structures
2462 2471 (local queues, stacks, fingers etc.) are re-initialized so that
2463 2472 when do_marking_step() completes, the marking phase can
2464 2473 immediately restart.
2465 2474
2466 2475 (3) When enough completed SATB buffers are available. The
2467 2476 do_marking_step() method only tries to drain SATB buffers right
2468 2477 at the beginning. So, if enough buffers are available, the
2469 2478 marking step aborts and the SATB buffers are processed at
2470 2479 the beginning of the next invocation.
2471 2480
2472 2481     (4) To yield. When we have to yield, we abort and yield
2473 2482 right at the end of do_marking_step(). This saves us from a lot
2474 2483     of hassle as, by yielding, we might allow a Full GC. If this
2475 2484 happens then objects will be compacted underneath our feet, the
2476 2485 heap might shrink, etc. We save checking for this by just
2477 2486 aborting and doing the yield right at the end.
2478 2487
2479 2488 From the above it follows that the do_marking_step() method should
2480 2489 be called in a loop (or, otherwise, regularly) until it completes.
2481 2490
2482 2491 If a marking step completes without its has_aborted() flag being
2483 2492 true, it means it has completed the current marking phase (and
2484 2493 also all other marking tasks have done so and have all synced up).
2485 2494
2486 2495 A method called regular_clock_call() is invoked "regularly" (in
2487 2496     sub-ms intervals) throughout marking. It is this clock method that
2488 2497 checks all the abort conditions which were mentioned above and
2489 2498 decides when the task should abort. A work-based scheme is used to
2490 2499 trigger this clock method: when the number of object words the
2491 2500 marking phase has scanned or the number of references the marking
2492 2501     phase has visited reach a given limit. Additional invocations of
2493 2502     the clock method have been planted in a few other strategic places
2494 2503 too. The initial reason for the clock method was to avoid calling
2495 2504 vtime too regularly, as it is quite expensive. So, once it was in
2496 2505 place, it was natural to piggy-back all the other conditions on it
2497 2506 too and not constantly check them throughout the code.
2498 2507
2499 2508 If do_termination is true then do_marking_step will enter its
2500 2509 termination protocol.
2501 2510
2502 2511 The value of is_serial must be true when do_marking_step is being
2503 2512 called serially (i.e. by the VMThread) and do_marking_step should
2504 2513 skip any synchronization in the termination and overflow code.
2505 2514 Examples include the serial remark code and the serial reference
2506 2515 processing closures.
2507 2516
2508 2517 The value of is_serial must be false when do_marking_step is
2509 2518 being called by any of the worker threads in a work gang.
2510 2519 Examples include the concurrent marking code (CMMarkingTask),
2511 2520 the MT remark code, and the MT reference processing closures.
2512 2521
2513 2522 *****************************************************************************/
2514 2523
2515 2524 void G1CMTask::do_marking_step(double time_target_ms,
2516 2525 bool do_termination,
2517 2526 bool is_serial) {
2518 2527 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2519 2528
2520 2529 _start_time_ms = os::elapsedVTime() * 1000.0;
2521 2530
2522 2531 // If do_stealing is true then do_marking_step will attempt to
2523 2532 // steal work from the other G1CMTasks. It only makes sense to
2524 2533 // enable stealing when the termination protocol is enabled
2525 2534 // and do_marking_step() is not being called serially.
2526 2535 bool do_stealing = do_termination && !is_serial;
2527 2536
2528 2537 G1Predictions const& predictor = _g1h->policy()->predictor();
2529 2538 double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2530 2539 _time_target_ms = time_target_ms - diff_prediction_ms;
2531 2540
2532 2541 // set up the variables that are used in the work-based scheme to
2533 2542 // call the regular clock method
2534 2543 _words_scanned = 0;
2535 2544 _refs_reached = 0;
2536 2545 recalculate_limits();
2537 2546
2538 2547 // clear all flags
2539 2548 clear_has_aborted();
2540 2549 _has_timed_out = false;
2541 2550 _draining_satb_buffers = false;
2542 2551
2543 2552 ++_calls;
2544 2553
2545 2554 // Set up the bitmap and oop closures. Anything that uses them is
2546 2555 // eventually called from this method, so it is OK to allocate these
2547 2556 // statically.
2548 2557 G1CMBitMapClosure bitmap_closure(this, _cm);
2549 2558 G1CMOopClosure cm_oop_closure(_g1h, this);
2550 2559 set_cm_oop_closure(&cm_oop_closure);
2551 2560
2552 2561 if (_cm->has_overflown()) {
2553 2562 // This can happen if the mark stack overflows during a GC pause
2554 2563 // and this task, after a yield point, restarts. We have to abort
2555 2564 // as we need to get into the overflow protocol which happens
2556 2565 // right at the end of this task.
2557 2566 set_has_aborted();
2558 2567 }
2559 2568
2560 2569 // First drain any available SATB buffers. After this, we will not
2561 2570 // look at SATB buffers before the next invocation of this method.
2562 2571 // If enough completed SATB buffers are queued up, the regular clock
2563 2572 // will abort this task so that it restarts.
2564 2573 drain_satb_buffers();
2565 2574 // ...then partially drain the local queue and the global stack
2566 2575 drain_local_queue(true);
2567 2576 drain_global_stack(true);
2568 2577
2569 2578 do {
2570 2579 if (!has_aborted() && _curr_region != NULL) {
2571 2580 // This means that we're already holding on to a region.
2572 2581 assert(_finger != NULL, "if region is not NULL, then the finger "
2573 2582 "should not be NULL either");
2574 2583
2575 2584 // We might have restarted this task after an evacuation pause
2576 2585 // which might have evacuated the region we're holding on to
2577 2586 // underneath our feet. Let's read its limit again to make sure
2578 2587 // that we do not iterate over a region of the heap that
2579 2588 // contains garbage (update_region_limit() will also move
2580 2589 // _finger to the start of the region if it is found empty).
2581 2590 update_region_limit();
2582 2591 // We will start from _finger not from the start of the region,
2583 2592 // as we might be restarting this task after aborting half-way
2584 2593 // through scanning this region. In this case, _finger points to
2585 2594 // the address where we last found a marked object. If this is a
2586 2595 // fresh region, _finger points to start().
2587 2596 MemRegion mr = MemRegion(_finger, _region_limit);
2588 2597
2589 2598 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2590 2599 "humongous regions should go around loop once only");
2591 2600
2592 2601 // Some special cases:
2593 2602 // If the memory region is empty, we can just give up the region.
2594 2603 // If the current region is humongous then we only need to check
2595 2604 // the bitmap for the bit associated with the start of the object,
2596 2605 // scan the object if it's live, and give up the region.
2597 2606 // Otherwise, let's iterate over the bitmap of the part of the region
2598 2607 // that is left.
2599 2608 // If the iteration is successful, give up the region.
2600 2609 if (mr.is_empty()) {
2601 2610 giveup_current_region();
2602 2611 abort_marking_if_regular_check_fail();
2603 2612 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2604 2613 if (_next_mark_bitmap->is_marked(mr.start())) {
2605 2614 // The object is marked - apply the closure
2606 2615 bitmap_closure.do_addr(mr.start());
2607 2616 }
2608 2617 // Even if this task aborted while scanning the humongous object
2609 2618 // we can (and should) give up the current region.
2610 2619 giveup_current_region();
2611 2620 abort_marking_if_regular_check_fail();
2612 2621 } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) {
2613 2622 giveup_current_region();
2614 2623 abort_marking_if_regular_check_fail();
2615 2624 } else {
2616 2625 assert(has_aborted(), "currently the only way to do so");
2617 2626 // The only way to abort the bitmap iteration is to return
2618 2627 // false from the do_bit() method. However, inside the
2619 2628 // do_bit() method we move the _finger to point to the
2620 2629 // object currently being looked at. So, if we bail out, we
2621 2630 // have definitely set _finger to something non-null.
2622 2631 assert(_finger != NULL, "invariant");
2623 2632
2624 2633 // Region iteration was actually aborted. So now _finger
2625 2634 // points to the address of the object we last scanned. If we
2626 2635 // leave it there, when we restart this task, we will rescan
2627 2636 // the object. It is easy to avoid this. We move the finger by
2628 2637 // enough to point to the next possible object header.
2629 2638 assert(_finger < _region_limit, "invariant");
2630 2639 HeapWord* const new_finger = _finger + ((oop)_finger)->size();
2631 2640 // Check if bitmap iteration was aborted while scanning the last object
2632 2641 if (new_finger >= _region_limit) {
2633 2642 giveup_current_region();
2634 2643 } else {
2635 2644 move_finger_to(new_finger);
2636 2645 }
2637 2646 }
2638 2647 }
2639 2648 // At this point we have either completed iterating over the
2640 2649 // region we were holding on to, or we have aborted.
2641 2650
2642 2651 // We then partially drain the local queue and the global stack.
2643 2652 // (Do we really need this?)
2644 2653 drain_local_queue(true);
2645 2654 drain_global_stack(true);
2646 2655
2647 2656 // Read the note on the claim_region() method on why it might
2648 2657 // return NULL with potentially more regions available for
2649 2658 // claiming and why we have to check out_of_regions() to determine
2650 2659 // whether we're done or not.
2651 2660 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2652 2661 // We are going to try to claim a new region. We should have
2653 2662 // given up on the previous one.
2654 2663 // Separated the asserts so that we know which one fires.
2655 2664 assert(_curr_region == NULL, "invariant");
2656 2665 assert(_finger == NULL, "invariant");
2657 2666 assert(_region_limit == NULL, "invariant");
2658 2667 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2659 2668 if (claimed_region != NULL) {
2660 2669 // Yes, we managed to claim one
2661 2670 setup_for_region(claimed_region);
2662 2671 assert(_curr_region == claimed_region, "invariant");
2663 2672 }
2664 2673 // It is important to call the regular clock here. It might take
2665 2674 // a while to claim a region if, for example, we hit a large
2666 2675 // block of empty regions. So we need to call the regular clock
2667 2676 // method once round the loop to make sure it's called
2668 2677 // frequently enough.
2669 2678 abort_marking_if_regular_check_fail();
2670 2679 }
2671 2680
2672 2681 if (!has_aborted() && _curr_region == NULL) {
2673 2682 assert(_cm->out_of_regions(),
2674 2683 "at this point we should be out of regions");
2675 2684 }
2676 2685 } while (_curr_region != NULL && !has_aborted());
2677 2686
2678 2687 if (!has_aborted()) {
2679 2688 // We cannot check whether the global stack is empty, since other
2680 2689 // tasks might be pushing objects to it concurrently.
2681 2690 assert(_cm->out_of_regions(),
2682 2691 "at this point we should be out of regions");
2683 2692 // Try to reduce the number of available SATB buffers so that
2684 2693 // remark has less work to do.
2685 2694 drain_satb_buffers();
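          // (Background: the SATB buffers hold snapshot-at-the-beginning
          // entries recorded by the pre-write barrier; draining some of them
          // now leaves less for the remark pause.)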
2686 2695 }
2687 2696
2688 2697 // Since we've done everything else, we can now totally drain the
2689 2698 // local queue and global stack.
2690 2699 drain_local_queue(false);
2691 2700 drain_global_stack(false);
2692 2701
2693 2702 // Attempt at work stealing from other tasks' queues.
2694 2703 if (do_stealing && !has_aborted()) {
2695 2704 // We have not aborted. This means that we have finished all that
2696 2705 // we could. Let's try to do some stealing...
2697 2706
2698 2707 // We cannot check whether the global stack is empty, since other
2699 2708 // tasks might be pushing objects to it concurrently.
2700 2709 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2701 2710 "only way to reach here");
2702 2711 while (!has_aborted()) {
2703 2712 G1TaskQueueEntry entry;
2704 2713 if (_cm->try_stealing(_worker_id, entry)) {
2705 2714 scan_task_entry(entry);
2706 2715
2707 2716 // And since we're towards the end, let's totally drain the
2708 2717 // local queue and global stack.
2709 2718 drain_local_queue(false);
2710 2719 drain_global_stack(false);
2711 2720 } else {
2712 2721 break;
2713 2722 }
2714 2723 }
2715 2724 }
2716 2725
2717 2726 // We still haven't aborted. Now, let's try to get into the
2718 2727 // termination protocol.
2719 2728 if (do_termination && !has_aborted()) {
2720 2729 // We cannot check whether the global stack is empty, since other
2721 2730 // tasks might be concurrently pushing objects on it.
2722 2731 // Separated the asserts so that we know which one fires.
2723 2732 assert(_cm->out_of_regions(), "only way to reach here");
2724 2733 assert(_task_queue->size() == 0, "only way to reach here");
2725 2734 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2726 2735
2727 2736 // The G1CMTask class also extends the TerminatorTerminator class,
2728 2737 // hence its should_exit_termination() method also decides whether
2729 2738 // to exit the termination protocol.
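          // (A sketch of the handshake, assuming the usual TaskTerminator
          // semantics: each worker offers termination and waits until either
          // every worker has offered -- offer_termination() returns true --
          // or should_exit_termination() asks this task to leave early, in
          // which case it returns false.)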
2730 2739 bool finished = (is_serial ||
2731 2740 _cm->terminator()->offer_termination(this));
2732 2741 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2733 2742 _termination_time_ms +=
2734 2743 termination_end_time_ms - _termination_start_time_ms;
2735 2744
2736 2745 if (finished) {
2737 2746 // We're all done.
2738 2747
2739 2748 // We can now guarantee that the global stack is empty, since
2740 2749 // all other tasks have finished. We separated the guarantees so
2741 2750 // that, if a condition is false, we can immediately find out
2742 2751 // which one.
2743 2752 guarantee(_cm->out_of_regions(), "only way to reach here");
2744 2753 guarantee(_cm->mark_stack_empty(), "only way to reach here");
2745 2754 guarantee(_task_queue->size() == 0, "only way to reach here");
2746 2755 guarantee(!_cm->has_overflown(), "only way to reach here");
2747 2756 guarantee(!has_aborted(), "should never happen if termination has completed");
2748 2757 } else {
2749 2758 // Apparently there's more work to do. Let's abort this task. The
2750 2759 // task will then be restarted and we can hopefully find more things to do.
2751 2760 set_has_aborted();
2752 2761 }
2753 2762 }
2754 2763
2755 2764 // Mainly for debugging purposes, to make sure that a pointer to the
2756 2765 // closure which was stack-allocated in this frame doesn't
2757 2766 // escape it by accident.
2758 2767 set_cm_oop_closure(NULL);
2759 2768 double end_time_ms = os::elapsedVTime() * 1000.0;
2760 2769 double elapsed_time_ms = end_time_ms - _start_time_ms;
2761 2770 // Update the step history.
2762 2771 _step_times_ms.add(elapsed_time_ms);
2763 2772
2764 2773 if (has_aborted()) {
2765 2774 // The task was aborted for some reason.
2766 2775 if (_has_timed_out) {
2767 2776 double diff_ms = elapsed_time_ms - _time_target_ms;
2768 2777 // Keep statistics of how well we did with respect to hitting
2769 2778 // our target only if we actually timed out (if we aborted for
2770 2779 // other reasons, then the results might get skewed).
2771 2780 _marking_step_diff_ms.add(diff_ms);
2772 2781 }
2773 2782
2774 2783 if (_cm->has_overflown()) {
2775 2784 // This is the interesting one. We aborted because a global
2776 2785 // overflow was raised. This means we have to restart the
2777 2786 // marking phase and start iterating over regions. However, in
2778 2787 // order to do this we have to make sure that all tasks stop
2779 2788 // what they are doing and re-initialize in a safe manner. We
2780 2789 // will achieve this with the use of two barrier sync points.
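          // (Sketched sequence:
          //    barrier 1 -- all tasks have stopped marking work;
          //    worker 0  -- resets the global marking state;
          //    barrier 2 -- all tasks observe the reset state and restart.)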
2781 2790
2782 2791 if (!is_serial) {
2783 2792 // We only need to enter the sync barrier if being called
2784 2793 // from a parallel context
2785 2794 _cm->enter_first_sync_barrier(_worker_id);
2786 2795
2787 2796 // When we exit this sync barrier we know that all tasks have
2788 2797 // stopped doing marking work. So, it's now safe to
2789 2798 // re-initialize our data structures.
2790 2799 }
2791 2800
2792 2801 clear_region_fields();
2793 2802 flush_mark_stats_cache();
2794 2803
2795 2804 if (!is_serial) {
2796 2805 // If we're executing the concurrent phase of marking, reset the marking
2797 2806 // state; otherwise the marking state is reset after reference processing,
2798 2807 // during the remark pause.
2799 2808 // If we reset here as a result of an overflow during the remark we will
2800 2809 // see assertion failures from any subsequent set_concurrency_and_phase()
2801 2810 // calls.
2802 2811 if (_cm->concurrent() && _worker_id == 0) {
2803 2812 // Worker 0 is responsible for clearing the global data structures because
2804 2813 // of an overflow. During STW we should not clear the overflow flag (in
2805 2814 // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we
2806 2815 // exit this method to abort the pause and restart concurrent marking.
2807 2816 _cm->reset_marking_for_restart();
2808 2817
2809 2818 log_info(gc, marking)("Concurrent Mark reset for overflow");
2810 2819 }
2811 2820
2812 2821 // ...and enter the second barrier.
2813 2822 _cm->enter_second_sync_barrier(_worker_id);
2814 2823 }
2815 2824 // At this point, if we're during the concurrent phase of
2816 2825 // marking, everything has been re-initialized and we're
2817 2826 // ready to restart.
2818 2827 }
2819 2828 }
2820 2829 }
2821 2830
2822 2831 G1CMTask::G1CMTask(uint worker_id,
2823 2832 G1ConcurrentMark* cm,
2824 2833 G1CMTaskQueue* task_queue,
2825 2834 G1RegionMarkStats* mark_stats,
2826 2835 uint max_regions) :
2827 2836 _objArray_processor(this),
2828 2837 _worker_id(worker_id),
2829 2838 _g1h(G1CollectedHeap::heap()),
2830 2839 _cm(cm),
2831 2840 _next_mark_bitmap(NULL),
2832 2841 _task_queue(task_queue),
2833 2842 _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize),
2834 2843 _calls(0),
2835 2844 _time_target_ms(0.0),
2836 2845 _start_time_ms(0.0),
2837 2846 _cm_oop_closure(NULL),
2838 2847 _curr_region(NULL),
2839 2848 _finger(NULL),
2840 2849 _region_limit(NULL),
2841 2850 _words_scanned(0),
2842 2851 _words_scanned_limit(0),
2843 2852 _real_words_scanned_limit(0),
2844 2853 _refs_reached(0),
2845 2854 _refs_reached_limit(0),
2846 2855 _real_refs_reached_limit(0),
2847 2856 _has_aborted(false),
2848 2857 _has_timed_out(false),
2849 2858 _draining_satb_buffers(false),
2850 2859 _step_times_ms(),
2851 2860 _elapsed_time_ms(0.0),
2852 2861 _termination_time_ms(0.0),
2853 2862 _termination_start_time_ms(0.0),
2854 2863 _marking_step_diff_ms()
2855 2864 {
2856 2865 guarantee(task_queue != NULL, "invariant");
2857 2866
2858 2867 _marking_step_diff_ms.add(0.5);
2859 2868 }
2860 2869
2861 2870 // These are formatting macros that are used below to ensure
2862 2871 // consistent formatting. The *_H_* versions format the header for a
2863 2872 // particular value and should be kept consistent with the
2864 2873 // corresponding value macro. Also note that most of the macros add
2865 2874 // the necessary white space (as a prefix), which makes them easier
2866 2875 // to compose; an illustrative composition follows the definitions.
2867 2876
2868 2877 // All the output lines are prefixed with this string to be able to
2869 2878 // identify them easily in a large log file.
2870 2879 #define G1PPRL_LINE_PREFIX "###"
2871 2880
2872 2881 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
2873 2882 #ifdef _LP64
2874 2883 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
2875 2884 #else // _LP64
2876 2885 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
2877 2886 #endif // _LP64
2878 2887
2879 2888 // For per-region info
2880 2889 #define G1PPRL_TYPE_FORMAT " %-4s"
2881 2890 #define G1PPRL_TYPE_H_FORMAT " %4s"
2882 2891 #define G1PPRL_STATE_FORMAT " %-5s"
2883 2892 #define G1PPRL_STATE_H_FORMAT " %5s"
2884 2893 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9)
2885 2894 #define G1PPRL_BYTE_H_FORMAT " %9s"
2886 2895 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
2887 2896 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
2888 2897
2889 2898 // For summary info
2890 2899 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
2891 2900 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT
2892 2901 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
2893 2902 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
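
          // An illustrative composition (not from the original source):
          // concatenating
          //   G1PPRL_LINE_PREFIX G1PPRL_TYPE_H_FORMAT G1PPRL_BYTE_H_FORMAT
          // yields the single format string "### %4s %9s", so a call such as
          //   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
          //                           G1PPRL_TYPE_H_FORMAT
          //                           G1PPRL_BYTE_H_FORMAT,
          //                           "type", "used");
          // would print "### type      used", with each column right-aligned.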
2894 2903
2895 2904 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
2896 2905 _total_used_bytes(0), _total_capacity_bytes(0),
2897 2906 _total_prev_live_bytes(0), _total_next_live_bytes(0),
2898 2907 _total_remset_bytes(0), _total_strong_code_roots_bytes(0)
2899 2908 {
2900 2909 if (!log_is_enabled(Trace, gc, liveness)) {
2901 2910 return;
2902 2911 }
2903 2912
2904 2913 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2905 2914 MemRegion g1_reserved = g1h->g1_reserved();
2906 2915 double now = os::elapsedTime();
2907 2916
2908 2917 // Print the header of the output.
2909 2918 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
2910 2919 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
2911 2920 G1PPRL_SUM_ADDR_FORMAT("reserved")
2912 2921 G1PPRL_SUM_BYTE_FORMAT("region-size"),
2913 2922 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
2914 2923 HeapRegion::GrainBytes);
2915 2924 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2916 2925 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2917 2926 G1PPRL_TYPE_H_FORMAT
2918 2927 G1PPRL_ADDR_BASE_H_FORMAT
2919 2928 G1PPRL_BYTE_H_FORMAT
2920 2929 G1PPRL_BYTE_H_FORMAT
2921 2930 G1PPRL_BYTE_H_FORMAT
2922 2931 G1PPRL_DOUBLE_H_FORMAT
2923 2932 G1PPRL_BYTE_H_FORMAT
2924 2933 G1PPRL_STATE_H_FORMAT
2925 2934 G1PPRL_BYTE_H_FORMAT,
2926 2935 "type", "address-range",
2927 2936 "used", "prev-live", "next-live", "gc-eff",
2928 2937 "remset", "state", "code-roots");
2929 2938 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2930 2939 G1PPRL_TYPE_H_FORMAT
2931 2940 G1PPRL_ADDR_BASE_H_FORMAT
2932 2941 G1PPRL_BYTE_H_FORMAT
2933 2942 G1PPRL_BYTE_H_FORMAT
2934 2943 G1PPRL_BYTE_H_FORMAT
2935 2944 G1PPRL_DOUBLE_H_FORMAT
2936 2945 G1PPRL_BYTE_H_FORMAT
2937 2946 G1PPRL_STATE_H_FORMAT
2938 2947 G1PPRL_BYTE_H_FORMAT,
2939 2948 "", "",
2940 2949 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
2941 2950 "(bytes)", "", "(bytes)");
2942 2951 }
2943 2952
2944 2953 bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) {
2945 2954 if (!log_is_enabled(Trace, gc, liveness)) {
2946 2955 return false;
2947 2956 }
2948 2957
2949 2958 const char* type = r->get_type_str();
2950 2959 HeapWord* bottom = r->bottom();
2951 2960 HeapWord* end = r->end();
2952 2961 size_t capacity_bytes = r->capacity();
2953 2962 size_t used_bytes = r->used();
2954 2963 size_t prev_live_bytes = r->live_bytes();
2955 2964 size_t next_live_bytes = r->next_live_bytes();
2956 2965 double gc_eff = r->gc_efficiency();
2957 2966 size_t remset_bytes = r->rem_set()->mem_size();
2958 2967 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
2959 2968 const char* remset_type = r->rem_set()->get_short_state_str();
2960 2969
2961 2970 _total_used_bytes += used_bytes;
2962 2971 _total_capacity_bytes += capacity_bytes;
2963 2972 _total_prev_live_bytes += prev_live_bytes;
2964 2973 _total_next_live_bytes += next_live_bytes;
2965 2974 _total_remset_bytes += remset_bytes;
2966 2975 _total_strong_code_roots_bytes += strong_code_roots_bytes;
2967 2976
2968 2977 // Print a line for this particular region.
2969 2978 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2970 2979 G1PPRL_TYPE_FORMAT
2971 2980 G1PPRL_ADDR_BASE_FORMAT
2972 2981 G1PPRL_BYTE_FORMAT
2973 2982 G1PPRL_BYTE_FORMAT
2974 2983 G1PPRL_BYTE_FORMAT
2975 2984 G1PPRL_DOUBLE_FORMAT
2976 2985 G1PPRL_BYTE_FORMAT
2977 2986 G1PPRL_STATE_FORMAT
2978 2987 G1PPRL_BYTE_FORMAT,
2979 2988 type, p2i(bottom), p2i(end),
2980 2989 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
2981 2990 remset_bytes, remset_type, strong_code_roots_bytes);
2982 2991
2983 2992 return false;
2984 2993 }
2985 2994
2986 2995 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
2987 2996 if (!log_is_enabled(Trace, gc, liveness)) {
2988 2997 return;
2989 2998 }
2990 2999
2991 3000 // Add the free-list and static memory usage to the remembered set total.
2992 3001 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
2993 3002 // Print the footer of the output.
2994 3003 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2995 3004 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2996 3005 " SUMMARY"
2997 3006 G1PPRL_SUM_MB_FORMAT("capacity")
2998 3007 G1PPRL_SUM_MB_PERC_FORMAT("used")
2999 3008 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3000 3009 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3001 3010 G1PPRL_SUM_MB_FORMAT("remset")
3002 3011 G1PPRL_SUM_MB_FORMAT("code-roots"),
3003 3012 bytes_to_mb(_total_capacity_bytes),
3004 3013 bytes_to_mb(_total_used_bytes),
3005 3014 percent_of(_total_used_bytes, _total_capacity_bytes),
3006 3015 bytes_to_mb(_total_prev_live_bytes),
3007 3016 percent_of(_total_prev_live_bytes, _total_capacity_bytes),
3008 3017 bytes_to_mb(_total_next_live_bytes),
3009 3018 percent_of(_total_next_live_bytes, _total_capacity_bytes),
3010 3019 bytes_to_mb(_total_remset_bytes),
3011 3020 bytes_to_mb(_total_strong_code_roots_bytes));
3012 3021 }
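
          // A usage sketch (illustrative; the actual call site lives elsewhere
          // in the G1 code): the constructor prints the header, each
          // do_heap_region() call prints one row, and the destructor prints
          // the summary footer when the closure goes out of scope.
          //
          //   G1PrintRegionLivenessInfoClosure cl("Post-Marking");
          //   G1CollectedHeap::heap()->heap_region_iterate(&cl);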
[... 1017 lines elided ...]