rev 3708 : 8000244: G1: Ergonomically set MarkStackSize and use virtual space for global marking stack
Summary: Set MarkStackSize ergonomically, based on the number of parallel marking threads, with a reasonable minimum. If marking has to restart due to an overflow, expand the marking stack up to a reasonable maximum. Allocate the underlying space for the marking stack from virtual memory.
Reviewed-by: jmasa
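
The sizing policy the summary describes can be sketched standalone: scale the initial mark-stack capacity with the number of parallel marking threads and clamp it between a minimum and a maximum. The constants and the helper name below are illustrative only; the actual patch derives its bounds from the HotSpot flags MarkStackSize and MarkStackSizeMax.

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    // Illustrative bounds; the real patch derives them from HotSpot
    // globals (MarkStackSize / MarkStackSizeMax), not these numbers.
    static const size_t kMinEntries       = 32 * 1024;
    static const size_t kMaxEntries       = 512 * 1024;
    static const size_t kEntriesPerThread = 16 * 1024;

    // Pick an initial mark-stack capacity from the number of parallel
    // marking threads, clamped to [kMinEntries, kMaxEntries].
    static size_t ergonomic_mark_stack_size(unsigned marking_threads) {
      size_t scaled = (size_t)marking_threads * kEntriesPerThread;
      return std::max(kMinEntries, std::min(scaled, kMaxEntries));
    }

    int main() {
      unsigned threads[] = {1, 4, 16, 64};
      for (unsigned t : threads) {
        std::printf("%2u marking threads -> %zu entries\n",
                    t, ergonomic_mark_stack_size(t));
      }
      return 0;
    }
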

          --- old/src/share/vm/gc_implementation/g1/concurrentMark.hpp
          +++ new/src/share/vm/gc_implementation/g1/concurrentMark.hpp
[... 55 lines elided ...]
  56   56  class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
  57   57   protected:
  58   58    HeapWord* _bmStartWord;      // base address of range covered by map
  59   59    size_t    _bmWordSize;       // map size (in #HeapWords covered)
  60   60    const int _shifter;          // map to char or bit
  61   61    VirtualSpace _virtual_space; // underlying the bit map
  62   62    BitMap    _bm;               // the bit map itself
  63   63  
  64   64   public:
  65   65    // constructor
  66      -  CMBitMapRO(ReservedSpace rs, int shifter);
       66 +  CMBitMapRO(int shifter);
  67   67  
  68   68    enum { do_yield = true };
  69   69  
  70   70    // inquiries
  71   71    HeapWord* startWord()   const { return _bmStartWord; }
  72   72    size_t    sizeInWords() const { return _bmWordSize;  }
  73   73    // the following is one past the last word in space
  74   74    HeapWord* endWord()     const { return _bmStartWord + _bmWordSize; }
  75   75  
  76   76    // read marks
[... 33 lines elided ...]
 110  110    }
 111  111  
 112  112    // debugging
 113  113    NOT_PRODUCT(bool covers(ReservedSpace rs) const;)
 114  114  };
 115  115  
 116  116  class CMBitMap : public CMBitMapRO {
 117  117  
 118  118   public:
 119  119    // constructor
 120      -  CMBitMap(ReservedSpace rs, int shifter) :
 121      -    CMBitMapRO(rs, shifter) {}
      120 +  CMBitMap(int shifter) :
      121 +    CMBitMapRO(shifter) {}
      122 +
       123 +  // Allocates the backing store for the marking bitmap
      124 +  bool allocate(ReservedSpace heap_rs);
 122  125  
 123  126    // write marks
 124  127    void mark(HeapWord* addr) {
 125  128      assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
 126  129             "outside underlying space?");
 127  130      _bm.set_bit(heapWordToOffset(addr));
 128  131    }
 129  132    void clear(HeapWord* addr) {
 130  133      assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
 131  134             "outside underlying space?");
[... 16 lines elided ...]
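
The new allocate() declared above moves backing-store setup out of the CMBitMapRO/CMBitMap constructors so that failure can be reported as a bool instead of aborting the VM. A simplified stand-in, assuming one bitmap bit per 2^shifter heap words (matching the _shifter comment earlier); the real method commits a HotSpot VirtualSpace rather than calling calloc:

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>

    struct MarkBitMapSketch {
      uint8_t* _bits;
      size_t   _size_in_bytes;

      // Size the bitmap from the covered heap range and commit the
      // backing store; return false (instead of aborting) on failure.
      bool allocate(size_t heap_size_in_words, int shifter) {
        size_t bits = (heap_size_in_words + (((size_t)1 << shifter) - 1)) >> shifter;
        _size_in_bytes = (bits + 7) / 8;
        _bits = (uint8_t*)std::calloc(_size_in_bytes, 1);
        return _bits != NULL;  // lets the caller fail initialization cleanly
      }
    };
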
 148  151    // Starting at the bit corresponding to "addr" (inclusive), find the next
 149  152    // "1" bit, if any.  This bit starts some run of consecutive "1"'s; find
 150  153    // the end of this run (stopping at "end_addr").  Return the MemRegion
 151  154    // covering from the start of the region corresponding to the first bit
 152  155    // of the run to the end of the region corresponding to the last bit of
 153  156    // the run.  If there is no "1" bit at or after "addr", return an empty
 154  157    // MemRegion.
 155  158    MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
 156  159  };
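
The comment on getAndClearMarkedRegion() above is easiest to follow on a plain bit vector. A minimal sketch under that reading: find the first set bit at or after from, then the end of its run, stopping at end. The real method additionally clears the bits and maps indices back to a heap MemRegion:

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Returns {start, stop} of the first run of set bits in [from, end),
    // or {end, end} if there is no set bit (the "empty MemRegion" case).
    static std::pair<size_t, size_t>
    find_marked_run(const std::vector<bool>& bm, size_t from, size_t end) {
      size_t start = from;
      while (start < end && !bm[start]) ++start;  // next "1" bit, if any
      if (start == end) {
        return std::make_pair(end, end);          // no "1" bit at or after 'from'
      }
      size_t stop = start;
      while (stop < end && bm[stop]) ++stop;      // end of the run of "1"s
      return std::make_pair(start, stop);
    }
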
 157  160  
 158      -// Represents a marking stack used by the CM collector.
 159      -// Ideally this should be GrowableArray<> just like MSC's marking stack(s).
      161 +// Represents a marking stack used by ConcurrentMarking in the G1 collector.
 160  162  class CMMarkStack VALUE_OBJ_CLASS_SPEC {
      163 +  VirtualSpace _virtual_space;   // Underlying backing store for actual stack
 161  164    ConcurrentMark* _cm;
 162  165    oop*   _base;        // bottom of stack
 163      -  jint   _index;       // one more than last occupied index
 164      -  jint   _capacity;    // max #elements
 165      -  jint   _saved_index; // value of _index saved at start of GC
 166      -  NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
      166 +  jint _index;       // one more than last occupied index
      167 +  jint _capacity;    // max #elements
      168 +  jint _saved_index; // value of _index saved at start of GC
      169 +  NOT_PRODUCT(jint _max_depth;)   // max depth plumbed during run
 167  170  
 168      -  bool   _overflow;
      171 +  bool  _overflow;
      172 +  bool  _should_expand;
 169  173    DEBUG_ONLY(bool _drain_in_progress;)
 170  174    DEBUG_ONLY(bool _drain_in_progress_yields;)
 171  175  
 172  176   public:
 173  177    CMMarkStack(ConcurrentMark* cm);
 174  178    ~CMMarkStack();
 175  179  
 176      -  void allocate(size_t size);
      180 +#ifndef PRODUCT
      181 +  jint max_depth() const {
      182 +    return _max_depth;
      183 +  }
      184 +#endif
      185 +
      186 +  bool allocate(size_t capacity);
 177  187  
 178  188    oop pop() {
 179  189      if (!isEmpty()) {
 180  190        return _base[--_index] ;
 181  191      }
 182  192      return NULL;
 183  193    }
 184  194  
 185  195    // If overflow happens, don't do the push, and record the overflow.
 186  196    // *Requires* that "ptr" is already marked.
[... 37 lines elided ...]
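
The elided par_push() carries the contract stated just above: if the stack is full, record the overflow and skip the push. A lock-free sketch of that contract using std::atomic; the real CMMarkStack uses HotSpot's Atomic primitives on oop entries:

    #include <atomic>
    #include <cstddef>

    template <typename T>
    struct ParStackSketch {
      T*                base;       // bottom of stack
      long              capacity;   // max #elements
      std::atomic<long> index;      // one past last occupied slot
      std::atomic<bool> overflow;

      ParStackSketch(T* b, long cap)
        : base(b), capacity(cap), index(0), overflow(false) {}

      // If the stack is full, record the overflow and do NOT push.
      void par_push(T value) {
        for (;;) {
          long i = index.load(std::memory_order_relaxed);
          if (i >= capacity) {
            overflow.store(true, std::memory_order_relaxed);
            return;                 // element dropped; marking restarts later
          }
          if (index.compare_exchange_weak(i, i + 1)) {
            base[i] = value;        // slot i now belongs to this thread
            return;
          }
        }
      }
    };
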
 224  234    // argument, if non-null, may be used to verify that only marked objects
 225  235    // are on the mark stack.  If "yield_after" is "true", then the
 226  236    // concurrent marker performing the drain offers to yield after
 227  237    // processing each object.  If a yield occurs, stops the drain operation
 228  238    // and returns false.  Otherwise, returns true.
 229  239    template<class OopClosureClass>
 230  240    bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false);
 231  241  
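
A hypothetical call-site shape for drain(), matching the comment above: the closure processes each popped entry, and a granted yield stops the drain early with false. Both the closure type and the should_yield() hook are invented for illustration:

    #include <cstddef>
    #include <vector>

    struct CountingClosure {
      size_t count = 0;
      void do_object(void* obj) { (void)obj; ++count; }  // stand-in for oop work
    };

    template <class ClosureT>
    static bool drain_sketch(std::vector<void*>& stack, ClosureT* cl,
                             bool yield_after, bool (*should_yield)()) {
      while (!stack.empty()) {
        void* obj = stack.back();
        stack.pop_back();
        cl->do_object(obj);
        if (yield_after && should_yield()) {
          return false;   // drain stopped by a yield
        }
      }
      return true;        // fully drained
    }
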
 232  242    bool isEmpty()    { return _index == 0; }
 233  243    bool isFull()     { return _index == _capacity; }
 234      -  int maxElems()    { return _capacity; }
      244 +  int  maxElems()   { return _capacity; }
 235  245  
 236  246    bool overflow() { return _overflow; }
 237  247    void clear_overflow() { _overflow = false; }
 238  248  
      249 +  bool should_expand() const { return _should_expand; }
      250 +  void set_should_expand();
      251 +
      252 +  // Expand the stack, typically in response to an overflow condition
      253 +  void expand();
      254 +
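
The new expand() is the core of this change: after an overflow forces marking to restart, the stack is grown before the next cycle, up to a fixed maximum. A sketch of that policy, assuming doubling with an illustrative ceiling; the real code releases and re-initializes a larger VirtualSpace rather than calling realloc:

    #include <algorithm>
    #include <cstddef>
    #include <cstdlib>

    static const size_t kMaxCapacity = 512 * 1024;   // illustrative ceiling

    struct ExpandableStack {
      void** _base;
      size_t _capacity;        // in entries
      bool   _should_expand;

      // Called between marking cycles, after an overflow set _should_expand.
      void expand() {
        _should_expand = false;
        if (_capacity >= kMaxCapacity) return;       // already at the maximum
        size_t new_cap = std::min(_capacity * 2, kMaxCapacity);
        void** new_base = (void**)std::realloc(_base, new_cap * sizeof(void*));
        if (new_base != NULL) {                      // on failure keep the old stack
          _base = new_base;
          _capacity = new_cap;
        }
      }
    };
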
 239  255    int  size() { return _index; }
 240  256  
 241  257    void setEmpty()   { _index = 0; clear_overflow(); }
 242  258  
 243  259    // Record the current index.
 244  260    void note_start_of_gc();
 245  261  
 246  262    // Make sure that we have not added any entries to the stack during GC.
 247  263    void note_end_of_gc();
 248  264  
[... 88 lines elided ...]
 337  353    void scan_finished();
 338  354  
 339  355    // If CM threads are still scanning root regions, wait until they
 340  356    // are done. Return true if we had to wait, false otherwise.
 341  357    bool wait_until_scan_finished();
 342  358  };
 343  359  
 344  360  class ConcurrentMarkThread;
 345  361  
 346  362  class ConcurrentMark: public CHeapObj<mtGC> {
      363 +  friend class CMMarkStack;
 347  364    friend class ConcurrentMarkThread;
 348  365    friend class CMTask;
 349  366    friend class CMBitMapClosure;
 350  367    friend class CMGlobalObjectClosure;
 351  368    friend class CMRemarkTask;
 352  369    friend class CMConcurrentMarkingTask;
 353  370    friend class G1ParNoteEndTask;
 354  371    friend class CalcLiveObjectsClosure;
 355  372    friend class G1CMRefProcTaskProxy;
 356  373    friend class G1CMRefProcTaskExecutor;
[... 213 lines elided ...]
 570  587    BitMap*  _count_card_bitmaps;
 571  588  
 572  589    // Used to record the number of marked live bytes
 573  590    // (for each region, by worker thread).
 574  591    size_t** _count_marked_bytes;
 575  592  
 576  593    // Card index of the bottom of the G1 heap. Used for biasing indices into
 577  594    // the card bitmaps.
 578  595    intptr_t _heap_bottom_card_num;
 579  596  
      597 +  // Set to true when initialization is complete
      598 +  bool _completed_initialization;
      599 +
 580  600  public:
 581  601    // Manipulation of the global mark stack.
 582  602    // Notice that the first mark_stack_push is CAS-based, whereas the
 583  603    // two below are Mutex-based. This is OK since the first one is only
 584  604    // called during evacuation pauses and doesn't compete with the
 585  605    // other two (which are called by the marking tasks during
 586  606    // concurrent marking or remark).
 587  607    bool mark_stack_push(oop p) {
 588  608      _markStack.par_push(p);
 589  609      if (_markStack.overflow()) {
[... 39 lines elided ...]
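
The phase split described above can be sketched as two entry points: the evacuation-pause path pushes lock-free (as in the par_push sketch earlier), while the marking and remark paths may take a mutex because they never run concurrently with an evacuation pause. Names below are illustrative:

    #include <mutex>
    #include <vector>

    struct GlobalMarkStackSketch {
      std::vector<void*> _entries;
      std::mutex         _lock;

      // Mutex-based push: only called by marking tasks during concurrent
      // marking or remark, which never overlap an evacuation pause, so it
      // cannot race with the CAS-based evacuation-pause push.
      void push_locked(void* p) {
        std::lock_guard<std::mutex> g(_lock);
        _entries.push_back(p);
      }
    };
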
 629  649      for (uint i = 0; i < _max_worker_id; ++i)
 630  650        ret += _accum_task_vtime[i];
 631  651      return ret;
 632  652    }
 633  653  
 634  654    // Attempts to steal an object from the task queues of other tasks
 635  655    bool try_stealing(uint worker_id, int* hash_seed, oop& obj) {
 636  656      return _task_queues->steal(worker_id, hash_seed, obj);
 637  657    }
 638  658  
 639      -  ConcurrentMark(ReservedSpace rs, uint max_regions);
      659 +  ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs);
 640  660    ~ConcurrentMark();
 641  661  
 642  662    ConcurrentMarkThread* cmThread() { return _cmThread; }
 643  663  
 644  664    CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
 645  665    CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
 646  666  
 647  667    // Returns the number of GC threads to be used in a concurrent
 648  668    // phase based on the number of GC threads being used in a STW
 649  669    // phase.
[... 250 lines elided ...]
  900  920    // Unconditionally mark the given object, and unconditionally count
 901  921    // the object in the counting structures for worker id 0.
 902  922    // Should *not* be called from parallel code.
 903  923    inline bool mark_and_count(oop obj, HeapRegion* hr);
 904  924  
 905  925    // Similar to the above routine but we don't know the heap region that
 906  926    // contains the object to be marked/counted, which this routine looks up.
 907  927    // Should *not* be called from parallel code.
 908  928    inline bool mark_and_count(oop obj);
 909  929  
      930 +  // Returns true if initialization was successfully completed.
      931 +  bool completed_initialization() const {
      932 +    return _completed_initialization;
      933 +  }
      934 +
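
A hypothetical caller-side check for the new flag: since a C++ constructor cannot return failure, heap initialization queries completed_initialization() after constructing ConcurrentMark and turns false into a clean startup failure. The helper below is invented for illustration:

    #include <cstdio>

    template <class CM>
    static bool verify_cm_initialized(const CM* cm) {
      if (!cm->completed_initialization()) {
        // e.g. report the failure and make JVM initialization bail out
        std::fprintf(stderr, "Could not initialize ConcurrentMark\n");
        return false;
      }
      return true;
    }
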
 910  935  protected:
 911  936    // Clear all the per-task bitmaps and arrays used to store the
 912  937    // counting data.
 913  938    void clear_all_count_data();
 914  939  
 915  940    // Aggregates the counting data for each worker/task
 916  941    // that was constructed while marking. Also sets
 917  942    // the amount of marked bytes for each region and
 918  943    // the top at concurrent mark count.
 919  944    void aggregate_count_data();
[... 316 lines elided ...]