rev 3708 : 8000244: G1: Ergonomically set MarkStackSize and use virtual space for global marking stack
Summary: Set MarkStackSize based on the number of parallel marking threads, with a reasonable minimum. If marking has to restart due to an overflow, expand the marking stack, up to a reasonable maximum. Allocate the underlying space for the marking stack from virtual memory.
Reviewed-by: jmasa
rev 3709 : imported patch reuse-old-marking-stack
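As a rough illustration of the sizing policy described in the summary: scale the global mark stack with the parallel marking thread count and clamp it to a floor and a ceiling. Everything below is a sketch; the constant names and values are assumptions (the patch's real ergonomics are applied through the MarkStackSize and MarkStackSizeMax flags in the .cpp changes, which this header diff does not show).

    #include <algorithm>
    #include <cstddef>

    // Hypothetical bounds and per-thread scaling factor (entries, not bytes).
    static const std::size_t kMinMarkStackSize        = 32 * 1024;
    static const std::size_t kMaxMarkStackSize        = 16 * 1024 * 1024;
    static const std::size_t kEntriesPerMarkingThread = 16 * 1024;

    // Scale the global mark stack with the number of parallel marking
    // threads, clamped to a reasonable minimum and maximum.
    std::size_t ergonomic_mark_stack_size(std::size_t parallel_marking_threads) {
      std::size_t scaled = parallel_marking_threads * kEntriesPerMarkingThread;
      return std::min(std::max(scaled, kMinMarkStackSize), kMaxMarkStackSize);
    }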

--- old/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.hpp
[... 55 lines elided ...]
  56   56  class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
  57   57   protected:
  58   58    HeapWord* _bmStartWord;      // base address of range covered by map
  59   59    size_t    _bmWordSize;       // map size (in #HeapWords covered)
  60   60    const int _shifter;          // map to char or bit
  61   61    VirtualSpace _virtual_space; // underlying the bit map
  62   62    BitMap    _bm;               // the bit map itself
  63   63  
  64   64   public:
  65   65    // constructor
  66      -  CMBitMapRO(ReservedSpace rs, int shifter);
       66 +  CMBitMapRO(int shifter);
  67   67  
  68   68    enum { do_yield = true };
  69   69  
  70   70    // inquiries
  71   71    HeapWord* startWord()   const { return _bmStartWord; }
  72   72    size_t    sizeInWords() const { return _bmWordSize;  }
  73   73    // the following is one past the last word in space
  74   74    HeapWord* endWord()     const { return _bmStartWord + _bmWordSize; }
  75   75  
  76   76    // read marks
[... 33 lines elided ...]
 110  110    }
 111  111  
 112  112    // debugging
 113  113    NOT_PRODUCT(bool covers(ReservedSpace rs) const;)
 114  114  };
 115  115  
 116  116  class CMBitMap : public CMBitMapRO {
 117  117  
 118  118   public:
 119  119    // constructor
 120      -  CMBitMap(ReservedSpace rs, int shifter) :
 121      -    CMBitMapRO(rs, shifter) {}
      120 +  CMBitMap(int shifter) :
      121 +    CMBitMapRO(shifter) {}
      122 +
       123 +  // Allocates the backing store for the marking bitmap
      124 +  bool allocate(ReservedSpace heap_rs);
 122  125  
 123  126    // write marks
 124  127    void mark(HeapWord* addr) {
 125  128      assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
 126  129             "outside underlying space?");
 127  130      _bm.set_bit(heapWordToOffset(addr));
 128  131    }
 129  132    void clear(HeapWord* addr) {
 130  133      assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
 131  134             "outside underlying space?");
[... 16 lines elided ...]
 148  151    // Starting at the bit corresponding to "addr" (inclusive), find the next
 149  152    // "1" bit, if any.  This bit starts some run of consecutive "1"'s; find
 150  153    // the end of this run (stopping at "end_addr").  Return the MemRegion
 151  154    // covering from the start of the region corresponding to the first bit
 152  155    // of the run to the end of the region corresponding to the last bit of
 153  156    // the run.  If there is no "1" bit at or after "addr", return an empty
 154  157    // MemRegion.
 155  158    MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
 156  159  };
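The constructor change above splits bitmap setup into two phases: construct the CMBitMap with just its shifter, then call allocate() to carve the backing store out of virtual memory sized off the heap reservation. The body below is a sketch of what allocate() plausibly does, built from HotSpot's ReservedSpace/VirtualSpace/BitMap APIs; the real implementation lives in concurrentMark.cpp, which is not part of this file's diff.

    bool CMBitMap::allocate(ReservedSpace heap_rs) {
      _bmStartWord = (HeapWord*) heap_rs.base();
      _bmWordSize  = heap_rs.size() / HeapWordSize;  // heap_rs.size() is in bytes
      // One bitmap bit covers 2^_shifter heap words; round the byte size up
      // to an allocation unit before reserving.
      size_t bitmap_bytes = (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1;
      ReservedSpace brs(ReservedSpace::allocation_align_size_up(bitmap_bytes));
      if (!brs.is_reserved()) {
        return false;  // reservation failed; the caller reports the error
      }
      // Commit the whole bitmap up front and point the BitMap at it.
      if (!_virtual_space.initialize(brs, brs.size())) {
        brs.release();
        return false;
      }
      _bm.set_map((uintptr_t*) _virtual_space.low());
      _bm.set_size(_bmWordSize >> _shifter);
      return true;
    }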
 157  160  
 158      -// Represents a marking stack used by the CM collector.
 159      -// Ideally this should be GrowableArray<> just like MSC's marking stack(s).
      161 +// Represents a marking stack used by ConcurrentMarking in the G1 collector.
 160  162  class CMMarkStack VALUE_OBJ_CLASS_SPEC {
      163 +  ReservedSpace _rs;
      164 +  VirtualSpace _virtual_space;   // Underlying backing store for actual stack
 161  165    ConcurrentMark* _cm;
 162      -  oop*   _base;        // bottom of stack
 163      -  jint   _index;       // one more than last occupied index
 164      -  jint   _capacity;    // max #elements
 165      -  jint   _saved_index; // value of _index saved at start of GC
 166      -  NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
      166 +  oop* _base;        // bottom of stack
      167 +  jint _index;       // one more than last occupied index
      168 +  jint _capacity;    // max #elements
      169 +  jint _saved_index; // value of _index saved at start of GC
      170 +  NOT_PRODUCT(jint _max_depth;)   // max depth plumbed during run
 167  171  
 168      -  bool   _overflow;
      172 +  bool  _overflow;
      173 +  bool  _should_expand;
 169  174    DEBUG_ONLY(bool _drain_in_progress;)
 170  175    DEBUG_ONLY(bool _drain_in_progress_yields;)
 171  176  
 172  177   public:
 173  178    CMMarkStack(ConcurrentMark* cm);
 174  179    ~CMMarkStack();
 175  180  
 176      -  void allocate(size_t size);
      181 +#ifndef PRODUCT
      182 +  jint max_depth() const {
      183 +    return _max_depth;
      184 +  }
      185 +#endif
      186 +
      187 +  bool allocate(size_t capacity);
 177  188  
 178  189    oop pop() {
 179  190      if (!isEmpty()) {
 180  191        return _base[--_index] ;
 181  192      }
 182  193      return NULL;
 183  194    }
 184  195  
 185  196    // If overflow happens, don't do the push, and record the overflow.
 186  197    // *Requires* that "ptr" is already marked.
[... 37 lines elided ...]
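The elided lines include the push operations this comment documents. For orientation, a CAS-based par_push consistent with the comment could look roughly like this (an illustrative sketch, not the elided body):

    void CMMarkStack::par_push(oop ptr) {
      while (true) {
        jint index = _index;
        jint next_index = index + 1;
        if (next_index > _capacity) {
          _overflow = true;        // record the overflow; drop the push
          return;
        }
        // Claim slot "index" by advancing _index with a CAS; retry on contention.
        if (Atomic::cmpxchg(next_index, &_index, index) == index) {
          _base[index] = ptr;      // "ptr" is required to be already marked
          return;
        }
      }
    }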
 224  235    // argument, if non-null, may be used to verify that only marked objects
 225  236    // are on the mark stack.  If "yield_after" is "true", then the
 226  237    // concurrent marker performing the drain offers to yield after
 227  238    // processing each object.  If a yield occurs, stops the drain operation
 228  239    // and returns false.  Otherwise, returns true.
 229  240    template<class OopClosureClass>
 230  241    bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false);
 231  242  
 232  243    bool isEmpty()    { return _index == 0; }
 233  244    bool isFull()     { return _index == _capacity; }
 234      -  int maxElems()    { return _capacity; }
      245 +  int  maxElems()   { return _capacity; }
 235  246  
 236  247    bool overflow() { return _overflow; }
 237  248    void clear_overflow() { _overflow = false; }
 238  249  
      250 +  bool should_expand() const { return _should_expand; }
      251 +  void set_should_expand();
      252 +
      253 +  // Expand the stack, typically in response to an overflow condition
      254 +  void expand();
      255 +
 239  256    int  size() { return _index; }
 240  257  
 241  258    void setEmpty()   { _index = 0; clear_overflow(); }
 242  259  
 243  260    // Record the current index.
 244  261    void note_start_of_gc();
 245  262  
 246  263    // Make sure that we have not added any entries to the stack during GC.
 247  264    void note_end_of_gc();
 248  265  
[... 88 lines elided ...]
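The new should_expand()/expand() pair implements the overflow-driven growth described in the summary: the flag is set when marking overflows the stack, and expand() is called once the stack has been drained. A plausible shape for expand(), doubling capacity up to the MarkStackSizeMax flag and re-reserving the virtual-space backing store (a sketch; logging is omitted and the exact bookkeeping is an assumption):

    void CMMarkStack::expand() {
      assert(isEmpty(), "stack should have been emptied before expanding");
      _should_expand = false;
      if (_capacity == (jint) MarkStackSizeMax) {
        return;  // already at the permitted maximum; nothing to do
      }
      // Double the capacity, clamped to the MarkStackSizeMax flag.
      jint new_capacity = MIN2(_capacity * 2, (jint) MarkStackSizeMax);
      ReservedSpace rs(ReservedSpace::allocation_align_size_up(
                       new_capacity * sizeof(oop)));
      if (rs.is_reserved()) {
        _virtual_space.release();  // drop the old backing store
        if (!_virtual_space.initialize(rs, rs.size())) {
          fatal("not enough swap for expanded marking stack");
        }
        _base     = (oop*) _virtual_space.low();
        _index    = 0;
        _capacity = new_capacity;
      }
      // If the reservation failed, we keep the current, smaller stack.
    }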
 337  354    void scan_finished();
 338  355  
 339  356    // If CM threads are still scanning root regions, wait until they
 340  357    // are done. Return true if we had to wait, false otherwise.
 341  358    bool wait_until_scan_finished();
 342  359  };
 343  360  
 344  361  class ConcurrentMarkThread;
 345  362  
 346  363  class ConcurrentMark: public CHeapObj<mtGC> {
      364 +  friend class CMMarkStack;
 347  365    friend class ConcurrentMarkThread;
 348  366    friend class CMTask;
 349  367    friend class CMBitMapClosure;
 350  368    friend class CMGlobalObjectClosure;
 351  369    friend class CMRemarkTask;
 352  370    friend class CMConcurrentMarkingTask;
 353  371    friend class G1ParNoteEndTask;
 354  372    friend class CalcLiveObjectsClosure;
 355  373    friend class G1CMRefProcTaskProxy;
 356  374    friend class G1CMRefProcTaskExecutor;
[... 213 lines elided ...]
 570  588    BitMap*  _count_card_bitmaps;
 571  589  
 572  590    // Used to record the number of marked live bytes
 573  591    // (for each region, by worker thread).
 574  592    size_t** _count_marked_bytes;
 575  593  
 576  594    // Card index of the bottom of the G1 heap. Used for biasing indices into
 577  595    // the card bitmaps.
 578  596    intptr_t _heap_bottom_card_num;
 579  597  
      598 +  // Set to true when initialization is complete
      599 +  bool _completed_initialization;
      600 +
 580  601  public:
 581  602    // Manipulation of the global mark stack.
 582  603    // Notice that the first mark_stack_push is CAS-based, whereas the
 583  604    // two below are Mutex-based. This is OK since the first one is only
 584  605    // called during evacuation pauses and doesn't compete with the
 585  606    // other two (which are called by the marking tasks during
 586  607    // concurrent marking or remark).
 587  608    bool mark_stack_push(oop p) {
 588  609      _markStack.par_push(p);
 589  610      if (_markStack.overflow()) {
[... 39 lines elided ...]
 629  650      for (uint i = 0; i < _max_worker_id; ++i)
 630  651        ret += _accum_task_vtime[i];
 631  652      return ret;
 632  653    }
 633  654  
 634  655    // Attempts to steal an object from the task queues of other tasks
 635  656    bool try_stealing(uint worker_id, int* hash_seed, oop& obj) {
 636  657      return _task_queues->steal(worker_id, hash_seed, obj);
 637  658    }
 638  659  
 639      -  ConcurrentMark(ReservedSpace rs, uint max_regions);
      660 +  ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs);
 640  661    ~ConcurrentMark();
 641  662  
 642  663    ConcurrentMarkThread* cmThread() { return _cmThread; }
 643  664  
 644  665    CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
 645  666    CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
 646  667  
 647  668    // Returns the number of GC threads to be used in a concurrent
 648  669    // phase based on the number of GC threads being used in a STW
 649  670    // phase.
[... 250 lines elided ...]
 900  921    // Unconditionally mark the given object, and unconditionally count
 901  922    // the object in the counting structures for worker id 0.
 902  923    // Should *not* be called from parallel code.
 903  924    inline bool mark_and_count(oop obj, HeapRegion* hr);
 904  925  
 905  926    // Similar to the above routine but we don't know the heap region that
 906  927    // contains the object to be marked/counted, which this routine looks up.
 907  928    // Should *not* be called from parallel code.
 908  929    inline bool mark_and_count(oop obj);
 909  930  
      931 +  // Returns true if initialization was successfully completed.
      932 +  bool completed_initialization() const {
      933 +    return _completed_initialization;
      934 +  }
      935 +
 910  936  protected:
 911  937    // Clear all the per-task bitmaps and arrays used to store the
 912  938    // counting data.
 913  939    void clear_all_count_data();
 914  940  
 915  941    // Aggregates the counting data for each worker/task
 916  942    // that was constructed while marking. Also sets
 917  943    // the amount of marked bytes for each region and
 918  944    // the top at concurrent mark count.
 919  945    void aggregate_count_data();
[... 316 lines elided ...]
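The _completed_initialization flag added above changes the construction contract: the new ConcurrentMark(G1CollectedHeap*, ReservedSpace) constructor can now fail partway (for example, if the mark stack's virtual space cannot be reserved) instead of aborting the VM. A likely call-site pattern during G1 heap setup (illustrative; the exact message text is an assumption):

    // In G1CollectedHeap initialization (sketch of the caller's side):
    _cm = new ConcurrentMark(this, heap_rs);
    if (_cm == NULL || !_cm->completed_initialization()) {
      vm_shutdown_during_initialization("Could not create/initialize ConcurrentMark");
      return JNI_ENOMEM;
    }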