src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp

Print this page
rev 4561 : 7176479: G1: JVM crashes on T5-8 system with 1.5 TB heap
Summary: Refactor G1's hot card cache and card counts table into their own files. Simplify the card counts table, including removing the encoding of the card index in each entry. The card counts table now has a 1:1 correspondence with the cards spanned by the heap. Space for the card counts table is reserved from virtual memory (rather than C heap) during JVM startup and is committed/expanded when the heap is expanded. Changes were also reviewed by Vitaly Davidovich.
Reviewed-by:
   1 /*
   2  * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTG1REFINE_HPP
  26 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTG1REFINE_HPP
  27 

  28 #include "memory/allocation.hpp"
  29 #include "memory/cardTableModRefBS.hpp"
  30 #include "runtime/thread.hpp"
  31 #include "utilities/globalDefinitions.hpp"
  32 
  33 // Forward decl
  34 class ConcurrentG1RefineThread;


  35 class G1RemSet;
  36 
  37 class ConcurrentG1Refine: public CHeapObj<mtGC> {
  38   ConcurrentG1RefineThread** _threads;
  39   int _n_threads;
  40   int _n_worker_threads;
  41  /*
  42   * The value of the update buffer queue length falls into one of 3 zones:
  43   * green, yellow, red. If the value is in [0, green) nothing is
  44   * done, the buffers are left unprocessed to enable the caching effect of the
  45   * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement
  46   * threads are gradually activated. In [yellow, red) all threads are
  47   * running. If the length becomes red (max queue length) the mutators start
  48   * processing the buffers.
  49   *
  50   * There are some interesting cases (when G1UseAdaptiveConcRefinement
  51   * is turned off):
  52   * 1) green = yellow = red = 0. In this case the mutator will process all
  53   *    buffers. Except for those that are created by the deferred updates
  54   *    machinery during a collection.
  55   * 2) green = 0. Means no caching. Can be a good way to minimize the
  56   *    amount of time spent updating rsets during a collection.
  57   */
  58   int _green_zone;
  59   int _yellow_zone;
  60   int _red_zone;
  61 
  62   int _thread_threshold_step;
  63 



  64   // Reset the threshold step value based of the current zone boundaries.
  65   void reset_threshold_step();
  66 
  67   // The cache for card refinement.
  68   bool   _use_cache;
  69   bool   _def_use_cache;
  70 
  71   size_t _n_periods;    // Used as clearing epoch
  72 
  73   // An evicting cache of the number of times each card
  74   // is accessed. Reduces, but does not eliminate, the amount
  75   // of duplicated processing of dirty cards.
  76 
  77   enum SomePrivateConstants {
  78     epoch_bits           = 32,
  79     card_num_shift       = epoch_bits,
  80     epoch_mask           = AllBits,
  81     card_num_mask        = AllBits,
  82 
  83     // The initial cache size is approximately this fraction
  84     // of a maximal cache (i.e. the size needed for all cards
  85     // in the heap)
  86     InitialCacheFraction = 512
  87   };
  88 
  89   const static julong card_num_mask_in_place =
  90                         (julong) card_num_mask << card_num_shift;
  91 
  92   typedef struct {
  93     julong _value;      // |  card_num   |  epoch   |
  94   } CardEpochCacheEntry;
  95 
  96   julong make_epoch_entry(unsigned int card_num, unsigned int epoch) {
  97     assert(0 <= card_num && card_num < _max_cards, "Bounds");
  98     assert(0 <= epoch && epoch <= _n_periods, "must be");
  99 
 100     return ((julong) card_num << card_num_shift) | epoch;
 101   }
 102 
 103   unsigned int extract_epoch(julong v) {
 104     return (v & epoch_mask);
 105   }
 106 
 107   unsigned int extract_card_num(julong v) {
 108     return (v & card_num_mask_in_place) >> card_num_shift;
 109   }
 110 
 111   typedef struct {
 112     unsigned char _count;
 113     unsigned char _evict_count;
 114   } CardCountCacheEntry;
 115 
 116   CardCountCacheEntry* _card_counts;
 117   CardEpochCacheEntry* _card_epochs;
 118 
 119   // The current number of buckets in the card count cache
 120   size_t _n_card_counts;
 121 
 122   // The number of cards for the entire reserved heap
 123   size_t _max_cards;
 124 
 125   // The max number of buckets for the card counts and epochs caches.
 126   // This is the maximum that the counts and epochs will grow to.
 127   // It is specified as a fraction or percentage of _max_cards using
 128   // G1MaxHotCardCountSizePercent.
 129   size_t _max_n_card_counts;
 130 
 131   // Possible sizes of the cache: odd primes that roughly double in size.
 132   // (See jvmtiTagMap.cpp).
 133   enum {
 134     MAX_CC_CACHE_INDEX = 15    // maximum index into the cache size array.
 135   };
 136 
 137   static size_t _cc_cache_sizes[MAX_CC_CACHE_INDEX];
 138 
 139   // The index in _cc_cache_sizes corresponding to the size of
 140   // _card_counts.
 141   int _cache_size_index;
 142 
 143   bool _expand_card_counts;
 144 
 145   const jbyte* _ct_bot;
 146 
 147   jbyte**      _hot_cache;
 148   int          _hot_cache_size;
 149   int          _n_hot;
 150   int          _hot_cache_idx;
 151 
 152   int          _hot_cache_par_chunk_size;
 153   volatile int _hot_cache_par_claimed_idx;
 154 
 155   // Needed to workaround 6817995
 156   CardTableModRefBS* _ct_bs;
 157   G1CollectedHeap*   _g1h;
 158 
 159   // Helper routine for expand_card_count_cache().
 160   // The arrays used to hold the card counts and the epochs must have
 161   // a 1:1 correspondence. Hence they are allocated and freed together.
 162   // Returns true if the allocations of both the counts and epochs
 163   // were successful; false otherwise.
 164   bool allocate_card_count_cache(size_t n,
 165                                  CardCountCacheEntry** counts,
 166                                  CardEpochCacheEntry** epochs);
 167 
 168   // Expands the arrays that hold the card counts and epochs
 169   // to the cache size at index. Returns true if the expansion/
 170   // allocation was successful; false otherwise.
 171   bool expand_card_count_cache(int index);
 172 
 173   // hash a given key (index of card_ptr) with the specified size
 174   static unsigned int hash(size_t key, size_t size) {
 175     return (unsigned int) (key % size);
 176   }
 177 
 178   // hash a given key (index of card_ptr)
 179   unsigned int hash(size_t key) {
 180     return hash(key, _n_card_counts);
 181   }
 182 
 183   unsigned int ptr_2_card_num(jbyte* card_ptr) {
 184     return (unsigned int) (card_ptr - _ct_bot);
 185   }
 186 
 187   jbyte* card_num_2_ptr(unsigned int card_num) {
 188     return (jbyte*) (_ct_bot + card_num);
 189   }
 190 
 191   // Returns the count of this card after incrementing it.
 192   jbyte* add_card_count(jbyte* card_ptr, int* count, bool* defer);
 193 
 194   // Returns true if this card is in a young region
 195   bool is_young_card(jbyte* card_ptr);
 196 
 197  public:
 198   ConcurrentG1Refine();
 199   ~ConcurrentG1Refine();
 200 
 201   void init(); // Accomplish some initialization that has to wait.
 202   void stop();
 203 
 204   void reinitialize_threads();
 205 
 206   // Iterate over the conc refine threads
 207   void threads_do(ThreadClosure *tc);
 208 
 209   // If this is the first entry for the slot, writes into the cache and
 210   // returns NULL.  If it causes an eviction, returns the evicted pointer.
 211   // Otherwise, its a cache hit, and returns NULL.
 212   jbyte* cache_insert(jbyte* card_ptr, bool* defer);
 213 
 214   // Process the cached entries.
 215   void clean_up_cache(int worker_i, G1RemSet* g1rs, DirtyCardQueue* into_cset_dcq);
 216 
 217   // Set up for parallel processing of the cards in the hot cache
 218   void clear_hot_cache_claimed_index() {
 219     _hot_cache_par_claimed_idx = 0;
 220   }
 221 
 222   // Discard entries in the hot cache.
 223   void clear_hot_cache() {
 224     _hot_cache_idx = 0; _n_hot = 0;
 225   }
 226 
 227   bool hot_cache_is_empty() { return _n_hot == 0; }
 228 
 229   bool use_cache() { return _use_cache; }
 230   void set_use_cache(bool b) {
 231     if (b) _use_cache = _def_use_cache;
 232     else   _use_cache = false;
 233   }
 234 
 235   void clear_and_record_card_counts();
 236 
 237   static int thread_num();
 238 
 239   void print_worker_threads_on(outputStream* st) const;
 240 
 241   void set_green_zone(int x)  { _green_zone = x;  }
 242   void set_yellow_zone(int x) { _yellow_zone = x; }
 243   void set_red_zone(int x)    { _red_zone = x;    }
 244 
 245   int green_zone() const      { return _green_zone;  }
 246   int yellow_zone() const     { return _yellow_zone; }
 247   int red_zone() const        { return _red_zone;    }
 248 
 249   int total_thread_num() const  { return _n_threads;        }
 250   int worker_thread_num() const { return _n_worker_threads; }
 251 
 252   int thread_threshold_step() const { return _thread_threshold_step; }


 253 };
 254 
 255 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTG1REFINE_HPP
   1 /*
   2  * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTG1REFINE_HPP
  26 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTG1REFINE_HPP
  27 
  28 #include "gc_implementation/g1/g1HotCardCache.hpp"
  29 #include "memory/allocation.hpp"

  30 #include "runtime/thread.hpp"
  31 #include "utilities/globalDefinitions.hpp"
  32 
  33 // Forward decl
  34 class ConcurrentG1RefineThread;
  35 class G1CollectedHeap;
  36 class G1HotCardCache;
  37 class G1RemSet;
  38 
  39 class ConcurrentG1Refine: public CHeapObj<mtGC> {
  40   ConcurrentG1RefineThread** _threads;
  41   int _n_threads;
  42   int _n_worker_threads;
  43  /*
  44   * The value of the update buffer queue length falls into one of 3 zones:
  45   * green, yellow, red. If the value is in [0, green) nothing is
  46   * done, the buffers are left unprocessed to enable the caching effect of the
  47   * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement
  48   * threads are gradually activated. In [yellow, red) all threads are
  49   * running. If the length becomes red (max queue length) the mutators start
  50   * processing the buffers.
  51   *
  52   * There are some interesting cases (when G1UseAdaptiveConcRefinement
  53   * is turned off):
  54   * 1) green = yellow = red = 0. In this case the mutator will process all
  55   *    buffers. Except for those that are created by the deferred updates
  56   *    machinery during a collection.
  57   * 2) green = 0. Means no caching. Can be a good way to minimize the
  58   *    amount of time spent updating rsets during a collection.
  59   */
  60   int _green_zone;
  61   int _yellow_zone;
  62   int _red_zone;
  63 
  64   int _thread_threshold_step;
  65 
  66   // We delay the refinement of 'hot' cards using the hot card cache.
  67   G1HotCardCache _hot_card_cache;
  68 
  69   // Reset the threshold step value based of the current zone boundaries.
  70   void reset_threshold_step();
  71 


































































































































  72  public:
  73   ConcurrentG1Refine(G1CollectedHeap* g1h);
  74   ~ConcurrentG1Refine();
  75 
  76   void init(); // Accomplish some initialization that has to wait.
  77   void stop();
  78 
  79   void reinitialize_threads();
  80 
  81   // Iterate over the conc refine threads
  82   void threads_do(ThreadClosure *tc);
  83 




























  84   static int thread_num();
  85 
  86   void print_worker_threads_on(outputStream* st) const;
  87 
  88   void set_green_zone(int x)  { _green_zone = x;  }
  89   void set_yellow_zone(int x) { _yellow_zone = x; }
  90   void set_red_zone(int x)    { _red_zone = x;    }
  91 
  92   int green_zone() const      { return _green_zone;  }
  93   int yellow_zone() const     { return _yellow_zone; }
  94   int red_zone() const        { return _red_zone;    }
  95 
  96   int total_thread_num() const  { return _n_threads;        }
  97   int worker_thread_num() const { return _n_worker_threads; }
  98 
  99   int thread_threshold_step() const { return _thread_threshold_step; }
 100 
 101   G1HotCardCache* hot_card_cache() { return &_hot_card_cache; }
 102 };
 103 
 104 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTG1REFINE_HPP