/*
 * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

# include "incls/_precompiled.incl"
# include "incls/_parCardTableModRefBS.cpp.incl"

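// Parallel support for CardTableModRefBS: the dirty-card scan of a space
// is split into "strides" of card-table chunks that GC worker threads
// claim and process independently, with care taken at chunk boundaries
// where objects may span from one chunk into the next.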
void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
                                                        DirtyCardToOopClosure* dcto_cl,
                                                        MemRegionClosure* cl,
                                                        bool clear,
                                                        int n_threads) {
  if (n_threads > 0) {
    assert((n_threads == 1 && ParallelGCThreads == 0) ||
           n_threads <= (int)ParallelGCThreads,
           "# worker threads != # requested!");
    // Make sure the LNC array is valid for the space.
    jbyte**   lowest_non_clean;
    uintptr_t lowest_non_clean_base_chunk_index;
    size_t    lowest_non_clean_chunk_size;
    get_LNC_array_for_space(sp, lowest_non_clean,
                            lowest_non_clean_base_chunk_index,
                            lowest_non_clean_chunk_size);

    int n_strides = n_threads * StridesPerThread;
    SequentialSubTasksDone* pst = sp->par_seq_tasks();
    pst->set_par_threads(n_threads);
    pst->set_n_tasks(n_strides);

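    // Each stride is a separately claimable sub-task.  Strides are claimed
    // dynamically below, so a worker that finishes one stride can go on to
    // claim another.  For illustration (the StridesPerThread value is just
    // an example): with n_threads == 2 and StridesPerThread == 2 we get
    // n_strides == 4, and the card-table chunks of the space are dealt out
    // round-robin to strides 0 through 3.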
    int stride = 0;
    while (!pst->is_task_claimed(/* reference */ stride)) {
      process_stride(sp, mr, stride, n_strides, dcto_cl, cl, clear,
                     lowest_non_clean,
                     lowest_non_clean_base_chunk_index,
                     lowest_non_clean_chunk_size);
    }
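    // all_tasks_completed() is expected to return true only for the last
    // worker to finish, so the lowest_non_clean entries below are cleared
    // by exactly one thread.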
    if (pst->all_tasks_completed()) {
      // Clear lowest_non_clean array for next time.
      uintptr_t first_chunk_index = addr_to_chunk_index(mr.start());
      uintptr_t last_chunk_index  = addr_to_chunk_index(mr.last());
      for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
        intptr_t ind = ch - lowest_non_clean_base_chunk_index;
        assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
               "Bounds error");
        lowest_non_clean[ind] = NULL;
      }
    }
  }
}

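// Process all chunks of the given stride within the used region.  Chunks
// are handed out to strides round-robin by chunk index, so consecutive
// chunks of one stride are n_strides chunks apart.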
void
CardTableModRefBS::
process_stride(Space* sp,
               MemRegion used,
               jint stride, int n_strides,
               DirtyCardToOopClosure* dcto_cl,
               MemRegionClosure* cl,
               bool clear,
               jbyte** lowest_non_clean,
               uintptr_t lowest_non_clean_base_chunk_index,
               size_t    lowest_non_clean_chunk_size) {
  // We don't have to go downwards here; it wouldn't help anyway,
  // because of parallelism.

  // Find the first card address of the first chunk in the stride that is
  // at least "bottom" of the used region.
  jbyte*    start_card  = byte_for(used.start());
  jbyte*    end_card    = byte_after(used.last());
  uintptr_t start_chunk = addr_to_chunk_index(used.start());
  uintptr_t start_chunk_stride_num = start_chunk % n_strides;
  jbyte* chunk_card_start;

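  // Chunks are assigned to strides round-robin by chunk index: the chunk
  // containing used.start() belongs to stride start_chunk_stride_num.  For
  // illustration, if n_strides == 4 and start_chunk_stride_num == 2, then
  // stride 3 starts one chunk after start_card, while stride 1 must skip
  // ahead (4 - 2 + 1) == 3 chunks to reach its first chunk.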
  if ((uintptr_t)stride >= start_chunk_stride_num) {
    chunk_card_start = (jbyte*)(start_card +
                                (stride - start_chunk_stride_num) *
                                CardsPerStrideChunk);
  } else {
    // Go ahead to the next chunk group boundary, then to the requested stride.
    chunk_card_start = (jbyte*)(start_card +
                                (n_strides - start_chunk_stride_num + stride) *
                                CardsPerStrideChunk);
  }

  while (chunk_card_start < end_card) {
    // We don't have to go downwards here; it wouldn't help anyway,
    // because of parallelism.  (We take care with "min_done"; see below.)
    // Invariant: chunk_mr should be fully contained within the "used" region.
    jbyte*    chunk_card_end = chunk_card_start + CardsPerStrideChunk;
    MemRegion chunk_mr       = MemRegion(addr_for(chunk_card_start),
                                         chunk_card_end >= end_card ?
                                           used.end() : addr_for(chunk_card_end));
    assert(chunk_mr.word_size() > 0, "chunk_card_start beyond used.end()");
    assert(used.contains(chunk_mr), "chunk_mr should be subset of used");

    // Process the chunk.
    process_chunk_boundaries(sp,
                             dcto_cl,
                             chunk_mr,
                             used,
                             lowest_non_clean,
                             lowest_non_clean_base_chunk_index,
                             lowest_non_clean_chunk_size);

    non_clean_card_iterate_work(chunk_mr, cl, clear);

    // Find the next chunk of the stride.
    chunk_card_start += CardsPerStrideChunk * n_strides;
  }
}

void
CardTableModRefBS::
process_chunk_boundaries(Space* sp,
                         DirtyCardToOopClosure* dcto_cl,
                         MemRegion chunk_mr,
                         MemRegion used,
                         jbyte** lowest_non_clean,
                         uintptr_t lowest_non_clean_base_chunk_index,
                         size_t    lowest_non_clean_chunk_size)
{
  // We must worry about the chunk boundaries: an object may span the
  // boundary between this chunk and its neighbors.  Below we compute
  // "max_to_do", the address at and beyond which this chunk's closure must
  // not process (it may lie past the chunk's end when the last object
  // spans into the next chunk); further down we record in the
  // lowest_non_clean (LNC) array the first dirty card of this chunk when
  // an object spans its start, so that the worker for the previous chunk
  // knows where to stop.

  // First, set our max_to_do:
  HeapWord* max_to_do = NULL;
  uintptr_t cur_chunk_index = addr_to_chunk_index(chunk_mr.start());
  cur_chunk_index           = cur_chunk_index - lowest_non_clean_base_chunk_index;
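  // cur_chunk_index is now relative to lowest_non_clean_base_chunk_index,
  // so it indexes lowest_non_clean[] directly for this chunk, and
  // cur_chunk_index + 1 does so for the following chunk.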

  if (chunk_mr.end() < used.end()) {
    // This is not the last chunk in the used region.  What is the last
    // object?
    HeapWord* last_block = sp->block_start(chunk_mr.end());
    assert(last_block <= chunk_mr.end(), "In case this property changes.");
    if (last_block == chunk_mr.end()
        || !sp->block_is_obj(last_block)) {
      max_to_do = chunk_mr.end();

    } else {
      // It is an object and starts before the end of the current chunk.
      // last_obj_card is the card corresponding to the start of the last object
      // in the chunk.  Note that the last object may not start in
      // the chunk.
      jbyte* last_obj_card = byte_for(last_block);
      if (!card_may_have_been_dirty(*last_obj_card)) {
        // The card containing the head is not dirty.  Any marks in
        // subsequent cards still in this chunk must have been made
        // precisely; we can cap processing at the end.
        max_to_do = chunk_mr.end();
      } else {
        // The last object must be considered dirty, and extends onto the
        // following chunk.  Look for a dirty card in that chunk that will
        // bound our processing.
        jbyte* limit_card = NULL;
        size_t last_block_size = sp->block_size(last_block);
        jbyte* last_card_of_last_obj =
          byte_for(last_block + last_block_size - 1);
        jbyte* first_card_of_next_chunk = byte_for(chunk_mr.end());
        // This search potentially goes a long distance looking
        // for the next card that will be scanned.  For example,
        // an object that is an array of primitives will not
        // have any cards covering regions interior to the array
        // that will need to be scanned.  The scan can be terminated
        // at the last card of the next chunk.  That would leave
        // limit_card as NULL and would result in "max_to_do"
        // being set with the LNC value or with the end
        // of the last block.
        jbyte* last_card_of_next_chunk = first_card_of_next_chunk +
          CardsPerStrideChunk;
        assert(byte_for(chunk_mr.end()) - byte_for(chunk_mr.start())
          == CardsPerStrideChunk, "last card of next chunk may be wrong");
        jbyte* last_card_to_check = (jbyte*) MIN2(last_card_of_last_obj,
                                                  last_card_of_next_chunk);
        for (jbyte* cur = first_card_of_next_chunk;
             cur <= last_card_to_check; cur++) {
          if (card_will_be_scanned(*cur)) {
            limit_card = cur; break;
          }
        }
        assert(0 <= cur_chunk_index+1 &&
               cur_chunk_index+1 < lowest_non_clean_chunk_size,
               "Bounds error.");
        // LNC for the next chunk
        jbyte* lnc_card = lowest_non_clean[cur_chunk_index+1];
        if (limit_card == NULL) {
          limit_card = lnc_card;
        }
        if (limit_card != NULL) {
          if (lnc_card != NULL) {
            limit_card = (jbyte*)MIN2((intptr_t)limit_card,
                                      (intptr_t)lnc_card);
          }
          max_to_do = addr_for(limit_card);
        } else {
          max_to_do = last_block + last_block_size;
        }
      }
    }
    assert(max_to_do != NULL, "OOPS!");
  } else {
    max_to_do = used.end();
  }
  // Now we can set the closure we're using so it doesn't go beyond
  // max_to_do.
  dcto_cl->set_min_done(max_to_do);
#ifndef PRODUCT
  dcto_cl->set_last_bottom(max_to_do);
#endif

  // Now we set *our* lowest_non_clean entry.
  // Find the object that spans our boundary, if one exists.
  // Nothing to do on the first chunk.
  if (chunk_mr.start() > used.start()) {
    // first_block is the block possibly spanning the chunk start
    HeapWord* first_block = sp->block_start(chunk_mr.start());
    // Does the block span the start of the chunk and is it
    // an object?
    if (first_block < chunk_mr.start() &&
        sp->block_is_obj(first_block)) {
      jbyte* first_dirty_card = NULL;
      jbyte* last_card_of_first_obj =
          byte_for(first_block + sp->block_size(first_block) - 1);
      jbyte* first_card_of_cur_chunk = byte_for(chunk_mr.start());
      jbyte* last_card_of_cur_chunk = byte_for(chunk_mr.last());
      jbyte* last_card_to_check =
        (jbyte*) MIN2((intptr_t) last_card_of_cur_chunk,
                      (intptr_t) last_card_of_first_obj);
      for (jbyte* cur = first_card_of_cur_chunk;
           cur <= last_card_to_check; cur++) {
        if (card_will_be_scanned(*cur)) {
          first_dirty_card = cur; break;
        }
      }
      if (first_dirty_card != NULL) {
        assert(0 <= cur_chunk_index &&
                 cur_chunk_index < lowest_non_clean_chunk_size,
               "Bounds error.");
        lowest_non_clean[cur_chunk_index] = first_dirty_card;
      }
    }
  }
}

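// Return, via the reference parameters, the lowest_non_clean (LNC) array
// covering "sp", together with its base chunk index and its size in
// chunks.  The array is (re)allocated at most once per collection, under
// ParGCRareEvent_lock, when it does not yet exist or the number of chunks
// it must cover has changed.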
void
CardTableModRefBS::
get_LNC_array_for_space(Space* sp,
                        jbyte**& lowest_non_clean,
                        uintptr_t& lowest_non_clean_base_chunk_index,
                        size_t& lowest_non_clean_chunk_size) {

  int       i        = find_covering_region_containing(sp->bottom());
  MemRegion covered  = _covered[i];
  size_t    n_chunks = chunks_to_cover(covered);

  // Only the first thread to obtain the lock will resize the
  // LNC array for the covered region.  Any later expansion can't affect
  // the used_at_save_marks region.
  // (I observed a bug in which the first thread to execute this would
  // resize, and then it would cause "expand_and_allocate" calls that would
  // increase the number of chunks in the covered region.  Then a second
  // thread would come and execute this, see that the size didn't match,
  // and free and allocate again.  So the first thread would be using a
  // freed "_lowest_non_clean" array.)

  // Do a dirty (unlocked) read here.  If it indicates that a resize is
  // needed, take the rare-event lock and read again, in case some other
  // thread has already done the resize.
  int cur_collection = Universe::heap()->total_collections();
  if (_last_LNC_resizing_collection[i] != cur_collection) {
    MutexLocker x(ParGCRareEvent_lock);
    if (_last_LNC_resizing_collection[i] != cur_collection) {
      if (_lowest_non_clean[i] == NULL ||
          n_chunks != _lowest_non_clean_chunk_size[i]) {

        // Should we delete the old?
        if (_lowest_non_clean[i] != NULL) {
          assert(n_chunks != _lowest_non_clean_chunk_size[i],
                 "logical consequence");
          FREE_C_HEAP_ARRAY(CardPtr, _lowest_non_clean[i]);
          _lowest_non_clean[i] = NULL;
        }
        // Now allocate a new one if necessary.
        if (_lowest_non_clean[i] == NULL) {
          _lowest_non_clean[i]                  = NEW_C_HEAP_ARRAY(CardPtr, n_chunks);
          _lowest_non_clean_chunk_size[i]       = n_chunks;
          _lowest_non_clean_base_chunk_index[i] = addr_to_chunk_index(covered.start());
          for (int j = 0; j < (int)n_chunks; j++)
            _lowest_non_clean[i][j] = NULL;
        }
      }
      _last_LNC_resizing_collection[i] = cur_collection;
    }
  }
  // In any case, now do the initialization.
  lowest_non_clean                  = _lowest_non_clean[i];
  lowest_non_clean_base_chunk_index = _lowest_non_clean_base_chunk_index[i];
  lowest_non_clean_chunk_size       = _lowest_non_clean_chunk_size[i];
}