Unified diff of the excerpt ('-' lines are the old version, '+' lines the new):

--- old
+++ new

 #include "precompiled.hpp"
 #include "gc/shared/cardTableModRefBS.hpp"
 #include "gc/shared/cardTableRS.hpp"
 #include "gc/shared/collectedHeap.hpp"
 #include "gc/shared/genCollectedHeap.hpp"
 #include "gc/shared/space.inline.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/virtualspace.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/orderAccess.inline.hpp"
 #include "runtime/vmThread.hpp"
 
 void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
                                                              OopsInGenClosure* cl,
                                                              CardTableRS* ct,
                                                              uint n_threads) {
   assert(n_threads > 0, "Error: expected n_threads > 0");
-  assert(n_threads <= (uint)ParallelGCThreads, "# worker threads != # requested!");
+  assert(n_threads <= (uint)ParallelGCThreads,
+         err_msg("Error: n_threads: %u > ParallelGCThreads: %u", n_threads, (uint)ParallelGCThreads));
 
   // Make sure the LNC array is valid for the space.
   jbyte** lowest_non_clean;
   uintptr_t lowest_non_clean_base_chunk_index;
   size_t lowest_non_clean_chunk_size;
   get_LNC_array_for_space(sp, lowest_non_clean,
                           lowest_non_clean_base_chunk_index,
                           lowest_non_clean_chunk_size);
 
   uint n_strides = n_threads * ParGCStridesPerThread;
   SequentialSubTasksDone* pst = sp->par_seq_tasks();
   // Sets the condition for completion of the subtask (how many threads
   // need to finish in order to be done).
   pst->set_n_threads(n_threads);
   pst->set_n_tasks(n_strides);
 
-  bool parallel = n_threads > 0;
-
   uint stride = 0;
   while (!pst->is_task_claimed(/* reference */ stride)) {
     process_stride(sp, mr, stride, n_strides,
-                   parallel,
                    cl, ct,
                    lowest_non_clean,
                    lowest_non_clean_base_chunk_index,
                    lowest_non_clean_chunk_size);
   }
   if (pst->all_tasks_completed()) {
     // Clear lowest_non_clean array for next time.
     intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
     uintptr_t last_chunk_index = addr_to_chunk_index(mr.last());
     for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
       intptr_t ind = ch - lowest_non_clean_base_chunk_index;
       assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
              "Bounds error");
       lowest_non_clean[ind] = NULL;
     }
   }
 }
 
 void
 CardTableModRefBS::
 process_stride(Space* sp,
                MemRegion used,
                jint stride, int n_strides,
-               bool parallel,
                OopsInGenClosure* cl,
                CardTableRS* ct,
                jbyte** lowest_non_clean,
                uintptr_t lowest_non_clean_base_chunk_index,
                size_t lowest_non_clean_chunk_size) {
   // We go from higher to lower addresses here; it wouldn't help that much
   // because of the strided parallelism pattern used here.
 
   // Find the first card address of the first chunk in the stride that is
   // at least "bottom" of the used region.
   jbyte* start_card = byte_for(used.start());
   jbyte* end_card = byte_after(used.last());
   uintptr_t start_chunk = addr_to_chunk_index(used.start());
   uintptr_t start_chunk_stride_num = start_chunk % n_strides;
   jbyte* chunk_card_start;
 
   if ((uintptr_t)stride >= start_chunk_stride_num) {
     chunk_card_start = (jbyte*)(start_card +
                                 (stride - start_chunk_stride_num) *
                                 ParGCCardsPerStrideChunk);
   } else {
     chunk_card_start = (jbyte*)(start_card +
                                 (n_strides - start_chunk_stride_num + stride) *
                                 ParGCCardsPerStrideChunk);
   }
 
   while (chunk_card_start < end_card) {
     // Even though we go from lower to higher addresses below, the
     // strided parallelism can interleave the actual processing of the
     // dirty pages in various ways. For a specific chunk within this
     // stride, we take care to avoid double scanning or missing a card
     // by suitably initializing the "min_done" field in process_chunk_boundaries()
     // below, together with the dirty region extension accomplished in
     // DirtyCardToOopClosure::do_MemRegion().
     jbyte* chunk_card_end = chunk_card_start + ParGCCardsPerStrideChunk;
     // Invariant: chunk_mr should be fully contained within the "used" region.
     MemRegion chunk_mr = MemRegion(addr_for(chunk_card_start),
                                    chunk_card_end >= end_card ?
                                      used.end() : addr_for(chunk_card_end));
     assert(chunk_mr.word_size() > 0, "[chunk_card_start > used_end)");
     assert(used.contains(chunk_mr), "chunk_mr should be subset of used");
 
+    // This function is used by the parallel card table iteration.
+    const bool parallel = true;
+
     DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(),
                                                      cl->gen_boundary(),
                                                      parallel);
     ClearNoncleanCardWrapper clear_cl(dcto_cl, ct, parallel);
 
 
     // Process the chunk.
     process_chunk_boundaries(sp,
                              dcto_cl,
                              chunk_mr,
                              used,
                              lowest_non_clean,
                              lowest_non_clean_base_chunk_index,
                              lowest_non_clean_chunk_size);
 
     // We want the LNC array updates above in process_chunk_boundaries
     // to be visible before any of the card table value changes as a
     // result of the dirty card iteration below.
     OrderAccess::storestore();
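
A note on the claiming loop in non_clean_card_iterate_parallel_work(): pst->is_task_claimed(stride) hands each caller a fresh stride index until all n_strides tasks are claimed, which is why the same loop body works for one worker or many. A minimal sketch of that pattern in standard C++ follows; SeqTasks and worker are hypothetical stand-ins, not the HotSpot SequentialSubTasksDone implementation.

// Hypothetical, simplified stand-in for SequentialSubTasksDone: an atomic
// counter hands out stride indices, so each stride is claimed exactly once
// regardless of how worker threads interleave.
#include <atomic>
#include <cstdio>

struct SeqTasks {
  std::atomic<unsigned> _claimed{0};
  unsigned _n_tasks;

  explicit SeqTasks(unsigned n_tasks) : _n_tasks(n_tasks) {}

  // Mirrors the HotSpot convention: writes the claimed task index through
  // 't' and returns true once every task has already been handed out.
  bool is_task_claimed(unsigned& t) {
    t = _claimed.fetch_add(1, std::memory_order_relaxed);
    return t >= _n_tasks;
  }
};

void worker(SeqTasks* pst) {
  unsigned stride = 0;
  while (!pst->is_task_claimed(stride)) {
    // process_stride(...) would run here; each claimed stride is unique.
    std::printf("claimed stride %u\n", stride);
  }
}

int main() {
  SeqTasks pst(8);   // e.g. n_threads * ParGCStridesPerThread = 8
  worker(&pst);      // single-threaded demo; the claiming is thread-safe
  return 0;
}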
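The chunk_card_start arithmetic in process_stride() is easier to see with concrete numbers: chunk k of the card table belongs to stride k % n_strides, and because chunk indices are global to the card table, the first chunk of the used region can map to any stride number. The sketch below replays the same if/else with hypothetical values for ParGCCardsPerStrideChunk, n_strides, and the used-region bounds; the inner loop assumes each stride then advances by n_strides chunks at a time (the bottom of the while loop falls outside this excerpt).

// Hypothetical numbers illustrating the chunk-to-stride mapping in
// process_stride(). Plain integers stand in for card addresses; in HotSpot
// the chunk index comes from addr_to_chunk_index().
#include <cstdint>
#include <cstdio>

int main() {
  const uintptr_t cards_per_chunk = 256;   // stand-in for ParGCCardsPerStrideChunk
  const uintptr_t n_strides       = 4;     // n_threads * ParGCStridesPerThread
  const uintptr_t start_card      = 1280;  // first card of "used" (chunk-aligned here)
  const uintptr_t end_card        = 5376;  // one past the last card of "used"

  // Chunk indices are global, so the region's first chunk can land anywhere
  // in the stride rotation.
  const uintptr_t start_chunk            = start_card / cards_per_chunk;  // = 5
  const uintptr_t start_chunk_stride_num = start_chunk % n_strides;       // = 1

  for (uintptr_t stride = 0; stride < n_strides; stride++) {
    // Same branch as process_stride(): distance from the region's first
    // chunk to the first chunk owned by this stride.
    uintptr_t chunk_card_start;
    if (stride >= start_chunk_stride_num) {
      chunk_card_start = start_card + (stride - start_chunk_stride_num) * cards_per_chunk;
    } else {
      chunk_card_start = start_card + (n_strides - start_chunk_stride_num + stride) * cards_per_chunk;
    }
    std::printf("stride %u owns card ranges:", (unsigned)stride);
    for (uintptr_t c = chunk_card_start; c < end_card; c += n_strides * cards_per_chunk) {
      std::printf(" [%u,%u)", (unsigned)c, (unsigned)(c + cards_per_chunk));
    }
    std::printf("\n");
  }
  return 0;
}

Running this shows the strides interleaving over the region without overlap or gaps, which is exactly the invariant the min_done initialization and dirty-region extension mentioned in the comments are protecting at chunk boundaries.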
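The storestore() at the end of the excerpt is load-bearing: process_chunk_boundaries() publishes entries in the shared lowest_non_clean ("LNC") array, and those stores must become visible to other GC workers no later than this worker's card-table writes, or a peer could act on a changed card while still reading a stale LNC entry. A rough analogy in standard C++ is sketched below; all names are hypothetical stand-ins, and a release fence is strictly stronger than a storestore barrier but conveys the ordering that matters here.

// Rough analogy for the OrderAccess::storestore() above, using standard
// C++ atomics. lnc and cards are hypothetical stand-ins for the LNC array
// updates in process_chunk_boundaries() and the card-table writes of the
// dirty-card scan.
#include <atomic>
#include <cstddef>

std::atomic<void*>         lnc[128];    // stand-in for lowest_non_clean[]
std::atomic<unsigned char> cards[4096]; // stand-in for the card table

void process_one_chunk(std::size_t chunk, std::size_t card, void* boundary) {
  // First wave of stores: publish chunk-boundary info for other workers.
  lnc[chunk].store(boundary, std::memory_order_relaxed);

  // storestore: stores above may not be reordered past stores below, so a
  // worker that observes the card change also observes the LNC entry
  // (given a matching acquire/loadload on the reading side).
  std::atomic_thread_fence(std::memory_order_release);

  // Second wave of stores: card-table updates from scanning the chunk.
  cards[card].store(0 /* clean */, std::memory_order_relaxed);
}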