1 /*
   2  * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
  26 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
  27 
  28 #include "gc_implementation/g1/concurrentMark.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 
  31 // Returns the index in the liveness accounting card bitmap
  32 // for the given address
  33 inline BitMap::idx_t ConcurrentMark::card_bitmap_index_for(HeapWord* addr) {
  34   // Below, the term "card num" means the result of shifting an address
  35   // by the card shift -- address 0 corresponds to card number 0.  One
  36   // must subtract the card num of the bottom of the heap to obtain a
  37   // card table index.
  38 
  39   intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
  40   return card_num - heap_bottom_card_num();
  41 }
  42 
  43 // Counts the given memory region in the given task/worker
  44 // counting data structures.
  45 inline void ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr,
  46                                          size_t* marked_bytes_array,
  47                                          BitMap* task_card_bm) {
  48   G1CollectedHeap* g1h = _g1h;
  49   HeapWord* start = mr.start();
  50   HeapWord* last = mr.last();
  51   size_t region_size_bytes = mr.byte_size();
  52   uint index = hr->hrs_index();
  53 
  54   assert(!hr->continuesHumongous(), "should not be HC region");
  55   assert(hr == g1h->heap_region_containing(start), "sanity");
  56   assert(hr == g1h->heap_region_containing(mr.last()), "sanity");
  57   assert(marked_bytes_array != NULL, "pre-condition");
  58   assert(task_card_bm != NULL, "pre-condition");
  59 
  60   // Add to the task local marked bytes for this region.
  61   marked_bytes_array[index] += region_size_bytes;
  62 
  63   BitMap::idx_t start_idx = card_bitmap_index_for(start);
  64   BitMap::idx_t last_idx = card_bitmap_index_for(last);
  65 
  66   // The card bitmap is task/worker specific => no need to use 'par' routines.
  67   // Set bits in the inclusive bit range [start_idx, last_idx].
  68   //
  69   // For small ranges use a simple loop; otherwise use set_range
  70   // The range are the cards that are spanned by the object/region
  71   // so 8 cards will allow objects/regions up to 4K to be handled
  72   // using the loop.
  73   if ((last_idx - start_idx) <= 8) {
  74     for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
  75      task_card_bm->set_bit(i);
  76     }
  77   } else {
  78     assert(last_idx < task_card_bm->size(), "sanity");
  79     // Note: BitMap::set_range() is exclusive.
  80     task_card_bm->set_range(start_idx, last_idx+1);
  81   }
  82 }
  83 
  84 // Counts the given memory region in the task/worker counting
  85 // data structures for the given worker id.
  86 inline void ConcurrentMark::count_region(MemRegion mr,
  87                                          HeapRegion* hr,
  88                                          uint worker_id) {
  89   size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
  90   BitMap* task_card_bm = count_card_bitmap_for(worker_id);
  91   count_region(mr, hr, marked_bytes_array, task_card_bm);
  92 }
  93 
  94 // Counts the given memory region, which may be a single object, in the
  95 // task/worker counting data structures for the given worker id.
  96 inline void ConcurrentMark::count_region(MemRegion mr, uint worker_id) {
  97   HeapWord* addr = mr.start();
  98   HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
  99   count_region(mr, hr, worker_id);
 100 }
 101 
 102 // Counts the given object in the given task/worker counting data structures.
 103 inline void ConcurrentMark::count_object(oop obj,
 104                                          HeapRegion* hr,
 105                                          size_t* marked_bytes_array,
 106                                          BitMap* task_card_bm) {
 107   MemRegion mr((HeapWord*)obj, obj->size());
 108   count_region(mr, hr, marked_bytes_array, task_card_bm);
 109 }
 110 
 111 // Counts the given object in the task/worker counting data
 112 // structures for the given worker id.
 113 inline void ConcurrentMark::count_object(oop obj,
 114                                          HeapRegion* hr,
 115                                          uint worker_id) {
 116   size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
 117   BitMap* task_card_bm = count_card_bitmap_for(worker_id);
 118   HeapWord* addr = (HeapWord*) obj;
 119   count_object(obj, hr, marked_bytes_array, task_card_bm);
 120 }
 121 
 122 // Attempts to mark the given object and, if successful, counts
 123 // the object in the given task/worker counting structures.
 124 inline bool ConcurrentMark::par_mark_and_count(oop obj,
 125                                                HeapRegion* hr,
 126                                                size_t* marked_bytes_array,
 127                                                BitMap* task_card_bm) {
 128   HeapWord* addr = (HeapWord*)obj;
 129   if (_nextMarkBitMap->parMark(addr)) {
 130     // Update the task specific count data for the object.
 131     count_object(obj, hr, marked_bytes_array, task_card_bm);
 132     return true;
 133   }
 134   return false;
 135 }
 136 
 137 // Attempts to mark the given object and, if successful, counts
 138 // the object in the task/worker counting structures for the
 139 // given worker id.
 140 inline bool ConcurrentMark::par_mark_and_count(oop obj,
 141                                                size_t word_size,
 142                                                HeapRegion* hr,
 143                                                uint worker_id) {
 144   HeapWord* addr = (HeapWord*)obj;
 145   if (_nextMarkBitMap->parMark(addr)) {
 146     MemRegion mr(addr, word_size);
 147     count_region(mr, hr, worker_id);
 148     return true;
 149   }
 150   return false;
 151 }
 152 
 153 // Attempts to mark the given object and, if successful, counts
 154 // the object in the task/worker counting structures for the
 155 // given worker id.
 156 inline bool ConcurrentMark::par_mark_and_count(oop obj,
 157                                                HeapRegion* hr,
 158                                                uint worker_id) {
 159   HeapWord* addr = (HeapWord*)obj;
 160   if (_nextMarkBitMap->parMark(addr)) {
 161     // Update the task specific count data for the object.
 162     count_object(obj, hr, worker_id);
 163     return true;
 164   }
 165   return false;
 166 }
 167 
 168 // As above - but we don't know the heap region containing the
 169 // object and so have to supply it.
 170 inline bool ConcurrentMark::par_mark_and_count(oop obj, uint worker_id) {
 171   HeapWord* addr = (HeapWord*)obj;
 172   HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
 173   return par_mark_and_count(obj, hr, worker_id);
 174 }
 175 
 176 // Similar to the above routine but we already know the size, in words, of
 177 // the object that we wish to mark/count
 178 inline bool ConcurrentMark::par_mark_and_count(oop obj,
 179                                                size_t word_size,
 180                                                uint worker_id) {
 181   HeapWord* addr = (HeapWord*)obj;
 182   if (_nextMarkBitMap->parMark(addr)) {
 183     // Update the task specific count data for the object.
 184     MemRegion mr(addr, word_size);
 185     count_region(mr, worker_id);
 186     return true;
 187   }
 188   return false;
 189 }
 190 
 191 // Unconditionally mark the given object, and unconditinally count
 192 // the object in the counting structures for worker id 0.
 193 // Should *not* be called from parallel code.
 194 inline bool ConcurrentMark::mark_and_count(oop obj, HeapRegion* hr) {
 195   HeapWord* addr = (HeapWord*)obj;
 196   _nextMarkBitMap->mark(addr);
 197   // Update the task specific count data for the object.
 198   count_object(obj, hr, 0 /* worker_id */);
 199   return true;
 200 }
 201 
 202 // As above - but we don't have the heap region containing the
 203 // object, so we have to supply it.
 204 inline bool ConcurrentMark::mark_and_count(oop obj) {
 205   HeapWord* addr = (HeapWord*)obj;
 206   HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
 207   return mark_and_count(obj, hr);
 208 }
 209 
 210 inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 211   HeapWord* start_addr = MAX2(startWord(), mr.start());
 212   HeapWord* end_addr = MIN2(endWord(), mr.end());
 213 
 214   if (end_addr > start_addr) {
 215     // Right-open interval [start-offset, end-offset).
 216     BitMap::idx_t start_offset = heapWordToOffset(start_addr);
 217     BitMap::idx_t end_offset = heapWordToOffset(end_addr);
 218 
 219     start_offset = _bm.get_next_one_offset(start_offset, end_offset);
 220     while (start_offset < end_offset) {
 221       HeapWord* obj_addr = offsetToHeapWord(start_offset);
 222       oop obj = (oop) obj_addr;
 223       if (!cl->do_bit(start_offset)) {
 224         return false;
 225       }
 226       HeapWord* next_addr = MIN2(obj_addr + obj->size(), end_addr);
 227       BitMap::idx_t next_offset = heapWordToOffset(next_addr);
 228       start_offset = _bm.get_next_one_offset(next_offset, end_offset);
 229     }
 230   }
 231   return true;
 232 }
 233 
 234 inline bool CMBitMapRO::iterate(BitMapClosure* cl) {
 235   MemRegion mr(startWord(), sizeInWords());
 236   return iterate(cl, mr);
 237 }
 238 
 239 inline void CMTask::push(oop obj) {
 240   HeapWord* objAddr = (HeapWord*) obj;
 241   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
 242   assert(!_g1h->is_on_master_free_list(
 243               _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
 244   assert(!_g1h->is_obj_ill(obj), "invariant");
 245   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
 246 
 247   if (_cm->verbose_high()) {
 248     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
 249   }
 250 
 251   if (!_task_queue->push(obj)) {
 252     // The local task queue looks full. We need to push some entries
 253     // to the global stack.
 254 
 255     if (_cm->verbose_medium()) {
 256       gclog_or_tty->print_cr("[%d] task queue overflow, "
 257                              "moving entries to the global stack",
 258                              _task_id);
 259     }
 260     move_entries_to_global_stack();
 261 
 262     // this should succeed since, even if we overflow the global
 263     // stack, we should have definitely removed some entries from the
 264     // local queue. So, there must be space on it.
 265     bool success = _task_queue->push(obj);
 266     assert(success, "invariant");
 267   }
 268 
 269   statsOnly( int tmp_size = _task_queue->size();
 270              if (tmp_size > _local_max_size) {
 271                _local_max_size = tmp_size;
 272              }
 273              ++_local_pushes );
 274 }
 275 
 276 // This determines whether the method below will check both the local
 277 // and global fingers when determining whether to push on the stack a
 278 // gray object (value 1) or whether it will only check the global one
 279 // (value 0). The tradeoffs are that the former will be a bit more
 280 // accurate and possibly push less on the stack, but it might also be
 281 // a little bit slower.
 282 
 283 #define _CHECK_BOTH_FINGERS_      1
 284 
 285 inline void CMTask::deal_with_reference(oop obj) {
 286   if (_cm->verbose_high()) {
 287     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
 288                            _task_id, (void*) obj);
 289   }
 290 
 291   ++_refs_reached;
 292 
 293   HeapWord* objAddr = (HeapWord*) obj;
 294   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
 295   if (_g1h->is_in_g1_reserved(objAddr)) {
 296     assert(obj != NULL, "null check is implicit");
 297     if (!_nextMarkBitMap->isMarked(objAddr)) {
 298       // Only get the containing region if the object is not marked on the
 299       // bitmap (otherwise, it's a waste of time since we won't do
 300       // anything with it).
 301       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
 302       if (!hr->obj_allocated_since_next_marking(obj)) {
 303         if (_cm->verbose_high()) {
 304           gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
 305                                  _task_id, (void*) obj);
 306         }
 307 
 308         // we need to mark it first
 309         if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
 310           // No OrderAccess:store_load() is needed. It is implicit in the
 311           // CAS done in CMBitMap::parMark() call in the routine above.
 312           HeapWord* global_finger = _cm->finger();
 313 
 314 #if _CHECK_BOTH_FINGERS_
 315           // we will check both the local and global fingers
 316 
 317           if (_finger != NULL && objAddr < _finger) {
 318             if (_cm->verbose_high()) {
 319               gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
 320                                      "pushing it", _task_id, _finger);
 321             }
 322             push(obj);
 323           } else if (_curr_region != NULL && objAddr < _region_limit) {
 324             // do nothing
 325           } else if (objAddr < global_finger) {
 326             // Notice that the global finger might be moving forward
 327             // concurrently. This is not a problem. In the worst case, we
 328             // mark the object while it is above the global finger and, by
 329             // the time we read the global finger, it has moved forward
 330             // passed this object. In this case, the object will probably
 331             // be visited when a task is scanning the region and will also
 332             // be pushed on the stack. So, some duplicate work, but no
 333             // correctness problems.
 334 
 335             if (_cm->verbose_high()) {
 336               gclog_or_tty->print_cr("[%d] below the global finger "
 337                                      "("PTR_FORMAT"), pushing it",
 338                                      _task_id, global_finger);
 339             }
 340             push(obj);
 341           } else {
 342             // do nothing
 343           }
 344 #else // _CHECK_BOTH_FINGERS_
 345           // we will only check the global finger
 346 
 347           if (objAddr < global_finger) {
 348             // see long comment above
 349 
 350             if (_cm->verbose_high()) {
 351               gclog_or_tty->print_cr("[%d] below the global finger "
 352                                      "("PTR_FORMAT"), pushing it",
 353                                      _task_id, global_finger);
 354             }
 355             push(obj);
 356           }
 357 #endif // _CHECK_BOTH_FINGERS_
 358         }
 359       }
 360     }
 361   }
 362 }
 363 
 364 inline void ConcurrentMark::markPrev(oop p) {
 365   assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity");
 366   // Note we are overriding the read-only view of the prev map here, via
 367   // the cast.
 368   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
 369 }
 370 
 371 inline void ConcurrentMark::grayRoot(oop obj, size_t word_size,
 372                                      uint worker_id, HeapRegion* hr) {
 373   assert(obj != NULL, "pre-condition");
 374   HeapWord* addr = (HeapWord*) obj;
 375   if (hr == NULL) {
 376     hr = _g1h->heap_region_containing_raw(addr);
 377   } else {
 378     assert(hr->is_in(addr), "pre-condition");
 379   }
 380   assert(hr != NULL, "sanity");
 381   // Given that we're looking for a region that contains an object
 382   // header it's impossible to get back a HC region.
 383   assert(!hr->continuesHumongous(), "sanity");
 384 
 385   // We cannot assert that word_size == obj->size() given that obj
 386   // might not be in a consistent state (another thread might be in
 387   // the process of copying it). So the best thing we can do is to
 388   // assert that word_size is under an upper bound which is its
 389   // containing region's capacity.
 390   assert(word_size * HeapWordSize <= hr->capacity(),
 391          err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
 392                  word_size * HeapWordSize, hr->capacity(),
 393                  HR_FORMAT_PARAMS(hr)));
 394 
 395   if (addr < hr->next_top_at_mark_start()) {
 396     if (!_nextMarkBitMap->isMarked(addr)) {
 397       par_mark_and_count(obj, word_size, hr, worker_id);
 398     }
 399   }
 400 }
 401 
 402 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP