6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "gc/cms/cmsHeap.hpp"
27 #include "gc/shared/cardTableBarrierSet.hpp"
28 #include "gc/shared/cardTableRS.hpp"
29 #include "gc/shared/collectedHeap.hpp"
30 #include "gc/shared/space.inline.hpp"
31 #include "memory/allocation.inline.hpp"
32 #include "memory/virtualspace.hpp"
33 #include "oops/oop.inline.hpp"
34 #include "runtime/java.hpp"
35 #include "runtime/mutexLocker.hpp"
36 #include "runtime/orderAccess.inline.hpp"
37 #include "runtime/vmThread.hpp"
38
// Parallel work method: iterate over the non-clean (dirty) cards of space
// "sp", restricted to MemRegion "mr", applying closure "cl".  The work is
// split into n_threads * ParGCStridesPerThread strides, each handled by a
// call to process_stride().  The lowest-non-clean (LNC) card array for the
// space is fetched up front; once all subtasks report complete, the LNC
// entries covering "mr" are reset to NULL for the next collection cycle.
39 void CardTableRS::
40 non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
41 OopsInGenClosure* cl,
42 CardTableRS* ct,
43 uint n_threads) {
44 assert(n_threads > 0, "expected n_threads > 0");
45 assert(n_threads <= ParallelGCThreads,
46 "n_threads: %u > ParallelGCThreads: %u", n_threads, ParallelGCThreads);
47
48 // Make sure the LNC array is valid for the space.
49 jbyte** lowest_non_clean;
50 uintptr_t lowest_non_clean_base_chunk_index;
51 size_t lowest_non_clean_chunk_size;
52 get_LNC_array_for_space(sp, lowest_non_clean,
53 lowest_non_clean_base_chunk_index,
54 lowest_non_clean_chunk_size);
55
// Each worker thread claims whole strides; ParGCStridesPerThread strides
// are created per participating thread.
56 uint n_strides = n_threads * ParGCStridesPerThread;
57 SequentialSubTasksDone* pst = sp->par_seq_tasks();
58 // Sets the condition for completion of the subtask (how many threads
59 // need to finish in order to be done).
65 process_stride(sp, mr, stride, n_strides,
66 cl, ct,
67 lowest_non_clean,
68 lowest_non_clean_base_chunk_index,
69 lowest_non_clean_chunk_size);
70 }
// NOTE(review): assumed all_tasks_completed() returns true for exactly one
// worker, so the LNC cleanup below runs once -- confirm in
// SequentialSubTasksDone.
71 if (pst->all_tasks_completed()) {
72 // Clear lowest_non_clean array for next time.
73 intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
74 uintptr_t last_chunk_index = addr_to_chunk_index(mr.last());
75 for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
// Index into the LNC array is relative to its base chunk index.
76 intptr_t ind = ch - lowest_non_clean_base_chunk_index;
77 assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
78 "Bounds error");
79 lowest_non_clean[ind] = NULL;
80 }
81 }
82 }
83
// Processes one stride ("stride" out of "n_strides") of the chunk partition
// of the "used" region: each chunk of the stride has its boundary-crossing
// objects handled (the LNC updates referenced in the comment below), after
// which the chunk's contiguous dirty card ranges are scanned and cleared
// via clear_cl.do_MemRegion().  Chunks of the same stride are
// ParGCCardsPerStrideChunk * n_strides cards apart.
84 void
85 CardTableRS::
86 process_stride(Space* sp,
87 MemRegion used,
88 jint stride, int n_strides,
89 OopsInGenClosure* cl,
90 CardTableRS* ct,
91 jbyte** lowest_non_clean,
92 uintptr_t lowest_non_clean_base_chunk_index,
93 size_t lowest_non_clean_chunk_size) {
94 // We go from higher to lower addresses here; it wouldn't help that much
95 // because of the strided parallelism pattern used here.
96
97 // Find the first card address of the first chunk in the stride that is
98 // at least "bottom" of the used region.
99 jbyte* start_card = byte_for(used.start());
100 jbyte* end_card = byte_after(used.last());
101 uintptr_t start_chunk = addr_to_chunk_index(used.start());
// Which stride the first chunk of the used region belongs to.
102 uintptr_t start_chunk_stride_num = start_chunk % n_strides;
103 jbyte* chunk_card_start;
104
105 if ((uintptr_t)stride >= start_chunk_stride_num) {
145 used,
146 lowest_non_clean,
147 lowest_non_clean_base_chunk_index,
148 lowest_non_clean_chunk_size);
149
150 // We want the LNC array updates above in process_chunk_boundaries
151 // to be visible before any of the card table value changes as a
152 // result of the dirty card iteration below.
153 OrderAccess::storestore();
154
155 // We want to clear the cards: clear_cl here does the work of finding
156 // contiguous dirty ranges of cards to process and clear.
157 clear_cl.do_MemRegion(chunk_mr);
158
159 // Find the next chunk of the stride.
160 chunk_card_start += ParGCCardsPerStrideChunk * n_strides;
161 }
162 }
163
// Establishes the scanning limit for chunk "chunk_mr" so that objects
// crossing chunk boundaries are scanned exactly once (see the discussion
// below): computes "max_to_do", the address beyond which this chunk's scan
// must not proceed, and installs it into the closure via set_min_done()
// (and, in non-product builds, set_last_bottom()).  When the chunk extends
// to the end of the used region, max_to_do is simply used.end().
164 void
165 CardTableRS::
166 process_chunk_boundaries(Space* sp,
167 DirtyCardToOopClosure* dcto_cl,
168 MemRegion chunk_mr,
169 MemRegion used,
170 jbyte** lowest_non_clean,
171 uintptr_t lowest_non_clean_base_chunk_index,
172 size_t lowest_non_clean_chunk_size)
173 {
174 // We must worry about non-array objects that cross chunk boundaries,
175 // because such objects are both precisely and imprecisely marked:
176 // .. if the head of such an object is dirty, the entire object
177 // needs to be scanned, under the interpretation that this
178 // was an imprecise mark
179 // .. if the head of such an object is not dirty, we can assume
180 // precise marking and it's efficient to scan just the dirty
181 // cards.
182 // In either case, each scanned reference must be scanned precisely
183 // once so as to avoid cloning of a young referent. For efficiency,
184 // our closures depend on this property and do not protect against
185 // double scans.
354 } // else continue to look for a non-NULL entry if any
355 }
356 assert(limit_card != NULL && max_to_do != NULL, "Error");
357 }
358 assert(max_to_do != NULL, "OOPS 1 !");
359 }
360 assert(max_to_do != NULL, "OOPS 2!");
361 } else {
// The chunk reaches the end of the used region: no right-neighbor limit.
362 max_to_do = used.end();
363 }
364 assert(max_to_do != NULL, "OOPS 3!");
365 // Now we can set the closure we're using so it doesn't go beyond
366 // max_to_do.
367 dcto_cl->set_min_done(max_to_do);
368 #ifndef PRODUCT
369 dcto_cl->set_last_bottom(max_to_do);
370 #endif
371 }
372
// Returns, through the three reference out-parameters, the lowest-non-clean
// (LNC) card array covering the region that contains sp->bottom(): the
// array itself, the chunk index of its first entry, and its length in
// chunks.  If the cached array apparently no longer matches the number of
// chunks to cover, it is reallocated and NULL-initialized; the release
// store below publishes the initialized array before the per-region
// resizing stamp becomes visible to other threads.
373 void
374 CardTableRS::
375 get_LNC_array_for_space(Space* sp,
376 jbyte**& lowest_non_clean,
377 uintptr_t& lowest_non_clean_base_chunk_index,
378 size_t& lowest_non_clean_chunk_size) {
379
380 int i = find_covering_region_containing(sp->bottom());
381 MemRegion covered = _covered[i];
382 size_t n_chunks = chunks_to_cover(covered);
383
384 // Only the first thread to obtain the lock will resize the
385 // LNC array for the covered region. Any later expansion can't affect
386 // the used_at_save_marks region.
387 // (I observed a bug in which the first thread to execute this would
388 // resize, and then it would cause "expand_and_allocate" that would
389 // increase the number of chunks in the covered region. Then a second
390 // thread would come and execute this, see that the size didn't match,
391 // and free and allocate again. So the first thread would be using a
392 // freed "_lowest_non_clean" array.)
393
394 // Do a dirty read here. If we pass the conditional then take the rare
413 _lowest_non_clean[i] = NULL;
414 }
415 // Now allocate a new one if necessary.
416 if (_lowest_non_clean[i] == NULL) {
417 _lowest_non_clean[i] = NEW_C_HEAP_ARRAY(CardPtr, n_chunks, mtGC);
418 _lowest_non_clean_chunk_size[i] = n_chunks;
419 _lowest_non_clean_base_chunk_index[i] = addr_to_chunk_index(covered.start());
// Fresh arrays start with every chunk entry clean (NULL).
420 for (int j = 0; j < (int)n_chunks; j++)
421 _lowest_non_clean[i][j] = NULL;
422 }
423 }
424 // Make sure this gets visible only after _lowest_non_clean* was initialized
425 OrderAccess::release_store(&_last_LNC_resizing_collection[i], cur_collection);
426 }
427 }
428 // In any case, now do the initialization.
429 lowest_non_clean = _lowest_non_clean[i];
430 lowest_non_clean_base_chunk_index = _lowest_non_clean_base_chunk_index[i];
431 lowest_non_clean_chunk_size = _lowest_non_clean_chunk_size[i];
432 }
|
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "gc/cms/cmsCardTable.hpp"
27 #include "gc/cms/cmsHeap.hpp"
28 #include "gc/shared/cardTableBarrierSet.hpp"
29 #include "gc/shared/cardTableRS.hpp"
30 #include "gc/shared/collectedHeap.hpp"
31 #include "gc/shared/space.inline.hpp"
32 #include "memory/allocation.inline.hpp"
33 #include "memory/virtualspace.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "runtime/java.hpp"
36 #include "runtime/mutexLocker.hpp"
37 #include "runtime/orderAccess.inline.hpp"
38 #include "runtime/vmThread.hpp"
39
40 CMSCardTable::CMSCardTable(MemRegion whole_heap) :
41 CardTableRS(whole_heap, CMSPrecleaningEnabled /* scanned_concurrently */) {
42 }
43
44 // Returns the number of chunks necessary to cover "mr".
45 size_t CMSCardTable::chunks_to_cover(MemRegion mr) {
46 return (size_t)(addr_to_chunk_index(mr.last()) -
47 addr_to_chunk_index(mr.start()) + 1);
48 }
49
50 // Returns the index of the chunk in a stride which
51 // covers the given address.
52 uintptr_t CMSCardTable::addr_to_chunk_index(const void* addr) {
53 uintptr_t card = (uintptr_t) byte_for(addr);
54 return card / ParGCCardsPerStrideChunk;
55 }
56
// Parallel work method: iterate over the non-clean (dirty) cards of space
// "sp", restricted to MemRegion "mr", applying closure "cl".  The work is
// split into n_threads * ParGCStridesPerThread strides, each handled by a
// call to process_stride().  The lowest-non-clean (LNC) card array for the
// space is fetched up front; once all subtasks report complete, the LNC
// entries covering "mr" are reset to NULL for the next collection cycle.
57 void CMSCardTable::
58 non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
59 OopsInGenClosure* cl,
60 CardTableRS* ct,
61 uint n_threads) {
62 assert(n_threads > 0, "expected n_threads > 0");
63 assert(n_threads <= ParallelGCThreads,
64 "n_threads: %u > ParallelGCThreads: %u", n_threads, ParallelGCThreads);
65
66 // Make sure the LNC array is valid for the space.
67 jbyte** lowest_non_clean;
68 uintptr_t lowest_non_clean_base_chunk_index;
69 size_t lowest_non_clean_chunk_size;
70 get_LNC_array_for_space(sp, lowest_non_clean,
71 lowest_non_clean_base_chunk_index,
72 lowest_non_clean_chunk_size);
73
// Each worker thread claims whole strides; ParGCStridesPerThread strides
// are created per participating thread.
74 uint n_strides = n_threads * ParGCStridesPerThread;
75 SequentialSubTasksDone* pst = sp->par_seq_tasks();
76 // Sets the condition for completion of the subtask (how many threads
77 // need to finish in order to be done).
83 process_stride(sp, mr, stride, n_strides,
84 cl, ct,
85 lowest_non_clean,
86 lowest_non_clean_base_chunk_index,
87 lowest_non_clean_chunk_size);
88 }
// NOTE(review): assumed all_tasks_completed() returns true for exactly one
// worker, so the LNC cleanup below runs once -- confirm in
// SequentialSubTasksDone.
89 if (pst->all_tasks_completed()) {
90 // Clear lowest_non_clean array for next time.
91 intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
92 uintptr_t last_chunk_index = addr_to_chunk_index(mr.last());
93 for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
// Index into the LNC array is relative to its base chunk index.
94 intptr_t ind = ch - lowest_non_clean_base_chunk_index;
95 assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
96 "Bounds error");
97 lowest_non_clean[ind] = NULL;
98 }
99 }
100 }
101
// Processes one stride ("stride" out of "n_strides") of the chunk partition
// of the "used" region: each chunk of the stride has its boundary-crossing
// objects handled (the LNC updates referenced in the comment below), after
// which the chunk's contiguous dirty card ranges are scanned and cleared
// via clear_cl.do_MemRegion().  Chunks of the same stride are
// ParGCCardsPerStrideChunk * n_strides cards apart.
102 void
103 CMSCardTable::
104 process_stride(Space* sp,
105 MemRegion used,
106 jint stride, int n_strides,
107 OopsInGenClosure* cl,
108 CardTableRS* ct,
109 jbyte** lowest_non_clean,
110 uintptr_t lowest_non_clean_base_chunk_index,
111 size_t lowest_non_clean_chunk_size) {
112 // We go from higher to lower addresses here; it wouldn't help that much
113 // because of the strided parallelism pattern used here.
114
115 // Find the first card address of the first chunk in the stride that is
116 // at least "bottom" of the used region.
117 jbyte* start_card = byte_for(used.start());
118 jbyte* end_card = byte_after(used.last());
119 uintptr_t start_chunk = addr_to_chunk_index(used.start());
// Which stride the first chunk of the used region belongs to.
120 uintptr_t start_chunk_stride_num = start_chunk % n_strides;
121 jbyte* chunk_card_start;
122
123 if ((uintptr_t)stride >= start_chunk_stride_num) {
163 used,
164 lowest_non_clean,
165 lowest_non_clean_base_chunk_index,
166 lowest_non_clean_chunk_size);
167
168 // We want the LNC array updates above in process_chunk_boundaries
169 // to be visible before any of the card table value changes as a
170 // result of the dirty card iteration below.
171 OrderAccess::storestore();
172
173 // We want to clear the cards: clear_cl here does the work of finding
174 // contiguous dirty ranges of cards to process and clear.
175 clear_cl.do_MemRegion(chunk_mr);
176
177 // Find the next chunk of the stride.
178 chunk_card_start += ParGCCardsPerStrideChunk * n_strides;
179 }
180 }
181
// Establishes the scanning limit for chunk "chunk_mr" so that objects
// crossing chunk boundaries are scanned exactly once (see the discussion
// below): computes "max_to_do", the address beyond which this chunk's scan
// must not proceed, and installs it into the closure via set_min_done()
// (and, in non-product builds, set_last_bottom()).  When the chunk extends
// to the end of the used region, max_to_do is simply used.end().
182 void
183 CMSCardTable::
184 process_chunk_boundaries(Space* sp,
185 DirtyCardToOopClosure* dcto_cl,
186 MemRegion chunk_mr,
187 MemRegion used,
188 jbyte** lowest_non_clean,
189 uintptr_t lowest_non_clean_base_chunk_index,
190 size_t lowest_non_clean_chunk_size)
191 {
192 // We must worry about non-array objects that cross chunk boundaries,
193 // because such objects are both precisely and imprecisely marked:
194 // .. if the head of such an object is dirty, the entire object
195 // needs to be scanned, under the interpretation that this
196 // was an imprecise mark
197 // .. if the head of such an object is not dirty, we can assume
198 // precise marking and it's efficient to scan just the dirty
199 // cards.
200 // In either case, each scanned reference must be scanned precisely
201 // once so as to avoid cloning of a young referent. For efficiency,
202 // our closures depend on this property and do not protect against
203 // double scans.
372 } // else continue to look for a non-NULL entry if any
373 }
374 assert(limit_card != NULL && max_to_do != NULL, "Error");
375 }
376 assert(max_to_do != NULL, "OOPS 1 !");
377 }
378 assert(max_to_do != NULL, "OOPS 2!");
379 } else {
// The chunk reaches the end of the used region: no right-neighbor limit.
380 max_to_do = used.end();
381 }
382 assert(max_to_do != NULL, "OOPS 3!");
383 // Now we can set the closure we're using so it doesn't go beyond
384 // max_to_do.
385 dcto_cl->set_min_done(max_to_do);
386 #ifndef PRODUCT
387 dcto_cl->set_last_bottom(max_to_do);
388 #endif
389 }
390
// Returns, through the three reference out-parameters, the lowest-non-clean
// (LNC) card array covering the region that contains sp->bottom(): the
// array itself, the chunk index of its first entry, and its length in
// chunks.  If the cached array apparently no longer matches the number of
// chunks to cover, it is reallocated and NULL-initialized; the release
// store below publishes the initialized array before the per-region
// resizing stamp becomes visible to other threads.
391 void
392 CMSCardTable::
393 get_LNC_array_for_space(Space* sp,
394 jbyte**& lowest_non_clean,
395 uintptr_t& lowest_non_clean_base_chunk_index,
396 size_t& lowest_non_clean_chunk_size) {
397
398 int i = find_covering_region_containing(sp->bottom());
399 MemRegion covered = _covered[i];
400 size_t n_chunks = chunks_to_cover(covered);
401
402 // Only the first thread to obtain the lock will resize the
403 // LNC array for the covered region. Any later expansion can't affect
404 // the used_at_save_marks region.
405 // (I observed a bug in which the first thread to execute this would
406 // resize, and then it would cause "expand_and_allocate" that would
407 // increase the number of chunks in the covered region. Then a second
408 // thread would come and execute this, see that the size didn't match,
409 // and free and allocate again. So the first thread would be using a
410 // freed "_lowest_non_clean" array.)
411
412 // Do a dirty read here. If we pass the conditional then take the rare
431 _lowest_non_clean[i] = NULL;
432 }
433 // Now allocate a new one if necessary.
434 if (_lowest_non_clean[i] == NULL) {
435 _lowest_non_clean[i] = NEW_C_HEAP_ARRAY(CardPtr, n_chunks, mtGC);
436 _lowest_non_clean_chunk_size[i] = n_chunks;
437 _lowest_non_clean_base_chunk_index[i] = addr_to_chunk_index(covered.start());
// Fresh arrays start with every chunk entry clean (NULL).
438 for (int j = 0; j < (int)n_chunks; j++)
439 _lowest_non_clean[i][j] = NULL;
440 }
441 }
442 // Make sure this gets visible only after _lowest_non_clean* was initialized
443 OrderAccess::release_store(&_last_LNC_resizing_collection[i], cur_collection);
444 }
445 }
446 // In any case, now do the initialization.
447 lowest_non_clean = _lowest_non_clean[i];
448 lowest_non_clean_base_chunk_index = _lowest_non_clean_base_chunk_index[i];
449 lowest_non_clean_chunk_size = _lowest_non_clean_chunk_size[i];
450 }
451
#ifdef ASSERT
// Debug-only check that used_region() contains used_region_at_save_marks()
// for "sp".  On violation, logs the two regions, re-reads both to detect
// whether either changed concurrently ("flickering") while we were looking,
// and then aborts via ShouldNotReachHere().
void CMSCardTable::verify_used_region_at_save_marks(Space* sp) const {
  MemRegion ur    = sp->used_region();
  MemRegion urasm = sp->used_region_at_save_marks();

  if (ur.contains(urasm)) {
    return;  // Invariant holds; nothing to report.
  }

  log_warning(gc)("CMS+ParNew: Did you forget to call save_marks()? "
                  "[" PTR_FORMAT ", " PTR_FORMAT ") is not contained in "
                  "[" PTR_FORMAT ", " PTR_FORMAT ")",
                  p2i(urasm.start()), p2i(urasm.end()), p2i(ur.start()), p2i(ur.end()));
  // Re-read both regions: if either differs from the first read, the
  // failure may stem from a concurrent update rather than a missing
  // save_marks() call.
  MemRegion ur2    = sp->used_region();
  MemRegion urasm2 = sp->used_region_at_save_marks();
  if (!ur.equals(ur2)) {
    log_warning(gc)("CMS+ParNew: Flickering used_region()!!");
  }
  if (!urasm.equals(urasm2)) {
    log_warning(gc)("CMS+ParNew: Flickering used_region_at_save_marks()!!");
  }
  ShouldNotReachHere();
}
#endif // ASSERT
|