18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
19 * or visit www.oracle.com if you need additional information or have any
20 * questions.
21 *
22 */
23
24 #include "precompiled.hpp"
25
26 #include "gc_implementation/shenandoah/shenandoahPacer.hpp"
27 #include "gc_implementation/shenandoah/shenandoahHeap.hpp"
28 #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp"
29 #include "gc_implementation/shenandoah/shenandoahFreeSet.hpp"
30
31 /*
32 * In normal concurrent cycle, we have to pace the application to let GC finish.
33 *
34 * Here, we do not know how large would be the collection set, and what are the
35 * relative performances of the each stage in the concurrent cycle, and so we have to
36 * make some assumptions.
37 *
38 * We assume, for pessimistic reasons, that the entire heap is full of alive objects,
39 * and it will be evacuated fully. Therefore, we count live objects visited by all three
40 * stages against the heap used at the beginning of the collection. That means if there
41 * are dead objects, they would not be accounted for in this budget, and that would mean
42 * allocation would be pacified excessively. But that *also* means the collection cycle
43 * would finish earlier than pacer expects.
44 *
45 * The allocatable space when GC is running is "free" at the start of cycle, but the
46 * accounted budget is based on "used". So, we need to adjust the tax knowing that.
47 * Also, since we effectively count the used space three times (mark, evac, update-refs),
48 * we need to multiply the tax by 3. Example: for 10 MB free and 90 MB used, GC would
49 * come back with 3*90 MB budget, and thus for each 1 MB of allocation, we have to pay
50 * 3*90 / 10 MBs. In the end, we would pay back the entire budget.
51 */
52
53 void ShenandoahPacer::setup_for_mark() {
54 assert(ShenandoahPacing, "Only be here when pacing is enabled");
55
56 size_t used = _heap->used();
57 size_t free = _heap->free_set()->available();
58
59 size_t non_taxable = free * ShenandoahPacingCycleSlack / 100;
60 size_t taxable = free - non_taxable;
61
62 double tax = 1.0 * used / taxable; // base tax for available free space
63 tax *= 3; // mark is phase 1 of 3, claim 1/3 of free for it
64 tax = MAX2<double>(1, tax); // never allocate more than GC collects during the cycle
65 tax *= 1.1; // additional surcharge to help unclutter heap
66
67 restart_with(non_taxable, tax);
68
69 log_info(gc, ergo)("Pacer for Mark. Used: " SIZE_FORMAT "M, Free: " SIZE_FORMAT
70 "M, Non-Taxable: " SIZE_FORMAT "M, Alloc Tax Rate: %.1fx",
71 used / M, free / M, non_taxable / M, tax);
72 }
73
74 void ShenandoahPacer::setup_for_evac() {
75 assert(ShenandoahPacing, "Only be here when pacing is enabled");
76
77 size_t cset = _heap->collection_set()->live_data();
78 size_t free = _heap->free_set()->available();
79
80 size_t non_taxable = free * ShenandoahPacingCycleSlack / 100;
81 size_t taxable = free - non_taxable;
82
83 double tax = 1.0 * cset / taxable; // base tax for available free space
84 tax *= 2; // evac is phase 2 of 3, claim 1/2 of remaining free
85 tax = MAX2<double>(1, tax); // never allocate more than GC collects during the cycle
86 tax *= 1.1; // additional surcharge to help unclutter heap
87
88 restart_with(non_taxable, tax);
89
90 log_info(gc, ergo)("Pacer for Evacuation. CSet: " SIZE_FORMAT "M, Free: " SIZE_FORMAT
91 "M, Non-Taxable: " SIZE_FORMAT "M, Alloc Tax Rate: %.1fx",
92 cset / M, free / M, non_taxable / M, tax);
93 }
94
95 void ShenandoahPacer::setup_for_updaterefs() {
96 assert(ShenandoahPacing, "Only be here when pacing is enabled");
97
98 size_t used = _heap->used();
99 size_t free = _heap->free_set()->available();
100
101 size_t non_taxable = free * ShenandoahPacingCycleSlack / 100;
102 size_t taxable = free - non_taxable;
103
104 double tax = 1.0 * used / taxable; // base tax for available free space
105 tax *= 1; // update-refs is phase 3 of 3, claim the remaining free
106 tax = MAX2<double>(1, tax); // never allocate more than GC collects during the cycle
107 tax *= 1.1; // additional surcharge to help unclutter heap
108
109 restart_with(non_taxable, tax);
110
111 log_info(gc, ergo)("Pacer for Update-Refs. Used: " SIZE_FORMAT "M, Free: " SIZE_FORMAT
112 "M, Non-Taxable: " SIZE_FORMAT "M, Alloc Tax Rate: %.1fx",
113 used / M, free / M, non_taxable / M, tax);
114 }
115
116 /*
117 * In idle phase, we have to pace the application to let control thread react with GC start.
118 *
119 * Here, we have rendezvous with concurrent thread that adds up the budget as it acknowledges
120 * it had seen recent allocations. It will naturally pace the allocations if control thread is
121 * not catching up. To bootstrap this feedback cycle, we need to start with some initial budget
122 * for applications to allocate at.
123 */
124
125 void ShenandoahPacer::setup_for_idle() {
126 assert(ShenandoahPacing, "Only be here when pacing is enabled");
127
128 size_t initial = _heap->capacity() * ShenandoahPacingIdleSlack / 100;
129 double tax = 1;
130
131 restart_with(initial, tax);
132
133 log_info(gc, ergo)("Pacer for Idle. Initial: " SIZE_FORMAT "M, Alloc Tax Rate: %.1fx",
134 initial / M, tax);
135 }
136
137 void ShenandoahPacer::restart_with(jlong non_taxable_bytes, jdouble tax_rate) {
138 STATIC_ASSERT(sizeof(size_t) <= sizeof(intptr_t));
139 intptr_t initial = (size_t)(non_taxable_bytes * tax_rate) >> LogHeapWordSize;
140 intptr_t cur;
141 do {
142 cur = OrderAccess::load_acquire(&_budget);
143 } while (Atomic::cmpxchg(initial, &_budget, cur) != cur);
144 OrderAccess::release_store(&_tax_rate, tax_rate);
145 }
146
147 bool ShenandoahPacer::claim_for_alloc(size_t words, bool force) {
148 assert(ShenandoahPacing, "Only be here when pacing is enabled");
149
150 intptr_t tax = MAX2<intptr_t>(1, words * OrderAccess::load_acquire(&_tax_rate));
151
152 intptr_t cur = 0;
153 intptr_t new_val = 0;
154 do {
155 cur = OrderAccess::load_acquire(&_budget);
156 if (cur < tax) {
157 // Progress depleted, alas.
158 return false;
159 }
160 new_val = cur - tax;
161 } while (Atomic::cmpxchg(new_val, &_budget, cur) != cur);
162 return true;
163 }
164
165 void ShenandoahPacer::pace_for_alloc(size_t words) {
166 assert(ShenandoahPacing, "Only be here when pacing is enabled");
167
168 // Fast path: try to allocate right away
169 if (claim_for_alloc(words, false)) {
170 return;
171 }
172
173 size_t max_wait_ms = ShenandoahPacingMaxDelay;
174 double start = os::elapsedTime();
175
176 while (true) {
177 // We could instead assist GC, but this would suffice for now.
178 // This code should also participate in safepointing.
179 os::sleep(Thread::current(), 1, true);
180
181 double end = os::elapsedTime();
182 size_t ms = (size_t)((end - start) * 1000);
183 if (ms > max_wait_ms) {
184 // Spent local time budget to wait for enough GC progress.
185 // Breaking out and allocating anyway, which may mean we outpace GC,
186 // and start Degenerated GC cycle.
187 _delays.add(ms);
188
189 // Forcefully claim the budget: it may go negative at this point, and
190 // GC should replenish for this and subsequent allocations
191 claim_for_alloc(words, true);
192 break;
193 }
194
195 if (claim_for_alloc(words, false)) {
196 // Acquired enough permit, nice. Can allocate now.
197 _delays.add(ms);
198 break;
199 }
200 }
201 }
202
203 void ShenandoahPacer::print_on(outputStream* out) const {
204 out->print_cr("ALLOCATION PACING:");
205 out->cr();
206
207 out->print_cr("Max pacing delay is set for " UINTX_FORMAT " ms.", ShenandoahPacingMaxDelay);
208 out->cr();
209
210 out->print_cr("Higher delay would prevent application outpacing the GC, but it will hide the GC latencies");
211 out->print_cr("from the STW pause times. Pacing affects the individual threads, and so it would also be");
212 out->print_cr("invisible to the usual profiling tools, but would add up to end-to-end application latency.");
213 out->print_cr("Raise max pacing delay with care.");
214 out->cr();
215
216 out->print_cr("Actual pacing delays histogram:");
217 out->cr();
218
219 out->print_cr("%10s - %10s %12s", "From", "To", "Count");
220 for (int c = _delays.min_level(); c <= _delays.max_level(); c++) {
221 out->print("%7d ms - %7d ms:", (c == 0) ? 0 : 1 << (c - 1), 1 << c);
222 out->print_cr(SIZE_FORMAT_W(12), _delays.level(c));
223 }
224 out->cr();
225 }
|
18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
19 * or visit www.oracle.com if you need additional information or have any
20 * questions.
21 *
22 */
23
24 #include "precompiled.hpp"
25
26 #include "gc_implementation/shenandoah/shenandoahPacer.hpp"
27 #include "gc_implementation/shenandoah/shenandoahHeap.hpp"
28 #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp"
29 #include "gc_implementation/shenandoah/shenandoahFreeSet.hpp"
30
31 /*
32 * In normal concurrent cycle, we have to pace the application to let GC finish.
33 *
34 * Here, we do not know how large would be the collection set, and what are the
35 * relative performances of the each stage in the concurrent cycle, and so we have to
36 * make some assumptions.
37 *
38 * For concurrent mark, there is no clear notion of progress. The moderately accurate
39 * and easy to get metric is the amount of live objects the mark had encountered. But,
40 * that does directly correlate with the used heap, because the heap might be fully
41 * dead or fully alive. We cannot assume either of the extremes: we would either allow
42 * application to run out of memory if we assume heap is fully dead but it is not, and,
43 * conversely, we would pacify application excessively if we assume heap is fully alive
44 * but it is not. So we need to guesstimate the particular expected value for heap liveness.
45 * The best way to do this is apparently recording the past history.
46 *
47 * For concurrent evac and update-refs, we are walking the heap per-region, and so the
48 * notion of progress is clear: we get reported the "used" size from the processed regions
49 * and use the global heap-used as the baseline.
50 *
51 * The allocatable space when GC is running is "free" at the start of cycle, but the
52 * accounted budget is based on "used". So, we need to adjust the tax knowing that.
53 * Also, since we effectively count the used space three times (mark, evac, update-refs),
54 * we need to multiply the tax by 3. Example: for 10 MB free and 90 MB used, GC would
55 * come back with 3*90 MB budget, and thus for each 1 MB of allocation, we have to pay
56 * 3*90 / 10 MBs. In the end, we would pay back the entire budget.
57 */
58
59 void ShenandoahPacer::setup_for_mark() {
60 assert(ShenandoahPacing, "Only be here when pacing is enabled");
61
62 size_t live = update_and_get_progress_history();
63 size_t free = _heap->free_set()->available();
64
65 size_t non_taxable = free * ShenandoahPacingCycleSlack / 100;
66 size_t taxable = free - non_taxable;
67
68 double tax = 1.0 * live / taxable; // base tax for available free space
69 tax *= 3; // mark is phase 1 of 3, claim 1/3 of free for it
70 tax *= ShenandoahPacingSurcharge; // additional surcharge to help unclutter heap
71
72 restart_with(non_taxable, tax);
73
74 log_info(gc, ergo)("Pacer for Mark. Expected Live: " SIZE_FORMAT "M, Free: " SIZE_FORMAT
75 "M, Non-Taxable: " SIZE_FORMAT "M, Alloc Tax Rate: %.1fx",
76 live / M, free / M, non_taxable / M, tax);
77 }
78
79 void ShenandoahPacer::setup_for_evac() {
80 assert(ShenandoahPacing, "Only be here when pacing is enabled");
81
82 size_t used = _heap->collection_set()->used();
83 size_t free = _heap->free_set()->available();
84
85 size_t non_taxable = free * ShenandoahPacingCycleSlack / 100;
86 size_t taxable = free - non_taxable;
87
88 double tax = 1.0 * used / taxable; // base tax for available free space
89 tax *= 2; // evac is phase 2 of 3, claim 1/2 of remaining free
90 tax = MAX2<double>(1, tax); // never allocate more than GC processes during the phase
91 tax *= ShenandoahPacingSurcharge; // additional surcharge to help unclutter heap
92
93 restart_with(non_taxable, tax);
94
95 log_info(gc, ergo)("Pacer for Evacuation. Used CSet: " SIZE_FORMAT "M, Free: " SIZE_FORMAT
96 "M, Non-Taxable: " SIZE_FORMAT "M, Alloc Tax Rate: %.1fx",
97 used / M, free / M, non_taxable / M, tax);
98 }
99
100 void ShenandoahPacer::setup_for_updaterefs() {
101 assert(ShenandoahPacing, "Only be here when pacing is enabled");
102
103 size_t used = _heap->used();
104 size_t free = _heap->free_set()->available();
105
106 size_t non_taxable = free * ShenandoahPacingCycleSlack / 100;
107 size_t taxable = free - non_taxable;
108
109 double tax = 1.0 * used / taxable; // base tax for available free space
110 tax *= 1; // update-refs is phase 3 of 3, claim the remaining free
111 tax = MAX2<double>(1, tax); // never allocate more than GC processes during the phase
112 tax *= ShenandoahPacingSurcharge; // additional surcharge to help unclutter heap
113
114 restart_with(non_taxable, tax);
115
116 log_info(gc, ergo)("Pacer for Update Refs. Used: " SIZE_FORMAT "M, Free: " SIZE_FORMAT
117 "M, Non-Taxable: " SIZE_FORMAT "M, Alloc Tax Rate: %.1fx",
118 used / M, free / M, non_taxable / M, tax);
119 }
120
121 /*
122 * In idle phase, we have to pace the application to let control thread react with GC start.
123 *
124 * Here, we have rendezvous with concurrent thread that adds up the budget as it acknowledges
125 * it had seen recent allocations. It will naturally pace the allocations if control thread is
126 * not catching up. To bootstrap this feedback cycle, we need to start with some initial budget
127 * for applications to allocate at.
128 */
129
130 void ShenandoahPacer::setup_for_idle() {
131 assert(ShenandoahPacing, "Only be here when pacing is enabled");
132
133 size_t initial = _heap->capacity() * ShenandoahPacingIdleSlack / 100;
134 double tax = 1;
135
136 restart_with(initial, tax);
137
138 log_info(gc, ergo)("Pacer for Idle. Initial: " SIZE_FORMAT "M, Alloc Tax Rate: %.1fx",
139 initial / M, tax);
140 }
141
142 size_t ShenandoahPacer::update_and_get_progress_history() {
143 if (_progress == -1) {
144 // First initialization, report some prior
145 Atomic::store((intptr_t)PACING_PROGRESS_ZERO, &_progress);
146 return (size_t) (_heap->capacity() * 0.1);
147 } else {
148 // Record history, and reply historical data
149 _progress_history->add(_progress);
150 Atomic::store((intptr_t)PACING_PROGRESS_ZERO, &_progress);
151 return (size_t) (_progress_history->avg() * HeapWordSize);
152 }
153 }
154
155 void ShenandoahPacer::restart_with(jlong non_taxable_bytes, jdouble tax_rate) {
156 STATIC_ASSERT(sizeof(size_t) <= sizeof(intptr_t));
157 {
158 intptr_t initial = (size_t) (non_taxable_bytes * tax_rate) >> LogHeapWordSize;
159 intptr_t cur;
160 do {
161 cur = OrderAccess::load_acquire(&_budget);
162 } while (Atomic::cmpxchg(initial, &_budget, cur) != cur);
163 }
164
165 OrderAccess::release_store(&_tax_rate, tax_rate);
166
167 {
168 intptr_t cur, val;
169 do {
170 cur = OrderAccess::load_acquire(&_epoch);
171 val = cur + 1;
172 } while (Atomic::cmpxchg(val, &_epoch, cur) != cur);
173 }
174 }
175
176 bool ShenandoahPacer::claim_for_alloc(size_t words, bool force) {
177 assert(ShenandoahPacing, "Only be here when pacing is enabled");
178
179 intptr_t tax = MAX2<intptr_t>(1, (intptr_t)(words * OrderAccess::load_acquire(&_tax_rate)));
180
181 intptr_t cur = 0;
182 intptr_t new_val = 0;
183 do {
184 cur = OrderAccess::load_acquire(&_budget);
185 if (cur < tax) {
186 // Progress depleted, alas.
187 return false;
188 }
189 new_val = cur - tax;
190 } while (Atomic::cmpxchg(new_val, &_budget, cur) != cur);
191 return true;
192 }
193
194 void ShenandoahPacer::unpace_for_alloc(intptr_t epoch, size_t words) {
195 assert(ShenandoahPacing, "Only be here when pacing is enabled");
196
197 if (_epoch != epoch) {
198 // Stale ticket, no need to unpace.
199 return;
200 }
201
202 intptr_t tax = MAX2<intptr_t>(1, (intptr_t)(words * OrderAccess::load_acquire(&_tax_rate)));
203 Atomic::add(tax, &_budget);
204 }
205
206 intptr_t ShenandoahPacer::epoch() {
207 return OrderAccess::load_acquire(&_epoch);
208 }
209
210 void ShenandoahPacer::pace_for_alloc(size_t words) {
211 assert(ShenandoahPacing, "Only be here when pacing is enabled");
212
213 // Fast path: try to allocate right away
214 if (claim_for_alloc(words, false)) {
215 return;
216 }
217
218 size_t max = ShenandoahPacingMaxDelay;
219 double start = os::elapsedTime();
220
221 size_t total = 0;
222 size_t cur = 0;
223
224 while (true) {
225 // We could instead assist GC, but this would suffice for now.
226 // This code should also participate in safepointing.
227 // Perform the exponential backoff, limited by max.
228
229 cur = cur * 2;
230 if (total + cur > max) {
231 cur = (max > total) ? (max - total) : 0;
232 }
233 cur = MAX2<size_t>(1, cur);
234
235 os::sleep(Thread::current(), cur, true);
236
237 double end = os::elapsedTime();
238 total = (size_t)((end - start) * 1000);
239
240 if (total > max) {
241 // Spent local time budget to wait for enough GC progress.
242 // Breaking out and allocating anyway, which may mean we outpace GC,
243 // and start Degenerated GC cycle.
244 _delays.add(total);
245
246 // Forcefully claim the budget: it may go negative at this point, and
247 // GC should replenish for this and subsequent allocations
248 claim_for_alloc(words, true);
249 break;
250 }
251
252 if (claim_for_alloc(words, false)) {
253 // Acquired enough permit, nice. Can allocate now.
254 _delays.add(total);
255 break;
256 }
257 }
258 }
259
260 void ShenandoahPacer::print_on(outputStream* out) const {
261 out->print_cr("ALLOCATION PACING:");
262 out->cr();
263
264 out->print_cr("Max pacing delay is set for " UINTX_FORMAT " ms.", ShenandoahPacingMaxDelay);
265 out->cr();
266
267 out->print_cr("Higher delay would prevent application outpacing the GC, but it will hide the GC latencies");
268 out->print_cr("from the STW pause times. Pacing affects the individual threads, and so it would also be");
269 out->print_cr("invisible to the usual profiling tools, but would add up to end-to-end application latency.");
270 out->print_cr("Raise max pacing delay with care.");
271 out->cr();
272
273 out->print_cr("Actual pacing delays histogram:");
274 out->cr();
275
276 out->print_cr("%10s - %10s %12s%12s", "From", "To", "Count", "Sum");
277
278 size_t total_count = 0;
279 size_t total_sum = 0;
280 for (int c = _delays.min_level(); c <= _delays.max_level(); c++) {
281 int l = (c == 0) ? 0 : 1 << (c - 1);
282 int r = 1 << c;
283 size_t count = _delays.level(c);
284 size_t sum = count * (r - l) / 2;
285 total_count += count;
286 total_sum += sum;
287
288 out->print_cr("%7d ms - %7d ms: " SIZE_FORMAT_W(12) SIZE_FORMAT_W(12) " ms", l, r, count, sum);
289 }
290 out->print_cr("%23s: " SIZE_FORMAT_W(12) SIZE_FORMAT_W(12) " ms", "Total", total_count, total_sum);
291 out->cr();
292 }
|