11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26
27 #include "gc/shenandoah/shenandoahFreeSet.hpp"
28 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
29 #include "gc/shenandoah/shenandoahPacer.hpp"
30 #include "runtime/atomic.hpp"
31
32 /*
33 * In normal concurrent cycle, we have to pace the application to let GC finish.
34 *
35 * Here, we do not know how large would be the collection set, and what are the
36 * relative performances of each stage in the concurrent cycle, and so we have to
37 * make some assumptions.
38 *
39 * For concurrent mark, there is no clear notion of progress. The moderately accurate
40 * and easy to get metric is the amount of live objects the mark had encountered. But,
41 * that does not directly correlate with the used heap, because the heap might be fully
42 * dead or fully alive. We cannot assume either of the extremes: we would either allow
43 * application to run out of memory if we assume heap is fully dead but it is not, and,
44 * conversely, we would pacify application excessively if we assume heap is fully alive
45 * but it is not. So we need to guesstimate the particular expected value for heap liveness.
46 * The best way to do this is apparently recording the past history.
47 *
48 * For concurrent evac and update-refs, we are walking the heap per-region, and so the
49 * notion of progress is clear: we get reported the "used" size from the processed regions
50 * and use the global heap-used as the baseline.
222 return;
223 }
224
225 intptr_t tax = MAX2<intptr_t>(1, words * Atomic::load(&_tax_rate));
226 Atomic::add(&_budget, tax);
227 }
228
229 intptr_t ShenandoahPacer::epoch() {
230 return Atomic::load(&_epoch);
231 }
232
233 void ShenandoahPacer::pace_for_alloc(size_t words) {
234 assert(ShenandoahPacing, "Only be here when pacing is enabled");
235
236 // Fast path: try to allocate right away
237 if (claim_for_alloc(words, false)) {
238 return;
239 }
240
241 // Threads that are attaching should not block at all: they are not
242 // fully initialized yet. Calling sleep() on them would be awkward.
243 // This is probably the path that allocates the thread oop itself.
244 // Forcefully claim without waiting.
245 if (JavaThread::current()->is_attaching_via_jni()) {
246 claim_for_alloc(words, true);
247 return;
248 }
249
250 size_t max = ShenandoahPacingMaxDelay;
251 double start = os::elapsedTime();
252
253 size_t total = 0;
254 size_t cur = 0;
255
256 while (true) {
257 // We could instead assist GC, but this would suffice for now.
258 // This code should also participate in safepointing.
259 // Perform the exponential backoff, limited by max.
260
261 cur = cur * 2;
262 if (total + cur > max) {
263 cur = (max > total) ? (max - total) : 0;
264 }
265 cur = MAX2<size_t>(1, cur);
266
267 JavaThread::current()->sleep(cur);
268
269 double end = os::elapsedTime();
270 total = (size_t)((end - start) * 1000);
271
272 if (total > max) {
273 // Spent local time budget to wait for enough GC progress.
274 // Breaking out and allocating anyway, which may mean we outpace GC,
275 // and start Degenerated GC cycle.
276 _delays.add(total);
277
278 // Forcefully claim the budget: it may go negative at this point, and
279 // GC should replenish for this and subsequent allocations
280 claim_for_alloc(words, true);
281 break;
282 }
283
284 if (claim_for_alloc(words, false)) {
285 // Acquired enough permit, nice. Can allocate now.
286 _delays.add(total);
287 break;
288 }
289 }
290 }
291
292 void ShenandoahPacer::print_on(outputStream* out) const {
293 out->print_cr("ALLOCATION PACING:");
294 out->cr();
295
296 out->print_cr("Max pacing delay is set for " UINTX_FORMAT " ms.", ShenandoahPacingMaxDelay);
297 out->cr();
298
299 out->print_cr("Higher delay would prevent application outpacing the GC, but it will hide the GC latencies");
300 out->print_cr("from the STW pause times. Pacing affects the individual threads, and so it would also be");
301 out->print_cr("invisible to the usual profiling tools, but would add up to end-to-end application latency.");
302 out->print_cr("Raise max pacing delay with care.");
303 out->cr();
304
305 out->print_cr("Actual pacing delays histogram:");
306 out->cr();
307
308 out->print_cr("%10s - %10s %12s%12s", "From", "To", "Count", "Sum");
309
|
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26
27 #include "gc/shenandoah/shenandoahFreeSet.hpp"
28 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
29 #include "gc/shenandoah/shenandoahPacer.hpp"
30 #include "runtime/atomic.hpp"
31 #include "runtime/mutexLocker.hpp"
32
33 /*
34 * In normal concurrent cycle, we have to pace the application to let GC finish.
35 *
36 * Here, we do not know how large would be the collection set, and what are the
37 * relative performances of each stage in the concurrent cycle, and so we have to
38 * make some assumptions.
39 *
40 * For concurrent mark, there is no clear notion of progress. The moderately accurate
41 * and easy to get metric is the amount of live objects the mark had encountered. But,
42 * that does not directly correlate with the used heap, because the heap might be fully
43 * dead or fully alive. We cannot assume either of the extremes: we would either allow
44 * application to run out of memory if we assume heap is fully dead but it is not, and,
45 * conversely, we would pacify application excessively if we assume heap is fully alive
46 * but it is not. So we need to guesstimate the particular expected value for heap liveness.
47 * The best way to do this is apparently recording the past history.
48 *
49 * For concurrent evac and update-refs, we are walking the heap per-region, and so the
50 * notion of progress is clear: we get reported the "used" size from the processed regions
51 * and use the global heap-used as the baseline.
223 return;
224 }
225
226 intptr_t tax = MAX2<intptr_t>(1, words * Atomic::load(&_tax_rate));
227 Atomic::add(&_budget, tax);
228 }
229
230 intptr_t ShenandoahPacer::epoch() {
231 return Atomic::load(&_epoch);
232 }
233
234 void ShenandoahPacer::pace_for_alloc(size_t words) {
235 assert(ShenandoahPacing, "Only be here when pacing is enabled");
236
237 // Fast path: try to allocate right away
238 if (claim_for_alloc(words, false)) {
239 return;
240 }
241
242 // Threads that are attaching should not block at all: they are not
243 // fully initialized yet. Blocking them would be awkward.
244 // This is probably the path that allocates the thread oop itself.
245 // Forcefully claim without waiting.
246 if (JavaThread::current()->is_attaching_via_jni()) {
247 claim_for_alloc(words, true);
248 return;
249 }
250
251 size_t max = ShenandoahPacingMaxDelay;
252 double start = os::elapsedTime();
253
254 size_t total = 0;
255 size_t cur = 0;
256
257 while (true) {
258 // We could instead assist GC, but this would suffice for now.
259 // This code should also participate in safepointing.
260 // Perform the exponential backoff, limited by max.
261
262 cur = cur * 2;
263 if (total + cur > max) {
264 cur = (max > total) ? (max - total) : 0;
265 }
266 cur = MAX2<size_t>(1, cur);
267
268 wait(cur);
269
270 double end = os::elapsedTime();
271 total = (size_t)((end - start) * 1000);
272
273 if (total > max) {
274 // Spent local time budget to wait for enough GC progress.
275 // Breaking out and allocating anyway, which may mean we outpace GC,
276 // and start Degenerated GC cycle.
277 _delays.add(total);
278
279 // Forcefully claim the budget: it may go negative at this point, and
280 // GC should replenish for this and subsequent allocations
281 claim_for_alloc(words, true);
282 break;
283 }
284
285 if (claim_for_alloc(words, false)) {
286 // Acquired enough permit, nice. Can allocate now.
287 _delays.add(total);
288 break;
289 }
290 }
291 }
292
void ShenandoahPacer::wait(long time_ms) {
  // Perform timed wait. It works like sleep(), except without modifying
  // the thread interruptible status. MonitorLocker also checks for safepoints.
  assert(time_ms > 0, "Should not call this with zero argument, as it would stall until notify");
  MonitorLocker locker(_wait_monitor);
  _wait_monitor->wait(time_ms);
}
300
void ShenandoahPacer::notify_waiters() {
  // Wake every thread blocked in wait() above; presumably invoked when the
  // pacing budget has been replenished — confirm against callers.
  MonitorLocker locker(_wait_monitor);
  _wait_monitor->notify_all();
}
305
306 void ShenandoahPacer::print_on(outputStream* out) const {
307 out->print_cr("ALLOCATION PACING:");
308 out->cr();
309
310 out->print_cr("Max pacing delay is set for " UINTX_FORMAT " ms.", ShenandoahPacingMaxDelay);
311 out->cr();
312
313 out->print_cr("Higher delay would prevent application outpacing the GC, but it will hide the GC latencies");
314 out->print_cr("from the STW pause times. Pacing affects the individual threads, and so it would also be");
315 out->print_cr("invisible to the usual profiling tools, but would add up to end-to-end application latency.");
316 out->print_cr("Raise max pacing delay with care.");
317 out->cr();
318
319 out->print_cr("Actual pacing delays histogram:");
320 out->cr();
321
322 out->print_cr("%10s - %10s %12s%12s", "From", "To", "Count", "Sum");
323
|