Old src/share/vm/gc_implementation/shenandoah/shenandoahPacer.cpp

   1 /*
   2  * Copyright (c) 2018, Red Hat, Inc. and/or its affiliates.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.
   7  *
   8  * This code is distributed in the hope that it will be useful, but WITHOUT
   9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  11  * version 2 for more details (a copy is included in the LICENSE file that
  12  * accompanied this code).
  13  *
  14  * You should have received a copy of the GNU General Public License version
  15  * 2 along with this work; if not, write to the Free Software Foundation,
  16  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  17  *
  18  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  19  * or visit www.oracle.com if you need additional information or have any
  20  * questions.
  21  *
  22  */
  23 
  24 #include "precompiled.hpp"
  25 
  26 #include "gc_implementation/shenandoah/shenandoahPacer.hpp"
  27 #include "gc_implementation/shenandoah/shenandoahHeap.hpp"
  28 #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp"
  29 #include "gc_implementation/shenandoah/shenandoahFreeSet.hpp"
  30 
  31 /*
  32  * In normal concurrent cycle, we have to pace the application to let GC finish.
  33  *
  34  * Here, we do not know how large would be the collection set, and what are the
  35  * relative performances of the each stage in the concurrent cycle, and so we have to
  36  * make some assumptions.
  37  *
  38  * We assume, for pessimistic reasons, that the entire heap is full of alive objects,
  39  * and it will be evacuated fully. Therefore, we count live objects visited by all three
  40  * stages against the heap used at the beginning of the collection. That means if there
  41  * are dead objects, they would not be accounted for in this budget, and that would mean
  42  * allocation would be pacified excessively. But that *also* means the collection cycle
  43  * would finish earlier than pacer expects.
  44  *
  45  * The allocatable space when GC is running is "free" at the start of cycle, but the
  46  * accounted budget is based on "used". So, we need to adjust the tax knowing that.
  47  * Also, since we effectively count the used space three times (mark, evac, update-refs),
  48  * we need to multiply the tax by 3. Example: for 10 MB free and 90 MB used, GC would
  49  * come back with 3*90 MB budget, and thus for each 1 MB of allocation, we have to pay
  50  * 3*90 / 10 MBs. In the end, we would pay back the entire budget.
  51  */
  52 
  53 void ShenandoahPacer::setup_for_mark() {
  54   assert(ShenandoahPacing, "Only be here when pacing is enabled");
  55 
  56   size_t used = _heap->used();
  57   size_t free = _heap->free_set()->available();
  58 
  59   size_t non_taxable = free * ShenandoahPacingCycleSlack / 100;
  60   size_t taxable = free - non_taxable;
  61 
  62   double tax = 1.0 * used / taxable; // base tax for available free space
  63   tax *= 3;                          // mark is phase 1 of 3, claim 1/3 of free for it
  64   tax = MAX2<double>(1, tax);        // never allocate more than GC collects during the cycle
  65   tax *= 1.1;                        // additional surcharge to help unclutter heap
  66 
  67   restart_with(non_taxable, tax);
  68 
  69   log_info(gc, ergo)("Pacer for Mark. Used: " SIZE_FORMAT "M, Free: " SIZE_FORMAT
  70                      "M, Non-Taxable: " SIZE_FORMAT "M, Alloc Tax Rate: %.1fx",
  71                      used / M, free / M, non_taxable / M, tax);
  72 }
  73 
  74 void ShenandoahPacer::setup_for_evac() {
  75   assert(ShenandoahPacing, "Only be here when pacing is enabled");
  76 
  77   size_t cset = _heap->collection_set()->live_data();
  78   size_t free = _heap->free_set()->available();
  79 
  80   size_t non_taxable = free * ShenandoahPacingCycleSlack / 100;
  81   size_t taxable = free - non_taxable;
  82 
  83   double tax = 1.0 * cset / taxable; // base tax for available free space
  84   tax *= 2;                          // evac is phase 2 of 3, claim 1/2 of remaining free
  85   tax = MAX2<double>(1, tax);        // never allocate more than GC collects during the cycle
  86   tax *= 1.1;                        // additional surcharge to help unclutter heap
  87 
  88   restart_with(non_taxable, tax);
  89 
  90   log_info(gc, ergo)("Pacer for Evacuation. CSet: " SIZE_FORMAT "M, Free: " SIZE_FORMAT
  91                      "M, Non-Taxable: " SIZE_FORMAT "M, Alloc Tax Rate: %.1fx",
  92                      cset / M, free / M, non_taxable / M, tax);
  93 }
  94 
  95 void ShenandoahPacer::setup_for_updaterefs() {
  96   assert(ShenandoahPacing, "Only be here when pacing is enabled");
  97 
  98   size_t used = _heap->used();
  99   size_t free = _heap->free_set()->available();
 100 
 101   size_t non_taxable = free * ShenandoahPacingCycleSlack / 100;
 102   size_t taxable = free - non_taxable;
 103 
 104   double tax = 1.0 * used / taxable; // base tax for available free space
 105   tax *= 1;                          // update-refs is phase 3 of 3, claim the remaining free
 106   tax = MAX2<double>(1, tax);        // never allocate more than GC collects during the cycle
 107   tax *= 1.1;                        // additional surcharge to help unclutter heap
 108 
 109   restart_with(non_taxable, tax);
 110 
 111   log_info(gc, ergo)("Pacer for Update-Refs. Used: " SIZE_FORMAT "M, Free: " SIZE_FORMAT
 112                      "M, Non-Taxable: " SIZE_FORMAT "M, Alloc Tax Rate: %.1fx",
 113                      used / M, free / M, non_taxable / M, tax);
 114 }
 115 
 116 /*
 117  * In idle phase, we have to pace the application to let control thread react with GC start.
 118  *
 119  * Here, we have rendezvous with concurrent thread that adds up the budget as it acknowledges
 120  * it had seen recent allocations. It will naturally pace the allocations if control thread is
 121  * not catching up. To bootstrap this feedback cycle, we need to start with some initial budget
 122  * for applications to allocate at.
 123  */
 124 
 125 void ShenandoahPacer::setup_for_idle() {
 126   assert(ShenandoahPacing, "Only be here when pacing is enabled");
 127 
 128   size_t initial = _heap->capacity() * ShenandoahPacingIdleSlack / 100;
 129   double tax = 1;
 130 
 131   restart_with(initial, tax);
 132 
 133   log_info(gc, ergo)("Pacer for Idle. Initial: " SIZE_FORMAT "M, Alloc Tax Rate: %.1fx",
 134                      initial / M, tax);
 135 }
 136 
 137 void ShenandoahPacer::restart_with(jlong non_taxable_bytes, jdouble tax_rate) {
 138   STATIC_ASSERT(sizeof(size_t) <= sizeof(intptr_t));
 139   intptr_t initial = (size_t)(non_taxable_bytes * tax_rate) >> LogHeapWordSize;
 140   intptr_t cur;
 141   do {
 142     cur = OrderAccess::load_acquire(&_budget);
 143   } while (Atomic::cmpxchg(initial, &_budget, cur) != cur);
 144   OrderAccess::release_store(&_tax_rate, tax_rate);
 145 }
 146 
 147 bool ShenandoahPacer::claim_for_alloc(size_t words, bool force) {
 148   assert(ShenandoahPacing, "Only be here when pacing is enabled");
 149 
 150   intptr_t tax = MAX2<intptr_t>(1, words * OrderAccess::load_acquire(&_tax_rate));
 151 
 152   intptr_t cur = 0;
 153   intptr_t new_val = 0;
 154   do {
 155     cur = OrderAccess::load_acquire(&_budget);
 156     if (cur < tax) {
 157       // Progress depleted, alas.
 158       return false;
 159     }
 160     new_val = cur - tax;
 161   } while (Atomic::cmpxchg(new_val, &_budget, cur) != cur);
 162   return true;
 163 }
 164 
 165 void ShenandoahPacer::pace_for_alloc(size_t words) {
 166   assert(ShenandoahPacing, "Only be here when pacing is enabled");
 167 
 168   // Fast path: try to allocate right away
 169   if (claim_for_alloc(words, false)) {
 170     return;
 171   }
 172 
 173   size_t max_wait_ms = ShenandoahPacingMaxDelay;
 174   double start = os::elapsedTime();
 175 
 176   while (true) {
 177     // We could instead assist GC, but this would suffice for now.
 178     // This code should also participate in safepointing.
 179     os::sleep(Thread::current(), 1, true);
 180 
 181     double end = os::elapsedTime();
 182     size_t ms = (size_t)((end - start) * 1000);
 183     if (ms > max_wait_ms) {
 184       // Spent local time budget to wait for enough GC progress.
 185       // Breaking out and allocating anyway, which may mean we outpace GC,
 186       // and start Degenerated GC cycle.
 187       _delays.add(ms);
 188 
 189       // Forcefully claim the budget: it may go negative at this point, and
 190       // GC should replenish for this and subsequent allocations
 191       claim_for_alloc(words, true);
 192       break;
 193     }
 194 
 195     if (claim_for_alloc(words, false)) {
 196       // Acquired enough permit, nice. Can allocate now.
 197       _delays.add(ms);
 198       break;
 199     }
 200   }
 201 }
 202 
 203 void ShenandoahPacer::print_on(outputStream* out) const {
 204   out->print_cr("ALLOCATION PACING:");
 205   out->cr();
 206 
 207   out->print_cr("Max pacing delay is set for " UINTX_FORMAT " ms.", ShenandoahPacingMaxDelay);
 208   out->cr();
 209 
 210   out->print_cr("Higher delay would prevent application outpacing the GC, but it will hide the GC latencies");
 211   out->print_cr("from the STW pause times. Pacing affects the individual threads, and so it would also be");
 212   out->print_cr("invisible to the usual profiling tools, but would add up to end-to-end application latency.");
 213   out->print_cr("Raise max pacing delay with care.");
 214   out->cr();
 215 
 216   out->print_cr("Actual pacing delays histogram:");
 217   out->cr();
 218 
 219   out->print_cr("%10s - %10s %12s", "From", "To", "Count");
 220   for (int c = _delays.min_level(); c <= _delays.max_level(); c++) {
 221     out->print("%7d ms - %7d ms:", (c == 0) ? 0 : 1 << (c - 1), 1 << c);
 222     out->print_cr(SIZE_FORMAT_W(12), _delays.level(c));
 223   }
 224   out->cr();
 225 }