1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2018, Google and/or its affiliates. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
#include "precompiled.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/threadHeapSampler.hpp"

#include <string.h>
  31 
// State of the cheap pseudo-random number generator. It is advanced with
// next_random() each time pick_next_geometric_sample() draws a new interval.
uint64_t ThreadHeapSampler::_rnd;
// Mean number of bytes between two samples. Default is 512kb. Accessed
// through get_sampling_interval() / set_sampling_interval(); a value of 0
// makes pick_next_sample() request a sample on every allocation.
int ThreadHeapSampler::_sampling_interval = 512 * 1024;
  36 
namespace {
// Statics for the fast log (see ThreadHeapSampler::fast_log2).
// Number of leading mantissa bits used to index the lookup table.
const int FastLogNumBits = 10;
// Mask that keeps exactly FastLogNumBits low-order bits of an index.
const int FastLogMask = (1 << FastLogNumBits) - 1;
// Lookup table with one entry per FastLogNumBits-bit mantissa prefix. It is
// not declared const: ThreadHeapSampler::init_log_table() fills it in, and it
// is only read afterwards.
double internal_log_table[1<<FastLogNumBits];  // Constant
}  // anonymous namespace
  43 
// Pointer to the fast-log lookup table. Its initializer calls
// init_log_table(), which fills internal_log_table as a side effect and
// returns its address.
double *ThreadHeapSampler::_log_table = init_log_table();
  45 
  46 double* ThreadHeapSampler::init_log_table() {
  47   for (int i = 0; i < (1 << FastLogNumBits); i++) {
  48     internal_log_table[i] = (log(1.0 + static_cast<double>(i+0.5) / (1 << FastLogNumBits))
  49                     / log(2.0));
  50   }
  51   return internal_log_table;
  52 }
  53 
  54 // Returns the next prng value.
  55 // pRNG is: aX+b mod c with a = 0x5DEECE66D, b =  0xB, c = 1<<48
  56 // This is the lrand64 generator.
  57 static uint64_t next_random(uint64_t rnd) {
  58   const uint64_t PrngMult = 0x5DEECE66DLL;
  59   const uint64_t PrngAdd = 0xB;
  60   const uint64_t PrngModPower = 48;
  61   const uint64_t PrngModMask = ((uint64_t)1 << PrngModPower) - 1;
  62   //assert(IS_SAFE_SIZE_MUL(PrngMult, rnd), "Overflow on multiplication.");
  63   //assert(IS_SAFE_SIZE_ADD(PrngMult * rnd, PrngAdd), "Overflow on addition.");
  64   return (PrngMult * rnd + PrngAdd) & PrngModMask;
  65 }
  66 
  67 double ThreadHeapSampler::fast_log2(const double& d) {
  68   assert(d>0, "bad value passed to assert");
  69   uint64_t x = 0;
  70   assert(sizeof(d) == sizeof(x),
  71          "double and uint64_t do not have the same size");
  72   x = *reinterpret_cast<const uint64_t*>(&d);
  73   const uint32_t x_high = x >> 32;
  74   assert(FastLogNumBits <= 20, "FastLogNumBits should be less than 20.");
  75   const uint32_t y = x_high >> (20 - FastLogNumBits) & FastLogMask;
  76   const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
  77   return exponent + internal_log_table[y];
  78 }
  79 
  80 // Generates a geometric variable with the specified mean (512K by default).
  81 // This is done by generating a random number between 0 and 1 and applying
  82 // the inverse cumulative distribution function for an exponential.
  83 // Specifically: Let m be the inverse of the sample interval, then
  84 // the probability distribution function is m*exp(-mx) so the CDF is
  85 // p = 1 - exp(-mx), so
  86 // q = 1 - p = exp(-mx)
  87 // log_e(q) = -mx
  88 // -log_e(q)/m = x
  89 // log_2(q) * (-log_e(2) * 1/m) = x
  90 // In the code, q is actually in the range 1 to 2**26, hence the -26 below
  91 void ThreadHeapSampler::pick_next_geometric_sample() {
  92   _rnd = next_random(_rnd);
  93   // Take the top 26 bits as the random number
  94   // (This plus a 1<<58 sampling bound gives a max possible step of
  95   // 5194297183973780480 bytes.  In this case,
  96   // for sample_parameter = 1<<19, max possible step is
  97   // 9448372 bytes (24 bits).
  98   const uint64_t PrngModPower = 48;  // Number of bits in prng
  99   // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
 100   // under piii debug for some binaries.
 101   double q = static_cast<uint32_t>(_rnd >> (PrngModPower - 26)) + 1.0;
 102   // Put the computed p-value through the CDF of a geometric.
 103   // For faster performance (save ~1/20th exec time), replace
 104   // min(0.0, FastLog2(q) - 26)  by  (Fastlog2(q) - 26.000705)
 105   // The value 26.000705 is used rather than 26 to compensate
 106   // for inaccuracies in FastLog2 which otherwise result in a
 107   // negative answer.
 108   double log_val = (fast_log2(q) - 26);
 109   double result =
 110       (0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (get_sampling_interval())) + 1;
 111   assert(result > 0 && result < SIZE_MAX, "Result is not in an acceptable range.");
 112   size_t interval = static_cast<size_t>(result);
 113   _bytes_until_sample = interval;
 114 }
 115 
 116 void ThreadHeapSampler::pick_next_sample(size_t overflowed_bytes) {
 117   // Explicitly test if the sampling interval is 0, return 0 to sample every
 118   // allocation.
 119   if (get_sampling_interval() == 0) {
 120     _bytes_until_sample = 0;
 121     return;
 122   }
 123 
 124   pick_next_geometric_sample();
 125 
 126   // Try to correct sample size by removing extra space from last allocation.
 127   if (overflowed_bytes > 0 && _bytes_until_sample > overflowed_bytes) {
 128     _bytes_until_sample -= overflowed_bytes;
 129   }
 130 }
 131 
 132 void ThreadHeapSampler::check_for_sampling(oop obj, size_t allocation_size, size_t bytes_since_allocation) {
 133   size_t total_allocated_bytes = bytes_since_allocation + allocation_size;
 134 
 135   // If not yet time for a sample, skip it.
 136   if (total_allocated_bytes < _bytes_until_sample) {
 137     _bytes_until_sample -= total_allocated_bytes;
 138     return;
 139   }
 140 
 141   JvmtiExport::sampled_object_alloc_event_collector(obj);
 142 
 143   size_t overflow_bytes = total_allocated_bytes - _bytes_until_sample;
 144   pick_next_sample(overflow_bytes);
 145 }
 146 
 147 int ThreadHeapSampler::get_sampling_interval() {
 148   return OrderAccess::load_acquire(&_sampling_interval);
 149 }
 150 
 151 void ThreadHeapSampler::set_sampling_interval(int sampling_interval) {
 152   OrderAccess::release_store(&_sampling_interval, sampling_interval);
 153 }
 154 
 155 // Methods used in assertion mode to check if a collector is present or not at
 156 // the moment of TLAB sampling, ie a slow allocation path.
 157 bool ThreadHeapSampler::sampling_collector_present() const {
 158   return _collectors_present > 0;
 159 }
 160 
 161 bool ThreadHeapSampler::remove_sampling_collector() {
 162   assert(_collectors_present > 0, "Problem with collector counter.");
 163   _collectors_present--;
 164   return true;
 165 }
 166 
 167 bool ThreadHeapSampler::add_sampling_collector() {
 168   _collectors_present++;
 169   return true;
 170 }