New src/hotspot/share/runtime/threadHeapSampler.cpp

   1 /*
   2  * Copyright (c) 2018, Google and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "runtime/sharedRuntime.hpp"
  26 #include "runtime/threadHeapSampler.hpp"
  27 
  // Cheap random number generator
  // Holds the generator state; pick_next_geometric_sample() advances it
  // through next_random() every time a new sampling interval is drawn.
  uint64_t ThreadHeapSampler::_rnd;
  // Sampling rate, used as the mean of the geometric distribution from which
  // sampling intervals are drawn.
  // Default is 512KB (512 * 1024 bytes).
  int ThreadHeapSampler::_sampling_rate = 512 * 1024;
  // NOTE(review): presumably a flag telling whether sampling is switched on;
  // it is neither read nor written in this file -- confirm against the header.
  int ThreadHeapSampler::_enabled;

  // Statics for the fast log
  // Number of leading mantissa bits fast_log2() uses to index log_table.
  static const int FastLogNumBits = 10;
  // Mask keeping the low FastLogNumBits bits of a table index.
  static const int FastLogMask = (1 << FastLogNumBits) - 1;
  // Lookup table with 1 << FastLogNumBits entries; written by
  // init_log_table() and read by fast_log2().
  static double log_table[1<<FastLogNumBits];  // Constant
  // Set to true by init_log_table() once log_table has been filled.
  static bool log_table_initialized;
  39 
  // Returns the next prng value.
  // pRNG is: aX+b mod c with a = 0x5DEECE66D, b =  0xB, c = 1<<48
  // This is the lrand48 generator (same multiplier, increment and modulus).
  //
  // rnd: the current generator state.
  // Returns the next generator state, always in the range [0, 1<<48).
  static uint64_t next_random(uint64_t rnd) {
    const uint64_t PrngMult = 0x5DEECE66DULL;
    const uint64_t PrngAdd = 0xB;
    const uint64_t PrngModPower = 48;
    // Build the mask with explicit 64-bit arithmetic so that it is exactly
    // 48 bits wide regardless of the width of a machine word.
    const uint64_t PrngModMask = (static_cast<uint64_t>(1) << PrngModPower) - 1;
    // The multiply/add wraps modulo 2^64, which leaves the low 48 bits intact;
    // the mask then performs the reduction modulo 1<<48.
    return (PrngMult * rnd + PrngAdd) & PrngModMask;
  }
  50 
  51 static double fast_log2(const double & d) {
  52   assert(d>0, "bad value passed to assert");
  53   uint64_t x = 0;
  54   memcpy(&x, &d, sizeof(uint64_t));
  55   const uint32_t x_high = x >> 32;
  56   const uint32_t y = x_high >> (20 - FastLogNumBits) & FastLogMask;
  57   const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
  58   return exponent + log_table[y];
  59 }
  60 
  61 // Generates a geometric variable with the specified mean (512K by default).
  62 // This is done by generating a random number between 0 and 1 and applying
  63 // the inverse cumulative distribution function for an exponential.
  64 // Specifically: Let m be the inverse of the sample rate, then
  65 // the probability distribution function is m*exp(-mx) so the CDF is
  66 // p = 1 - exp(-mx), so
  67 // q = 1 - p = exp(-mx)
  68 // log_e(q) = -mx
  69 // -log_e(q)/m = x
  70 // log_2(q) * (-log_e(2) * 1/m) = x
  71 // In the code, q is actually in the range 1 to 2**26, hence the -26 below
  72 void ThreadHeapSampler::pick_next_geometric_sample() {
  73   _rnd = next_random(_rnd);
  74   // Take the top 26 bits as the random number
  75   // (This plus a 1<<58 sampling bound gives a max possible step of
  76   // 5194297183973780480 bytes.  In this case,
  77   // for sample_parameter = 1<<19, max possible step is
  78   // 9448372 bytes (24 bits).
  79   const uint64_t PrngModPower = 48;  // Number of bits in prng
  80   // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
  81   // under piii debug for some binaries.
  82   double q = static_cast<uint32_t>(_rnd >> (PrngModPower - 26)) + 1.0;
  83   // Put the computed p-value through the CDF of a geometric.
  84   // For faster performance (save ~1/20th exec time), replace
  85   // min(0.0, FastLog2(q) - 26)  by  (Fastlog2(q) - 26.000705)
  86   // The value 26.000705 is used rather than 26 to compensate
  87   // for inaccuracies in FastLog2 which otherwise result in a
  88   // negative answer.
  89   double log_val = (fast_log2(q) - 26);
  90   size_t rate = static_cast<size_t>(
  91       (0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (_sampling_rate)) + 1);
  92   _bytes_until_sample = rate;
  93 }
  94 
  95 void ThreadHeapSampler::pick_next_sample(size_t overflowed_bytes) {
  96   if (_sampling_rate == 1) {
  97     _bytes_until_sample = 1;
  98     return;
  99   }
 100 
 101   pick_next_geometric_sample();
 102 
 103   // Try to correct sample size by removing extra space from last allocation.
 104   if (overflowed_bytes > 0 && _bytes_until_sample > overflowed_bytes) {
 105     _bytes_until_sample -= overflowed_bytes;
 106   }
 107 }
 108 
 109 void ThreadHeapSampler::check_for_sampling(HeapWord* ptr, size_t allocation_size, size_t bytes_since_allocation) {
 110   oopDesc* oop = reinterpret_cast<oopDesc*>(ptr);
 111   size_t total_allocated_bytes = bytes_since_allocation + allocation_size;
 112 
 113   // If not yet time for a sample, skip it.
 114   if (total_allocated_bytes < _bytes_until_sample) {
 115     _bytes_until_sample -= total_allocated_bytes;
 116     return;
 117   }
 118 
 119   JvmtiExport::sampled_object_alloc_event_collector(oop);
 120 
 121   size_t overflow_bytes = total_allocated_bytes - _bytes_until_sample;
 122   pick_next_sample(overflow_bytes);
 123 }
 124 
 125 void ThreadHeapSampler::init_log_table() {
 126   MutexLocker mu(ThreadHeapSampler_lock);
 127 
 128   if (log_table_initialized) {
 129     return;
 130   }
 131 
 132   for (int i = 0; i < (1 << FastLogNumBits); i++) {
 133     log_table[i] = (log(1.0 + static_cast<double>(i+0.5) / (1 << FastLogNumBits))
 134                      / log(2.0));
 135   }
 136 
 137   log_table_initialized = true;
 138 }
 139 
 140 void ThreadHeapSampler::set_tlab_heap_sampling(int sampling_rate) {
 141   MutexLocker mu(ThreadHeapSampler_lock);
 142   _sampling_rate = sampling_rate;
 143 }