10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "runtime/sharedRuntime.hpp"
26 #include "runtime/threadHeapSampler.hpp"
27
// Cheap random number generator
// Shared PRNG state, advanced by next_random(); drives the sampling intervals.
uint64_t ThreadHeapSampler::_rnd;
// Mean sampling interval in bytes; 0 disables sampling (see set_tlab_heap_sampling).
int ThreadHeapSampler::_monitoring_rate;
// Sampling on/off flag, toggled via enable()/disable() — those are defined
// elsewhere; NOTE(review): exact semantics not visible in this chunk.
int ThreadHeapSampler::_enabled;

// Statics for the fast log
static const int FastLogNumBits = 10;                      // Index bits into _log_table.
static const int FastLogMask = (1 << FastLogNumBits) - 1;  // Extracts those index bits.
// Constant lookup table of fractional log2 values — populated elsewhere
// (no initializer is visible in this chunk).
static double _log_table[1<<FastLogNumBits]; // Constant
37
38 // Returns the next prng value.
39 // pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48
40 // This is the lrand64 generator.
41 static uint64_t next_random(uint64_t rnd) {
42 const uint64_t PrngMult = 0x5DEECE66DLL;
43 const uint64_t PrngAdd = 0xB;
44 const uint64_t PrngModPower = 48;
45 const uint64_t PrngModMask = right_n_bits(PrngModPower);
46 return (PrngMult * rnd + PrngAdd) & PrngModMask;
47 }
48
49 static double fast_log2(const double & d) {
50 assert(d>0, "bad value passed to assert");
51 uint64_t x = 0;
52 memcpy(&x, &d, sizeof(uint64_t));
53 const uint32_t x_high = x >> 32;
54 const uint32_t y = x_high >> (20 - FastLogNumBits) & FastLogMask;
55 const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
56 return exponent + _log_table[y];
57 }
58
// Generates a geometric variable with the specified mean (512K by default).
// This is done by generating a random number between 0 and 1 and applying
// the inverse cumulative distribution function for an exponential.
// Specifically: Let m be the inverse of the sample rate, then
// the probability distribution function is m*exp(-mx) so the CDF is
// p = 1 - exp(-mx), so
// q = 1 - p = exp(-mx)
// log_e(q) = -mx
// -log_e(q)/m = x
// log_2(q) * (-log_e(2) * 1/m) = x
// In the code, q is actually in the range 1 to 2**26, hence the -26 below
//
// overflowed_bytes: bytes the triggering allocation overshot the previous
// sample point by; subtracted from the new interval to keep the mean honest.
void ThreadHeapSampler::pick_next_sample(size_t overflowed_bytes) {
  // Advance the shared PRNG state.
  _rnd = next_random(_rnd);
  // Take the top 26 bits as the random number
  // (This plus a 1<<58 sampling bound gives a max possible step of
  // 5194297183973780480 bytes. In this case,
  // for sample_parameter = 1<<19, max possible step is
  // 9448372 bytes (24 bits).
  const uint64_t PrngModPower = 48; // Number of bits in prng
  // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
  // under piii debug for some binaries.
  double q = static_cast<uint32_t>(_rnd >> (PrngModPower - 26)) + 1.0;
  // Put the computed p-value through the CDF of a geometric.
  // For faster performance (save ~1/20th exec time), replace
  // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705)
  // The value 26.000705 is used rather than 26 to compensate
  // for inaccuracies in FastLog2 which otherwise result in a
  // negative answer.
  double log_val = (fast_log2(q) - 26);
  // Clamp log_val to <= 0 so the interval is non-negative; _monitoring_rate
  // scales the exponential draw to the requested mean. +1 avoids a zero interval.
  size_t rate = static_cast<size_t>(
      (0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (_monitoring_rate)) + 1);
  _bytes_until_sample = rate;

  // Try to correct sample size by removing extra space from last allocation.
  if (overflowed_bytes > 0 && _bytes_until_sample > overflowed_bytes) {
    _bytes_until_sample -= overflowed_bytes;
  }
}
97
98 void ThreadHeapSampler::check_for_sampling(HeapWord* ptr, size_t allocation_size, size_t bytes_since_allocation) {
99 oopDesc* oop = reinterpret_cast<oopDesc*>(ptr);
100 size_t total_allocated_bytes = bytes_since_allocation + allocation_size;
101
102 // If not yet time for a sample, skip it.
103 if (total_allocated_bytes < _bytes_until_sample) {
104 _bytes_until_sample -= total_allocated_bytes;
105 return;
106 }
107
108 JvmtiExport::sampled_object_alloc_event_collector(oop);
109
110 size_t overflow_bytes = total_allocated_bytes - _bytes_until_sample;
111 pick_next_sample(overflow_bytes);
112 }
113
114 void ThreadHeapSampler::set_tlab_heap_sampling(int monitoring_rate) {
115 if (monitoring_rate == 0) {
116 disable();
117 _monitoring_rate = monitoring_rate;
118 } else {
119 _monitoring_rate = monitoring_rate;
120 enable();
121 }
122 }
|
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "runtime/sharedRuntime.hpp"
26 #include "runtime/threadHeapSampler.hpp"
27
// Cheap random number generator
// Shared PRNG state, advanced by next_random(); drives the sampling intervals.
uint64_t ThreadHeapSampler::_rnd;
// Mean sampling interval in bytes; 0 disables sampling (see set_tlab_heap_sampling).
int ThreadHeapSampler::_sampling_rate;
// Sampling on/off flag, toggled via enable()/disable() — those are defined
// elsewhere; NOTE(review): exact semantics not visible in this chunk.
int ThreadHeapSampler::_enabled;

// Statics for the fast log
static const int FastLogNumBits = 10;                      // Index bits into log_table.
static const int FastLogMask = (1 << FastLogNumBits) - 1;  // Extracts those index bits.
static double log_table[1<<FastLogNumBits]; // Constant once init_log_table() has run.
static bool log_table_initialized;          // Set under ThreadHeapSampler_lock in init_log_table().
38
39 // Returns the next prng value.
40 // pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48
41 // This is the lrand64 generator.
42 static uint64_t next_random(uint64_t rnd) {
43 const uint64_t PrngMult = 0x5DEECE66DLL;
44 const uint64_t PrngAdd = 0xB;
45 const uint64_t PrngModPower = 48;
46 const uint64_t PrngModMask = right_n_bits(PrngModPower);
47 return (PrngMult * rnd + PrngAdd) & PrngModMask;
48 }
49
50 static double fast_log2(const double & d) {
51 assert(d>0, "bad value passed to assert");
52 uint64_t x = 0;
53 memcpy(&x, &d, sizeof(uint64_t));
54 const uint32_t x_high = x >> 32;
55 const uint32_t y = x_high >> (20 - FastLogNumBits) & FastLogMask;
56 const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
57 return exponent + log_table[y];
58 }
59
// Generates a geometric variable with the specified mean (512K by default).
// This is done by generating a random number between 0 and 1 and applying
// the inverse cumulative distribution function for an exponential.
// Specifically: Let m be the inverse of the sample rate, then
// the probability distribution function is m*exp(-mx) so the CDF is
// p = 1 - exp(-mx), so
// q = 1 - p = exp(-mx)
// log_e(q) = -mx
// -log_e(q)/m = x
// log_2(q) * (-log_e(2) * 1/m) = x
// In the code, q is actually in the range 1 to 2**26, hence the -26 below
//
// overflowed_bytes: bytes the triggering allocation overshot the previous
// sample point by; subtracted from the new interval to keep the mean honest.
void ThreadHeapSampler::pick_next_sample(size_t overflowed_bytes) {
  // Advance the shared PRNG state.
  _rnd = next_random(_rnd);
  // Take the top 26 bits as the random number
  // (This plus a 1<<58 sampling bound gives a max possible step of
  // 5194297183973780480 bytes. In this case,
  // for sample_parameter = 1<<19, max possible step is
  // 9448372 bytes (24 bits).
  const uint64_t PrngModPower = 48; // Number of bits in prng
  // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
  // under piii debug for some binaries.
  double q = static_cast<uint32_t>(_rnd >> (PrngModPower - 26)) + 1.0;
  // Put the computed p-value through the CDF of a geometric.
  // For faster performance (save ~1/20th exec time), replace
  // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705)
  // The value 26.000705 is used rather than 26 to compensate
  // for inaccuracies in FastLog2 which otherwise result in a
  // negative answer.
  double log_val = (fast_log2(q) - 26);
  // Clamp log_val to <= 0 so the interval is non-negative; _sampling_rate
  // scales the exponential draw to the requested mean. +1 avoids a zero interval.
  size_t rate = static_cast<size_t>(
      (0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (_sampling_rate)) + 1);
  _bytes_until_sample = rate;

  // Try to correct sample size by removing extra space from last allocation.
  if (overflowed_bytes > 0 && _bytes_until_sample > overflowed_bytes) {
    _bytes_until_sample -= overflowed_bytes;
  }
}
98
99 void ThreadHeapSampler::check_for_sampling(HeapWord* ptr, size_t allocation_size, size_t bytes_since_allocation) {
100 oopDesc* oop = reinterpret_cast<oopDesc*>(ptr);
101 size_t total_allocated_bytes = bytes_since_allocation + allocation_size;
102
103 // If not yet time for a sample, skip it.
104 if (total_allocated_bytes < _bytes_until_sample) {
105 _bytes_until_sample -= total_allocated_bytes;
106 return;
107 }
108
109 JvmtiExport::sampled_object_alloc_event_collector(oop);
110
111 size_t overflow_bytes = total_allocated_bytes - _bytes_until_sample;
112 pick_next_sample(overflow_bytes);
113 }
114
115 void ThreadHeapSampler::init_log_table() {
116 MutexLocker mu(ThreadHeapSampler_lock);
117
118 if (log_table_initialized) {
119 return;
120 }
121
122 for (int i = 0; i < (1 << FastLogNumBits); i++) {
123 log_table[i] = (log(1.0 + static_cast<double>(i+0.5) / (1 << FastLogNumBits))
124 / log(2.0));
125 }
126
127 log_table_initialized = true;
128 }
129
130 void ThreadHeapSampler::set_tlab_heap_sampling(int sampling_rate) {
131 if (sampling_rate == 0) {
132 disable();
133 _sampling_rate = sampling_rate;
134 } else {
135 _sampling_rate = sampling_rate;
136 enable();
137 }
138 }
|