/*
 * Copyright (c) 2006, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_MUTABLENUMASPACE_HPP
#define SHARE_VM_GC_IMPLEMENTATION_SHARED_MUTABLENUMASPACE_HPP

#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/shared/gcUtil.hpp"
#include "gc_implementation/shared/mutableSpace.hpp"
#endif // INCLUDE_ALL_GCS

/*
 * The NUMA-aware allocator (MutableNUMASpace) is basically a modification
 * of MutableSpace which preserves interfaces but implements different
 * functionality. The space is split into chunks, one for each locality group
 * (resizing for adaptive size policy is also supported). For each thread
 * allocations are performed in the chunk corresponding to the home locality
 * group of the thread. Whenever any chunk fills up, a young generation
 * collection occurs.
 * The chunks can also be adaptively resized. The idea behind adaptive
 * sizing is to reduce the loss of space in the eden due to fragmentation.
 * The main cause of fragmentation is uneven allocation rates of threads.
 * The allocation rate difference between locality groups may be caused either
 * by application specifics or by uneven LWP distribution by the OS. Besides,
 * the application can have fewer threads than the number of locality groups.
 * In order to resize the chunks, we measure the allocation rate of the
 * application between collections. After that we reshape the chunks to reflect
 * the allocation rate pattern. The AdaptiveWeightedAverage exponentially
 * decaying average is used to smooth the measurements. The NUMASpaceResizeRate
 * parameter controls the adaptation speed by restricting the number of
 * bytes that can be moved during the adaptation phase.
 * Chunks may contain pages from a wrong locality group. The page-scanner has
 * been introduced to address this problem. Remote pages typically appear due
 * to memory shortage in the target locality group. Besides, Solaris would
 * allocate a large page from a remote locality group even if small local
 * pages are available. The page-scanner scans the pages right after the
 * collection and frees remote pages in the hope that subsequent reallocation
 * would be more successful. This approach proved to be useful on systems with
 * high load where multiple processes are competing for memory.
 */
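
// A minimal sketch of the smoothing step described above (illustrative only,
// not part of this interface). Assuming AdaptiveWeightedAverage (gcUtil.hpp)
// applies the conventional exponentially decaying average with a percent
// weight,
//   avg' = ((100 - weight) * avg + weight * sample) / 100,
// one update would look like the hypothetical standalone helper below.
static inline float example_decaying_average(float avg, float sample,
                                             unsigned int weight /* 0..100 */) {
  // A larger weight makes the average track new samples more aggressively;
  // NUMAChunkResizeWeight plays this role for the per-chunk allocation rate.
  return ((100.0f - weight) * avg + weight * sample) / 100.0f;
}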

class MutableNUMASpace : public MutableSpace {
  friend class VMStructs;

  class LGRPSpace : public CHeapObj<mtGC> {
    int _lgrp_id;
    MutableSpace* _space;
    MemRegion _invalid_region;
    AdaptiveWeightedAverage* _alloc_rate;
    bool _allocation_failed;

    struct SpaceStats {
      size_t _local_space, _remote_space, _unbiased_space, _uncommited_space;
      size_t _large_pages, _small_pages;

      SpaceStats() {
        _local_space = 0;
        _remote_space = 0;
        _unbiased_space = 0;
        _uncommited_space = 0;
        _large_pages = 0;
        _small_pages = 0;
      }
    };

    SpaceStats _space_stats;

    char* _last_page_scanned;
    char* last_page_scanned() { return _last_page_scanned; }
    void set_last_page_scanned(char* p) { _last_page_scanned = p; }
   public:
    LGRPSpace(int l, size_t alignment) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
      _space = new MutableSpace(alignment);
      _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
    }
    ~LGRPSpace() {
      delete _space;
      delete _alloc_rate;
    }

    // Grow the invalid region to the smallest region covering both the
    // existing invalid region and r.
    void add_invalid_region(MemRegion r) {
      if (!_invalid_region.is_empty()) {
        _invalid_region.set_start(MIN2(_invalid_region.start(), r.start()));
        _invalid_region.set_end(MAX2(_invalid_region.end(), r.end()));
      } else {
        _invalid_region = r;
      }
    }

    static bool equals(void* lgrp_id_value, LGRPSpace* p) {
      return *(int*)lgrp_id_value == p->lgrp_id();
    }

    // Report a failed allocation.
    void set_allocation_failed() { _allocation_failed = true; }

    void sample() {
      // If there was a failed allocation, make the allocation rate equal
      // to the size of the whole chunk. This ensures the progress of
      // the adaptation process.
      size_t alloc_rate_sample;
      if (_allocation_failed) {
        alloc_rate_sample = space()->capacity_in_bytes();
        _allocation_failed = false;
      } else {
        alloc_rate_sample = space()->used_in_bytes();
      }
      alloc_rate()->sample(alloc_rate_sample);
    }

    MemRegion invalid_region() const            { return _invalid_region; }
    void set_invalid_region(MemRegion r)        { _invalid_region = r; }
    int lgrp_id() const                         { return _lgrp_id; }
    MutableSpace* space() const                 { return _space; }
    AdaptiveWeightedAverage* alloc_rate() const { return _alloc_rate; }
    void clear_alloc_rate()                     { _alloc_rate->clear(); }
    SpaceStats* space_stats()                   { return &_space_stats; }
    void clear_space_stats()                    { _space_stats = SpaceStats(); }

    void accumulate_statistics(size_t page_size);
    void scan_pages(size_t page_size, size_t page_count);
  };
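
  // The LGRPSpace::equals() predicate above is meant for keyed lookups in the
  // growable array of per-lgrp spaces. A sketch of the expected call pattern
  // (hypothetical caller, assuming GrowableArray::find(void*, predicate) as
  // declared in HotSpot's growableArray.hpp):
  //
  //   int lgrp_id = os::numa_get_group_id();  // home lgrp of current thread
  //   int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
  //   if (i != -1) {
  //     LGRPSpace* ls = lgrp_spaces()->at(i);  // chunk for this lgrp
  //   }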

  GrowableArray<LGRPSpace*>* _lgrp_spaces;
  size_t _page_size;
  unsigned _adaptation_cycles, _samples_count;

  void set_page_size(size_t psz) { _page_size = psz; }
  size_t page_size() const       { return _page_size; }

  unsigned adaptation_cycles()        { return _adaptation_cycles; }
  void set_adaptation_cycles(int v)   { _adaptation_cycles = v; }

  unsigned samples_count()            { return _samples_count; }
  void increment_samples_count()      { ++_samples_count; }

  size_t _base_space_size;
  void set_base_space_size(size_t v)  { _base_space_size = v; }
  size_t base_space_size() const      { return _base_space_size; }

  // Check if the NUMA topology has changed. Add and remove spaces if needed.
  // The update can be forced by setting the force parameter equal to true.
  bool update_layout(bool force);
  // Bias the region towards the lgrp.
  void bias_region(MemRegion mr, int lgrp_id);
  // Free pages in a given region.
  void free_region(MemRegion mr);
  // Get the current chunk size.
  size_t current_chunk_size(int i);
  // Get the default chunk size (equally divide the space).
  size_t default_chunk_size();
  // Adapt the chunk size to follow the allocation rate.
  size_t adaptive_chunk_size(int i, size_t limit);
  // Scan and free invalid pages.
  void scan_pages(size_t page_count);
  // Return the bottom_region and the top_region. Align them to the page_size()
  // boundary.
  // |------------------new_region---------------------------------|
  // |----bottom_region--|---intersection---|------top_region------|
  void select_tails(MemRegion new_region, MemRegion intersection,
                    MemRegion* bottom_region, MemRegion* top_region);
  // Try to merge the invalid region with the bottom or top region by
  // decreasing the intersection area. If the invalid region lies inside the
  // intersection, return it in invalid_region, non-empty and aligned to the
  // page_size() boundary.
  // |------------------new_region---------------------------------|
  // |----------------|-------invalid---|--------------------------|
  // |----bottom_region--|---intersection---|------top_region------|
  void merge_regions(MemRegion new_region, MemRegion* intersection,
                     MemRegion* invalid_region);

 public:
  GrowableArray<LGRPSpace*>* lgrp_spaces() const { return _lgrp_spaces; }
  MutableNUMASpace(size_t alignment);
  virtual ~MutableNUMASpace();
  // Space initialization.
  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space, bool setup_pages = SetupPages);
  // Update the space layout if necessary. Do all the adaptive resizing work.
  virtual void update();
  // Update allocation rate averages.
  virtual void accumulate_statistics();

  virtual void clear(bool mangle_space);
  virtual void mangle_unused_area() PRODUCT_RETURN;
  virtual void mangle_unused_area_complete() PRODUCT_RETURN;
  virtual void mangle_region(MemRegion mr) PRODUCT_RETURN;
  virtual void check_mangled_unused_area(HeapWord* limit) PRODUCT_RETURN;
  virtual void check_mangled_unused_area_complete() PRODUCT_RETURN;
  virtual void set_top_for_allocations(HeapWord* v) PRODUCT_RETURN;
  virtual void set_top_for_allocations() PRODUCT_RETURN;

  virtual void ensure_parsability();
  virtual size_t used_in_words() const;
  virtual size_t free_in_words() const;

  using MutableSpace::capacity_in_words;
  virtual size_t capacity_in_words(Thread* thr) const;
  virtual size_t tlab_capacity(Thread* thr) const;
  virtual size_t tlab_used(Thread* thr) const;
  virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;

  // Allocation (return NULL if full).
  virtual HeapWord* allocate(size_t word_size);
  virtual HeapWord* cas_allocate(size_t word_size);

  // Debugging
  virtual void print_on(outputStream* st) const;
  virtual void print_short_on(outputStream* st) const;
  virtual void verify();

  virtual void set_top(HeapWord* value);
};

#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_MUTABLENUMASPACE_HPP
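
// Usage sketch (illustrative only, not part of this header). Allocation from
// a MutableNUMASpace looks the same as from a plain MutableSpace; the choice
// of per-lgrp chunk happens internally. Assuming `eden` is a hypothetical
// MutableNUMASpace* owned by the young generation:
//
//   HeapWord* p = eden->cas_allocate(word_size);  // multi-threaded CAS path
//   if (p == NULL) {
//     // The chunk of the caller's locality group is full; the caller falls
//     // back to its collection path, after which sample()/update() adapt
//     // the chunk sizes to the observed allocation rates.
//   }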