/*
 * Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/parallel/mutableNUMASpace.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/spaceDecorator.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/align.hpp"

MutableNUMASpace::MutableNUMASpace(size_t alignment) : MutableSpace(alignment), _must_use_large_pages(false) {
  _lgrp_spaces = new (ResourceObj::C_HEAP, mtGC) GrowableArray<LGRPSpace*>(0, true);
  _page_size = os::vm_page_size();
  _adaptation_cycles = 0;
  _samples_count = 0;

#ifdef LINUX
  // Changing the page size can lead to freeing of memory. When using large pages
  // and the memory has been both reserved and committed, Linux does not support
  // freeing parts of it.
  if (UseLargePages && !os::can_commit_large_page_memory()) {
    _must_use_large_pages = true;
  }
#endif // LINUX

  update_layout(true);
}

MutableNUMASpace::~MutableNUMASpace() {
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    delete lgrp_spaces()->at(i);
  }
  delete lgrp_spaces();
}

#ifndef PRODUCT
void MutableNUMASpace::mangle_unused_area() {
  // This method should do nothing.
  // It can be called on a numa space during a full compaction.
}
void MutableNUMASpace::mangle_unused_area_complete() {
  // This method should do nothing.
  // It can be called on a numa space during a full compaction.
}
void MutableNUMASpace::mangle_region(MemRegion mr) {
  // This method should do nothing because numa spaces are not mangled.
}
void MutableNUMASpace::set_top_for_allocations(HeapWord* v) {
  assert(false, "Do not mangle MutableNUMASpace's");
}
void MutableNUMASpace::set_top_for_allocations() {
  // This method should do nothing.
}
void MutableNUMASpace::check_mangled_unused_area(HeapWord* limit) {
  // This method should do nothing.
}
void MutableNUMASpace::check_mangled_unused_area_complete() {
  // This method should do nothing.
}
#endif  // NOT_PRODUCT

// There may be unallocated holes in the middle chunks
// that should be filled with dead objects to ensure parsability.
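// For each per-lgrp chunk below the current top(), the gap between the chunk's
// top() and end() is plugged with filler objects, each at most
// CollectedHeap::filler_array_max_size() words long. On systems without static
// NUMA binding the touched pages are also recorded as an invalid region so
// that they can later be placed on the right node.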
void MutableNUMASpace::ensure_parsability() {
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    LGRPSpace *ls = lgrp_spaces()->at(i);
    MutableSpace *s = ls->space();
    if (s->top() < top()) { // For all spaces preceding the one containing top()
      if (s->free_in_words() > 0) {
        intptr_t cur_top = (intptr_t)s->top();
        size_t words_left_to_fill = pointer_delta(s->end(), s->top());
        while (words_left_to_fill > 0) {
          size_t words_to_fill = MIN2(words_left_to_fill, CollectedHeap::filler_array_max_size());
          assert(words_to_fill >= CollectedHeap::min_fill_size(),
                 "Remaining size (" SIZE_FORMAT ") is too small to fill (based on " SIZE_FORMAT " and " SIZE_FORMAT ")",
                 words_to_fill, words_left_to_fill, CollectedHeap::filler_array_max_size());
          CollectedHeap::fill_with_object((HeapWord*)cur_top, words_to_fill);
          if (!os::numa_has_static_binding()) {
            size_t touched_words = words_to_fill;
#ifndef ASSERT
            if (!ZapUnusedHeapArea) {
              touched_words = MIN2((size_t)align_object_size(typeArrayOopDesc::header_size(T_INT)),
                touched_words);
            }
#endif
            MemRegion invalid;
            HeapWord *crossing_start = align_up((HeapWord*)cur_top, os::vm_page_size());
            HeapWord *crossing_end = align_down((HeapWord*)(cur_top + touched_words), os::vm_page_size());
            if (crossing_start != crossing_end) {
              // If the object header crossed a small page boundary we mark the area
              // as invalid, rounding it to page_size().
              HeapWord *start = MAX2(align_down((HeapWord*)cur_top, page_size()), s->bottom());
              HeapWord *end = MIN2(align_up((HeapWord*)(cur_top + touched_words), page_size()), s->end());
              invalid = MemRegion(start, end);
            }

            ls->add_invalid_region(invalid);
          }
          cur_top = cur_top + (words_to_fill * HeapWordSize);
          words_left_to_fill -= words_to_fill;
        }
      }
    } else {
      if (!os::numa_has_static_binding()) {
#ifdef ASSERT
        MemRegion invalid(s->top(), s->end());
        ls->add_invalid_region(invalid);
#else
        if (ZapUnusedHeapArea) {
          MemRegion invalid(s->top(), s->end());
          ls->add_invalid_region(invalid);
        } else {
          return;
        }
#endif
      } else {
        return;
      }
    }
  }
}

size_t MutableNUMASpace::used_in_words() const {
  size_t s = 0;
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    s += lgrp_spaces()->at(i)->space()->used_in_words();
  }
  return s;
}

size_t MutableNUMASpace::free_in_words() const {
  size_t s = 0;
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    s += lgrp_spaces()->at(i)->space()->free_in_words();
  }
  return s;
}


size_t MutableNUMASpace::tlab_capacity(Thread *thr) const {
  guarantee(thr != NULL, "No thread");
  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1) {
    // This case can occur after the topology of the system has
    // changed. Threads can change their location; the new home
    // group will be determined during the first allocation
    // attempt. For now we can safely assume that all spaces
    // have equal size because the whole space will be reinitialized.
    if (lgrp_spaces()->length() > 0) {
      return capacity_in_bytes() / lgrp_spaces()->length();
    } else {
      assert(false, "There should be at least one locality group");
      return 0;
    }
  }
  // That's the normal case, where we know the locality group of the thread.
  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
  if (i == -1) {
    return 0;
  }
  return lgrp_spaces()->at(i)->space()->capacity_in_bytes();
}

size_t MutableNUMASpace::tlab_used(Thread *thr) const {
  // Please see the comments for tlab_capacity().
  guarantee(thr != NULL, "No thread");
  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1) {
    if (lgrp_spaces()->length() > 0) {
      return used_in_bytes() / lgrp_spaces()->length();
    } else {
      assert(false, "There should be at least one locality group");
      return 0;
    }
  }
  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
  if (i == -1) {
    return 0;
  }
  return lgrp_spaces()->at(i)->space()->used_in_bytes();
}


size_t MutableNUMASpace::unsafe_max_tlab_alloc(Thread *thr) const {
  // Please see the comments for tlab_capacity().
  guarantee(thr != NULL, "No thread");
  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1) {
    if (lgrp_spaces()->length() > 0) {
      return free_in_bytes() / lgrp_spaces()->length();
    } else {
      assert(false, "There should be at least one locality group");
      return 0;
    }
  }
  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
  if (i == -1) {
    return 0;
  }
  return lgrp_spaces()->at(i)->space()->free_in_bytes();
}


size_t MutableNUMASpace::capacity_in_words(Thread* thr) const {
  guarantee(thr != NULL, "No thread");
  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1) {
    if (lgrp_spaces()->length() > 0) {
      return capacity_in_words() / lgrp_spaces()->length();
    } else {
      assert(false, "There should be at least one locality group");
      return 0;
    }
  }
  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
  if (i == -1) {
    return 0;
  }
  return lgrp_spaces()->at(i)->space()->capacity_in_words();
}

// Check if the NUMA topology has changed. Add and remove spaces if needed.
// The update can be forced by setting the force parameter equal to true.
bool MutableNUMASpace::update_layout(bool force) {
  // Check if the topology has changed.
  bool changed = os::numa_topology_changed();
  if (force || changed) {
    // Compute lgrp intersection. Add/remove spaces.
    int lgrp_limit = (int)os::numa_get_groups_num();
    int *lgrp_ids = NEW_C_HEAP_ARRAY(int, lgrp_limit, mtGC);
    int lgrp_num = (int)os::numa_get_leaf_groups(lgrp_ids, lgrp_limit);
    assert(lgrp_num > 0, "There should be at least one locality group");
    // Add new spaces for the new nodes.
    for (int i = 0; i < lgrp_num; i++) {
      bool found = false;
      for (int j = 0; j < lgrp_spaces()->length(); j++) {
        if (lgrp_spaces()->at(j)->lgrp_id() == lgrp_ids[i]) {
          found = true;
          break;
        }
      }
      if (!found) {
        lgrp_spaces()->append(new LGRPSpace(lgrp_ids[i], alignment()));
      }
    }

    // Remove spaces for the removed nodes.
    for (int i = 0; i < lgrp_spaces()->length();) {
      bool found = false;
      for (int j = 0; j < lgrp_num; j++) {
        if (lgrp_spaces()->at(i)->lgrp_id() == lgrp_ids[j]) {
          found = true;
          break;
        }
      }
      if (!found) {
        delete lgrp_spaces()->at(i);
        lgrp_spaces()->remove_at(i);
      } else {
        i++;
      }
    }

    FREE_C_HEAP_ARRAY(int, lgrp_ids);

    if (changed) {
      for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
        thread->set_lgrp_id(-1);
      }
    }
    return true;
  }
  return false;
}

// Bias region towards the first-touching lgrp. Set the right page sizes.
void MutableNUMASpace::bias_region(MemRegion mr, int lgrp_id) {
  HeapWord *start = align_up(mr.start(), page_size());
  HeapWord *end = align_down(mr.end(), page_size());
  if (end > start) {
    MemRegion aligned_region(start, end);
    assert((intptr_t)aligned_region.start()     % page_size() == 0 &&
           (intptr_t)aligned_region.byte_size() % page_size() == 0, "Bad alignment");
    assert(region().contains(aligned_region), "Sanity");
    // First we tell the OS which page size we want in the given range. The underlying
    // large page can be broken down if we require small pages.
    os::realign_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
    // Then we uncommit the pages in the range.
    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
    // And make them local/first-touch biased.
    os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size(), lgrp_id);
  }
}

// Free all pages in the region.
void MutableNUMASpace::free_region(MemRegion mr) {
  HeapWord *start = align_up(mr.start(), page_size());
  HeapWord *end = align_down(mr.end(), page_size());
  if (end > start) {
    MemRegion aligned_region(start, end);
    assert((intptr_t)aligned_region.start()     % page_size() == 0 &&
           (intptr_t)aligned_region.byte_size() % page_size() == 0, "Bad alignment");
    assert(region().contains(aligned_region), "Sanity");
    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
  }
}

// Update space layout. Perform adaptation.
void MutableNUMASpace::update() {
  if (update_layout(false)) {
    // If the topology has changed, make all chunks zero-sized
    // and clear the alloc-rate statistics.
    // In the future we may want to handle this more gracefully in order
    // to avoid the reallocation of the pages as much as possible.
    for (int i = 0; i < lgrp_spaces()->length(); i++) {
      LGRPSpace *ls = lgrp_spaces()->at(i);
      MutableSpace *s = ls->space();
      s->set_end(s->bottom());
      s->set_top(s->bottom());
      ls->clear_alloc_rate();
    }
    // A NUMA space is never mangled.
    initialize(region(),
               SpaceDecorator::Clear,
               SpaceDecorator::DontMangle);
  } else {
    bool should_initialize = false;
    if (!os::numa_has_static_binding()) {
      for (int i = 0; i < lgrp_spaces()->length(); i++) {
        if (!lgrp_spaces()->at(i)->invalid_region().is_empty()) {
          should_initialize = true;
          break;
        }
      }
    }

    if (should_initialize ||
        (UseAdaptiveNUMAChunkSizing && adaptation_cycles() < samples_count())) {
      // A NUMA space is never mangled.
      initialize(region(),
                 SpaceDecorator::Clear,
                 SpaceDecorator::DontMangle);
    }
  }

  if (NUMAStats) {
    for (int i = 0; i < lgrp_spaces()->length(); i++) {
      lgrp_spaces()->at(i)->accumulate_statistics(page_size());
    }
  }

  scan_pages(NUMAPageScanRate);
}

// Scan pages. Free pages that have smaller size or wrong placement.
void MutableNUMASpace::scan_pages(size_t page_count)
{
  size_t pages_per_chunk = page_count / lgrp_spaces()->length();
  if (pages_per_chunk > 0) {
    for (int i = 0; i < lgrp_spaces()->length(); i++) {
      LGRPSpace *ls = lgrp_spaces()->at(i);
      ls->scan_pages(page_size(), pages_per_chunk);
    }
  }
}

// Accumulate statistics about the allocation rate of each lgrp.
void MutableNUMASpace::accumulate_statistics() {
  if (UseAdaptiveNUMAChunkSizing) {
    for (int i = 0; i < lgrp_spaces()->length(); i++) {
      lgrp_spaces()->at(i)->sample();
    }
    increment_samples_count();
  }

  if (NUMAStats) {
    for (int i = 0; i < lgrp_spaces()->length(); i++) {
      lgrp_spaces()->at(i)->accumulate_statistics(page_size());
    }
  }
}

// Get the current size of a chunk.
// This function computes the size of the chunk based on the
// difference between chunk ends. This allows it to work correctly in
// case the whole space is resized and during the process of adaptive
// chunk resizing.
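// As an illustration (not tied to any particular heap size), with three
// locality groups the chunk of space i stretches from the end of space i - 1
// (or bottom() for i == 0) to the end of space i (or end() for the last one):
//
//   bottom()                                                      end()
//   |-------chunk 0-------|-------chunk 1-------|-------chunk 2-------|
//                         ^ end of space 0      ^ end of space 1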
size_t MutableNUMASpace::current_chunk_size(int i) {
  HeapWord *cur_end, *prev_end;
  if (i == 0) {
    prev_end = bottom();
  } else {
    prev_end = lgrp_spaces()->at(i - 1)->space()->end();
  }
  if (i == lgrp_spaces()->length() - 1) {
    cur_end = end();
  } else {
    cur_end = lgrp_spaces()->at(i)->space()->end();
  }
  if (cur_end > prev_end) {
    return pointer_delta(cur_end, prev_end, sizeof(char));
  }
  return 0;
}

// Return the default chunk size by equally dividing the space.
// page_size() aligned.
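// A worked example (numbers are illustrative only): with base_space_size() of
// 1024 pages, four locality groups and a 4K page_size(), each chunk defaults
// to 1024 / 4 * 4K = 1M. The integer division happens first, so the result is
// always a multiple of page_size().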
size_t MutableNUMASpace::default_chunk_size() {
  return base_space_size() / lgrp_spaces()->length() * page_size();
}

// Produce a new chunk size. page_size() aligned.
// This function is expected to be called on a sequence of i's from 0 to
// lgrp_spaces()->length().
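// The chunk for space i is sized in proportion to its share of the combined
// allocation rate of spaces i .. length() - 1, taken out of the pages that are
// still unassigned. As an illustrative example: if space i accounts for 30% of
// that combined rate and 100 pages remain available, it is offered roughly 30
// pages worth of memory, clamped to at least one page and, when a non-zero
// limit is given, kept within 'limit' bytes of the current chunk size.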
size_t MutableNUMASpace::adaptive_chunk_size(int i, size_t limit) {
  size_t pages_available = base_space_size();
  for (int j = 0; j < i; j++) {
    pages_available -= align_down(current_chunk_size(j), page_size()) / page_size();
  }
  pages_available -= lgrp_spaces()->length() - i - 1;
  assert(pages_available > 0, "No pages left");
  float alloc_rate = 0;
  for (int j = i; j < lgrp_spaces()->length(); j++) {
    alloc_rate += lgrp_spaces()->at(j)->alloc_rate()->average();
  }
  size_t chunk_size = 0;
  if (alloc_rate > 0) {
    LGRPSpace *ls = lgrp_spaces()->at(i);
    chunk_size = (size_t)(ls->alloc_rate()->average() / alloc_rate * pages_available) * page_size();
  }
  chunk_size = MAX2(chunk_size, page_size());

  if (limit > 0) {
    limit = align_down(limit, page_size());
    if (chunk_size > current_chunk_size(i)) {
      size_t upper_bound = pages_available * page_size();
      if (upper_bound > limit &&
          current_chunk_size(i) < upper_bound - limit) {
        // The resulting upper bound should not exceed the available
        // amount of memory (pages_available * page_size()).
        upper_bound = current_chunk_size(i) + limit;
      }
      chunk_size = MIN2(chunk_size, upper_bound);
    } else {
      size_t lower_bound = page_size();
      if (current_chunk_size(i) > limit) { // lower_bound shouldn't underflow.
        lower_bound = current_chunk_size(i) - limit;
      }
      chunk_size = MAX2(chunk_size, lower_bound);
    }
  }
  assert(chunk_size <= pages_available * page_size(), "Chunk size out of range");
  return chunk_size;
}


// Return the bottom_region and the top_region. Align them to page_size() boundary.
// |------------------new_region---------------------------------|
// |----bottom_region--|---intersection---|------top_region------|
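// When large pages are in use, the intersection is additionally shrunk to an
// alignment() boundary, so that the bottom and top tails cover whole large
// pages and can be coalesced back into large pages after being freed.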
void MutableNUMASpace::select_tails(MemRegion new_region, MemRegion intersection,
                                    MemRegion* bottom_region, MemRegion *top_region) {
  // Is there bottom?
  if (new_region.start() < intersection.start()) { // Yes
    // Try to coalesce small pages into a large one.
    if (UseLargePages && page_size() >= alignment()) {
      HeapWord* p = align_up(intersection.start(), alignment());
      if (new_region.contains(p)
          && pointer_delta(p, new_region.start(), sizeof(char)) >= alignment()) {
        if (intersection.contains(p)) {
          intersection = MemRegion(p, intersection.end());
        } else {
          intersection = MemRegion(p, p);
        }
      }
    }
    *bottom_region = MemRegion(new_region.start(), intersection.start());
  } else {
    *bottom_region = MemRegion();
  }

  // Is there top?
  if (intersection.end() < new_region.end()) { // Yes
    // Try to coalesce small pages into a large one.
    if (UseLargePages && page_size() >= alignment()) {
      HeapWord* p = align_down(intersection.end(), alignment());
      if (new_region.contains(p)
          && pointer_delta(new_region.end(), p, sizeof(char)) >= alignment()) {
        if (intersection.contains(p)) {
          intersection = MemRegion(intersection.start(), p);
        } else {
          intersection = MemRegion(p, p);
        }
      }
    }
    *top_region = MemRegion(intersection.end(), new_region.end());
  } else {
    *top_region = MemRegion();
  }
}

// Try to merge the invalid region with the bottom or top region by decreasing
// the intersection area. If the invalid region lies inside the intersection,
// return it aligned to the page_size() boundary and non-empty.
// |------------------new_region---------------------------------|
// |----------------|-------invalid---|--------------------------|
// |----bottom_region--|---intersection---|------top_region------|
void MutableNUMASpace::merge_regions(MemRegion new_region, MemRegion* intersection,
                                     MemRegion *invalid_region) {
  if (intersection->start() >= invalid_region->start() && intersection->contains(invalid_region->end())) {
    *intersection = MemRegion(invalid_region->end(), intersection->end());
    *invalid_region = MemRegion();
  } else if (intersection->end() <= invalid_region->end() && intersection->contains(invalid_region->start())) {
    *intersection = MemRegion(intersection->start(), invalid_region->start());
    *invalid_region = MemRegion();
  } else if (intersection->equals(*invalid_region) || invalid_region->contains(*intersection)) {
    *intersection = MemRegion(new_region.start(), new_region.start());
    *invalid_region = MemRegion();
  } else if (intersection->contains(invalid_region)) {
    // That's the only case where we have to make an additional bias_region() call.
    HeapWord* start = invalid_region->start();
    HeapWord* end = invalid_region->end();
    if (UseLargePages && page_size() >= alignment()) {
      HeapWord *p = align_down(start, alignment());
      if (new_region.contains(p)) {
        start = p;
      }
      p = align_up(end, alignment());
      if (new_region.contains(end)) {
        end = p;
      }
    }
    if (intersection->start() > start) {
      *intersection = MemRegion(start, intersection->end());
    }
    if (intersection->end() < end) {
      *intersection = MemRegion(intersection->start(), end);
    }
    *invalid_region = MemRegion(start, end);
  }
}

void MutableNUMASpace::initialize(MemRegion mr,
                                  bool clear_space,
                                  bool mangle_space,
                                  bool setup_pages) {
  assert(clear_space, "Reallocation will destroy data!");
  assert(lgrp_spaces()->length() > 0, "There should be at least one space");

  MemRegion old_region = region(), new_region;
  set_bottom(mr.start());
  set_end(mr.end());
  // Must always clear the space.
  clear(SpaceDecorator::DontMangle);

  // Compute chunk sizes.
  size_t prev_page_size = page_size();
  set_page_size(UseLargePages ? alignment() : os::vm_page_size());
  HeapWord* rounded_bottom = align_up(bottom(), page_size());
  HeapWord* rounded_end = align_down(end(), page_size());
  size_t base_space_size_pages = pointer_delta(rounded_end, rounded_bottom, sizeof(char)) / page_size();

  // Try small pages if the chunk size is too small.
  if (base_space_size_pages / lgrp_spaces()->length() == 0
      && page_size() > (size_t)os::vm_page_size()) {
    // Changing the page size below can lead to freeing of memory, so we fail initialization.
    if (_must_use_large_pages) {
      vm_exit_during_initialization("Failed initializing NUMA with large pages. Too small heap size");
    }
    set_page_size(os::vm_page_size());
    rounded_bottom = align_up(bottom(), page_size());
    rounded_end = align_down(end(), page_size());
    base_space_size_pages = pointer_delta(rounded_end, rounded_bottom, sizeof(char)) / page_size();
  }
  guarantee(base_space_size_pages / lgrp_spaces()->length() > 0, "Space too small");
  set_base_space_size(base_space_size_pages);

  // Handle space resize.
  MemRegion top_region, bottom_region;
  if (!old_region.equals(region())) {
    new_region = MemRegion(rounded_bottom, rounded_end);
    MemRegion intersection = new_region.intersection(old_region);
    if (intersection.start() == NULL ||
        intersection.end() == NULL   ||
        prev_page_size > page_size()) { // If the page size got smaller we have to change
                                        // the page size preference for the whole space.
      intersection = MemRegion(new_region.start(), new_region.start());
    }
    select_tails(new_region, intersection, &bottom_region, &top_region);
    bias_region(bottom_region, lgrp_spaces()->at(0)->lgrp_id());
    bias_region(top_region, lgrp_spaces()->at(lgrp_spaces()->length() - 1)->lgrp_id());
  }

  // Check if the space layout has changed significantly.
  // This happens when the space has been resized so that either the head or the tail
  // chunk became less than a page.
  bool layout_valid = UseAdaptiveNUMAChunkSizing          &&
                      current_chunk_size(0) > page_size() &&
                      current_chunk_size(lgrp_spaces()->length() - 1) > page_size();


  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    LGRPSpace *ls = lgrp_spaces()->at(i);
    MutableSpace *s = ls->space();
    old_region = s->region();

    size_t chunk_byte_size = 0, old_chunk_byte_size = 0;
    if (i < lgrp_spaces()->length() - 1) {
      if (!UseAdaptiveNUMAChunkSizing                                ||
          (UseAdaptiveNUMAChunkSizing && NUMAChunkResizeWeight == 0) ||
           samples_count() < AdaptiveSizePolicyReadyThreshold) {
        // No adaptation. Divide the space equally.
        chunk_byte_size = default_chunk_size();
      } else if (!layout_valid || NUMASpaceResizeRate == 0) {
        // Fast adaptation. If no space resize rate is set, resize
        // the chunks instantly.
        chunk_byte_size = adaptive_chunk_size(i, 0);
      } else {
        // Slow adaptation. Resize the chunks moving no more than
        // NUMASpaceResizeRate bytes per collection.
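        // The divisor is the triangular number 1 + 2 + ... + n for the n
        // locality groups; chunk i is then given a budget of limit * (i + 1)
        // bytes, so the boundary shifts of all chunks taken together stay
        // approximately within NUMASpaceResizeRate.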
        size_t limit = NUMASpaceResizeRate /
                       (lgrp_spaces()->length() * (lgrp_spaces()->length() + 1) / 2);
        chunk_byte_size = adaptive_chunk_size(i, MAX2(limit * (i + 1), page_size()));
      }

      assert(chunk_byte_size >= page_size(), "Chunk size too small");
      assert(chunk_byte_size <= capacity_in_bytes(), "Sanity check");
    }

    if (i == 0) { // Bottom chunk
      if (i != lgrp_spaces()->length() - 1) {
        new_region = MemRegion(bottom(), rounded_bottom + (chunk_byte_size >> LogHeapWordSize));
      } else {
        new_region = MemRegion(bottom(), end());
      }
    } else if (i < lgrp_spaces()->length() - 1) { // Middle chunks
      MutableSpace *ps = lgrp_spaces()->at(i - 1)->space();
      new_region = MemRegion(ps->end(),
                             ps->end() + (chunk_byte_size >> LogHeapWordSize));
    } else { // Top chunk
      MutableSpace *ps = lgrp_spaces()->at(i - 1)->space();
      new_region = MemRegion(ps->end(), end());
    }
    guarantee(region().contains(new_region), "Region invariant");


    // The general case:
    // |---------------------|--invalid---|--------------------------|
    // |------------------new_region---------------------------------|
    // |----bottom_region--|---intersection---|------top_region------|
    //                     |----old_region----|
    // The intersection part has all pages in place, so we don't need to migrate them.
    // Pages for the top and bottom parts should be freed and then reallocated.

    MemRegion intersection = old_region.intersection(new_region);

    if (intersection.start() == NULL || intersection.end() == NULL) {
      intersection = MemRegion(new_region.start(), new_region.start());
    }

    if (!os::numa_has_static_binding()) {
      MemRegion invalid_region = ls->invalid_region().intersection(new_region);
      // An invalid region is a range of memory that could have been allocated on
      // the other node. That's relevant only on Solaris, where there is no
      // static memory binding.
      if (!invalid_region.is_empty()) {
        merge_regions(new_region, &intersection, &invalid_region);
        free_region(invalid_region);
        ls->set_invalid_region(MemRegion());
      }
    }

    select_tails(new_region, intersection, &bottom_region, &top_region);

    if (!os::numa_has_static_binding()) {
      // If that's a system with the first-touch policy then it's enough
      // to free the pages.
      free_region(bottom_region);
      free_region(top_region);
    } else {
      // In a system with static binding we have to change the bias whenever
      // we reshape the heap.
      bias_region(bottom_region, ls->lgrp_id());
      bias_region(top_region, ls->lgrp_id());
    }

    // Clear space (set top = bottom) but never mangle.
    s->initialize(new_region, SpaceDecorator::Clear, SpaceDecorator::DontMangle, MutableSpace::DontSetupPages);

    set_adaptation_cycles(samples_count());
  }
}

// Set the top of the whole space.
// Mark the holes in chunks below the top() as invalid.
void MutableNUMASpace::set_top(HeapWord* value) {
  bool found_top = false;
  for (int i = 0; i < lgrp_spaces()->length();) {
    LGRPSpace *ls = lgrp_spaces()->at(i);
    MutableSpace *s = ls->space();
    HeapWord *top = MAX2(align_down(s->top(), page_size()), s->bottom());

    if (s->contains(value)) {
      // Check if setting the chunk's top to a given value would create a hole less than
      // a minimal object; assuming that's not the last chunk in which case we don't care.
      if (i < lgrp_spaces()->length() - 1) {
        size_t remainder = pointer_delta(s->end(), value);
        const size_t min_fill_size = CollectedHeap::min_fill_size();
        if (remainder < min_fill_size && remainder > 0) {
          // Add a minimum size filler object; it will cross the chunk boundary.
          CollectedHeap::fill_with_object(value, min_fill_size);
          value += min_fill_size;
          assert(!s->contains(value), "Should be in the next chunk");
          // Restart the loop from the same chunk, since the value has moved
          // to the next one.
          continue;
        }
      }

      if (!os::numa_has_static_binding() && top < value && top < s->end()) {
        ls->add_invalid_region(MemRegion(top, value));
      }
      s->set_top(value);
      found_top = true;
    } else {
      if (found_top) {
        s->set_top(s->bottom());
      } else {
        if (!os::numa_has_static_binding() && top < s->end()) {
          ls->add_invalid_region(MemRegion(top, s->end()));
        }
        s->set_top(s->end());
      }
    }
    i++;
  }
  MutableSpace::set_top(value);
}

void MutableNUMASpace::clear(bool mangle_space) {
  MutableSpace::set_top(bottom());
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    // Never mangle NUMA spaces because the mangling will
    // bind the memory to a possibly unwanted lgroup.
    lgrp_spaces()->at(i)->space()->clear(SpaceDecorator::DontMangle);
  }
}

/*
   Linux supports static memory binding, so most of the logic dealing with
   possibly invalid page allocations is effectively disabled. Besides, there is
   no notion of a home node in Linux: a thread is allowed to migrate freely,
   although the scheduler is rather reluctant to move threads between nodes.
   We check for the current node on every allocation, and with high probability
   a thread stays on the same node for some time, allowing local access to
   recently allocated objects.
 */

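// Allocate in the thread's home locality group. If the thread has no home yet
// (or the OS does not support group homing), the home group is looked up
// first; if no chunk exists for that group yet (e.g. after a CPU hotplug we
// have not adapted to), a random chunk is used instead. An allocation that
// would leave a tail smaller than the minimum filler object is undone and
// reported as a failure, and on systems without static binding the new block
// is touched page by page so the pages end up on the allocating node.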
HeapWord* MutableNUMASpace::allocate(size_t size) {
  Thread* thr = Thread::current();
  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1 || !os::numa_has_group_homing()) {
    lgrp_id = os::numa_get_group_id();
    thr->set_lgrp_id(lgrp_id);
  }

  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);

  // It is possible that a new CPU has been hotplugged and
  // we haven't reshaped the space accordingly.
  if (i == -1) {
    i = os::random() % lgrp_spaces()->length();
  }

  LGRPSpace* ls = lgrp_spaces()->at(i);
  MutableSpace *s = ls->space();
  HeapWord *p = s->allocate(size);

  if (p != NULL) {
    size_t remainder = s->free_in_words();
    if (remainder < CollectedHeap::min_fill_size() && remainder > 0) {
      s->set_top(s->top() - size);
      p = NULL;
    }
  }
  if (p != NULL) {
    if (top() < s->top()) { // Keep _top updated.
      MutableSpace::set_top(s->top());
    }
  }
  // Make the page allocation happen here if there is no static binding.
  if (p != NULL && !os::numa_has_static_binding()) {
    for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
      *(int*)i = 0;
    }
  }
  if (p == NULL) {
    ls->set_allocation_failed();
  }
  return p;
}

// This version is lock-free.
HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
  Thread* thr = Thread::current();
  int lgrp_id = thr->lgrp_id();
  if (lgrp_id == -1 || !os::numa_has_group_homing()) {
    lgrp_id = os::numa_get_group_id();
    thr->set_lgrp_id(lgrp_id);
  }

  int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
  // It is possible that a new CPU has been hotplugged and
  // we haven't reshaped the space accordingly.
  if (i == -1) {
    i = os::random() % lgrp_spaces()->length();
  }
  LGRPSpace *ls = lgrp_spaces()->at(i);
  MutableSpace *s = ls->space();
  HeapWord *p = s->cas_allocate(size);
  if (p != NULL) {
    size_t remainder = pointer_delta(s->end(), p + size);
    if (remainder < CollectedHeap::min_fill_size() && remainder > 0) {
      if (s->cas_deallocate(p, size)) {
        // We were the last to allocate and created a fragment less than
        // a minimal object.
        p = NULL;
      } else {
        guarantee(false, "Deallocation should always succeed");
      }
    }
  }
  if (p != NULL) {
    HeapWord* cur_top, *cur_chunk_top = p + size;
    while ((cur_top = top()) < cur_chunk_top) { // Keep _top updated.
      if (Atomic::cmpxchg(cur_chunk_top, top_addr(), cur_top) == cur_top) {
        break;
      }
    }
  }

  // Make the page allocation happen here if there is no static binding.
  if (p != NULL && !os::numa_has_static_binding()) {
    for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
      *(int*)i = 0;
    }
  }
  if (p == NULL) {
    ls->set_allocation_failed();
  }
  return p;
}

void MutableNUMASpace::print_short_on(outputStream* st) const {
  MutableSpace::print_short_on(st);
  st->print(" (");
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    st->print("lgrp %d: ", lgrp_spaces()->at(i)->lgrp_id());
    lgrp_spaces()->at(i)->space()->print_short_on(st);
    if (i < lgrp_spaces()->length() - 1) {
      st->print(", ");
    }
  }
  st->print(")");
}

void MutableNUMASpace::print_on(outputStream* st) const {
  MutableSpace::print_on(st);
  for (int i = 0; i < lgrp_spaces()->length(); i++) {
    LGRPSpace *ls = lgrp_spaces()->at(i);
    st->print("    lgrp %d", ls->lgrp_id());
    ls->space()->print_on(st);
    if (NUMAStats) {
      for (int i = 0; i < lgrp_spaces()->length(); i++) {
        lgrp_spaces()->at(i)->accumulate_statistics(page_size());
      }
      st->print("    local/remote/unbiased/uncommitted: " SIZE_FORMAT "K/"
                SIZE_FORMAT "K/" SIZE_FORMAT "K/" SIZE_FORMAT
                "K, large/small pages: " SIZE_FORMAT "/" SIZE_FORMAT "\n",
                ls->space_stats()->_local_space / K,
                ls->space_stats()->_remote_space / K,
                ls->space_stats()->_unbiased_space / K,
                ls->space_stats()->_uncommited_space / K,
                ls->space_stats()->_large_pages,
                ls->space_stats()->_small_pages);
    }
  }
}

void MutableNUMASpace::verify() {
  // This can be called after setting an arbitrary value to the space's top,
  // so an object can cross the chunk boundary. We ensure the parsability
  // of the space and just walk the objects in linear fashion.
  ensure_parsability();
  MutableSpace::verify();
}

// Scan pages and gather stats about page placement and size.
void MutableNUMASpace::LGRPSpace::accumulate_statistics(size_t page_size) {
  clear_space_stats();
  char *start = (char*)align_up(space()->bottom(), page_size);
  char* end = (char*)align_down(space()->end(), page_size);
  if (start < end) {
    for (char *p = start; p < end;) {
      os::page_info info;
      if (os::get_page_info(p, &info)) {
        if (info.size > 0) {
          if (info.size > (size_t)os::vm_page_size()) {
            space_stats()->_large_pages++;
          } else {
            space_stats()->_small_pages++;
          }
          if (info.lgrp_id == lgrp_id()) {
            space_stats()->_local_space += info.size;
          } else {
            space_stats()->_remote_space += info.size;
          }
          p += info.size;
        } else {
          p += os::vm_page_size();
          space_stats()->_uncommited_space += os::vm_page_size();
        }
      } else {
        return;
      }
    }
  }
  space_stats()->_unbiased_space = pointer_delta(start, space()->bottom(), sizeof(char)) +
                                   pointer_delta(space()->end(), end, sizeof(char));
}

// Scan page_count pages and verify that they have the right size and placement.
// If invalid pages are found they are freed in the hope that subsequent
// reallocation will be more successful.
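// The scan resumes from where the previous call stopped (last_page_scanned())
// and covers at most page_count pages per call, starting over from the
// beginning of the page-aligned part of the space once the end is reached.
// Ranges whose pages have an unexpected size or locality group are uncommitted
// with os::free_memory() so that they can be faulted back in correctly.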
void MutableNUMASpace::LGRPSpace::scan_pages(size_t page_size, size_t page_count)
{
  char* range_start = (char*)align_up(space()->bottom(), page_size);
  char* range_end = (char*)align_down(space()->end(), page_size);

  if (range_start > last_page_scanned() || last_page_scanned() >= range_end) {
    set_last_page_scanned(range_start);
  }

  char *scan_start = last_page_scanned();
  char* scan_end = MIN2(scan_start + page_size * page_count, range_end);

  os::page_info page_expected, page_found;
  page_expected.size = page_size;
  page_expected.lgrp_id = lgrp_id();

  char *s = scan_start;
  while (s < scan_end) {
    char *e = os::scan_pages(s, (char*)scan_end, &page_expected, &page_found);
    if (e == NULL) {
      break;
    }
    if (e != scan_end) {
      assert(e < scan_end, "e: " PTR_FORMAT " scan_end: " PTR_FORMAT, p2i(e), p2i(scan_end));

      if ((page_expected.size != page_size || page_expected.lgrp_id != lgrp_id())
          && page_expected.size != 0) {
        os::free_memory(s, pointer_delta(e, s, sizeof(char)), page_size);
      }
      page_expected = page_found;
    }
    s = e;
  }

  set_last_page_scanned(scan_end);
}