1 /*
   2  * Copyright (c) 2016, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "gc/shared/gcLocker.hpp"
  27 #include "memory/vtBuffer.hpp"
  28 #include "oops/oop.inline.hpp"
  29 #include "oops/valueKlass.hpp"
  30 #include "runtime/frame.hpp"
  31 #include "runtime/thread.hpp"
  32 #include "utilities/globalDefinitions.hpp"
  33 #include "utilities/ticks.hpp"
  34 #include "utilities/ticks.inline.hpp"
  35 
// Global pool of free chunks shared by all threads (doubly-linked list head).
VTBufferChunk* VTBuffer::_free_list = NULL;
// Protects _free_list and the counters below.
Mutex* VTBuffer::_pool_lock = new Mutex(Mutex::leaf, "VTBuffer::_pool_lock", true, Monitor::_safepoint_check_never);
// Current number of chunks in the global free list.
int VTBuffer::_pool_counter = 0;
// High-water mark of _pool_counter (statistics).
int VTBuffer::_max_pool_counter = 0;
// Number of chunks ever allocated with new (checked against ValueTypesBufferMaxMemory).
int VTBuffer::_total_allocated = 0;
// Number of chunks ever released with delete (statistics).
int VTBuffer::_total_deallocated = 0;
  42 
// Allocates a buffered value of type k in the current thread's thread-local
// value buffer. Returns NULL when buffering is not possible (value too big
// for a chunk, or chunk allocation failed/limit reached), in which case the
// caller falls back to a Java heap allocation. On success the value is
// zero-filled and its header is initialized: klass set to k, mark word set
// to k's Java mirror (buffered values keep the mirror in the mark word).
oop VTBuffer::allocate_value(ValueKlass* k, TRAPS) {
  assert(THREAD->is_Java_thread(), "Only JavaThreads have a buffer for value types");
  JavaThread* thread = (JavaThread*)THREAD;
  if (thread->vt_alloc_ptr() == NULL) {
    // First buffered allocation for this thread: install an initial chunk
    if (!allocate_vt_chunk(thread)) {
      return NULL; // will trigger fall back strategy: allocation in Java heap
    }
  }
  assert(thread->vt_alloc_ptr() != NULL, "should not be null if chunk allocation was successful");
  int size_in_bytes = k->size_helper() * wordSize;
  // Not enough room left in the current chunk?
  if ((char*)thread->vt_alloc_ptr() + size_in_bytes  >= thread->vt_alloc_limit()) {
    if (size_in_bytes > (int)VTBufferChunk::max_alloc_size()) {
      // Too big to be allocated in a buffer
      return NULL;
    }
    if (!allocate_vt_chunk(thread)) {
      return NULL; // will trigger fall back strategy: allocation in Java heap
    }
  }
  assert((char*)thread->vt_alloc_ptr() + size_in_bytes < thread->vt_alloc_limit(),"otherwise the logic above is wrong");
  oop new_vt = (oop)thread->vt_alloc_ptr();
  int size_in_words = k->size_helper();
  thread->increment_vtchunk_total_memory_buffered(size_in_words * HeapWordSize);
  // Bump the allocation pointer past the (object-size-aligned) value
  int increment = align_object_size(size_in_words);
  void* new_ptr = (char*)thread->vt_alloc_ptr() + increment * HeapWordSize;
  // Clamp so the pointer never escapes the current chunk
  new_ptr = MIN2(new_ptr, thread->vt_alloc_limit());
  assert(VTBufferChunk::chunk(new_ptr) == VTBufferChunk::chunk(thread->vt_alloc_ptr()),
      "old and new alloc ptr must be in the same chunk");
  thread->set_vt_alloc_ptr(new_ptr);
  // the value and its header must be initialized before being returned!!!
  memset(((char*)(oopDesc*)new_vt), 0, size_in_bytes);
  new_vt->set_klass(k);
  new_vt->set_mark(markOop(k->java_mirror()));
  return new_vt;
}
  78 
// Installs a fresh chunk as the thread's current allocation chunk and updates
// the thread's vt_alloc_ptr/vt_alloc_limit accordingly. The chunk is taken,
// in order of preference, from: the thread-local one-chunk cache, the global
// free list, or a new C-heap allocation (bounded by ValueTypesBufferMaxMemory).
// Returns false when no chunk could be obtained (caller falls back to the
// Java heap).
bool VTBuffer::allocate_vt_chunk(JavaThread* thread) {
  VTBufferChunk* new_chunk = NULL;
  // Trying local cache;
  if (thread->local_free_chunk() != NULL) {
    new_chunk = thread->local_free_chunk();
    thread->set_local_free_chunk(NULL);
  } else {
    // Trying global pool
    MutexLockerEx ml(_pool_lock, Mutex::_no_safepoint_check_flag);
    if (_free_list != NULL) {
      // Pop the head of the doubly-linked free list
      new_chunk = _free_list;
      _free_list = new_chunk->next();
      if (_free_list != NULL) {
        _free_list->set_prev(NULL);
      }
      new_chunk->set_next(NULL);
      _pool_counter--;
    } else {
      // A new chunk has to be allocated
      // Done with _pool_lock to maintain counters
      if ((_total_allocated + 1) > ValueTypesBufferMaxMemory) {
        // Maximum memory for value types buffer has been reached
        // fallback to Java heap allocations
        return false;
      }
      new_chunk = new VTBufferChunk(thread);
      _total_allocated++;
    }
  }
  if (new_chunk == NULL) return false; // allocation failed
  VTBufferChunk* current = thread->current_chunk();
  assert(new_chunk->owner() == thread || new_chunk->owner()== NULL, "Sanity check");
  assert(new_chunk->index() == -1, "Sanity check");
  new_chunk->set_owner(thread);
  // Link the new chunk after the thread's current chunk; chunk indexes grow
  // monotonically so value_belongs_to_frame() can compare chunk ages
  if(current != NULL) {
    new_chunk->set_prev(current);
    new_chunk->set_index(current->index() + 1);
    current->set_next(new_chunk);
  } else {
    new_chunk->set_index(0);
  }
  thread->increment_vtchunk_in_use();
  thread->set_vt_alloc_ptr(new_chunk->first_alloc());
  thread->set_vt_alloc_limit(new_chunk->alloc_limit());
  return true; // allocation was successful
}
 125 
 126 void VTBuffer::recycle_chunk(JavaThread* thread, VTBufferChunk* chunk) {
 127   if (thread->local_free_chunk() == NULL) {
 128     chunk->set_prev(NULL);
 129     chunk->set_next(NULL);
 130     chunk->set_index(-1);
 131     thread->set_local_free_chunk(chunk);
 132   } else {
 133     return_vt_chunk(thread, chunk);
 134   }
 135   thread->decrement_vtchunk_in_use();
 136 }
 137 
// This is the main way to recycle VTBuffer memory, it is called from
// remove_activation() when an interpreter frame is about to be removed
// from the stack. All memory used in the context of this frame is freed,
// and the vt_alloc_ptr is restored to the value it had when the frame
// was created (modulo a possible adjustment if a value is being returned)
void VTBuffer::recycle_vtbuffer(JavaThread* thread, frame current_frame) {
  address current_ptr = (address)thread->vt_alloc_ptr();
  assert(current_ptr != NULL, "Should not reach here if NULL");
  VTBufferChunk* current_chunk = VTBufferChunk::chunk(current_ptr);
  assert(current_chunk->owner() == thread, "Sanity check");
  address previous_ptr = (address)current_frame.interpreter_frame_vt_alloc_ptr();
  if (previous_ptr == NULL) {
    // vt_alloc_ptr has not been initialized in this frame
    // let's initialize it to the first_alloc() value of the first chunk
    VTBufferChunk* first_chunk = current_chunk;
    while (first_chunk->prev() != NULL) {
      first_chunk = first_chunk->prev();
    }
    previous_ptr = (address)first_chunk->first_alloc();
  }
  assert(previous_ptr != NULL, "Should not reach here if NULL");
  VTBufferChunk* previous_chunk = VTBufferChunk::chunk(previous_ptr);
  assert(previous_chunk->owner() == thread, "Sanity check");
  // Nothing was buffered in this frame's context, nothing to free
  if (current_ptr == previous_ptr) return;
  assert(current_chunk != previous_chunk || current_ptr >= previous_ptr, "Sanity check");
  // Unlink every chunk younger than the one containing the restored pointer,
  // restore the thread's allocation state, then recycle the unlinked chunks
  VTBufferChunk* del = previous_chunk->next();
  previous_chunk->set_next(NULL);
  thread->set_vt_alloc_ptr(previous_ptr);
  thread->set_vt_alloc_limit(previous_chunk->alloc_limit());
  while (del != NULL) {
    VTBufferChunk* temp = del->next();
    VTBuffer::recycle_chunk(thread, del);
    del = temp;
  }
}
 173 
 174 void VTBuffer::return_vt_chunk(JavaThread* thread, VTBufferChunk* chunk) {
 175   chunk->set_prev(NULL);
 176   chunk->set_owner(NULL);
 177   chunk->set_index(-1);
 178   MutexLockerEx ml(_pool_lock, Mutex::_no_safepoint_check_flag);
 179   if (_pool_counter < _max_free_list) {
 180     if (_free_list != NULL) {
 181       chunk->set_next(_free_list);
 182       _free_list->set_prev(chunk);
 183       _free_list = chunk;
 184     } else {
 185       chunk->set_next(NULL);
 186       _free_list = chunk;
 187     }
 188     _pool_counter++;
 189     if (_pool_counter > _max_pool_counter) {
 190       _max_pool_counter = _pool_counter;
 191     }
 192   } else {
 193     delete chunk;
 194     _total_deallocated++;
 195   }
 196   thread->increment_vtchunk_returned();
 197 }
 198 
 199 bool VTBuffer::value_belongs_to_frame(oop p, frame* f) {
 200   // the code below assumes that frame f is the last interpreted frame
 201   // on the execution stack
 202   int p_chunk_idx = VTBufferChunk::chunk(p)->index();
 203   int frame_first_chunk_idx;
 204   if (f->interpreter_frame_vt_alloc_ptr() != NULL) {
 205     frame_first_chunk_idx = VTBufferChunk::chunk(f->interpreter_frame_vt_alloc_ptr())->index();
 206   } else {
 207     frame_first_chunk_idx = 0;
 208   }
 209   if (p_chunk_idx == frame_first_chunk_idx) {
 210     return (intptr_t*)p >= f->interpreter_frame_vt_alloc_ptr();
 211   } else {
 212     return  p_chunk_idx > frame_first_chunk_idx;
 213   }
 214 
 215 }
 216 
 217 void VTBuffer::fix_frame_vt_alloc_ptr(frame f, VTBufferChunk* chunk) {
 218   assert(f.is_interpreted_frame(), "recycling can only be triggered from interpreted frames");
 219   assert(chunk != NULL, "Should not be called if null");
 220   while (chunk->prev() != NULL) {
 221     chunk = chunk->prev();
 222   }
 223   f.interpreter_frame_set_vt_alloc_ptr((intptr_t*)chunk->first_alloc());
 224 }
 225 
 226 extern "C" {
 227   static int compare_reloc_entries(const void* void_a, const void* void_b) {
 228     struct VT_relocation_entry* entry_a = (struct VT_relocation_entry*)void_a;
 229     struct VT_relocation_entry* entry_b = (struct VT_relocation_entry*)void_b;
 230     if (entry_a->chunk_index == entry_b->chunk_index) {
 231       if (entry_a->old_ptr < entry_b->old_ptr) {
 232         return -1;
 233       } else {
 234         return 1;
 235       }
 236     } else {
 237       if (entry_a->chunk_index < entry_b->chunk_index) {
 238         return -1;
 239       } else {
 240         return 1;
 241       }
 242     }
 243   }
 244 }
 245 
 246 void dump_reloc_table(struct VT_relocation_entry* table, int nelem, bool print_new_ptr) {
 247   ResourceMark rm;
 248   for (int i = 0; i < nelem; i++) {
 249           InstanceKlass* ik = InstanceKlass::cast(((oop)table[i].old_ptr)->klass());
 250     tty->print("%d:\t%p\t%d\t%s\t%x", i, table[i].old_ptr, table[i].chunk_index,
 251                 ik->name()->as_C_string(), ik->size_helper() * HeapWordSize);
 252     if (print_new_ptr) {
 253         tty->print_cr("\t%p\t%d\n", table[i].new_ptr, VTBufferChunk::chunk(table[i].new_ptr)->index());
 254     } else {
 255         tty->print_cr("");
 256     }
 257   }
 258 }
 259 
// Relocate value 'old' after value 'previous'.
// 'previous' is the last value already relocated; the new location for 'old'
// is either right after 'previous' in the same chunk, or the beginning of the
// next in-use chunk when 'old' would not fit in the remaining space.
address VTBuffer::relocate_value(address old, address previous, int previous_size_in_words) {
  InstanceKlass* ik_old = InstanceKlass::cast(((oop)old)->klass());
  assert(ik_old->is_value(), "Sanity check");
  VTBufferChunk* chunk = VTBufferChunk::chunk(previous);
  // NOTE(review): unlike the return expression below, this offset is not
  // scaled by HeapWordSize and is derived from old's size rather than
  // previous_size_in_words — confirm this fit test is intentional
  address next_alloc = previous + align_object_size(ik_old->size_helper());
  if(next_alloc + ik_old->size_helper() * HeapWordSize < chunk->alloc_limit()) {
    // relocation can be performed in the same chunk
    return previous + align_object_size(previous_size_in_words) * HeapWordSize;
  } else {
    // relocation must be performed in the next chunk
    VTBufferChunk* next_chunk = chunk->next();
    assert(next_chunk != NULL, "Because we are compacting, there should be enough in use chunks");
    return (address)next_chunk->first_alloc();
  }
}
 276 
// Relocates a buffered value being returned from the current frame down to
// the frame's saved allocation pointer (or the start of the next chunk when
// it would not fit), so that everything allocated in the frame's context can
// be recycled while the return value survives. Updates both the frame's and
// the thread's allocation pointers and recycles the now-unused chunks.
// Returns the (possibly moved) value.
oop VTBuffer::relocate_return_value(JavaThread* thread, frame current_frame, oop obj) {
  assert(!Universe::heap()->is_in_reserved(obj), "This method should never be called on Java heap allocated values");
  assert(obj->klass()->is_value(), "Sanity check");
  ValueKlass* vk = ValueKlass::cast(obj->klass());
  address current_ptr = (address)thread->vt_alloc_ptr();
  VTBufferChunk* current_chunk = VTBufferChunk::chunk(current_ptr);
  address previous_ptr = (address)current_frame.interpreter_frame_vt_alloc_ptr();
  if (previous_ptr == NULL) {
    // The frame never recorded an allocation pointer: initialize it to the
    // first chunk's first allocation address
    fix_frame_vt_alloc_ptr(current_frame, current_chunk);
    previous_ptr = (address)current_frame.interpreter_frame_vt_alloc_ptr();
  }
  VTBufferChunk* previous_chunk = VTBufferChunk::chunk(previous_ptr);
  address dest;
  if ((address)obj != previous_ptr) {
    // Pick the destination: the saved pointer itself when the value fits
    // there, otherwise the beginning of the following chunk
    if (previous_chunk == current_chunk
        || (previous_ptr + vk->size_helper() * wordSize) < previous_chunk->alloc_limit()) {
      dest = previous_ptr;
    } else {
      assert(previous_chunk->next() != NULL, "Should not happen");
      dest = (address)previous_chunk->next()->first_alloc();
    }
    // Copying header
    memcpy(dest, obj, vk->first_field_offset());
    // Copying value content
    vk->value_store(((char*)(address)obj) + vk->first_field_offset(),
                    dest + vk->first_field_offset(), false, true);
  } else {
    // Value already sits at the saved pointer, no copy needed
    dest = (address)obj;
  }
  // Allocation resumes right after the relocated value
  address new_alloc_ptr = dest + vk->size_helper() * wordSize;
  current_frame.interpreter_frame_set_vt_alloc_ptr((intptr_t*)new_alloc_ptr);
  VTBufferChunk* last = VTBufferChunk::chunk(dest);
  VTBufferChunk* del = last->next();
  thread->set_vt_alloc_ptr(new_alloc_ptr);
  thread->set_vt_alloc_limit(last->alloc_limit());
  last->set_next(NULL);
  // Recycle every chunk younger than the one holding the relocated value
  while (del != NULL) {
    VTBufferChunk* tmp = del->next();
    VTBuffer::recycle_chunk(thread, del);
    del = tmp;
  }
  return (oop)dest;
}
 320 
// This method is called to recycle VTBuffer memory when the VM has detected
// that too much memory is being consumed in the current frame context. This
// can only happen when the method contains at least one loop in which new
// values are created.
// The algorithm is a stop-and-copy compaction over the frame's buffered
// values: (1) build a relocation table, (2) mark live values reachable from
// the frame, (3) sort entries and compute destination addresses (stored in
// each value's mark word), (4) rewrite the frame's references, (5) move the
// values and restore their mark words, (6-7) fix the thread's allocation
// pointer and recycle the freed chunks. Phases 2-5 must run without a
// safepoint because the mark words are temporarily repurposed.
void VTBuffer::recycle_vt_in_frame(JavaThread* thread, frame* f) {
  Ticks begin, end;
  Ticks step1, step2, step3, step4, step5, step6, step7;
  int returned_chunks = 0;

  if (ReportVTBufferRecyclingTimes) {
    begin = Ticks::now();
  }
  assert(f->is_interpreted_frame(), "only interpreted frames are using VT buffering so far");
  ResourceMark rm(thread);

  // 1 - allocate relocation table
  // max_locals + max_stack bounds the number of distinct value references
  // the frame can hold
  Method* m = f->interpreter_frame_method();
  int max_entries = m->max_locals() + m->max_stack();
  VT_relocation_entry* reloc_table = NEW_RESOURCE_ARRAY_IN_THREAD(thread, struct VT_relocation_entry, max_entries);
  int n_entries = 0;
  if (ReportVTBufferRecyclingTimes) {
    step1 = Ticks::now();
  }

  {
    // No GC should occur during the phases 2->5
    // either because the mark word (usually containing the pointer
    // to the Java mirror) is used for marking, or because the values are being relocated
    NoSafepointVerifier nsv;

    // 2 - marking phase + populate relocation table
    BufferedValuesMarking marking_closure = BufferedValuesMarking(f, reloc_table, max_entries, &n_entries);
    f->buffered_values_interpreted_do(&marking_closure);
    if (ReportVTBufferRecyclingTimes) {
      step2 = Ticks::now();
    }

    if (n_entries > 0) {
      // 3 - sort relocation table entries and compute compaction
      qsort(reloc_table, n_entries, sizeof(struct VT_relocation_entry), compare_reloc_entries);
      // The first (oldest) value is compacted to the frame's saved
      // allocation pointer, or to the start of the first chunk when the
      // frame never recorded one
      if (f->interpreter_frame_vt_alloc_ptr() == NULL) {
        VTBufferChunk* chunk = VTBufferChunk::chunk(reloc_table[0].old_ptr);
        while (chunk->prev() != NULL) chunk = chunk->prev();
        //f->interpreter_frame_set_vt_alloc_ptr((intptr_t*)chunk->first_alloc());
        reloc_table[0].new_ptr = (address)chunk->first_alloc();
      } else {
        reloc_table[0].new_ptr = (address)f->interpreter_frame_vt_alloc_ptr();
      }
      // Stash each value's destination address in its mark word so phase 4
      // can rewrite references with a single load
      ((oop)reloc_table[0].old_ptr)->set_mark((markOop)reloc_table[0].new_ptr);
      for (int i = 1; i < n_entries; i++) {
        reloc_table[i].new_ptr = relocate_value(reloc_table[i].old_ptr, reloc_table[i-1].new_ptr,
            InstanceKlass::cast(((oop)reloc_table[i-1].old_ptr)->klass())->size_helper());
        ((oop)reloc_table[i].old_ptr)->set_mark((markOop)reloc_table[i].new_ptr);
      }
      if (ReportVTBufferRecyclingTimes) {
        step3 = Ticks::now();
      }

      // 4 - update pointers
      BufferedValuesPointersUpdate update_closure = BufferedValuesPointersUpdate(f);
      f->buffered_values_interpreted_do(&update_closure);
      if (ReportVTBufferRecyclingTimes) {
        step4 = Ticks::now();
      }

      // 5 - relocate values
      // Entries are processed in (chunk, address) order, so a destination
      // never overlaps a not-yet-moved source
      for (int i = 0; i < n_entries; i++) {
        if (reloc_table[i].old_ptr != reloc_table[i].new_ptr) {
          InstanceKlass* ik_old = InstanceKlass::cast(((oop)reloc_table[i].old_ptr)->klass());
          // instead of memcpy, a value_store() might be required here
          memcpy(reloc_table[i].new_ptr, reloc_table[i].old_ptr, ik_old->size_helper() * HeapWordSize);
        }
        // Resetting the mark word
        ((oop)reloc_table[i].new_ptr)->set_mark(markOop(((oop)reloc_table[i].new_ptr)->klass()->java_mirror()));
      }
      if (ReportVTBufferRecyclingTimes) {
        step5 = Ticks::now();
      }

      // 6 - update thread allocation pointer
      oop last_oop = (oop)reloc_table[n_entries - 1].new_ptr;
      InstanceKlass* ik = InstanceKlass::cast(last_oop->klass());
      thread->set_vt_alloc_ptr((address)last_oop + ik->size_helper() * HeapWordSize);
      thread->set_vt_alloc_limit(VTBufferChunk::chunk(thread->vt_alloc_ptr())->alloc_limit());
      if (ReportVTBufferRecyclingTimes) {
        step6 = Ticks::now();
      }

      // 7 - free/return unused chunks
      VTBufferChunk* chunk = VTBufferChunk::chunk(reloc_table[n_entries - 1].new_ptr);
      VTBufferChunk* temp = chunk;
      chunk = chunk->next();
      temp->set_next(NULL);
      while (chunk != NULL) {
        returned_chunks++;
        temp = chunk->next();
        VTBuffer::recycle_chunk(thread, chunk);
        chunk = temp;
      }
      if (ReportVTBufferRecyclingTimes) {
        step7 = Ticks::now();
      }
    } else {
      // No live values: record the thread's current pointer in the frame so
      // later recycling starts from here
      f->interpreter_frame_set_vt_alloc_ptr((intptr_t*)thread->vt_alloc_ptr());
    }
  }

  // 8 - free relocation table
  FREE_RESOURCE_ARRAY(struct VT_relocation_entry, reloc_table, max_entries);
  if (ReportVTBufferRecyclingTimes) {
    end = Ticks::now();
    ResourceMark rm(thread);
    tty->print_cr("VTBufferRecyling: %s : %s.%s %s : %ldus",
        thread->name(),
        f->interpreter_frame_method()->klass_name()->as_C_string(),
        f->interpreter_frame_method()->name()->as_C_string(),
        f->interpreter_frame_method()->signature()->as_C_string(),
        (end.value() - begin.value()) / 1000);
    tty->print("Step1 : %6ldns ", step1.value() - begin.value());
    tty->print("Step2 : %6ldns ", step2.value() - step1.value());
    tty->print("Step3 : %6ldns ", step3.value() - step2.value());
    tty->print("Step4 : %6ldns ", step4.value() - step3.value());
    tty->print("Step5 : %6ldns ", step5.value() - step4.value());
    tty->print("Step6 : %6ldns ", step6.value() - step5.value());
    tty->print("Step7 : %6ldns ", step7.value() - step6.value());
    tty->print("Step8 : %6ldns ", end.value() - step7.value());
    tty->print_cr("Returned chunks: %d", returned_chunks);
  }
}
 450 
 451 void BufferedValuesMarking::do_buffered_value(oop* p) {
 452   assert(!Universe::heap()->is_in_reserved_or_null(*p), "Sanity check");
 453   if (VTBuffer::value_belongs_to_frame(*p, _frame)) {
 454     if (!(*p)->mark()->is_marked()) {
 455       assert(*_index < _size, "index outside of relocation table range");
 456       _reloc_table[*_index].old_ptr = (address)*p;
 457       _reloc_table[*_index].chunk_index = VTBufferChunk::chunk(*p)->index();
 458       *_index = (*_index) + 1;
 459       (*p)->set_mark((*p)->mark()->set_marked());
 460     }
 461   }
 462 }
 463 
 464 void BufferedValuesPointersUpdate::do_buffered_value(oop* p) {
 465   assert(!Universe::heap()->is_in_reserved_or_null(*p), "Sanity check");
 466   // might be coded more efficiently just by checking mark word is not NULL
 467   if (VTBuffer::value_belongs_to_frame(*p, _frame)) {
 468     *p = (oop)(*p)->mark();
 469   }
 470 }