changeset: 9466:c1b5450e17dd user: rkennke date: Fri Aug 05 08:20:59 2016 -0400 summary: Improve logging of GC phases in Shenandoah. diff -r 81ae2ba6ac6b -r c1b5450e17dd src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Mon Jul 25 09:50:03 2016 +0200 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Fri Aug 05 08:20:59 2016 -0400 @@ -168,26 +168,6 @@ void ShenandoahCollectorPolicy::record_phase_start(TimingPhase phase) { _timing_data[phase]._start = os::elapsedTime(); - if (PrintGCTimeStamps) { - if (phase == init_mark) - _tracer->report_gc_start(GCCause::_shenandoah_init_mark, _conc_timer->gc_start()); - else if (phase == full_gc) - _tracer->report_gc_start(GCCause::_last_ditch_collection, _stw_timer->gc_start()); - - gclog_or_tty->gclog_stamp(_tracer->gc_id()); - gclog_or_tty->print("[GC %s start", _phase_names[phase]); - ShenandoahHeap* heap = (ShenandoahHeap*) Universe::heap(); - - gclog_or_tty->print(" total = " SIZE_FORMAT " K, used = " SIZE_FORMAT " K free = " SIZE_FORMAT " K", heap->capacity()/ K, heap->used() /K, - ((heap->capacity() - heap->used())/K) ); - - if (heap->calculateUsed() != heap->used()) { - gclog_or_tty->print("calc used = " SIZE_FORMAT " K heap used = " SIZE_FORMAT " K", - heap->calculateUsed() / K, heap->used() / K); - } - // assert(heap->calculateUsed() == heap->used(), "Just checking"); - gclog_or_tty->print_cr("]"); - } } void ShenandoahCollectorPolicy::record_phase_end(TimingPhase phase) { @@ -199,40 +179,9 @@ tty->print_cr("PolicyPrint: %s "SIZE_FORMAT" took %lf ms", _phase_names[phase], _timing_data[phase]._count++, elapsed * 1000); } - if (PrintGCTimeStamps) { - ShenandoahHeap* heap = (ShenandoahHeap*) Universe::heap(); - gclog_or_tty->gclog_stamp(_tracer->gc_id()); - - gclog_or_tty->print("[GC %s end, %lf secs", _phase_names[phase], elapsed ); - gclog_or_tty->print(" total = " SIZE_FORMAT " K, used = " SIZE_FORMAT " K free = " SIZE_FORMAT " K", heap->capacity()/ K, heap->used() /K, - ((heap->capacity() - heap->used())/K) ); - - if (heap->calculateUsed() != heap->used()) { - gclog_or_tty->print("calc used = " SIZE_FORMAT " K heap used = " SIZE_FORMAT " K", - heap->calculateUsed() / K, heap->used() / K); - } - // assert(heap->calculateUsed() == heap->used(), "Stashed heap used must be equal to calculated heap used"); - gclog_or_tty->print_cr("]"); - - if (phase == recycle_regions) { - _tracer->report_gc_end(_conc_timer->gc_end(), _conc_timer->time_partitions()); - } else if (phase == full_gc) { - _tracer->report_gc_end(_stw_timer->gc_end(), _stw_timer->time_partitions()); - } else if (phase == conc_mark || phase == conc_evac || phase == prepare_evac) { - if (_conc_gc_aborted) { - _tracer->report_gc_end(_conc_timer->gc_end(), _conc_timer->time_partitions()); - clear_conc_gc_aborted(); - } - } - } } void ShenandoahCollectorPolicy::report_concgc_cancelled() { - if (PrintGCTimeStamps) { - gclog_or_tty->print("Concurrent GC Cancelled\n"); - set_conc_gc_aborted(); - // _tracer->report_gc_end(_conc_timer->gc_end(), _conc_timer->time_partitions()); - } } void ShenandoahHeuristics::record_bytes_allocated(size_t bytes) { @@ -393,7 +342,8 @@ } if (shouldStartConcurrentMark && ShenandoahTracePhases) { - tty->print_cr("Start GC at available: "SIZE_FORMAT", capacity: "SIZE_FORMAT", used: "SIZE_FORMAT", factor: "UINTX_FORMAT", update-refs: %s", available, free_capacity, free_used, factor, BOOL_TO_STR(heap->need_update_refs())); + 
gclog_or_tty->print_cr("Start GC at available: "SIZE_FORMAT", capacity: "SIZE_FORMAT", used: "SIZE_FORMAT", factor: "UINTX_FORMAT", update-refs: %s", available, free_capacity, free_used, factor, BOOL_TO_STR(heap->need_update_refs())); + gclog_or_tty->flush(); } return shouldStartConcurrentMark; } diff -r 81ae2ba6ac6b -r c1b5450e17dd src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Mon Jul 25 09:50:03 2016 +0200 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Fri Aug 05 08:20:59 2016 -0400 @@ -210,9 +210,6 @@ _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); } ShenandoahHeap* heap = ShenandoahHeap::heap(); - if (ShenandoahTracePhases && heap->cancelled_concgc()) { - tty->print_cr("Cancelled concurrent marking"); - } } }; diff -r 81ae2ba6ac6b -r c1b5450e17dd src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Mon Jul 25 09:50:03 2016 +0200 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Fri Aug 05 08:20:59 2016 -0400 @@ -21,6 +21,7 @@ * */ +#include "gc_implementation/shared/gcTraceTime.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentThread.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" #include "gc_implementation/shenandoah/shenandoahJNICritical.hpp" @@ -58,6 +59,9 @@ ShenandoahHeap* heap = ShenandoahHeap::heap(); + GCTimer* gc_timer = heap->shenandoahPolicy()->conc_timer(); + GCTracer* gc_tracer = heap->tracer(); + GCId gc_id = gc_tracer->gc_id(); while (!_should_terminate) { if (_do_full_gc) { { @@ -78,7 +82,7 @@ } else if (heap->shenandoahPolicy()->should_start_concurrent_mark(heap->used(), heap->capacity())) { - + gc_timer->register_gc_start(); TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); TraceMemoryManagerStats tmms(false, GCCause::_no_cause_specified); if (ShenandoahGCVerbose) @@ -94,6 +98,7 @@ heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::init_mark_gross); } { + GCTraceTime time("Concurrent marking", ShenandoahTracePhases, true, NULL, gc_id); TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); ShenandoahHeap::heap()->concurrentMark()->mark_from_roots(); } @@ -107,6 +112,7 @@ } if (! 
_should_terminate) { + GCTraceTime time("Concurrent evacuation", ShenandoahTracePhases, true, NULL, gc_id); TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); heap->do_evacuation(); } @@ -118,6 +124,7 @@ heap->reset_mark_bitmap(); heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::reset_bitmaps); + gc_timer->register_gc_end(); } else { Thread::current()->_ParkEvent->park(10) ; // yield(); diff -r 81ae2ba6ac6b -r c1b5450e17dd src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Mon Jul 25 09:50:03 2016 +0200 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Fri Aug 05 08:20:59 2016 -0400 @@ -295,16 +295,11 @@ }; void ShenandoahHeap::reset_mark_bitmap() { - if (ShenandoahTracePhases) { - tty->print_cr("Shenandoah starting concurrent reset bitmaps"); - } + GCTraceTime time("Concurrent reset bitmaps", ShenandoahTracePhases, true, NULL, tracer()->gc_id()); ResetBitmapTask task = ResetBitmapTask(_ordered_regions); conc_workers()->set_active_workers(_max_conc_workers); conc_workers()->run_task(&task); - if (ShenandoahTracePhases) { - tty->print_cr("Shenandoah finishing concurrent reset bitmaps"); - } } void ShenandoahHeap::reset_mark_bitmap_range(HeapWord* from, HeapWord* to) { @@ -858,9 +853,6 @@ if (_sh->cancelled_concgc()) { // tty->print("We cancelled concgc while working on region %d\n", from_hr->region_number()); // from_hr->print(); - if (ShenandoahTracePhases) { - tty->print_cr("Cancelled concurrent evacuation"); - } break; } from_hr = _cs->claim_next(); @@ -1165,7 +1157,7 @@ _free_regions->print(); */ - if (PrintGCTimeStamps) { + if (ShenandoahPrintCollectionSet) { gclog_or_tty->print("Collection set live = " SIZE_FORMAT " K reclaimable = " SIZE_FORMAT " K\n", _collection_set->live_data() / K, _collection_set->garbage() / K); } @@ -1506,20 +1498,11 @@ void ShenandoahHeap::collect(GCCause::Cause cause) { if (GCCause::is_user_requested_gc(cause)) { if (! DisableExplicitGC) { - if (ShenandoahTraceFullGC) { - gclog_or_tty->print_cr("Shenandoah-full-gc: requested full GC"); - } cancel_concgc(); _concurrent_gc_thread->do_full_gc(cause); } } else if (cause == GCCause::_allocation_failure) { - if (ShenandoahTraceFullGC) { - size_t f_used = free_regions()->used(); - size_t f_capacity = free_regions()->capacity(); - assert(f_used <= f_capacity, "must use less than we have"); - gclog_or_tty->print_cr("Shenandoah-full-gc: full GC for allocation failure heap free: "SIZE_FORMAT", available: "SIZE_FORMAT, capacity() - used(), f_capacity - f_used); - } cancel_concgc(); collector_policy()->set_should_clear_all_soft_refs(true); _concurrent_gc_thread->do_full_gc(cause); @@ -2064,26 +2047,11 @@ } void ShenandoahHeap::set_concurrent_mark_in_progress(bool in_progress) { - if (ShenandoahTracePhases) { - if (in_progress) { - gclog_or_tty->print_cr("Shenandoah starting concurrent marking, heap used: "SIZE_FORMAT" MB", used() / M); - } else { - gclog_or_tty->print_cr("Shenandoah finishing concurrent marking, heap used: "SIZE_FORMAT" MB", used() / M); - } - } - _concurrent_mark_in_progress = in_progress; JavaThread::satb_mark_queue_set().set_active_all_threads(in_progress, ! 
in_progress); } void ShenandoahHeap::set_evacuation_in_progress(bool in_progress) { - if (ShenandoahTracePhases) { - if (in_progress) { - gclog_or_tty->print_cr("Shenandoah starting concurrent evacuation, heap used: "SIZE_FORMAT" MB", used() / M); - } else { - gclog_or_tty->print_cr("Shenandoah finishing concurrent evacuation, heap used: "SIZE_FORMAT" MB", used() / M); - } - } JavaThread::set_evacuation_in_progress_all_threads(in_progress); _evacuation_in_progress = in_progress; OrderAccess::fence(); @@ -2299,9 +2267,6 @@ void ShenandoahHeap::cancel_concgc() { // only report it once if (!_cancelled_concgc) { - if (ShenandoahTracePhases) { - tty->print_cr("Cancelling GC"); - } _cancelled_concgc = true; OrderAccess::fence(); _shenandoah_policy->report_concgc_cancelled(); diff -r 81ae2ba6ac6b -r c1b5450e17dd src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Mon Jul 25 09:50:03 2016 +0200 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Fri Aug 05 08:20:59 2016 -0400 @@ -22,6 +22,7 @@ */ #include "code/codeCache.hpp" +#include "gc_implementation/shared/gcTraceTime.hpp" #include "gc_implementation/shared/isGCActiveMark.hpp" #include "gc_implementation/shenandoah/brooksPointer.hpp" #include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" @@ -76,6 +77,9 @@ ShenandoahHeap* _heap = ShenandoahHeap::heap(); + GCTimer* gc_timer = _heap->shenandoahPolicy()->conc_timer(); + gc_timer->register_gc_start(); + COMPILER2_PRESENT(DerivedPointerTable::clear()); _heap->set_full_gc_in_progress(true); @@ -106,11 +110,12 @@ } */ - if (ShenandoahTraceFullGC) { - tty->print_cr("Shenandoah-full-gc: start with heap used: "SIZE_FORMAT" MB", _heap->used() / M); - tty->print_cr("Shenandoah-full-gc: phase 1: marking the heap"); - // _heap->print_heap_regions(); - } + BarrierSet* old_bs = oopDesc::bs(); + ShenandoahMarkCompactBarrierSet bs(_heap); + oopDesc::set_bs(&bs); + + { + GCTraceTime time("Pause Init-Mark", ShenandoahTraceFullGC, true, _heap->shenandoahPolicy()->conc_timer(), _heap->tracer()->gc_id()); if (UseTLAB) { _heap->ensure_parsability(true); @@ -122,10 +127,6 @@ // The marking doesn't preserve the marks of biased objects. 
//BiasedLocking::preserve_marks(); - BarrierSet* old_bs = oopDesc::bs(); - ShenandoahMarkCompactBarrierSet bs(_heap); - oopDesc::set_bs(&bs); - _heap->set_need_update_refs(true); OrderAccess::fence(); @@ -134,27 +135,16 @@ OrderAccess::fence(); - if (ShenandoahTraceFullGC) { - tty->print_cr("Shenandoah-full-gc: phase 2: calculating target addresses"); - } ShenandoahHeapRegionSet* copy_queues[_heap->max_parallel_workers()]; phase2_calculate_target_addresses(copy_queues); OrderAccess::fence(); - if (ShenandoahTraceFullGC) { - tty->print_cr("Shenandoah-full-gc: phase 3: updating references"); - } - // Don't add any more derived pointers during phase3 COMPILER2_PRESENT(DerivedPointerTable::set_active(false)); phase3_update_references(); - if (ShenandoahTraceFullGC) { - tty->print_cr("Shenandoah-full-gc: phase 4: compacting objects"); - } - phase4_compact_objects(copy_queues); CodeCache::gc_epilogue(); @@ -167,19 +157,18 @@ _heap->verify_heap_after_evacuation(); } - if (ShenandoahTraceFullGC) { - tty->print_cr("Shenandoah-full-gc: finish with heap used: "SIZE_FORMAT" MB", _heap->used() / M); - } - _heap->reset_mark_bitmap(); _heap->_bytesAllocSinceCM = 0; _heap->set_need_update_refs(false); _heap->set_full_gc_in_progress(false); + } COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + gc_timer->register_gc_end(); + _heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::full_gc); oopDesc::set_bs(old_bs); @@ -254,6 +243,8 @@ void ShenandoahMarkCompact::phase1_mark_heap() { ShenandoahHeap* _heap = ShenandoahHeap::heap(); + GCTraceTime time("Phase 1: Mark live objects", ShenandoahTraceFullGC, true, _heap->shenandoahPolicy()->conc_timer(), _heap->tracer()->gc_id()); + #ifdef ASSERT ShenandoahMCVerifyBeforeMarkingRegionClosure cl1; _heap->heap_region_iterate(&cl1); @@ -409,6 +400,7 @@ void ShenandoahMarkCompact::phase2_calculate_target_addresses(ShenandoahHeapRegionSet** copy_queues) { ShenandoahHeap* heap = ShenandoahHeap::heap(); + GCTraceTime time("Phase 2: Compute new object addresses", ShenandoahTraceFullGC, true, heap->shenandoahPolicy()->conc_timer(), heap->tracer()->gc_id()); // Initialize copy queues. 
for (int i = 0; i < heap->max_parallel_workers(); i++) { @@ -505,6 +497,7 @@ void ShenandoahMarkCompact::phase3_update_references() { ShenandoahHeap* heap = ShenandoahHeap::heap(); + GCTraceTime time("Phase 3: Adjust pointers", ShenandoahTraceFullGC, true, heap->shenandoahPolicy()->conc_timer(), heap->tracer()->gc_id()); // Need cleared claim bits for the roots processing ClassLoaderDataGraph::clear_claimed_marks(); @@ -614,6 +607,7 @@ void ShenandoahMarkCompact::phase4_compact_objects(ShenandoahHeapRegionSet** copy_queues) { ShenandoahHeap* heap = ShenandoahHeap::heap(); + GCTraceTime time("Phase 4: Move objects", ShenandoahTraceFullGC, true, heap->shenandoahPolicy()->conc_timer(), heap->tracer()->gc_id()); ShenandoahCompactObjectsTask compact_task(copy_queues); heap->workers()->run_task(&compact_task); diff -r 81ae2ba6ac6b -r c1b5450e17dd src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp --- a/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Mon Jul 25 09:50:03 2016 +0200 +++ b/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Fri Aug 05 08:20:59 2016 -0400 @@ -21,6 +21,7 @@ * */ +#include "gc_implementation/shared/gcTraceTime.hpp" #include "gc_implementation/shenandoah/shenandoahMarkCompact.hpp" #include "gc_implementation/shenandoah/vm_operations_shenandoah.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" @@ -35,6 +36,7 @@ void VM_ShenandoahInitMark::doit() { ShenandoahHeap *sh = (ShenandoahHeap*) Universe::heap(); + GCTraceTime time("Pause Init-Mark", ShenandoahTracePhases, true, sh->shenandoahPolicy()->conc_timer(), sh->tracer()->gc_id()); sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::init_mark); assert(sh->is_bitmap_clear(), "need clear marking bitmap"); @@ -102,6 +104,7 @@ if (ShenandoahGCVerbose) tty->print("vm_ShenandoahFinalMark\n"); + GCTraceTime time("Pause Init-Evacuation", ShenandoahTracePhases, true, sh->shenandoahPolicy()->conc_timer(), sh->tracer()->gc_id()); sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::final_mark); sh->concurrentMark()->finish_mark_from_roots(); sh->stop_concurrent_marking(); changeset: 9468:fbb5979a404c user: rkennke date: Fri Nov 04 07:17:46 2016 -0400 summary: Fix interpreter on aarch64. diff -r 18b229dadb81 -r fbb5979a404c src/cpu/aarch64/vm/templateTable_aarch64.cpp --- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp Fri Nov 04 07:06:10 2016 -0400 +++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp Fri Nov 04 07:17:46 2016 -0400 @@ -2697,6 +2697,7 @@ { __ pop(ztos); if (!is_static) pop_and_check_object(obj); + oopDesc::bs()->interpreter_write_barrier(_masm, obj); __ andw(r0, r0, 0x1); __ strb(r0, field); if (!is_static) { changeset: 9469:ec7e1dbcb443 user: rkennke date: Fri Nov 04 07:17:51 2016 -0400 summary: AArch64: Added missing barriers.
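
Note: changeset 9466 above replaces the hand-rolled gclog_or_tty prints with scoped GCTraceTime objects wired to the policy's GCTimer/GCTracer. A minimal sketch of the RAII idea, using std::chrono in place of the VM's timer machinery; the class name here is invented, only the pattern matches gcTraceTime.hpp:

#include <chrono>
#include <cstdio>

// Constructor records the phase start, destructor logs the elapsed
// time: a phase is timed simply by declaring one of these in a scope.
class ScopedPhaseTrace {
  const char* _title;
  bool _enabled;
  std::chrono::steady_clock::time_point _start;
public:
  ScopedPhaseTrace(const char* title, bool enabled)
    : _title(title), _enabled(enabled),
      _start(std::chrono::steady_clock::now()) {}
  ~ScopedPhaseTrace() {
    if (_enabled) {
      double secs = std::chrono::duration<double>(
          std::chrono::steady_clock::now() - _start).count();
      std::printf("[%s, %.7f secs]\n", _title, secs);
    }
  }
};

void concurrent_marking(bool trace_phases) {
  ScopedPhaseTrace t("Concurrent marking", trace_phases);
  // ... marking work; the timing line prints when t leaves scope,
  // on every exit path, mirroring GCTraceTime's destructor ...
}

Unlike the deleted start/end print pairs, a scoped object cannot miss its end record on an early return or an aborted phase.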
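
Note: the diff below (changeset 9469) routes more interpreter, stub, and method-handle paths through oopDesc::bs()->interpreter_read_barrier() / interpreter_write_barrier(). Those barriers are emitted as AArch64 assembly; a C++ sketch of the read-barrier idea they implement, assuming the Brooks forwarding-pointer layout (one word immediately before the object, offset -8, matching BrooksPointer::byte_offset() used later in this series) and using invented names:

#include <cstddef>

// Invented names; the real barriers are MacroAssembler-emitted AArch64.
static const std::ptrdiff_t brooks_ptr_offset = -8; // word before the object

// Read barrier: resolve a reference to the object's current copy by
// loading the Brooks forwarding pointer. For an object that has not
// been evacuated, the forwarding pointer points back at the object
// itself, so the barrier is a single dependent load.
static inline void* read_barrier(void* obj) {
  if (obj == nullptr) return nullptr; // the *_not_null variants skip this
  return *reinterpret_cast<void**>(
      static_cast<char*>(obj) + brooks_ptr_offset);
}

A write barrier performs the same resolution but must additionally guarantee the returned copy is in to-space, copying the object first when evacuation is in progress (see the aarch64.ad hunks in changeset 9478 below).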
diff -r fbb5979a404c -r ec7e1dbcb443 src/cpu/aarch64/vm/interp_masm_aarch64.cpp --- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Fri Nov 04 07:17:46 2016 -0400 +++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Fri Nov 04 07:17:51 2016 -0400 @@ -273,6 +273,7 @@ ldr(result, Address(result, ConstantPool::resolved_references_offset_in_bytes())); // JNIHandles::resolve(obj); ldr(result, Address(result, 0)); + oopDesc::bs()->interpreter_read_barrier_not_null(this, result); // Add in the index add(result, result, tmp); load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); diff -r fbb5979a404c -r ec7e1dbcb443 src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp --- a/src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp Fri Nov 04 07:17:46 2016 -0400 +++ b/src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp Fri Nov 04 07:17:51 2016 -0400 @@ -83,6 +83,7 @@ // robj ^ rcounter ^ rcounter == robj // robj is address dependent on rcounter. __ ldr(robj, Address(robj, 0)); // *obj + oopDesc::bs()->interpreter_read_barrier(masm, robj); __ lsr(roffset, c_rarg2, 2); // offset assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); diff -r fbb5979a404c -r ec7e1dbcb443 src/cpu/aarch64/vm/methodHandles_aarch64.cpp --- a/src/cpu/aarch64/vm/methodHandles_aarch64.cpp Fri Nov 04 07:17:46 2016 -0400 +++ b/src/cpu/aarch64/vm/methodHandles_aarch64.cpp Fri Nov 04 07:17:51 2016 -0400 @@ -132,10 +132,13 @@ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); // Load the invoker, as MH -> MH.form -> LF.vmentry + oopDesc::bs()->interpreter_read_barrier(_masm, recv); __ verify_oop(recv); __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()))); + oopDesc::bs()->interpreter_read_barrier(_masm, method_temp); __ verify_oop(method_temp); __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); + oopDesc::bs()->interpreter_read_barrier(_masm, method_temp); __ verify_oop(method_temp); // the following assumes that a Method* is normally compressed in the vmtarget field: __ ldr(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); @@ -148,10 +151,11 @@ sizeof(u2), /*is_signed*/ false); // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); Label L; - __ ldr(rscratch1, __ argument_address(temp2, -1)); - __ cmp(recv, rscratch1); + __ ldr(temp2, __ argument_address(temp2, -1)); + __ cmp(recv, temp2); + oopDesc::bs()->asm_acmp_barrier(_masm, recv, temp2); __ br(Assembler::EQ, L); - __ ldr(r0, __ argument_address(temp2, -1)); + __ ldr(r0, temp2); __ hlt(0); __ BIND(L); } @@ -327,6 +331,7 @@ // r13 - interpreter linkage (if interpreted) ??? FIXME // r1 ... 
r0 - compiler arguments (if compiled) + oopDesc::bs()->interpreter_read_barrier(_masm, member_reg); Label L_incompatible_class_change_error; switch (iid) { case vmIntrinsics::_linkToSpecial: diff -r fbb5979a404c -r ec7e1dbcb443 src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp --- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Fri Nov 04 07:17:46 2016 -0400 +++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Fri Nov 04 07:17:51 2016 -0400 @@ -1984,6 +1984,7 @@ // Get locked oop from the handle we passed to jni __ ldr(obj_reg, Address(oop_handle_reg, 0)); + oopDesc::bs()->interpreter_write_barrier(masm, obj_reg); Label done; diff -r fbb5979a404c -r ec7e1dbcb443 src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp --- a/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Fri Nov 04 07:17:46 2016 -0400 +++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Fri Nov 04 07:17:51 2016 -0400 @@ -841,6 +841,7 @@ __ ldrw(crc, Address(esp, 4*wordSize)); // Initial CRC } else { __ ldr(buf, Address(esp, 2*wordSize)); // byte[] array + oopDesc::bs()->interpreter_read_barrier_not_null(_masm, buf); __ add(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size __ ldrw(off, Address(esp, wordSize)); // offset __ add(buf, buf, off); // + offset changeset: 9470:6356360317d5 user: rkennke date: Fri Nov 04 07:17:53 2016 -0400 summary: Bugfix: Fix off-by-one in reclaim-humongous. Contributed by Zhengyu Gu. diff -r ec7e1dbcb443 -r 6356360317d5 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Fri Nov 04 07:17:51 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Fri Nov 04 07:17:53 2016 -0400 @@ -1067,7 +1067,7 @@ oop humongous_obj = oop(r->bottom() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE); size_t size = humongous_obj->size() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; - uint required_regions = (size * HeapWordSize) / ShenandoahHeapRegion::RegionSizeBytes + 1; + uint required_regions = ShenandoahHumongous::required_regions(size * HeapWordSize); uint index = r->region_number(); changeset: 9471:b3ac96859740 user: rkennke date: Fri Nov 04 07:17:54 2016 -0400 summary: Exit gracefully when trying Shenandoah on unsupported platforms. diff -r 6356360317d5 -r b3ac96859740 src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Fri Nov 04 07:17:53 2016 -0400 +++ b/src/share/vm/runtime/arguments.cpp Fri Nov 04 07:17:54 2016 -0400 @@ -1725,6 +1725,11 @@ } void Arguments::set_shenandoah_gc_flags() { + +#if !(defined AARCH64 || defined AMD64) + UNSUPPORTED_OPTION(UseShenandoahGC); +#endif + FLAG_SET_DEFAULT(UseDynamicNumberOfGCThreads, true); FLAG_SET_DEFAULT(ParallelGCThreads, Abstract_VM_Version::parallel_worker_threads()); changeset: 9472:4d4db1b520f5 user: rkennke date: Fri Nov 04 07:17:56 2016 -0400 summary: Add ShenandoahGC into GC flag collision test. diff -r b3ac96859740 -r 4d4db1b520f5 src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Fri Nov 04 07:17:54 2016 -0400 +++ b/src/share/vm/runtime/arguments.cpp Fri Nov 04 07:17:56 2016 -0400 @@ -2127,6 +2127,7 @@ if (UseConcMarkSweepGC || UseParNewGC) i++; if (UseParallelGC || UseParallelOldGC) i++; if (UseG1GC) i++; + if (UseShenandoahGC) i++; if (i > 1) { jio_fprintf(defaultStream::error_stream(), "Conflicting collector combinations in option list; " changeset: 9473:5f5550235b86 user: rkennke date: Fri Nov 04 07:17:58 2016 -0400 summary: Handle single-threaded GC correctly. 
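
Note: the off-by-one fixed in changeset 9470 above is the usual ceiling-division mistake: size / RegionSizeBytes + 1 claims one region too many whenever the size is an exact multiple of the region size. The body of ShenandoahHumongous::required_regions() is not part of this diff, so the helper below is an assumed but plausible implementation:

#include <cassert>
#include <cstddef>

// Ceiling division: smallest number of regions covering `bytes`.
static size_t required_regions(size_t bytes, size_t region_size_bytes) {
  return (bytes + region_size_bytes - 1) / region_size_bytes;
}

int main() {
  const size_t region = 1024 * 1024;                  // demo: 1 MB regions
  assert(required_regions(region, region) == 1);      // old code: 2
  assert(required_regions(region + 1, region) == 2);  // old code: 2 (by luck)
  assert(required_regions(2 * region, region) == 2);  // old code: 3
  return 0;
}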
diff -r 4d4db1b520f5 -r 5f5550235b86 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Fri Nov 04 07:17:56 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Fri Nov 04 07:17:58 2016 -0400 @@ -2209,11 +2209,11 @@ isAlive.init(ShenandoahHeap::heap()); _ref_processor = new ReferenceProcessor(mr, // span - ParallelRefProcEnabled && (ConcGCThreads > 1), + ParallelRefProcEnabled, // mt processing (int) ConcGCThreads, // degree of mt processing - (ConcGCThreads > 1), + true, // mt discovery (int) ConcGCThreads, // degree of mt discovery changeset: 9474:2adacfde3d42 user: rkennke date: Fri Nov 04 07:17:59 2016 -0400 summary: Throw proper OOMEs instead of crashing on internal asserts. diff -r 5f5550235b86 -r 2adacfde3d42 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Fri Nov 04 07:17:58 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Fri Nov 04 07:17:59 2016 -0400 @@ -633,6 +633,11 @@ return allocate_large_memory(word_size); } + // Not enough memory in free region set. + // Coming out of full GC, it is possible that there is not + // free region available, so current_index may not be valid. + if (word_size * HeapWordSize > _free_regions->capacity()) return NULL; + jlong current_idx = _free_regions->current_index(); assert(current_idx >= 0, "expect >= 0"); ShenandoahHeapRegion* my_current_region = _free_regions->get(current_idx); @@ -681,7 +686,8 @@ HeapWord* ShenandoahHeap::allocate_large_memory(size_t words) { uint required_regions = ShenandoahHumongous::required_regions(words * HeapWordSize); - assert(required_regions <= _max_regions, "sanity check"); + if (required_regions > _max_regions) return NULL; + ShenandoahHeapRegion* r = _free_regions->claim_contiguous(required_regions); HeapWord* result = NULL; changeset: 9475:064cb87d9756 user: rkennke date: Fri Nov 04 07:18:01 2016 -0400 summary: Insert load-load fence in obj-eq-barrier, to prevent brooks ptr loads from floating above comparison. diff -r 2adacfde3d42 -r 064cb87d9756 src/share/vm/gc_implementation/shenandoah/shenandoahBarrierSet.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahBarrierSet.cpp Fri Nov 04 07:17:59 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahBarrierSet.cpp Fri Nov 04 07:18:01 2016 -0400 @@ -275,8 +275,9 @@ bool ShenandoahBarrierSet::obj_equals(oop obj1, oop obj2) { bool eq = oopDesc::unsafe_equals(obj1, obj2); if (! eq) { - obj1 = read_barrier(obj1); - obj2 = read_barrier(obj2); + OrderAccess::loadload(); + obj1 = resolve_oop_static(obj1); + obj2 = resolve_oop_static(obj2); eq = oopDesc::unsafe_equals(obj1, obj2); } return eq; changeset: 9476:48663656550c user: rkennke date: Fri Nov 04 07:20:22 2016 -0400 summary: Make sure to sync local and global evac-in-progress flags correctly. 
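
Note: the diff below (changeset 9476) makes two coordinated changes: set_evacuation_in_progress(false) is now called under Threads_lock, and a JavaThread initializes its cached _evacuation_in_progress from the global flag when it is registered. A minimal stand-alone sketch of why both are needed, with invented names (std::mutex stands in for Threads_lock):

#include <mutex>
#include <vector>

// Each thread caches the evacuation flag so barriers can test it with a
// cheap thread-local load. The cache stays coherent only if (a) updates
// to all threads happen under the same lock that guards thread
// registration, and (b) a newly registered thread copies the current
// global value while holding that lock.
struct ThreadSketch {
  bool evac_in_progress = false;
};

class ThreadRegistry {
  std::mutex threads_lock_;            // stands in for Threads_lock
  std::vector<ThreadSketch*> threads_;
  bool evac_global_ = false;

public:
  void register_thread(ThreadSketch* t) {
    std::lock_guard<std::mutex> g(threads_lock_);
    t->evac_in_progress = evac_global_; // pick up the current state
    threads_.push_back(t);
  }

  void set_evacuation_in_progress_all_threads(bool in_prog) {
    std::lock_guard<std::mutex> g(threads_lock_); // must hold the lock
    evac_global_ = in_prog;
    for (ThreadSketch* t : threads_) t->evac_in_progress = in_prog;
  }
};

Without the copy at registration, a thread started while evacuation is in progress would run with a stale false and skip its barriers; without the lock, the toggle could race with thread creation and miss the new thread entirely.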
diff -r 064cb87d9756 -r 48663656550c src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Fri Nov 04 07:18:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Fri Nov 04 07:20:22 2016 -0400 @@ -118,6 +118,7 @@ } if (heap->is_evacuation_in_progress()) { + MutexLocker mu(Threads_lock); heap->set_evacuation_in_progress(false); } heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::reset_bitmaps); diff -r 064cb87d9756 -r 48663656550c src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Fri Nov 04 07:18:01 2016 -0400 +++ b/src/share/vm/runtime/thread.cpp Fri Nov 04 07:20:22 2016 -0400 @@ -1963,6 +1963,8 @@ // The dirty card queue should have been constructed with its // active field set to true. assert(dirty_queue.is_active(), "dirty card queue should be active"); + + _evacuation_in_progress = _evacuation_in_progress_global; } bool JavaThread::evacuation_in_progress() const { @@ -1974,8 +1976,9 @@ } void JavaThread::set_evacuation_in_progress_all_threads(bool in_prog) { + assert(Threads_lock->owned_by_self(), "must hold Threads_lock"); _evacuation_in_progress_global = in_prog; - for (JavaThread* t = Threads::first(); t; t = t->next()) { + for (JavaThread* t = Threads::first(); t != NULL; t = t->next()) { t->set_evacuation_in_progress(in_prog); } } changeset: 9477:b1cf900aa021 user: rkennke date: Fri Nov 04 07:21:01 2016 -0400 summary: Optimization of a.getClass() == b.getClass() to a.klass == b.klass needs to take shenandoah's acmp pattern into account diff -r 48663656550c -r b1cf900aa021 src/share/vm/opto/phaseX.cpp --- a/src/share/vm/opto/phaseX.cpp Fri Nov 04 07:20:22 2016 -0400 +++ b/src/share/vm/opto/phaseX.cpp Fri Nov 04 07:21:01 2016 -0400 @@ -1525,6 +1525,13 @@ Node* imem = use->as_Initialize()->proj_out(TypeFunc::Memory); if (imem != NULL) add_users_to_worklist0(imem); } + + if (use->is_ShenandoahBarrier()) { + Node* cmp = use->find_out_with(Op_CmpP); + if (cmp != NULL) { + _worklist.push(cmp); + } + } } } diff -r 48663656550c -r b1cf900aa021 src/share/vm/opto/shenandoahSupport.cpp --- a/src/share/vm/opto/shenandoahSupport.cpp Fri Nov 04 07:20:22 2016 -0400 +++ b/src/share/vm/opto/shenandoahSupport.cpp Fri Nov 04 07:21:01 2016 -0400 @@ -216,14 +216,17 @@ Node* input = in(Memory); if (input->Opcode() == Op_ShenandoahWBMemProj) { - input = input->in(0); - if (input->is_top()) return NULL; // Dead path. - assert(input->Opcode() == Op_ShenandoahWriteBarrier, "expect write barrier"); - const Type* in_type = phase->type(input); + Node* wb = input->in(0); + if (wb->is_top()) return NULL; // Dead path. 
+ assert(wb->Opcode() == Op_ShenandoahWriteBarrier, "expect write barrier"); + const Type* in_type = phase->type(wb); const Type* this_type = phase->type(this); if (is_independent(in_type, this_type)) { - phase->igvn_rehash_node_delayed(input); - set_req(Memory, input->in(Memory)); + phase->igvn_rehash_node_delayed(wb); + set_req(Memory, wb->in(Memory)); + if (can_reshape && input->outcnt() == 0) { + phase->is_IterGVN()->_worklist.push(input); + } return this; } } diff -r 48663656550c -r b1cf900aa021 src/share/vm/opto/subnode.cpp --- a/src/share/vm/opto/subnode.cpp Fri Nov 04 07:20:22 2016 -0400 +++ b/src/share/vm/opto/subnode.cpp Fri Nov 04 07:21:01 2016 -0400 @@ -34,6 +34,7 @@ #include "opto/mulnode.hpp" #include "opto/opcodes.hpp" #include "opto/phaseX.hpp" +#include "opto/shenandoahSupport.hpp" #include "opto/subnode.hpp" #include "runtime/sharedRuntime.hpp" @@ -787,12 +788,11 @@ return TypeInt::CC; } -static inline Node* isa_java_mirror_load(PhaseGVN* phase, Node* n) { +static inline Node* isa_java_mirror_load_helper(PhaseGVN* phase, Node* n) { // Return the klass node for // LoadP(AddP(foo:Klass, #java_mirror)) // or NULL if not matching. - if (n->Opcode() != Op_LoadP) return NULL; - + assert(n->Opcode() == Op_LoadP, "expects a load"); const TypeInstPtr* tp = phase->type(n)->isa_instptr(); if (!tp || tp->klass() != phase->C->env()->Class_klass()) return NULL; @@ -807,6 +807,74 @@ return k; } +static inline Node* isa_java_mirror_load(PhaseGVN* phase, Node* n) { + if (!UseShenandoahGC) { + if (n->Opcode() == Op_LoadP) { + return isa_java_mirror_load_helper(phase, n); + } + } else { + // When Shenandoah is enabled acmp is compiled as: + // if (a != b) { + // a = read_barrier(a); + // b = read_barrier(b); + // } + // if (a == b) { + // .. + // } else { + // .. + // } + // + // If the comparison of the form is a.getClass() == b.getClass(), + // then that would be optimized to: + // c = a.getClass(); + // d = b.getClass(); + // if (a.klass != b.klass) { + // c = read_barrier(c); + // d = read_barrier(d); + // } + // if (c == d) { + // + // And the second comparison can happen without barriers (and + // fail). Here we match the second comparison only and optimize + // that pattern to: + // c = a.getClass(); + // d = b.getClass(); + // if (c != d) { + // c = read_barrier(c); + // d = read_barrier(d); + // } + // if (a.klass == b.klass) { + // + // Because c and d are not not used anymore, the first if should + // go away as well + if (n->is_Phi() && n->req() == 3 && + n->in(2) != NULL && + n->in(2)->is_ShenandoahBarrier() && + n->in(1) == n->in(2)->in(ShenandoahBarrierNode::ValueIn) && + n->in(1) != NULL && + n->in(1)->Opcode() == Op_LoadP) { + return isa_java_mirror_load_helper(phase, n->in(1)); + } else if (n->is_ShenandoahBarrier() && + n->in(ShenandoahBarrierNode::ValueIn)->Opcode() == Op_LoadP) { + // After split if, the pattern above becomes: + // if (a != b) { + // a = read_barrier(a); + // b = read_barrier(b); + // if (a == b) { + // .. + // } else { + // .. 
+ // } + // } else { + // + // Recognize that pattern here for the second comparison + return isa_java_mirror_load_helper(phase, n->in(ShenandoahBarrierNode::ValueIn)); + } + } + + return NULL; +} + static inline Node* isa_const_java_mirror(PhaseGVN* phase, Node* n) { // for ConP(Foo.class) return ConP(Foo.klass) // otherwise return NULL @@ -831,6 +899,76 @@ return phase->makecon(TypeKlassPtr::make(mirror_type->as_klass())); } +bool CmpPNode::shenandoah_optimize_java_mirror_cmp(PhaseGVN *phase, bool can_reshape) { + assert(UseShenandoahGC, "shenandoah only"); + if (in(1)->is_Phi()) { + Node* region = in(1)->in(0); + if (!in(2)->is_Phi() || region == in(2)->in(0)) { + if (region->in(1) != NULL && + region->in(2) != NULL && + region->in(1)->in(0) == region->in(2)->in(0) && + region->in(1)->in(0)->is_If()) { + Node* iff = region->in(1)->in(0); + if (iff->in(1) != NULL && + iff->in(1)->is_Bool() && + iff->in(1)->in(1) != NULL && + iff->in(1)->in(1)->Opcode() == Op_CmpP) { + Node* cmp = iff->in(1)->in(1); + if (in(1)->in(1) == cmp->in(1) && + (!in(2)->is_Phi() || in(2)->in(1) == cmp->in(2)) && + in(1)->in(2)->in(ShenandoahBarrierNode::ValueIn) == cmp->in(1) && + (!in(2)->is_Phi() || in(2)->in(2)->in(ShenandoahBarrierNode::ValueIn) == cmp->in(2))) { + return true; + } + } + } + } + } else if (in(1)->is_ShenandoahBarrier()) { + // For this pattern: + // if (a != b) { + // a = read_barrier(a); + // b = read_barrier(b); + // if (a == b) { + // .. + // } else { + // .. + // } + // } else { + // + // Change the second test to a.klass == b.klass and replace the + // first compare by that new test if possible. + if (can_reshape) { + for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { + Node* u = fast_out(i); + if (u->is_Bool()) { + for (DUIterator_Fast jmax, j = u->fast_outs(jmax); j < jmax; j++) { + Node* uu = u->fast_out(j); + if (uu->is_If() && + uu->in(0) != NULL && + uu->in(0)->Opcode() == Op_IfTrue) { + Node* iff = uu->in(0)->in(0); + if (iff->in(1) != NULL && + iff->in(1)->is_Bool() && + iff->in(1)->as_Bool()->_test._test == BoolTest::ne && + iff->in(1)->in(1) != NULL && + iff->in(1)->in(1)->Opcode() == Op_CmpP) { + Node* cmp = iff->in(1)->in(1); + if (in(1)->in(ShenandoahBarrierNode::ValueIn) == cmp->in(1) && + (!in(2)->is_ShenandoahBarrier() || in(2)->in(ShenandoahBarrierNode::ValueIn) == cmp->in(2))) { + PhaseIterGVN* igvn = phase->is_IterGVN(); + igvn->replace_input_of(iff->in(1), 1, this); + return true; + } + } + } + } + } + } + } + } + return false; +} + //------------------------------Ideal------------------------------------------ // Normalize comparisons between Java mirror loads to compare the klass instead. // @@ -857,11 +995,13 @@ Node* conk2 = isa_const_java_mirror(phase, in(2)); if (k1 && (k2 || conk2)) { - Node* lhs = k1; - Node* rhs = (k2 != NULL) ? k2 : conk2; - this->set_req(1, lhs); - this->set_req(2, rhs); - return this; + if (!UseShenandoahGC || shenandoah_optimize_java_mirror_cmp(phase, can_reshape)) { + Node* lhs = k1; + Node* rhs = (k2 != NULL) ? k2 : conk2; + this->set_req(1, lhs); + this->set_req(2, rhs); + return this; + } } } diff -r 48663656550c -r b1cf900aa021 src/share/vm/opto/subnode.hpp --- a/src/share/vm/opto/subnode.hpp Fri Nov 04 07:20:22 2016 -0400 +++ b/src/share/vm/opto/subnode.hpp Fri Nov 04 07:21:01 2016 -0400 @@ -166,6 +166,9 @@ //------------------------------CmpPNode--------------------------------------- // Compare 2 pointer values, returning condition codes (-1, 0 or 1). 
class CmpPNode : public CmpNode { +private: + bool shenandoah_optimize_java_mirror_cmp(PhaseGVN *phase, bool can_reshape); + public: CmpPNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {} virtual int Opcode() const; changeset: 9478:87059e2365be user: rkennke date: Wed Dec 07 21:03:02 2016 +0100 summary: Backport JDK9 Shenandoah to JDK8u diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/aarch64.ad --- a/src/cpu/aarch64/vm/aarch64.ad Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/aarch64.ad Wed Dec 07 21:03:02 2016 +0100 @@ -926,6 +926,8 @@ source_hpp %{ +#include "gc_implementation/shenandoah/brooksPointer.hpp" + class CallStubImpl { //-------------------------------------------------------------- @@ -3651,12 +3653,15 @@ } } -#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN) \ +enum mem_op { is_load, is_store }; + +#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN, MEM_OP) \ MacroAssembler _masm(&cbuf); \ { \ guarantee(INDEX == -1, "mode not permitted for volatile"); \ guarantee(DISP == 0, "mode not permitted for volatile"); \ guarantee(SCALE == 0, "mode not permitted for volatile"); \ + if (MEM_OP == is_store) { __ shenandoah_store_addr_check(as_Register(BASE)); } \ __ INSN(REG, as_Register(BASE)); \ } @@ -3668,7 +3673,7 @@ // Used for all non-volatile memory accesses. The use of // $mem->opcode() to discover whether this pattern uses sign-extended // offsets is something of a kludge. - static void loadStore(MacroAssembler masm, mem_insn insn, + static void loadStore(MacroAssembler masm, mem_insn insn, mem_op mo, Register reg, int opcode, Register base, int index, int size, int disp) { @@ -3690,6 +3695,7 @@ scale = Address::lsl(size); } + if (mo == is_store) masm.shenandoah_store_addr_check(base); if (index == -1) { (masm.*insn)(reg, Address(base, disp)); } else { @@ -3702,7 +3708,7 @@ } } - static void loadStore(MacroAssembler masm, mem_float_insn insn, + static void loadStore(MacroAssembler masm, mem_float_insn insn, mem_op mo, FloatRegister reg, int opcode, Register base, int index, int size, int disp) { @@ -3719,7 +3725,8 @@ scale = Address::lsl(size); } - if (index == -1) { + if (mo == is_store) masm.shenandoah_store_addr_check(base); + if (index == -1) { (masm.*insn)(reg, Address(base, disp)); } else { if (disp == 0) { @@ -3731,10 +3738,11 @@ } } - static void loadStore(MacroAssembler masm, mem_vector_insn insn, + static void loadStore(MacroAssembler masm, mem_vector_insn insn, mem_op mo, FloatRegister reg, MacroAssembler::SIMD_RegVariant T, int opcode, Register base, int index, int size, int disp) { + if (mo == is_store) masm.shenandoah_store_addr_check(base); if (index == -1) { (masm.*insn)(reg, T, Address(base, disp)); } else { @@ -3792,146 +3800,146 @@ enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); - 
loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, is_load, 
dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, is_load, dst_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S, + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load, dst_reg, MacroAssembler::S, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D, + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load, dst_reg, MacroAssembler::D, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q, + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load, dst_reg, MacroAssembler::Q, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_strb(iRegI src, memory mem) %{ Register src_reg = as_Register($src$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, is_store, src_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_strb0(memory mem) %{ MacroAssembler _masm(&cbuf); - loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(), + loadStore(_masm, &MacroAssembler::strb, is_store, zr, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_strb0_ordered(memory mem) %{ MacroAssembler _masm(&cbuf); __ membar(Assembler::StoreStore); - loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(), + loadStore(_masm, &MacroAssembler::strb, is_store, zr, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_strh(iRegI src, memory mem) %{ Register src_reg = as_Register($src$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, is_store, src_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_strh0(memory mem) %{ MacroAssembler _masm(&cbuf); - loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(), + loadStore(_masm, &MacroAssembler::strh, is_store, zr, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_strw(iRegI src, memory mem) %{ Register src_reg = as_Register($src$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, is_store, src_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_strw0(memory mem) %{ MacroAssembler _masm(&cbuf); - loadStore(_masm, &MacroAssembler::strw, zr, 
$mem->opcode(), + loadStore(_masm, &MacroAssembler::strw, is_store, zr, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} @@ -3945,43 +3953,43 @@ __ mov(rscratch2, sp); src_reg = rscratch2; } - loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_str0(memory mem) %{ MacroAssembler _masm(&cbuf); - loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(), + loadStore(_masm, &MacroAssembler::str, is_store, zr, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_strs(vRegF src, memory mem) %{ FloatRegister src_reg = as_FloatRegister($src$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, is_store, src_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_strd(vRegD src, memory mem) %{ FloatRegister src_reg = as_FloatRegister($src$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, is_store, src_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_strvS(vecD src, memory mem) %{ FloatRegister src_reg = as_FloatRegister($src$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S, + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::S, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_strvD(vecD src, memory mem) %{ FloatRegister src_reg = as_FloatRegister($src$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D, + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::D, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} enc_class aarch64_enc_strvQ(vecX src, memory mem) %{ FloatRegister src_reg = as_FloatRegister($src$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q, + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::Q, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} @@ -4053,21 +4061,21 @@ enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{ MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, stlrb); + rscratch1, stlrb, is_store); if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) __ dmb(__ ISH); %} enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{ MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, stlrh); + rscratch1, stlrh, is_store); if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) __ dmb(__ ISH); %} enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{ MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, stlrw); + rscratch1, stlrw, is_store); if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) __ dmb(__ ISH); %} @@ -4076,75 +4084,75 @@ enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); MOV_VOLATILE(dst_reg, $mem$$base, 
$mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldarb); + rscratch1, ldarb, is_load); __ sxtbw(dst_reg, dst_reg); %} enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldarb); + rscratch1, ldarb, is_load); __ sxtb(dst_reg, dst_reg); %} enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{ MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldarb); + rscratch1, ldarb, is_load); %} enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{ MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldarb); + rscratch1, ldarb, is_load); %} enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldarh); + rscratch1, ldarh, is_load); __ sxthw(dst_reg, dst_reg); %} enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{ Register dst_reg = as_Register($dst$$reg); MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldarh); + rscratch1, ldarh, is_load); __ sxth(dst_reg, dst_reg); %} enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{ MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldarh); + rscratch1, ldarh, is_load); %} enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{ MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldarh); + rscratch1, ldarh, is_load); %} enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{ MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldarw); + rscratch1, ldarw, is_load); %} enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{ MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldarw); + rscratch1, ldarw, is_load); %} enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{ MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldar); + rscratch1, ldar, is_load); %} enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{ MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldarw); + rscratch1, ldarw, is_load); __ fmovs(as_FloatRegister($dst$$reg), rscratch1); %} enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{ MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, ldar); + rscratch1, ldar, is_load); __ fmovd(as_FloatRegister($dst$$reg), rscratch1); %} @@ -4159,7 +4167,7 @@ src_reg = rscratch2; } MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, stlr); + rscratch1, stlr, is_store); if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) __ dmb(__ ISH); %} @@ -4171,7 +4179,7 @@ __ fmovs(rscratch2, src_reg); } MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, stlrw); + rscratch1, stlrw, is_store); if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) __ dmb(__ ISH); %} @@ -4183,7 +4191,7 @@ __ fmovd(rscratch2, src_reg); } MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, - rscratch1, stlr); + rscratch1, stlr, is_store); if (VM_Version::cpu_cpuFeatures() & VM_Version::CPU_DMB_ATOMICS) __ dmb(__ ISH); %} @@ -4252,6 +4260,7 @@ enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp 
oldval, iRegLNoSp newval) %{ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ shenandoah_store_addr_check($mem$$base$$Register); __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, /*acquire*/ false, /*release*/ true); %} @@ -4259,11 +4268,21 @@ enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ shenandoah_store_addr_check($mem$$base$$Register); __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, Assembler::word, /*acquire*/ false, /*release*/ true); %} + enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp) %{ + MacroAssembler _masm(&cbuf); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + Register tmp = $tmp$$Register; + __ mov(tmp, $oldval$$Register); // Must not clobber oldval. + __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register, + Assembler::xword, /*acquire*/ false, /*release*/ true, /*weak*/ false); + %} + // The only difference between aarch64_enc_cmpxchg and // aarch64_enc_cmpxchg_acq is that we use load-acquire in the // CompareAndSwap sequence to serve as a barrier on acquiring a @@ -4271,6 +4290,7 @@ enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ shenandoah_store_addr_check($mem$$base$$Register); __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, /*acquire*/ true, /*release*/ true); %} @@ -4278,10 +4298,20 @@ enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ shenandoah_store_addr_check($mem$$base$$Register); __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, Assembler::word, /*acquire*/ true, /*release*/ true); %} + enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp) %{ + MacroAssembler _masm(&cbuf); + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + Register tmp = $tmp$$Register; + __ mov(tmp, $oldval$$Register); // Must not clobber oldval. + __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register, + Assembler::xword, /*acquire*/ true, /*release*/ true, /*weak*/ false); + %} + // auxiliary used for CompareAndSwapX to set result register enc_class aarch64_enc_cset_eq(iRegINoSp res) %{ MacroAssembler _masm(&cbuf); @@ -4764,7 +4794,7 @@ assert_different_registers(oop, box, tmp, disp_hdr); - __ shenandoah_store_check(oop); + __ shenandoah_store_addr_check(oop); // Load markOop from object into displaced_header. __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); @@ -4924,7 +4954,7 @@ assert_different_registers(oop, box, tmp, disp_hdr); - __ shenandoah_store_check(oop); + __ shenandoah_store_addr_check(oop); // Always do locking in runtime. 
if (EmitSync & 0x01) { @@ -8459,8 +8489,7 @@ } } - if (reg != rscratch2) - __ shenandoah_store_check(reg, adr); + __ shenandoah_store_check(adr, reg); __ str(reg, adr); %} @@ -9050,7 +9079,7 @@ "mov $dst, $tmp\t# vector (1D)" %} ins_encode %{ FloatRegister tmp_reg = as_FloatRegister($tmp$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, is_load, tmp_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); @@ -9093,7 +9122,7 @@ "mov $dst, $tmp\t# vector (1D)" %} ins_encode %{ FloatRegister tmp_reg = as_FloatRegister($tmp$$reg); - loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(), + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, is_load, tmp_reg, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); @@ -9319,7 +9348,7 @@ ins_encode %{ Register s = $src$$Register; Register d = $dst$$Register; - __ ldr(d, Address(s, -8)); + __ ldr(d, Address(s, BrooksPointer::byte_offset())); %} ins_pipe(pipe_class_memory); %} @@ -9334,21 +9363,17 @@ Register s = $src$$Register; Register d = $dst$$Register; assert(d == r0, "result in r0"); - Address evacuation_in_progress = Address(rthread, in_bytes(JavaThread::evacuation_in_progress_offset())); __ block_comment("Shenandoah write barrier {"); - __ ldr(d, Address(s, -8)); - __ ldrb(rscratch1, evacuation_in_progress); - __ membar(Assembler::LoadLoad); - __ ldr(d, Address(s, -8)); - __ cbzw(rscratch1, done); - __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::aarch64::shenandoah_wb())), NULL, lr); - __ bind(done); + // We need that first read barrier in order to trigger a SEGV/NPE on incoming NULL. + // Also, it brings s into d in preparation for the call to shenandoah_write_barrier(). + __ ldr(d, Address(s, BrooksPointer::byte_offset())); + __ shenandoah_write_barrier(d); __ block_comment("} Shenandoah write barrier"); %} ins_pipe(pipe_slow); %} - +// Convert oop pointer into compressed form instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); match(Set dst (EncodeP src)); @@ -9621,6 +9646,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + predicate(!UseShenandoahGC || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR); match(Set res (CompareAndSwapP mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); @@ -9637,8 +9663,28 @@ ins_pipe(pipe_slow); %} +instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + + predicate(UseShenandoahGC); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + ins_cost(3 * VOLATILE_REF_COST); + + effect(TEMP tmp, KILL cr); + + format %{ + "cmpxchg_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" + "cset $res, EQ\t# $res <-- (EQ ? 
1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, tmp), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ + predicate(!UseShenandoahGC); match(Set res (CompareAndSwapN mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); @@ -9655,6 +9701,28 @@ ins_pipe(pipe_slow); %} +instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + + predicate(UseShenandoahGC); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + ins_cost(3 * VOLATILE_REF_COST); + + effect(TEMP tmp, KILL cr); + + format %{ + "cmpxchg_narrow_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mov(tmp, $oldval$$Register); // Must not clobber oldval. + __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register, Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ false); + __ cset($res$$Register, Assembler::EQ); + %} + + ins_pipe(pipe_slow); +%} // alternative CompareAndSwapX when we are eliding barriers @@ -9698,7 +9766,7 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR)); match(Set res (CompareAndSwapP mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); @@ -9715,9 +9783,28 @@ ins_pipe(pipe_slow); %} +instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP tmp, KILL cr); + + format %{ + "cmpxchg_acq_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_exclusive(n) && ! UseShenandoahGC); match(Set res (CompareAndSwapN mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); @@ -9734,6 +9821,28 @@ ins_pipe(pipe_slow); %} +instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + ins_cost(3 * VOLATILE_REF_COST); + + effect(TEMP tmp, KILL cr); + + format %{ + "cmpxchg_narrow_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
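An aside on the narrow-oop variants being emitted here: they CAS a 32-bit compressed oop (Assembler::word), so on the false-negative path the failure witness must be decoded before the read barrier can resolve it to a to-space pointer. The equality test, sketched in C++ with hypothetical stand-ins for decoding and the Brooks read barrier:

#include <cstdint>

void* decode_heap_oop(uint32_t narrow); // stand-in for oop decompression
void* read_barrier(void* obj);          // stand-in for the Brooks read barrier

// Two compressed oops name the same object iff their resolved
// (to-space) addresses agree.
bool same_to_space_object(uint32_t a, uint32_t b) {
  return read_barrier(decode_heap_oop(a)) == read_barrier(decode_heap_oop(b));
}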
+ __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register, Assembler::word, /*acquire*/ true, /*release*/ true, /*weak*/ false); + __ cset($res$$Register, Assembler::EQ); + %} + + ins_pipe(pipe_slow); +%} instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{ match(Set prev (GetAndSetI mem newv)); diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp --- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -833,11 +833,15 @@ if (type == T_ARRAY || type == T_OBJECT) { __ verify_oop(src->as_register()); + __ shenandoah_store_check(as_Address(to_addr), src->as_register()); + if (UseCompressedOops && !wide) { __ encode_heap_oop(compressed_src, src->as_register()); } else { compressed_src = src->as_register(); } + } else { + __ shenandoah_store_addr_check(to_addr->base()->as_pointer_register()); } int null_check_here = code_offset(); @@ -857,7 +861,6 @@ if (UseCompressedOops && !wide) { __ strw(compressed_src, as_Address(to_addr, rscratch2)); } else { - __ shenandoah_store_check(compressed_src, as_Address(to_addr)); __ str(compressed_src, as_Address(to_addr)); } break; @@ -1161,50 +1164,31 @@ void LIR_Assembler::emit_opShenandoahWriteBarrier(LIR_OpShenandoahWriteBarrier* op) { - Label done; + Register obj = op->in_opr()->as_register(); Register res = op->result_opr()->as_register(); - Register tmp1 = op->tmp1_opr()->as_register(); - Register tmp2 = op->tmp2_opr()->as_register(); - assert_different_registers(res, tmp1, tmp2); + + Label done; __ block_comment("Shenandoah write barrier {"); + if (res != obj) { + __ mov(res, obj); + } // Check for null. if (op->need_null_check()) { - if (res != obj) __ mov(res, obj); __ cbz(res, done); } - // Check for evacuation-in-progress - Address evacuation_in_progress - = Address(rthread, in_bytes(JavaThread::evacuation_in_progress_offset())); - __ ldrb(rscratch2, evacuation_in_progress); - __ membar(Assembler::LoadLoad); - - // The read-barrier. - __ ldr(res, Address(obj, BrooksPointer::BYTE_OFFSET)); - - __ cbzw(rscratch2, done); - - // Check for object in collection set. 
- __ lsr(tmp1, res, ShenandoahHeapRegion::RegionSizeShift); - __ mov(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); - __ ldrb(tmp2, Address(tmp2, tmp1)); - __ tbz(tmp2, 0, done); - - RegSet to_save = RegSet::of(r0, r1) - res; - __ push(to_save, sp); - __ mov(r1, res); - __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::shenandoah_write_barrier_slow_id))); - __ mov(res, r0); - __ pop(to_save, sp); + __ shenandoah_write_barrier(res); + + __ bind(done); __ block_comment("} Shenandoah write barrier"); - __ bind(done); + } - + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { LIR_Opr src = op->in_opr(); LIR_Opr dest = op->result_opr(); @@ -1707,57 +1691,44 @@ Register addr = as_reg(op->addr()); Register newval = as_reg(op->new_value()); Register cmpval = as_reg(op->cmp_value()); - Label succeed, fail, around; + Register res = op->result_opr()->as_register(); if (op->code() == lir_cas_obj) { + assert(op->tmp1()->is_valid(), "must be"); + Register t1 = op->tmp1()->as_register(); if (UseCompressedOops) { - Register t1 = op->tmp1()->as_register(); - assert(op->tmp1()->is_valid(), "must be"); - __ encode_heap_oop(t1, cmpval); - cmpval = t1; - __ encode_heap_oop(rscratch2, newval); - newval = rscratch2; - casw(addr, newval, cmpval); - } else if (UseShenandoahGC) { - Register tmp1 = as_reg(op->tmp1()); - Register tmp2 = as_reg(op->tmp2()); - Label done, retry; - - __ mov(tmp1, cmpval); - - __ bind(retry); - - casl(addr, newval, tmp1); - - // If the cmpxchg succeeded, then we're done. - __ cbz(rscratch1, done); - - // Resolve the original cmp value into rscratch2. - __ mov(rscratch2, tmp1); - oopDesc::bs()->interpreter_read_barrier(masm(), rscratch2); - - // Resolve the old value at address into rscratch1. - __ ldr(tmp1, Address(addr)); - __ mov(rscratch1, tmp1); - oopDesc::bs()->interpreter_read_barrier(masm(), rscratch1); - - // We're done if the expected/cmp value is not the same as - // old. It's a valid cmpxchg failure then. Otherwise we need - // special treatment for Shenandoah to prevent false negatives. 
- __ cmp(rscratch1, rscratch2); - __ br(Assembler::EQ, retry); - __ mov(rscratch1, 1); // Failed - - __ bind(done); - volatile address xx = _masm->pc(); - asm("nop"); + if (UseShenandoahGC) { + __ encode_heap_oop(t1, cmpval); + cmpval = t1; + assert(op->tmp2()->is_valid(), "must be"); + Register t2 = op->tmp2()->as_register(); + __ encode_heap_oop(t2, newval); + newval = t2; + __ cmpxchg_oop_shenandoah(addr, cmpval, newval, Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ false); + __ csetw(res, Assembler::EQ); + } else { + __ encode_heap_oop(t1, cmpval); + cmpval = t1; + __ encode_heap_oop(rscratch2, newval); + newval = rscratch2; + casw(addr, newval, cmpval); + __ eorw (res, r8, 1); + } } else { + if (UseShenandoahGC) { + __ cmpxchg_oop_shenandoah(addr, cmpval, newval, Assembler::xword, /*acquire*/ false, /*release*/ true, /*weak*/ false); + __ csetw(res, Assembler::EQ); + } else { casl(addr, newval, cmpval); + __ eorw (res, r8, 1); + } } } else if (op->code() == lir_cas_int) { casw(addr, newval, cmpval); + __ eorw (res, r8, 1); } else { casl(addr, newval, cmpval); + __ eorw (res, r8, 1); } } @@ -2058,6 +2029,8 @@ return; } if (opr2->is_double_cpu()) { + guarantee(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "need acmp barrier?"); + guarantee(opr1->type() != T_OBJECT && opr1->type() != T_ARRAY, "need acmp barrier?"); // cpu register - cpu register Register reg2 = opr2->as_register_lo(); __ cmp(reg1, reg2); diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp --- a/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -813,7 +813,7 @@ LIR_Address* a; LIR_Opr obj_op = obj.result(); - obj_op = shenandoah_write_barrier(obj_op, NULL, false); + obj_op = shenandoah_write_barrier(obj_op, NULL, true); if(offset.result()->is_constant()) { jlong c = offset.result()->as_jlong(); @@ -854,15 +854,13 @@ __ cas_obj(addr, cmp.result(), val_op, new_register(T_INT), new_register(T_INT), result); } else if (type == intType) - __ cas_int(addr, cmp.result(), val_op, ill, ill); + __ cas_int(addr, cmp.result(), val_op, ill, ill, result); else if (type == longType) - __ cas_long(addr, cmp.result(), val_op, ill, ill); + __ cas_long(addr, cmp.result(), val_op, ill, ill, result); else { ShouldNotReachHere(); } - __ logical_xor(FrameMap::r8_opr, LIR_OprFact::intConst(1), result); - if (type == objectType) { // Write-barrier needed for Object fields. 
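The hunk above also changes how the CAS result is materialized: cas_int/cas_long now receive the result operand directly and the backend derives it from the condition flags, replacing the LIR-level logical_xor on r8. A small C++ sketch of the before/after (cas_flags_eq is a stand-in for the EQ flag left by the CAS):

bool cas_flags_eq(); // stand-in: EQ condition flag after the CAS

int cas_result_old(int r8_status) { return r8_status ^ 1; } // r8 == 0 meant success
int cas_result_new() { return cas_flags_eq() ? 1 : 0; }     // cset res, EQ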
// Seems to be precise post_barrier(addr, val_op); @@ -1432,7 +1430,7 @@ } LIR_Opr src_op = src.result(); - src_op = shenandoah_write_barrier(src_op, NULL, false); + src_op = shenandoah_write_barrier(src_op, NULL, true); if (is_obj) { data = shenandoah_read_barrier(data, NULL, true); } diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp --- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -69,7 +69,7 @@ verify_oop(obj); - shenandoah_store_check(obj); + shenandoah_store_addr_check(obj); // save object being locked into the BasicObjectLock str(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); @@ -134,8 +134,6 @@ // load object ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - shenandoah_store_check(obj); - biased_locking_exit(obj, hdr, done); } @@ -149,7 +147,8 @@ ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); } verify_oop(obj); - shenandoah_store_check(obj); + + shenandoah_store_addr_check(obj); // test if object header is pointing to the displaced header, and if so, restore // the displaced header in the object - if the object header is not pointing to diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp --- a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -1171,22 +1171,6 @@ #if INCLUDE_ALL_GCS - case shenandoah_write_barrier_slow_id: - { - StubFrame f(sasm, "shenandoah_write_barrier", dont_gc_arguments); - - __ enter(); - __ push_call_clobbered_registers(); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahBarrierSet::write_barrier_c1), - rthread, r1); - __ mov(rscratch1, r0); - __ pop_call_clobbered_registers(); - __ mov(r0, rscratch1); - __ leave(); - __ verify_oop(r0); - - } - break; case g1_pre_barrier_slow_id: { StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/interp_masm_aarch64.cpp --- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -524,6 +524,7 @@ lea(c_rarg1, monitor); // address of first monitor ldr(r0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + shenandoah_store_addr_check(r0); // Invariant cbnz(r0, unlock); pop(state); @@ -601,6 +602,7 @@ bind(loop); // check if current entry is used ldr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + shenandoah_store_addr_check(rscratch1); // Invariant cbnz(rscratch1, exception); add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry @@ -666,7 +668,7 @@ // Load object pointer into obj_reg %c_rarg3 ldr(obj_reg, Address(lock_reg, obj_offset)); - shenandoah_store_check(obj_reg); + shenandoah_store_addr_check(obj_reg); if (UseBiasedLocking) { biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); @@ -767,7 +769,7 @@ // Load oop into obj_reg(%c_rarg3) ldr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); - shenandoah_store_check(obj_reg); + shenandoah_store_addr_check(obj_reg); // Free entry str(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/macroAssembler_aarch64.cpp --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ 
b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Dec 07 21:03:02 2016 +0100
@@ -426,6 +426,8 @@
   Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes());
   Address saved_mark_addr(lock_reg, 0);
+  shenandoah_store_addr_check(obj_reg);
+
   // Biased locking
   // See whether the lock is currently biased toward our thread and
   // whether the epoch is still valid
@@ -576,6 +578,7 @@
   // a higher level. Second, if the bias was revoked while we held the
   // lock, the object could not be rebiased toward another thread, so
   // the bias bit would be clear.
+  shenandoah_store_addr_check(obj_reg); // Access mark word
   ldr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
   andr(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
   cmp(temp_reg, markOopDesc::biased_lock_pattern);
@@ -1638,7 +1641,7 @@
 void MacroAssembler::mov(Register dst, address addr) {
   assert(Universe::heap() == NULL || !Universe::heap()->is_in(addr), "use movptr for oop pointers");
-  mov_immediate64(dst, (uintptr_t)addr);
+  mov_immediate64(dst, (uintptr_t)addr);
 }
 // Form an address from base + offset in Rd. Rd may or may
@@ -2204,6 +2207,69 @@
   }
 }
+void MacroAssembler::cmpxchg_oop_shenandoah(Register addr, Register expected,
+                                            Register new_val,
+                                            enum operand_size size,
+                                            bool acquire, bool release,
+                                            bool weak,
+                                            Register result, Register tmp2) {
+  assert(UseShenandoahGC, "only for shenandoah");
+  bool is_cae = (result != noreg);
+  bool is_narrow = (size == word);
+
+  if (! is_cae) result = rscratch1;
+
+  assert_different_registers(addr, expected, new_val, result, tmp2);
+
+  if (ShenandoahStoreCheck) {
+    if (is_narrow) {
+      decode_heap_oop(tmp2, new_val);
+      shenandoah_store_check(addr, tmp2);
+    } else {
+      shenandoah_store_check(addr, new_val);
+    }
+  }
+  Label retry, done, fail;
+
+  // CAS, using LL/SC pair.
+  bind(retry);
+  load_exclusive(result, addr, size, acquire);
+  if (is_narrow) {
+    cmpw(result, expected);
+  } else {
+    cmp(result, expected);
+  }
+  br(Assembler::NE, fail);
+  store_exclusive(tmp2, new_val, addr, size, release);
+  if (weak) {
+    cmpw(tmp2, 0u); // If the store fails, return NE to our caller
+  } else {
+    cbnzw(tmp2, retry);
+  }
+  b(done);
+
+  bind(fail);
+  // Check if rb(expected)==rb(result)
+  // Shuffle registers so that we have memory value ready for next expected.
+  mov(tmp2, expected);
+  mov(expected, result);
+  if (is_narrow) {
+    decode_heap_oop(result, result);
+    decode_heap_oop(tmp2, tmp2);
+  }
+  oopDesc::bs()->interpreter_read_barrier(this, result);
+  oopDesc::bs()->interpreter_read_barrier(this, tmp2);
+  cmp(result, tmp2);
+  // Retry with expected now being the value we just loaded from addr.
+  br(Assembler::EQ, retry);
+  if (is_narrow && is_cae) {
+    // For cmp-and-exchange and narrow oops, we need to restore
+    // the compressed old-value. We moved it to 'expected' a few lines up.
+    mov(result, expected);
+  }
+  bind(done);
+}
+
 static bool different(Register a, RegisterOrConstant b, Register c) {
   if (b.is_constant())
     return a != c;
@@ -2402,19 +2468,17 @@
 }
 #endif
-void MacroAssembler::push_call_clobbered_registers() {
-  push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
-
+void MacroAssembler::push_call_clobbered_fp_registers() {
   // Push v0-v7, v16-v31.
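The weak flag on the new cmpxchg_oop_shenandoah above mirrors the usual LL/SC distinction: a weak CAS may fail spuriously when the store-exclusive loses its reservation and simply reports that to a caller that retries anyway, while the strong form loops internally. The analogy in portable C++ (std::atomic, not the HotSpot code):

#include <atomic>

bool weak_cas(std::atomic<void*>& slot, void*& expected, void* desired) {
  return slot.compare_exchange_weak(expected, desired);   // may fail spuriously
}

bool strong_cas(std::atomic<void*>& slot, void*& expected, void* desired) {
  return slot.compare_exchange_strong(expected, desired); // retries internally
}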
for (int i = 30; i >= 0; i -= 2) { if (i <= v7->encoding() || i >= v16->encoding()) { - stpd(as_FloatRegister(i), as_FloatRegister(i+1), - Address(pre(sp, -2 * wordSize))); + stpd(as_FloatRegister(i), as_FloatRegister(i+1), + Address(pre(sp, -2 * wordSize))); } } } -void MacroAssembler::pop_call_clobbered_registers() { +void MacroAssembler::pop_call_clobbered_fp_registers() { for (int i = 0; i < 32; i += 2) { if (i <= v7->encoding() || i >= v16->encoding()) { @@ -2422,6 +2486,17 @@ Address(post(sp, 2 * wordSize))); } } +} + +void MacroAssembler::push_call_clobbered_registers() { + push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp); + + push_call_clobbered_fp_registers(); +} + +void MacroAssembler::pop_call_clobbered_registers() { + + pop_call_clobbered_fp_registers(); pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp); } @@ -3511,7 +3586,6 @@ } void MacroAssembler::store_heap_oop(Address dst, Register src) { - shenandoah_store_check(src, dst); if (UseCompressedOops) { assert(!dst.uses(src), "not enough registers"); encode_heap_oop(src); @@ -3706,6 +3780,42 @@ bind(done); } +void MacroAssembler::shenandoah_write_barrier(Register dst) { + assert(UseShenandoahGC, "must only be called with Shenandoah GC active"); + assert(dst != rscratch1, "need rscratch1"); + assert(dst != rscratch2, "need rscratch2"); + + Label done; + + // Check for evacuation-in-progress + Address evacuation_in_progress = Address(rthread, in_bytes(JavaThread::evacuation_in_progress_offset())); + ldrb(rscratch1, evacuation_in_progress); + membar(Assembler::LoadLoad); + + // The read-barrier. + ldr(dst, Address(dst, BrooksPointer::byte_offset())); + + // Evac-check ... + cbzw(rscratch1, done); + + RegSet to_save = RegSet::of(r0); + if (dst != r0) { + push(to_save, sp); + mov(r0, dst); + } + + assert(StubRoutines::aarch64::shenandoah_wb() != NULL, "need write barrier stub"); + far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::aarch64::shenandoah_wb()))); + + if (dst != r0) { + mov(dst, r0); + pop(to_save, sp); + } + block_comment("} Shenandoah write barrier"); + + bind(done); +} + #endif // INCLUDE_ALL_GCS Address MacroAssembler::allocate_metadata_address(Metadata* obj) { @@ -4926,140 +5036,153 @@ // written to, and that fromspace pointers are not written into // objects during concurrent marking. These methods check for that. -const bool ShenandoahStoreCheck = false; - -void MacroAssembler::in_heap_check(Register r, Label &nope) { +void MacroAssembler::in_heap_check(Register r, Register tmp, Label &nope) { ShenandoahHeap *h = (ShenandoahHeap *)Universe::heap(); HeapWord* first_region_bottom = h->first_region_bottom(); HeapWord* last_region_end = first_region_bottom + (ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize) * h->max_regions(); - mov(rscratch1, (uintptr_t)first_region_bottom); - cmp(r, rscratch1); + mov(tmp, (uintptr_t)first_region_bottom); + cmp(r, tmp); br(Assembler::LO, nope); - mov(rscratch1, (uintptr_t)last_region_end); - cmp(r, rscratch1); + mov(tmp, (uintptr_t)last_region_end); + cmp(r, tmp); br(Assembler::HS, nope); } -void MacroAssembler::shenandoah_store_check(Register r, Address dest) { - if (! ShenandoahStoreCheck) - return; - - if (! UseShenandoahGC) - return; - - assert_different_registers(rscratch1, rscratch2, r); - assert(! dest.uses(rscratch1), "must be"); - assert(! dest.uses(rscratch2), "must be"); +void MacroAssembler::shenandoah_cset_check(Register obj, Register tmp1, Register tmp2, Label& done) { + + // Test that oop is not in to-space. 
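Taken together, the shenandoah_write_barrier assembled a few hunks up reduces to a short fast path. A hedged C++ model; brooks_forwardee, evacuation_in_progress and shenandoah_wb_stub are assumptions standing in for the VM internals:

struct oopDesc_;
typedef oopDesc_* oop;

oop  brooks_forwardee(oop obj);   // read barrier; also faults on a NULL obj
bool evacuation_in_progress();    // the per-thread flag tested first
oop  shenandoah_wb_stub(oop obj); // re-checks the cset and may evacuate

oop write_barrier(oop obj) {
  oop fwd = brooks_forwardee(obj);
  if (!evacuation_in_progress()) return fwd; // common case: no copying needed
  return shenandoah_wb_stub(fwd);            // slow path only while evacuating
}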
+ lsr(tmp1, obj, ShenandoahHeapRegion::RegionSizeShift); + assert(ShenandoahHeap::in_cset_fast_test_addr() != 0, "sanity"); + mov(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); + ldrb(tmp2, Address(tmp2, tmp1)); + tbz(tmp2, 0, done); + + // Check for cancelled GC. + assert(ShenandoahHeap::cancelled_concgc_addr() != 0, "sanity"); + mov(tmp2, ShenandoahHeap::cancelled_concgc_addr()); + ldrb(tmp2, Address(tmp2)); + cbnz(tmp2, done); +} + +void MacroAssembler::_shenandoah_store_check(Address addr, Register value, const char* msg, const char* file, int line) { + _shenandoah_store_check(addr.base(), value, msg, file, line); +} + +void MacroAssembler::_shenandoah_store_check(Register addr, Register value, const char* msg, const char* file, int line) { + + if (! UseShenandoahGC || ! ShenandoahStoreCheck) return; + if (addr == r31_sp || addr == sp) return; // Stack-based target + + Register raddr = r8; + Register rval = r9; + Register tmp1 = r10; + Register tmp2 = r11; + + RegSet to_save = RegSet::of(raddr, rval, tmp1, tmp2); + + // Push tmp regs and flags. + push(to_save, sp); + get_nzcv(tmp1); + push(RegSet::of(tmp1), sp); + + mov(rval, value); + mov(raddr, addr); Label done; - cbz(r, done); - - mov(rscratch2, ShenandoahHeap::concurrent_mark_in_progress_addr()); - Assembler::ldrw(rscratch2, Address(rscratch2)); - cbzw(rscratch2, done); - - in_heap_check(r, done); - - // Check for object in collection set. - lsr(rscratch1, r, ShenandoahHeapRegion::RegionSizeShift); - mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr()); - ldrb(rscratch2, Address(rscratch2, rscratch1)); - tbz(rscratch2, 0, done); - - // Check for dest in heap - lea(rscratch2, dest); - in_heap_check(rscratch2, done); - - lsr(rscratch1, rscratch2, ShenandoahHeapRegion::RegionSizeShift); - mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr()); - ldrb(rscratch2, Address(rscratch2, rscratch1)); - tbz(rscratch2, 0, done); - - ldr(rscratch2, Address(r, BrooksPointer::BYTE_OFFSET)); - - stop("Shenandoah: store of oop in collection set during marking!", &done); - should_not_reach_here(); + + // If not in-heap target, skip check. + in_heap_check(raddr, tmp1, done); + + // Test that target oop is not in to-space. + shenandoah_cset_check(raddr, tmp1, tmp2, done); + + // Do value-check only when concurrent mark is in progress. + mov(tmp1, ShenandoahHeap::concurrent_mark_in_progress_addr()); + ldrw(tmp1, Address(tmp1)); + cbzw(tmp1, done); + + // Null-check value. + cbz(rval, done); + + // Test that value oop is not in to-space. + shenandoah_cset_check(rval, tmp1, tmp2, done); + + // Failure. + // Pop tmp regs and flags. + pop(RegSet::of(tmp1), sp); + set_nzcv(tmp1); + pop(to_save, sp); + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("shenandoah_store_check: %s in file: %s line: %i", msg, file, line); + b = code_string(ss.as_string()); + } + // hlt(0); + + stop(b); bind(done); + // Pop tmp regs and flags. + pop(RegSet::of(tmp1), sp); + set_nzcv(tmp1); + pop(to_save, sp); } -void MacroAssembler::shenandoah_store_check(Address dest) { - if (! ShenandoahStoreCheck) - return; - - if (! UseShenandoahGC) - return; - - block_comment("shenandoah_store_check {"); - - assert(! dest.uses(rscratch1), "must be"); - assert(! 
dest.uses(rscratch2), "must be"); - - Label done, yes; - - ldr(rscratch2, Address(rthread, in_bytes(JavaThread::evacuation_in_progress_offset()))); - cbnzw(rscratch2, yes); - - mov(rscratch2, ShenandoahHeap::concurrent_mark_in_progress_addr()); - Assembler::ldrw(rscratch2, Address(rscratch2)); - cbzw(rscratch2, done); - - bind(yes); - - // Check for dest in heap - lea(rscratch2, dest); - cbz(rscratch2, done); - in_heap_check(rscratch2, done); - - lsr(rscratch1, rscratch2, ShenandoahHeapRegion::RegionSizeShift); - mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr()); - ldrb(rscratch2, Address(rscratch2, rscratch1)); - tbz(rscratch2, 0, done); - - stop("Shenandoah: store in collection set during marking/evacuation!", &done); - should_not_reach_here(); +void MacroAssembler::_shenandoah_store_addr_check(Address addr, const char* msg, const char* file, int line) { + _shenandoah_store_addr_check(addr.base(), msg, file, line); +} + +void MacroAssembler::_shenandoah_store_addr_check(Register dst, const char* msg, const char* file, int line) { + + if (! UseShenandoahGC || ! ShenandoahStoreCheck) return; + if (dst == r31_sp || dst == sp) return; // Stack-based target + + Register addr = r8; + Register tmp1 = r9; + Register tmp2 = r10; + + Label done; + RegSet to_save = RegSet::of(addr, tmp1, tmp2); + + // Push tmp regs and flags. + push(to_save, sp); + get_nzcv(tmp1); + push(RegSet::of(tmp1), sp); + + orr(addr, zr, dst); + // mov(addr, dst); + + // Check null. + cbz(addr, done); + + in_heap_check(addr, tmp1, done); + + shenandoah_cset_check(addr, tmp1, tmp2, done); + + // Fail. + // Pop tmp regs and flags. + pop(RegSet::of(tmp1), sp); + set_nzcv(tmp1); + pop(to_save, sp); + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("shenandoah_store_check: %s in file: %s line: %i", msg, file, line); + b = code_string(ss.as_string()); + } + // hlt(0); + stop(b); + // should_not_reach_here(); bind(done); - block_comment("} shenandoah_store_check"); + // Pop tmp regs and flags. + pop(RegSet::of(tmp1), sp); + set_nzcv(tmp1); + pop(to_save, sp); + } - -void MacroAssembler::shenandoah_store_check(Register dest) { - if (! ShenandoahStoreCheck) - return; - - if (! UseShenandoahGC) - return; - - block_comment("shenandoah_store_check {"); - - assert_different_registers(rscratch1, rscratch2, dest); - - Label done, yes; - - ldr(rscratch2, Address(rthread, in_bytes(JavaThread::evacuation_in_progress_offset()))); - cbnzw(rscratch2, yes); - - mov(rscratch2, ShenandoahHeap::concurrent_mark_in_progress_addr()); - Assembler::ldrw(rscratch2, Address(rscratch2)); - cbzw(rscratch2, done); - - bind(yes); - - // Check for dest in heap - cbz(dest, done); - in_heap_check(dest, done); - - lsr(rscratch1, dest, ShenandoahHeapRegion::RegionSizeShift); - mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr()); - ldrb(rscratch2, Address(rscratch2, rscratch1)); - tbz(rscratch2, 0, done); - - stop("Shenandoah: store in collection set during marking/evacuation!", &done); - should_not_reach_here(); - - bind(done); - block_comment("} shenandoah_store_check"); -} - diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/macroAssembler_aarch64.hpp --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -443,6 +443,8 @@ // 64 bits of each vector register. 
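Further down, the header hunk wraps these checkers in macros so that a failing check reports its C++ call site. The same __FILE__/__LINE__ pattern in plain C++:

#include <cstdio>
#include <cstdlib>

void check_fail(const char* msg, const char* file, int line) {
  std::fprintf(stderr, "shenandoah_store_check: %s in file: %s line: %d\n",
               msg, file, line);
  std::abort();
}

#define STORE_CHECK(ok) \
  do { if (!(ok)) check_fail("oop not safe for writing", __FILE__, __LINE__); } while (0)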
void push_call_clobbered_registers(); void pop_call_clobbered_registers(); + void push_call_clobbered_fp_registers(); + void pop_call_clobbered_fp_registers(); // now mov instructions for loading absolute addresses and 32 or // 64 bit integers @@ -533,6 +535,17 @@ msr(0b011, 0b0100, 0b0100, 0b001, zr); } + // Macro instructions for accessing and updating the condition flags + inline void get_nzcv(Register reg) + { + mrs(0b011, 0b0100, 0b0010, 0b000, reg); + } + + inline void set_nzcv(Register reg) + { + msr(0b011, 0b0100, 0b0010, 0b000, reg); + } + // DCZID_EL0: op1 == 011 // CRn == 0000 // CRm == 0000 @@ -778,6 +791,8 @@ Register tmp, Register tmp2); + void shenandoah_write_barrier(Register dst); + #endif // INCLUDE_ALL_GCS // split store_check(Register obj) to enhance instruction interleaving @@ -993,6 +1008,10 @@ bool acquire, bool release, Register tmp = rscratch1); + void cmpxchg_oop_shenandoah(Register addr, Register expected, Register new_val, + enum operand_size size, + bool acquire, bool release, bool weak, + Register result = noreg, Register tmp2 = rscratch2); // Calls void trampoline_call(Address entry, CodeBuffer *cbuf = NULL); @@ -1210,10 +1229,19 @@ Register tmp3, Register tmp4, int int_cnt1, Register result); - void in_heap_check(Register r, Label &nope); - void shenandoah_store_check(Register r, Address addr); - void shenandoah_store_check(Address addr); - void shenandoah_store_check(Register addr); + void in_heap_check(Register r, Register tmp, Label &nope); + +private: + void shenandoah_cset_check(Register obj, Register tmp1, Register tmp2, Label& done); + +public: + void _shenandoah_store_addr_check(Register addr, const char* msg, const char* file, int line); + void _shenandoah_store_addr_check(Address addr, const char* msg, const char* file, int line); +#define shenandoah_store_addr_check(reg) _shenandoah_store_addr_check(reg, "oop not safe for writing", __FILE__, __LINE__) + + void _shenandoah_store_check(Address addr, Register value, const char* msg, const char* file, int line); + void _shenandoah_store_check(Register addr, Register value, const char* msg, const char* file, int line); +#define shenandoah_store_check(addr, value) _shenandoah_store_check(addr, value, "oop not safe for writing", __FILE__, __LINE__) private: void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/methodHandles_aarch64.cpp --- a/src/cpu/aarch64/vm/methodHandles_aarch64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/methodHandles_aarch64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -151,11 +151,11 @@ sizeof(u2), /*is_signed*/ false); // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); Label L; - __ ldr(temp2, __ argument_address(temp2, -1)); - __ cmp(recv, temp2); - oopDesc::bs()->asm_acmp_barrier(_masm, recv, temp2); + __ ldr(rscratch1, __ argument_address(temp2, -1)); + __ cmp(recv, rscratch1); + oopDesc::bs()->asm_acmp_barrier(_masm, recv, rscratch1); __ br(Assembler::EQ, L); - __ ldr(r0, temp2); + __ ldr(r0, __ argument_address(temp2, -1)); __ hlt(0); __ BIND(L); } diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp --- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -523,6 +523,10 @@ range_check(masm, rax, r11, StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(), L_ok); + if (StubRoutines::code3() != NULL) + range_check(masm, 
rax, r11, + StubRoutines::code3()->code_begin(), StubRoutines::code3()->code_end(), + L_ok); const char* msg = "i2c adapter must return to an interpreter frame"; __ block_comment(msg); __ stop(msg); @@ -1824,6 +1828,7 @@ } // Load (object->mark() | 1) into swap_reg r0 + __ shenandoah_store_addr_check(obj_reg); // Access mark word __ ldr(rscratch1, Address(obj_reg, 0)); __ orr(swap_reg, rscratch1, 1); @@ -1944,6 +1949,8 @@ // due to cache line collision. __ serialize_memory(rthread, r2); } + } else { + __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset())); } // check for safepoint operation in progress and/or pending suspend requests @@ -1988,7 +1995,7 @@ Label done; - __ shenandoah_store_check(obj_reg); + __ shenandoah_store_addr_check(obj_reg); if (UseBiasedLocking) { __ biased_locking_exit(obj_reg, old_hdr, done); diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/shenandoahBarrierSet_aarch64.cpp --- a/src/cpu/aarch64/vm/shenandoahBarrierSet_aarch64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/shenandoahBarrierSet_aarch64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -59,7 +59,7 @@ compile_resolve_oop_runtime(masm, dst); return; } - __ ldr(dst, Address(dst, BrooksPointer::BYTE_OFFSET)); + __ ldr(dst, Address(dst, BrooksPointer::byte_offset())); } } @@ -69,48 +69,11 @@ return interpreter_read_barrier(masm, dst); } - assert(dst != rscratch1, "different regs"); - assert(dst != rscratch2, "Need rscratch2"); - - Label done; - - Address evacuation_in_progress - = Address(rthread, in_bytes(JavaThread::evacuation_in_progress_offset())); - - __ ldrb(rscratch2, evacuation_in_progress); - __ membar(Assembler::LoadLoad); - - // Now check if evacuation is in progress. - interpreter_read_barrier_not_null(masm, dst); - - __ cbzw(rscratch2, done); - - __ lsr(rscratch1, dst, ShenandoahHeapRegion::RegionSizeShift); - __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr()); - __ ldrb(rscratch2, Address(rscratch2, rscratch1)); - __ tst(rscratch2, 0x1); - __ br(Assembler::EQ, done); - - // Save possibly live regs. - RegSet live_regs = RegSet::range(r0, r4) - dst; - __ push(live_regs, sp); - __ strd(v0, __ pre(sp, 2 * -wordSize)); - - // Call into runtime - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahBarrierSet::write_barrier_interp), dst); - - // Move result into dst reg. - __ mov(dst, r0); - - // Restore possibly live regs. - __ ldrd(v0, __ post(sp, 2 * wordSize)); - __ pop(live_regs, sp); - - __ bind(done); + __ shenandoah_write_barrier(dst); } void ShenandoahHeap::compile_prepare_oop(MacroAssembler* masm, Register obj) { - __ add(obj, obj, BrooksPointer::BROOKS_POINTER_OBJ_SIZE * HeapWordSize); + __ add(obj, obj, BrooksPointer::byte_size()); __ str(obj, Address(obj, -1 * HeapWordSize)); } diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/stubGenerator_aarch64.cpp --- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -562,15 +562,13 @@ // // Trash rscratch1, rscratch2. Preserve everything else. 
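The stub that follows now comes in two flavors, selected by the new c_abi parameter: JIT call sites expect every register to survive, while a C-ABI caller preserves its own general-purpose registers, so only the FP registers the stub clobbers need saving. A sketch of that split, using the function names from the MacroAssembler hunks above:

void push_call_clobbered_registers();    // GPRs + FP regs, for JIT callers
void push_call_clobbered_fp_registers(); // FP regs only, for C-ABI callers

void save_before_wb(bool c_abi) {
  if (c_abi) {
    push_call_clobbered_fp_registers();  // caller already saved its GPRs
  } else {
    push_call_clobbered_registers();     // compiled code expects all regs intact
  }
}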
- address generate_shenandoah_wb() { + address generate_shenandoah_wb(bool c_abi) { StubCodeMark mark(this, "StubRoutines", "shenandoah_wb"); __ align(6); address start = __ pc(); Label work, slow_case, lose, not_an_instance, is_array; - Address evacuation_in_progress - = Address(rthread, in_bytes(JavaThread::evacuation_in_progress_offset())); __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr()); __ lsr(rscratch1, r0, ShenandoahHeapRegion::RegionSizeShift); @@ -581,7 +579,9 @@ __ bind(work); RegSet saved = RegSet::range(r1, r4); - __ push(saved, sp); + if (!c_abi) { + __ push(saved, sp); + } Register obj = r0, size = r2, newobj = r3, newobj_end = rscratch2; @@ -642,14 +642,16 @@ // All copied. Now try to CAS the Brooks pointer. Label succeed; - __ lea(r2, Address(obj, BrooksPointer::BYTE_OFFSET)); + __ lea(r2, Address(obj, BrooksPointer::byte_offset())); __ cmpxchgptr(obj, newobj, r2, rscratch1, succeed, NULL); // If we lose the CAS we are racing with someone who just beat // us evacuating the object. This leaves the address of the // evacuated object in r0. // We lost. - __ pop(saved, sp); + if (!c_abi) { + __ pop(saved, sp); + } __ ret(lr); // We won. @@ -657,7 +659,9 @@ __ mov(obj, newobj); // dst points to end of newobj. __ str(dst, Address(rthread, JavaThread::gclab_top_offset())); - __ pop(saved, sp); + if (!c_abi) { + __ pop(saved, sp); + } __ ret(lr); // Come here if the count of HeapWords is odd. @@ -692,18 +696,27 @@ { // Make a runtime call to evacuate the object. __ bind(slow_case); - __ pop(saved, sp); + if (!c_abi) { + __ pop(saved, sp); + } __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push_call_clobbered_registers(); + if (!c_abi) { + __ push_call_clobbered_registers(); + } else { + __ push_call_clobbered_fp_registers(); + } __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahBarrierSet::write_barrier_c2)); __ blrt(lr, 1, 0, MacroAssembler::ret_type_integral); - __ mov(rscratch1, obj); - - __ pop_call_clobbered_registers(); - __ mov(obj, rscratch1); + if (!c_abi) { + __ mov(rscratch1, obj); + __ pop_call_clobbered_registers(); + __ mov(obj, rscratch1); + } else { + __ pop_call_clobbered_fp_registers(); + } __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(lr); @@ -825,6 +838,7 @@ break; default: ShouldNotReachHere(); + } } } @@ -1796,7 +1810,7 @@ // used by generate_conjoint_int_oop_copy(). 
// address generate_disjoint_int_copy(bool aligned, address *entry, - const char *name) { + const char *name) { const bool not_oop = false; return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); } @@ -2909,7 +2923,7 @@ __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); - Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52; + Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52, _L_finish; const Register from = c_rarg0; // source array address const Register to = c_rarg1; // destination array address @@ -2920,9 +2934,12 @@ const Register keylen = rscratch1; address start = __ pc(); + __ enter(); - __ mov(rscratch2, len_reg); + __ subsw(rscratch2, len_reg, zr); + __ br(Assembler::LE, _L_finish); + __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); __ ld1(v0, __ T16B, rvec); @@ -2980,11 +2997,13 @@ __ eor(v0, __ T16B, v0, v31); __ st1(v0, __ T16B, __ post(to, 16)); - __ sub(len_reg, len_reg, 16); - __ cbnz(len_reg, L_aes_loop); + + __ subw(len_reg, len_reg, 16); + __ cbnzw(len_reg, L_aes_loop); __ st1(v0, __ T16B, rvec); + __ BIND(_L_finish); __ mov(r0, rscratch2); __ leave(); @@ -3010,7 +3029,7 @@ __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); - Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52; + Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52, _L_finish; const Register from = c_rarg0; // source array address const Register to = c_rarg1; // destination array address @@ -3021,9 +3040,12 @@ const Register keylen = rscratch1; address start = __ pc(); + __ enter(); - __ mov(rscratch2, len_reg); + __ subsw(rscratch2, len_reg, zr); + __ br(Assembler::LE, _L_finish); + __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); __ ld1(v2, __ T16B, rvec); @@ -3086,11 +3108,12 @@ __ st1(v0, __ T16B, __ post(to, 16)); __ orr(v2, __ T16B, v1, v1); - __ sub(len_reg, len_reg, 16); - __ cbnz(len_reg, L_aes_loop); + __ subw(len_reg, len_reg, 16); + __ cbnzw(len_reg, L_aes_loop); __ st1(v2, __ T16B, rvec); + __ BIND(_L_finish); __ mov(r0, rscratch2); __ leave(); @@ -4419,10 +4442,6 @@ StubRoutines::_multiplyToLen = generate_multiplyToLen(); } - if (UseShenandoahGC) { - StubRoutines::aarch64::_shenandoah_wb = generate_shenandoah_wb(); - } - if (UseMontgomeryMultiplyIntrinsic) { StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); @@ -4464,16 +4483,27 @@ #endif } + void generate_barriers() { + if (UseShenandoahGC) { + StubRoutines::aarch64::_shenandoah_wb = generate_shenandoah_wb(false); + StubRoutines::_shenandoah_wb_C = generate_shenandoah_wb(true); + } + } + public: - StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { - if (all) { + StubGenerator(CodeBuffer* code, int phase) : StubCodeGenerator(code) { + if (phase == 2) { generate_all(); + } else if (phase == 1) { + generate_initial(); + } else if (phase == 3) { + generate_barriers(); } else { - generate_initial(); + ShouldNotReachHere(); } } }; // end class declaration -void StubGenerator_generate(CodeBuffer* code, bool all) { - StubGenerator g(code, all); +void StubGenerator_generate(CodeBuffer* code, int phase) { + StubGenerator g(code, phase); } diff -r b1cf900aa021 -r 87059e2365be 
src/cpu/aarch64/vm/stubRoutines_aarch64.hpp --- a/src/cpu/aarch64/vm/stubRoutines_aarch64.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/stubRoutines_aarch64.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -42,13 +42,16 @@ enum platform_dependent_constants { code_size1 = 19000, // simply increase if too small (assembler will crash if too small) - code_size2 = 22000 // simply increase if too small (assembler will crash if too small) + code_size2 = 22000, // simply increase if too small (assembler will crash if too small) + code_size3 = 2000 // simply increase if too small (assembler will crash if too small) }; class aarch64 { friend class StubGenerator; private: + static address _shenandoah_wb; + static address _get_previous_fp_entry; static address _get_previous_sp_entry; @@ -61,7 +64,6 @@ static address _float_sign_flip; static address _double_sign_mask; static address _double_sign_flip; - static address _shenandoah_wb; static address _zero_longs; diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp --- a/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -593,6 +593,7 @@ __ mov(rscratch1, esp); __ str(rscratch1, monitor_block_top); // set new monitor block top // store object + __ shenandoah_store_addr_check(r0); __ str(r0, Address(esp, BasicObjectLock::obj_offset_in_bytes())); __ mov(c_rarg1, esp); // object address __ lock_object(c_rarg1); @@ -1273,6 +1274,7 @@ wordSize - sizeof(BasicObjectLock)))); __ ldr(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ shenandoah_store_addr_check(t); // Invariant __ cbnz(t, unlock); // Entry already unlocked, need to throw exception diff -r b1cf900aa021 -r 87059e2365be src/cpu/aarch64/vm/templateTable_aarch64.cpp --- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -3760,6 +3760,7 @@ // check if current entry is used // if not used then remember entry in c_rarg1 __ ldr(rscratch1, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); + __ shenandoah_store_addr_check(rscratch1); // Invariant oopDesc::bs()->interpreter_read_barrier(_masm, rscratch1); __ cmp(zr, rscratch1); __ csel(c_rarg1, c_rarg3, c_rarg1, Assembler::EQ); @@ -3816,6 +3817,7 @@ __ increment(rbcp); // store object + __ shenandoah_store_addr_check(r0); // Invariant __ str(r0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); __ lock_object(c_rarg1); @@ -3861,6 +3863,7 @@ __ bind(loop); // check if current entry is for same object __ ldr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ shenandoah_store_addr_check(rscratch1); // Invariant oopDesc::bs()->interpreter_read_barrier(_masm, rscratch1); __ cmp(r0, rscratch1); // if same object then stop searching diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/assembler_x86.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -1539,6 +1539,16 @@ } } +void Assembler::jccb_if_possible(Condition cc, Label& L) { + +#ifdef ASSERT + if (UseShenandoahGC) { + jcc(cc, L); + } else +#endif + jccb(cc, L); +} + void Assembler::jmp(Address adr) { InstructionMark im(this); prefix(adr); @@ -1612,6 +1622,16 @@ } } +void Assembler::jmpb_if_possible(Label& L) { + +#ifdef ASSERT + if (UseShenandoahGC) { + jmp(L); + } else +#endif + jmpb(L); +} 
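The jccb_if_possible/jmpb_if_possible wrappers just added exist because jccb/jmpb encode a signed 8-bit displacement; with the Shenandoah store checks emitting long instruction sequences under ASSERT, a short branch can no longer be guaranteed to reach its label, so the wrappers fall back to the near forms. The range rule, as a small C++ check:

#include <cstdint>

// jccb/jmpb reach at most +/-127 bytes from the end of the branch instruction.
bool fits_in_short_branch(intptr_t end_of_branch, intptr_t target) {
  intptr_t disp = target - end_of_branch;
  return disp >= INT8_MIN && disp <= INT8_MAX;
}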
+ void Assembler::ldmxcsr( Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/assembler_x86.hpp --- a/src/cpu/x86/vm/assembler_x86.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/assembler_x86.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -1250,6 +1250,7 @@ // not bound within an 8-bit offset of this instruction, a run-time error // will occur. void jccb(Condition cc, Label& L); + void jccb_if_possible(Condition cc, Label& L); void jmp(Address entry); // pc <- entry @@ -1263,6 +1264,7 @@ // not bound within an 8-bit offset of this instruction, a run-time error // will occur. void jmpb(Label& L); + void jmpb_if_possible(Label& L); void ldmxcsr( Address src ); diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/c1_LIRAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -34,6 +34,7 @@ #include "ci/ciInstance.hpp" #include "gc_implementation/shenandoah/brooksPointer.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.hpp" +#include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" #include "gc_interface/collectedHeap.hpp" #include "memory/barrierSet.hpp" #include "memory/cardTableModRefBS.hpp" @@ -1011,6 +1012,7 @@ if (type == T_ARRAY || type == T_OBJECT) { __ verify_oop(src->as_register()); + __ shenandoah_store_check(as_Address(to_addr), src->as_register()); #ifdef _LP64 if (UseCompressedOops && !wide) { __ movptr(compressed_src, src->as_register()); @@ -1020,6 +1022,8 @@ } } #endif + } else { + __ shenandoah_store_addr_check(to_addr->base()->as_pointer_register()); } if (patch_code != lir_patch_none) { @@ -1533,32 +1537,7 @@ __ jcc(Assembler::zero, done); } - // Check for evacuation-in-progress - Address evacuation_in_progress = Address(r15_thread, in_bytes(JavaThread::evacuation_in_progress_offset())); - __ cmpb(evacuation_in_progress, 0); - - // The read-barrier. - __ movptr(res, Address(res, BrooksPointer::BYTE_OFFSET)); - - __ jcc(Assembler::equal, done); - - // Check for object in collection set. - __ movptr(tmp1, res); - __ shrptr(tmp1, ShenandoahHeapRegion::RegionSizeShift); - __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); - __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1)); - __ testb(tmp2, 0x1); - __ jcc(Assembler::zero, done); - - if (res != rax) { - __ xchgptr(res, rax); // Move obj into rax and save rax into obj. - } - - __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::shenandoah_write_barrier_slow_id))); - - if (res != rax) { - __ xchgptr(rax, res); // Swap back obj with rax. 
- } + __ shenandoah_write_barrier(res); __ bind(done); @@ -2049,54 +2028,37 @@ if ( op->code() == lir_cas_obj) { #ifdef _LP64 if (UseCompressedOops) { - __ encode_heap_oop(cmpval); - __ mov(rscratch1, newval); - __ encode_heap_oop(rscratch1); - if (os::is_MP()) { - __ lock(); + if (UseShenandoahGC) { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + + __ encode_heap_oop(cmpval); + __ mov(rscratch1, newval); + __ encode_heap_oop(rscratch1); + __ cmpxchg_oop_shenandoah(NULL, Address(addr, 0), cmpval, rscratch1, true, tmp1, tmp2); + } else { + __ encode_heap_oop(cmpval); + __ mov(rscratch1, newval); + __ encode_heap_oop(rscratch1); + if (os::is_MP()) { + __ lock(); + } + // cmpval (rax) is implicitly used by this instruction + __ cmpxchgl(rscratch1, Address(addr, 0)); } - // cmpval (rax) is implicitly used by this instruction - __ cmpxchgl(rscratch1, Address(addr, 0)); } else #endif { if (UseShenandoahGC) { - Label done; - Label retry; - - __ bind(retry); - - // Save original cmp-value into tmp1, before following cas destroys it. - __ movptr(op->tmp1()->as_register(), op->cmp_value()->as_register()); - - if (os::is_MP()) { - __ lock(); - } - __ cmpxchgptr(newval, Address(addr, 0)); - - // If the cmpxchg succeeded, then we're done. - __ jcc(Assembler::equal, done); - - // Resolve the original cmp value. - oopDesc::bs()->interpreter_read_barrier(masm(), op->tmp1()->as_register()); - // Resolve the old value at address. We get the old value in cmp/rax - // when the comparison in cmpxchg failed. - __ movptr(op->tmp2()->as_register(), cmpval); - oopDesc::bs()->interpreter_read_barrier(masm(), op->tmp2()->as_register()); - - // We're done if the expected/cmp value is not the same as old. It's a valid - // cmpxchg failure then. Otherwise we need special treatment for Shenandoah - // to prevent false positives. 
- __ cmpptr(op->tmp1()->as_register(), op->tmp2()->as_register()); - __ jcc(Assembler::equal, retry); - - __ bind(done); + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + __ cmpxchg_oop_shenandoah(NULL, Address(addr, 0), cmpval, newval, true, tmp1, tmp2); } else { if (os::is_MP()) { __ lock(); } __ cmpxchgptr(newval, Address(addr, 0)); - } + } } } else { assert(op->code() == lir_cas_int, "lir_cas_int expected"); diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/c1_LIRGenerator_x86.cpp --- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -766,7 +766,7 @@ LIR_Address* a; LIR_Opr obj_op = obj.result(); - obj_op = shenandoah_write_barrier(obj_op, NULL, false); + obj_op = shenandoah_write_barrier(obj_op, NULL, true); if(offset.result()->is_constant()) { #ifdef _LP64 @@ -916,8 +916,10 @@ LIRItem dst_pos(x->argument_at(3), this); LIRItem length(x->argument_at(4), this); + dst.load_item(); LIR_Opr dst_op = dst.result(); dst_op = shenandoah_write_barrier(dst_op, info, x->arg_needs_null_check(2)); + src.load_item(); LIR_Opr src_op = src.result(); src_op = shenandoah_read_barrier(src_op, info, x->arg_needs_null_check(0)); @@ -1486,7 +1488,7 @@ assert (type == T_INT || (!x->is_add() && is_obj) LP64_ONLY( || type == T_LONG ), "unexpected type"); LIR_Opr src_op = src.result(); - src_op = shenandoah_write_barrier(src_op, NULL, false); + src_op = shenandoah_write_barrier(src_op, NULL, true); if (is_obj) { data = shenandoah_read_barrier(data, NULL, true); } diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/c1_MacroAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -45,6 +45,8 @@ verify_oop(obj); + shenandoah_store_addr_check(obj); + // save object being locked into the BasicObjectLock movptr(Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()), obj); @@ -122,6 +124,9 @@ movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); } verify_oop(obj); + + shenandoah_store_addr_check(obj); + // test if object header is pointing to the displaced header, and if so, restore // the displaced header in the object - if the object header is not pointing to // the displaced header, get the object header instead diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/c1_Runtime1_x86.cpp --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -39,7 +39,6 @@ #include "utilities/macros.hpp" #include "vmreg_x86.inline.hpp" #if INCLUDE_ALL_GCS -#include "gc_implementation/shenandoah/shenandoahBarrierSet.hpp" #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" #endif @@ -1610,17 +1609,6 @@ break; #if INCLUDE_ALL_GCS - case shenandoah_write_barrier_slow_id: - { - StubFrame f(sasm, "shenandoah_write_barrier", dont_gc_arguments); - - save_live_registers(sasm, 1); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahBarrierSet::write_barrier_c1), r15_thread, rax); - restore_live_registers_except_rax(sasm); - __ verify_oop(rax); - - } - break; case g1_pre_barrier_slow_id: { StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/interp_masm_x86_64.cpp --- a/src/cpu/x86/vm/interp_masm_x86_64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp Wed Dec 07 21:03:02 2016 +0100 
@@ -574,6 +574,7 @@ lea(c_rarg1, monitor); // address of first monitor movptr(rax, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + shenandoah_store_addr_check(rax); // Invariant testptr(rax, rax); jcc(Assembler::notZero, unlock); @@ -651,6 +652,7 @@ bind(loop); // check if current entry is used + shenandoah_lock_check(c_rarg1); cmpptr(Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()), (int32_t) NULL); jcc(Assembler::notEqual, exception); @@ -712,6 +714,8 @@ // Load object pointer into obj_reg %c_rarg3 movptr(obj_reg, Address(lock_reg, obj_offset)); + shenandoah_store_addr_check(obj_reg); + if (UseBiasedLocking) { biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, false, done, &slow_case); } @@ -728,6 +732,7 @@ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); + // obj_reg has been checked a few lines up. if (os::is_MP()) lock(); cmpxchgptr(lock_reg, Address(obj_reg, 0)); if (PrintBiasedLockingStatistics) { @@ -802,6 +807,7 @@ // Load oop into obj_reg(%c_rarg3) movptr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + shenandoah_store_addr_check(obj_reg); // Invariant // Free entry movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), (int32_t)NULL_WORD); diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/macroAssembler_x86.cpp --- a/src/cpu/x86/vm/macroAssembler_x86.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -43,6 +43,8 @@ #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" #include "gc_implementation/g1/heapRegion.hpp" +#include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" +#include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" #endif // INCLUDE_ALL_GCS #ifdef PRODUCT @@ -1082,6 +1084,8 @@ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); Address saved_mark_addr(lock_reg, 0); + shenandoah_store_addr_check(obj_reg); + if (PrintBiasedLockingStatistics && counters == NULL) { counters = BiasedLocking::counters(); } @@ -1157,7 +1161,7 @@ // the prototype header is no longer biased and we have to revoke // the bias on this object. testptr(header_reg, markOopDesc::biased_lock_mask_in_place); - jccb(Assembler::notZero, try_revoke_bias); + jccb_if_possible(Assembler::notZero, try_revoke_bias); // Biasing is still enabled for this data type. See whether the // epoch of the current bias is still valid, meaning that the epoch @@ -1169,7 +1173,7 @@ // otherwise the manipulations it performs on the mark word are // illegal. testptr(header_reg, markOopDesc::epoch_mask_in_place); - jccb(Assembler::notZero, try_rebias); + jccb_if_possible(Assembler::notZero, try_rebias); // The epoch of the current bias is still valid but we know nothing // about the owner; it might be set or it might be clear. Try to @@ -1296,6 +1300,7 @@ // a higher level. Second, if the bias was revoked while we held the // lock, the object could not be rebiased toward another thread, so // the bias bit would be clear. 
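The shenandoah_store_addr_check calls added throughout these locking paths all guard the same invariant: monitorenter/monitorexit CAS the mark word in place, and a header update applied to a from-space copy would be lost when the object moves. A hedged sketch; object, in_collection_set and cas_mark are illustrative stand-ins, not HotSpot types:

#include <cassert>

struct object { void* mark; };
bool in_collection_set(object* o);                     // stand-in
bool cas_mark(object* o, void* expect, void* desired); // stand-in

bool try_lock(object* o, void* unlocked, void* locked) {
  // What shenandoah_store_addr_check(obj_reg) enforces before the CAS:
  assert(!in_collection_set(o) && "oop not safe for writing");
  return cas_mark(o, unlocked, locked);
}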
+ shenandoah_store_addr_check(obj_reg); // Access mark word movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); cmpptr(temp_reg, markOopDesc::biased_lock_pattern); @@ -1489,6 +1494,7 @@ movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort bind(L_rtm_retry); } + shenandoah_store_addr_check(objReg); // Access mark word movptr(tmpReg, Address(objReg, 0)); testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased jcc(Assembler::notZero, IsInflated); @@ -1504,6 +1510,7 @@ bind(L_noincrement); } xbegin(L_on_abort); + shenandoah_store_addr_check(objReg); // Access mark word movptr(tmpReg, Address(objReg, 0)); // fetch markword andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked @@ -1566,6 +1573,7 @@ bind(L_noincrement); } xbegin(L_on_abort); + shenandoah_store_addr_check(objReg); // Access mark word movptr(tmpReg, Address(objReg, 0)); movptr(tmpReg, Address(tmpReg, owner_offset)); testptr(tmpReg, tmpReg); @@ -1712,6 +1720,8 @@ assert_different_registers(objReg, boxReg, tmpReg, scrReg); } + shenandoah_store_addr_check(objReg); // Access mark word + if (counters != NULL) { atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg); } @@ -1782,7 +1792,7 @@ movptr(tmpReg, Address(objReg, 0)); // [FETCH] testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased - jccb(Assembler::notZero, IsInflated); + jccb_if_possible(Assembler::notZero, IsInflated); // Attempt stack-locking ... orptr (tmpReg, markOopDesc::unlocked_value); @@ -1873,7 +1883,7 @@ // Test-And-CAS instead of CAS movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); // rax, = m->_owner testptr(tmpReg, tmpReg); // Locked ? - jccb (Assembler::notZero, DONE_LABEL); + jccb_if_possible(Assembler::notZero, DONE_LABEL); } // Appears unlocked - try to swing _owner from null to non-null. @@ -1889,7 +1899,7 @@ } cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3 - jccb (Assembler::notZero, DONE_LABEL); + jccb_if_possible(Assembler::notZero, DONE_LABEL); get_thread (scrReg); // beware: clobbers ICCs movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg); xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success @@ -1918,7 +1928,7 @@ // Can suffer RTS->RTO upgrades on shared or cold $ lines movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); // rax, = m->_owner testptr(tmpReg, tmpReg); // Locked ? - jccb (Assembler::notZero, DONE_LABEL); + jccb_if_possible(Assembler::notZero, DONE_LABEL); } // Appears unlocked - try to swing _owner from null to non-null. @@ -1953,7 +1963,7 @@ movptr (boxReg, tmpReg); movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); testptr(tmpReg, tmpReg); - jccb (Assembler::notZero, DONE_LABEL); + jccb_if_possible(Assembler::notZero, DONE_LABEL); // It's inflated and appears unlocked if (os::is_MP()) { @@ -2011,6 +2021,8 @@ assert(boxReg == rax, ""); assert_different_registers(objReg, boxReg, tmpReg); + shenandoah_store_addr_check(objReg); // Access mark word + if (EmitSync & 4) { // Disable - inhibit all inlining. 
Force control through the slow-path cmpptr (rsp, 0); @@ -2071,7 +2083,7 @@ testptr(boxReg, boxReg); jccb(Assembler::notZero, L_regular_inflated_unlock); xend(); - jmpb(DONE_LABEL); + jmpb_if_possible(DONE_LABEL); bind(L_regular_inflated_unlock); } #endif @@ -2115,18 +2127,18 @@ orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)); orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)); orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)); - jccb (Assembler::notZero, DONE_LABEL); + jccb_if_possible(Assembler::notZero, DONE_LABEL); movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD); - jmpb (DONE_LABEL); + jmpb_if_possible(DONE_LABEL); } else { xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)); - jccb (Assembler::notZero, DONE_LABEL); + jccb_if_possible(Assembler::notZero, DONE_LABEL); movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)); orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)); - jccb (Assembler::notZero, CheckSucc); + jccb_if_possible(Assembler::notZero, CheckSucc); movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD); - jmpb (DONE_LABEL); + jmpb_if_possible(DONE_LABEL); } // The Following code fragment (EmitSync & 65536) improves the performance of @@ -2198,11 +2210,11 @@ bind (LGoSlowPath); orptr(boxReg, 1); // set ICC.ZF=0 to indicate failure - jmpb (DONE_LABEL); + jmpb_if_possible(DONE_LABEL); bind (LSuccess); xorptr(boxReg, boxReg); // set ICC.ZF=1 to indicate success - jmpb (DONE_LABEL); + jmpb_if_possible(DONE_LABEL); } bind (Stacked); @@ -2231,12 +2243,12 @@ movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); xorptr(boxReg, r15_thread); orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)); - jccb (Assembler::notZero, DONE_LABEL); + jccb_if_possible(Assembler::notZero, DONE_LABEL); movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)); orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)); jccb (Assembler::notZero, CheckSucc); movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD); - jmpb (DONE_LABEL); + jmpb_if_possible(DONE_LABEL); if ((EmitSync & 65536) == 0) { Label LSuccess, LGoSlowPath ; @@ -2263,11 +2275,11 @@ bind (LGoSlowPath); orl (boxReg, 1); // set ICC.ZF=0 to indicate failure - jmpb (DONE_LABEL); + jmpb_if_possible(DONE_LABEL); bind (LSuccess); testl (boxReg, 0); // set ICC.ZF=1 to indicate success - jmpb (DONE_LABEL); + jmpb_if_possible(DONE_LABEL); } bind (Stacked); @@ -3202,7 +3214,24 @@ jmp(done); } else { // Stack: X Y - Label x_negative, y_odd; + Label x_negative, y_not_2; + + static double two = 2.0; + ExternalAddress two_addr((address)&two); + + // constant maybe too far on 64 bit + lea(tmp2, two_addr); + fld_d(Address(tmp2, 0)); // Stack: 2 X Y + fcmp(tmp, 2, true, false); // Stack: X Y + jcc(Assembler::parity, y_not_2); + jcc(Assembler::notEqual, y_not_2); + + fxch(); fpop(); // Stack: X + fmul(0); // Stack: X*X + + jmp(done); + + bind(y_not_2); fldz(); // Stack: 0 X Y fcmp(tmp, 1, true, false); // Stack: X Y @@ -3844,6 +3873,92 @@ movl(as_Address(ArrayAddress(page, index)), tmp); } +// Special Shenandoah CAS implementation that handles false negatives +// due to concurrent evacuation. 
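The masm routine defined next implements the three-step retry protocol spelled out in its comments. For the logic without the x86 details, here is a self-contained C++ analogue; std::atomic stands in for lock cmpxchg, and a toy forwardee field stands in for the Brooks-pointer read barrier (Obj, resolve, and cas_oop are names invented for this sketch):

    #include <atomic>
    #include <cassert>

    struct Obj { Obj* forwardee; };                 // toy Brooks pointer
    Obj* resolve(Obj* p) {                          // stands in for the read barrier
      return p == nullptr ? nullptr : p->forwardee;
    }

    // Analogue of cmpxchg_oop_shenandoah: a plain CAS can report failure when
    // memory merely holds the other-space copy of the expected object. Treat a
    // failure as real only if the *resolved* pointers differ (Step 2); otherwise
    // retry against the last failure witness (Step 3), since a from-space
    // pointer can be stored back concurrently.
    bool cas_oop(std::atomic<Obj*>& addr, Obj* expected, Obj* newval) {
      Obj* witness = expected;
      if (addr.compare_exchange_strong(witness, newval)) return true;   // Step 1
      Obj* to_space_expected = resolve(expected);
      while (true) {
        if (resolve(witness) != to_space_expected) return false;        // real failure
        if (addr.compare_exchange_strong(witness, newval)) return true; // retry CAS
      }
    }

    int main() {
      Obj to_space{nullptr};   to_space.forwardee = &to_space;  // to-space copy
      Obj from_space{&to_space};                                // forwarded copy
      Obj other{nullptr};      other.forwardee = &other;
      std::atomic<Obj*> field(&from_space);      // memory holds the from-space ptr
      assert(cas_oop(field, &to_space, &other)); // false negative gets absorbed
      assert(field.load() == &other);
      return 0;
    }

The design point mirrored here is that the retry CAS expects the last failure witness rather than the original oldval, so a from-space pointer sitting in memory is swapped out directly once it has been proven to resolve to the same object.
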
+void MacroAssembler::cmpxchg_oop_shenandoah(Register res, Address addr, Register oldval, Register newval, + bool exchange, + Register tmp1, Register tmp2) { + assert (UseShenandoahGC, "Should only be used with Shenandoah"); + assert(oldval == rax, "must be in rax for implicit use in cmpxchg"); + + Label retry, done; + + // Remember oldval for retry logic below + if (UseCompressedOops) { + movl(tmp1, oldval); + } else { + movptr(tmp1, oldval); + } + + // Step 1. Try to CAS with given arguments. If successful, then we are done, + // and can safely return. + if (os::is_MP()) lock(); + if (UseCompressedOops) { + cmpxchgl(newval, addr); + } else { + cmpxchgptr(newval, addr); + } + jcc(Assembler::equal, done, true); + + // Step 2. CAS had failed. This may be a false negative. + // + // The trouble comes when we compare the to-space pointer with the from-space + // pointer to the same object. To resolve this, it will suffice to read both + // oldval and the value from memory through the read barriers -- this will give + // both to-space pointers. If they mismatch, then it was a legitimate failure. + // + if (UseCompressedOops) { + decode_heap_oop(tmp1); + } + oopDesc::bs()->interpreter_read_barrier(this, tmp1); + + if (UseCompressedOops) { + movl(tmp2, oldval); + decode_heap_oop(tmp2); + } else { + movptr(tmp2, oldval); + } + oopDesc::bs()->interpreter_read_barrier(this, tmp2); + + cmpptr(tmp1, tmp2); + jcc(Assembler::notEqual, done, true); + + // Step 3. Try to CAS again with resolved to-space pointers. + // + // Corner case: it may happen that somebody stored the from-space pointer + // to memory while we were preparing for retry. Therefore, we can fail again + // on retry, and so need to do this in loop, always re-reading the failure + // witness through the read barrier. + bind(retry); + if (os::is_MP()) lock(); + if (UseCompressedOops) { + cmpxchgl(newval, addr); + } else { + cmpxchgptr(newval, addr); + } + jcc(Assembler::equal, done, true); + + if (UseCompressedOops) { + movl(tmp2, oldval); + decode_heap_oop(tmp2); + } else { + movptr(tmp2, oldval); + } + oopDesc::bs()->interpreter_read_barrier(this, tmp2); + + cmpptr(tmp1, tmp2); + jcc(Assembler::equal, retry, true); + + // Step 4. If we need a boolean result out of CAS, check the flag again, + // and promote the result. Note that we handle the flag from both the CAS + // itself and from the retry loop. + bind(done); + if (!exchange) { + setb(Assembler::equal, res); + movzbl(res, res); + } +} + // Calls to C land // // When entering C land, the rbp, & rsp of the last Java frame have to be recorded @@ -4323,6 +4438,34 @@ bind(done); } +void MacroAssembler::shenandoah_write_barrier(Register dst) { + assert(UseShenandoahGC, "must only be called with Shenandoah GC active"); + + Label done; + + // Check for evacuation-in-progress + Address evacuation_in_progress = Address(r15_thread, in_bytes(JavaThread::evacuation_in_progress_offset())); + cmpb(evacuation_in_progress, 0); + + // The read-barrier. + movptr(dst, Address(dst, BrooksPointer::byte_offset())); + + jccb(Assembler::equal, done); + + if (dst != rax) { + xchgptr(dst, rax); // Move obj into rax and save rax into obj. + } + + assert(StubRoutines::x86::shenandoah_wb() != NULL, "need write barrier stub"); + call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::shenandoah_wb()))); + + if (dst != rax) { + xchgptr(rax, dst); // Swap back obj with rax. 
+ } + + bind(done); +} + #endif // INCLUDE_ALL_GCS ////////////////////////////////////////////////////////////////////////////////// @@ -5203,6 +5346,169 @@ } +void MacroAssembler::in_heap_check(Register raddr, Register tmp, Label& done) { + ShenandoahHeap *h = (ShenandoahHeap *)Universe::heap(); + + HeapWord* first_region_bottom = h->first_region_bottom(); + HeapWord* last_region_end = first_region_bottom + (ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize) * h->max_regions(); + guarantee(first_region_bottom < last_region_end, err_msg("sanity: %p < %p", first_region_bottom, last_region_end)); + movptr(tmp, (intptr_t) first_region_bottom); + cmpptr(raddr, tmp); + jcc(Assembler::below, done); + movptr(tmp, (intptr_t) last_region_end); + cmpptr(raddr, tmp); + jcc(Assembler::aboveEqual, done); + +} + +void MacroAssembler::shenandoah_cset_check(Register raddr, Register tmp1, Register tmp2, Label& done) { + // Test that oop is not in to-space. + movptr(tmp1, raddr); + shrptr(tmp1, ShenandoahHeapRegion::RegionSizeShift); + movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); + movbool(tmp2, Address(tmp2, tmp1, Address::times_1)); + testbool(tmp2); + jcc(Assembler::zero, done); + + // Check for cancelled GC. + movptr(tmp2, (intptr_t) ShenandoahHeap::cancelled_concgc_addr()); + movbool(tmp2, Address(tmp2, 0)); + testbool(tmp2); + jcc(Assembler::notZero, done); + +} + +void MacroAssembler::_shenandoah_store_addr_check(Address addr, const char* msg, const char* file, int line) { + _shenandoah_store_addr_check(addr.base(), msg, file, line); +} + +void MacroAssembler::_shenandoah_store_addr_check(Register dst, const char* msg, const char* file, int line) { + if (! UseShenandoahGC || ! ShenandoahStoreCheck) return; + if (dst == rsp) return; // Stack-based target + + Register raddr = r9; + Register tmp1 = r10; + Register tmp2 = r11; + + Label done; + + pushf(); + push(raddr); + push(tmp1); + push(tmp2); + + movptr(raddr, dst); + + // Check null. + testptr(raddr, raddr); + jcc(Assembler::zero, done); + + in_heap_check(raddr, tmp1, done); + shenandoah_cset_check(raddr, tmp1, tmp2, done); + + // Fail. + pop(tmp2); + pop(tmp1); + pop(raddr); + popf(); + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("shenandoah_store_check: %s in file: %s line: %i", msg, file, line); + b = code_string(ss.as_string()); + } + stop(b); + + bind(done); + + pop(tmp2); + pop(tmp1); + pop(raddr); + popf(); +} + +void MacroAssembler::_shenandoah_store_check(Register dst, Register value, const char* msg, const char* file, int line) { + if (! UseShenandoahGC || ! ShenandoahStoreCheck) return; + if (dst == rsp) return; // Stack-based target + + Register raddr = r8; + Register rval = r9; + Register tmp1 = r10; + Register tmp2 = r11; + + // Push tmp regs and flags. + pushf(); + push(raddr); + push(rval); + push(tmp1); + push(tmp2); + + movptr(raddr, dst); + movptr(rval, value); + + Label done; + + // If not in-heap target, skip check. + in_heap_check(raddr, tmp1, done); + + // Test that target oop is not in to-space. + shenandoah_cset_check(raddr, tmp1, tmp2, done); + + // Do value-check only when concurrent mark is in progress. + movptr(tmp1, (intptr_t) ShenandoahHeap::concurrent_mark_in_progress_addr()); + movbool(tmp1, Address(tmp1, 0)); + testbool(tmp1); + jcc(Assembler::zero, done); + + // Null-check value. + testptr(rval, rval); + jcc(Assembler::zero, done); + + // Test that value oop is not in to-space. + shenandoah_cset_check(rval, tmp1, tmp2, done); + + // Failure. 
+ // Pop tmp regs and flags. + pop(tmp2); + pop(tmp1); + pop(rval); + pop(raddr); + popf(); + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("shenandoah_store_check: %s in file: %s line: %i", msg, file, line); + b = code_string(ss.as_string()); + } + stop(b); + + bind(done); + + // Pop tmp regs and flags. + pop(tmp2); + pop(tmp1); + pop(rval); + pop(raddr); + popf(); +} + +void MacroAssembler::_shenandoah_store_check(Address addr, Register value, const char* msg, const char* file, int line) { + _shenandoah_store_check(addr.base(), value, msg, file, line); +} + +void MacroAssembler::_shenandoah_lock_check(Register dst, const char* msg, const char* file, int line) { +#ifdef ASSERT + if (! UseShenandoahGC || ! ShenandoahStoreCheck) return; + + push(r8); + movptr(r8, Address(dst, BasicObjectLock::obj_offset_in_bytes())); + _shenandoah_store_addr_check(r8, msg, file, line); + pop(r8); +#endif +} + RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset) { diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/macroAssembler_x86.hpp --- a/src/cpu/x86/vm/macroAssembler_x86.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/macroAssembler_x86.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -313,6 +313,8 @@ Register tmp, Register tmp2); + void shenandoah_write_barrier(Register dst); + #endif // INCLUDE_ALL_GCS // split store_check(Register obj) to enhance instruction interleaving @@ -579,6 +581,20 @@ void verify_oop(Register reg, const char* s = "broken oop"); void verify_oop_addr(Address addr, const char * s = "broken oop addr"); + void in_heap_check(Register raddr, Register tmp, Label& done); + void shenandoah_cset_check(Register raddr, Register tmp1, Register tmp2, Label& done); + + void _shenandoah_store_addr_check(Register dst, const char* msg, const char* file, int line); + void _shenandoah_store_addr_check(Address dst, const char* msg, const char* file, int line); +#define shenandoah_store_addr_check(reg) _shenandoah_store_addr_check(reg, "oop not safe for writing", __FILE__, __LINE__) + + void _shenandoah_store_check(Address addr, Register value, const char* msg, const char* file, int line); + void _shenandoah_store_check(Register addr, Register value, const char* msg, const char* file, int line); +#define shenandoah_store_check(addr, value) _shenandoah_store_check(addr, value, "oop not safe for writing", __FILE__, __LINE__) + + void _shenandoah_lock_check(Register dst, const char* msg, const char* file, int line); +#define shenandoah_lock_check(reg) _shenandoah_lock_check(reg, "lock/oop not safe for writing", __FILE__, __LINE__) + // TODO: verify method and klass metadata (compare against vptr?) void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} @@ -748,6 +764,12 @@ void cmpxchgptr(Register reg, Address adr); + // Special Shenandoah CAS implementation that handles false negatives + // due to concurrent evacuation. 
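The declaration below matches the definition given earlier in macroAssembler_x86.cpp. Two points of its contract are easy to miss: oldval must live in rax, because cmpxchg uses rax implicitly, and exchange selects between leaving the witness value in the result and materializing a 0/1 flag in res. A hypothetical call site (not from the patch; registers picked only for illustration) would look like:

    // hypothetical usage, showing the expected register discipline
    __ cmpxchg_oop_shenandoah(rax /* res */, field_addr, rax /* oldval */,
                              rbx /* newval */, false /* boolean result */,
                              r8 /* tmp1 */, r9 /* tmp2 */);
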
+ void cmpxchg_oop_shenandoah(Register res, Address addr, Register oldval, Register newval, + bool exchange, + Register tmp1, Register tmp2); + void locked_cmpxchgptr(Register reg, AddressLiteral adr); diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/sharedRuntime_x86_64.cpp --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -696,6 +696,10 @@ range_check(masm, rax, r11, StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(), L_ok); + if (StubRoutines::code3() != NULL) + range_check(masm, rax, r11, + StubRoutines::code3()->code_begin(), StubRoutines::code3()->code_end(), + L_ok); const char* msg = "i2c adapter must return to an interpreter frame"; __ block_comment(msg); __ stop(msg); @@ -2453,6 +2457,7 @@ Label done; + __ shenandoah_store_addr_check(obj_reg); if (UseBiasedLocking) { __ biased_locking_exit(obj_reg, old_hdr, done); } diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/shenandoahBarrierSet_x86.cpp --- a/src/cpu/x86/vm/shenandoahBarrierSet_x86.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/shenandoahBarrierSet_x86.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -182,7 +182,7 @@ compile_resolve_oop_runtime(masm, dst); return; } - __ movptr(dst, Address(dst, BrooksPointer::BYTE_OFFSET)); + __ movptr(dst, Address(dst, BrooksPointer::byte_offset())); } } @@ -192,85 +192,7 @@ return interpreter_read_barrier(masm, dst); } - assert(dst != rscratch1, "different regs"); - //assert(dst != rscratch2, "Need rscratch2"); - - Label done; - - Address evacuation_in_progress = Address(r15_thread, in_bytes(JavaThread::evacuation_in_progress_offset())); - - __ cmpb(evacuation_in_progress, 0); - - // Now check if evacuation is in progress. - interpreter_read_barrier_not_null(masm, dst); - - __ jcc(Assembler::equal, done); - __ push(rscratch1); - __ push(rscratch2); - - __ movptr(rscratch1, dst); - __ shrptr(rscratch1, ShenandoahHeapRegion::RegionSizeShift); - __ movptr(rscratch2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); - __ movbool(rscratch2, Address(rscratch2, rscratch1, Address::times_1)); - __ testb(rscratch2, 0x1); - - __ pop(rscratch2); - __ pop(rscratch1); - - __ jcc(Assembler::zero, done); - - __ push(rscratch1); - - // Save possibly live regs. - if (dst != rax) { - __ push(rax); - } - if (dst != rbx) { - __ push(rbx); - } - if (dst != rcx) { - __ push(rcx); - } - if (dst != rdx) { - __ push(rdx); - } - if (dst != c_rarg1) { - __ push(c_rarg1); - } - - __ subptr(rsp, 2 * wordSize); - __ movdbl(Address(rsp, 0), xmm0); - - // Call into runtime - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahBarrierSet::write_barrier_interp), dst); - __ mov(rscratch1, rax); - - // Restore possibly live regs. - __ movdbl(xmm0, Address(rsp, 0)); - __ addptr(rsp, 2 * Interpreter::stackElementSize); - - if (dst != c_rarg1) { - __ pop(c_rarg1); - } - if (dst != rdx) { - __ pop(rdx); - } - if (dst != rcx) { - __ pop(rcx); - } - if (dst != rbx) { - __ pop(rbx); - } - if (dst != rax) { - __ pop(rax); - } - - // Move result into dst reg. 
- __ mov(dst, rscratch1); - - __ pop(rscratch1); - - __ bind(done); + __ shenandoah_write_barrier(dst); } void ShenandoahBarrierSet::asm_acmp_barrier(MacroAssembler* masm, Register op1, Register op2) { @@ -283,7 +205,7 @@ } void ShenandoahHeap::compile_prepare_oop(MacroAssembler* masm, Register obj) { - __ incrementq(obj, BrooksPointer::BROOKS_POINTER_OBJ_SIZE * HeapWordSize); - __ movptr(Address(obj, -1 * HeapWordSize), obj); + __ incrementq(obj, BrooksPointer::byte_size()); + __ movptr(Address(obj, BrooksPointer::byte_offset()), obj); } #endif diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -756,7 +756,7 @@ return start; } - address generate_shenandoah_wb() { + address generate_shenandoah_wb(bool c_abi) { StubCodeMark mark(this, "StubRoutines", "shenandoah_wb"); address start = __ pc(); @@ -765,33 +765,43 @@ // We use RDI, which also serves as argument register for slow call. // RAX always holds the src object ptr, except after the slow call and // the cmpxchg, then it holds the result. - // RBX and RCX are used as temporary registers. - __ push(rdi); - __ push(rbx); + // R8 and RCX are used as temporary registers. + if (!c_abi) { + __ push(rdi); + __ push(r8); + } // Check for object beeing in the collection set. // TODO: Can we use only 1 register here? // The source object arrives here in rax. // live: rax // live: rdi - __ movptr(rdi, rax); + if (!c_abi) { + __ mov(rdi, rax); + } else { + __ mov(rax, rdi); + } __ shrptr(rdi, ShenandoahHeapRegion::RegionSizeShift); - // live: rbx - __ movptr(rbx, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); - __ movbool(rbx, Address(rbx, rdi, Address::times_1)); + // live: r8 + __ movptr(r8, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); + __ movbool(r8, Address(r8, rdi, Address::times_1)); // unlive: rdi - __ testbool(rbx); - // unlive: rbx + __ testbool(r8); + // unlive: r8 __ jccb(Assembler::notZero, not_done); - __ pop(rbx); - __ pop(rdi); + if (!c_abi) { + __ pop(r8); + __ pop(rdi); + } __ ret(0); __ bind(not_done); - __ push(rcx); - Register new_obj = rbx; + if (!c_abi) { + __ push(rcx); + } + Register new_obj = r8; __ movptr(new_obj, Address(r15_thread, JavaThread::gclab_top_offset())); __ testptr(new_obj, new_obj); __ jcc(Assembler::zero, slow_case); // No TLAB. @@ -806,7 +816,7 @@ __ jcc(Assembler::lessEqual, not_an_instance); // Thrashes rcx, returns size in rcx. Uses rax. __ bind(is_array); - // Size in rdi, new_obj in rbx, src obj in rax + // Size in rdi, new_obj in r8, src obj in rax Register new_obj_end = rdi; int oop_extra_words = Universe::heap()->oop_extra_words(); @@ -819,38 +829,50 @@ // Store Brooks pointer and adjust start of newobj. Universe::heap()->compile_prepare_oop(_masm, new_obj); - // Size in rcx, new_obj in rbx, src obj in rax + // Size in rcx, new_obj in r8, src obj in rax // Copy object. Label loop; - __ push(rdi); // Save new_obj_end - __ push(rsi); + if (!c_abi) { + __ push(rdi); // Save new_obj_end + __ push(rsi); + } else { + __ mov(r9, rdi); // Save new_obj_end + } __ shrl(rcx, 3); // Make it num-64-bit-words - __ mov(rdi, rbx); // Mov dst into rdi + __ mov(rdi, r8); // Mov dst into rdi __ mov(rsi, rax); // Src into rsi. __ rep_mov(); - __ pop(rsi); // Restore rsi. - __ pop(rdi); // Restore new_obj_end + if (!c_abi) { + __ pop(rsi); // Restore rsi. 
+ __ pop(rdi); // Restore new_obj_end + } else { + __ mov(rdi, r9); // Restore new_obj_end + } // Src obj still in rax. if (os::is_MP()) { __ lock(); } - __ cmpxchgptr(new_obj, Address(rax, BrooksPointer::BYTE_OFFSET, Address::times_1)); + __ cmpxchgptr(new_obj, Address(rax, BrooksPointer::byte_offset(), Address::times_1)); __ jccb(Assembler::notEqual, done); // Failed. Updated object in rax. // Otherwise, we succeeded. __ mov(rax, new_obj); __ movptr(Address(r15_thread, JavaThread::gclab_top_offset()), new_obj_end); __ bind(done); - __ pop(rcx); - __ pop(rbx); - __ pop(rdi); + if (!c_abi) { + __ pop(rcx); + __ pop(r8); + __ pop(rdi); + } __ ret(0); __ bind(not_an_instance); - __ push(rdx); + if (!c_abi) { + __ push(rdx); + } // Layout_helper bits are in rcx __ movl(rdx, rcx); // Move layout_helper bits to rdx __ movl(rdi, Address(rax, arrayOopDesc::length_offset_in_bytes())); @@ -863,43 +885,48 @@ // Round up. __ addl(rdi, HeapWordSize-1); __ andl(rdi, -HeapWordSize); - __ pop(rdx); + if (!c_abi) { + __ pop(rdx); + } // Move size (rdi) into rcx __ movl(rcx, rdi); __ jmp(is_array); __ bind(slow_case); - __ push(rdx); - __ push(rdi); - __ push(rsi); - __ push(r8); - __ push(r9); - __ push(r10); - __ push(r11); - __ push(r12); - __ push(r13); - __ push(r14); - __ push(r15); + if (!c_abi) { + __ push(rdx); + __ push(rdi); + __ push(rsi); + __ push(r8); + __ push(r9); + __ push(r10); + __ push(r11); + __ push(r12); + __ push(r13); + __ push(r14); + __ push(r15); + } __ save_vector_registers(); __ movptr(rdi, rax); __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahBarrierSet::write_barrier_c2), rdi); __ restore_vector_registers(); - __ pop(r15); - __ pop(r14); - __ pop(r13); - __ pop(r12); - __ pop(r11); - __ pop(r10); - __ pop(r9); - __ pop(r8); - __ pop(rsi); - __ pop(rdi); - __ pop(rdx); - - __ pop(rcx); - __ pop(rbx); - __ pop(rdi); - + if (!c_abi) { + __ pop(r15); + __ pop(r14); + __ pop(r13); + __ pop(r12); + __ pop(r11); + __ pop(r10); + __ pop(r9); + __ pop(r8); + __ pop(rsi); + __ pop(rdi); + __ pop(rdx); + + __ pop(rcx); + __ pop(r8); + __ pop(rdi); + } __ ret(0); return start; @@ -4204,9 +4231,6 @@ throw_NullPointerException_at_call)); // entry points that are platform specific - if (UseShenandoahGC) { - StubRoutines::x86::_shenandoah_wb = generate_shenandoah_wb(); - } StubRoutines::x86::_f2i_fixup = generate_f2i_fixup(); StubRoutines::x86::_f2l_fixup = generate_f2l_fixup(); StubRoutines::x86::_d2i_fixup = generate_d2i_fixup(); @@ -4266,16 +4290,27 @@ #endif // COMPILER2 } + void generate_barriers() { + if (UseShenandoahGC) { + StubRoutines::x86::_shenandoah_wb = generate_shenandoah_wb(false); + StubRoutines::_shenandoah_wb_C = generate_shenandoah_wb(true); + } + } + public: - StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { - if (all) { + StubGenerator(CodeBuffer* code, int phase) : StubCodeGenerator(code) { + if (phase == 2) { generate_all(); + } else if (phase == 1) { + generate_initial(); + } else if (phase == 3) { + generate_barriers(); } else { - generate_initial(); + ShouldNotReachHere(); } } }; // end class declaration -void StubGenerator_generate(CodeBuffer* code, bool all) { - StubGenerator g(code, all); +void StubGenerator_generate(CodeBuffer* code, int phase) { + StubGenerator g(code, phase); } diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/stubRoutines_x86_64.hpp --- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -33,7 +33,8 @@ enum 
platform_dependent_constants { code_size1 = 19000, // simply increase if too small (assembler will crash if too small) - code_size2 = 23000 // simply increase if too small (assembler will crash if too small) + code_size2 = 23000, // simply increase if too small (assembler will crash if too small) + code_size3 = 2000 // simply increase if too small (assembler will crash if too small) }; class x86 { diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/templateInterpreter_x86_64.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -549,6 +549,7 @@ __ subptr(rsp, entry_size); // add space for a monitor entry __ movptr(monitor_block_top, rsp); // set new monitor block top // store object + __ shenandoah_store_addr_check(rax); __ movptr(Address(rsp, BasicObjectLock::obj_offset_in_bytes()), rax); __ movptr(c_rarg1, rsp); // object address __ lock_object(c_rarg1); @@ -1360,6 +1361,7 @@ __ lea(c_rarg1, monitor); // address of first monitor __ movptr(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ shenandoah_store_addr_check(t); // Invariant __ testptr(t, t); __ jcc(Assembler::notZero, unlock); diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/templateTable_x86_64.cpp --- a/src/cpu/x86/vm/templateTable_x86_64.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/templateTable_x86_64.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -3727,11 +3727,7 @@ // starting with top-most entry __ lea(c_rarg2, monitor_block_bot); // points to word before bottom // of monitor block - if (UseShenandoahGC && ShenandoahVerifyReadsToFromSpace) { - __ jmp(entry); - } else { - __ jmpb(entry); - } + __ jmpb_if_possible(entry); __ bind(loop); // check if current entry is used @@ -3739,9 +3735,8 @@ // if not used then remember entry in c_rarg1 __ cmov(Assembler::equal, c_rarg1, c_rarg3); // check if current entry is for same object - __ movptr(rscratch1, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); - oopDesc::bs()->interpreter_read_barrier(_masm, rscratch1); - __ cmpptr(rax, rscratch1); + __ shenandoah_lock_check(c_rarg3); // Invariant + __ cmpptr(rax, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); // if same object then stop searching __ jccb(Assembler::equal, exit); // otherwise advance to next entry @@ -3790,6 +3785,7 @@ __ increment(r13); // store object + __ shenandoah_store_addr_check(rax); // Invariant __ movptr(Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()), rax); __ lock_object(c_rarg1); @@ -3829,17 +3825,12 @@ // starting with top-most entry __ lea(c_rarg2, monitor_block_bot); // points to word before bottom // of monitor block - if (UseShenandoahGC && ShenandoahVerifyReadsToFromSpace) { - __ jmp(entry); - } else { - __ jmpb(entry); - } + __ jmpb_if_possible(entry); __ bind(loop); // check if current entry is for same object - __ movptr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); - oopDesc::bs()->interpreter_read_barrier(_masm, rscratch1); - __ cmpptr(rax, rscratch1); + __ shenandoah_lock_check(c_rarg1); // Invariant + __ cmpptr(rax, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); // if same object then stop searching __ jcc(Assembler::equal, found); // otherwise advance to next entry diff -r b1cf900aa021 -r 87059e2365be src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Fri Nov 04 07:21:01 2016 -0400 +++ b/src/cpu/x86/vm/x86_64.ad Wed Dec 07 21:03:02 2016 +0100 @@ -2685,6 +2685,15 @@ RELOC_DISP32); %} + 
enc_class shenandoah_store_check(memory mem, any_RegP src) %{ + MacroAssembler _masm(&cbuf); + __ shenandoah_store_check($mem$$Address, $src$$Register); + %} + + enc_class shenandoah_store_addr_check(memory mem) %{ + MacroAssembler _masm(&cbuf); + __ shenandoah_store_addr_check($mem$$Address); + %} %} @@ -5636,7 +5645,7 @@ ins_cost(125); // XXX format %{ "movb $mem, $src\t# byte" %} opcode(0x88); - ins_encode(REX_breg_mem(src, mem), OpcP, reg_mem(src, mem)); + ins_encode(shenandoah_store_addr_check(mem), REX_breg_mem(src, mem), OpcP, reg_mem(src, mem)); ins_pipe(ialu_mem_reg); %} @@ -5648,7 +5657,7 @@ ins_cost(125); // XXX format %{ "movw $mem, $src\t# char/short" %} opcode(0x89); - ins_encode(SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem)); + ins_encode(shenandoah_store_addr_check(mem), SizePrefix, REX_reg_mem(src, mem), OpcP, reg_mem(src, mem)); ins_pipe(ialu_mem_reg); %} @@ -5660,7 +5669,7 @@ ins_cost(125); // XXX format %{ "movl $mem, $src\t# int" %} opcode(0x89); - ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem)); + ins_encode(shenandoah_store_addr_check(mem), REX_reg_mem(src, mem), OpcP, reg_mem(src, mem)); ins_pipe(ialu_mem_reg); %} @@ -5672,7 +5681,7 @@ ins_cost(125); // XXX format %{ "movq $mem, $src\t# long" %} opcode(0x89); - ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem)); + ins_encode(shenandoah_store_addr_check(mem), REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem)); ins_pipe(ialu_mem_reg); // XXX %} @@ -5684,7 +5693,7 @@ ins_cost(125); // XXX format %{ "movq $mem, $src\t# ptr" %} opcode(0x89); - ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem)); + ins_encode(shenandoah_store_check(mem, src), REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem)); ins_pipe(ialu_mem_reg); %} @@ -5696,6 +5705,7 @@ ins_cost(125); // XXX format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ movq($mem$$Address, r12); %} ins_pipe(ialu_mem_reg); @@ -5709,7 +5719,7 @@ ins_cost(150); // XXX format %{ "movq $mem, $src\t# ptr" %} opcode(0xC7); /* C7 /0 */ - ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); + ins_encode(shenandoah_store_addr_check(mem), REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); ins_pipe(ialu_mem_imm); %} @@ -5721,6 +5731,7 @@ ins_cost(125); // XXX format %{ "movl $mem, $src\t# compressed ptr" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ movl($mem$$Address, $src$$Register); %} ins_pipe(ialu_mem_reg); @@ -5733,6 +5744,7 @@ ins_cost(125); // XXX format %{ "movl $mem, $src\t# compressed klass ptr" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ movl($mem$$Address, $src$$Register); %} ins_pipe(ialu_mem_reg); @@ -5746,6 +5758,7 @@ ins_cost(125); // XXX format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ movl($mem$$Address, r12); %} ins_pipe(ialu_mem_reg); @@ -5759,6 +5772,7 @@ format %{ "movl $mem, $src\t# compressed ptr" %} ins_encode %{ address con = (address)$src$$constant; + __ shenandoah_store_addr_check($mem$$Address); if (con == NULL) { __ movl($mem$$Address, (int32_t)0); } else { @@ -5775,6 +5789,7 @@ ins_cost(150); // XXX format %{ "movl $mem, $src\t# compressed klass ptr" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant); %} ins_pipe(ialu_mem_imm); @@ -5789,6 +5804,7 @@ ins_cost(125); // XXX format %{ "movl $mem, R12\t# int 
(R12_heapbase==0)" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ movl($mem$$Address, r12); %} ins_pipe(ialu_mem_reg); @@ -5801,7 +5817,7 @@ ins_cost(150); format %{ "movl $mem, $src\t# int" %} opcode(0xC7); /* C7 /0 */ - ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); + ins_encode(shenandoah_store_addr_check(mem), REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); ins_pipe(ialu_mem_imm); %} @@ -5814,6 +5830,7 @@ ins_cost(125); // XXX format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ movq($mem$$Address, r12); %} ins_pipe(ialu_mem_reg); @@ -5826,7 +5843,7 @@ ins_cost(150); format %{ "movq $mem, $src\t# long" %} opcode(0xC7); /* C7 /0 */ - ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); + ins_encode(shenandoah_store_addr_check(mem), REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); ins_pipe(ialu_mem_imm); %} @@ -5839,6 +5856,7 @@ ins_cost(125); // XXX format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ movw($mem$$Address, r12); %} ins_pipe(ialu_mem_reg); @@ -5852,7 +5870,7 @@ ins_cost(150); format %{ "movw $mem, $src\t# short/char" %} opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ - ins_encode(SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src)); + ins_encode(shenandoah_store_addr_check(mem), SizePrefix, REX_mem(mem), OpcP, RM_opc_mem(0x00, mem),Con16(src)); ins_pipe(ialu_mem_imm); %} @@ -5865,6 +5883,7 @@ ins_cost(125); // XXX format %{ "movb $mem, R12\t# short/char (R12_heapbase==0)" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ movb($mem$$Address, r12); %} ins_pipe(ialu_mem_reg); @@ -5877,7 +5896,7 @@ ins_cost(150); // XXX format %{ "movb $mem, $src\t# byte" %} opcode(0xC6); /* C6 /0 */ - ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src)); + ins_encode(shenandoah_store_addr_check(mem), REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src)); ins_pipe(ialu_mem_imm); %} @@ -5914,6 +5933,7 @@ ins_cost(95); // XXX format %{ "movss $mem, $src\t# float" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ movflt($mem$$Address, $src$$XMMRegister); %} ins_pipe(pipe_slow); // XXX @@ -5928,6 +5948,7 @@ ins_cost(25); // XXX format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ movl($mem$$Address, r12); %} ins_pipe(ialu_mem_reg); @@ -5940,7 +5961,7 @@ ins_cost(50); format %{ "movl $mem, $src\t# float" %} opcode(0xC7); /* C7 /0 */ - ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src)); + ins_encode(shenandoah_store_addr_check(mem), REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src)); ins_pipe(ialu_mem_imm); %} @@ -5952,6 +5973,7 @@ ins_cost(95); // XXX format %{ "movsd $mem, $src\t# double" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ movdbl($mem$$Address, $src$$XMMRegister); %} ins_pipe(pipe_slow); // XXX @@ -5966,7 +5988,7 @@ ins_cost(50); format %{ "movq $mem, $src\t# double 0." %} opcode(0xC7); /* C7 /0 */ - ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src)); + ins_encode(shenandoah_store_addr_check(mem), REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32F_as_bits(src)); ins_pipe(ialu_mem_imm); %} @@ -5978,6 +6000,7 @@ ins_cost(25); // XXX format %{ "movq $mem, R12\t# double 0. 
(R12_heapbase==0)" %} ins_encode %{ + __ shenandoah_store_addr_check($mem$$Address); __ movq($mem$$Address, r12); %} ins_pipe(ialu_mem_reg); @@ -6427,11 +6450,39 @@ match(Set dst (ShenandoahReadBarrier src)); effect(DEF dst, USE src); ins_cost(125); // XXX - format %{ "shenandoah_rb $dst,$src" %} - ins_encode %{ + format %{ "shenandoah_rb $dst, $src" %} + ins_encode %{ + Register d = $dst$$Register; Register s = $src$$Register; + __ movptr(d, Address(s, BrooksPointer::byte_offset())); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct shenandoahRBNarrow(rRegP dst, rRegN src) %{ + predicate(UseCompressedOops && (Universe::narrow_oop_shift() == 0)); + match(Set dst (ShenandoahReadBarrier (DecodeN src))); + effect(DEF dst, USE src); + ins_cost(125); // XXX + format %{ "shenandoah_rb $dst, $src" %} + ins_encode %{ Register d = $dst$$Register; - __ movptr(d, Address(s, -8)); + Register s = $src$$Register; + __ movptr(d, Address(r12, s, Address::times_1, BrooksPointer::byte_offset())); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct shenandoahRBNarrowShift(rRegP dst, rRegN src) %{ + predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8)); + match(Set dst (ShenandoahReadBarrier (DecodeN src))); + effect(DEF dst, USE src); + ins_cost(125); // XXX + format %{ "shenandoah_rb $dst, $src" %} + ins_encode %{ + Register d = $dst$$Register; + Register s = $src$$Register; + __ movptr(d, Address(r12, s, Address::times_8, BrooksPointer::byte_offset())); %} ins_pipe(ialu_reg_mem); %} @@ -6442,22 +6493,12 @@ ins_cost(300); // XXX format %{ "shenandoah_wb $dst,$src" %} ins_encode %{ - Label done; Register s = $src$$Register; Register d = $dst$$Register; - Address evacuation_in_progress = Address(r15_thread, in_bytes(JavaThread::evacuation_in_progress_offset())); - __ movptr(d, Address(s, -8)); - __ cmpb(evacuation_in_progress, 0); - __ movptr(d, Address(s, -8)); - __ jccb(Assembler::equal, done); - if (rax != d) { - __ xchgptr(rax, d); - } - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::shenandoah_wb()))); - if (rax != d) { - __ xchgptr(rax, d); - } - __ bind(done); + // We need that first read barrier in order to trigger a SEGV/NPE on incoming NULL. + // Also, it brings s into d in preparation for the call to shenandoah_write_barrier(). 
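shenandoah_write_barrier, added to macroAssembler_x86.cpp earlier in this changeset and called just below, expands to: an unconditional read barrier through the Brooks pointer (stored at a negative offset, hence BrooksPointer::byte_offset()), a test of the per-thread evacuation flag, and a call into the shenandoah_wb stub only while evacuation is running. A minimal C++ rendering of that control flow, with the forwarding word modeled as an ordinary field and evacuate() standing in for the stub (all names here are inventions of the sketch):

    #include <cassert>

    struct Obj { Obj* forwardee; };             // Brooks pointer as a plain field
    bool evacuation_in_progress = false;        // models the per-thread flag
    Obj* evacuate(Obj* obj) { return obj; }     // stands in for the shenandoah_wb
                                                // stub, which re-tests the cset
                                                // and copies the object if needed

    // Control flow of MacroAssembler::shenandoah_write_barrier: resolve through
    // the forwarding pointer unconditionally, take the slow path only while
    // evacuation runs. Dereferencing first mirrors the generated code's intent
    // of faulting early on a NULL object.
    Obj* write_barrier(Obj* obj) {
      obj = obj->forwardee;                     // the read barrier
      if (!evacuation_in_progress) return obj;  // fast path
      return evacuate(obj);                     // slow path
    }

    int main() {
      Obj o{nullptr}; o.forwardee = &o;         // object forwarded to itself
      assert(write_barrier(&o) == &o);
      return 0;
    }

Note the ordering in the generated code: the evacuation flag is compared before the read barrier executes, but movptr does not touch the condition codes, so the later branch still sees the cmpb result.
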
+ __ movptr(d, Address(s, BrooksPointer::byte_offset())); + __ shenandoah_write_barrier(d); %} ins_pipe(pipe_slow); %} @@ -7341,39 +7382,13 @@ effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval); ins_cost(1000); - format %{ "movl $res, 1\n\t" - "mov $tmp1, $oldval\n\t" - "retry:\n\t" - "cmpxchgq $mem_ptr,$newval\t# " - "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" - "read_barrier $tmp1\n\t" - "mov $tmp2, $oldval\n\t" - "read_barrier $tmp2\n\t" - "cmpptr $tmp1, $tmp2\n\t" - "je retry\n\t" - "done:\n\t" - "sete $res\n\t" - "movzbl $res, $res" %} - ins_encode %{ - Label retry, done; - - __ mov($tmp1$$Register, $oldval$$Register); - __ bind(retry); - if (os::is_MP()) { - __ lock(); - } - __ cmpxchgptr($newval$$Register, $mem_ptr$$Address); - __ jccb(Assembler::equal, done); - - oopDesc::bs()->interpreter_read_barrier(&_masm, $tmp1$$Register); - __ mov($tmp2$$Register, $oldval$$Register); - oopDesc::bs()->interpreter_read_barrier(&_masm, $tmp2$$Register); - __ cmpptr($tmp1$$Register, $tmp2$$Register); - __ jccb(Assembler::equal, retry); - - __ bind(done); - __ setb(Assembler::zero, $res$$Register); - __ movzbl($res$$Register, $res$$Register); + format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} + + ins_encode %{ + __ cmpxchg_oop_shenandoah($res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, + false, // swap + $tmp1$$Register, $tmp2$$Register + ); %} ins_pipe( pipe_cmpxchg ); %} @@ -7430,6 +7445,7 @@ memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr) %{ + predicate(!UseShenandoahGC); match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); @@ -7448,6 +7464,27 @@ ins_pipe( pipe_cmpxchg ); %} +instruct compareAndSwapN_shenandoah(rRegI res, + memory mem_ptr, + rRegP tmp1, rRegP tmp2, + rax_RegN oldval, rRegN newval, + rFlagsReg cr) %{ + predicate(UseShenandoahGC); + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval); + ins_cost(1000); + + format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} + + ins_encode %{ + __ cmpxchg_oop_shenandoah($res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, + false, // swap + $tmp1$$Register, $tmp2$$Register + ); + %} + ins_pipe( pipe_cmpxchg ); +%} + instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ predicate(n->as_LoadStore()->result_not_used()); match(Set dummy (GetAndAddI mem add)); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/asm/assembler.cpp --- a/src/share/vm/asm/assembler.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/asm/assembler.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -308,7 +308,8 @@ // the 'offset' is equal to [heap_base + offset] for // narrow oop implicit null checks. uintptr_t base = (uintptr_t)Universe::narrow_oop_base(); - if ((uintptr_t)offset >= base) { + int adj = MIN2(0, UseShenandoahGC ? BrooksPointer::byte_offset() : 0); + if ((uintptr_t)(offset - adj) >= base) { // Normalize offset for the next check. 
offset = (intptr_t)(pointer_delta((void*)offset, (void*)base, 1)); } @@ -324,7 +325,7 @@ if (UseShenandoahGC && ((offset & address_bits) - == (BrooksPointer::BYTE_OFFSET & address_bits))) + == (BrooksPointer::byte_offset() & address_bits))) return false; return offset < 0 || os::vm_page_size() <= offset; diff -r b1cf900aa021 -r 87059e2365be src/share/vm/c1/c1_LIRGenerator.cpp --- a/src/share/vm/c1/c1_LIRGenerator.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -1848,7 +1848,7 @@ __ cmp(lir_cond_equal, result, LIR_OprFact::oopConst(NULL)); __ branch(lir_cond_equal, T_LONG, done->label()); } - LIR_Address* brooks_ptr_address = generate_address(result, BrooksPointer::BYTE_OFFSET, T_ADDRESS); + LIR_Address* brooks_ptr_address = generate_address(result, BrooksPointer::byte_offset(), T_ADDRESS); __ load(brooks_ptr_address, result, info ? new CodeEmitInfo(info) : NULL, lir_patch_none); __ branch_destination(done->label()); @@ -3084,9 +3084,9 @@ // Code for : x->x() {x->cond()} x->y() ? x->tval() : x->fval() void LIRGenerator::do_IfOp(IfOp* x) { - ValueTag xtag = x->x()->type()->tag(); #ifdef ASSERT { + ValueTag xtag = x->x()->type()->tag(); ValueTag ttag = x->tval()->type()->tag(); assert(xtag == intTag || xtag == objectTag, "cannot handle others"); assert(ttag == addressTag || ttag == intTag || ttag == objectTag || ttag == longTag, "cannot handle others"); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/c1/c1_Runtime1.cpp --- a/src/share/vm/c1/c1_Runtime1.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/c1/c1_Runtime1.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -38,7 +38,6 @@ #include "code/scopeDesc.hpp" #include "code/vtableStubs.hpp" #include "compiler/disassembler.hpp" -#include "gc_implementation/shenandoah/shenandoahBarrierSet.hpp" #include "gc_interface/collectedHeap.hpp" #include "interpreter/bytecode.hpp" #include "interpreter/interpreter.hpp" @@ -217,7 +216,6 @@ switch (id) { // These stubs don't need to have an oopmap case dtrace_object_alloc_id: - case shenandoah_write_barrier_slow_id: case g1_pre_barrier_slow_id: case g1_post_barrier_slow_id: case slow_subtype_check_id: @@ -330,7 +328,6 @@ FUNCTION_CASE(entry, TRACE_TIME_METHOD); #endif FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32()); - FUNCTION_CASE(entry, ShenandoahBarrierSet::write_barrier_c1); #undef FUNCTION_CASE @@ -688,7 +685,7 @@ if (PrintBiasedLockingStatistics) { Atomic::inc(BiasedLocking::slow_path_entry_count_addr()); } - Handle h_obj(thread, oopDesc::bs()->write_barrier(obj)); + Handle h_obj(thread, obj); assert(h_obj()->is_oop(), "must be NULL or an object"); if (UseBiasedLocking) { // Retry fast entry if bias is revoked to avoid unnecessary inflation @@ -713,7 +710,7 @@ // monitorexit is non-blocking (leaf routine) => no exceptions can be thrown EXCEPTION_MARK; - oop obj = oopDesc::bs()->write_barrier(lock->obj()); + oop obj = lock->obj(); assert(obj->is_oop(), "must be NULL or an object"); if (UseFastLocking) { // When using fast locking, the compiled code has already tried the fast case diff -r b1cf900aa021 -r 87059e2365be src/share/vm/c1/c1_Runtime1.hpp --- a/src/share/vm/c1/c1_Runtime1.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/c1/c1_Runtime1.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -68,7 +68,6 @@ stub(load_klass_patching) \ stub(load_mirror_patching) \ stub(load_appendix_patching) \ - stub(shenandoah_write_barrier_slow)\ stub(g1_pre_barrier_slow) \ stub(g1_post_barrier_slow) \ stub(fpu2long_stub) \ diff -r b1cf900aa021 -r 87059e2365be 
src/share/vm/ci/ciInstanceKlass.cpp --- a/src/share/vm/ci/ciInstanceKlass.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/ci/ciInstanceKlass.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -177,12 +177,12 @@ // ciInstanceKlass* ciInstanceKlass::get_canonical_holder(int offset) { #ifdef ASSERT - if (!(offset >= 0 && offset < layout_helper() || (offset == BrooksPointer::BYTE_OFFSET && UseShenandoahGC))) { + if (!(offset >= 0 && offset < layout_helper() || (offset == BrooksPointer::byte_offset() && UseShenandoahGC))) { tty->print("*** get_canonical_holder(%d) on ", offset); this->print(); tty->print_cr(" ***"); }; - assert(offset >= 0 && offset < layout_helper() || (offset == BrooksPointer::BYTE_OFFSET && UseShenandoahGC), "offset must be tame"); + assert(offset >= 0 && offset < layout_helper() || (offset == BrooksPointer::byte_offset() && UseShenandoahGC), "offset must be tame"); #endif if (offset < instanceOopDesc::base_offset_in_bytes()) { diff -r b1cf900aa021 -r 87059e2365be src/share/vm/classfile/classLoaderData.cpp --- a/src/share/vm/classfile/classLoaderData.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/classfile/classLoaderData.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -78,6 +78,7 @@ // The null-class-loader should always be kept alive. _keep_alive(is_anonymous || h_class_loader.is_null()), _metaspace(NULL), _unloading(false), _klasses(NULL), + _parallel_cld_claimed(0), _claimed(0), _jmethod_ids(NULL), _handles(NULL), _deallocate_list(NULL), _next(NULL), _dependencies(dependencies), _metaspace_lock(new Mutex(Monitor::leaf+1, "Metaspace allocation lock", true)) { @@ -104,6 +105,11 @@ return (int) Atomic::cmpxchg(1, &_claimed, 0) == 0; } +bool ClassLoaderData::parallel_claim_cld() { + if (_parallel_cld_claimed == 1) return false; + return Atomic::cmpxchg(1, &_parallel_cld_claimed, 0) == 0; +} + void ClassLoaderData::oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) { if (must_claim && !claim()) { return; } @@ -230,7 +236,7 @@ // Have to lock and put the new dependency on the end of the dependency // array so the card mark for CMS sees that this dependency is new. // Can probably do this lock free with some effort. - ObjectLocker ol(Handle(THREAD, oopDesc::bs()->write_barrier(_list_head)), THREAD); + ObjectLocker ol(Handle(THREAD, _list_head), THREAD); oop loader_or_mirror = new_dependency->obj_at(0); @@ -953,4 +959,49 @@ event.commit(); } + +// Implementation of ParallelCLDRootIterator +void ParallelCLDRootIterator::init(ClassLoaderData* head) { + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint"); + _head = head; + _end = head; + _cur = head; + while (_end != NULL) { + _end->clear_parallel_cld_claimed(); + if (_end->next() == NULL) { + break; + } else { + _end = _end->next(); + } + } +} + +ClassLoaderData* ParallelCLDRootIterator::claim() { + ClassLoaderData* my_cur = _cur; + ClassLoaderData* next; + while (true) { + if (my_cur->parallel_claim_cld()) { + return my_cur; + } + + if (my_cur == _end) return NULL; + + next = my_cur->next(); + my_cur = (ClassLoaderData*)Atomic::cmpxchg_ptr(next, &_cur, my_cur); + } + +} + + +bool ParallelCLDRootIterator::root_cld_do(CLDClosure* strong, CLDClosure* weak) { + ClassLoaderData* cld = claim(); + if (cld == NULL) return false; + + CLDClosure* closure = cld->keep_alive() ?
strong : weak; + if (closure != NULL) { + closure->do_cld(cld); + } + return true; +} + #endif // INCLUDE_TRACE diff -r b1cf900aa021 -r 87059e2365be src/share/vm/classfile/classLoaderData.hpp --- a/src/share/vm/classfile/classLoaderData.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/classfile/classLoaderData.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -29,6 +29,7 @@ #include "memory/memRegion.hpp" #include "memory/metaspace.hpp" #include "memory/metaspaceCounters.hpp" +#include "runtime/handles.hpp" #include "runtime/mutex.hpp" #include "utilities/growableArray.hpp" #include "utilities/macros.hpp" @@ -54,6 +55,27 @@ class JNIHandleBlock; class Metadebug; +class ParallelCLDRootIterator VALUE_OBJ_CLASS_SPEC { + friend class ClassLoaderDataGraph; + + private: + ClassLoaderData* _head; + ClassLoaderData* _end; + + ClassLoaderData* volatile _cur; + + public: + bool root_cld_do(CLDClosure* strong, CLDClosure* weak); + + + ParallelCLDRootIterator(ClassLoaderData* head) { + init(head); + } + + void init(ClassLoaderData* head); + ClassLoaderData* claim(); +}; + // GC root for walking class loader data created class ClassLoaderDataGraph : public AllStatic { @@ -86,6 +108,11 @@ static void roots_cld_do(CLDClosure* strong, CLDClosure* weak); static void keep_alive_cld_do(CLDClosure* cl); static void always_strong_cld_do(CLDClosure* cl); + + static ParallelCLDRootIterator parallel_cld_root_iterator() { + return ParallelCLDRootIterator(_head); + } + // klass do static void classes_do(KlassClosure* klass_closure); static void classes_do(void f(Klass* const)); @@ -150,6 +177,7 @@ friend class ClassLoaderDataGraphMetaspaceIterator; friend class MetaDataFactory; friend class Method; + friend class ParallelCLDRootIterator; static ClassLoaderData * _the_null_class_loader_data; @@ -183,6 +211,9 @@ // Support for walking class loader data objects ClassLoaderData* _next; /// Next loader_datas created + // Parallel class loader data support + volatile int _parallel_cld_claimed; + // ReadOnly and ReadWrite metaspaces (static because only on the null // class loader for now). static Metaspace* _ro_metaspace; @@ -206,6 +237,11 @@ bool claimed() const { return _claimed == 1; } bool claim(); + // Parallel GC support + void clear_parallel_cld_claimed() { _parallel_cld_claimed = 0; } + bool parallel_cld_claimed() const { return _parallel_cld_claimed == 1; } + bool parallel_claim_cld(); + void unload(); bool keep_alive() const { return _keep_alive; } void classes_do(void f(Klass*)); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/classfile/javaClasses.cpp --- a/src/share/vm/classfile/javaClasses.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/classfile/javaClasses.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -2904,14 +2904,14 @@ } bool java_lang_invoke_MethodType::equals(oop mt1, oop mt2) { - if (mt1 == mt2) + if (oopDesc::equals(mt1, mt2)) return true; - if (rtype(mt1) != rtype(mt2)) + if (! oopDesc::equals(rtype(mt1), rtype(mt2))) return false; if (ptype_count(mt1) != ptype_count(mt2)) return false; for (int i = ptype_count(mt1) - 1; i >= 0; i--) { - if (ptype(mt1, i) != ptype(mt2, i)) + if (! 
oopDesc::equals(ptype(mt1, i), ptype(mt2, i))) return false; } return true; diff -r b1cf900aa021 -r 87059e2365be src/share/vm/classfile/systemDictionary.cpp --- a/src/share/vm/classfile/systemDictionary.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/classfile/systemDictionary.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -1543,9 +1543,9 @@ Handle SystemDictionary::compute_loader_lock_object(Handle class_loader, TRAPS) { // If class_loader is NULL we synchronize on _system_loader_lock_obj if (class_loader.is_null()) { - return Handle(THREAD, oopDesc::bs()->write_barrier(_system_loader_lock_obj)); + return Handle(THREAD, _system_loader_lock_obj); } else { - return Handle(THREAD, oopDesc::bs()->write_barrier(class_loader())); + return class_loader; } } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/code/codeCache.cpp --- a/src/share/vm/code/codeCache.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/code/codeCache.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -338,7 +338,7 @@ void CodeCache::scavenge_root_nmethods_do(CodeBlobClosure* f) { assert_locked_or_safepoint(CodeCache_lock); - if (UseG1GC) { + if (UseG1GC || UseShenandoahGC) { return; } @@ -368,7 +368,7 @@ void CodeCache::add_scavenge_root_nmethod(nmethod* nm) { assert_locked_or_safepoint(CodeCache_lock); - if (UseG1GC) { + if (UseG1GC || UseShenandoahGC) { return; } @@ -381,7 +381,7 @@ void CodeCache::drop_scavenge_root_nmethod(nmethod* nm) { assert_locked_or_safepoint(CodeCache_lock); - if (UseG1GC) { + if (UseG1GC || UseShenandoahGC) { return; } @@ -407,7 +407,7 @@ void CodeCache::prune_scavenge_root_nmethods() { assert_locked_or_safepoint(CodeCache_lock); - if (UseG1GC) { + if (UseG1GC || UseShenandoahGC) { return; } @@ -443,7 +443,7 @@ #ifndef PRODUCT void CodeCache::asserted_non_scavengable_nmethods_do(CodeBlobClosure* f) { - if (UseG1GC) { + if (UseG1GC || UseShenandoahGC) { return; } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/code/nmethod.cpp --- a/src/share/vm/code/nmethod.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/code/nmethod.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -491,7 +491,7 @@ _oops_do_mark_link = NULL; _jmethod_id = NULL; _osr_link = NULL; - if (UseG1GC) { + if (UseG1GC || UseShenandoahGC) { _unloading_next = NULL; } else { _scavenge_root_link = NULL; @@ -1945,7 +1945,7 @@ void nmethod::mark_metadata_on_stack_non_relocs() { // Visit the metadata section for (Metadata** p = metadata_begin(); p < metadata_end(); p++) { - if (*p == Universe::non_oop_word() || *p == NULL) continue; // skip non-oops + if (*p == Universe::non_oop_word() || *p == NULL) continue; // skip non-oops Metadata* md = *p; Metadata::mark_on_stack(md); } @@ -2853,7 +2853,7 @@ }; void nmethod::verify_scavenge_root_oops() { - if (UseG1GC) { + if (UseG1GC || UseShenandoahGC) { return; } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -58,6 +58,7 @@ #include "gc_implementation/shared/gcTrace.hpp" #include "gc_implementation/shared/gcTraceTime.hpp" #include "gc_implementation/shared/isGCActiveMark.hpp" +#include "gc_implementation/shared/parallelCleaning.hpp" #include "memory/allocation.hpp" #include "memory/gcLocker.inline.hpp" #include "memory/generationSpec.hpp" @@ -4802,370 +4803,6 @@ } }; -class G1StringSymbolTableUnlinkTask : public AbstractGangTask { -private: - BoolObjectClosure* _is_alive; - int 
_initial_string_table_size; - int _initial_symbol_table_size; - - bool _process_strings; - int _strings_processed; - int _strings_removed; - - bool _process_symbols; - int _symbols_processed; - int _symbols_removed; - - bool _do_in_parallel; -public: - G1StringSymbolTableUnlinkTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols) : - AbstractGangTask("String/Symbol Unlinking"), - _is_alive(is_alive), - _do_in_parallel(G1CollectedHeap::use_parallel_gc_threads()), - _process_strings(process_strings), _strings_processed(0), _strings_removed(0), - _process_symbols(process_symbols), _symbols_processed(0), _symbols_removed(0) { - - _initial_string_table_size = StringTable::the_table()->table_size(); - _initial_symbol_table_size = SymbolTable::the_table()->table_size(); - if (process_strings) { - StringTable::clear_parallel_claimed_index(); - } - if (process_symbols) { - SymbolTable::clear_parallel_claimed_index(); - } - } - - ~G1StringSymbolTableUnlinkTask() { - guarantee(!_process_strings || !_do_in_parallel || StringTable::parallel_claimed_index() >= _initial_string_table_size, - err_msg("claim value "INT32_FORMAT" after unlink less than initial string table size "INT32_FORMAT, - StringTable::parallel_claimed_index(), _initial_string_table_size)); - guarantee(!_process_symbols || !_do_in_parallel || SymbolTable::parallel_claimed_index() >= _initial_symbol_table_size, - err_msg("claim value "INT32_FORMAT" after unlink less than initial symbol table size "INT32_FORMAT, - SymbolTable::parallel_claimed_index(), _initial_symbol_table_size)); - - if (G1TraceStringSymbolTableScrubbing) { - gclog_or_tty->print_cr("Cleaned string and symbol table, " - "strings: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed, " - "symbols: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed", - strings_processed(), strings_removed(), - symbols_processed(), symbols_removed()); - } - } - - void work(uint worker_id) { - if (_do_in_parallel) { - int strings_processed = 0; - int strings_removed = 0; - int symbols_processed = 0; - int symbols_removed = 0; - if (_process_strings) { - StringTable::possibly_parallel_unlink(_is_alive, &strings_processed, &strings_removed); - Atomic::add(strings_processed, &_strings_processed); - Atomic::add(strings_removed, &_strings_removed); - } - if (_process_symbols) { - SymbolTable::possibly_parallel_unlink(&symbols_processed, &symbols_removed); - Atomic::add(symbols_processed, &_symbols_processed); - Atomic::add(symbols_removed, &_symbols_removed); - } - } else { - if (_process_strings) { - StringTable::unlink(_is_alive, &_strings_processed, &_strings_removed); - } - if (_process_symbols) { - SymbolTable::unlink(&_symbols_processed, &_symbols_removed); - } - } - } - - size_t strings_processed() const { return (size_t)_strings_processed; } - size_t strings_removed() const { return (size_t)_strings_removed; } - - size_t symbols_processed() const { return (size_t)_symbols_processed; } - size_t symbols_removed() const { return (size_t)_symbols_removed; } -}; - -class G1CodeCacheUnloadingTask VALUE_OBJ_CLASS_SPEC { -private: - static Monitor* _lock; - - BoolObjectClosure* const _is_alive; - const bool _unloading_occurred; - const uint _num_workers; - - // Variables used to claim nmethods. - nmethod* _first_nmethod; - volatile nmethod* _claimed_nmethod; - - // The list of nmethods that need to be processed by the second pass. 
- volatile nmethod* _postponed_list; - volatile uint _num_entered_barrier; - - public: - G1CodeCacheUnloadingTask(uint num_workers, BoolObjectClosure* is_alive, bool unloading_occurred) : - _is_alive(is_alive), - _unloading_occurred(unloading_occurred), - _num_workers(num_workers), - _first_nmethod(NULL), - _claimed_nmethod(NULL), - _postponed_list(NULL), - _num_entered_barrier(0) - { - nmethod::increase_unloading_clock(); - _first_nmethod = CodeCache::alive_nmethod(CodeCache::first()); - _claimed_nmethod = (volatile nmethod*)_first_nmethod; - } - - ~G1CodeCacheUnloadingTask() { - CodeCache::verify_clean_inline_caches(); - - CodeCache::set_needs_cache_clean(false); - guarantee(CodeCache::scavenge_root_nmethods() == NULL, "Must be"); - - CodeCache::verify_icholder_relocations(); - } - - private: - void add_to_postponed_list(nmethod* nm) { - nmethod* old; - do { - old = (nmethod*)_postponed_list; - nm->set_unloading_next(old); - } while ((nmethod*)Atomic::cmpxchg_ptr(nm, &_postponed_list, old) != old); - } - - void clean_nmethod(nmethod* nm) { - bool postponed = nm->do_unloading_parallel(_is_alive, _unloading_occurred); - - if (postponed) { - // This nmethod referred to an nmethod that has not been cleaned/unloaded yet. - add_to_postponed_list(nm); - } - - // Mark that this thread has been cleaned/unloaded. - // After this call, it will be safe to ask if this nmethod was unloaded or not. - nm->set_unloading_clock(nmethod::global_unloading_clock()); - } - - void clean_nmethod_postponed(nmethod* nm) { - nm->do_unloading_parallel_postponed(_is_alive, _unloading_occurred); - } - - static const int MaxClaimNmethods = 16; - - void claim_nmethods(nmethod** claimed_nmethods, int *num_claimed_nmethods) { - nmethod* first; - nmethod* last; - - do { - *num_claimed_nmethods = 0; - - first = last = (nmethod*)_claimed_nmethod; - - if (first != NULL) { - for (int i = 0; i < MaxClaimNmethods; i++) { - last = CodeCache::alive_nmethod(CodeCache::next(last)); - - if (last == NULL) { - break; - } - - claimed_nmethods[i] = last; - (*num_claimed_nmethods)++; - } - } - - } while ((nmethod*)Atomic::cmpxchg_ptr(last, &_claimed_nmethod, first) != first); - } - - nmethod* claim_postponed_nmethod() { - nmethod* claim; - nmethod* next; - - do { - claim = (nmethod*)_postponed_list; - if (claim == NULL) { - return NULL; - } - - next = claim->unloading_next(); - - } while ((nmethod*)Atomic::cmpxchg_ptr(next, &_postponed_list, claim) != claim); - - return claim; - } - - public: - // Mark that we're done with the first pass of nmethod cleaning. - void barrier_mark(uint worker_id) { - MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag); - _num_entered_barrier++; - if (_num_entered_barrier == _num_workers) { - ml.notify_all(); - } - } - - // See if we have to wait for the other workers to - // finish their first-pass nmethod cleaning work. - void barrier_wait(uint worker_id) { - if (_num_entered_barrier < _num_workers) { - MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag); - while (_num_entered_barrier < _num_workers) { - ml.wait(Mutex::_no_safepoint_check_flag, 0, false); - } - } - } - - // Cleaning and unloading of nmethods. Some work has to be postponed - // to the second pass, when we know which nmethods survive. - void work_first_pass(uint worker_id) { - // The first nmethods is claimed by the first worker. 
- if (worker_id == 0 && _first_nmethod != NULL) { - clean_nmethod(_first_nmethod); - _first_nmethod = NULL; - } - - int num_claimed_nmethods; - nmethod* claimed_nmethods[MaxClaimNmethods]; - - while (true) { - claim_nmethods(claimed_nmethods, &num_claimed_nmethods); - - if (num_claimed_nmethods == 0) { - break; - } - - for (int i = 0; i < num_claimed_nmethods; i++) { - clean_nmethod(claimed_nmethods[i]); - } - } - - // The nmethod cleaning helps out and does the CodeCache part of MetadataOnStackMark. - // Need to retire the buffers now that this thread has stopped cleaning nmethods. - MetadataOnStackMark::retire_buffer_for_thread(Thread::current()); - } - - void work_second_pass(uint worker_id) { - nmethod* nm; - // Take care of postponed nmethods. - while ((nm = claim_postponed_nmethod()) != NULL) { - clean_nmethod_postponed(nm); - } - } -}; - -Monitor* G1CodeCacheUnloadingTask::_lock = new Monitor(Mutex::leaf, "Code Cache Unload lock"); - -class G1KlassCleaningTask : public StackObj { - BoolObjectClosure* _is_alive; - volatile jint _clean_klass_tree_claimed; - ClassLoaderDataGraphKlassIteratorAtomic _klass_iterator; - - public: - G1KlassCleaningTask(BoolObjectClosure* is_alive) : - _is_alive(is_alive), - _clean_klass_tree_claimed(0), - _klass_iterator() { - } - - private: - bool claim_clean_klass_tree_task() { - if (_clean_klass_tree_claimed) { - return false; - } - - return Atomic::cmpxchg(1, (jint*)&_clean_klass_tree_claimed, 0) == 0; - } - - InstanceKlass* claim_next_klass() { - Klass* klass; - do { - klass =_klass_iterator.next_klass(); - } while (klass != NULL && !klass->oop_is_instance()); - - return (InstanceKlass*)klass; - } - -public: - - void clean_klass(InstanceKlass* ik) { - ik->clean_weak_instanceklass_links(_is_alive); - - if (JvmtiExport::has_redefined_a_class()) { - InstanceKlass::purge_previous_versions(ik); - } - } - - void work() { - ResourceMark rm; - - // One worker will clean the subklass/sibling klass tree. - if (claim_clean_klass_tree_task()) { - Klass::clean_subklass_tree(_is_alive); - } - - // All workers will help cleaning the classes, - InstanceKlass* klass; - while ((klass = claim_next_klass()) != NULL) { - clean_klass(klass); - } - } -}; - -// To minimize the remark pause times, the tasks below are done in parallel. -class G1ParallelCleaningTask : public AbstractGangTask { -private: - G1StringSymbolTableUnlinkTask _string_symbol_task; - G1CodeCacheUnloadingTask _code_cache_task; - G1KlassCleaningTask _klass_cleaning_task; - -public: - // The constructor is run in the VMThread. - G1ParallelCleaningTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols, uint num_workers, bool unloading_occurred) : - AbstractGangTask("Parallel Cleaning"), - _string_symbol_task(is_alive, process_strings, process_symbols), - _code_cache_task(num_workers, is_alive, unloading_occurred), - _klass_cleaning_task(is_alive) { - } - - void pre_work_verification() { - // The VM Thread will have registered Metadata during the single-threaded phase of MetadataStackOnMark. - assert(Thread::current()->is_VM_thread() - || !MetadataOnStackMark::has_buffer_for_thread(Thread::current()), "Should be empty"); - } - - void post_work_verification() { - assert(!MetadataOnStackMark::has_buffer_for_thread(Thread::current()), "Should be empty"); - } - - // The parallel work done by all worker threads. - void work(uint worker_id) { - pre_work_verification(); - - // Do first pass of code cache cleaning. 
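(Editorial aside: claim_clean_klass_tree_task() above is the usual one-shot claim idiom: many workers race on a 0 -> 1 transition and exactly one wins. A tiny sketch, using std::atomic::compare_exchange_strong rather than HotSpot's Atomic::cmpxchg:)

```cpp
#include <atomic>
#include <cstdio>

std::atomic<int> claimed{0};

bool claim_task() {
  if (claimed.load() != 0) return false;                 // cheap pre-check
  int expected = 0;
  return claimed.compare_exchange_strong(expected, 1);   // one winner only
}

int main() {
  bool first  = claim_task();
  bool second = claim_task();
  std::printf("%d %d\n", first, second);   // prints "1 0"
  return 0;
}
```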
- _code_cache_task.work_first_pass(worker_id); - - // Let the threads mark that the first pass is done. - _code_cache_task.barrier_mark(worker_id); - - // Clean the Strings and Symbols. - _string_symbol_task.work(worker_id); - - // Wait for all workers to finish the first code cache cleaning pass. - _code_cache_task.barrier_wait(worker_id); - - // Do the second code cache cleaning work, which realize on - // the liveness information gathered during the first pass. - _code_cache_task.work_second_pass(worker_id); - - // Clean all klasses that were not unloaded. - _klass_cleaning_task.work(); - - post_work_verification(); - } -}; - - void G1CollectedHeap::parallel_cleaning(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols, @@ -5173,8 +4810,8 @@ uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? workers()->active_workers() : 1); - G1ParallelCleaningTask g1_unlink_task(is_alive, process_strings, process_symbols, - n_workers, class_unloading_occurred); + ParallelCleaningTask g1_unlink_task(is_alive, process_strings, process_symbols, + n_workers, class_unloading_occurred); if (G1CollectedHeap::use_parallel_gc_threads()) { set_par_threads(n_workers); workers()->run_task(&g1_unlink_task); @@ -5189,7 +4826,7 @@ { uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? _g1h->workers()->active_workers() : 1); - G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols); + StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols); if (G1CollectedHeap::use_parallel_gc_threads()) { set_par_threads(n_workers); workers()->run_task(&g1_unlink_task); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shared/parallelCleaning.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/parallelCleaning.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "gc_implementation/shared/parallelCleaning.hpp" + +Monitor* CodeCacheUnloadingTask::_lock = new Monitor(Mutex::leaf, "Code Cache Unload lock"); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shared/parallelCleaning.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/parallelCleaning.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_PARALLELCLEANING_HPP +#define SHARE_VM_GC_IMPLEMENTATION_SHARED_PARALLELCLEANING_HPP + +#include "classfile/metadataOnStackMark.hpp" +#include "classfile/symbolTable.hpp" +#include "code/codeCache.hpp" +#include "gc_interface/collectedHeap.hpp" +#include "memory/resourceArea.hpp" +#include "utilities/workgroup.hpp" + +class StringSymbolTableUnlinkTask : public AbstractGangTask { +private: + BoolObjectClosure* _is_alive; + int _initial_string_table_size; + int _initial_symbol_table_size; + + bool _process_strings; + int _strings_processed; + int _strings_removed; + + bool _process_symbols; + int _symbols_processed; + int _symbols_removed; + + bool _do_in_parallel; +public: + StringSymbolTableUnlinkTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols) : + AbstractGangTask("String/Symbol Unlinking"), + _is_alive(is_alive), + _do_in_parallel(Universe::heap()->use_parallel_gc_threads()), + _process_strings(process_strings), _strings_processed(0), _strings_removed(0), + _process_symbols(process_symbols), _symbols_processed(0), _symbols_removed(0) { + + _initial_string_table_size = StringTable::the_table()->table_size(); + _initial_symbol_table_size = SymbolTable::the_table()->table_size(); + if (process_strings) { + StringTable::clear_parallel_claimed_index(); + } + if (process_symbols) { + SymbolTable::clear_parallel_claimed_index(); + } + } + + ~StringSymbolTableUnlinkTask() { + guarantee(!_process_strings || !_do_in_parallel || StringTable::parallel_claimed_index() >= _initial_string_table_size, + err_msg("claim value "INT32_FORMAT" after unlink less than initial string table size "INT32_FORMAT, + StringTable::parallel_claimed_index(), _initial_string_table_size)); + guarantee(!_process_symbols || !_do_in_parallel || SymbolTable::parallel_claimed_index() >= _initial_symbol_table_size, + err_msg("claim value "INT32_FORMAT" after unlink less than initial symbol table size "INT32_FORMAT, + SymbolTable::parallel_claimed_index(), _initial_symbol_table_size)); + + if (G1TraceStringSymbolTableScrubbing) { + gclog_or_tty->print_cr("Cleaned string and symbol table, " + "strings: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed, " + "symbols: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed", + strings_processed(), strings_removed(), + symbols_processed(), symbols_removed()); + } + } + + void work(uint worker_id) { + if (_do_in_parallel) { + int strings_processed = 0; + int strings_removed = 0; + int symbols_processed = 0; + int symbols_removed = 0; + if (_process_strings) { + StringTable::possibly_parallel_unlink(_is_alive, 
&strings_processed, &strings_removed); + Atomic::add(strings_processed, &_strings_processed); + Atomic::add(strings_removed, &_strings_removed); + } + if (_process_symbols) { + SymbolTable::possibly_parallel_unlink(&symbols_processed, &symbols_removed); + Atomic::add(symbols_processed, &_symbols_processed); + Atomic::add(symbols_removed, &_symbols_removed); + } + } else { + if (_process_strings) { + StringTable::unlink(_is_alive, &_strings_processed, &_strings_removed); + } + if (_process_symbols) { + SymbolTable::unlink(&_symbols_processed, &_symbols_removed); + } + } + } + + size_t strings_processed() const { return (size_t)_strings_processed; } + size_t strings_removed() const { return (size_t)_strings_removed; } + + size_t symbols_processed() const { return (size_t)_symbols_processed; } + size_t symbols_removed() const { return (size_t)_symbols_removed; } +}; + +class CodeCacheUnloadingTask VALUE_OBJ_CLASS_SPEC { +private: + static Monitor* _lock; + + BoolObjectClosure* const _is_alive; + const bool _unloading_occurred; + const uint _num_workers; + + // Variables used to claim nmethods. + nmethod* _first_nmethod; + volatile nmethod* _claimed_nmethod; + + // The list of nmethods that need to be processed by the second pass. + volatile nmethod* _postponed_list; + volatile uint _num_entered_barrier; + + public: + CodeCacheUnloadingTask(uint num_workers, BoolObjectClosure* is_alive, bool unloading_occurred) : + _is_alive(is_alive), + _unloading_occurred(unloading_occurred), + _num_workers(num_workers), + _first_nmethod(NULL), + _claimed_nmethod(NULL), + _postponed_list(NULL), + _num_entered_barrier(0) + { + nmethod::increase_unloading_clock(); + _first_nmethod = CodeCache::alive_nmethod(CodeCache::first()); + _claimed_nmethod = (volatile nmethod*)_first_nmethod; + } + + ~CodeCacheUnloadingTask() { + CodeCache::verify_clean_inline_caches(); + + CodeCache::set_needs_cache_clean(false); + guarantee(CodeCache::scavenge_root_nmethods() == NULL, "Must be"); + + CodeCache::verify_icholder_relocations(); + } + + private: + void add_to_postponed_list(nmethod* nm) { + nmethod* old; + do { + old = (nmethod*)_postponed_list; + nm->set_unloading_next(old); + } while ((nmethod*)Atomic::cmpxchg_ptr(nm, &_postponed_list, old) != old); + } + + void clean_nmethod(nmethod* nm) { + bool postponed = nm->do_unloading_parallel(_is_alive, _unloading_occurred); + + if (postponed) { + // This nmethod referred to an nmethod that has not been cleaned/unloaded yet. + add_to_postponed_list(nm); + } + + // Mark that this thread has been cleaned/unloaded. + // After this call, it will be safe to ask if this nmethod was unloaded or not. 
+ nm->set_unloading_clock(nmethod::global_unloading_clock()); + } + + void clean_nmethod_postponed(nmethod* nm) { + nm->do_unloading_parallel_postponed(_is_alive, _unloading_occurred); + } + + static const int MaxClaimNmethods = 16; + + void claim_nmethods(nmethod** claimed_nmethods, int *num_claimed_nmethods) { + nmethod* first; + nmethod* last; + + do { + *num_claimed_nmethods = 0; + + first = last = (nmethod*)_claimed_nmethod; + + if (first != NULL) { + for (int i = 0; i < MaxClaimNmethods; i++) { + last = CodeCache::alive_nmethod(CodeCache::next(last)); + + if (last == NULL) { + break; + } + + claimed_nmethods[i] = last; + (*num_claimed_nmethods)++; + } + } + + } while ((nmethod*)Atomic::cmpxchg_ptr(last, &_claimed_nmethod, first) != first); + } + + nmethod* claim_postponed_nmethod() { + nmethod* claim; + nmethod* next; + + do { + claim = (nmethod*)_postponed_list; + if (claim == NULL) { + return NULL; + } + + next = claim->unloading_next(); + + } while ((nmethod*)Atomic::cmpxchg_ptr(next, &_postponed_list, claim) != claim); + + return claim; + } + + public: + // Mark that we're done with the first pass of nmethod cleaning. + void barrier_mark(uint worker_id) { + MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag); + _num_entered_barrier++; + if (_num_entered_barrier == _num_workers) { + ml.notify_all(); + } + } + + // See if we have to wait for the other workers to + // finish their first-pass nmethod cleaning work. + void barrier_wait(uint worker_id) { + if (_num_entered_barrier < _num_workers) { + MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag); + while (_num_entered_barrier < _num_workers) { + ml.wait(Mutex::_no_safepoint_check_flag, 0, false); + } + } + } + + // Cleaning and unloading of nmethods. Some work has to be postponed + // to the second pass, when we know which nmethods survive. + void work_first_pass(uint worker_id) { + // The first nmethod is claimed by the first worker. + if (worker_id == 0 && _first_nmethod != NULL) { + clean_nmethod(_first_nmethod); + _first_nmethod = NULL; + } + + int num_claimed_nmethods; + nmethod* claimed_nmethods[MaxClaimNmethods]; + + while (true) { + claim_nmethods(claimed_nmethods, &num_claimed_nmethods); + + if (num_claimed_nmethods == 0) { + break; + } + + for (int i = 0; i < num_claimed_nmethods; i++) { + clean_nmethod(claimed_nmethods[i]); + } + } + + // The nmethod cleaning helps out and does the CodeCache part of MetadataOnStackMark. + // Need to retire the buffers now that this thread has stopped cleaning nmethods. + MetadataOnStackMark::retire_buffer_for_thread(Thread::current()); + } + + void work_second_pass(uint worker_id) { + nmethod* nm; + // Take care of postponed nmethods.
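(Editorial aside: claim_nmethods() spends one CAS to claim a batch of up to MaxClaimNmethods nmethods, which keeps contention on the shared cursor low. The sketch below shows the same batching idea over a plain index range, with a fetch_add cursor instead of the linked CodeCache walk; all constants are invented:)

```cpp
#include <atomic>
#include <cstdio>

const int kTotal = 100;   // pretend there are 100 nmethods
const int kBatch = 16;    // cf. MaxClaimNmethods

std::atomic<int> cursor{0};

// Claims up to kBatch items with a single atomic op; returns the batch size.
int claim_batch(int* first) {
  int start = cursor.fetch_add(kBatch);
  if (start >= kTotal) return 0;                 // nothing left to claim
  *first = start;
  return (kTotal - start < kBatch) ? kTotal - start : kBatch;
}

int main() {
  int first = 0, n = 0, total = 0;
  while ((n = claim_batch(&first)) != 0) {
    total += n;                                  // "clean" items [first, first+n)
  }
  std::printf("claimed %d items in batches of up to %d\n", total, kBatch);
  return 0;
}
```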
+ while ((nm = claim_postponed_nmethod()) != NULL) { + clean_nmethod_postponed(nm); + } + } +}; + +class KlassCleaningTask : public StackObj { + BoolObjectClosure* _is_alive; + volatile jint _clean_klass_tree_claimed; + ClassLoaderDataGraphKlassIteratorAtomic _klass_iterator; + + public: + KlassCleaningTask(BoolObjectClosure* is_alive) : + _is_alive(is_alive), + _clean_klass_tree_claimed(0), + _klass_iterator() { + } + + private: + bool claim_clean_klass_tree_task() { + if (_clean_klass_tree_claimed) { + return false; + } + + return Atomic::cmpxchg(1, (jint*)&_clean_klass_tree_claimed, 0) == 0; + } + + InstanceKlass* claim_next_klass() { + Klass* klass; + do { + klass = _klass_iterator.next_klass(); + } while (klass != NULL && !klass->oop_is_instance()); + + return (InstanceKlass*)klass; + } + +public: + + void clean_klass(InstanceKlass* ik) { + ik->clean_weak_instanceklass_links(_is_alive); + + if (JvmtiExport::has_redefined_a_class()) { + InstanceKlass::purge_previous_versions(ik); + } + } + + void work() { + ResourceMark rm; + + // One worker will clean the subklass/sibling klass tree. + if (claim_clean_klass_tree_task()) { + Klass::clean_subklass_tree(_is_alive); + } + + // All workers will help clean the classes. + InstanceKlass* klass; + while ((klass = claim_next_klass()) != NULL) { + clean_klass(klass); + } + } +}; + +// To minimize the remark pause times, the tasks below are done in parallel. +class ParallelCleaningTask : public AbstractGangTask { +private: + StringSymbolTableUnlinkTask _string_symbol_task; + CodeCacheUnloadingTask _code_cache_task; + KlassCleaningTask _klass_cleaning_task; + +public: + // The constructor is run in the VMThread. + ParallelCleaningTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols, uint num_workers, bool unloading_occurred) : + AbstractGangTask("Parallel Cleaning"), + _string_symbol_task(is_alive, process_strings, process_symbols), + _code_cache_task(num_workers, is_alive, unloading_occurred), + _klass_cleaning_task(is_alive) { + } + + void pre_work_verification() { + // The VM Thread will have registered Metadata during the single-threaded phase of MetadataOnStackMark. + assert(Thread::current()->is_VM_thread() + || !MetadataOnStackMark::has_buffer_for_thread(Thread::current()), "Should be empty"); + } + + void post_work_verification() { + assert(!MetadataOnStackMark::has_buffer_for_thread(Thread::current()), "Should be empty"); + } + + // The parallel work done by all worker threads. + void work(uint worker_id) { + pre_work_verification(); + + // Do first pass of code cache cleaning. + _code_cache_task.work_first_pass(worker_id); + + // Let the threads mark that the first pass is done. + _code_cache_task.barrier_mark(worker_id); + + // Clean the Strings and Symbols. + _string_symbol_task.work(worker_id); + + // Wait for all workers to finish the first code cache cleaning pass. + _code_cache_task.barrier_wait(worker_id); + + // Do the second code cache cleaning work, which relies on + // the liveness information gathered during the first pass. + _code_cache_task.work_second_pass(worker_id); + + // Clean all klasses that were not unloaded.
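(Editorial aside: the work() method above is the heart of the shared task: pass one cleans nmethods, the barrier guarantees every worker's pass one is finished, the string/symbol work fills the gap while waiting, and pass two then consumes the liveness data. A rough portable rendering of barrier_mark()/barrier_wait(), with a mutex and condition variable standing in for MonitorLockerEx:)

```cpp
#include <condition_variable>
#include <mutex>
#include <thread>
#include <vector>

const unsigned kNumWorkers = 4;
std::mutex m;
std::condition_variable cv;
unsigned entered = 0;

void barrier_mark() {
  std::lock_guard<std::mutex> lk(m);
  if (++entered == kNumWorkers) cv.notify_all();   // last arrival wakes all
}

void barrier_wait() {
  std::unique_lock<std::mutex> lk(m);
  cv.wait(lk, [] { return entered >= kNumWorkers; });
}

int main() {
  std::vector<std::thread> workers;
  for (unsigned i = 0; i < kNumWorkers; i++) {
    workers.emplace_back([] {
      // first pass ...
      barrier_mark();
      // other work (e.g. string/symbol cleaning) may overlap here ...
      barrier_wait();
      // second pass: all first-pass results are now visible
    });
  }
  for (std::thread& t : workers) t.join();
  return 0;
}
```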
+ _klass_cleaning_task.work(); + + post_work_verification(); + } +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_PARALLELCLEANING_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/brooksPointer.cpp --- a/src/share/vm/gc_implementation/shenandoah/brooksPointer.cpp Fri Nov 04 07:21:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2013, 2015, Red Hat, Inc. and/or its affiliates. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "memory/universe.hpp" -#include "gc_interface/collectedHeap.hpp" -#include "gc_implementation/shenandoah/brooksPointer.hpp" -#include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" - -BrooksPointer::BrooksPointer(HeapWord** hw) : _heap_word(hw) {} - -bool BrooksPointer::check_forwardee_is_in_heap(oop forwardee) { - return Universe::heap()->is_in(forwardee); -} diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/brooksPointer.hpp --- a/src/share/vm/gc_implementation/shenandoah/brooksPointer.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/brooksPointer.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -26,29 +26,76 @@ #include "oops/oop.hpp" #include "utilities/globalDefinitions.hpp" -#include "gc_implementation/shenandoah/shenandoahHeap.hpp" class BrooksPointer { public: - static const uint BROOKS_POINTER_OBJ_SIZE = 1; - static const int BYTE_OFFSET = -8; + + /* + * Notes: + * + * a. It is important to have byte_offset and word_offset return constant + * expressions, because that will allow to constant-fold forwarding ptr + * accesses. This is not a problem in JIT compilers that would generate + * the code once, but it is problematic in GC hotpath code. + * + * b. With filler object mechanics, we may need to allocate more space for + * the forwarding ptr to meet alignment requirements for objects. This + * means *_offset and *_size calls are NOT interchangeable. The accesses + * to forwarding ptrs should always be via *_offset. Storage size + * calculations should always be via *_size. + */ + + /* Offset from the object start, in HeapWords. */ + static inline int word_offset() { + return -1; // exactly one HeapWord + } + + /* Offset from the object start, in bytes. */ + static inline int byte_offset() { + return -HeapWordSize; // exactly one HeapWord + } + + /* Allocated size, in HeapWords. */ + static inline size_t word_size() { + return MinObjAlignment; + } + + /* Allocated size, in bytes */ + static inline size_t byte_size() { + return MinObjAlignmentInBytes; + } + + /* Initializes Brooks pointer (to self). 
+ */ + static inline void initialize(oop obj); + + /* Gets forwardee from the given object. + */ + static inline oop forwardee(oop obj); + + /* Forcefully sets forwardee in $holder to $update. + */ + static inline void set_forwardee(oop obj, oop update); + + /* Tries to atomically update forwardee in $holder object to $update. + * Assumes $holder points at itself. + * Asserts $holder is in from-space. + * Asserts $update is in to-space. + */ + static inline oop try_update_forwardee(oop obj, oop update); + + /* Sets raw value for forwardee slot. + * THIS IS DANGEROUS: USERS HAVE TO INITIALIZE/SET FORWARDEE BACK AFTER THEY ARE DONE. + */ + static inline void set_raw(oop obj, HeapWord* update); + + /* Returns the raw value from forwardee slot. + */ + static inline HeapWord* get_raw(oop obj); private: - - HeapWord** _heap_word; - - BrooksPointer(HeapWord** heap_word); - -public: - - bool check_forwardee_is_in_heap(oop forwardee); - - inline void set_forwardee(oop forwardee); - inline HeapWord* get_forwardee(); - inline HeapWord* cas_forwardee(HeapWord* old, HeapWord* forwardee); - - static inline BrooksPointer get(oop obj); + static inline HeapWord** brooks_ptr_addr(oop obj); }; #endif // SHARE_VM_GC_SHENANDOAH_BROOKSPOINTER_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/brooksPointer.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/brooksPointer.inline.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/brooksPointer.inline.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -25,64 +25,139 @@ #define SHARE_VM_GC_SHENANDOAH_BROOKSPOINTER_INLINE_HPP #include "gc_implementation/shenandoah/brooksPointer.hpp" -#include "runtime/atomic.inline.hpp" +#include "gc_implementation/shenandoah/shenandoahHeap.hpp" +#include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" +#include "gc_implementation/shenandoah/shenandoahLogging.hpp" +#include "runtime/atomic.hpp" -inline BrooksPointer BrooksPointer::get(oop obj) { - HeapWord* hw_obj = (HeapWord*) obj; - HeapWord* brooks_ptr = hw_obj - 1; - // We know that the value in that memory location is a pointer to another - // heapword/oop. 
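(Editorial aside: the refactored BrooksPointer API pins down the layout: the forwarding pointer occupies the word immediately before the object, word_offset() == -1, and a freshly allocated object points the slot at itself. A plain-C++ sketch of that arithmetic with stand-in types and no HotSpot headers:)

```cpp
#include <cassert>
#include <cstdio>

// word_offset() == -1: the slot is the pointer-sized word before the object.
void** forwarding_slot(void** obj) { return obj - 1; }

int main() {
  void* storage[4];              // storage[0] plays the forwarding-ptr slot
  void** obj = &storage[1];      // the "object" starts one word later
  *forwarding_slot(obj) = obj;   // initialize(): forwardee is the object itself
  assert(storage[0] == (void*)obj);
  std::printf("slot %p holds %p (self)\n",
              (void*)forwarding_slot(obj), storage[0]);
  return 0;
}
```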
- return BrooksPointer((HeapWord**) brooks_ptr); +inline HeapWord** BrooksPointer::brooks_ptr_addr(oop obj) { + return (HeapWord**)((HeapWord*) obj + word_offset()); } -inline void BrooksPointer::set_forwardee(oop forwardee) { - assert(ShenandoahHeap::heap()->is_in(forwardee), "forwardee must be valid oop in the heap"); - *_heap_word = (HeapWord*) forwardee; +inline void BrooksPointer::initialize(oop obj) { #ifdef ASSERT - if (ShenandoahTraceBrooksPointers) { - tty->print_cr("setting_forwardee to "PTR_FORMAT" = "PTR_FORMAT, p2i((HeapWord*) forwardee), p2i(*_heap_word)); - } + log_develop_trace(gc)("Init forwardee for "PTR_FORMAT" to self", p2i(obj)); + + assert(UseShenandoahGC, "must only be called when Shenandoah is used."); + assert(obj != NULL, "oop is not NULL"); + assert(ShenandoahHeap::heap()->is_in(obj), "oop must point to a heap address"); #endif + + *brooks_ptr_addr(obj) = (HeapWord*) obj; } -inline HeapWord* BrooksPointer::get_forwardee() { - return *_heap_word; +inline void BrooksPointer::set_forwardee(oop holder, oop update) { +#ifdef ASSERT + log_develop_trace(gc)("Setting forwardee to "PTR_FORMAT" = "PTR_FORMAT, p2i(holder), p2i(update)); + + assert(UseShenandoahGC, "must only be called when Shenandoah is used."); + assert(holder != NULL, "holder should not be NULL"); + assert(update != NULL, "update should not be NULL"); + assert(ShenandoahHeap::heap()->is_in(holder), "holder must point to a heap address"); + assert(ShenandoahHeap::heap()->is_in(update), "update must point to a heap address"); + + assert (holder->klass() == update->klass(), "klasses should match"); + + assert(!oopDesc::unsafe_equals(holder, update), "forwarding should make progress"); + assert(ShenandoahHeap::heap()->heap_region_containing(holder) != + ShenandoahHeap::heap()->heap_region_containing(update), "forwarding should be across regions"); + assert( ShenandoahHeap::heap()->in_collection_set(holder), "holder should be in collection set"); + assert(!ShenandoahHeap::heap()->in_collection_set(update), "update should not be in collection set"); +#endif + + *brooks_ptr_addr(holder) = (HeapWord*) update; } -inline HeapWord* BrooksPointer::cas_forwardee(HeapWord* old, HeapWord* forwardee) { +inline void BrooksPointer::set_raw(oop holder, HeapWord* update) { + log_develop_trace(gc)("Setting RAW forwardee for "PTR_FORMAT" = "PTR_FORMAT, p2i(holder), p2i(update)); + assert(UseShenandoahGC, "must only be called when Shenandoah is used."); + *brooks_ptr_addr(holder) = update; +} + +inline HeapWord* BrooksPointer::get_raw(oop holder) { + assert(UseShenandoahGC, "must only be called when Shenandoah is used."); + HeapWord* res = *brooks_ptr_addr(holder); + log_develop_trace(gc)("Getting RAW forwardee for "PTR_FORMAT" = "PTR_FORMAT, p2i(holder), p2i(res)); + return res; +} + +inline oop BrooksPointer::forwardee(oop obj) { +#ifdef ASSERT + assert(UseShenandoahGC, "must only be called when Shenandoah is used."); + assert(Universe::heap()->is_in(obj), err_msg("We shouldn't be calling this on objects not in the heap: "PTR_FORMAT, p2i(obj))); + + oop forwardee; + if (ShenandoahVerifyReadsToFromSpace) { + ShenandoahHeap* heap = (ShenandoahHeap *) Universe::heap(); + ShenandoahHeapRegion* region = heap->heap_region_containing(obj); + { + region->memProtectionOff(); + forwardee = oop(*brooks_ptr_addr(obj)); + region->memProtectionOn(); + } + } else { + forwardee = oop(*brooks_ptr_addr(obj)); + } + + assert(forwardee != NULL, "forwardee is not NULL"); assert(ShenandoahHeap::heap()->is_in(forwardee), "forwardee must point to a 
heap address"); + assert((oopDesc::unsafe_equals(forwardee, obj) || + (ShenandoahHeap::heap()->heap_region_containing(forwardee) != + ShenandoahHeap::heap()->heap_region_containing(obj))), "forwardee should be self, or another region"); - HeapWord* o = old; - HeapWord* n = forwardee; - HeapWord* result; - -#ifdef ASSERT - if (ShenandoahTraceBrooksPointers) { - tty->print_cr("Attempting to CAS "PTR_FORMAT" value "PTR_FORMAT" from "PTR_FORMAT" to "PTR_FORMAT, p2i(_heap_word), p2i(*_heap_word), p2i(o), p2i(n)); + if (!oopDesc::unsafe_equals(obj, forwardee)) { + oop second_forwardee = oop(*brooks_ptr_addr(forwardee)); + if (!oopDesc::unsafe_equals(forwardee, second_forwardee)) { + // We should never be forwarded more than once. + fatal(err_msg("Multiple forwardings: "PTR_FORMAT" -> "PTR_FORMAT" -> "PTR_FORMAT, p2i(obj), p2i(forwardee), p2i(second_forwardee))); + } } +#else + oop forwardee = oop(*brooks_ptr_addr(obj)); #endif -#ifdef ASSERT + log_develop_trace(gc)("Forwardee for "PTR_FORMAT" = "PTR_FORMAT, p2i(obj), p2i(forwardee)); + + return forwardee; +} + +inline oop BrooksPointer::try_update_forwardee(oop holder, oop update) { + #ifdef ASSERT + assert(holder != NULL, "holder should not be NULL"); + assert(update != NULL, "update should not be NULL"); + assert(!oopDesc::unsafe_equals(holder, update), "forwarding should make progress"); + assert(ShenandoahHeap::heap()->is_in(holder), "holder must point to a heap address"); + assert(ShenandoahHeap::heap()->is_in(update), "update must point to a heap address"); + assert(ShenandoahHeap::heap()->heap_region_containing(holder) != + ShenandoahHeap::heap()->heap_region_containing(update), "forwarding should be across regions"); + assert( ShenandoahHeap::heap()->in_collection_set(holder), "holder should be in collection set"); + assert(!ShenandoahHeap::heap()->in_collection_set(update), "update should not be in collection set"); + assert (holder->klass() == update->klass(), "klasses should match"); + + oop result; if (ShenandoahVerifyWritesToFromSpace || ShenandoahVerifyReadsToFromSpace) { ShenandoahHeap* sh = (ShenandoahHeap*) Universe::heap(); - ShenandoahHeapRegion* hr = sh->heap_region_containing(old); + ShenandoahHeapRegion* hr = sh->heap_region_containing(holder); { hr->memProtectionOff(); - result = (HeapWord*) (HeapWord*) Atomic::cmpxchg_ptr(n, _heap_word, o); + result = (oop) Atomic::cmpxchg_ptr(update, brooks_ptr_addr(holder), holder); hr->memProtectionOn(); } } else { - result = (HeapWord*) (HeapWord*) Atomic::cmpxchg_ptr(n, _heap_word, o); + result = (oop) Atomic::cmpxchg_ptr(update, brooks_ptr_addr(holder), holder); + } + + assert(result != NULL, "CAS result is not NULL"); + assert(ShenandoahHeap::heap()->is_in(result), "CAS result must point to a heap address"); + + if (oopDesc::unsafe_equals(result, holder)) { + log_develop_trace(gc)("Updated forwardee for "PTR_FORMAT" to "PTR_FORMAT, p2i(holder), p2i(update)); + } else { + log_develop_trace(gc)("Failed to set forwardee for "PTR_FORMAT" to "PTR_FORMAT", was already "PTR_FORMAT, p2i(holder), p2i(update), p2i(result)); } #else - result = (HeapWord*) (HeapWord*) Atomic::cmpxchg_ptr(n, _heap_word, o); -#endif - -#ifdef ASSERT - if (ShenandoahTraceBrooksPointers) { - tty->print_cr("Result of CAS from "PTR_FORMAT" to "PTR_FORMAT" was "PTR_FORMAT" read value was "PTR_FORMAT, p2i(o), p2i(n), p2i(result), p2i(*_heap_word)); - } + oop result = (oop) Atomic::cmpxchg_ptr(update, brooks_ptr_addr(holder), holder); #endif return result; diff -r b1cf900aa021 -r 87059e2365be 
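(Editorial aside: try_update_forwardee() above returns whatever cmpxchg_ptr read: the holder itself for the thread that won the evacuation race, or the winning copy for everyone else, so losers can discard their speculative copy and adopt the winner's. A sketch of that contract with an invented Obj type and std::atomic for the slot:)

```cpp
#include <atomic>
#include <cassert>

struct Obj {
  std::atomic<Obj*> fwd{nullptr};   // the Brooks slot
  int payload{0};
};

// Returns the previous slot value, like Atomic::cmpxchg_ptr does.
Obj* try_update_forwardee(Obj* holder, Obj* copy) {
  Obj* expected = holder;                         // slot must still be "self"
  holder->fwd.compare_exchange_strong(expected, copy);
  return expected;
}

int main() {
  Obj from;   from.payload = 42; from.fwd.store(&from);
  Obj copy_a; copy_a.fwd.store(&copy_a);
  Obj copy_b; copy_b.fwd.store(&copy_b);

  Obj* r1 = try_update_forwardee(&from, &copy_a);
  Obj* r2 = try_update_forwardee(&from, &copy_b);
  assert(r1 == &from);                  // winner: saw the self-pointer
  assert(r2 == &copy_a);                // loser: learns the winning copy
  assert(from.fwd.load() == &copy_a);   // everyone agrees on copy_a
  return 0;
}
```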
src/share/vm/gc_implementation/shenandoah/shenandoahBarrierSet.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahBarrierSet.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahBarrierSet.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -23,26 +23,29 @@ #include "precompiled.hpp" #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/shenandoah/shenandoahBarrierSet.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" -#include "gc_implementation/shenandoah/shenandoahBarrierSet.inline.hpp" -#include "memory/universe.hpp" -#include "utilities/array.hpp" +#include "runtime/interfaceSupport.hpp" class UpdateRefsForOopClosure: public ExtendedOopClosure { private: ShenandoahHeap* _heap; + template + inline void do_oop_work(T* p) { + _heap->maybe_update_oop_ref(p); + } public: UpdateRefsForOopClosure() { _heap = ShenandoahHeap::heap(); } void do_oop(oop* p) { - _heap->maybe_update_oop_ref(p); + do_oop_work(p); } void do_oop(narrowOop* p) { - Unimplemented(); + do_oop_work(p); } }; @@ -161,11 +164,22 @@ return _heap->concurrent_mark_in_progress() && _heap->need_update_refs(); } -void ShenandoahBarrierSet::write_ref_array_work(MemRegion mr) { +void ShenandoahBarrierSet::write_ref_array_work(MemRegion r) { + ShouldNotReachHere(); +} + +void ShenandoahBarrierSet::write_ref_array(HeapWord* start, size_t count) { if (! need_update_refs_barrier()) return; - for (HeapWord* word = mr.start(); word < mr.end(); word++) { - oop* oop_ptr = (oop*) word; - _heap->maybe_update_oop_ref(oop_ptr); + if (UseCompressedOops) { + narrowOop* dst = (narrowOop*) start; + for (size_t i = 0; i < count; i++, dst++) { + _heap->maybe_update_oop_ref(dst); + } + } else { + oop* dst = (oop*) start; + for (size_t i = 0; i < count; i++, dst++) { + _heap->maybe_update_oop_ref(dst); + } } } @@ -174,7 +188,7 @@ #ifdef ASSERT if (_heap->is_in(dst) && - _heap->heap_region_containing((HeapWord*) dst)->is_in_collection_set() && + _heap->in_collection_set(dst) && ! _heap->cancelled_concgc()) { tty->print_cr("dst = "PTR_FORMAT, p2i(dst)); _heap->heap_region_containing((HeapWord*) dst)->print(); @@ -183,14 +197,12 @@ #endif if (! JavaThread::satb_mark_queue_set().is_active()) return; - // tty->print_cr("write_ref_array_pre_work: "PTR_FORMAT", "INT32_FORMAT, dst, count); T* elem_ptr = dst; for (int i = 0; i < count; i++, elem_ptr++) { T heap_oop = oopDesc::load_heap_oop(elem_ptr); if (!oopDesc::is_null(heap_oop)) { G1SATBCardTableModRefBS::enqueue(oopDesc::decode_heap_oop_not_null(heap_oop)); } - // tty->print_cr("write_ref_array_pre_work: oop: "PTR_FORMAT, heap_oop); } } @@ -213,17 +225,20 @@ #ifdef ASSERT ShenandoahHeap* heap = ShenandoahHeap::heap(); if (heap->is_in(field) && - heap->heap_region_containing((HeapWord*)field)->is_in_collection_set() && + heap->in_collection_set(field) && ! 
heap->cancelled_concgc()) { tty->print_cr("field = "PTR_FORMAT, p2i(field)); + tty->print_cr("in_cset: %s", BOOL_TO_STR(heap->in_collection_set(field))); heap->heap_region_containing((HeapWord*)field)->print(); + tty->print_cr("marking: %s, evacuating: %s", + BOOL_TO_STR(heap->concurrent_mark_in_progress()), + BOOL_TO_STR(heap->is_evacuation_in_progress())); assert(false, "We should have fixed this earlier"); } #endif if (!oopDesc::is_null(heap_oop)) { G1SATBCardTableModRefBS::enqueue(oopDesc::decode_heap_oop(heap_oop)); - // tty->print_cr("write_ref_field_pre_static: v = "PTR_FORMAT" o = "PTR_FORMAT" old: "PTR_FORMAT, field, newVal, heap_oop); } } @@ -246,10 +261,18 @@ } void ShenandoahBarrierSet::write_ref_field_work(void* v, oop o, bool release) { +#ifdef ASSERT + ShenandoahHeap* heap = ShenandoahHeap::heap(); + if (!(heap->cancelled_concgc() || !heap->in_collection_set(v))) { + tty->print_cr("field not in collection set: "PTR_FORMAT, p2i(v)); + tty->print_cr("containing heap region:"); + ShenandoahHeap::heap()->heap_region_containing(v)->print(); + } + assert(heap->cancelled_concgc() || !heap->in_collection_set(v), "only write to to-space"); if (! need_update_refs_barrier()) return; - assert (! UseCompressedOops, "compressed oops not supported yet"); - _heap->maybe_update_oop_ref((oop*) v); - // tty->print_cr("write_ref_field_work: v = "PTR_FORMAT" o = "PTR_FORMAT, v, o); + assert(o == NULL || oopDesc::unsafe_equals(o, resolve_oop_static(o)), "only write to-space values"); + assert(o == NULL || !heap->in_collection_set(o), "only write to-space values"); +#endif } void ShenandoahBarrierSet::write_region_work(MemRegion mr) { @@ -261,7 +284,6 @@ // it would be NULL in any case. But we *are* interested in any oop* // that potentially need to be updated. - // tty->print_cr("write_region_work: "PTR_FORMAT", "PTR_FORMAT, mr.start(), mr.end()); oop obj = oop(mr.start()); assert(obj->is_oop(), "must be an oop"); UpdateRefsForOopClosure cl; @@ -284,14 +306,13 @@ } bool ShenandoahBarrierSet::obj_equals(narrowOop obj1, narrowOop obj2) { - Unimplemented(); - return false; + return obj_equals(oopDesc::decode_heap_oop(obj1), oopDesc::decode_heap_oop(obj2)); } #ifdef ASSERT bool ShenandoahBarrierSet::is_safe(oop o) { if (o == NULL) return true; - if (_heap->heap_region_containing(o)->is_in_collection_set()) { + if (_heap->in_collection_set(o)) { return false; } if (! oopDesc::unsafe_equals(o, read_barrier(o))) { @@ -301,15 +322,14 @@ } bool ShenandoahBarrierSet::is_safe(narrowOop o) { - Unimplemented(); - return true; + return is_safe(oopDesc::decode_heap_oop(o)); } #endif oop ShenandoahBarrierSet::resolve_and_maybe_copy_oop_work(oop src) { assert(src != NULL, "only evacuated non NULL oops"); - if (_heap->in_cset_fast_test((HeapWord*) src)) { + if (_heap->in_collection_set(src)) { return resolve_and_maybe_copy_oop_work2(src); } else { return src; @@ -318,44 +338,32 @@ oop ShenandoahBarrierSet::resolve_and_maybe_copy_oop_work2(oop src) { assert(src != NULL, "only evacuated non NULL oops"); - assert(_heap->heap_region_containing(src)->is_in_collection_set(), "only evacuate objects in collection set"); + assert(_heap->in_collection_set(src), "only evacuate objects in collection set"); assert(! _heap->heap_region_containing(src)->is_humongous(), "never evacuate humongous objects"); // TODO: Consider passing thread from caller. 
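(Editorial aside: the new write_ref_array() dispatches on UseCompressedOops and runs the same update loop over narrowOop* or oop* slots. A sketch of that shape with stand-in types; the real loop calls maybe_update_oop_ref() per slot:)

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

typedef uint32_t narrowOop;   // stand-in: compressed reference
typedef void*    oop;         // stand-in: full-width reference

template <typename T>
void update_refs(T* dst, size_t count) {
  for (size_t i = 0; i < count; i++, dst++) {
    // The real loop calls maybe_update_oop_ref(dst) here, which re-resolves
    // the referent through its forwarding pointer and writes back on change.
    std::printf("visiting %zu-byte slot at %p\n", sizeof(T), (void*)dst);
  }
}

void write_ref_array(void* start, size_t count, bool use_compressed_oops) {
  if (use_compressed_oops) {
    update_refs((narrowOop*)start, count);
  } else {
    update_refs((oop*)start, count);
  }
}

int main() {
  oop arr[3] = {nullptr, nullptr, nullptr};
  write_ref_array(arr, 3, /*use_compressed_oops=*/false);
  return 0;
}
```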
oop dst = _heap->evacuate_object(src, Thread::current()); -#ifdef ASSERT - if (ShenandoahTraceEvacuations) { - tty->print_cr("src = "PTR_FORMAT" dst = "PTR_FORMAT" src = "PTR_FORMAT" src-2 = "PTR_FORMAT, - p2i((HeapWord*) src), p2i((HeapWord*) dst), p2i((HeapWord*) src), p2i(((HeapWord*) src) - 2)); - } -#endif + + log_develop_trace(gc, compaction)("src = "PTR_FORMAT" dst = "PTR_FORMAT" src-2 = "PTR_FORMAT, + p2i(src), p2i(dst), p2i(((HeapWord*) src) - 2)); + assert(_heap->is_in(dst), "result should be in the heap"); return dst; } oop ShenandoahBarrierSet::resolve_and_maybe_copy_oopHelper(oop src) { assert(src != NULL, "checked before"); - if (! _heap->is_evacuation_in_progress()) { - OrderAccess::loadload(); - return resolve_oop_static(src); + bool evac = _heap->is_evacuation_in_progress(); + OrderAccess::loadload(); + src = resolve_oop_static_not_null(src); + if (! evac) { + return src; + } else { + return resolve_and_maybe_copy_oop_work(src); } - return resolve_and_maybe_copy_oop_work(src); } JRT_LEAF(oopDesc*, ShenandoahBarrierSet::write_barrier_c2(oopDesc* src)) - oop result = ((ShenandoahBarrierSet*) oopDesc::bs())->resolve_and_maybe_copy_oop_work2(oop(src)); - // tty->print_cr("called C2 write barrier with: %p result: %p copy: %d", (oopDesc*) src, (oopDesc*) result, src != result); - return (oopDesc*) result; -JRT_END - -IRT_LEAF(oopDesc*, ShenandoahBarrierSet::write_barrier_interp(oopDesc* src)) - oop result = ((ShenandoahBarrierSet*)oopDesc::bs())->resolve_and_maybe_copy_oop_work2(oop(src)); - // tty->print_cr("called interpreter write barrier with: %p result: %p", src, result); - return (oopDesc*) result; -IRT_END - -JRT_LEAF(oopDesc*, ShenandoahBarrierSet::write_barrier_c1(JavaThread* thread, oopDesc* src)) - oop result = ((ShenandoahBarrierSet*)oopDesc::bs())->resolve_and_maybe_copy_oop_work2(oop(src)); - // tty->print_cr("called static write barrier (2) with: "PTR_FORMAT" result: "PTR_FORMAT, p2i(src), p2i((oopDesc*)(result))); + oop result = ((ShenandoahBarrierSet*) oopDesc::bs())->resolve_and_maybe_copy_oop_work2(src); return (oopDesc*) result; JRT_END @@ -364,9 +372,30 @@ assert(_heap->is_in(src), "sanity"); assert(src != NULL, "checked before"); oop result = resolve_and_maybe_copy_oopHelper(src); - assert(_heap->is_in(result) && result->is_oop(), "resolved oop must be NULL, or a valid oop in the heap"); + assert(_heap->is_in(result) /*&& result->is_oop()*/, "resolved oop must be NULL, or a valid oop in the heap"); return result; } else { return NULL; } } + +#ifdef ASSERT +void ShenandoahBarrierSet::verify_safe_oop(oop p) { + ShenandoahHeap* heap = ShenandoahHeap::heap(); + if (p == NULL) return; + if (heap->in_collection_set(p) && + ! 
heap->cancelled_concgc()) { + tty->print_cr("oop = "PTR_FORMAT", resolved: "PTR_FORMAT", marked-next %s, marked-complete: %s", + p2i(p), + p2i(read_barrier(p)), + BOOL_TO_STR(heap->is_marked_next(p)), + BOOL_TO_STR(heap->is_marked_complete(p))); + tty->print_cr("in_cset: %s", BOOL_TO_STR(heap->in_collection_set(p))); + heap->heap_region_containing((HeapWord*) p)->print(); + tty->print_cr("top-at-mark-start: %p", heap->next_top_at_mark_start((HeapWord*) p)); + tty->print_cr("top-at-prev-mark-start: %p", heap->complete_top_at_mark_start((HeapWord*) p)); + tty->print_cr("marking: %s, evacuating: %s", BOOL_TO_STR(heap->concurrent_mark_in_progress()), BOOL_TO_STR(heap->is_evacuation_in_progress())); + assert(false, "We should have fixed this earlier"); + } +} +#endif diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahBarrierSet.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahBarrierSet.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahBarrierSet.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -24,14 +24,14 @@ #ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAHBARRIERSET_HPP #define SHARE_VM_GC_SHENANDOAH_SHENANDOAHBARRIERSET_HPP -#include "gc_implementation/shenandoah/shenandoahHeap.hpp" #include "memory/barrierSet.hpp" +class ShenandoahHeap; + class ShenandoahBarrierSet: public BarrierSet { private: ShenandoahHeap* _heap; - static inline oop get_shenandoah_forwardee_helper(oop p); public: @@ -66,7 +66,8 @@ void write_prim_array(MemRegion mr); void write_prim_field(HeapWord* hw, size_t s , juint x, juint y); bool write_prim_needs_barrier(HeapWord* hw, size_t s, juint x, juint y); - void write_ref_array_work(MemRegion mr); + void write_ref_array(HeapWord* start, size_t count); + void write_ref_array_work(MemRegion r); template void write_ref_array_pre_work(T* dst, int count); @@ -96,9 +97,6 @@ static inline oop resolve_oop_static(oop p); - static inline oop resolve_oop_static_no_check(oop p); - - oop resolve_and_maybe_copy_oopHelper(oop src); oop resolve_and_maybe_copy_oop_work(oop src); oop resolve_and_maybe_copy_oop_work2(oop src); @@ -110,11 +108,10 @@ #ifdef ASSERT virtual bool is_safe(oop o); virtual bool is_safe(narrowOop o); + virtual void verify_safe_oop(oop p); #endif static oopDesc* write_barrier_c2(oopDesc* src); - static oopDesc* write_barrier_interp(oopDesc* src); - static oopDesc* write_barrier_c1(JavaThread* thread, oopDesc* src); private: bool need_update_refs_barrier(); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahBarrierSet.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahBarrierSet.inline.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahBarrierSet.inline.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -27,56 +27,8 @@ #include "gc_implementation/shenandoah/shenandoahBarrierSet.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" -inline oop ShenandoahBarrierSet::get_shenandoah_forwardee_helper(oop p) { - assert(UseShenandoahGC, "must only be called when Shenandoah is used."); - assert(Universe::heap()->is_in(p), "We shouldn't be calling this on objects not in the heap"); - oop forwardee; -#ifdef ASSERT - if (ShenandoahVerifyReadsToFromSpace) { - ShenandoahHeap* heap = (ShenandoahHeap *) Universe::heap(); - ShenandoahHeapRegion* region = heap->heap_region_containing(p); - { - region->memProtectionOff(); - forwardee = oop( *((HeapWord**) ((HeapWord*) p) - 1)); - region->memProtectionOn(); - } 
- } else { - forwardee = oop( *((HeapWord**) ((HeapWord*) p) - 1)); - } -#else - forwardee = oop( *((HeapWord**) ((HeapWord*) p) - 1)); -#endif - return forwardee; -} - inline oop ShenandoahBarrierSet::resolve_oop_static_not_null(oop p) { - assert(p != NULL, "Must be NULL checked"); - - oop result = get_shenandoah_forwardee_helper(p); - - assert(result != NULL, "expect not NULL"); -#ifdef ASSERT - if (! oopDesc::unsafe_equals(result, p)) { - oop second_forwarding = get_shenandoah_forwardee_helper(result); - - // We should never be forwarded more than once. - if (! oopDesc::unsafe_equals(result, second_forwarding)) { - ShenandoahHeap* sh = (ShenandoahHeap*) Universe::heap(); - tty->print("first reference "PTR_FORMAT" is in heap region:\n", p2i((HeapWord*) p)); - sh->heap_region_containing(p)->print(); - tty->print("first_forwarding "PTR_FORMAT" is in heap region:\n", p2i((HeapWord*) result)); - sh->heap_region_containing(result)->print(); - tty->print("final reference "PTR_FORMAT" is in heap region:\n", p2i((HeapWord*) second_forwarding)); - sh->heap_region_containing(second_forwarding)->print(); - assert(get_shenandoah_forwardee_helper(result) == result, "Only one fowarding per customer"); - } - } - if (! ShenandoahVerifyReadsToFromSpace) { - // is_oop() would trigger a SEGFAULT when we're checking from-space-access. - assert(ShenandoahHeap::heap()->is_in(result) && result->is_oop(), "resolved oop must be a valid oop in the heap"); - } -#endif - return result; + return BrooksPointer::forwardee(p); } inline oop ShenandoahBarrierSet::resolve_oop_static(oop p) { @@ -87,12 +39,4 @@ } } -inline oop ShenandoahBarrierSet::resolve_oop_static_no_check(oop p) { - if (((HeapWord*) p) != NULL) { - return get_shenandoah_forwardee_helper(p); - } else { - return p; - } -} - #endif //SHARE_VM_GC_SHENANDOAH_SHENANDOAHBARRIERSET_INLINE_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahCollectionSet.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectionSet.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectionSet.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -23,6 +23,7 @@ #include "gc_implementation/shenandoah/shenandoahCollectionSet.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.hpp" +#include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" ShenandoahCollectionSet::ShenandoahCollectionSet(size_t max_regions) : ShenandoahHeapRegionSet(max_regions), @@ -35,7 +36,7 @@ void ShenandoahCollectionSet::add_region(ShenandoahHeapRegion* r) { ShenandoahHeapRegionSet::add_region(r); _garbage += r->garbage(); - _live_data += r->getLiveData(); + _live_data += r->get_live_data(); } size_t ShenandoahCollectionSet::garbage() { @@ -48,9 +49,6 @@ void ShenandoahCollectionSet::clear() { size_t end = _active_end; - for (size_t i = 0; i < end; i++) { - get(i)->set_is_in_collection_set(false); - } ShenandoahHeapRegionSet::clear(); ShenandoahHeap::heap()->clear_cset_fast_test(); _garbage = 0; diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -21,13 +21,78 @@ * */ -#include "gc_implementation/shared/gcPolicyCounters.hpp" +#include "gc_implementation/shared/gcTimer.hpp" #include "gc_implementation/shenandoah/shenandoahCollectionSet.hpp" 
#include "gc_implementation/shenandoah/shenandoahFreeSet.hpp" #include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" +#include "gc_implementation/shenandoah/shenandoahLogging.hpp" +#include "gc_implementation/shenandoah/shenandoahPhaseTimes.hpp" -int compareHeapRegionsByGarbage(ShenandoahHeapRegion* a, ShenandoahHeapRegion* b) { +class ShenandoahHeuristics : public CHeapObj { + + NumberSeq _allocation_rate_bytes; + NumberSeq _reclamation_rate_bytes; + + size_t _bytes_allocated_since_CM; + size_t _bytes_reclaimed_this_cycle; + +protected: + size_t _bytes_allocated_start_CM; + size_t _bytes_allocated_during_CM; + +public: + + ShenandoahHeuristics(); + + void record_bytes_allocated(size_t bytes); + void record_bytes_reclaimed(size_t bytes); + void record_bytes_start_CM(size_t bytes); + void record_bytes_end_CM(size_t bytes); + + virtual void print_thresholds() { + } + + virtual bool should_start_concurrent_mark(size_t used, size_t capacity) const=0; + + virtual void start_choose_collection_set() { + } + virtual void end_choose_collection_set() { + } + virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) = 0; + + void choose_collection_set(ShenandoahCollectionSet* collection_set); + + virtual void choose_free_set(ShenandoahFreeSet* free_set); + + virtual bool process_references() { + if (ShenandoahRefProcFrequency == 0) return false; + size_t cycle = ShenandoahHeap::heap()->shenandoahPolicy()->cycle_counter(); + // Process references every Nth GC cycle. + return cycle % ShenandoahRefProcFrequency == 0; + } + + virtual bool unload_classes() { + if (ShenandoahUnloadClassesFrequency == 0) return false; + size_t cycle = ShenandoahHeap::heap()->shenandoahPolicy()->cycle_counter(); + // Process references every Nth GC cycle. 
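(Editorial aside: process_references() and unload_classes() gate their work on a cycle counter: frequency 0 disables the feature outright, otherwise the work runs on every Nth cycle. In miniature:)

```cpp
#include <cstddef>
#include <cstdio>

bool should_run(size_t cycle, size_t frequency) {
  if (frequency == 0) return false;   // frequency 0 disables the feature
  return cycle % frequency == 0;      // otherwise run on every Nth cycle
}

int main() {
  for (size_t cycle = 0; cycle < 6; cycle++) {
    std::printf("cycle %zu: %s\n", cycle, should_run(cycle, 3) ? "run" : "skip");
  }
  return 0;
}
```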
+ return cycle % ShenandoahUnloadClassesFrequency == 0; + } + +private: + static int compare_heap_regions_by_garbage(ShenandoahHeapRegion* a, ShenandoahHeapRegion* b); + +}; + +ShenandoahHeuristics::ShenandoahHeuristics() : + _bytes_allocated_since_CM(0), + _bytes_reclaimed_this_cycle(0), + _bytes_allocated_start_CM(0), + _bytes_allocated_during_CM(0) +{ +} + +int ShenandoahHeuristics::compare_heap_regions_by_garbage(ShenandoahHeapRegion* a, ShenandoahHeapRegion* b) { if (a == NULL) { if (b == NULL) { return 0; @@ -48,125 +113,92 @@ else return 0; } -class ShenandoahHeuristics : public CHeapObj { - - NumberSeq _allocation_rate_bytes; - NumberSeq _reclamation_rate_bytes; - - size_t _bytes_allocated_since_CM; - size_t _bytes_reclaimed_this_cycle; - -protected: - size_t _bytes_allocated_start_CM; - size_t _bytes_allocated_during_CM; - -private: - size_t _garbage_threshold; - -public: - - ShenandoahHeuristics(); - - void record_bytes_allocated(size_t bytes); - void record_bytes_reclaimed(size_t bytes); - void record_bytes_start_CM(size_t bytes); - void record_bytes_end_CM(size_t bytes); - - virtual bool should_start_concurrent_mark(size_t used, size_t capacity) const=0; - - virtual void choose_collection_set(ShenandoahCollectionSet* collection_set); - virtual void choose_collection_set_min_garbage(ShenandoahCollectionSet* collection_set, size_t min_garbage); - virtual void choose_free_set(ShenandoahFreeSet* free_set); - - void print_tracing_info(); - -protected: - - void set_garbage_threshold(size_t threshold) { - _garbage_threshold = threshold; - } - - size_t garbage_threshold() { - return _garbage_threshold; - } -}; - -ShenandoahHeuristics::ShenandoahHeuristics() : - _bytes_allocated_since_CM(0), - _bytes_reclaimed_this_cycle(0), - _bytes_allocated_start_CM(0), - _bytes_allocated_during_CM(0), - _garbage_threshold(ShenandoahHeapRegion::RegionSizeBytes / 2) -{ - if (PrintGCDetails) - tty->print_cr("initializing heuristics"); -} - void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collection_set) { ShenandoahHeapRegionSet* sorted_regions = ShenandoahHeap::heap()->sorted_regions(); - sorted_regions->sort(compareHeapRegionsByGarbage); + sorted_regions->sort(compare_heap_regions_by_garbage); - jlong i = 0; - jlong end = sorted_regions->active_regions(); + start_choose_collection_set(); - while (i < end) { - ShenandoahHeapRegion* region = sorted_regions->get(i++); - if (region->garbage() > _garbage_threshold && ! region->is_humongous()) { - // tty->print("choose region %d with garbage = " SIZE_FORMAT " and live = " SIZE_FORMAT " and _garbage_threshold = " SIZE_FORMAT "\n", - // region->region_number(), region->garbage(), region->getLiveData(), _garbage_threshold); + size_t i = 0; + size_t end = sorted_regions->active_regions(); + ShenandoahHeap* heap = ShenandoahHeap::heap(); + size_t total_garbage = heap->garbage(); + size_t immediate_garbage = 0; + size_t immediate_regions = 0; + for (size_t i = 0; i < end; i++) { + ShenandoahHeapRegion* region = sorted_regions->get(i); - assert(! region->is_humongous(), "no humongous regions in collection set"); - - if (region->getLiveData() == 0) { + if (! region->is_humongous() && ! region->is_pinned()) { + if ((! region->is_empty()) && region->get_live_data() == 0) { // We can recycle it right away and put it in the free set. 
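(Editorial aside: the selection loop above triages each candidate region three ways: completely dead regions are recycled on the spot with no copying, regions whose garbage clears the heuristic's threshold go into the collection set, and the rest are rejected. A condensed sketch with invented sizes; the 60% figure is only an example, the real value comes from ShenandoahGarbageThreshold:)

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

struct Region {
  size_t used, live;   // bytes
  size_t garbage() const { return used - live; }
};

const size_t kRegionSize     = 1024 * 1024;
const size_t kGarbagePercent = 60;   // example value only

int main() {
  std::vector<Region> regions = {
    {900 * 1024, 0},            // fully dead
    {900 * 1024, 100 * 1024},   // mostly garbage
    {900 * 1024, 800 * 1024},   // mostly live
  };
  size_t threshold = kRegionSize * kGarbagePercent / 100;
  for (const Region& r : regions) {
    if (r.used != 0 && r.live == 0) {
      std::printf("immediate reclaim, garbage=%zu\n", r.garbage());
    } else if (r.garbage() > threshold) {
      std::printf("into collection set, garbage=%zu\n", r.garbage());
    } else {
      std::printf("rejected, garbage=%zu\n", r.garbage());
    }
  }
  return 0;
}
```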
-      ShenandoahHeap::heap()->decrease_used(region->used());
+        immediate_regions++;
+        immediate_garbage += region->garbage();
+        heap->decrease_used(region->used());
         region->recycle();
-      } else {
+        log_develop_trace(gc)("Choose region " SIZE_FORMAT " for immediate reclaim with garbage = " SIZE_FORMAT
+                              " and live = " SIZE_FORMAT "\n",
+                              region->region_number(), region->garbage(), region->get_live_data());
+      } else if (region_in_collection_set(region, immediate_garbage)) {
+        log_develop_trace(gc)("Choose region " SIZE_FORMAT " with garbage = " SIZE_FORMAT
+                              " and live = " SIZE_FORMAT "\n",
+                              region->region_number(), region->garbage(), region->get_live_data());
         collection_set->add_region(region);
-        region->set_is_in_collection_set(true);
+        region->set_in_collection_set(true);
       }
-      // } else {
-      //   tty->print("rejected region %d with garbage = " SIZE_FORMAT " and live = " SIZE_FORMAT " and _garbage_threshold = " SIZE_FORMAT "\n",
-      //              region->region_number(), region->garbage(), region->getLiveData(), _garbage_threshold);
+    } else {
+      assert(region->get_live_data() != 0 || region->is_empty() || region->is_pinned() || region->is_humongous(), "check rejected");
+      log_develop_trace(gc)("Rejected region " SIZE_FORMAT " with garbage = " SIZE_FORMAT
+                            " and live = " SIZE_FORMAT "\n",
+                            region->region_number(), region->garbage(), region->get_live_data());
     }
   }
-}
+  end_choose_collection_set();

-void ShenandoahHeuristics::choose_collection_set_min_garbage(ShenandoahCollectionSet* collection_set, size_t min_garbage) {
-  ShenandoahHeapRegionSet* sorted_regions = ShenandoahHeap::heap()->sorted_regions();
-  sorted_regions->sort(compareHeapRegionsByGarbage);
-  jlong i = 0;
-  jlong end = sorted_regions->active_regions();
-
-  size_t garbage = 0;
-  while (i < end && garbage < min_garbage) {
-    ShenandoahHeapRegion* region = sorted_regions->get(i++);
-    if (region->garbage() > _garbage_threshold && ! region->is_humongous()) {
-      collection_set->add_region(region);
-      garbage += region->garbage();
-      region->set_is_in_collection_set(true);
-    }
-  }
+  log_debug(gc)("Total Garbage: "SIZE_FORMAT, total_garbage);
+  log_debug(gc)("Immediate Garbage: "SIZE_FORMAT, immediate_garbage);
+  log_debug(gc)("Immediate Garbage regions: "SIZE_FORMAT, immediate_regions);
+  log_debug(gc)("Garbage to be collected: "SIZE_FORMAT, collection_set->garbage());
+  log_debug(gc)("Objects to be evacuated: "SIZE_FORMAT, collection_set->live_data());
+  log_debug(gc)("Live / Garbage ratio: "SIZE_FORMAT"%%", collection_set->live_data() * 100 / MAX2(collection_set->garbage(), 1UL));
+  log_debug(gc)("Collected-Garbage ratio / Total-garbage: "SIZE_FORMAT"%%", collection_set->garbage() * 100 / MAX2(total_garbage, 1UL));
 }

 void ShenandoahHeuristics::choose_free_set(ShenandoahFreeSet* free_set) {

   ShenandoahHeapRegionSet* ordered_regions = ShenandoahHeap::heap()->regions();
-  jlong i = 0;
-  jlong end = ordered_regions->active_regions();
+  size_t i = 0;
+  size_t end = ordered_regions->active_regions();

+  ShenandoahHeap* heap = ShenandoahHeap::heap();
   while (i < end) {
     ShenandoahHeapRegion* region = ordered_regions->get(i++);
-    if ((! region->is_in_collection_set())
-        && (! region->is_humongous())) {
+    if ((! heap->in_collection_set(region))
+        && (! region->is_humongous())
+        && (! region->is_pinned())) {
       free_set->add_region(region);
     }
   }
 }

+void ShenandoahCollectorPolicy::record_workers_start(TimingPhase phase) {
+  for (uint i = 0; i < ShenandoahPhaseTimes::GCParPhasesSentinel; i++) {
+    _phase_times->reset(i);
+  }
+}
+
+void ShenandoahCollectorPolicy::record_workers_end(TimingPhase phase) {
+  if (phase != _num_phases) {
+    for (uint i = 0; i < ShenandoahPhaseTimes::GCParPhasesSentinel; i++) {
+      double t = _phase_times->average(i);
+      _timing_data[phase + i]._ms.add(t * 1000.0);
+    }
+  }
+}
+
 void ShenandoahCollectorPolicy::record_phase_start(TimingPhase phase) {
   _timing_data[phase]._start = os::elapsedTime();
+
 }

 void ShenandoahCollectorPolicy::record_phase_end(TimingPhase phase) {
@@ -174,10 +206,6 @@
   double elapsed = end - _timing_data[phase]._start;
   _timing_data[phase]._ms.add(elapsed * 1000);

-  if (ShenandoahGCVerbose && PrintGCDetails) {
-    tty->print_cr("PolicyPrint: %s "SIZE_FORMAT" took %lf ms", _phase_names[phase],
-                  _timing_data[phase]._count++, elapsed * 1000);
-  }
 }

 void ShenandoahCollectorPolicy::report_concgc_cancelled() {
@@ -203,118 +231,65 @@
     : bytes;
 }

+class PassiveHeuristics : public ShenandoahHeuristics {
+public:
+  PassiveHeuristics() : ShenandoahHeuristics() {
+  }
+
+  virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) {
+    return r->garbage() > 0;
+  }
+
+  virtual bool should_start_concurrent_mark(size_t used, size_t capacity) const {
+    // Never do concurrent GCs.
+    return false;
+  }
+
+  virtual bool process_references() {
+    // Randomly process refs with 50% chance.
+    return (os::random() & 1) == 1;
+  }
+
+  virtual bool unload_classes() {
+    // Randomly unload classes with 50% chance.
+    return (os::random() & 1) == 1;
+  }
+};
+
 class AggressiveHeuristics : public ShenandoahHeuristics {
 public:
-  AggressiveHeuristics() : ShenandoahHeuristics(){
-    if (PrintGCDetails)
-      tty->print_cr("Initializing aggressive heuristics");
+  AggressiveHeuristics() : ShenandoahHeuristics() {
+  }

-    set_garbage_threshold(8);
+  virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) {
+    return r->garbage() > 0;
   }

   virtual bool should_start_concurrent_mark(size_t used, size_t capacity) const {
     return true;
   }
-};
-class HalfwayHeuristics : public ShenandoahHeuristics {
-public:
-  HalfwayHeuristics() : ShenandoahHeuristics() {
-    if (PrintGCDetails)
-      tty->print_cr("Initializing halfway heuristics");
-
-    set_garbage_threshold(ShenandoahHeapRegion::RegionSizeBytes / 2);
+  virtual bool process_references() {
+    // Randomly process refs with 50% chance.
+    return (os::random() & 1) == 1;
   }

-  bool should_start_concurrent_mark(size_t used, size_t capacity) const {
-    ShenandoahHeap* heap = ShenandoahHeap::heap();
-    size_t threshold_bytes_allocated = heap->capacity() / 4;
-    if (used * 2 > capacity && heap->_bytesAllocSinceCM > threshold_bytes_allocated)
-      return true;
-    else
-      return false;
+  virtual bool unload_classes() {
+    // Randomly unload classes with 50% chance.
+    return (os::random() & 1) == 1;
   }
 };

-// GC as little as possible
-class LazyHeuristics : public ShenandoahHeuristics {
+class DynamicHeuristics : public ShenandoahHeuristics {
 public:
-  LazyHeuristics() : ShenandoahHeuristics() {
-    if (PrintGCDetails) {
-      tty->print_cr("Initializing lazy heuristics");
-    }
+  DynamicHeuristics() : ShenandoahHeuristics() {
   }

-  virtual bool should_start_concurrent_mark(size_t used, size_t capacity) const {
-    size_t targetStartMarking = (capacity / 5) * 4;
-    if (used > targetStartMarking) {
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-};
-
-// These are the heuristics in place when we made this class
-class StatusQuoHeuristics : public ShenandoahHeuristics {
-public:
-  StatusQuoHeuristics() : ShenandoahHeuristics() {
-    if (PrintGCDetails) {
-      tty->print_cr("Initializing status quo heuristics");
-    }
-  }
-
-  virtual bool should_start_concurrent_mark(size_t used, size_t capacity) const {
-    size_t targetStartMarking = capacity / 16;
-    ShenandoahHeap* heap = ShenandoahHeap::heap();
-    size_t threshold_bytes_allocated = heap->capacity() / 4;
-
-    if (used > targetStartMarking
-        && heap->_bytesAllocSinceCM > threshold_bytes_allocated) {
-      // Need to check that an appropriate number of regions have
-      // been allocated since last concurrent mark too.
-      return true;
-    } else {
-      return false;
-    }
-  }
-};
-
-static uintx clamp(uintx value, uintx min, uintx max) {
-  value = MAX2(value, min);
-  value = MIN2(value, max);
-  return value;
-}
-
-static double get_percent(uintx value) {
-  double _percent = static_cast<double>(clamp(value, 0, 100));
-  return _percent / 100.;
-}
-
-class DynamicHeuristics : public ShenandoahHeuristics {
-private:
-  double _free_threshold_factor;
-  double _garbage_threshold_factor;
-  double _allocation_threshold_factor;
-
-  uintx _free_threshold;
-  uintx _garbage_threshold;
-  uintx _allocation_threshold;
-
-public:
-  DynamicHeuristics() : ShenandoahHeuristics() {
-    if (PrintGCDetails) {
-      tty->print_cr("Initializing dynamic heuristics");
-    }
-
-    _free_threshold = 0;
-    _garbage_threshold = 0;
-    _allocation_threshold = 0;
-
-    _free_threshold_factor = 0.;
-    _garbage_threshold_factor = 0.;
-    _allocation_threshold_factor = 0.;
+  void print_thresholds() {
+    log_info(gc, init)("Shenandoah heuristics thresholds: allocation "SIZE_FORMAT", free "SIZE_FORMAT", garbage "SIZE_FORMAT,
+                       ShenandoahAllocationThreshold,
+                       ShenandoahFreeThreshold,
+                       ShenandoahGarbageThreshold);
   }

   virtual ~DynamicHeuristics() {}
@@ -331,49 +306,23 @@
     uintx factor = heap->need_update_refs() ? ShenandoahFreeThreshold : ShenandoahInitialFreeThreshold;
     size_t targetStartMarking = (capacity * factor) / 100;

-    size_t threshold_bytes_allocated = heap->capacity() * _allocation_threshold_factor;
+    size_t threshold_bytes_allocated = heap->capacity() * ShenandoahAllocationThreshold / 100;
     if (available < targetStartMarking &&
-        heap->_bytesAllocSinceCM > threshold_bytes_allocated
+        heap->bytes_allocated_since_cm() > threshold_bytes_allocated)
     {
       // Need to check that an appropriate number of regions have
      // been allocated since last concurrent mark too.
       shouldStartConcurrentMark = true;
     }

-    if (shouldStartConcurrentMark && ShenandoahTracePhases) {
-      gclog_or_tty->print_cr("Start GC at available: "SIZE_FORMAT", capacity: "SIZE_FORMAT", used: "SIZE_FORMAT", factor: "UINTX_FORMAT", update-refs: %s", available, free_capacity, free_used, factor, BOOL_TO_STR(heap->need_update_refs()));
-      gclog_or_tty->flush();
-    }
     return shouldStartConcurrentMark;
   }

-  void set_free_threshold(uintx free_threshold) {
-    this->_free_threshold_factor = get_percent(free_threshold);
-    this->_free_threshold = free_threshold;
+  virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) {
+    size_t threshold = ShenandoahHeapRegion::RegionSizeBytes * ShenandoahGarbageThreshold / 100;
+    return r->garbage() > threshold;
   }

-  void set_garbage_threshold_x(uintx garbage_threshold) {
-    this->_garbage_threshold_factor = get_percent(garbage_threshold);
-    this->_garbage_threshold = garbage_threshold;
-    set_garbage_threshold(ShenandoahHeapRegion::RegionSizeBytes * _garbage_threshold_factor);
-  }
-
-  void set_allocation_threshold(uintx allocationThreshold) {
-    this->_allocation_threshold_factor = get_percent(allocationThreshold);
-    this->_allocation_threshold = allocationThreshold;
-  }
-
-  uintx get_allocation_threshold() {
-    return this->_allocation_threshold;
-  }
-
-  uintx get_garbage_threshold_x() {
-    return this->_garbage_threshold;
-  }
-
-  uintx get_free_threshold() {
-    return this->_free_threshold;
-  }
 };


@@ -388,12 +337,9 @@
   uintx _garbage_threshold;
   uintx _allocation_threshold;

+  size_t _garbage;
 public:
   AdaptiveHeuristics() : ShenandoahHeuristics() {
-    if (PrintGCDetails) {
-      tty->print_cr("Initializing adaptive heuristics");
-    }
-
     _max_live_data = 0;

     _used_threshold = 0;
@@ -407,6 +353,22 @@

   virtual ~AdaptiveHeuristics() {}

+  virtual void start_choose_collection_set() {
+    _garbage = 0;
+  }
+
+  virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) {
+    size_t bytes_alloc = ShenandoahHeap::heap()->bytes_allocated_since_cm();
+    size_t min_garbage = bytes_alloc/* * 1.1*/;
+    size_t threshold = ShenandoahHeapRegion::RegionSizeBytes * ShenandoahGarbageThreshold / 100;
+    if (_garbage + immediate_garbage < min_garbage && r->garbage() > threshold) {
+      _garbage += r->garbage();
+      return true;
+    } else {
+      return false;
+    }
+  }
+
   virtual bool should_start_concurrent_mark(size_t used, size_t capacity) const {

     ShenandoahHeap* _heap = ShenandoahHeap::heap();
@@ -419,7 +381,7 @@
     } else {
       max_live_data *= 1.3; // Add some wiggle room.
     }
-    size_t max_cycle_allocated = _heap->_max_allocated_gc;
+    size_t max_cycle_allocated = _heap->max_allocated_gc();
     if (max_cycle_allocated == 0) {
       max_cycle_allocated = capacity * 0.3; // Very generous.
     } else {
@@ -434,154 +396,67 @@
     return shouldStartConcurrentMark;
   }

-  virtual void choose_collection_set(ShenandoahCollectionSet* collection_set) {
-    size_t bytes_alloc = ShenandoahHeap::heap()->_bytesAllocSinceCM;
-    size_t min_garbage = bytes_alloc/* * 1.1*/;
-    set_garbage_threshold(ShenandoahHeapRegion::RegionSizeBytes * _garbage_threshold_factor);
-    ShenandoahHeuristics::choose_collection_set_min_garbage(collection_set, min_garbage);
-    /*
-    tty->print_cr("garbage to be collected: "SIZE_FORMAT, collection_set->garbage());
-    tty->print_cr("objects to be evacuated: "SIZE_FORMAT, collection_set->live_data());
-    */
-    _max_live_data = MAX2(_max_live_data, collection_set->live_data());
+};
+
+class GlobalHeuristics : public DynamicHeuristics {
+private:
+  size_t _garbage;
+  size_t _min_garbage;
+public:
+  GlobalHeuristics() : DynamicHeuristics() {
+    if (FLAG_IS_DEFAULT(ShenandoahGarbageThreshold)) {
+      FLAG_SET_DEFAULT(ShenandoahGarbageThreshold, 90);
+    }
+  }
+  virtual ~GlobalHeuristics() {}
+
+  virtual void start_choose_collection_set() {
+    _garbage = 0;
+    size_t heap_garbage = ShenandoahHeap::heap()->garbage();
+    _min_garbage = heap_garbage * ShenandoahGarbageThreshold / 100;
   }

-  void set_used_threshold(uintx used_threshold) {
-    this->_used_threshold_factor = get_percent(used_threshold);
-    this->_used_threshold = used_threshold;
+  virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) {
+    if (_garbage + immediate_garbage < _min_garbage) {
+      _garbage += r->garbage();
+      return true;
+    } else {
+      return false;
+    }
   }

-  void set_garbage_threshold_x(uintx garbage_threshold) {
-    this->_garbage_threshold_factor = get_percent(garbage_threshold);
-    this->_garbage_threshold = garbage_threshold;
+};
+
+class RatioHeuristics : public DynamicHeuristics {
+private:
+  size_t _garbage;
+  size_t _live;
+public:
+  RatioHeuristics() : DynamicHeuristics() {
+    if (FLAG_IS_DEFAULT(ShenandoahGarbageThreshold)) {
+      FLAG_SET_DEFAULT(ShenandoahGarbageThreshold, 95);
+    }
+  }
+  virtual ~RatioHeuristics() {}
+
+  virtual void start_choose_collection_set() {
+    _garbage = 0;
+    _live = 0;
   }

-  void set_allocation_threshold(uintx allocationThreshold) {
-    this->_allocation_threshold_factor = get_percent(allocationThreshold);
-    this->_allocation_threshold = allocationThreshold;
-  }
-
-  uintx get_allocation_threshold() {
-    return this->_allocation_threshold;
-  }
-
-  uintx get_garbage_threshold_x() {
-    return this->_garbage_threshold;
-  }
-
-  uintx get_used_threshold() {
-    return this->_used_threshold;
+  virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) {
+    size_t min_ratio = 100 - ShenandoahGarbageThreshold;
+    if (_live * 100 / MAX2(_garbage + immediate_garbage, 1UL) < min_ratio) {
+      _garbage += r->garbage();
+      _live += r->get_live_data();
+      return true;
+    } else {
+      return false;
+    }
   }
 };
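The ratio-based selection above is easiest to see with concrete numbers. The following is a minimal standalone sketch of the RatioHeuristics accumulation rule only, using simplified hypothetical types rather than the VM's ShenandoahHeapRegion; it is an illustration, not the VM code:

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for a heap region, just to show the arithmetic.
struct Region { uint64_t garbage; uint64_t live; };

// Default set by RatioHeuristics when the flag is left at its default.
static const uint64_t GarbageThreshold = 95;

// Keep taking regions while accumulated live data stays below
// (100 - GarbageThreshold) percent of accumulated garbage.
static bool ratio_accepts(uint64_t& acc_garbage, uint64_t& acc_live,
                          uint64_t immediate_garbage, const Region& r) {
  uint64_t min_ratio = 100 - GarbageThreshold;      // 5% with the default
  uint64_t denom = acc_garbage + immediate_garbage;
  if (denom == 0) denom = 1;                        // mirrors MAX2(..., 1UL)
  if (acc_live * 100 / denom < min_ratio) {
    acc_garbage += r.garbage;
    acc_live    += r.live;
    return true;
  }
  return false;
}

int main() {
  uint64_t g = 0, l = 0;
  Region rs[] = { {1000, 10}, {800, 50}, {100, 900} };
  for (const Region& r : rs)
    std::printf("%s\n", ratio_accepts(g, l, 0, r) ? "take" : "reject");
}

With the default threshold of 95, regions keep being added while the running live/garbage percentage stays under 5%, which strongly favors sparsely-live regions.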
-class NewAdaptiveHeuristics : public ShenandoahHeuristics {
-private:
-  size_t _max_live_data;
-  double _target_heap_occupancy_factor;
-  double _allocation_threshold_factor;
-  size_t _last_bytesAllocSinceCM;
-
-  uintx _target_heap_occupancy;
-  uintx _allocation_threshold;
-
-public:
-  NewAdaptiveHeuristics() : ShenandoahHeuristics()
-  {
-    if (PrintGCDetails) {
-      tty->print_cr("Initializing newadaptive heuristics");
-    }
-    _max_live_data = 0;
-    _allocation_threshold = 0;
-    _target_heap_occupancy_factor = 0.;
-    _allocation_threshold_factor = 0.;
-    _last_bytesAllocSinceCM = 0;
-  }
-
-  virtual ~NewAdaptiveHeuristics() {}
-
-  virtual bool should_start_concurrent_mark(size_t used, size_t capacity) const
-  {
-    if (this->_bytes_allocated_during_CM > 0) {
-      // Not the first concurrent mark.
-      // _bytes_allocated_during_CM
-      ShenandoahHeap *heap = ShenandoahHeap::heap();
-      size_t threshold_bytes_allocated = heap->capacity() / 4;
-      size_t targetStartMarking = (size_t) capacity * this->_target_heap_occupancy_factor;
-      return (used > targetStartMarking) && (this->_bytes_allocated_during_CM > threshold_bytes_allocated);
-    } else {
-      // First concurrent mark.
-      size_t targetStartMarking = capacity / 2;
-      ShenandoahHeap *heap = ShenandoahHeap::heap();
-      size_t threshold_bytes_allocated = heap->capacity() / 4;
-
-      // Need to check that an appropriate number of regions have
-      // been allocated since last concurrent mark too.
-      return (used > targetStartMarking) && (heap->_bytesAllocSinceCM > threshold_bytes_allocated);
-    }
-  }
-
-  virtual void choose_collection_set(ShenandoahCollectionSet* collection_set) {
-    ShenandoahHeap *_heap = ShenandoahHeap::heap();
-    this->_last_bytesAllocSinceCM = ShenandoahHeap::heap()->_bytesAllocSinceCM;
-    if (this->_last_bytesAllocSinceCM > 0) {
-      size_t min_garbage = this->_last_bytesAllocSinceCM;
-      ShenandoahHeuristics::choose_collection_set_min_garbage(collection_set, min_garbage);
-    } else {
-      set_garbage_threshold(ShenandoahHeapRegion::RegionSizeBytes / 2);
-      ShenandoahHeuristics::choose_collection_set(collection_set);
-    }
-    this->_max_live_data = MAX2(this->_max_live_data, collection_set->live_data());
-  }
-
-  void set_target_heap_occupancy(uintx target_heap_occupancy) {
-    this->_target_heap_occupancy_factor = get_percent(target_heap_occupancy);
-    this->_target_heap_occupancy = target_heap_occupancy;
-  }
-
-  void set_allocation_threshold(uintx allocationThreshold) {
-    this->_allocation_threshold_factor = get_percent(allocationThreshold);
-    this->_allocation_threshold = allocationThreshold;
-  }
-
-  uintx get_allocation_threshold() {
-    return this->_allocation_threshold;
-  }
-
-  uintx get_target_heap_occupancy() {
-    return this->_target_heap_occupancy;
-  }
-};
-
-
-static DynamicHeuristics *configureDynamicHeuristics() {
-  DynamicHeuristics *heuristics = new DynamicHeuristics();
-
-  heuristics->set_garbage_threshold_x(ShenandoahGarbageThreshold);
-  heuristics->set_allocation_threshold(ShenandoahAllocationThreshold);
-  heuristics->set_free_threshold(ShenandoahFreeThreshold);
-  if (ShenandoahLogConfig) {
-    tty->print_cr("Shenandoah dynamic heuristics thresholds: allocation "SIZE_FORMAT", free "SIZE_FORMAT", garbage "SIZE_FORMAT,
-                  heuristics->get_allocation_threshold(),
-                  heuristics->get_free_threshold(),
-                  heuristics->get_garbage_threshold_x());
-  }
-  return heuristics;
-}
-
-
-static NewAdaptiveHeuristics* configureNewAdaptiveHeuristics() {
-  NewAdaptiveHeuristics* heuristics = new NewAdaptiveHeuristics();
-
-  heuristics->set_target_heap_occupancy(ShenandoahTargetHeapOccupancy);
-  if (ShenandoahLogConfig) {
-    tty->print_cr( "Shenandoah newadaptive heuristics target heap occupancy: "SIZE_FORMAT,
-                   heuristics->get_target_heap_occupancy() );
-  }
-  return heuristics;
-}
-
-
-ShenandoahCollectorPolicy::ShenandoahCollectorPolicy() {
+ShenandoahCollectorPolicy::ShenandoahCollectorPolicy() : _cycle_counter(0) {

   ShenandoahHeapRegion::setup_heap_region_size(initial_heap_byte_size(), max_heap_byte_size());
@@ -594,74 +469,103 @@
   _allocation_failure_gcs = 0;

   _conc_gc_aborted = false;

-  _phase_names[init_mark] = "InitMark";
-  _phase_names[init_mark_gross] = "InitMarkGross";
-  _phase_names[final_mark] = "FinalMark";
-  _phase_names[final_mark_gross] = "FinalMarkGross";
-  _phase_names[accumulate_stats] = "AccumulateStats";
-  _phase_names[make_parsable] = "MakeParsable";
-  _phase_names[clear_liveness] = "ClearLiveness";
-  _phase_names[scan_roots] = "ScanRoots";
-  _phase_names[rescan_roots] = "RescanRoots";
-  _phase_names[drain_satb] = "DrainSATB";
-  _phase_names[drain_queues] = "DrainQueues";
-  _phase_names[weakrefs] = "WeakRefs";
-  _phase_names[class_unloading] = "ClassUnloading";
-  _phase_names[prepare_evac] = "PrepareEvac";
-  _phase_names[init_evac] = "InitEvac";
+  _phase_names[init_mark] = "Initial Mark Pauses (net)";
+  _phase_names[init_mark_gross] = "Initial Mark Pauses (gross)";
+  _phase_names[final_mark] = "Final Mark Pauses (net)";
+  _phase_names[final_mark_gross] = "Final Mark Pauses (gross)";
+  _phase_names[accumulate_stats] = "  Accumulate Stats";
+  _phase_names[make_parsable] = "  Make Parsable";
+  _phase_names[clear_liveness] = "  Clear Liveness";
+  _phase_names[scan_roots] = "  Scan Roots";
+  _phase_names[update_roots] = "  Update Roots";
+  _phase_names[drain_satb] = "  Drain SATB";
+  _phase_names[weakrefs] = "  Weak References";
+  _phase_names[class_unloading] = "  Class Unloading";
+  _phase_names[prepare_evac] = "  Prepare Evacuation";
+  _phase_names[init_evac] = "  Initial Evacuation";

-  _phase_names[recycle_regions] = "RecycleRegions";
+  _phase_names[scan_thread_roots] = "    Scan Thread Roots";
+  _phase_names[scan_code_roots] = "    Scan Code Cache Roots";
+  _phase_names[scan_string_table_roots] = "    Scan String Table Roots";
+  _phase_names[scan_universe_roots] = "    Scan Universe Roots";
+  _phase_names[scan_jni_roots] = "    Scan JNI Roots";
+  _phase_names[scan_jni_weak_roots] = "    Scan JNI Weak Roots";
+  _phase_names[scan_synchronizer_roots] = "    Scan Synchronizer Roots";
+  _phase_names[scan_flat_profiler_roots] = "    Scan Flat Profiler Roots";
+  _phase_names[scan_management_roots] = "    Scan Management Roots";
+  _phase_names[scan_system_dictionary_roots] = "    Scan System Dictionary Roots";
+  _phase_names[scan_cldg_roots] = "    Scan CLDG Roots";
+  _phase_names[scan_jvmti_roots] = "    Scan JVMTI Roots";
+
+  _phase_names[update_thread_roots] = "    Update Thread Roots";
+  _phase_names[update_code_roots] = "    Update Code Cache Roots";
+  _phase_names[update_string_table_roots] = "    Update String Table Roots";
+  _phase_names[update_universe_roots] = "    Update Universe Roots";
+  _phase_names[update_jni_roots] = "    Update JNI Roots";
+  _phase_names[update_jni_weak_roots] = "    Update JNI Weak Roots";
+  _phase_names[update_synchronizer_roots] = "    Update Synchronizer Roots";
+  _phase_names[update_flat_profiler_roots] = "    Update Flat Profiler Roots";
+  _phase_names[update_management_roots] = "    Update Management Roots";
+  _phase_names[update_system_dictionary_roots] = "    Update System Dictionary Roots";
+  _phase_names[update_cldg_roots] = "    Update CLDG Roots";
+  _phase_names[update_jvmti_roots] = "    Update JVMTI Roots";
+
+  _phase_names[evac_thread_roots] = "    Evacuate Thread Roots";
+  _phase_names[evac_code_roots] = "    Evacuate Code Cache Roots";
+  _phase_names[evac_string_table_roots] = "    Evacuate String Table Roots";
+  _phase_names[evac_universe_roots] = "    Evacuate Universe Roots";
+  _phase_names[evac_jni_roots] = "    Evacuate JNI Roots";
+  _phase_names[evac_jni_weak_roots] = "    Evacuate JNI Weak Roots";
+  _phase_names[evac_synchronizer_roots] = "    Evacuate Synchronizer Roots";
+  _phase_names[evac_flat_profiler_roots] = "    Evacuate Flat Profiler Roots";
+  _phase_names[evac_management_roots] = "    Evacuate Management Roots";
+  _phase_names[evac_system_dictionary_roots] = "    Evacuate System Dictionary Roots";
+  _phase_names[evac_cldg_roots] = "    Evacuate CLDG Roots";
+  _phase_names[evac_jvmti_roots] = "    Evacuate JVMTI Roots";
+
+  _phase_names[recycle_regions] = "  Recycle regions";

   _phase_names[reset_bitmaps] = "ResetBitmaps";
-  _phase_names[resize_tlabs] = "ResizeTLABs";
+  _phase_names[resize_tlabs] = "Resize TLABs";

-  _phase_names[full_gc] = "FullGC";
-  _phase_names[conc_mark] = "ConcurrentMark";
-  _phase_names[conc_evac] = "ConcurrentEvacuation";
+  _phase_names[full_gc] = "Full GC Times";
+  _phase_names[full_gc_mark] = "  Mark";
+  _phase_names[full_gc_mark_drain_queues] = "    Drain Queues";
+  _phase_names[full_gc_mark_weakrefs] = "    Weak References";
+  _phase_names[full_gc_mark_class_unloading] = "    Class Unloading";
+  _phase_names[full_gc_calculate_addresses] = "  Calculate Addresses";
+  _phase_names[full_gc_adjust_pointers] = "  Adjust Pointers";
+  _phase_names[full_gc_copy_objects] = "  Copy Objects";
+
+  _phase_names[conc_mark] = "Concurrent Marking Times";
+  _phase_names[conc_evac] = "Concurrent Evacuation Times";

   if (ShenandoahGCHeuristics != NULL) {
     if (strcmp(ShenandoahGCHeuristics, "aggressive") == 0) {
-      if (ShenandoahLogConfig) {
-        tty->print_cr("Shenandoah heuristics: aggressive");
-      }
+      log_info(gc, init)("Shenandoah heuristics: aggressive");
       _heuristics = new AggressiveHeuristics();
-    } else if (strcmp(ShenandoahGCHeuristics, "statusquo") == 0) {
-      if (ShenandoahLogConfig) {
-        tty->print_cr("Shenandoah heuristics: statusquo");
-      }
-      _heuristics = new StatusQuoHeuristics();
-    } else if (strcmp(ShenandoahGCHeuristics, "halfway") == 0) {
-      if (ShenandoahLogConfig) {
-        tty->print_cr("Shenandoah heuristics: halfway");
-      }
-      _heuristics = new HalfwayHeuristics();
-    } else if (strcmp(ShenandoahGCHeuristics, "lazy") == 0) {
-      if (ShenandoahLogConfig) {
-        tty->print_cr("Shenandoah heuristics: lazy");
-      }
-      _heuristics = new LazyHeuristics();
     } else if (strcmp(ShenandoahGCHeuristics, "dynamic") == 0) {
-      if (ShenandoahLogConfig) {
-        tty->print_cr("Shenandoah heuristics: dynamic");
-      }
-      _heuristics = configureDynamicHeuristics();
+      log_info(gc, init)("Shenandoah heuristics: dynamic");
+      _heuristics = new DynamicHeuristics();
+    } else if (strcmp(ShenandoahGCHeuristics, "global") == 0) {
+      log_info(gc, init)("Shenandoah heuristics: global");
+      _heuristics = new GlobalHeuristics();
+    } else if (strcmp(ShenandoahGCHeuristics, "ratio") == 0) {
+      log_info(gc, init)("Shenandoah heuristics: ratio");
+      _heuristics = new RatioHeuristics();
    } else if (strcmp(ShenandoahGCHeuristics, "adaptive") == 0) {
-      if (ShenandoahLogConfig) {
-        tty->print_cr("Shenandoah heuristics: adaptive");
-      }
+      log_info(gc, init)("Shenandoah heuristics: adaptive");
       _heuristics = new AdaptiveHeuristics();
-    } else if (strcmp(ShenandoahGCHeuristics, "newadaptive") == 0) {
-      if (ShenandoahLogConfig) {
-        tty->print_cr("Shenandoah heuristics: newadaptive");
-      }
-      _heuristics = configureNewAdaptiveHeuristics();
+    } else if (strcmp(ShenandoahGCHeuristics, "passive") == 0) {
+      log_info(gc, init)("Shenandoah heuristics: passive");
+      _heuristics = new PassiveHeuristics();
     } else {
-      fatal("Unknown -XX:ShenandoahGCHeuristics option");
+      vm_exit_during_initialization("Unknown -XX:ShenandoahGCHeuristics option");
     }
+    _heuristics->print_thresholds();
   } else {
-    ShouldNotReachHere();
+      ShouldNotReachHere();
   }
-
-  _gc_policy_counters = new GCPolicyCounters("Shenandoah", 3, 1);
+  _phase_times = new ShenandoahPhaseTimes(MAX2(ConcGCThreads, ParallelGCThreads));
 }

 ShenandoahCollectorPolicy* ShenandoahCollectorPolicy::as_pgc_policy() {
@@ -721,7 +625,6 @@

 bool ShenandoahCollectorPolicy::should_start_concurrent_mark(size_t used, size_t capacity) {
-  ShenandoahHeap* heap = ShenandoahHeap::heap();
   return _heuristics->should_start_concurrent_mark(used, capacity);
 }

@@ -733,41 +636,25 @@
   _heuristics->choose_free_set(free_set);
 }

-void ShenandoahCollectorPolicy::print_tracing_info() {
-  print_summary_sd("Initial Mark Pauses (gross)", 0, &(_timing_data[init_mark_gross]._ms));
-  print_summary_sd("Initial Mark Pauses (net)", 0, &(_timing_data[init_mark]._ms));
-  print_summary_sd("Accumulate Stats", 2, &(_timing_data[accumulate_stats]._ms));
-  print_summary_sd("Make Parsable", 2, &(_timing_data[make_parsable]._ms));
-  print_summary_sd("Clear Liveness", 2, &(_timing_data[clear_liveness]._ms));
-  print_summary_sd("Scan Roots", 2, &(_timing_data[scan_roots]._ms));
-  print_summary_sd("Resize TLABs", 2, &(_timing_data[resize_tlabs]._ms));
-  print_summary_sd("Final Mark Pauses (gross)", 0, &(_timing_data[final_mark_gross]._ms));
-  print_summary_sd("Final Mark Pauses (net)", 0, &(_timing_data[final_mark]._ms));
+bool ShenandoahCollectorPolicy::process_references() {
+  return _heuristics->process_references();
+}

-  print_summary_sd("Rescan Roots", 2, &(_timing_data[rescan_roots]._ms));
-  print_summary_sd("Drain SATB", 2, &(_timing_data[drain_satb]._ms));
-  print_summary_sd("Drain Queues", 2, &(_timing_data[drain_queues]._ms));
-  if (ShenandoahProcessReferences) {
-    print_summary_sd("Weak References", 2, &(_timing_data[weakrefs]._ms));
+bool ShenandoahCollectorPolicy::unload_classes() {
+  return _heuristics->unload_classes();
+}
+
+void ShenandoahCollectorPolicy::print_tracing_info(outputStream* out) {
+  for (uint i = 0; i < _num_phases; i++) {
+    if (_timing_data[i]._ms.maximum() != 0) {
+      print_summary_sd(out, _phase_names[i], &(_timing_data[i]._ms));
+    }
   }
-  if (ClassUnloadingWithConcurrentMark) {
-    print_summary_sd("Class Unloading", 2, &(_timing_data[class_unloading]._ms));
-  }
-  print_summary_sd("Prepare Evacuation", 2, &(_timing_data[prepare_evac]._ms));
-  print_summary_sd("Recycle regions", 2, &(_timing_data[recycle_regions]._ms));
-  print_summary_sd("Initial Evacuation", 2, &(_timing_data[init_evac]._ms));
+  out->print_cr("User requested GCs: "SIZE_FORMAT, _user_requested_gcs);
+  out->print_cr("Allocation failure GCs: "SIZE_FORMAT, _allocation_failure_gcs);

-  gclog_or_tty->print_cr(" ");
-  print_summary_sd("Concurrent Marking Times", 0, &(_timing_data[conc_mark]._ms));
-  print_summary_sd("Concurrent Evacuation Times", 0, &(_timing_data[conc_evac]._ms));
-  print_summary_sd("Concurrent Reset bitmaps", 0, &(_timing_data[reset_bitmaps]._ms));
-  print_summary_sd("Full GC Times", 0, &(_timing_data[full_gc]._ms));
-
-  gclog_or_tty->print_cr("User requested GCs: "SIZE_FORMAT, _user_requested_gcs);
-  gclog_or_tty->print_cr("Allocation failure GCs: "SIZE_FORMAT, _allocation_failure_gcs);
-
-  gclog_or_tty->print_cr(" ");
+  out->print_cr(" ");
   double total_sum = _timing_data[init_mark_gross]._ms.sum() +
                      _timing_data[final_mark_gross]._ms.sum();
   double total_avg = (_timing_data[init_mark_gross]._ms.avg() +
@@ -775,16 +662,30 @@
   double total_max = MAX2(_timing_data[init_mark_gross]._ms.maximum(),
                           _timing_data[final_mark_gross]._ms.maximum());

-  gclog_or_tty->print_cr("%-27s = %8.2lf s, avg = %8.2lf ms, max = %8.2lf ms",
+  out->print_cr("%-27s = %8.2lf s, avg = %8.2lf ms, max = %8.2lf ms",
                 "Total", total_sum / 1000.0, total_avg, total_max);
 }

-void ShenandoahCollectorPolicy::print_summary_sd(const char* str, uint indent, const NumberSeq* seq) {
+
+void ShenandoahCollectorPolicy::print_summary_sd(outputStream* out, const char* str, const NumberSeq* seq) {
   double sum = seq->sum();
-  for (uint i = 0; i < indent; i++) gclog_or_tty->print("  ");
-  gclog_or_tty->print_cr("%-27s = %8.2lf s (avg = %8.2lf ms)",
-                         str, sum / 1000.0, seq->avg());
-  for (uint i = 0; i < indent; i++) gclog_or_tty->print("  ");
-  gclog_or_tty->print_cr("%s = "INT32_FORMAT_W(5)", std dev = %8.2lf ms, max = %8.2lf ms)",
+  out->print("%-34s = %8.2lf s (avg = %8.2lf ms)",
+             str, sum / 1000.0, seq->avg());
+  out->print_cr("  %s = "INT32_FORMAT_W(5)", std dev = %8.2lf ms, max = %8.2lf ms)",
                 "(num", seq->num(), seq->sd(), seq->maximum());
 }
+
+void ShenandoahCollectorPolicy::increase_cycle_counter() {
+  _cycle_counter++;
+}
+
+size_t ShenandoahCollectorPolicy::cycle_counter() const {
+  return _cycle_counter;
+}
+
+ShenandoahPhaseTimes* ShenandoahCollectorPolicy::phase_times() {
+  return _phase_times;
+}
+
+GCTimer* ShenandoahCollectorPolicy::conc_timer() {return _conc_timer;}
+GCTimer* ShenandoahCollectorPolicy::stw_timer() {return _stw_timer;}
diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp
--- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp	Fri Nov 04 07:21:01 2016 -0400
+++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp	Wed Dec 07 21:03:02 2016 +0100
@@ -24,8 +24,6 @@
 #ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAH_COLLECTOR_POLICY_HPP
 #define SHARE_VM_GC_SHENANDOAH_SHENANDOAH_COLLECTOR_POLICY_HPP

-#include "gc_implementation/shared/gcTrace.hpp"
-#include "gc_implementation/shared/gcTimer.hpp"
 #include "memory/collectorPolicy.hpp"
 #include "runtime/arguments.hpp"
 #include "utilities/numberSeq.hpp"
@@ -34,33 +32,82 @@
 class ShenandoahFreeSet;
 class ShenandoahHeap;
 class ShenandoahHeuristics;
+class ShenandoahPhaseTimes;
+
+class STWGCTimer;
+class ConcurrentGCTimer;

 class ShenandoahCollectorPolicy: public CollectorPolicy {
 public:
   enum TimingPhase {
+    init_mark_gross,
     init_mark,
-    final_mark,
-    init_mark_gross,
-    final_mark_gross,
     accumulate_stats,
     make_parsable,
     clear_liveness,
     scan_roots,
-    rescan_roots,
+    scan_thread_roots,
+    scan_code_roots,
+    scan_string_table_roots,
+    scan_universe_roots,
+    scan_jni_roots,
+    scan_jni_weak_roots,
+    scan_synchronizer_roots,
+    scan_flat_profiler_roots,
+    scan_management_roots,
+    scan_system_dictionary_roots,
+    scan_cldg_roots,
+    scan_jvmti_roots,
+
+    resize_tlabs,
+
+    final_mark_gross,
+    final_mark,
+    update_roots,
+    update_thread_roots,
+    update_code_roots,
+    update_string_table_roots,
+    update_universe_roots,
+    update_jni_roots,
+    update_jni_weak_roots,
+    update_synchronizer_roots,
+    update_flat_profiler_roots,
+    update_management_roots,
+    update_system_dictionary_roots,
+    update_cldg_roots,
+    update_jvmti_roots,
     drain_satb,
-    drain_queues,
     weakrefs,
     class_unloading,
     prepare_evac,
+    recycle_regions,
     init_evac,
+    evac_thread_roots,
+    evac_code_roots,
+    evac_string_table_roots,
+    evac_universe_roots,
+    evac_jni_roots,
+    evac_jni_weak_roots,
+    evac_synchronizer_roots,
+    evac_flat_profiler_roots,
+    evac_management_roots,
+    evac_system_dictionary_roots,
+    evac_cldg_roots,
+    evac_jvmti_roots,

-    recycle_regions,
-    reset_bitmaps,
-    resize_tlabs,
-    full_gc,
     conc_mark,
     conc_evac,
+    reset_bitmaps,
+
+    full_gc,
+    full_gc_mark,
+    full_gc_mark_drain_queues,
+    full_gc_mark_weakrefs,
+    full_gc_mark_class_unloading,
+    full_gc_calculate_addresses,
+    full_gc_adjust_pointers,
+    full_gc_copy_objects,

     _num_phases
   };
@@ -87,9 +134,15 @@

   bool _conc_gc_aborted;

+  size_t _cycle_counter;
+
+  ShenandoahPhaseTimes* _phase_times;
+
 public:
   ShenandoahCollectorPolicy();

+  ShenandoahPhaseTimes* phase_times();
+
   virtual ShenandoahCollectorPolicy* as_pgc_policy();

   BarrierSet::Name barrier_set_name();
@@ -106,6 +159,10 @@

   void record_phase_start(TimingPhase phase);
   void record_phase_end(TimingPhase phase);
+
+  void record_workers_start(TimingPhase phase);
+  void record_workers_end(TimingPhase phase);
+
   void report_concgc_cancelled();

   void record_user_requested_gc();
@@ -120,17 +177,23 @@
   void choose_collection_set(ShenandoahCollectionSet* collection_set);
   void choose_free_set(ShenandoahFreeSet* free_set);

-  void print_tracing_info();
+  bool process_references();
+  bool unload_classes();

-  GCTimer* conc_timer(){return _conc_timer;}
-  GCTimer* stw_timer() {return _stw_timer;}
+  void print_tracing_info(outputStream* out);
+
+  GCTimer* conc_timer();
+  GCTimer* stw_timer();
   ShenandoahTracer* tracer() {return _tracer;}

   void set_conc_gc_aborted() { _conc_gc_aborted = true;}
   void clear_conc_gc_aborted() {_conc_gc_aborted = false;}

+  void increase_cycle_counter();
+  size_t cycle_counter() const;
+
 private:
-  void print_summary_sd(const char* str, uint indent, const NumberSeq* seq);
+  void print_summary_sd(outputStream* out, const char* str, const NumberSeq* seq);
 };
diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp
--- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp	Fri Nov 04 07:21:01 2016 -0400
+++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp	Wed Dec 07 21:03:02 2016 +0100
@@ -21,17 +21,17 @@
  *
  */

-#include "gc_implementation/shared/gcTimer.hpp"
 #include "gc_implementation/shared/isGCActiveMark.hpp"
+#include "gc_implementation/shared/parallelCleaning.hpp"
 #include "gc_implementation/shenandoah/shenandoahBarrierSet.inline.hpp"
+#include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp"
 #include "gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp"
 #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp"
 #include "gc_implementation/shenandoah/shenandoahRootProcessor.hpp"
-#include "gc_implementation/shenandoah/shenandoahOopClosures.inline.hpp"
 #include "gc_implementation/shenandoah/shenandoah_specialized_oop_closures.hpp"
 #include "gc_implementation/shenandoah/brooksPointer.hpp"
 #include "memory/referenceProcessor.hpp"
-#include "memory/sharedHeap.hpp"
+#include "gc_implementation/shenandoah/shenandoahTaskqueue.hpp"
 #include "code/codeCache.hpp"
 #include "classfile/symbolTable.hpp"
 #include "classfile/systemDictionary.hpp"
@@ -39,58 +39,41 @@
 #include "oops/oop.inline.hpp"
 #include "utilities/taskqueue.hpp"

-class ShenandoahMarkUpdateRootsClosure : public OopClosure {
+class ShenandoahInitMarkRootsClosure : public OopClosure {
   SCMObjToScanQueue* _queue;
   ShenandoahHeap* _heap;

 public:
-  ShenandoahMarkUpdateRootsClosure(SCMObjToScanQueue* q) :
+  ShenandoahInitMarkRootsClosure(SCMObjToScanQueue* q) :
     _queue(q),
     _heap((ShenandoahHeap*) Universe::heap())
   {
   }

-  void do_oop(narrowOop* p) {
-    Unimplemented();
-  }
-
-  inline void do_oop(oop* p) {
-    oop obj = oopDesc::load_heap_oop(p);
-    if (! oopDesc::is_null(obj)) {
-      obj = _heap->update_oop_ref_not_null(p, obj);
-      ShenandoahConcurrentMark::mark_and_push(obj, _heap, _queue);
-    }
-  }
-
-};
-
-class ShenandoahMarkRootsClosure : public OopClosure {
-  SCMObjToScanQueue* _queue;
-  ShenandoahHeap* _heap;
-
-public:
-  ShenandoahMarkRootsClosure(SCMObjToScanQueue* q) :
-    _queue(q),
-    _heap((ShenandoahHeap*) Universe::heap())
-  {
-  }
-
-  void do_oop(narrowOop* p) {
-    Unimplemented();
-  }
-
-  inline void do_oop(oop* p) {
-    oop obj = oopDesc::load_heap_oop(p);
-    if (! oopDesc::is_null(obj)) {
+private:
+  template <class T>
+  inline void do_oop_work(T* p) {
+    T o = oopDesc::load_heap_oop(p);
+    if (! oopDesc::is_null(o)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(o);
+      obj = ShenandoahBarrierSet::resolve_oop_static_not_null(obj);
       assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static_not_null(obj)), "expect forwarded oop");
       ShenandoahConcurrentMark::mark_and_push(obj, _heap, _queue);
     }
   }

+public:
+  void do_oop(narrowOop* p) {
+    do_oop_work(p);
+  }
+
+  inline void do_oop(oop* p) {
+    do_oop_work(p);
+  }
+
 };

-
 class SCMUpdateRefsClosure: public OopClosure {
 private:
   ShenandoahHeap* _heap;
@@ -99,90 +82,108 @@
   SCMUpdateRefsClosure() : _heap(ShenandoahHeap::heap()) {
   }

-  inline void do_oop(oop* p) {
-    oop obj = oopDesc::load_heap_oop(p);
-    if (! oopDesc::is_null(obj)) {
+private:
+  template <class T>
+  inline void do_oop_work(T* p) {
+    T o = oopDesc::load_heap_oop(p);
+    if (! oopDesc::is_null(o)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(o);
       _heap->update_oop_ref_not_null(p, obj);
     }
   }

+public:
+  inline void do_oop(oop* p) {
+    do_oop_work(p);
+  }
+
   void do_oop(narrowOop* p) {
-    Unimplemented();
+    do_oop_work(p);
   }
 };

 // Mark the object and add it to the queue to be scanned
-template <class T>
-ShenandoahMarkObjsClosure<T>::ShenandoahMarkObjsClosure(QHolder* q) :
+template <class T, bool CL>
+ShenandoahMarkObjsClosure<T, CL>::ShenandoahMarkObjsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) :
   _heap((ShenandoahHeap*)(Universe::heap())),
-  _mark_refs(T(q)),
   _queue(q),
+  _mark_refs(T(q, rp)),
   _last_region_idx(0),
   _live_data(0)
 {
 }

-template <class T>
-ShenandoahMarkObjsClosure<T>::~ShenandoahMarkObjsClosure() {
-  // tty->print_cr("got "SIZE_FORMAT" x region: "UINT32_FORMAT, _live_data_count, _last_region_idx);
-  ShenandoahHeapRegion* r = _heap->regions()->get(_last_region_idx);
-  r->increase_live_data(_live_data);
+template <class T, bool CL>
+ShenandoahMarkObjsClosure<T, CL>::~ShenandoahMarkObjsClosure() {
+  if (CL) {
+    ShenandoahHeapRegion *r = _heap->regions()->get(_last_region_idx);
+    r->increase_live_data(_live_data);
+  }
 }

-ShenandoahMarkUpdateRefsClosure::ShenandoahMarkUpdateRefsClosure(QHolder* q) :
-  MetadataAwareOopClosure(((ShenandoahHeap *) Universe::heap())->ref_processor()),
+ShenandoahMarkUpdateRefsClosure::ShenandoahMarkUpdateRefsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) :
+  MetadataAwareOopClosure(rp),
   _queue(q),
   _heap((ShenandoahHeap*) Universe::heap())
 {
 }

-ShenandoahMarkRefsClosure::ShenandoahMarkRefsClosure(QHolder* q) :
-  MetadataAwareOopClosure(((ShenandoahHeap *) Universe::heap())->ref_processor()),
+ShenandoahMarkRefsClosure::ShenandoahMarkRefsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) :
+  MetadataAwareOopClosure(rp),
   _queue(q),
   _heap((ShenandoahHeap*) Universe::heap())
 {
 }

-class ShenandoahMarkRootsTask : public AbstractGangTask {
+class ShenandoahInitMarkRootsTask : public AbstractGangTask {
 private:
   ShenandoahRootProcessor* _rp;
-  bool _update_refs;
+  bool _process_refs;
 public:
-  ShenandoahMarkRootsTask(ShenandoahRootProcessor* rp, bool update_refs) :
-    AbstractGangTask("Shenandoah update roots task"), _update_refs(update_refs),
+  ShenandoahInitMarkRootsTask(ShenandoahRootProcessor* rp, bool process_refs) :
+    AbstractGangTask("Shenandoah init mark roots task"),
+    _rp(rp),
+    _process_refs(process_refs) {
+  }
+
+  void work(uint worker_id) {
+    assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint");
+
+    ShenandoahHeap* heap = ShenandoahHeap::heap();
+    SCMObjToScanQueueSet* queues = heap->concurrentMark()->task_queues();
+    assert(queues->get_reserved() > worker_id, err_msg("Queue has not been reserved for worker id: %d", worker_id));
+
+    SCMObjToScanQueue* q = queues->queue(worker_id);
+    ShenandoahInitMarkRootsClosure mark_cl(q);
+    CLDToOopClosure cldCl(&mark_cl);
+    MarkingCodeBlobClosure blobsCl(&mark_cl, ! CodeBlobToOopClosure::FixRelocations);
+
+    ResourceMark m;
+    if (heap->concurrentMark()->unload_classes()) {
+      _rp->process_strong_roots(&mark_cl, _process_refs ? NULL : &mark_cl, &cldCl, &blobsCl, worker_id);
+    } else {
+      _rp->process_all_roots(&mark_cl, _process_refs ? NULL : &mark_cl, &cldCl, ShenandoahConcurrentCodeRoots ? NULL : &blobsCl, worker_id);
+    }
+  }
+};
+
+class ShenandoahUpdateRootsTask : public AbstractGangTask {
+private:
+  ShenandoahRootProcessor* _rp;
+public:
+  ShenandoahUpdateRootsTask(ShenandoahRootProcessor* rp) :
+    AbstractGangTask("Shenandoah update roots task"),
     _rp(rp) {
   }

   void work(uint worker_id) {
-    // tty->print_cr("start mark roots worker: "INT32_FORMAT, worker_id);
+    assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint");
+
     ShenandoahHeap* heap = ShenandoahHeap::heap();
-    SCMObjToScanQueue* q = heap->concurrentMark()->get_queue(worker_id);
-    OopClosure* cl;
-    ShenandoahMarkUpdateRootsClosure mark_update_cl(q);
-    ShenandoahMarkRootsClosure mark_cl(q);
-    if (_update_refs) {
-      cl = &mark_update_cl;
-    } else {
-      cl = &mark_cl;
-    }
-    MarkingCodeBlobClosure blobsCl(cl, CodeBlobToOopClosure::FixRelocations);
-    CLDToOopClosure cldCl(cl);
+    SCMUpdateRefsClosure cl;
+    CLDToOopClosure cldCl(&cl);

-    ResourceMark m;
-    if (ClassUnloadingWithConcurrentMark) {
-      SCMUpdateRefsClosure uprefs;
-      // Can't use the MarkingCodeBlobClosure here: if we do this, the update-refs
-      // pass could claim nmethods from marking passes.
-      CodeBlobToOopClosure upcode(&uprefs, CodeBlobToOopClosure::FixRelocations);
-      CLDToOopClosure upcld(&uprefs, false);
-      _rp->process_roots(cl, &uprefs, &cldCl, &upcld, &cldCl, &blobsCl, &upcode);
-    } else {
-      _rp->process_all_roots(cl, &cldCl, &blobsCl);
-      SCMUpdateRefsClosure uprefs;
-      ShenandoahAlwaysTrueClosure always_true;
-      JNIHandles::weak_oops_do(&always_true, &uprefs);
-    }
-    // tty->print_cr("finish mark roots worker: "INT32_FORMAT, worker_id);
+    _rp->process_all_roots(&cl, &cl, &cldCl, NULL, worker_id);
   }
 };

@@ -199,14 +200,26 @@

   void work(uint worker_id) {

-    SCMObjToScanQueue* q = _cm->get_queue(worker_id);
-    QHolder qh(q);
+    ReferenceProcessor* rp;
+    if (_cm->process_references()) {
+      rp = ShenandoahHeap::heap()->ref_processor();
+    } else {
+      rp = NULL;
+    }
+    if (ShenandoahConcurrentCodeRoots && _cm->claim_codecache()) {
+      if (! _cm->unload_classes()) {
+        ShenandoahMarkRefsClosure cl(q, rp);
+        CodeBlobToOopClosure blobs(&cl, ! CodeBlobToOopClosure::FixRelocations);
+        MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
+        CodeCache::blobs_do(&blobs);
+      }
+    }
     if (_update_refs) {
-      ShenandoahMarkObjsClosure<ShenandoahMarkUpdateRefsClosure> cl(&qh);
+      ShenandoahMarkObjsClosure<ShenandoahMarkUpdateRefsClosure, true> cl(q, rp);
       _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator);
     } else {
-      ShenandoahMarkObjsClosure<ShenandoahMarkRefsClosure> cl(&qh);
+      ShenandoahMarkObjsClosure<ShenandoahMarkRefsClosure, true> cl(q, rp);
       _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator);
     }
   }
@@ -217,175 +230,189 @@
   ShenandoahConcurrentMark* _cm;
   ParallelTaskTerminator* _terminator;
   bool _update_refs;
+  bool _count_live;

 public:
-  SCMFinalMarkingTask(ShenandoahConcurrentMark* cm, ParallelTaskTerminator* terminator, bool update_refs) :
-    AbstractGangTask("Shenandoah Final Marking"), _cm(cm), _terminator(terminator), _update_refs(update_refs) {
+  SCMFinalMarkingTask(ShenandoahConcurrentMark* cm, ParallelTaskTerminator* terminator, bool update_refs, bool count_live) :
    AbstractGangTask("Shenandoah Final Marking"), _cm(cm), _terminator(terminator), _update_refs(update_refs), _count_live(count_live) {
   }

   void work(uint worker_id) {

+    // First drain remaining SATB buffers.
+    // Notice that this is not strictly necessary for mark-compact. But since
+    // it requires a StrongRootsScope around the task, we need to claim the
+    // threads, and performance-wise it doesn't really matter. Adds about 1ms to
+    // full-gc.
+    _cm->drain_satb_buffers(worker_id, true);
+    ReferenceProcessor* rp;
+    if (_cm->process_references()) {
+      rp = ShenandoahHeap::heap()->ref_processor();
+    } else {
+      rp = NULL;
+    }
     SCMObjToScanQueue* q = _cm->get_queue(worker_id);
-    QHolder qh(q);
+    // Templates need constexprs, so we have to switch by the flags ourselves.
     if (_update_refs) {
-      ShenandoahMarkObjsClosure<ShenandoahMarkUpdateRefsClosure> cl(&qh);
-      _cm->final_mark_loop(&cl, worker_id, q, _terminator);
+      if (_count_live) {
+        ShenandoahMarkObjsClosure<ShenandoahMarkUpdateRefsClosure, true> cl(q, rp);
+        _cm->final_mark_loop(&cl, worker_id, q, _terminator);
+      } else {
+        ShenandoahMarkObjsClosure<ShenandoahMarkUpdateRefsClosure, false> cl(q, rp);
+        _cm->final_mark_loop(&cl, worker_id, q, _terminator);
+      }
     } else {
-      ShenandoahMarkObjsClosure<ShenandoahMarkRefsClosure> cl(&qh);
-      _cm->final_mark_loop(&cl, worker_id, q, _terminator);
+      if (_count_live) {
+        ShenandoahMarkObjsClosure<ShenandoahMarkRefsClosure, true> cl(q, rp);
+        _cm->final_mark_loop(&cl, worker_id, q, _terminator);
+      } else {
+        ShenandoahMarkObjsClosure<ShenandoahMarkRefsClosure, false> cl(q, rp);
+        _cm->final_mark_loop(&cl, worker_id, q, _terminator);
+      }
     }
+
+    assert(_cm->task_queues()->is_empty(), "Should be empty");
   }
 };
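The four-way instantiation in SCMFinalMarkingTask::work() above is a standard C++ way to hoist runtime flags into template parameters: the hot marking loop is compiled once per flag combination, and the per-object tests are folded away by the compiler. A minimal sketch of the pattern, with hypothetical names rather than the VM types:

#include <cstdio>

// Worker loop specialized at compile time on two boolean properties
// (analogous to the update-refs and count-liveness switches above).
template <bool UPDATE_REFS, bool COUNT_LIVE>
static void mark_loop() {
  // In the real closures these flags gate per-object work; here they
  // are compile-time constants, so dead branches cost nothing.
  std::printf("update_refs=%d count_live=%d\n", UPDATE_REFS, COUNT_LIVE);
}

// The runtime flags are switched exactly once, outside the hot loop.
static void run(bool update_refs, bool count_live) {
  if (update_refs) {
    if (count_live) mark_loop<true, true>();
    else            mark_loop<true, false>();
  } else {
    if (count_live) mark_loop<false, true>();
    else            mark_loop<false, false>();
  }
}

int main() { run(true, false); }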
-void ShenandoahConcurrentMark::prepare_unmarked_root_objs() {
-
-  ShenandoahHeap* heap = ShenandoahHeap::heap();
-  bool update_refs = heap->need_update_refs();
-
-  if (update_refs) {
-    COMPILER2_PRESENT(DerivedPointerTable::clear());
-  }
-
-  prepare_unmarked_root_objs_no_derived_ptrs(update_refs);
-
-  if (update_refs) {
-    COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
-  }
-
-}
-
-void ShenandoahConcurrentMark::prepare_unmarked_root_objs_no_derived_ptrs(bool update_refs) {
+void ShenandoahConcurrentMark::mark_roots() {
   assert(Thread::current()->is_VM_thread(), "can only do this in VMThread");
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint");

   ShenandoahHeap* heap = ShenandoahHeap::heap();

   ClassLoaderDataGraph::clear_claimed_marks();
-  heap->set_par_threads(_max_conc_worker_id);
-  heap->conc_workers()->set_active_workers(_max_conc_worker_id);
-  ShenandoahRootProcessor root_proc(heap, _max_conc_worker_id);
+  uint nworkers = heap->max_parallel_workers();
+  assert(nworkers <= task_queues()->size(), "Just check");
+
+  ShenandoahRootProcessor root_proc(heap, nworkers, ShenandoahCollectorPolicy::scan_thread_roots);
   TASKQUEUE_STATS_ONLY(reset_taskqueue_stats());
-  ShenandoahMarkRootsTask mark_roots(&root_proc, update_refs);
-  heap->conc_workers()->run_task(&mark_roots);
-  heap->set_par_threads(0);
+  task_queues()->reserve(nworkers);
+  assert(heap->workers()->active_workers() == nworkers, "Not expecting other tasks");

-  if (! ShenandoahProcessReferences) {
-    ShenandoahMarkUpdateRootsClosure cl(get_queue(0));
-    heap->weak_roots_iterate(&cl);
+  ShenandoahInitMarkRootsTask mark_roots(&root_proc, process_references());
+  heap->workers()->run_task(&mark_roots, nworkers);
+  if (ShenandoahConcurrentCodeRoots) {
+    clear_claim_codecache();
   }
-  // tty->print_cr("all root marker threads done");
 }

+void ShenandoahConcurrentMark::init_mark_roots() {
+  assert(Thread::current()->is_VM_thread(), "can only do this in VMThread");
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint");

-void ShenandoahConcurrentMark::initialize() {
-  _max_conc_worker_id = MAX2((uint) ConcGCThreads, 1U);
-  _task_queues = new SCMObjToScanQueueSet((int) _max_conc_worker_id);
+  ShenandoahHeap* heap = ShenandoahHeap::heap();

-  for (uint i = 0; i < _max_conc_worker_id; ++i) {
+  // Set up ref processing and class unloading.
+  ShenandoahCollectorPolicy* policy = heap->shenandoahPolicy();
+  set_process_references(policy->process_references());
+  set_unload_classes(policy->unload_classes());
+
+  mark_roots();
+}
+
+void ShenandoahConcurrentMark::update_roots() {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint");
+  ShenandoahHeap* heap = ShenandoahHeap::heap();
+
+  ClassLoaderDataGraph::clear_claimed_marks();
+  uint nworkers = heap->max_parallel_workers();
+  assert(heap->workers()->active_workers() == nworkers, "Not expecting other tasks");
+  ShenandoahRootProcessor root_proc(heap, nworkers, ShenandoahCollectorPolicy::update_thread_roots);
+  ShenandoahUpdateRootsTask update_roots(&root_proc);
+  heap->workers()->run_task(&update_roots);
+
+}
+
+void ShenandoahConcurrentMark::final_update_roots() {
+  assert(Thread::current()->is_VM_thread(), "can only do this in VMThread");
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint");
+
+  COMPILER2_PRESENT(DerivedPointerTable::clear());
+
+  update_roots();
+
+  COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+}
+
+
+void ShenandoahConcurrentMark::initialize(uint workers) {
+  uint num_queues = MAX2(workers, 1U);
+
+  _task_queues = new SCMObjToScanQueueSet((int) num_queues);
+
+  for (uint i = 0; i < num_queues; ++i) {
     SCMObjToScanQueue* task_queue = new SCMObjToScanQueue();
     task_queue->initialize();
     _task_queues->register_queue(i, task_queue);
   }

-  JavaThread::satb_mark_queue_set().set_buffer_size(1014 /* G1SATBBufferSize */);
+  _process_references = false;
+  _unload_classes = false;
+  _claimed_codecache = 0;
+
+  JavaThread::satb_mark_queue_set().set_buffer_size(ShenandoahSATBBufferSize);
 }

 void ShenandoahConcurrentMark::mark_from_roots() {
-  if (ShenandoahGCVerbose) {
-    tty->print_cr("STOPPING THE WORLD: before marking");
-    tty->print_cr("Starting markFromRoots");
-  }
-
   ShenandoahHeap* sh = (ShenandoahHeap *) Universe::heap();

   bool update_refs = sh->need_update_refs();

   sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::conc_mark);

-  ParallelTaskTerminator terminator(_max_conc_worker_id, _task_queues);
-  if (ShenandoahProcessReferences) {
+  // Concurrent marking, uses concurrent workers
+  uint nworkers = sh->max_conc_workers();
+  if (process_references()) {
     ReferenceProcessor* rp = sh->ref_processor();
+    rp->set_active_mt_degree(nworkers);
+
     // enable ("weak") refs discovery
     rp->enable_discovery(true /*verify_no_refs*/, true);
     rp->setup_policy(sh->is_full_gc_in_progress()); // snapshot the soft ref policy to be used in this cycle
   }

-  SCMConcurrentMarkingTask markingTask = SCMConcurrentMarkingTask(this, &terminator, update_refs);
-  sh->set_par_threads(_max_conc_worker_id);
-  sh->conc_workers()->set_active_workers(_max_conc_worker_id);
-  sh->conc_workers()->run_task(&markingTask);
-  sh->set_par_threads(0);
+  task_queues()->reserve(nworkers);
+  assert(sh->conc_workers()->active_workers() == nworkers, "Not expecting other tasks");

-  if (ShenandoahGCVerbose) {
-    tty->print("total workers = %u active workers = %u\n",
-               sh->conc_workers()->total_workers(),
-               sh->conc_workers()->active_workers());
-    if (! sh->cancelled_concgc()) {
-      TASKQUEUE_STATS_ONLY(print_taskqueue_stats());
-    }
-    TASKQUEUE_STATS_ONLY(reset_taskqueue_stats());
+  if (UseShenandoahOWST) {
+    ShenandoahTaskTerminator terminator(nworkers, task_queues());
+    SCMConcurrentMarkingTask markingTask = SCMConcurrentMarkingTask(this, &terminator, update_refs);
+    sh->conc_workers()->run_task(&markingTask, nworkers);
+  } else {
+    ParallelTaskTerminator terminator(nworkers, task_queues());
+    SCMConcurrentMarkingTask markingTask = SCMConcurrentMarkingTask(this, &terminator, update_refs);
+    sh->conc_workers()->run_task(&markingTask, nworkers);
   }

-  if (ShenandoahGCVerbose) {
-    tty->print_cr("Finishing markFromRoots");
-    tty->print_cr("RESUMING THE WORLD: after marking");
+  assert(task_queues()->is_empty(), "Should be empty");
+  if (! sh->cancelled_concgc()) {
+    TASKQUEUE_STATS_ONLY(print_taskqueue_stats());
   }
+  TASKQUEUE_STATS_ONLY(reset_taskqueue_stats());
+
   sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::conc_mark);
 }
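mark_from_roots() above reserves one task queue per worker (task_queues()->reserve(nworkers)) and sizes the terminator to the same worker count, choosing ShenandoahTaskTerminator when UseShenandoahOWST is set and ParallelTaskTerminator otherwise. A toy model of the queue-per-worker arrangement is sketched below; the work stealing and the actual termination protocol of those terminators are deliberately elided, so this only illustrates the setup shape:

#include <atomic>
#include <deque>
#include <thread>
#include <vector>
#include <cstdio>

int main() {
  const int nworkers = 4;
  // One queue reserved per worker, mirroring task_queues()->reserve(nworkers).
  std::vector<std::deque<int>> queues(nworkers);
  for (int i = 0; i < 100; i++) queues[i % nworkers].push_back(i);

  std::atomic<int> done{0};
  std::vector<std::thread> pool;
  for (int w = 0; w < nworkers; w++) {
    pool.emplace_back([&, w] {
      while (!queues[w].empty()) queues[w].pop_front();  // drain own queue
      done++;  // in the VM, a terminator coordinates this agreement
    });
  }
  for (auto& t : pool) t.join();
  std::printf("terminated with %d workers done\n", done.load());
}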
- sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::rescan_roots); - prepare_unmarked_root_objs(); - sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::rescan_roots); - sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::drain_satb); - { - SharedHeap::StrongRootsScope scope(sh, true); - ParallelTaskTerminator terminator(_max_conc_worker_id, _task_queues); - // drain_satb_buffers(0, true); - FinishDrainSATBBuffersTask drain_satb_buffers(this, &terminator); - sh->set_par_threads(_max_conc_worker_id); - sh->conc_workers()->set_active_workers(_max_conc_worker_id); - sh->conc_workers()->run_task(&drain_satb_buffers); - sh->set_par_threads(0); - sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::drain_satb); + TASKQUEUE_STATS_ONLY(reset_taskqueue_stats()); + + shared_finish_mark_from_roots(/* full_gc = */ false); + + sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::update_roots); + if (sh->need_update_refs()) { + final_update_roots(); } + sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::update_roots); - shared_finish_mark_from_roots(); - - if (ShenandoahGCVerbose) { - tty->print_cr("Finishing finishMarkFromRoots"); -#ifdef SLOWDEBUG - for (int i = 0; i <(int)_max_conc_worker_id; i++) { - tty->print("Queue: "INT32_FORMAT":", i); - _task_queues->queue(i)->stats.print(tty, 10); - tty->cr(); - _task_queues->queue(i)->stats.verify(); - } -#endif - } + TASKQUEUE_STATS_ONLY(print_taskqueue_stats()); #ifdef ASSERT verify_roots(); @@ -397,67 +424,98 @@ #endif } -void ShenandoahConcurrentMark::shared_finish_mark_from_roots() { +void ShenandoahConcurrentMark::shared_finish_mark_from_roots(bool full_gc) { + assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint"); ShenandoahHeap* sh = ShenandoahHeap::heap(); + ShenandoahCollectorPolicy* policy = sh->shenandoahPolicy(); + uint nworkers = sh->max_parallel_workers(); // Finally mark everything else we've got in our queues during the previous steps. + // It does two different things for concurrent vs. mark-compact GC: + // - For concurrent GC, it starts with empty task queues, drains the remaining + // SATB buffers, and then completes the marking closure. + // - For mark-compact GC, it starts out with the task queues seeded by initial + // root scan, and completes the closure, thus marking through all live objects + // The implementation is the same, so it's shared here. { - sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::drain_queues); - ParallelTaskTerminator terminator(_max_conc_worker_id, _task_queues); - SCMFinalMarkingTask markingTask = SCMFinalMarkingTask(this, &terminator, sh->need_update_refs()); - sh->set_par_threads(_max_conc_worker_id); - sh->conc_workers()->run_task(&markingTask); - sh->set_par_threads(0); - sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::drain_queues); + policy->record_phase_start(full_gc ? 
+ ShenandoahCollectorPolicy::full_gc_mark_drain_queues : + ShenandoahCollectorPolicy::drain_satb); + bool count_live = !(ShenandoahNoLivenessFullGC && full_gc); // we do not need liveness data for full GC + task_queues()->reserve(nworkers); + + SharedHeap::StrongRootsScope scope(sh, true); + if (UseShenandoahOWST) { + ShenandoahTaskTerminator terminator(nworkers, task_queues()); + SCMFinalMarkingTask markingTask = SCMFinalMarkingTask(this, &terminator, sh->need_update_refs(), count_live); + sh->workers()->run_task(&markingTask); + } else { + ParallelTaskTerminator terminator(nworkers, task_queues()); + SCMFinalMarkingTask markingTask = SCMFinalMarkingTask(this, &terminator, sh->need_update_refs(), count_live); + sh->workers()->run_task(&markingTask); + } + policy->record_phase_end(full_gc ? + ShenandoahCollectorPolicy::full_gc_mark_drain_queues : + ShenandoahCollectorPolicy::drain_satb); } -#ifdef ASSERT - for (int i = 0; i < (int) _max_conc_worker_id; i++) { - assert(_task_queues->queue(i)->is_empty(), "Should be empty"); - } -#endif + assert(task_queues()->is_empty(), "Should be empty"); // When we're done marking everything, we process weak references. - if (ShenandoahProcessReferences) { - sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::weakrefs); + policy->record_phase_start(full_gc ? + ShenandoahCollectorPolicy::full_gc_mark_weakrefs : + ShenandoahCollectorPolicy::weakrefs); + if (process_references()) { weak_refs_work(); - sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::weakrefs); } + policy->record_phase_end(full_gc ? + ShenandoahCollectorPolicy::full_gc_mark_weakrefs : + ShenandoahCollectorPolicy::weakrefs); // And finally finish class unloading - if (ClassUnloadingWithConcurrentMark) { - sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::class_unloading); - ShenandoahIsAliveClosure is_alive; + policy->record_phase_start(full_gc ? + ShenandoahCollectorPolicy::full_gc_mark_class_unloading : + ShenandoahCollectorPolicy::class_unloading); + if (unload_classes()) { + ShenandoahForwardedIsAliveClosure is_alive; // Unload classes and purge SystemDictionary. - bool purged_class = SystemDictionary::do_unloading(&is_alive, true); - // Unload nmethods. - CodeCache::do_unloading(&is_alive, purged_class); - // Prune dead klasses from subklass/sibling/implementor lists. - Klass::clean_weak_klass_links(&is_alive); - // Delete entries from dead interned strings. - // Clean up unreferenced symbols in symbol table. - sh->unlink_string_and_symbol_table(&is_alive); + bool purged_class = SystemDictionary::do_unloading(&is_alive, false); + ParallelCleaningTask unlink_task(&is_alive, true, true, nworkers, purged_class); + sh->workers()->run_task(&unlink_task, nworkers); + ClassLoaderDataGraph::purge(); + } + policy->record_phase_end(full_gc ? + ShenandoahCollectorPolicy::full_gc_mark_class_unloading : + ShenandoahCollectorPolicy::class_unloading); - ClassLoaderDataGraph::purge(); - sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::class_unloading); - } - -#ifdef ASSERT - for (int i = 0; i < (int) _max_conc_worker_id; i++) { - assert(_task_queues->queue(i)->is_empty(), "Should be empty"); - } -#endif - + assert(task_queues()->is_empty(), "Should be empty"); } #ifdef ASSERT +template +void ShenandoahVerifyRootsClosure1::do_oop_work(T* p) { + ShenandoahHeap* heap = ShenandoahHeap::heap(); + T o = oopDesc::load_heap_oop(p); + if (! oopDesc::is_null(o)) { + oop obj = oopDesc::decode_heap_oop_not_null(o); + if (! 
oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static_not_null(obj))) { + tty->print_cr("from-space marked: %s, to-space marked: %s, unload_classes: %s", + BOOL_TO_STR(heap->is_marked_next(obj)), + BOOL_TO_STR(heap->is_marked_next(ShenandoahBarrierSet::resolve_oop_static_not_null(obj))), + BOOL_TO_STR(heap->concurrentMark()->unload_classes())); + } + guarantee(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static_not_null(obj)), "oop must not be forwarded"); + guarantee(heap->is_marked_next(obj), "oop must be marked"); + } +} + void ShenandoahVerifyRootsClosure1::do_oop(oop* p) { - oop obj = oopDesc::load_heap_oop(p); - if (! oopDesc::is_null(obj)) { - guarantee(ShenandoahHeap::heap()->is_marked_current(obj), "oop must be marked"); - guarantee(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static_not_null(obj)), "oop must not be forwarded"); - } + do_oop_work(p); +} + +void ShenandoahVerifyRootsClosure1::do_oop(narrowOop* p) { + do_oop_work(p); } void ShenandoahConcurrentMark::verify_roots() { @@ -466,10 +524,8 @@ CLDToOopClosure cldCl(&cl); ClassLoaderDataGraph::clear_claimed_marks(); ShenandoahRootProcessor rp(ShenandoahHeap::heap(), 1); - rp.process_roots(&cl, &cl, &cldCl, &cldCl, &cldCl, &blobsCl, &blobsCl); + rp.process_all_roots(&cl, &cl, &cldCl, &blobsCl, 0); - ShenandoahAlwaysTrueClosure always_true; - JNIHandles::weak_oops_do(&always_true, &cl); } #endif @@ -497,10 +553,6 @@ }; void ShenandoahConcurrentMark::drain_satb_buffers(uint worker_id, bool remark) { - - // tty->print_cr("start draining SATB buffers"); - - ShenandoahHeap* sh = (ShenandoahHeap*) Universe::heap(); SCMObjToScanQueue* q = get_queue(worker_id); ShenandoahSATBBufferClosure cl(q); @@ -511,9 +563,6 @@ ShenandoahSATBThreadsClosure tc(&cl); Threads::threads_do(&tc); } - - // tty->print_cr("end draining SATB buffers"); - } #if TASKQUEUE_STATS @@ -523,41 +572,33 @@ st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr(); } -void ShenandoahConcurrentMark::print_taskqueue_stats(outputStream* const st) const { +void ShenandoahConcurrentMark::print_taskqueue_stats() const { + if (! 
ShenandoahLogTrace) { + return; + } + ResourceMark rm; + outputStream* st = gclog_or_tty; print_taskqueue_stats_hdr(st); + ShenandoahHeap* sh = (ShenandoahHeap*) Universe::heap(); TaskQueueStats totals; - const int n = sh->max_conc_workers(); + const int n = _task_queues->size(); for (int i = 0; i < n; ++i) { st->print(INT32_FORMAT_W(3), i); _task_queues->queue(i)->stats.print(st); - st->print("\n"); + st->cr(); totals += _task_queues->queue(i)->stats; } - st->print_raw("tot "); totals.print(st); st->cr(); + st->print("tot "); totals.print(st); st->cr(); DEBUG_ONLY(totals.verify()); } -void ShenandoahConcurrentMark::print_push_only_taskqueue_stats(outputStream* const st) const { - print_taskqueue_stats_hdr(st); - ShenandoahHeap* sh = (ShenandoahHeap*) Universe::heap(); - TaskQueueStats totals; - const int n = sh->max_conc_workers(); - for (int i = 0; i < n; ++i) { - st->print(INT32_FORMAT_W(3), i); - _task_queues->queue(i)->stats.print(st); - st->print("\n"); - totals += _task_queues->queue(i)->stats; - } - st->print_raw("tot "); totals.print(st); st->cr(); -} - void ShenandoahConcurrentMark::reset_taskqueue_stats() { ShenandoahHeap* sh = (ShenandoahHeap*) Universe::heap(); - const int n = sh->max_conc_workers(); + const int n = task_queues()->size(); for (int i = 0; i < n; ++i) { - _task_queues->queue(i)->stats.reset(); + task_queues()->queue(i)->stats.reset(); } } #endif // TASKQUEUE_STATS @@ -575,16 +616,22 @@ void do_void() { + assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint"); ShenandoahHeap* sh = ShenandoahHeap::heap(); ShenandoahConcurrentMark* scm = sh->concurrentMark(); + ReferenceProcessor* rp; + if (scm->process_references()) { + rp = ShenandoahHeap::heap()->ref_processor(); + } else { + rp = NULL; + } SCMObjToScanQueue* q = scm->get_queue(_worker_id); - QHolder qh(q); if (sh->need_update_refs()) { - ShenandoahMarkObjsClosure cl(&qh); + ShenandoahMarkObjsClosure cl(q, rp); scm->final_mark_loop(&cl, _worker_id, q, _terminator); } else { - ShenandoahMarkObjsClosure cl(&qh); + ShenandoahMarkObjsClosure cl(q, rp); scm->final_mark_loop(&cl, _worker_id, q, _terminator); } } @@ -601,25 +648,37 @@ _sh = (ShenandoahHeap*) Universe::heap(); } - void do_oop(narrowOop* p) { - assert(false, "narrowOops Aren't implemented"); +private: + template + inline void do_oop_work(T* p) { + + T o = oopDesc::load_heap_oop(p); + if (! oopDesc::is_null(o)) { + oop obj = oopDesc::decode_heap_oop_not_null(o); + assert(oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj)), "only get updated oops in weak ref processing"); + +#ifdef ASSERT + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + out->print("\twe're looking at location " + "*"PTR_FORMAT" = "PTR_FORMAT, + p2i(p), p2i((void*) obj)); + obj->print_on(out); + } +#endif + ShenandoahConcurrentMark::mark_and_push(obj, _sh, _queue); + } } +public: + void do_oop(narrowOop* p) { + do_oop_work(p); + } + void do_oop(oop* p) { - - oop obj = oopDesc::load_heap_oop(p); - assert(oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj)), "only get updated oops in weak ref processing"); - - if (! 
oopDesc::is_null(obj)) { - if (Verbose && ShenandoahTraceWeakReferences) { - gclog_or_tty->print_cr("\twe're looking at location " - "*"PTR_FORMAT" = "PTR_FORMAT, - p2i(p), p2i((void*) obj)); - obj->print(); - } - ShenandoahConcurrentMark::mark_and_push(obj, _sh, _queue); - } + do_oop_work(p); } }; @@ -634,26 +693,35 @@ _sh = (ShenandoahHeap*) Universe::heap(); } - void do_oop(narrowOop* p) { - assert(false, "narrowOops Aren't implemented"); +private: + template + inline void do_oop_work(T* p) { + T o = oopDesc::load_heap_oop(p); + if (! oopDesc::is_null(o)) { + oop obj = oopDesc::decode_heap_oop_not_null(o); + obj = _sh->update_oop_ref_not_null(p, obj); + assert(oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj)), "only get updated oops in weak ref processing"); +#ifdef ASSERT + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + out->print("\twe're looking at location " + "*"PTR_FORMAT" = "PTR_FORMAT, + p2i(p), p2i((void*) obj)); + obj->print_on(out); + } +#endif + ShenandoahConcurrentMark::mark_and_push(obj, _sh, _queue); + } } +public: + void do_oop(narrowOop* p) { + do_oop_work(p); + } void do_oop(oop* p) { - - oop obj = oopDesc::load_heap_oop(p); - if (! oopDesc::is_null(obj)) { - obj = _sh->update_oop_ref_not_null(p, obj); - assert(oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj)), "only get updated oops in weak ref processing"); - if (Verbose && ShenandoahTraceWeakReferences) { - gclog_or_tty->print_cr("\twe're looking at location " - "*"PTR_FORMAT" = "PTR_FORMAT, - p2i(p), p2i((void*) obj)); - obj->print(); - } - ShenandoahConcurrentMark::mark_and_push(obj, _sh, _queue); - } - + do_oop_work(p); } }; @@ -673,6 +741,7 @@ } void work(uint worker_id) { + assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint"); ShenandoahHeap* heap = ShenandoahHeap::heap(); ShenandoahForwardedIsAliveClosure is_alive; ShenandoahCMDrainMarkingStackClosure complete_gc(worker_id, _terminator); @@ -710,93 +779,86 @@ public: - ShenandoahRefProcTaskExecutor() : _workers(ShenandoahHeap::heap()->conc_workers()) { + ShenandoahRefProcTaskExecutor() : _workers(ShenandoahHeap::heap()->workers()) { } // Executes a task using worker threads. 
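// The executor below follows the stock HotSpot pattern: wrap the reference
// processor's task in a proxy gang task, then let the work gang invoke
// work(worker_id) on every worker and join them. A standalone model of that
// contract, with std::thread standing in for the WorkGang internals
// (illustrative names, not the HotSpot API):

#include <thread>
#include <vector>

struct GangTaskModel {
  virtual void work(unsigned worker_id) = 0;
  virtual ~GangTaskModel() {}
};

struct WorkGangModel {
  unsigned _nworkers;
  explicit WorkGangModel(unsigned n) : _nworkers(n) {}
  void run_task(GangTaskModel* task) {
    std::vector<std::thread> ts;
    for (unsigned i = 0; i < _nworkers; i++) {
      ts.emplace_back([task, i] { task->work(i); });  // one work() call per worker
    }
    for (std::thread& t : ts) t.join();               // run_task() blocks until all finish
  }
};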
void execute(ProcessTask& task) { - ShenandoahHeap* heap = ShenandoahHeap::heap(); - ShenandoahConcurrentMark* cm = heap->concurrentMark(); - ParallelTaskTerminator terminator(cm->max_conc_worker_id(), cm->task_queues()); - ShenandoahRefProcTaskProxy proc_task_proxy(task, &terminator); - heap->set_par_threads(cm->max_conc_worker_id()); - _workers->run_task(&proc_task_proxy); - heap->set_par_threads(0); + assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint"); + + ShenandoahConcurrentMark* cm = ShenandoahHeap::heap()->concurrentMark(); + uint nworkers = _workers->active_workers(); + cm->task_queues()->reserve(nworkers); + if (UseShenandoahOWST) { + ShenandoahTaskTerminator terminator(nworkers, cm->task_queues()); + ShenandoahRefProcTaskProxy proc_task_proxy(task, &terminator); + _workers->run_task(&proc_task_proxy); + } else { + ParallelTaskTerminator terminator(nworkers, cm->task_queues()); + ShenandoahRefProcTaskProxy proc_task_proxy(task, &terminator); + _workers->run_task(&proc_task_proxy); + } } void execute(EnqueueTask& task) { - ShenandoahHeap* heap = ShenandoahHeap::heap(); - ShenandoahConcurrentMark* cm = heap->concurrentMark(); ShenandoahRefEnqueueTaskProxy enqueue_task_proxy(task); - heap->set_par_threads(cm->max_conc_worker_id()); _workers->run_task(&enqueue_task_proxy); - heap->set_par_threads(0); } }; void ShenandoahConcurrentMark::weak_refs_work() { - ShenandoahHeap* sh = (ShenandoahHeap*) Universe::heap(); - ReferenceProcessor* rp = sh->ref_processor(); + assert(process_references(), "sanity"); + ShenandoahHeap* sh = (ShenandoahHeap*) Universe::heap(); + ReferenceProcessor* rp = sh->ref_processor(); - // Setup collector policy for softref cleaning. - bool clear_soft_refs = sh->collector_policy()->use_should_clear_all_soft_refs(true /* bogus arg*/); - if (ShenandoahTraceWeakReferences) { - tty->print_cr("clearing soft refs: %s", BOOL_TO_STR(clear_soft_refs)); - } - rp->setup_policy(clear_soft_refs); + // Setup collector policy for softref cleaning. 
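// The "bogus arg" comment refers to the argument of
// use_should_clear_all_soft_refs() being ignored: the policy keeps its own
// latched flag, set on allocation-failure paths, and consumes it once per
// cycle. A simplified sketch of such a latch, under the assumption that this
// is all the call relies on (illustrative, not the exact HotSpot
// CollectorPolicy logic):

struct SoftRefPolicyModel {
  bool _should_clear_all_soft_refs;  // latched by allocation-failure handling

  bool use_should_clear_all_soft_refs(bool /* ignored */) {
    bool result = _should_clear_all_soft_refs;
    _should_clear_all_soft_refs = false;  // consume the request
    return result;                        // feeds rp->setup_policy(...)
  }
};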
+ bool clear_soft_refs = sh->collector_policy()->use_should_clear_all_soft_refs(true /* bogus arg*/); + log_develop_debug(gc, ref)("clearing soft refs: %s", BOOL_TO_STR(clear_soft_refs)); + rp->setup_policy(clear_soft_refs); + rp->set_active_mt_degree(sh->max_parallel_workers()); - uint serial_worker_id = 0; - ShenandoahForwardedIsAliveClosure is_alive; - ParallelTaskTerminator terminator(1, task_queues()); - ShenandoahCMDrainMarkingStackClosure complete_gc(serial_worker_id, &terminator); - ShenandoahRefProcTaskExecutor executor; + uint serial_worker_id = 0; + ShenandoahForwardedIsAliveClosure is_alive; - if (ShenandoahTraceWeakReferences) { - gclog_or_tty->print_cr("start processing references"); - } + assert(task_queues()->is_empty(), "Should be empty"); - if (sh->need_update_refs()) { - ShenandoahCMKeepAliveUpdateClosure keep_alive(get_queue(serial_worker_id)); - rp->process_discovered_references(&is_alive, &keep_alive, - &complete_gc, &executor, - NULL, - ShenandoahHeap::heap()->tracer()->gc_id()); - } else { - ShenandoahCMKeepAliveClosure keep_alive(get_queue(serial_worker_id)); - rp->process_discovered_references(&is_alive, &keep_alive, - &complete_gc, &executor, - NULL, - ShenandoahHeap::heap()->tracer()->gc_id()); - } + ParallelTaskTerminator terminator(1, task_queues()); + ShenandoahCMDrainMarkingStackClosure complete_gc(serial_worker_id, &terminator); + ShenandoahRefProcTaskExecutor executor; -#ifdef ASSERT - for (int i = 0; i < (int) _max_conc_worker_id; i++) { - assert(_task_queues->queue(i)->is_empty(), "Should be empty"); - } -#endif + log_develop_trace(gc, ref)("start processing references"); - if (ShenandoahTraceWeakReferences) { - gclog_or_tty->print_cr("finished processing references"); - gclog_or_tty->print_cr("start enqueuing references"); - } + if (sh->need_update_refs()) { + ShenandoahCMKeepAliveUpdateClosure keep_alive(get_queue(serial_worker_id)); + rp->process_discovered_references(&is_alive, &keep_alive, + &complete_gc, &executor, + NULL, sh->shenandoahPolicy()->tracer()->gc_id()); + } else { + ShenandoahCMKeepAliveClosure keep_alive(get_queue(serial_worker_id)); + rp->process_discovered_references(&is_alive, &keep_alive, + &complete_gc, &executor, + NULL, sh->shenandoahPolicy()->tracer()->gc_id()); + } - rp->enqueue_discovered_references(&executor); + assert(task_queues()->is_empty(), "Should be empty"); - if (ShenandoahTraceWeakReferences) { - gclog_or_tty->print_cr("finished enqueueing references"); - } + log_develop_trace(gc, ref)("finished processing references"); + log_develop_trace(gc, ref)("start enqueuing references"); - rp->verify_no_references_recorded(); - assert(!rp->discovery_enabled(), "Post condition"); + rp->enqueue_discovered_references(&executor); + log_develop_trace(gc, ref)("finished enqueueing references"); + + rp->verify_no_references_recorded(); + assert(!rp->discovery_enabled(), "Post condition"); } void ShenandoahConcurrentMark::cancel() { ShenandoahHeap* sh = ShenandoahHeap::heap(); // Cancel weak-ref discovery. - if (ShenandoahProcessReferences) { + if (process_references()) { ReferenceProcessor* rp = sh->ref_processor(); rp->abandon_partial_discovery(); rp->disable_discovery(); @@ -804,45 +866,91 @@ // Clean up marking stacks. SCMObjToScanQueueSet* queues = task_queues(); - for (uint i = 0; i < _max_conc_worker_id; ++i) { - SCMObjToScanQueue* task_queue = queues->queue(i); - task_queue->set_empty(); - task_queue->overflow_stack()->clear(); - } + queues->clear(); // Cancel SATB buffers. 
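// Abandoning SATB buffers on cancellation is safe because the next marking
// cycle starts from a freshly reset bitmap, so the recorded old values are
// worthless. A standalone model of the abandon operation over completed
// per-thread buffers (illustrative, not the HotSpot PtrQueueSet API):

#include <vector>

struct SATBBufferModel {
  std::vector<void*> entries;  // old values recorded by the pre-write barrier
};

struct SATBQueueSetModel {
  std::vector<SATBBufferModel*> _completed;  // buffers handed off by mutators

  void abandon_partial_marking() {
    for (size_t i = 0; i < _completed.size(); i++) {
      delete _completed[i];  // drop the recorded references unprocessed
    }
    _completed.clear();
  }
};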
JavaThread::satb_mark_queue_set().abandon_partial_marking(); } + SCMObjToScanQueue* ShenandoahConcurrentMark::get_queue(uint worker_id) { - worker_id = worker_id % _max_conc_worker_id; + assert(task_queues()->get_reserved() > worker_id, err_msg("No reserved queue for worker id: %d", worker_id)); return _task_queues->queue(worker_id); } -template -void ShenandoahConcurrentMark::concurrent_mark_loop(ShenandoahMarkObjsClosure* cl, +void ShenandoahConcurrentMark::clear_queue(SCMObjToScanQueue *q) { + q->set_empty(); + q->overflow_stack()->clear(); + q->clear_buffer(); +} + +template +void ShenandoahConcurrentMark::concurrent_mark_loop(ShenandoahMarkObjsClosure* cl, uint worker_id, SCMObjToScanQueue* q, ParallelTaskTerminator* terminator) { ShenandoahHeap* heap = ShenandoahHeap::heap(); int seed = 17; + uint stride = ShenandoahMarkLoopStride; + SCMObjToScanQueueSet* queues = task_queues(); + bool done_queues = false; + while (true) { - if (heap->cancelled_concgc()) q->set_empty(); - if (heap->cancelled_concgc() || - (!try_queue(q, cl) && - !try_draining_an_satb_buffer(q) && - !try_to_steal(worker_id, cl, &seed)) - ) { - if (terminator->offer_termination()) break; + if (heap->cancelled_concgc()) { + clear_queue(q); + + // Clear other queues for termination + while ((q = queues->claim_next()) != NULL) { + clear_queue(q); + } + + while (! terminator->offer_termination()); + return; + } + + if (!done_queues) { + done_queues = true; + if (!concurrent_process_queues(heap, q, cl)) { + // concurrent GC cancelled + continue; + } + } + + for (uint i = 0; i < stride; i++) { + if (!try_queue(q, cl) && + !try_draining_an_satb_buffer(q) && + !try_to_steal(worker_id, cl, &seed)) { + if (terminator->offer_termination()) return; + } } } } -template -void ShenandoahConcurrentMark::final_mark_loop(ShenandoahMarkObjsClosure* cl, +template +bool ShenandoahConcurrentMark::concurrent_process_queues(ShenandoahHeap* heap, + SCMObjToScanQueue* q, ShenandoahMarkObjsClosure* cl) { + SCMObjToScanQueueSet* queues = task_queues(); + uint stride = ShenandoahMarkLoopStride; + while (true) { + if (heap->cancelled_concgc()) return false; + + for (uint i = 0; i < stride; i++) { + if (!try_queue(q, cl)) { + assert(q->is_empty(), "Must be empty"); + q = queues->claim_next(); + if (q == NULL) { + return true; + } + } + } + } +} + + +template +void ShenandoahConcurrentMark::final_mark_loop(ShenandoahMarkObjsClosure* cl, uint worker_id, SCMObjToScanQueue* q, ParallelTaskTerminator* terminator) { - ShenandoahHeap* heap = ShenandoahHeap::heap(); int seed = 17; while (true) { if (!try_queue(q, cl) && @@ -851,3 +959,30 @@ } } } + +void ShenandoahConcurrentMark::set_process_references(bool pr) { + _process_references = pr; +} + +bool ShenandoahConcurrentMark::process_references() const { + return _process_references; +} + +void ShenandoahConcurrentMark::set_unload_classes(bool uc) { + _unload_classes = uc; +} + +bool ShenandoahConcurrentMark::unload_classes() const { + return _unload_classes; +} + +bool ShenandoahConcurrentMark::claim_codecache() { + assert(ShenandoahConcurrentCodeRoots, "must not be called otherwise"); + jbyte old = Atomic::cmpxchg(1, &_claimed_codecache, 0); + return old == 0; +} + +void ShenandoahConcurrentMark::clear_claim_codecache() { + assert(ShenandoahConcurrentCodeRoots, "must not be called otherwise"); + _claimed_codecache = 0; +} diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp 
Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -26,47 +26,38 @@ #include "utilities/taskqueue.hpp" #include "utilities/workgroup.hpp" +#include "gc_implementation/shenandoah/shenandoahTaskqueue.hpp" -typedef OverflowTaskQueue ShenandoahOverflowTaskQueue; -typedef Padded SCMObjToScanQueue; -typedef GenericTaskQueueSet SCMObjToScanQueueSet; +typedef BufferedOverflowTaskQueue ShenandoahBufferedOverflowTaskQueue; +typedef Padded SCMObjToScanQueue; class ShenandoahConcurrentMark; -class QHolder { -private: - SCMObjToScanQueue* _queue; -public: - QHolder(SCMObjToScanQueue* q) : _queue(q) { - } - inline SCMObjToScanQueue* queue() { - return _queue; - } -}; - #ifdef ASSERT class ShenandoahVerifyRootsClosure1 : public OopClosure { +private: + template + inline void do_oop_work(T* p); + +public: void do_oop(oop* p); - - void do_oop(narrowOop* p) { - Unimplemented(); - } + void do_oop(narrowOop* p); }; #endif -template +template class ShenandoahMarkObjsClosure { ShenandoahHeap* _heap; T _mark_refs; - QHolder* _queue; + SCMObjToScanQueue* _queue; uint _last_region_idx; size_t _live_data; public: - ShenandoahMarkObjsClosure(QHolder* q); + ShenandoahMarkObjsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp); ~ShenandoahMarkObjsClosure(); - inline void do_object(oop obj, int index); - inline void do_objarray(objArrayOop array, int index); + inline void do_object_or_array(oop obj, int from, int to); + inline void do_array(objArrayOop array, int from, int to); inline void count_liveness(oop obj); }; @@ -76,11 +67,23 @@ // The per-worker-thread work queues SCMObjToScanQueueSet* _task_queues; - uint _max_conc_worker_id; + bool _process_references; + bool _unload_classes; + + jbyte _claimed_codecache; public: // We need to do this later when the heap is already created. - void initialize(); + void initialize(uint workers); + + void set_process_references(bool pr); + bool process_references() const; + + void set_unload_classes(bool uc); + bool unload_classes() const; + + bool claim_codecache(); + void clear_claim_codecache(); static inline void mark_and_push(oop obj, ShenandoahHeap* heap, SCMObjToScanQueue* q); @@ -88,28 +91,33 @@ // Prepares unmarked root objects by marking them and putting // them into the marking task queue. - void prepare_unmarked_root_objs(); - void prepare_unmarked_root_objs_no_derived_ptrs(bool update_refs); + void init_mark_roots(); + void mark_roots(); + void update_roots(); + void final_update_roots(); - void shared_finish_mark_from_roots(); + void shared_finish_mark_from_roots(bool full_gc); void finish_mark_from_roots(); // Those are only needed public because they're called from closures. 
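// The loop these declarations implement has a fixed priority order: drain the
// worker's own queue, then SATB buffers, then steal from other queues, and
// only offer termination once all three come up empty. A schematic standalone
// version of that order (the try_* bodies are stand-ins; a single-worker
// model, so termination always succeeds):

#include <deque>

struct MarkWorkerModel {
  std::deque<int> local;  // stand-in for SCMObjToScanQueue

  bool try_queue() { if (local.empty()) return false; local.pop_back(); return true; }
  bool try_draining_an_satb_buffer() { return false; }  // stand-in
  bool try_to_steal() { return false; }                 // stand-in
  bool offer_termination() { return true; }             // single-worker model

  void final_mark_loop() {
    while (true) {
      if (!try_queue() &&
          !try_draining_an_satb_buffer() &&
          !try_to_steal()) {
        if (offer_termination()) return;  // all workers idle: marking is done
      }
    }
  }
};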
- template - void concurrent_mark_loop(ShenandoahMarkObjsClosure* cl, uint worker_id, SCMObjToScanQueue* q, ParallelTaskTerminator* t); + template + void concurrent_mark_loop(ShenandoahMarkObjsClosure* cl, uint worker_id, SCMObjToScanQueue* q, ParallelTaskTerminator* t); - template - void final_mark_loop(ShenandoahMarkObjsClosure* cl, uint worker_id, SCMObjToScanQueue* q, ParallelTaskTerminator* t); + template + void final_mark_loop(ShenandoahMarkObjsClosure* cl, uint worker_id, SCMObjToScanQueue* q, ParallelTaskTerminator* t); + + template + inline bool try_queue(SCMObjToScanQueue* q, ShenandoahMarkObjsClosure* cl); + + template + inline bool try_to_steal(uint worker_id, ShenandoahMarkObjsClosure* cl, int *seed); SCMObjToScanQueue* get_queue(uint worker_id); - template - inline bool try_queue(SCMObjToScanQueue* q, ShenandoahMarkObjsClosure* cl); - template - inline bool try_to_steal(uint worker_id, ShenandoahMarkObjsClosure* cl, int *seed); + void clear_queue(SCMObjToScanQueue *q); + inline bool try_draining_an_satb_buffer(SCMObjToScanQueue* q); void drain_satb_buffers(uint worker_id, bool remark = false); SCMObjToScanQueueSet* task_queues() { return _task_queues;} - uint max_conc_worker_id() { return _max_conc_worker_id; } void cancel(); @@ -121,10 +129,16 @@ void weak_refs_work(); + /** + * Process assigned queue and others if there are any to be claimed. + * Return false if the process is terminated by concurrent gc cancellation. + */ + template + bool concurrent_process_queues(ShenandoahHeap* heap, SCMObjToScanQueue* q, ShenandoahMarkObjsClosure* cl); + #if TASKQUEUE_STATS - static void print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty); - void print_taskqueue_stats(outputStream* const st = gclog_or_tty) const; - void print_push_only_taskqueue_stats(outputStream* const st = gclog_or_tty) const; + static void print_taskqueue_stats_hdr(outputStream* const st = tty); + void print_taskqueue_stats() const; void reset_taskqueue_stats(); #endif // TASKQUEUE_STATS diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -27,20 +27,20 @@ #include "gc_implementation/shenandoah/brooksPointer.hpp" #include "gc_implementation/shenandoah/shenandoahBarrierSet.inline.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentMark.hpp" +#include "gc_implementation/shenandoah/shenandoahTaskqueue.inline.hpp" #include "memory/iterator.inline.hpp" #include "oops/oop.inline.hpp" #include "runtime/prefetch.inline.hpp" -template -void ShenandoahMarkObjsClosure::do_object(oop obj, int index) { - +template +void ShenandoahMarkObjsClosure::do_object_or_array(oop obj, int from, int to) { assert(obj != NULL, "expect non-null object"); - assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static(obj)), "need to-space object here"); + assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static_not_null(obj)), "expect forwarded obj in queue"); #ifdef ASSERT if (! 
oopDesc::bs()->is_safe(obj)) { - tty->print_cr("trying to mark obj: "PTR_FORMAT" (%s) in dirty region: ", p2i((HeapWord*) obj), BOOL_TO_STR(_heap->is_marked_current(obj))); + tty->print_cr("trying to mark obj: "PTR_FORMAT" (%s) in dirty region: ", p2i((HeapWord*) obj), BOOL_TO_STR(_heap->is_marked_next(obj))); // _heap->heap_region_containing(obj)->print(); // _heap->print_heap_regions(); } @@ -49,79 +49,82 @@ || oopDesc::bs()->is_safe(obj), "we don't want to mark objects in from-space"); assert(_heap->is_in(obj), "referenced objects must be in the heap. No?"); - assert(_heap->is_marked_current(obj), "only marked objects on task queue"); + assert(_heap->is_marked_next(obj), "only marked objects on task queue"); - // Calculate liveness of heap region containing object. - if (index == -1) { // Normal oop or obj-array-head + if (from == -1) { count_liveness(obj); if (obj->is_objArray()) { - // Process metadata. + // Case 1: Array instance and no task bounds set. Must be the first time + // we visit it. Process its metadata, and submit the chunked array task + // with proper bounds. _mark_refs.do_klass(obj->klass()); objArrayOop array = objArrayOop(obj); if (array->length() > 0) { - do_objarray(array, 0); + do_array(array, 0, array->length()); } } else { + // Case 2: Normal oop, process as usual. obj->oop_iterate(&_mark_refs); } - } else { // Chunked obj array processing + } else { + // Case 3: Array chunk, has sensible (from, to) bounds. Process it. assert(obj->is_objArray(), "expect object array"); objArrayOop array = objArrayOop(obj); - do_objarray(array, index); + do_array(array, from, to); } } -template -inline void ShenandoahMarkObjsClosure::count_liveness(oop obj) { +template +inline void ShenandoahMarkObjsClosure::count_liveness(oop obj) { + if (!CL) return; // no need to count liveness! uint region_idx = _heap->heap_region_index_containing(obj); if (region_idx == _last_region_idx) { - _live_data += (obj->size() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE) * HeapWordSize; + _live_data += (obj->size() + BrooksPointer::word_size()) * HeapWordSize; } else { ShenandoahHeapRegion* r = _heap->regions()->get(_last_region_idx); r->increase_live_data(_live_data); _last_region_idx = region_idx; - _live_data = (obj->size() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE) * HeapWordSize; + _live_data = (obj->size() + BrooksPointer::word_size()) * HeapWordSize; } } -template -inline void ShenandoahMarkObjsClosure::do_objarray(objArrayOop array, int index) { +template +inline void ShenandoahMarkObjsClosure::do_array(objArrayOop array, int from, int to) { + assert (from < to, "sanity"); + assert (ObjArrayMarkingStride > 0, "sanity"); - const int len = array->length(); - const int beg_index = index; - assert(beg_index < len, "index too large"); - const int stride = MIN2(len - beg_index, (int) ObjArrayMarkingStride); - const int end_index = beg_index + stride; - // tty->print_cr("strided obj array scan: %p, %d -> %d", array, beg_index, end_index); - // Now scan our stride - array->oop_iterate_range(&_mark_refs, beg_index, end_index); - // Push continuation - if (end_index < len) { - bool pushed = _queue->queue()->push(ObjArrayTask(array, end_index)); + // Fork out tasks until we hit the leaf task. Larger tasks would go to the + // "stealing" part of the queue, which will seed other workers efficiently. 
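// Concretely, the loop below keeps the lower half of the remaining index range
// and pushes the upper half as a new task, so an array of n elements unrolls
// into O(log n) stealable chunks instead of a serial chain of continuations.
// The same arithmetic in a standalone sketch (kStride and the queue are
// stand-ins for ObjArrayMarkingStride and the task queue):

#include <cstdio>
#include <utility>
#include <vector>

static const int kStride = 4;  // stand-in for ObjArrayMarkingStride

void scan_range(std::vector<std::pair<int, int> >& queue, int from, int to) {
  while (to - from > kStride) {
    int mid = from + (to - from) / 2;
    queue.push_back(std::make_pair(mid, to));  // forked task, available for stealing
    to = mid;
  }
  printf("scan leaf [%d, %d)\n", from, to);    // the oop_iterate_range() part
}

// Calling scan_range(q, 0, 100) queues [50,100), [25,50), [12,25), [6,12),
// [3,6) and scans [0,3) itself.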
+ while ((to - from) > (int)ObjArrayMarkingStride) { + int mid = from + (to - from) / 2; + bool pushed = _queue->push(ObjArrayFromToTask(array, mid, to)); assert(pushed, "overflow queue should always succeed pushing"); + to = mid; } + + // Execute the leaf task + array->oop_iterate_range(&_mark_refs, from, to); } -template -inline bool ShenandoahConcurrentMark::try_queue(SCMObjToScanQueue* q, ShenandoahMarkObjsClosure* cl) { - ObjArrayTask task; - if (q->pop_local(task)) { +template +inline bool ShenandoahConcurrentMark::try_queue(SCMObjToScanQueue* q, ShenandoahMarkObjsClosure* cl) { + ObjArrayFromToTask task; + if (q->pop_buffer(task) || + q->pop_local(task) || + q->pop_overflow(task)) { assert(task.obj() != NULL, "Can't mark null"); - cl->do_object(task.obj(), task.index()); - return true; - } else if (q->pop_overflow(task)) { - cl->do_object(task.obj(), task.index()); + cl->do_object_or_array(task.obj(), task.from(), task.to()); return true; } else { return false; } } -template -inline bool ShenandoahConcurrentMark::try_to_steal(uint worker_id, ShenandoahMarkObjsClosure* cl, int *seed) { - ObjArrayTask task; +template +inline bool ShenandoahConcurrentMark::try_to_steal(uint worker_id, ShenandoahMarkObjsClosure* cl, int *seed) { + ObjArrayFromToTask task; if (task_queues()->steal(worker_id, seed, task)) { - cl->do_object(task.obj(), task.index()); + cl->do_object_or_array(task.obj(), task.from(), task.to()); return true; } else return false; @@ -138,11 +141,9 @@ } void do_buffer(void** buffer, size_t size) { - // tty->print_cr("draining one satb buffer"); for (size_t i = 0; i < size; ++i) { void* entry = buffer[i]; oop obj = oop(entry); - // tty->print_cr("satb buffer entry: "PTR_FORMAT, p2i((HeapWord*) obj)); if (!oopDesc::is_null(obj)) { obj = ShenandoahBarrierSet::resolve_oop_static_not_null(obj); ShenandoahConcurrentMark::mark_and_push(obj, _heap, _queue); @@ -158,15 +159,23 @@ } inline void ShenandoahConcurrentMark::mark_and_push(oop obj, ShenandoahHeap* heap, SCMObjToScanQueue* q) { +#ifdef ASSERT + if (! oopDesc::bs()->is_safe(obj)) { + tty->print_cr("obj in cset: %s, obj: "PTR_FORMAT", forw: "PTR_FORMAT, + BOOL_TO_STR(heap->in_collection_set(obj)), + p2i(obj), + p2i(ShenandoahBarrierSet::resolve_oop_static_not_null(obj))); + heap->heap_region_containing((HeapWord*) obj)->print(); + } +#endif assert(oopDesc::bs()->is_safe(obj), "no ref in cset"); - if (heap->mark_current(obj)) { + assert(Universe::heap()->is_in(obj), err_msg("We shouldn't be calling this on objects not in the heap: "PTR_FORMAT, p2i(obj))); + if (heap->mark_next(obj)) { #ifdef ASSERT - if (ShenandoahTraceConcurrentMarking) { - tty->print_cr("marked obj: "PTR_FORMAT, p2i((HeapWord*) obj)); - } + log_develop_trace(gc, marking)("marked obj: "PTR_FORMAT, p2i((HeapWord*) obj)); - if (heap->heap_region_containing(obj)->is_in_collection_set()) { - tty->print_cr("trying to mark obj: "PTR_FORMAT" (%s) in dirty region: ", p2i((HeapWord*) obj), BOOL_TO_STR(heap->is_marked_current(obj))); + if (! 
oopDesc::bs()->is_safe(obj)) { + tty->print_cr("trying to mark obj: "PTR_FORMAT" (%s) in dirty region: ", p2i((HeapWord*) obj), BOOL_TO_STR(heap->is_marked_next(obj))); // _heap->heap_region_containing(obj)->print(); // _heap->print_heap_regions(); } @@ -175,16 +184,14 @@ || oopDesc::bs()->is_safe(obj), "we don't want to mark objects in from-space"); - bool pushed = q->push(ObjArrayTask(obj, -1)); + bool pushed = q->push(ObjArrayFromToTask(obj, -1, -1)); assert(pushed, "overflow queue should always succeed pushing"); } #ifdef ASSERT else { - if (ShenandoahTraceConcurrentMarking) { - tty->print_cr("failed to mark obj (already marked): "PTR_FORMAT, p2i((HeapWord*) obj)); - } - assert(heap->is_marked_current(obj), "make sure object is marked"); + log_develop_trace(gc, marking)("failed to mark obj (already marked): "PTR_FORMAT, p2i((HeapWord*) obj)); + assert(heap->is_marked_next(obj), "make sure object is marked"); } #endif } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -21,12 +21,13 @@ * */ +#include "gc_implementation/shared/gcTimer.hpp" #include "gc_implementation/shared/gcTraceTime.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentThread.hpp" +#include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" -#include "gc_implementation/shenandoah/shenandoahJNICritical.hpp" +#include "gc_implementation/shenandoah/shenandoahMonitoringSupport.hpp" #include "gc_implementation/shenandoah/vm_operations_shenandoah.hpp" -#include "gc_implementation/shenandoah/shenandoahMonitoringSupport.hpp" #include "memory/iterator.hpp" #include "memory/universe.hpp" #include "runtime/vmThread.hpp" @@ -35,6 +36,7 @@ ShenandoahConcurrentThread::ShenandoahConcurrentThread() : ConcurrentGCThread(), + _full_gc_lock(Mutex::leaf, "ShenandoahFullGC_lock", true), _do_full_gc(false) { create_and_start(); @@ -59,10 +61,9 @@ ShenandoahHeap* heap = ShenandoahHeap::heap(); - GCTimer* gc_timer = heap->shenandoahPolicy()->conc_timer(); - GCTracer* gc_tracer = heap->tracer(); - GCId gc_id = gc_tracer->gc_id(); - while (!_should_terminate) { + GCTimer* gc_timer = heap->gc_timer(); + GCTracer* gc_tracer = heap->shenandoahPolicy()->tracer(); + while (! 
_should_terminate) { if (_do_full_gc) { { if (_full_gc_cause == GCCause::_allocation_failure) { @@ -73,22 +74,22 @@ TraceCollectorStats tcs(heap->monitoring_support()->full_collection_counters()); TraceMemoryManagerStats tmms(true, _full_gc_cause); - VM_ShenandoahFullGC full_gc; - heap->jni_critical()->execute_in_vm_thread(&full_gc); + VM_ShenandoahFullGC full_gc(_full_gc_cause); + VMThread::execute(&full_gc); } - MonitorLockerEx ml(ShenandoahFullGC_lock); + MonitorLockerEx ml(&_full_gc_lock); _do_full_gc = false; ml.notify_all(); } else if (heap->shenandoahPolicy()->should_start_concurrent_mark(heap->used(), heap->capacity())) { + gc_timer->register_gc_start(); + + heap->shenandoahPolicy()->increase_cycle_counter(); + TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); TraceMemoryManagerStats tmms(false, GCCause::_no_cause_specified); - if (ShenandoahGCVerbose) - tty->print("Capacity = "SIZE_FORMAT" Used = "SIZE_FORMAT" doing initMark\n", heap->capacity(), heap->used()); - - if (ShenandoahGCVerbose) tty->print("Starting a mark"); { TraceCollectorStats tcs(heap->monitoring_support()->stw_collection_counters()); @@ -98,7 +99,7 @@ heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::init_mark_gross); } { - GCTraceTime time("Concurrent marking", ShenandoahTracePhases, true, NULL, gc_id); + // GCTraceTime time("Concurrent marking", ShenandoahLogInfo, true, gc_timer, gc_tracer->gc_id()); TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); ShenandoahHeap::heap()->concurrentMark()->mark_from_roots(); } @@ -107,12 +108,12 @@ TraceCollectorStats tcs(heap->monitoring_support()->stw_collection_counters()); VM_ShenandoahStartEvacuation finishMark; heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::final_mark_gross); - heap->jni_critical()->execute_in_vm_thread(&finishMark); + VMThread::execute(&finishMark); heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::final_mark_gross); } if (! _should_terminate) { - GCTraceTime time("Concurrent evacuation", ShenandoahTracePhases, true, NULL, gc_id); + // GCTraceTime time("Concurrent evacuation ", ShenandoahLogInfo, true, gc_timer, gc_tracer->gc_id()); TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); heap->do_evacuation(); } @@ -122,7 +123,7 @@ heap->set_evacuation_in_progress(false); } heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::reset_bitmaps); - heap->reset_mark_bitmap(); + heap->reset_next_mark_bitmap(heap->conc_workers()); heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::reset_bitmaps); gc_timer->register_gc_end(); @@ -131,28 +132,43 @@ // yield(); } - if (heap->cancelled_concgc()) { - // tty->print("Concurrent thread is about to clear cancelled concgc"); - heap->clear_cancelled_concgc(); - } // Make sure the _do_full_gc flag changes are seen. 
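// Without the StoreLoad barrier, the loop's earlier stores could be ordered
// after its next read of _do_full_gc, letting it miss a request another thread
// has already published. In portable C++ the same guarantee comes from
// sequentially consistent atomics; a standalone sketch (do_full_gc_flag stands
// in for the raw _do_full_gc field):

#include <atomic>

std::atomic<bool> do_full_gc_flag(false);

void publish_request() {        // requesting thread
  do_full_gc_flag.store(true);  // seq_cst store
}

bool poll_request() {           // GC thread's loop iteration
  // A seq_cst load supplies the StoreLoad ordering that the explicit
  // OrderAccess::storeload() provides for the plain field.
  return do_full_gc_flag.load();
}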
     OrderAccess::storeload();
   }
   terminate();
 }
+void ShenandoahConcurrentThread::stop() {
+  {
+    MutexLockerEx ml(Terminator_lock);
+    _should_terminate = true;
+  }
+
+  {
+    MutexLockerEx ml(CGC_lock, Mutex::_no_safepoint_check_flag);
+    CGC_lock->notify_all();
+  }
+
+  {
+    MutexLockerEx ml(Terminator_lock);
+    while (!_has_terminated) {
+      Terminator_lock->wait();
+    }
+  }
+}
+
 void ShenandoahConcurrentThread::do_full_gc(GCCause::Cause cause) {
   assert(Thread::current()->is_Java_thread(), "expect Java thread here");
-  MonitorLockerEx ml(ShenandoahFullGC_lock);
+  MonitorLockerEx ml(&_full_gc_lock);
   schedule_full_gc();
   _full_gc_cause = cause;
   while (_do_full_gc) {
     ml.wait();
     OrderAccess::storeload();
   }
-  assert(_do_full_gc == false, "expect full GC to have completed");
+  assert(!_do_full_gc, "expect full GC to have completed");
 }

 void ShenandoahConcurrentThread::schedule_full_gc() {
@@ -183,22 +199,3 @@
   assert(_slt == NULL, "SLT already created");
   _slt = SurrogateLockerThread::make(THREAD);
 }
-
-void ShenandoahConcurrentThread::shutdown() {
-  {
-    MutexLockerEx ml(Terminator_lock);
-    _should_terminate = true;
-  }
-
-  {
-    MutexLockerEx ml(CGC_lock, Mutex::_no_safepoint_check_flag);
-    CGC_lock->notify_all();
-  }
-
-  {
-    MutexLockerEx ml(Terminator_lock);
-    while (!_has_terminated) {
-      Terminator_lock->wait();
-    }
-  }
-}
diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.hpp
--- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.hpp	Fri Nov 04 07:21:01 2016 -0400
+++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.hpp	Wed Dec 07 21:03:02 2016 +0100
@@ -28,15 +28,23 @@
 #include "gc_interface/gcCause.hpp"
 #include "memory/resourceArea.hpp"

+// For now we just want to have a concurrent marking thread.
+// Once we have that working we will build a concurrent evacuation thread.
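// The stop()/do_full_gc() code above is a monitor rendezvous: the requesting
// thread sets a flag and waits on the monitor; the GC thread does the work,
// clears the flag, and notifies. The same handshake in portable C++ with a
// condition variable (a sketch of the protocol, not the HotSpot Monitor API):

#include <condition_variable>
#include <mutex>

std::mutex full_gc_lock;
std::condition_variable full_gc_cv;
bool full_gc_requested = false;

void request_full_gc() {  // corresponds to do_full_gc() on the Java thread
  std::unique_lock<std::mutex> ml(full_gc_lock);
  full_gc_requested = true;
  full_gc_cv.wait(ml, [] { return !full_gc_requested; });  // wait until done
}

void ack_full_gc_done() {  // corresponds to the GC thread clearing _do_full_gc
  std::lock_guard<std::mutex> ml(full_gc_lock);
  full_gc_requested = false;
  full_gc_cv.notify_all();
}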
+ class ShenandoahConcurrentThread: public ConcurrentGCThread { friend class VMStructs; - public: - virtual void run(); +private: + Monitor _full_gc_lock; private: static SurrogateLockerThread* _slt; +public: + void run(); + void stop(); + +private: bool _do_full_gc; GCCause::Cause _full_gc_cause; @@ -64,8 +72,6 @@ static void safepoint_synchronize(); static void safepoint_desynchronize(); - - void shutdown(); }; #endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHCONCURRENTTHREAD_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -22,9 +22,9 @@ */ #include "gc_implementation/shenandoah/shenandoahFreeSet.hpp" -#include "gc_implementation/shenandoah/shenandoahHeap.hpp" +#include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" #include "gc_implementation/shenandoah/shenandoahHeapRegion.inline.hpp" -#include "runtime/atomic.inline.hpp" +#include "runtime/atomic.hpp" ShenandoahFreeSet::ShenandoahFreeSet(size_t max_regions) : ShenandoahHeapRegionSet(max_regions), @@ -39,7 +39,7 @@ void ShenandoahFreeSet::increase_used(size_t num_bytes) { assert(_used <= _capacity, "must not use more than we have"); - Atomic::add_ptr((intptr_t) num_bytes, (intptr_t*) &_used); + Atomic::add_ptr(num_bytes, (intptr_t*) &_used); } size_t ShenandoahFreeSet::used() { @@ -105,7 +105,7 @@ for (size_t i = start; i != end; i = (i + 1) % _reserved_end) { ShenandoahHeapRegion* r = get(i); // We subtract the capacity here, and add it back in par_add_region. - Atomic::add_ptr(- ((intptr_t)r->free()), (intptr_t*) &_capacity); + Atomic::add(-((jlong) r->free()), (jlong*) &_capacity); } par_add_regions(_regions, start, diff_to_end(start, end), _reserved_end); } @@ -171,7 +171,7 @@ void ShenandoahFreeSet::par_add_regions(ShenandoahHeapRegion** regions, size_t start, size_t num, size_t max) { - size_t next = (size_t) Atomic::add_ptr((intptr_t) num, (intptr_t*) &_write_index); + size_t next = Atomic::add_ptr(num, (intptr_t*) &_write_index); assert(next >= num, "don't get negative"); size_t bottom = (next - num) % _reserved_end; next = next % _reserved_end; @@ -190,18 +190,18 @@ while (true) { size_t test = (size_t) Atomic::cmpxchg((jlong) next, (jlong*) &_active_end, (jlong) bottom); if (test == bottom) { - Atomic::add_ptr((intptr_t) capacity, (intptr_t*) &_capacity); + Atomic::add_ptr(capacity, (intptr_t*) &_capacity); return; } else { // Don't starve competing threads. - os::yield(); + os::NakedYield(); } } } void ShenandoahFreeSet::add_region(ShenandoahHeapRegion* r) { - assert(!r->is_in_collection_set(), "Shouldn't be adding those to the free set"); + assert(! 
r->in_collection_set(), "Shouldn't be adding those to the free set"); assert(!contains(r), "We are about to add it, it shouldn't be there already"); assert(!r->is_humongous(), "Don't add to humongous regions"); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -21,44 +21,34 @@ * */ -#include "precompiled.hpp" -#include "asm/macroAssembler.hpp" +#include "memory/allocation.hpp" -#include "classfile/symbolTable.hpp" - -#include "gc_interface/collectedHeap.inline.hpp" -#include "gc_implementation/g1/concurrentMark.inline.hpp" -#include "gc_implementation/shared/gcHeapSummary.hpp" #include "gc_implementation/shared/gcTimer.hpp" -#include "gc_implementation/shared/gcTrace.hpp" #include "gc_implementation/shared/gcTraceTime.hpp" -#include "gc_implementation/shared/isGCActiveMark.hpp" +#include "gc_implementation/shared/parallelCleaning.hpp" #include "gc_implementation/shenandoah/brooksPointer.hpp" +#include "gc_implementation/shenandoah/shenandoahBarrierSet.hpp" #include "gc_implementation/shenandoah/shenandoahCollectionSet.hpp" +#include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" +#include "gc_implementation/shenandoah/shenandoahConcurrentMark.hpp" +#include "gc_implementation/shenandoah/shenandoahConcurrentThread.hpp" #include "gc_implementation/shenandoah/shenandoahFreeSet.hpp" +#include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" +#include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" +#include "gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp" #include "gc_implementation/shenandoah/shenandoahHumongous.hpp" +#include "gc_implementation/shenandoah/shenandoahMarkCompact.hpp" #include "gc_implementation/shenandoah/shenandoahMonitoringSupport.hpp" #include "gc_implementation/shenandoah/shenandoahRootProcessor.hpp" -#include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" -#include "gc_implementation/shenandoah/shenandoahJNICritical.hpp" -#include "gc_implementation/shenandoah/shenandoahBarrierSet.inline.hpp" #include "gc_implementation/shenandoah/vm_operations_shenandoah.hpp" -#include "oops/oop.inline.hpp" + #include "runtime/vmThread.hpp" -#include "memory/iterator.hpp" -#include "memory/oopFactory.hpp" -#include "memory/referenceProcessor.hpp" -#include "memory/space.inline.hpp" -#include "memory/threadLocalAllocBuffer.inline.hpp" -#include "memory/universe.hpp" -#include "utilities/copy.hpp" -#include "gc_implementation/shared/vmGCOperations.hpp" -#include "runtime/atomic.inline.hpp" +#include "services/mallocTracker.hpp" -#define __ masm-> - -ShenandoahHeap* ShenandoahHeap::_pgc = NULL; +const char* ShenandoahHeap::name() const { + return "Shenandoah"; +} void ShenandoahHeap::print_heap_locations(HeapWord* start, HeapWord* end) { HeapWord* cur = NULL; @@ -67,20 +57,6 @@ } } -void ShenandoahHeap::print_heap_objects(HeapWord* start, HeapWord* end) { - HeapWord* cur = NULL; - for (cur = start; cur < end; cur = cur + oop(cur)->size()) { - oop(cur)->print(); - print_heap_locations(cur, cur + oop(cur)->size()); - } -} - -void ShenandoahHeap::print_heap_object(oop p) { - HeapWord* hw = (HeapWord*) p; - print_heap_locations(hw-1, hw+1+p->size()); -} - - class PrintHeapRegionsClosure : public ShenandoahHeapRegionClosure { private: @@ -95,25 +71,55 @@ } }; -class PrintHeapObjectsClosure : public 
ShenandoahHeapRegionClosure { +class ShenandoahPretouchTask : public AbstractGangTask { +private: + char* volatile _cur_addr; + char* const _start_addr; + char* const _end_addr; + size_t const _page_size; public: - bool doHeapRegion(ShenandoahHeapRegion* r) { - tty->print_cr("Region "SIZE_FORMAT" top = "PTR_FORMAT" used = "SIZE_FORMAT_HEX" free = "SIZE_FORMAT_HEX, - r->region_number(), p2i(r->top()), r->used(), r->free()); + ShenandoahPretouchTask(char* start_address, char* end_address, size_t page_size) : + AbstractGangTask("Shenandoah PreTouch"), + _cur_addr(start_address), + _start_addr(start_address), + _end_addr(end_address), + _page_size(page_size) { + } - ShenandoahHeap::heap()->print_heap_objects(r->bottom(), r->top()); - return false; + virtual void work(uint worker_id) { + size_t const actual_chunk_size = MAX2(PreTouchParallelChunkSize, _page_size); + while (true) { + char* touch_addr = (char*)Atomic::add_ptr((intptr_t)actual_chunk_size, (volatile void*) &_cur_addr) - actual_chunk_size; + if (touch_addr < _start_addr || touch_addr >= _end_addr) { + break; + } + char* end_addr = touch_addr + MIN2(actual_chunk_size, pointer_delta(_end_addr, touch_addr, sizeof(char))); + os::pretouch_memory(touch_addr, end_addr); + } } }; +void ShenandoahHeap::pretouch_storage(char* start, char* end, WorkGang* workers) { + assert (ShenandoahAlwaysPreTouch, "Sanity"); + assert (!AlwaysPreTouch, "Should have been overridden"); + + size_t size = (size_t)(end - start); + size_t page_size = UseLargePages ? (size_t)os::large_page_size() : (size_t)os::vm_page_size(); + size_t num_chunks = MAX2((size_t)1, size / MAX2(PreTouchParallelChunkSize, page_size)); + uint num_workers = MIN2((uint)num_chunks, workers->active_workers()); + + log_info(gc, heap)("Parallel pretouch with %u workers for " SIZE_FORMAT " work units pre-touching " SIZE_FORMAT " bytes.", + num_workers, num_chunks, size); + + ShenandoahPretouchTask cl(start, end, page_size); + workers->run_task(&cl, num_workers); +} + jint ShenandoahHeap::initialize() { CollectedHeap::pre_initialize(); size_t init_byte_size = collector_policy()->initial_heap_byte_size(); size_t max_byte_size = collector_policy()->max_heap_byte_size(); - if (ShenandoahGCVerbose) - tty->print_cr("init_byte_size = "SIZE_FORMAT","SIZE_FORMAT_HEX" max_byte_size = "INT64_FORMAT","SIZE_FORMAT_HEX, - init_byte_size, init_byte_size, max_byte_size, max_byte_size); Universe::check_alignment(max_byte_size, ShenandoahHeapRegion::RegionSizeBytes, @@ -124,16 +130,16 @@ ReservedSpace heap_rs = Universe::reserve_heap(max_byte_size, Arguments::conservative_max_heap_alignment()); + _reserved.set_word_size(0); _reserved.set_start((HeapWord*)heap_rs.base()); - _reserved.set_end((HeapWord*) (heap_rs.base() + heap_rs.size())); + _reserved.set_end((HeapWord*)(heap_rs.base() + heap_rs.size())); set_barrier_set(new ShenandoahBarrierSet(this)); ReservedSpace pgc_rs = heap_rs.first_part(max_byte_size); _storage.initialize(pgc_rs, init_byte_size); - if (ShenandoahGCVerbose) { - tty->print_cr("Calling initialize on reserved space base = "PTR_FORMAT" end = "PTR_FORMAT, - p2i(pgc_rs.base()), p2i(pgc_rs.base() + pgc_rs.size())); + if (ShenandoahAlwaysPreTouch) { + pretouch_storage(_storage.low(), _storage.high(), _workers); } _num_regions = init_byte_size / ShenandoahHeapRegion::RegionSizeBytes; @@ -146,11 +152,34 @@ _collection_set = new ShenandoahCollectionSet(_max_regions); _free_regions = new ShenandoahFreeSet(_max_regions); + // Initialize fast collection set test structure. 
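// The fast test is a bool-per-region table whose base pointer is biased by
// (heap_base >> RegionSizeShift), so membership can be checked with a single
// shift and load: biased[addr >> shift]. A standalone illustration with
// made-up sizes (note that biasing a pointer outside its array is formally
// undefined in ISO C++; HotSpot relies on a flat address space):

#include <cassert>
#include <cstdint>

int main() {
  const int kShift = 20;                           // pretend 1 MB regions
  const uintptr_t heap_base = uintptr_t(1) << 32;  // pretend heap start
  static bool storage[16] = {};                    // one flag per region

  bool* biased = storage - (heap_base >> kShift);  // the "fast test" pointer

  uintptr_t addr = heap_base + 3 * (uintptr_t(1) << kShift) + 123;
  biased[addr >> kShift] = true;                   // mark region 3 as in-cset
  assert(storage[3]);                              // lands on the right slot
  return 0;
}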
+ _in_cset_fast_test_length = _max_regions; + _in_cset_fast_test_base = + NEW_C_HEAP_ARRAY(bool, _in_cset_fast_test_length, mtGC); + _in_cset_fast_test = _in_cset_fast_test_base - + ((uintx) pgc_rs.base() >> ShenandoahHeapRegion::RegionSizeShift); + + _next_top_at_mark_starts_base = + NEW_C_HEAP_ARRAY(HeapWord*, _max_regions, mtGC); + _next_top_at_mark_starts = _next_top_at_mark_starts_base - + ((uintx) pgc_rs.base() >> ShenandoahHeapRegion::RegionSizeShift); + + _complete_top_at_mark_starts_base = + NEW_C_HEAP_ARRAY(HeapWord*, _max_regions, mtGC); + _complete_top_at_mark_starts = _complete_top_at_mark_starts_base - + ((uintx) pgc_rs.base() >> ShenandoahHeapRegion::RegionSizeShift); + size_t i = 0; for (i = 0; i < _num_regions; i++) { + _in_cset_fast_test_base[i] = false; // Not in cset + HeapWord* bottom = (HeapWord*) pgc_rs.base() + regionSizeWords * i; + _complete_top_at_mark_starts_base[i] = bottom; + _next_top_at_mark_starts_base[i] = bottom; + } + for (i = 0; i < _num_regions; i++) { ShenandoahHeapRegion* current = new ShenandoahHeapRegion(); - current->initialize_heap_region((HeapWord*) pgc_rs.base() + + current->initialize_heap_region(this, (HeapWord*) pgc_rs.base() + regionSizeWords * i, regionSizeWords, i); _free_regions->add_region(current); _ordered_regions->add_region(current); @@ -165,11 +194,13 @@ _numAllocs = 0; - if (ShenandoahGCVerbose) { - tty->print("All Regions\n"); - print_heap_regions(); - tty->print("Free Regions\n"); - _free_regions->print(); + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + log_trace(gc, region)("All Regions"); + _ordered_regions->print(out); + log_trace(gc, region)("Free Regions"); + _free_regions->print(out); } // The call below uses stuff (the SATB* things) that are in G1, but probably @@ -183,51 +214,40 @@ size_t bitmap_size = CMBitMap::compute_size(heap_rs.size()); MemRegion heap_region = MemRegion((HeapWord*) heap_rs.base(), heap_rs.size() / HeapWordSize); - ReservedSpace bitmap0(ReservedSpace::allocation_align_size_up(bitmap_size)); + size_t page_size = UseLargePages ? (size_t)os::large_page_size() : (size_t)os::vm_page_size(); + + ReservedSpace bitmap0(bitmap_size, page_size); os::commit_memory_or_exit(bitmap0.base(), bitmap0.size(), false, "couldn't allocate mark bitmap"); + MemTracker::record_virtual_memory_type(bitmap0.base(), mtGC); MemRegion bitmap_region0 = MemRegion((HeapWord*) bitmap0.base(), bitmap0.size() / HeapWordSize); _mark_bit_map0.initialize(heap_region, bitmap_region0); - _prev_mark_bit_map = &_mark_bit_map0; + _complete_mark_bit_map = &_mark_bit_map0; - ReservedSpace bitmap1(ReservedSpace::allocation_align_size_up(bitmap_size)); + ReservedSpace bitmap1(bitmap_size, page_size); os::commit_memory_or_exit(bitmap1.base(), bitmap1.size(), false, "couldn't allocate mark bitmap"); + MemTracker::record_virtual_memory_type(bitmap1.base(), mtGC); MemRegion bitmap_region1 = MemRegion((HeapWord*) bitmap1.base(), bitmap1.size() / HeapWordSize); _mark_bit_map1.initialize(heap_region, bitmap_region1); _next_mark_bit_map = &_mark_bit_map1; - // Initialize fast collection set test structure. 
- _in_cset_fast_test_length = _max_regions; - _in_cset_fast_test_base = - NEW_C_HEAP_ARRAY(bool, (size_t) _in_cset_fast_test_length, mtGC); - _in_cset_fast_test = _in_cset_fast_test_base - - ((uintx) pgc_rs.base() >> ShenandoahHeapRegion::RegionSizeShift); - clear_cset_fast_test(); - - _top_at_mark_starts_base = - NEW_C_HEAP_ARRAY(HeapWord*, _max_regions, mtGC); - _top_at_mark_starts = _top_at_mark_starts_base - - ((uintx) pgc_rs.base() >> ShenandoahHeapRegion::RegionSizeShift); - - for (i = 0; i < _num_regions; i++) { - _in_cset_fast_test_base[i] = false; // Not in cset - _top_at_mark_starts_base[i] = _ordered_regions->get(i)->bottom(); - } - _monitoring_support = new ShenandoahMonitoringSupport(this); _concurrent_gc_thread = new ShenandoahConcurrentThread(); + + ShenandoahMarkCompact::initialize(); + return JNI_OK; } ShenandoahHeap::ShenandoahHeap(ShenandoahCollectorPolicy* policy) : SharedHeap(policy), _shenandoah_policy(policy), - _concurrent_mark_in_progress(false), - _evacuation_in_progress(false), + _concurrent_mark_in_progress(0), + _evacuation_in_progress(0), _full_gc_in_progress(false), _free_regions(NULL), _collection_set(NULL), - _bytesAllocSinceCM(0), + _bytes_allocated_since_cm(0), _bytes_allocated_during_cm(0), _max_allocated_gc(0), _allocated_last_gc(0), @@ -237,43 +257,48 @@ _ref_processor(NULL), _in_cset_fast_test(NULL), _in_cset_fast_test_base(NULL), - _top_at_mark_starts(NULL), - _top_at_mark_starts_base(NULL), + _next_top_at_mark_starts(NULL), + _next_top_at_mark_starts_base(NULL), + _complete_top_at_mark_starts(NULL), + _complete_top_at_mark_starts_base(NULL), _mark_bit_map0(), _mark_bit_map1(), _cancelled_concgc(false), _need_update_refs(false), _need_reset_bitmaps(false), _growing_heap(0), - _jni_critical(new ShenandoahJNICritical()) + _gc_timer(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()) { - if (ShenandoahLogConfig) { - tty->print_cr("Parallel GC threads: "UINTX_FORMAT, ParallelGCThreads); - tty->print_cr("Concurrent GC threads: "UINTX_FORMAT, ConcGCThreads); - tty->print_cr("Parallel reference processing enabled: %s", BOOL_TO_STR(ParallelRefProcEnabled)); - } - _pgc = this; + log_info(gc, init)("Parallel GC threads: "UINTX_FORMAT, ParallelGCThreads); + log_info(gc, init)("Concurrent GC threads: "UINTX_FORMAT, ConcGCThreads); + log_info(gc, init)("Parallel reference processing enabled: %s", BOOL_TO_STR(ParallelRefProcEnabled)); + _scm = new ShenandoahConcurrentMark(); _used = 0; + // This is odd. They are concurrent gc threads, but they are also task threads. // Framework doesn't allow both. 
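// The constructor below ends up with two gangs with different duties:
// _workers (ParallelGCThreads) runs the stop-the-world phases inside
// safepoints, while _conc_workers (ConcGCThreads) runs marking and evacuation
// while mutators execute. A sketch of how a phase would pick its gang
// (illustrative only; the patch wires this up per call site rather than
// through one helper):

struct WorkGangRef { unsigned nworkers; };

struct GangSelectionModel {
  WorkGangRef* _workers;       // sized by ParallelGCThreads
  WorkGangRef* _conc_workers;  // sized by ConcGCThreads

  WorkGangRef* gang_for_phase(bool at_safepoint) {
    return at_safepoint ? _workers : _conc_workers;
  }
};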
- _conc_workers = new FlexibleWorkGang("Concurrent GC Threads", ConcGCThreads, + _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads, /* are_GC_task_threads */true, /* are_ConcurrentGC_threads */false); - if (_conc_workers == NULL) { + _conc_workers = new WorkGang("Concurrent GC Threads", ConcGCThreads, + /* are_GC_task_threads */true, + /* are_ConcurrentGC_threads */false); + if ((_workers == NULL) || (_conc_workers == NULL)) { vm_exit_during_initialization("Failed necessary allocation."); } else { + _workers->initialize_workers(); _conc_workers->initialize_workers(); } } -class ResetBitmapTask : public AbstractGangTask { +class ResetNextBitmapTask : public AbstractGangTask { private: ShenandoahHeapRegionSet* _regions; public: - ResetBitmapTask(ShenandoahHeapRegionSet* regions) : + ResetNextBitmapTask(ShenandoahHeapRegionSet* regions) : AbstractGangTask("Parallel Reset Bitmap Task"), _regions(regions) { _regions->clear_current_index(); @@ -284,34 +309,64 @@ ShenandoahHeap* heap = ShenandoahHeap::heap(); while (region != NULL) { HeapWord* bottom = region->bottom(); - HeapWord* top = region->top_prev_mark_bitmap(); - region->set_top_prev_mark_bitmap(region->top_at_prev_mark_start()); + HeapWord* top = heap->next_top_at_mark_start(region->bottom()); if (top > bottom) { - heap->reset_mark_bitmap_range(bottom, top); + heap->next_mark_bit_map()->clearRange(MemRegion(bottom, top)); } region = _regions->claim_next(); } } }; -void ShenandoahHeap::reset_mark_bitmap() { - GCTraceTime time("Concurrent reset bitmaps", ShenandoahTracePhases, true, NULL, tracer()->gc_id()); +void ShenandoahHeap::reset_next_mark_bitmap(WorkGang* workers) { + // GCTraceTime time("Concurrent reset bitmaps", ShenandoahLogInfo, true, gc_timer(), tracer()->gc_id()); - ResetBitmapTask task = ResetBitmapTask(_ordered_regions); - conc_workers()->set_active_workers(_max_conc_workers); - conc_workers()->run_task(&task); + ResetNextBitmapTask task = ResetNextBitmapTask(_ordered_regions); + workers->run_task(&task); } -void ShenandoahHeap::reset_mark_bitmap_range(HeapWord* from, HeapWord* to) { - _next_mark_bit_map->clearRange(MemRegion(from, to)); +class ResetCompleteBitmapTask : public AbstractGangTask { +private: + ShenandoahHeapRegionSet* _regions; + +public: + ResetCompleteBitmapTask(ShenandoahHeapRegionSet* regions) : + AbstractGangTask("Parallel Reset Bitmap Task"), + _regions(regions) { + _regions->clear_current_index(); + } + + void work(uint worker_id) { + ShenandoahHeapRegion* region = _regions->claim_next(); + ShenandoahHeap* heap = ShenandoahHeap::heap(); + while (region != NULL) { + HeapWord* bottom = region->bottom(); + HeapWord* top = heap->complete_top_at_mark_start(region->bottom()); + if (top > bottom) { + heap->complete_mark_bit_map()->clearRange(MemRegion(bottom, top)); + } + region = _regions->claim_next(); + } + } +}; + +void ShenandoahHeap::reset_complete_mark_bitmap(WorkGang* workers) { + GCTraceTime time("Concurrent reset bitmaps", ShenandoahLogInfo, true, gc_timer(), tracer()->gc_id()); + + ResetCompleteBitmapTask task = ResetCompleteBitmapTask(_ordered_regions); + workers->run_task(&task); } -bool ShenandoahHeap::is_bitmap_clear() { +bool ShenandoahHeap::is_next_bitmap_clear() { HeapWord* start = _ordered_regions->bottom(); HeapWord* end = _ordered_regions->end(); return _next_mark_bit_map->getNextMarkedWordAddress(start, end) == end; } +bool ShenandoahHeap::is_complete_bitmap_clear_range(HeapWord* start, HeapWord* end) { + return _complete_mark_bit_map->getNextMarkedWordAddress(start, end) 
== end; +} + void ShenandoahHeap::print_on(outputStream* st) const { st->print("Shenandoah Heap"); st->print(" total = " SIZE_FORMAT " K, used " SIZE_FORMAT " K ", capacity()/ K, used() /K); @@ -322,7 +377,7 @@ if (_evacuation_in_progress) { st->print("evacuating "); } - if (_cancelled_concgc) { + if (cancelled_concgc()) { st->print("cancelled "); } st->print("\n"); @@ -350,11 +405,11 @@ gc_threads_do(&init_gclabs); } } - _scm->initialize(); + + _max_workers = MAX(_max_parallel_workers, _max_conc_workers); + _scm->initialize(_max_workers); ref_processing_init(); - - _max_workers = MAX(_max_parallel_workers, _max_conc_workers); } class CalculateUsedRegionClosure : public ShenandoahHeapRegionClosure { @@ -379,10 +434,6 @@ return cl.getResult(); } -size_t ShenandoahHeap::calculateFree() { - return capacity() - calculateUsed(); -} - void ShenandoahHeap::verify_heap_size_consistency() { assert(calculateUsed() == used(), @@ -395,7 +446,7 @@ } void ShenandoahHeap::increase_used(size_t bytes) { - Atomic::add_ptr(bytes, &_used); + Atomic::add_ptr(bytes, (intptr_t*) &_used); } void ShenandoahHeap::set_used(size_t bytes) { @@ -405,7 +456,7 @@ void ShenandoahHeap::decrease_used(size_t bytes) { assert(_used >= bytes, "never decrease heap size by more than we've left"); - Atomic::add_ptr(-bytes, &_used); + Atomic::add_ptr(-bytes, (intptr_t*) &_used); } size_t ShenandoahHeap::capacity() const { @@ -430,36 +481,10 @@ return (VirtualSpace*) &_storage; } -class IsInRegionClosure : public ShenandoahHeapRegionClosure { - const void* _p; - bool _result; -public: - - IsInRegionClosure(const void* p) { - _p = p; - _result = false; - } - - bool doHeapRegion(ShenandoahHeapRegion* r) { - if (r->is_in(_p)) { - _result = true; - return true; - } - return false; - } - - bool result() { return _result;} -}; - bool ShenandoahHeap::is_in(const void* p) const { - // IsInRegionClosure isIn(p); - // heap_region_iterate(&isIn); - // bool result = isIn.result(); - - // return isIn.result(); HeapWord* first_region_bottom = _first_region->bottom(); HeapWord* last_region_end = first_region_bottom + (ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize) * _num_regions; - return p > _first_region_bottom && p < last_region_end; + return p >= _first_region_bottom && p < last_region_end; } bool ShenandoahHeap::is_in_partial_collection(const void* p ) { @@ -467,9 +492,7 @@ return false; } -bool ShenandoahHeap::is_scavengable(const void* p) { - // nyi(); - // return false; +bool ShenandoahHeap::is_scavengable(const void* p) { return true; } @@ -526,46 +549,27 @@ HeapWord* result = allocate_memory(word_size, evacuating); if (result != NULL) { - assert(! heap_region_containing(result)->is_in_collection_set(), "Never allocate in dirty region"); - _bytesAllocSinceCM += word_size * HeapWordSize; + assert(! 
in_collection_set(result), "Never allocate in dirty region"); + _bytes_allocated_since_cm += word_size * HeapWordSize; -#ifdef ASSERT - if (ShenandoahTraceTLabs) - tty->print_cr("allocating new tlab of size "SIZE_FORMAT" at addr "PTR_FORMAT, word_size, p2i(result)); -#endif + log_develop_trace(gc, tlab)("allocating new tlab of size "SIZE_FORMAT" at addr "PTR_FORMAT, word_size, p2i(result)); } return result; } ShenandoahHeap* ShenandoahHeap::heap() { - assert(_pgc != NULL, "Unitialized access to ShenandoahHeap::heap()"); - assert(_pgc->kind() == CollectedHeap::ShenandoahHeap, "not a shenandoah heap"); - return _pgc; + CollectedHeap* heap = Universe::heap(); + assert(heap != NULL, "Unitialized access to ShenandoahHeap::heap()"); + assert(heap->kind() == CollectedHeap::ShenandoahHeap, "not a shenandoah heap"); + return (ShenandoahHeap*) heap; } ShenandoahHeap* ShenandoahHeap::heap_no_check() { - return _pgc; + CollectedHeap* heap = Universe::heap(); + return (ShenandoahHeap*) heap; } -class VM_ShenandoahVerifyHeap: public VM_GC_Operation { -public: - VM_ShenandoahVerifyHeap(unsigned int gc_count_before, - unsigned int full_gc_count_before, - GCCause::Cause cause) - : VM_GC_Operation(gc_count_before, cause, full_gc_count_before) { } - virtual VMOp_Type type() const { return VMOp_G1CollectFull; } - virtual void doit() { - if (ShenandoahGCVerbose) - tty->print_cr("verifying heap"); - Universe::heap()->ensure_parsability(false); - Universe::verify(); - } - virtual const char* name() const { - return "Shenandoah verify trigger"; - } -}; - HeapWord* ShenandoahHeap::allocate_memory(size_t word_size, bool evacuating) { HeapWord* result = NULL; result = allocate_memory_work(word_size); @@ -580,8 +584,7 @@ } if (result == NULL && ! evacuating) { // Allocation failed, try full-GC, then retry allocation. - // tty->print_cr("failed to allocate "SIZE_FORMAT " bytes, free regions:", word_size * HeapWordSize); - // _free_regions->print(); + log_develop_trace(gc)("Failed to allocate " SIZE_FORMAT " bytes, free regions: ", word_size * HeapWordSize); collect(GCCause::_allocation_failure); result = allocate_memory_work(word_size); } @@ -594,6 +597,9 @@ monitoring_support()->update_counters(); } + log_develop_trace(gc, alloc)("allocate memory chunk of size "SIZE_FORMAT" at addr "PTR_FORMAT " by thread %d ", + word_size, p2i(result), Thread::current()->osthread()->thread_id()); + return result; } @@ -622,13 +628,12 @@ return true; } else { // Let other threads work, then try again. - os::yield(); + os::NakedYield(); return true; } } HeapWord* ShenandoahHeap::allocate_memory_work(size_t word_size) { - if (word_size * HeapWordSize > ShenandoahHeapRegion::RegionSizeBytes) { return allocate_large_memory(word_size); } @@ -638,8 +643,7 @@ // free region available, so current_index may not be valid. if (word_size * HeapWordSize > _free_regions->capacity()) return NULL; - jlong current_idx = _free_regions->current_index(); - assert(current_idx >= 0, "expect >= 0"); + size_t current_idx = _free_regions->current_index(); ShenandoahHeapRegion* my_current_region = _free_regions->get(current_idx); if (my_current_region == NULL) { @@ -648,38 +652,33 @@ assert(my_current_region != NULL, "should have a region at this point"); #ifdef ASSERT - if (my_current_region->is_in_collection_set()) { + if (in_collection_set(my_current_region)) { print_heap_regions(); } #endif - assert(! my_current_region->is_in_collection_set(), "never get targetted regions in free-lists"); + assert(! 
in_collection_set(my_current_region), "never get targetted regions in free-lists"); assert(! my_current_region->is_humongous(), "never attempt to allocate from humongous object regions"); HeapWord* result = my_current_region->par_allocate(word_size); - while (result == NULL && my_current_region != NULL) { - + while (result == NULL) { // 2nd attempt. Try next region. - size_t remaining = my_current_region->free(); current_idx = _free_regions->par_claim_next(current_idx); my_current_region = _free_regions->get(current_idx); if (my_current_region == NULL) { - // tty->print("WTF: OOM error trying to allocate %ld words\n", word_size); return NULL; // No more room to make a new region. OOM. } // _free_regions->increase_used(remaining); assert(my_current_region != NULL, "should have a region at this point"); - assert(! my_current_region->is_in_collection_set(), "never get targetted regions in free-lists"); + assert(! in_collection_set(my_current_region), "never get targetted regions in free-lists"); assert(! my_current_region->is_humongous(), "never attempt to allocate from humongous object regions"); result = my_current_region->par_allocate(word_size); } - if (result != NULL) { - my_current_region->increase_live_data(word_size * HeapWordSize); - increase_used(word_size * HeapWordSize); - _free_regions->increase_used(word_size * HeapWordSize); - } + my_current_region->increase_live_data(word_size * HeapWordSize); + increase_used(word_size * HeapWordSize); + _free_regions->increase_used(word_size * HeapWordSize); return result; } @@ -695,15 +694,11 @@ if (r != NULL) { result = r->bottom(); - if (ShenandoahTraceHumongous) { - gclog_or_tty->print_cr("allocating humongous object of size: "SIZE_FORMAT" KB at location "PTR_FORMAT" in start region "SIZE_FORMAT, + log_debug(gc, humongous)("allocating humongous object of size: "SIZE_FORMAT" KB at location "PTR_FORMAT" in start region "SIZE_FORMAT, (words * HeapWordSize) / K, p2i(result), r->region_number()); - } } else { - if (ShenandoahTraceHumongous) { - gclog_or_tty->print_cr("allocating humongous object of size: "SIZE_FORMAT" KB at location "PTR_FORMAT" failed", + log_debug(gc, humongous)("allocating humongous object of size: "SIZE_FORMAT" KB at location "PTR_FORMAT" failed", (words * HeapWordSize) / K, p2i(result)); - } } @@ -720,25 +715,18 @@ } _numAllocs++; #endif - HeapWord* filler = allocate_memory(BrooksPointer::BROOKS_POINTER_OBJ_SIZE + size, false); - HeapWord* result = filler + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; + HeapWord* filler = allocate_memory(BrooksPointer::word_size() + size, false); + HeapWord* result = filler + BrooksPointer::word_size(); if (filler != NULL) { - initialize_brooks_ptr(oop(result)); - _bytesAllocSinceCM += size * HeapWordSize; -#ifdef ASSERT - if (ShenandoahTraceAllocations) { - if (*gc_overhead_limit_was_exceeded) - tty->print("gc_overhead_limit_was_exceeded"); - tty->print_cr("mem_allocate object of size "SIZE_FORMAT" at addr "PTR_FORMAT " by thread %d ", - size, p2i(result), Thread::current()->osthread()->thread_id()); - } -#endif + BrooksPointer::initialize(oop(result)); + _bytes_allocated_since_cm += size * HeapWordSize; - assert(! heap_region_containing(result)->is_in_collection_set(), "never allocate in targetted region"); + assert(! in_collection_set(result), "never allocate in targetted region"); return result; } else { /* - tty->print_cr("Out of memory. 
Requested number of words: "SIZE_FORMAT" used heap: "INT64_FORMAT", bytes allocated since last CM: "INT64_FORMAT, size, used(), _bytesAllocSinceCM); + tty->print_cr("Out of memory. Requested number of words: "SIZE_FORMAT" used heap: "INT64_FORMAT", bytes allocated since last CM: "INT64_FORMAT, + size, used(), _bytes_allocated_since_cm); { print_heap_regions(); tty->print("Printing "SIZE_FORMAT" free regions:\n", _free_regions->count()); @@ -760,35 +748,31 @@ void do_object(oop p) { -#ifdef ASSERT - if (ShenandoahTraceEvacuations) { - tty->print_cr("Calling ParallelEvacuateRegionObjectClosure on "PTR_FORMAT" of size %d\n", p2i((HeapWord*) p), p->size()); - } -#endif + log_develop_trace(gc, compaction)("Calling ParallelEvacuateRegionObjectClosure on "PTR_FORMAT" of size %d\n", p2i((HeapWord*) p), p->size()); - assert(_heap->is_marked_prev(p), "expect only marked objects"); + assert(_heap->is_marked_complete(p), "expect only marked objects"); if (oopDesc::unsafe_equals(p, ShenandoahBarrierSet::resolve_oop_static_not_null(p))) { _heap->evacuate_object(p, _thread); } } }; +#ifdef ASSERT class VerifyEvacuatedObjectClosure : public ObjectClosure { public: void do_object(oop p) { - if (ShenandoahHeap::heap()->is_marked_current(p)) { + if (ShenandoahHeap::heap()->is_marked_complete(p)) { oop p_prime = oopDesc::bs()->read_barrier(p); assert(! oopDesc::unsafe_equals(p, p_prime), "Should point to evacuated copy"); -#ifdef ASSERT if (p->klass() != p_prime->klass()) { tty->print_cr("copy has different class than original:"); p->klass()->print_on(tty); p_prime->klass()->print_on(tty); } -#endif - assert(p->klass() == p_prime->klass(), err_msg("Should have the same class p: "PTR_FORMAT", p_prime: "PTR_FORMAT, p2i((HeapWord*) p), p2i((HeapWord*) p_prime))); + assert(p->klass() == p_prime->klass(), err_msg("Should have the same class p: "PTR_FORMAT", p_prime: "PTR_FORMAT, p2i(p), p2i(p_prime))); + // assert(p->mark() == p_prime->mark(), "Should have the same mark"); assert(p->size() == p_prime->size(), "Should be the same size"); assert(oopDesc::unsafe_equals(p_prime, oopDesc::bs()->read_barrier(p_prime)), "One forward once"); } @@ -796,38 +780,23 @@ }; void ShenandoahHeap::verify_evacuated_region(ShenandoahHeapRegion* from_region) { - if (ShenandoahGCVerbose) { - tty->print("Verifying From Region\n"); - from_region->print(); - } - VerifyEvacuatedObjectClosure verify_evacuation; - from_region->object_iterate_interruptible(&verify_evacuation, false); + marked_object_iterate(from_region, &verify_evacuation); } +#endif void ShenandoahHeap::parallel_evacuate_region(ShenandoahHeapRegion* from_region) { - assert(from_region->getLiveData() > 0, "all-garbage regions are reclaimed earlier"); + assert(from_region->get_live_data() > 0, "all-garbage regions are reclaimed earlier"); ParallelEvacuateRegionObjectClosure evacuate_region(this); -#ifdef ASSERT - if (ShenandoahGCVerbose) { - tty->print_cr("parallel_evacuate_region starting from_region "SIZE_FORMAT": free_regions = "SIZE_FORMAT, - from_region->region_number(), _free_regions->count()); - } -#endif - marked_object_iterate(from_region, &evacuate_region); #ifdef ASSERT if (ShenandoahVerify && ! 
cancelled_concgc()) { verify_evacuated_region(from_region); } - if (ShenandoahGCVerbose) { - tty->print_cr("parallel_evacuate_region after from_region = "SIZE_FORMAT": free_regions = "SIZE_FORMAT, - from_region->region_number(), _free_regions->count()); - } #endif } @@ -848,19 +817,15 @@ ShenandoahHeapRegion* from_hr = _cs->claim_next(); while (from_hr != NULL) { - if (ShenandoahGCVerbose) { - tty->print_cr("Thread "INT32_FORMAT" claimed Heap Region "SIZE_FORMAT, - worker_id, - from_hr->region_number()); - from_hr->print(); - } + log_develop_trace(gc, region)("Thread "INT32_FORMAT" claimed Heap Region "SIZE_FORMAT, + worker_id, + from_hr->region_number()); - assert(from_hr->getLiveData() > 0, "all-garbage regions are reclaimed early"); + assert(from_hr->get_live_data() > 0, "all-garbage regions are reclaimed early"); _sh->parallel_evacuate_region(from_hr); if (_sh->cancelled_concgc()) { - // tty->print("We cancelled concgc while working on region %d\n", from_hr->region_number()); - // from_hr->print(); + log_develop_trace(gc, region)("Cancelled concgc while evacuating region " SIZE_FORMAT "\n", from_hr->region_number()); break; } from_hr = _cs->claim_next(); @@ -877,20 +842,10 @@ bool doHeapRegion(ShenandoahHeapRegion* r) { - if (_heap->cancelled_concgc()) { - // The aborted marking bitmap needs to be cleared at the end of cycle. - // Setup the top-marker for this. - r->set_top_prev_mark_bitmap(r->top_at_mark_start()); + assert (! _heap->cancelled_concgc(), "no recycling after cancelled marking"); - return false; - } - - r->swap_top_at_mark_start(); - - if (r->is_in_collection_set()) { - // tty->print_cr("recycling region "INT32_FORMAT":", r->region_number()); - // r->print_on(tty); - // tty->print_cr(" "); + if (_heap->in_collection_set(r)) { + log_develop_trace(gc, region)("Recycling region " SIZE_FORMAT ":", r->region_number()); _heap->decrease_used(r->used()); _bytes_reclaimed += r->used(); r->recycle(); @@ -919,13 +874,8 @@ return _free_regions; } -CMBitMap* ShenandoahHeap::next_mark_bit_map() { - return _next_mark_bit_map; -} - void ShenandoahHeap::print_heap_regions(outputStream* st) const { - PrintHeapRegionsClosure pc1(st); - heap_region_iterate(&pc1); + _ordered_regions->print(st); } class PrintAllRefsOopClosure: public ExtendedOopClosure { @@ -936,14 +886,21 @@ public: PrintAllRefsOopClosure(const char* prefix) : _index(0), _prefix(prefix) {} - void do_oop(oop* p) { - oop o = *p; +private: + template + inline void do_oop_work(T* p) { + oop o = oopDesc::load_decode_heap_oop(p); if (o != NULL) { if (ShenandoahHeap::heap()->is_in(o) && o->is_oop()) { - tty->print_cr("%s "INT32_FORMAT" ("PTR_FORMAT")-> "PTR_FORMAT" (marked: %s) (%s "PTR_FORMAT")", _prefix, _index, p2i(p), p2i((HeapWord*) o), BOOL_TO_STR(ShenandoahHeap::heap()->is_marked_current(o)), o->klass()->internal_name(), p2i(o->klass())); + tty->print_cr("%s "INT32_FORMAT" ("PTR_FORMAT")-> "PTR_FORMAT" (marked: %s) (%s "PTR_FORMAT")", + _prefix, _index, + p2i(p), p2i(o), + BOOL_TO_STR(ShenandoahHeap::heap()->is_marked_complete(o)), + o->klass()->internal_name(), p2i(o->klass())); } else { - // tty->print_cr("%s "INT32_FORMAT" ("PTR_FORMAT" dirty: %s) -> "PTR_FORMAT" (not in heap, possibly corrupted or dirty (%s))", _prefix, _index, p2i(p), BOOL_TO_STR(ShenandoahHeap::heap()->heap_region_containing(p)->is_in_collection_set()), p2i((HeapWord*) o), BOOL_TO_STR(ShenandoahHeap::heap()->heap_region_containing(o)->is_in_collection_set())); - tty->print_cr("%s "INT32_FORMAT" ("PTR_FORMAT" dirty -> "PTR_FORMAT" (not in heap, possibly 
corrupted or dirty)", _prefix, _index, p2i(p), p2i((HeapWord*) o)); + tty->print_cr("%s "INT32_FORMAT" ("PTR_FORMAT" dirty -> "PTR_FORMAT" (not in heap, possibly corrupted or dirty)", + _prefix, _index, + p2i(p), p2i(o)); } } else { tty->print_cr("%s "INT32_FORMAT" ("PTR_FORMAT") -> "PTR_FORMAT, _prefix, _index, p2i(p), p2i((HeapWord*) o)); @@ -951,8 +908,13 @@ _index++; } +public: + void do_oop(oop* p) { + do_oop_work(p); + } + void do_oop(narrowOop* p) { - Unimplemented(); + do_oop_work(p); } }; @@ -965,7 +927,10 @@ void do_object(oop p) { if (ShenandoahHeap::heap()->is_in(p)) { - tty->print_cr("%s object "PTR_FORMAT" (marked: %s) (%s "PTR_FORMAT") refers to:", _prefix, p2i((HeapWord*) p), BOOL_TO_STR(ShenandoahHeap::heap()->is_marked_current(p)), p->klass()->internal_name(), p2i(p->klass())); + tty->print_cr("%s object "PTR_FORMAT" (marked: %s) (%s "PTR_FORMAT") refers to:", + _prefix, p2i(p), + BOOL_TO_STR(ShenandoahHeap::heap()->is_marked_complete(p)), + p->klass()->internal_name(), p2i(p->klass())); PrintAllRefsOopClosure cl(_prefix); p->oop_iterate(&cl); } @@ -994,14 +959,16 @@ VerifyAfterMarkingOopClosure() : _heap(ShenandoahHeap::heap()) { } - void do_oop(oop* p) { - oop o = *p; +private: + template + inline void do_oop_work(T* p) { + oop o = oopDesc::load_decode_heap_oop(p); if (o != NULL) { - if (! _heap->is_marked_current(o)) { + if (! _heap->is_marked_complete(o)) { _heap->print_heap_regions(); _heap->print_all_refs("post-mark"); tty->print_cr("oop not marked, although referrer is marked: "PTR_FORMAT": in_heap: %s, is_marked: %s", - p2i((HeapWord*) o), BOOL_TO_STR(_heap->is_in(o)), BOOL_TO_STR(_heap->is_marked_current(o))); + p2i((HeapWord*) o), BOOL_TO_STR(_heap->is_in(o)), BOOL_TO_STR(_heap->is_marked_complete(o))); _heap->print_heap_locations((HeapWord*) o, (HeapWord*) o + o->size()); tty->print_cr("oop class: %s", o->klass()->internal_name()); @@ -1021,33 +988,28 @@ assert(o->is_oop(), "oop must be an oop"); assert(Metaspace::contains(o->klass()), "klass pointer must go to metaspace"); if (! oopDesc::unsafe_equals(o, oopDesc::bs()->read_barrier(o))) { - tty->print_cr("oops has forwardee: p: "PTR_FORMAT" (%s), o = "PTR_FORMAT" (%s), new-o: "PTR_FORMAT" (%s)", p2i(p), BOOL_TO_STR(_heap->heap_region_containing(p)->is_in_collection_set()), p2i((HeapWord*) o), BOOL_TO_STR(_heap->heap_region_containing(o)->is_in_collection_set()), p2i((HeapWord*) oopDesc::bs()->read_barrier(o)), BOOL_TO_STR(_heap->heap_region_containing(oopDesc::bs()->read_barrier(o))->is_in_collection_set())); + tty->print_cr("oops has forwardee: p: "PTR_FORMAT" (%s), o = "PTR_FORMAT" (%s), new-o: "PTR_FORMAT" (%s)", + p2i(p), + BOOL_TO_STR(_heap->in_collection_set(p)), + p2i(o), + BOOL_TO_STR(_heap->in_collection_set(o)), + p2i((HeapWord*) oopDesc::bs()->read_barrier(o)), + BOOL_TO_STR(_heap->in_collection_set(oopDesc::bs()->read_barrier(o)))); tty->print_cr("oop class: %s", o->klass()->internal_name()); } assert(oopDesc::unsafe_equals(o, oopDesc::bs()->read_barrier(o)), "oops must not be forwarded"); - assert(! _heap->heap_region_containing(o)->is_in_collection_set(), "references must not point to dirty heap regions"); - assert(_heap->is_marked_current(o), "live oops must be marked current"); + assert(! 
_heap->in_collection_set(o), "references must not point to dirty heap regions"); + assert(_heap->is_marked_complete(o), "live oops must be marked current"); } } - void do_oop(narrowOop* p) { - Unimplemented(); +public: + void do_oop(oop* p) { + do_oop_work(p); } -}; - -class IterateMarkedCurrentObjectsClosure: public ObjectClosure { -private: - ShenandoahHeap* _heap; - ExtendedOopClosure* _cl; -public: - IterateMarkedCurrentObjectsClosure(ExtendedOopClosure* cl) : - _heap(ShenandoahHeap::heap()), _cl(cl) {}; - - void do_object(oop p) { - if (_heap->is_marked_current(p)) { - p->oop_iterate(_cl); - } + void do_oop(narrowOop* p) { + do_oop_work(p); } }; @@ -1056,28 +1018,25 @@ verify_heap_size_consistency(); - if (ShenandoahGCVerbose) { - tty->print("verifying heap after marking\n"); - } - ensure_parsability(false); + log_trace(gc)("verifying heap after marking"); + VerifyAfterMarkingOopClosure cl; roots_iterate(&cl); - - IterateMarkedCurrentObjectsClosure marked_oops(&cl); - object_iterate(&marked_oops); + ObjectToOopClosure objs(&cl); + object_iterate(&objs); } void ShenandoahHeap::reclaim_humongous_region_at(ShenandoahHeapRegion* r) { assert(r->is_humongous_start(), "reclaim regions starting with the first one"); - oop humongous_obj = oop(r->bottom() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE); - size_t size = humongous_obj->size() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; + oop humongous_obj = oop(r->bottom() + BrooksPointer::word_size()); + size_t size = humongous_obj->size() + BrooksPointer::word_size(); uint required_regions = ShenandoahHumongous::required_regions(size * HeapWordSize); uint index = r->region_number(); - assert(r->getLiveData() == 0, "liveness must be zero"); + assert(r->get_live_data() == 0, "liveness must be zero"); for(size_t i = 0; i < required_regions; i++) { @@ -1086,13 +1045,14 @@ assert((region->is_humongous_start() || region->is_humongous_continuation()), "expect correct humongous start or continuation"); - if (ShenandoahTraceHumongous) { - tty->print_cr("reclaiming "UINT32_FORMAT" humongous regions for object of size: "SIZE_FORMAT" words", required_regions, size); - - region->print(); + if (ShenandoahLogDebug) { + log_debug(gc, humongous)("reclaiming "UINT32_FORMAT" humongous regions for object of size: "SIZE_FORMAT" words", required_regions, size); + ResourceMark rm; + outputStream* out = gclog_or_tty; + region->print_on(out); } - region->reset(); + region->recycle(); ShenandoahHeap::heap()->decrease_used(ShenandoahHeapRegion::RegionSizeBytes); } } @@ -1103,8 +1063,8 @@ ShenandoahHeap* heap = ShenandoahHeap::heap(); if (r->is_humongous_start()) { - oop humongous_obj = oop(r->bottom() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE); - if (! heap->is_marked_current(humongous_obj)) { + oop humongous_obj = oop(r->bottom() + BrooksPointer::word_size()); + if (! heap->is_marked_complete(humongous_obj)) { heap->reclaim_humongous_region_at(r); } @@ -1116,24 +1076,29 @@ #ifdef ASSERT class CheckCollectionSetClosure: public ShenandoahHeapRegionClosure { bool doHeapRegion(ShenandoahHeapRegion* r) { - assert(!r->is_in_collection_set(), "Should have been cleared by now"); + assert(! 
ShenandoahHeap::heap()->in_collection_set(r), "Should have been cleared by now"); return false; } }; #endif void ShenandoahHeap::prepare_for_concurrent_evacuation() { - assert(_ordered_regions->get(0)->region_number() == 0, "FIXME CHF"); - /* - tty->print("Thread %d started prepare_for_concurrent_evacuation\n", - Thread::current()->osthread()->thread_id()); - */ + assert(_ordered_regions->get(0)->region_number() == 0, "FIXME CHF. FIXME CHF!"); + + log_develop_trace(gc)("Thread %d started prepare_for_concurrent_evacuation", Thread::current()->osthread()->thread_id()); + if (!cancelled_concgc()) { recycle_dirty_regions(); ensure_parsability(true); +#ifdef ASSERT + if (ShenandoahVerify) { + verify_heap_after_marking(); + } +#endif + // NOTE: This needs to be done during a stop the world pause, because // putting regions into the collection set concurrently with Java threads // will create a race. In particular, acmp could fail because when we @@ -1165,15 +1130,7 @@ _free_regions->print(); */ - if (ShenandoahPrintCollectionSet) { - gclog_or_tty->print("Collection set live = " SIZE_FORMAT " K reclaimable = " SIZE_FORMAT " K\n", - _collection_set->live_data() / K, _collection_set->garbage() / K); - } - - if (_collection_set->count() == 0) - cancel_concgc(); - - _bytesAllocSinceCM = 0; + _bytes_allocated_since_cm = 0; Universe::update_heap_info_at_gc(); } @@ -1195,7 +1152,7 @@ void ShenandoahHeap::ensure_parsability(bool retire_tlabs) { if (UseTLAB) { - CollectedHeap::ensure_parsability(retire_tlabs); + CollectedHeap::ensure_parsability(retire_tlabs); RetireTLABClosure cl(retire_tlabs); for (JavaThread *thread = Threads::first(); thread != NULL; thread = thread->next()) { @@ -1214,35 +1171,50 @@ _heap(ShenandoahHeap::heap()), _thread(Thread::current()) { } - void do_oop(oop* p) { +private: + template + void do_oop_work(T* p) { assert(_heap->is_evacuation_in_progress(), "Only do this when evacuation is in progress"); - oop obj = oopDesc::load_heap_oop(p); - if (obj != NULL && _heap->in_cset_fast_test((HeapWord*) obj)) { - assert(_heap->is_marked_prev(obj), err_msg("only evacuate marked objects %d %d", _heap->is_marked_prev(obj), _heap->is_marked_prev(ShenandoahBarrierSet::resolve_oop_static_not_null(obj)))); - oop resolved = ShenandoahBarrierSet::resolve_oop_static_not_null(obj); - if (oopDesc::unsafe_equals(resolved, obj)) { - resolved = _heap->evacuate_object(obj, _thread); + T o = oopDesc::load_heap_oop(p); + if (! oopDesc::is_null(o)) { + oop obj = oopDesc::decode_heap_oop_not_null(o); + if (_heap->in_collection_set(obj)) { + assert(_heap->is_marked_complete(obj), err_msg("only evacuate marked objects %d %d", + _heap->is_marked_complete(obj), _heap->is_marked_complete(ShenandoahBarrierSet::resolve_oop_static_not_null(obj)))); + oop resolved = ShenandoahBarrierSet::resolve_oop_static_not_null(obj); + if (oopDesc::unsafe_equals(resolved, obj)) { + resolved = _heap->evacuate_object(obj, _thread); + } + oopDesc::encode_store_heap_oop(p, resolved); } - oopDesc::store_heap_oop(p, resolved); } #ifdef ASSERT - else if (! 
oopDesc::is_null(obj)) { - // tty->print_cr("not updating root at: "PTR_FORMAT" with object: "PTR_FORMAT", is_in_heap: %s, is_in_cset: %s, is_marked: %s", p2i(p), p2i((HeapWord*) obj), BOOL_TO_STR(_heap->is_in(obj)), BOOL_TO_STR(_heap->in_cset_fast_test(obj)), BOOL_TO_STR(_heap->is_marked_current(obj))); + else { + // tty->print_cr("not updating root at: "PTR_FORMAT" with object: "PTR_FORMAT", is_in_heap: %s, is_in_cset: %s, is_marked: %s", + // p2i(p), + // p2i((HeapWord*) obj), + // BOOL_TO_STR(_heap->is_in(obj)), + // BOOL_TO_STR(_heap->in_cset_fast_test(obj)), + // BOOL_TO_STR(_heap->is_marked_complete(obj))); } #endif } +public: + void do_oop(oop* p) { + do_oop_work(p); + } void do_oop(narrowOop* p) { - Unimplemented(); + do_oop_work(p); } }; class ShenandoahEvacuateUpdateRootsTask : public AbstractGangTask { - ShenandoahRootProcessor* _rp; + ShenandoahRootEvacuator* _rp; public: - ShenandoahEvacuateUpdateRootsTask(ShenandoahRootProcessor* rp) : + ShenandoahEvacuateUpdateRootsTask(ShenandoahRootEvacuator* rp) : AbstractGangTask("Shenandoah evacuate and update roots"), _rp(rp) { @@ -1252,9 +1224,8 @@ void work(uint worker_id) { ShenandoahEvacuateUpdateRootsClosure cl; MarkingCodeBlobClosure blobsCl(&cl, CodeBlobToOopClosure::FixRelocations); - CLDToOopClosure cldCl(&cl); - _rp->process_roots(&cl, &cl, &cldCl, &cldCl, &cldCl, &blobsCl, &blobsCl); + _rp->process_evacuate_roots(&cl, &blobsCl, worker_id); } }; @@ -1270,12 +1241,9 @@ ClassLoaderDataGraph::clear_claimed_marks(); { - set_par_threads(_max_parallel_workers); - ShenandoahRootProcessor rp(this, _max_parallel_workers); + ShenandoahRootEvacuator rp(this, _max_parallel_workers, ShenandoahCollectorPolicy::evac_thread_roots); ShenandoahEvacuateUpdateRootsTask roots_task(&rp); - workers()->set_active_workers(_max_parallel_workers); workers()->run_task(&roots_task); - set_par_threads(0); } if (ShenandoahVerifyReadsToFromSpace) { @@ -1306,52 +1274,50 @@ if (! 
cancelled_concgc()) { - if (ShenandoahGCVerbose) { - tty->print_cr("starting parallel_evacuate"); - // PrintHeapRegionsClosure pc1; - // heap_region_iterate(&pc1); - } + log_develop_trace(gc)("starting parallel_evacuate"); _shenandoah_policy->record_phase_start(ShenandoahCollectorPolicy::conc_evac); - if (ShenandoahGCVerbose) { - tty->print("Printing all available regions"); - print_heap_regions(); + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + out->print("Printing all available regions"); + print_heap_regions(out); } - if (ShenandoahPrintCollectionSet) { - tty->print("Printing collection set which contains "SIZE_FORMAT" regions:\n", _collection_set->count()); - _collection_set->print(); + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + out->print("Printing collection set which contains "SIZE_FORMAT" regions:\n", _collection_set->count()); + _collection_set->print(out); - tty->print("Printing free set which contains "SIZE_FORMAT" regions:\n", _free_regions->count()); - _free_regions->print(); - - // if (_collection_set->length() == 0) - // print_heap_regions(); + out->print("Printing free set which contains "SIZE_FORMAT" regions:\n", _free_regions->count()); + _free_regions->print(out); } ParallelEvacuationTask evacuationTask = ParallelEvacuationTask(this, _collection_set); - conc_workers()->set_active_workers(_max_conc_workers); conc_workers()->run_task(&evacuationTask); - //workers()->set_active_workers(_max_parallel_workers); - if (ShenandoahGCVerbose) { - - tty->print("Printing postgc collection set which contains "SIZE_FORMAT" regions:\n", + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + out->print("Printing postgc collection set which contains "SIZE_FORMAT" regions:\n", _collection_set->count()); - _collection_set->print(); + _collection_set->print(out); - tty->print("Printing postgc free regions which contain "SIZE_FORMAT" free regions:\n", + out->print("Printing postgc free regions which contain "SIZE_FORMAT" free regions:\n", _free_regions->count()); - _free_regions->print(); + _free_regions->print(out); - tty->print_cr("finished parallel_evacuate"); - print_heap_regions(); + } - tty->print_cr("all regions after evacuation:"); - print_heap_regions(); + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + out->print_cr("all regions after evacuation:"); + print_heap_regions(out); } _shenandoah_policy->record_phase_end(ShenandoahCollectorPolicy::conc_evac); @@ -1373,16 +1339,22 @@ public: VerifyEvacuationClosure(ShenandoahHeapRegion* from_region) : _heap(ShenandoahHeap::heap()), _from_region(from_region) { } - - void do_oop(oop* p) { - oop heap_oop = oopDesc::load_heap_oop(p); +private: + template + inline void do_oop_work(T* p) { + oop heap_oop = oopDesc::load_decode_heap_oop(p); if (! oopDesc::is_null(heap_oop)) { guarantee(! 
_from_region->is_in(heap_oop), err_msg("no references to from-region allowed after evacuation: "PTR_FORMAT, p2i((HeapWord*) heap_oop))); } } +public: + void do_oop(oop* p) { + do_oop_work(p); + } + void do_oop(narrowOop* p) { - Unimplemented(); + do_oop_work(p); } }; @@ -1397,15 +1369,7 @@ ClassLoaderDataGraph::clear_claimed_marks(); ShenandoahRootProcessor rp(this, 1); - rp.process_all_roots(cl, &cldCl, &blobsCl); -} - -void ShenandoahHeap::weak_roots_iterate(OopClosure* cl) { - if (ShenandoahProcessReferences) { - ref_processor()->weak_oops_do(cl); - } - ShenandoahAlwaysTrueClosure always_true; - JNIHandles::weak_oops_do(&always_true, cl); + rp.process_all_roots(cl, NULL, &cldCl, &blobsCl, 0); } void ShenandoahHeap::verify_evacuation(ShenandoahHeapRegion* from_region) { @@ -1421,7 +1385,7 @@ size_t ShenandoahHeap::unsafe_max_tlab_alloc(Thread *thread) const { - jlong idx = _free_regions->current_index(); + size_t idx = _free_regions->current_index(); ShenandoahHeapRegion* current = _free_regions->get(idx); if (current == NULL) return 0; @@ -1446,19 +1410,12 @@ void ShenandoahHeap::resize_all_tlabs() { CollectedHeap::resize_all_tlabs(); - if (PrintTLAB && Verbose) { - tty->print_cr("Resizing Shenandoah GCLABs..."); - } - ResizeGCLABClosure cl; for (JavaThread *thread = Threads::first(); thread != NULL; thread = thread->next()) { cl.do_thread(thread); } gc_threads_do(&cl); - if (PrintTLAB && Verbose) { - tty->print_cr("Done resizing Shenandoah GCLABs..."); - } } class AccumulateStatisticsGCLABClosure : public ThreadClosure { @@ -1499,29 +1456,18 @@ return false; } -size_t ShenandoahHeap::unsafe_max_alloc() { - return ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize; -} - void ShenandoahHeap::collect(GCCause::Cause cause) { + assert(cause != GCCause::_gc_locker, "no JNI critical callback"); if (GCCause::is_user_requested_gc(cause)) { if (! 
DisableExplicitGC) { - cancel_concgc(); + cancel_concgc(cause); _concurrent_gc_thread->do_full_gc(cause); } } else if (cause == GCCause::_allocation_failure) { - - cancel_concgc(); + cancel_concgc(cause); collector_policy()->set_should_clear_all_soft_refs(true); _concurrent_gc_thread->do_full_gc(cause); - } else if (cause == GCCause::_gc_locker) { - - if (ShenandoahTraceJNICritical) { - gclog_or_tty->print_cr("Resuming deferred evacuation after JNI critical regions"); - } - - jni_critical()->notify_jni_critical(); } } @@ -1535,13 +1481,13 @@ } -ShenandoahCollectorPolicy* ShenandoahHeap::collector_policy() const { +CollectorPolicy* ShenandoahHeap::collector_policy() const { return _shenandoah_policy; } HeapWord* ShenandoahHeap::block_start(const void* addr) const { - Space* sp = space_containing(addr); + Space* sp = heap_region_containing(addr); if (sp != NULL) { return sp->block_start(addr); } @@ -1549,13 +1495,13 @@ } size_t ShenandoahHeap::block_size(const HeapWord* addr) const { - Space* sp = space_containing(addr); + Space* sp = heap_region_containing(addr); assert(sp != NULL, "block_size of address outside of heap"); return sp->block_size(addr); } bool ShenandoahHeap::block_is_obj(const HeapWord* addr) const { - Space* sp = space_containing(addr); + Space* sp = heap_region_containing(addr); return sp->block_is_obj(addr); } @@ -1580,8 +1526,10 @@ } void ShenandoahHeap::print_tracing_info() const { - if (PrintGCDetails) { - _shenandoah_policy->print_tracing_info(); + if (ShenandoahLogInfo) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + _shenandoah_policy->print_tracing_info(out); } } @@ -1601,23 +1549,27 @@ bool failures() { return _failures; } +private: + template + inline void do_oop_work(T* p) { + oop obj = oopDesc::load_decode_heap_oop(p); + if (! oopDesc::is_null(obj) && ! obj->is_oop()) { + { // Just for debugging. + tty->print_cr("Root location "PTR_FORMAT + "verified "PTR_FORMAT, p2i(p), p2i((void*) obj)); + // obj->print_on(tty); + } + } + guarantee(obj->is_oop_or_null(), "is oop or null"); + } + +public: void do_oop(oop* p) { - if (*p != NULL) { - oop heap_oop = oopDesc::load_heap_oop(p); - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - if (!obj->is_oop()) { - { // Just for debugging. - gclog_or_tty->print_cr("Root location "PTR_FORMAT - "verified "PTR_FORMAT, p2i(p), p2i((void*) obj)); - // obj->print_on(gclog_or_tty); - } - } - guarantee(obj->is_oop(), "is_oop"); - } + do_oop_work(p); } void do_oop(narrowOop* p) { - Unimplemented(); + do_oop_work(p); } }; @@ -1643,7 +1595,7 @@ } }; -void ShenandoahHeap::verify(bool silent , VerifyOption vo) { +void ShenandoahHeap::verify(bool silent, VerifyOption vo) { if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) { ShenandoahVerifyRootsClosure rootsCl(vo); @@ -1654,18 +1606,14 @@ roots_iterate(&rootsCl); bool failures = rootsCl.failures(); - if (ShenandoahGCVerbose) - gclog_or_tty->print("verify failures: %s", BOOL_TO_STR(failures)); + log_trace(gc)("verify failures: %s", BOOL_TO_STR(failures)); ShenandoahVerifyHeapClosure heapCl(rootsCl); object_iterate(&heapCl); // TODO: Implement rest of it. 
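// Editor's note (not part of this changeset): a recurring change in this
// patch is converting closures from an oop*-only do_oop() plus an
// Unimplemented() narrowOop* overload into a private template that serves
// both plain and compressed references -- ShenandoahVerifyRootsClosure just
// above is one instance. A minimal sketch of the pattern, using only the
// oopDesc helpers that appear elsewhere in this diff:
//
//   template <class T>
//   inline void do_oop_work(T* p) {
//     T o = oopDesc::load_heap_oop(p);                  // read the (possibly narrow) slot
//     if (! oopDesc::is_null(o)) {
//       oop obj = oopDesc::decode_heap_oop_not_null(o); // widen narrowOop if needed
//       // ... closure-specific work on obj ...
//     }
//   }
//   void do_oop(oop* p)       { do_oop_work(p); }
//   void do_oop(narrowOop* p) { do_oop_work(p); }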
-#ifdef ASSERT_DISABLED - verify_live(); -#endif } else { - if (!silent) gclog_or_tty->print("(SKIPPING roots, heapRegions, remset) "); + tty->print("(SKIPPING roots, heapRegions, remset) "); } } size_t ShenandoahHeap::tlab_capacity(Thread *thr) const { @@ -1682,64 +1630,15 @@ } }; -class ShenandoahIterateObjectClosureCarefulRegionClosure: public ShenandoahHeapRegionClosure { - ObjectClosureCareful* _cl; -public: - ShenandoahIterateObjectClosureCarefulRegionClosure(ObjectClosureCareful* cl) : _cl(cl) {} - bool doHeapRegion(ShenandoahHeapRegion* r) { - r->object_iterate_careful(_cl); - return false; - } -}; - void ShenandoahHeap::object_iterate(ObjectClosure* cl) { ShenandoahIterateObjectClosureRegionClosure blk(cl); heap_region_iterate(&blk, false, true); } -void ShenandoahHeap::object_iterate_careful(ObjectClosureCareful* cl) { - ShenandoahIterateObjectClosureCarefulRegionClosure blk(cl); - heap_region_iterate(&blk, false, true); -} - void ShenandoahHeap::safe_object_iterate(ObjectClosure* cl) { Unimplemented(); } -void ShenandoahHeap::marked_object_iterate(ShenandoahHeapRegion* region, ObjectClosure* cl) { - marked_object_iterate(region, cl, region->bottom(), region->top()); -} - -void ShenandoahHeap::marked_object_iterate_careful(ShenandoahHeapRegion* region, ObjectClosure* cl) { - marked_object_iterate(region, cl, region->bottom(), region->concurrent_iteration_safe_limit()); -} - -void ShenandoahHeap::marked_object_iterate(ShenandoahHeapRegion* region, ObjectClosure* cl, - HeapWord* addr, HeapWord* limit) { - addr += BrooksPointer::BROOKS_POINTER_OBJ_SIZE; - HeapWord* last_addr = NULL; - size_t last_size = 0; - HeapWord* top_at_mark_start = region->top_at_prev_mark_start(); - HeapWord* heap_end = _ordered_regions->end(); - while (addr < limit) { - if (addr < top_at_mark_start) { - HeapWord* end = top_at_mark_start + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; - end = MIN2(end, heap_end); - addr = _prev_mark_bit_map->getNextMarkedWordAddress(addr, end); - } - if (addr < limit) { - oop obj = oop(addr); - assert(is_marked_prev(obj), "object expected to be marked"); - cl->do_object(obj); - last_addr = addr; - last_size = obj->size(); - addr += obj->size() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; - } else { - break; - } - } -} - class ShenandoahIterateOopClosureRegionClosure : public ShenandoahHeapRegionClosure { MemRegion _mr; ExtendedOopClosure* _cl; @@ -1760,16 +1659,6 @@ heap_region_iterate(&blk, skip_dirty_regions, true); } -void ShenandoahHeap::oop_iterate(MemRegion mr, - ExtendedOopClosure* cl) { - ShenandoahIterateOopClosureRegionClosure blk(mr, cl); - heap_region_iterate(&blk, false, true); -} - -void ShenandoahHeap::object_iterate_since_last_GC(ObjectClosure* cl) { - Unimplemented(); -} - class SpaceClosureRegionClosure: public ShenandoahHeapRegionClosure { SpaceClosure* _cl; public: @@ -1806,7 +1695,7 @@ if (skip_humongous_continuation && current->is_humongous_continuation()) { continue; } - if (skip_dirty_regions && current->is_in_collection_set()) { + if (skip_dirty_regions && in_collection_set(current)) { continue; } if (blk->doHeapRegion(current)) { @@ -1815,50 +1704,14 @@ } } -/** - * Maybe we need that at some point... -oop* ShenandoahHeap::resolve_oop_ptr(oop* p) { - if (is_in(p) && heap_region_containing(p)->is_dirty()) { - // If the reference is in an object in from-space, we need to first - // find its to-space counterpart. - // TODO: This here is slow (linear search inside region). Make it faster. 
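// Editor's note (not part of this changeset): the dead helpers in this
// commented-out block resolved an interior pointer by linearly scanning the
// region for the object containing it, then re-basing the pointer against
// the Brooks forwardee:
//
//   HeapWord* to_space_obj = (HeapWord*) oopDesc::bs()->read_barrier(from_space_oop);
//   return (oop*) (to_space_obj + ((HeapWord*) p - ((HeapWord*) from_space_oop)));
//
// i.e. the offset of p within the from-space object is applied at the same
// offset in the to-space copy. The linear scan is what made it slow; the
// code was never enabled and is dropped here.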
- oop from_space_oop = oop_containing_oop_ptr(p); - HeapWord* to_space_obj = (HeapWord*) oopDesc::bs()->read_barrier(from_space_oop); - return (oop*) (to_space_obj + ((HeapWord*) p - ((HeapWord*) from_space_oop))); - } else { - return p; - } -} - -oop ShenandoahHeap::oop_containing_oop_ptr(oop* p) { - HeapWord* from_space_ref = (HeapWord*) p; - ShenandoahHeapRegion* region = heap_region_containing(from_space_ref); - HeapWord* from_space_obj = NULL; - for (HeapWord* curr = region->bottom(); curr < from_space_ref; ) { - oop curr_obj = (oop) curr; - if (curr < from_space_ref && from_space_ref < (curr + curr_obj->size())) { - from_space_obj = curr; - break; - } else { - curr += curr_obj->size(); - } - } - assert (from_space_obj != NULL, "must not happen"); - oop from_space_oop = (oop) from_space_obj; - assert (from_space_oop->is_oop(), "must be oop"); - assert(ShenandoahBarrierSet::is_brooks_ptr(oop(((HeapWord*) from_space_oop) - BrooksPointer::BROOKS_POINTER_OBJ_SIZE)), "oop must have a brooks ptr"); - return from_space_oop; -} - */ - class ClearLivenessClosure : public ShenandoahHeapRegionClosure { ShenandoahHeap* sh; public: ClearLivenessClosure(ShenandoahHeap* heap) : sh(heap) { } bool doHeapRegion(ShenandoahHeapRegion* r) { - r->clearLiveData(); - r->init_top_at_mark_start(); + r->clear_live_data(); + sh->set_next_top_at_mark_start(r->bottom(), r->top()); return false; } }; @@ -1878,7 +1731,7 @@ shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::make_parsable); } - _shenandoah_policy->record_bytes_allocated(_bytesAllocSinceCM); + _shenandoah_policy->record_bytes_allocated(_bytes_allocated_since_cm); _used_start_gc = used(); #ifdef ASSERT @@ -1897,57 +1750,16 @@ // oopDesc::_debug = true; + // Make above changes visible to worker threads + OrderAccess::fence(); + shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::scan_roots); - concurrentMark()->prepare_unmarked_root_objs(); + concurrentMark()->init_mark_roots(); shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::scan_roots); // print_all_refs("pre-mark2"); } - -class VerifyLivenessClosure : public ExtendedOopClosure { - - ShenandoahHeap* _sh; - -public: - VerifyLivenessClosure() : _sh ( ShenandoahHeap::heap() ) {} - - template void do_oop_nv(T* p) { - T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop)) { - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - guarantee(_sh->heap_region_containing(obj)->is_in_collection_set() == (! oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj))), - err_msg("forwarded objects can only exist in dirty (from-space) regions is_dirty: %s, is_forwarded: %s", - BOOL_TO_STR(_sh->heap_region_containing(obj)->is_in_collection_set()), - BOOL_TO_STR(! oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj)))) - ); - obj = oopDesc::bs()->read_barrier(obj); - guarantee(! _sh->heap_region_containing(obj)->is_in_collection_set(), "forwarded oops must not point to dirty regions"); - guarantee(obj->is_oop(), "is_oop"); - ShenandoahHeap* sh = (ShenandoahHeap*) Universe::heap(); - if (! 
sh->is_marked_current(obj)) { - sh->print_on(tty); - } - assert(sh->is_marked_current(obj), err_msg("Referenced Objects should be marked obj: "PTR_FORMAT", marked: %s, is_in_heap: %s", - p2i((HeapWord*) obj), BOOL_TO_STR(sh->is_marked_current(obj)), BOOL_TO_STR(sh->is_in(obj)))); - } - } - - void do_oop(oop* p) { do_oop_nv(p); } - void do_oop(narrowOop* p) { do_oop_nv(p); } - -}; - -void ShenandoahHeap::verify_live() { - - VerifyLivenessClosure cl; - roots_iterate(&cl); - - IterateMarkedCurrentObjectsClosure marked_oops(&cl); - object_iterate(&marked_oops); - -} - class VerifyAfterEvacuationClosure : public ExtendedOopClosure { ShenandoahHeap* _sh; @@ -1959,41 +1771,15 @@ T heap_oop = oopDesc::load_heap_oop(p); if (!oopDesc::is_null(heap_oop)) { oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - guarantee(_sh->heap_region_containing(obj)->is_in_collection_set() == (! oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj))), + guarantee(_sh->in_collection_set(obj) == (! oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj))), err_msg("forwarded objects can only exist in dirty (from-space) regions is_dirty: %s, is_forwarded: %s obj-klass: %s, marked: %s", - BOOL_TO_STR(_sh->heap_region_containing(obj)->is_in_collection_set()), - BOOL_TO_STR(! oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj))), obj->klass()->external_name(), BOOL_TO_STR(_sh->is_marked_current(obj))) + BOOL_TO_STR(_sh->in_collection_set(obj)), + BOOL_TO_STR(! oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj))), + obj->klass()->external_name(), + BOOL_TO_STR(_sh->is_marked_complete(obj))) ); obj = oopDesc::bs()->read_barrier(obj); - guarantee(! _sh->heap_region_containing(obj)->is_in_collection_set(), "forwarded oops must not point to dirty regions"); - guarantee(obj->is_oop(), "is_oop"); - guarantee(Metaspace::contains(obj->klass()), "klass pointer must go to metaspace"); - } - } - - void do_oop(oop* p) { do_oop_nv(p); } - void do_oop(narrowOop* p) { do_oop_nv(p); } - -}; - -class VerifyAfterUpdateRefsClosure : public ExtendedOopClosure { - - ShenandoahHeap* _sh; - -public: - VerifyAfterUpdateRefsClosure() : _sh ( ShenandoahHeap::heap() ) {} - - template void do_oop_nv(T* p) { - T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop)) { - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - guarantee((! _sh->heap_region_containing(obj)->is_in_collection_set()), - err_msg("no live reference must point to from-space, is_marked: %s", - BOOL_TO_STR(_sh->is_marked_current(obj)))); - if (! oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj)) && _sh->is_in(p)) { - tty->print_cr("top-limit: "PTR_FORMAT", p: "PTR_FORMAT, p2i(_sh->heap_region_containing(p)->concurrent_iteration_safe_limit()), p2i(p)); - } - guarantee(oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj)), "no live reference must point to forwarded object"); + guarantee(! _sh->in_collection_set(obj), "forwarded oops must not point to dirty regions"); guarantee(obj->is_oop(), "is_oop"); guarantee(Metaspace::contains(obj->klass()), "klass pointer must go to metaspace"); } @@ -2013,24 +1799,33 @@ VerifyAfterEvacuationClosure cl; roots_iterate(&cl); - IterateMarkedCurrentObjectsClosure marked_oops(&cl); - object_iterate(&marked_oops); + ObjectToOopClosure objs(&cl); + object_iterate(&objs); } class VerifyRegionsAfterUpdateRefsClosure : public ShenandoahHeapRegionClosure { public: bool doHeapRegion(ShenandoahHeapRegion* r) { - assert(! 
r->is_in_collection_set(), "no region must be in collection set"); - assert(! ShenandoahHeap::heap()->in_cset_fast_test(r->bottom()), "no region must be in collection set"); + assert(! ShenandoahHeap::heap()->in_collection_set(r), "no region must be in collection set"); return false; } }; void ShenandoahHeap::swap_mark_bitmaps() { - CMBitMap* tmp = _prev_mark_bit_map; - _prev_mark_bit_map = _next_mark_bit_map; - _next_mark_bit_map = tmp; + // Swap bitmaps. + CMBitMap* tmp1 = _complete_mark_bit_map; + _complete_mark_bit_map = _next_mark_bit_map; + _next_mark_bit_map = tmp1; + + // Swap top-at-mark-start pointers + HeapWord** tmp2 = _complete_top_at_mark_starts; + _complete_top_at_mark_starts = _next_top_at_mark_starts; + _next_top_at_mark_starts = tmp2; + + HeapWord** tmp3 = _complete_top_at_mark_starts_base; + _complete_top_at_mark_starts_base = _next_top_at_mark_starts_base; + _next_top_at_mark_starts_base = tmp3; } void ShenandoahHeap::stop_concurrent_marking() { @@ -2042,26 +1837,23 @@ swap_mark_bitmaps(); } set_concurrent_mark_in_progress(false); - if (ShenandoahGCVerbose) { - print_heap_regions(); + + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + print_heap_regions(out); } -#ifdef ASSERT - if (ShenandoahVerify && ! _cancelled_concgc) { - verify_heap_after_marking(); - } - -#endif } void ShenandoahHeap::set_concurrent_mark_in_progress(bool in_progress) { - _concurrent_mark_in_progress = in_progress; - JavaThread::satb_mark_queue_set().set_active_all_threads(in_progress, ! in_progress); + _concurrent_mark_in_progress = in_progress ? 1 : 0; + JavaThread::satb_mark_queue_set().set_active_all_threads(in_progress, !in_progress); } void ShenandoahHeap::set_evacuation_in_progress(bool in_progress) { JavaThread::set_evacuation_in_progress_all_threads(in_progress); - _evacuation_in_progress = in_progress; + _evacuation_in_progress = in_progress ? 1 : 0; OrderAccess::fence(); } @@ -2079,33 +1871,32 @@ } void ShenandoahHeap::oom_during_evacuation() { - // tty->print_cr("Out of memory during evacuation, cancel evacuation, schedule full GC by thread %d", - // Thread::current()->osthread()->thread_id()); + log_develop_trace(gc)("Out of memory during evacuation, cancel evacuation, schedule full GC by thread %d", + Thread::current()->osthread()->thread_id()); // We ran out of memory during evacuation. Cancel evacuation, and schedule a full-GC. collector_policy()->set_should_clear_all_soft_refs(true); concurrent_thread()->schedule_full_gc(); - cancel_concgc(); + cancel_concgc(_oom_evacuation); if ((! Thread::current()->is_GC_task_thread()) && (! Thread::current()->is_ConcurrentGC_thread())) { - if (ShenandoahWarnings) { - tty->print_cr("OOM during evacuation. Let Java thread wait until evacuation settlded.."); - } + log_warning(gc)("OOM during evacuation. Let Java thread wait until evacuation finishes."); while (_evacuation_in_progress) { // wait. 
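// Editor's note (not part of this changeset): park(1) below is a timed park
// with a 1 ms bound, so the allocating Java thread re-checks
// _evacuation_in_progress roughly once per millisecond and resumes only
// after the cancelled evacuation has wound down and the scheduled full GC
// can take over.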
- Thread::current()->_ParkEvent->park(1) ; + Thread::current()->_ParkEvent->park(1); } } } HeapWord* ShenandoahHeap::tlab_post_allocation_setup(HeapWord* obj) { - HeapWord* result = obj + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; - initialize_brooks_ptr(oop(result)); + // Initialize Brooks pointer for the next object + HeapWord* result = obj + BrooksPointer::word_size(); + BrooksPointer::initialize(oop(result)); return result; } uint ShenandoahHeap::oop_extra_words() { - return BrooksPointer::BROOKS_POINTER_OBJ_SIZE; + return BrooksPointer::word_size(); } void ShenandoahHeap::grow_heap_by(size_t num_regions) { @@ -2117,17 +1908,21 @@ ShenandoahHeapRegion* new_region = new ShenandoahHeapRegion(); size_t new_region_index = i + base; HeapWord* start = _first_region_bottom + (ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize) * new_region_index; - new_region->initialize_heap_region(start, ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize, new_region_index); - if (ShenandoahGCVerbose) { - tty->print_cr("allocating new region at index: "SIZE_FORMAT, new_region_index); - new_region->print(); + new_region->initialize_heap_region(this, start, ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize, new_region_index); + + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + out->print_cr("allocating new region at index: "SIZE_FORMAT, new_region_index); + new_region->print_on(out); } assert(_ordered_regions->active_regions() == new_region->region_number(), "must match"); _ordered_regions->add_region(new_region); _sorted_regions->add_region(new_region); _in_cset_fast_test_base[new_region_index] = false; // Not in cset - _top_at_mark_starts_base[new_region_index] = new_region->bottom(); + _next_top_at_mark_starts_base[new_region_index] = new_region->bottom(); + _complete_top_at_mark_starts_base[new_region_index] = new_region->bottom(); regions[i] = new_region; } @@ -2141,36 +1936,14 @@ assert(new_num_regions <= _max_regions, "we checked this earlier"); size_t expand_size = new_regions * ShenandoahHeapRegion::RegionSizeBytes; - if (ShenandoahGCVerbose) { - tty->print_cr("expanding storage by "SIZE_FORMAT_HEX" bytes, for "SIZE_FORMAT" new regions", expand_size, new_regions); - } - bool success = _storage.expand_by(expand_size); + log_trace(gc, region)("expanding storage by "SIZE_FORMAT_HEX" bytes, for "SIZE_FORMAT" new regions", expand_size, new_regions); + bool success = _storage.expand_by(expand_size, ShenandoahAlwaysPreTouch); assert(success, "should always be able to expand by requested size"); _num_regions = new_num_regions; } -ShenandoahIsAliveClosure::ShenandoahIsAliveClosure() : - _heap(ShenandoahHeap::heap_no_check()) { -} - -void ShenandoahIsAliveClosure::init(ShenandoahHeap* heap) { - _heap = heap; -} - -bool ShenandoahIsAliveClosure::do_object_b(oop obj) { - - assert(_heap != NULL, "sanity"); -#ifdef ASSERT - if (_heap->concurrent_mark_in_progress()) { - assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static_not_null(obj)), "only query to-space"); - } -#endif - assert(!oopDesc::is_null(obj), "null"); - return _heap->is_marked_current(obj); -} - ShenandoahForwardedIsAliveClosure::ShenandoahForwardedIsAliveClosure() : _heap(ShenandoahHeap::heap_no_check()) { } @@ -2189,55 +1962,35 @@ } #endif assert(!oopDesc::is_null(obj), "null"); - return _heap->is_marked_current(obj); + return _heap->is_marked_next(obj); } void ShenandoahHeap::ref_processing_init() { MemRegion mr = reserved_region(); - // Concurrent Mark ref processor -// _ref_processor = 
-// new ReferenceProcessor(mr, // span -// ParallelRefProcEnabled && (ParallelGCThreads > 1), -// // mt processing -// (int) ParallelGCThreads, -// // degree of mt processing -// (ParallelGCThreads > 1) || (ConcGCThreads > 1), -// // mt discovery -// (int) MAX2(ParallelGCThreads, ConcGCThreads), -// // degree of mt discovery -// false, -// // Reference discovery is not atomic -// &isAlive); -// // is alive closure -// // (for efficiency/performance) + isAlive.init(ShenandoahHeap::heap()); + assert(_max_workers > 0, "Sanity"); - isAlive.init(ShenandoahHeap::heap()); _ref_processor = new ReferenceProcessor(mr, // span ParallelRefProcEnabled, // mt processing - (int) ConcGCThreads, + _max_workers, // degree of mt processing true, // mt discovery - (int) ConcGCThreads, + _max_workers, // degree of mt discovery false, // Reference discovery is not atomic &isAlive); - // is alive closure - // (for efficiency/performance) - - - } #ifdef ASSERT void ShenandoahHeap::set_from_region_protection(bool protect) { for (uint i = 0; i < _num_regions; i++) { ShenandoahHeapRegion* region = _ordered_regions->get(i); - if (region != NULL && region->is_in_collection_set()) { + if (region != NULL && in_collection_set(region)) { if (protect) { region->memProtectionOn(); } else { @@ -2255,7 +2008,6 @@ void ShenandoahHeap::release_pending_refs_lock() { _concurrent_gc_thread->slt()->manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL); } - size_t ShenandoahHeap::num_regions() { return _num_regions; } @@ -2265,122 +2017,64 @@ } GCTracer* ShenandoahHeap::tracer() { - return collector_policy()->tracer(); + return shenandoahPolicy()->tracer(); } size_t ShenandoahHeap::tlab_used(Thread* thread) const { return _free_regions->used(); } -void ShenandoahHeap::cancel_concgc() { - // only report it once - if (!_cancelled_concgc) { - _cancelled_concgc = true; - OrderAccess::fence(); +void ShenandoahHeap::cancel_concgc(GCCause::Cause cause) { + if (try_cancel_concgc()) { + log_info(gc)("Cancelling concurrent GC: %s", GCCause::to_string(cause)); _shenandoah_policy->report_concgc_cancelled(); } +} +void ShenandoahHeap::cancel_concgc(ShenandoahCancelCause cause) { + if (try_cancel_concgc()) { + log_info(gc)("Cancelling concurrent GC: %s", cancel_cause_to_string(cause)); + _shenandoah_policy->report_concgc_cancelled(); + } +} + +const char* ShenandoahHeap::cancel_cause_to_string(ShenandoahCancelCause cause) { + switch (cause) { + case _oom_evacuation: + return "Out of memory for evacuation"; + case _vm_stop: + return "Stopping VM"; + default: + return "Unknown"; + } } void ShenandoahHeap::clear_cancelled_concgc() { - _cancelled_concgc = false; + set_cancelled_concgc(false); } -int ShenandoahHeap::max_workers() { +uint ShenandoahHeap::max_workers() { return _max_workers; } -int ShenandoahHeap::max_parallel_workers() { +uint ShenandoahHeap::max_parallel_workers() { return _max_parallel_workers; } -int ShenandoahHeap::max_conc_workers() { +uint ShenandoahHeap::max_conc_workers() { return _max_conc_workers; } -void ShenandoahHeap::shutdown() { +void ShenandoahHeap::stop() { // We set this early here, to let GC threads terminate before we ask the concurrent thread // to terminate, which would otherwise block until all GC threads come to finish normally. 
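// Editor's note (not part of this changeset): both cancel_concgc() overloads
// above guard their logging with try_cancel_concgc(), whose body is not part
// of this diff. Given that _cancelled_concgc becomes a volatile jbyte in the
// header changes below, a plausible shape -- an assumption, not the actual
// implementation -- is a compare-and-swap so that only the winning thread
// reports the cancellation cause:
//
//   bool ShenandoahHeap::try_cancel_concgc() {
//     return Atomic::cmpxchg((jbyte) 1, &_cancelled_concgc, (jbyte) 0) == 0;
//   }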
- _cancelled_concgc = true; - _concurrent_gc_thread->shutdown(); - cancel_concgc(); + set_cancelled_concgc(true); + _concurrent_gc_thread->stop(); + cancel_concgc(_vm_stop); } -class ShenandoahStringSymbolTableUnlinkTask : public AbstractGangTask { -private: - BoolObjectClosure* _is_alive; - int _initial_string_table_size; - int _initial_symbol_table_size; - - bool _process_strings; - int _strings_processed; - int _strings_removed; - - bool _process_symbols; - int _symbols_processed; - int _symbols_removed; - -public: - ShenandoahStringSymbolTableUnlinkTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols) : - AbstractGangTask("String/Symbol Unlinking"), - _is_alive(is_alive), - _process_strings(process_strings), _strings_processed(0), _strings_removed(0), - _process_symbols(process_symbols), _symbols_processed(0), _symbols_removed(0) { - - _initial_string_table_size = StringTable::the_table()->table_size(); - _initial_symbol_table_size = SymbolTable::the_table()->table_size(); - if (process_strings) { - StringTable::clear_parallel_claimed_index(); - } - if (process_symbols) { - SymbolTable::clear_parallel_claimed_index(); - } - } - - ~ShenandoahStringSymbolTableUnlinkTask() { - guarantee(!_process_strings || StringTable::parallel_claimed_index() >= _initial_string_table_size, - err_msg("claim value %d after unlink less than initial string table size %d", - StringTable::parallel_claimed_index(), _initial_string_table_size)); - guarantee(!_process_symbols || SymbolTable::parallel_claimed_index() >= _initial_symbol_table_size, - err_msg("claim value %d after unlink less than initial symbol table size %d", - SymbolTable::parallel_claimed_index(), _initial_symbol_table_size)); - - if (ShenandoahTraceStringSymbolTableScrubbing) { - gclog_or_tty->print_cr("Cleaned string and symbol table, " - "strings: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed, " - "symbols: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed", - strings_processed(), strings_removed(), - symbols_processed(), symbols_removed()); - } - } - - void work(uint worker_id) { - int strings_processed = 0; - int strings_removed = 0; - int symbols_processed = 0; - int symbols_removed = 0; - if (_process_strings) { - StringTable::possibly_parallel_unlink(_is_alive, &strings_processed, &strings_removed); - Atomic::add(strings_processed, &_strings_processed); - Atomic::add(strings_removed, &_strings_removed); - } - if (_process_symbols) { - SymbolTable::possibly_parallel_unlink(&symbols_processed, &symbols_removed); - Atomic::add(symbols_processed, &_symbols_processed); - Atomic::add(symbols_removed, &_symbols_removed); - } - } - - size_t strings_processed() const { return (size_t)_strings_processed; } - size_t strings_removed() const { return (size_t)_strings_removed; } - - size_t symbols_processed() const { return (size_t)_symbols_processed; } - size_t symbols_removed() const { return (size_t)_symbols_removed; } -}; - void ShenandoahHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols) { - workers()->set_active_workers(_max_parallel_workers); - ShenandoahStringSymbolTableUnlinkTask shenandoah_unlink_task(is_alive, process_strings, process_symbols); + StringSymbolTableUnlinkTask shenandoah_unlink_task(is_alive, process_strings, process_symbols); workers()->run_task(&shenandoah_unlink_task); // if (G1StringDedup::is_enabled()) { @@ -2392,10 +2086,6 @@ _need_update_refs = need_update_refs; } -ShenandoahJNICritical* ShenandoahHeap::jni_critical() { - return 
_jni_critical; -} - //fixme this should be in heapregionset ShenandoahHeapRegion* ShenandoahHeap::next_compaction_region(const ShenandoahHeapRegion* r) { size_t region_idx = r->region_number() + 1; @@ -2409,20 +2099,20 @@ return next; } -bool ShenandoahHeap::is_in_collection_set(const void* p) { - return heap_region_containing(p)->is_in_collection_set(); +void ShenandoahHeap::set_region_in_collection_set(size_t region_index, bool b) { + _in_cset_fast_test_base[region_index] = b; } ShenandoahMonitoringSupport* ShenandoahHeap::monitoring_support() { return _monitoring_support; } -bool ShenandoahHeap::is_obj_dead(const oop obj, const ShenandoahHeapRegion* r) const { - return ! r->allocated_after_prev_mark_start((HeapWord*) obj) && - ! is_marked_prev(obj, r); +CMBitMap* ShenandoahHeap::complete_mark_bit_map() { + return _complete_mark_bit_map; } -CMBitMap* ShenandoahHeap::prev_mark_bit_map() { - return _prev_mark_bit_map; + +CMBitMap* ShenandoahHeap::next_mark_bit_map() { + return _next_mark_bit_map; } void ShenandoahHeap::add_free_region(ShenandoahHeapRegion* r) { @@ -2433,9 +2123,54 @@ _free_regions->clear(); } -void ShenandoahHeap::set_top_at_mark_start(HeapWord* region_base, HeapWord* addr) { +address ShenandoahHeap::in_cset_fast_test_addr() { + return (address) (ShenandoahHeap::heap()->_in_cset_fast_test); +} + +address ShenandoahHeap::cancelled_concgc_addr() { + return (address) &(ShenandoahHeap::heap()->_cancelled_concgc); +} + +void ShenandoahHeap::clear_cset_fast_test() { + assert(_in_cset_fast_test_base != NULL, "sanity"); + memset(_in_cset_fast_test_base, false, + _in_cset_fast_test_length * sizeof(bool)); +} + +size_t ShenandoahHeap::conservative_max_heap_alignment() { + return 32 * M; +} + +size_t ShenandoahHeap::bytes_allocated_since_cm() { + return _bytes_allocated_since_cm; +} + +void ShenandoahHeap::set_bytes_allocated_since_cm(size_t bytes) { + _bytes_allocated_since_cm = bytes; +} + +size_t ShenandoahHeap::max_allocated_gc() { + return _max_allocated_gc; +} + +void ShenandoahHeap::set_next_top_at_mark_start(HeapWord* region_base, HeapWord* addr) { uintx index = ((uintx) region_base) >> ShenandoahHeapRegion::RegionSizeShift; - _top_at_mark_starts[index] = addr; + _next_top_at_mark_starts[index] = addr; +} + +HeapWord* ShenandoahHeap::next_top_at_mark_start(HeapWord* region_base) { + uintx index = ((uintx) region_base) >> ShenandoahHeapRegion::RegionSizeShift; + return _next_top_at_mark_starts[index]; +} + +void ShenandoahHeap::set_complete_top_at_mark_start(HeapWord* region_base, HeapWord* addr) { + uintx index = ((uintx) region_base) >> ShenandoahHeapRegion::RegionSizeShift; + _complete_top_at_mark_starts[index] = addr; +} + +HeapWord* ShenandoahHeap::complete_top_at_mark_start(HeapWord* region_base) { + uintx index = ((uintx) region_base) >> ShenandoahHeapRegion::RegionSizeShift; + return _complete_top_at_mark_starts[index]; } void ShenandoahHeap::set_full_gc_in_progress(bool in_progress) { @@ -2453,17 +2188,25 @@ NMethodOopInitializer() : _heap(ShenandoahHeap::heap()) { } - void do_oop(oop* o) { - oop obj1 = oopDesc::load_heap_oop(o); - if (! oopDesc::is_null(obj1)) { +private: + template + inline void do_oop_work(T* p) { + T o = oopDesc::load_heap_oop(p); + if (! oopDesc::is_null(o)) { + oop obj1 = oopDesc::decode_heap_oop_not_null(o); oop obj2 = oopDesc::bs()->write_barrier(obj1); if (! 
oopDesc::unsafe_equals(obj1, obj2)) { - oopDesc::store_heap_oop(o, obj2); + oopDesc::encode_store_heap_oop(p, obj2); } } } + +public: + void do_oop(oop* o) { + do_oop_work(o); + } void do_oop(narrowOop* o) { - Unimplemented(); + do_oop_work(o); } }; @@ -2475,3 +2218,41 @@ void ShenandoahHeap::unregister_nmethod(nmethod* nm) { } + +void ShenandoahHeap::pin_object(oop o) { + heap_region_containing(o)->pin(); +} + +void ShenandoahHeap::unpin_object(oop o) { + heap_region_containing(o)->unpin(); +} + + +GCTimer* ShenandoahHeap::gc_timer() const { + return _gc_timer; +} + +class ShenandoahCountGarbageClosure : public ShenandoahHeapRegionClosure { +private: + size_t _garbage; +public: + ShenandoahCountGarbageClosure() : _garbage(0) { + } + + bool doHeapRegion(ShenandoahHeapRegion* r) { + if (! r->is_humongous() && ! r->is_pinned() && ! r->in_collection_set()) { + _garbage += r->garbage(); + } + return false; + } + + size_t garbage() { + return _garbage; + } +}; + +size_t ShenandoahHeap::garbage() { + ShenandoahCountGarbageClosure cl; + heap_region_iterate(&cl); + return cl.garbage(); +} diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -24,27 +24,18 @@ #ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAHHEAP_HPP #define SHARE_VM_GC_SHENANDOAH_SHENANDOAHHEAP_HPP -#include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" -#include "gc_implementation/shenandoah/shenandoahConcurrentMark.hpp" -#include "gc_implementation/shenandoah/shenandoahConcurrentThread.hpp" -#include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" +#include "gc_implementation/g1/concurrentMark.hpp" -#include "gc_implementation/g1/concurrentMark.hpp" -#include "gc_implementation/g1/heapRegionBounds.inline.hpp" +class ConcurrentGCTimer; -#include "memory/barrierSet.hpp" -#include "gc_interface/collectedHeap.hpp" -#include "memory/space.hpp" -#include "oops/oop.hpp" -#include "oops/markOop.hpp" - - -class SpaceClosure; -class GCTracer; - +class ShenandoahCollectorPolicy; +class ShenandoahHeapRegion; class ShenandoahHeapRegionClosure; class ShenandoahHeapRegionSet; -class ShenandoahJNICritical; +class ShenandoahCollectionSet; +class ShenandoahFreeSet; +class ShenandoahConcurrentMark; +class ShenandoahConcurrentThread; class ShenandoahMonitoringSupport; class ShenandoahAlwaysTrueClosure : public BoolObjectClosure { @@ -53,15 +44,6 @@ }; -class ShenandoahIsAliveClosure: public BoolObjectClosure { -private: - ShenandoahHeap* _heap; -public: - ShenandoahIsAliveClosure(); - void init(ShenandoahHeap* heap); - bool do_object_b(oop obj); -}; - class ShenandoahForwardedIsAliveClosure: public BoolObjectClosure { private: ShenandoahHeap* _heap; @@ -82,7 +64,11 @@ // // ShenandoahHeap class ShenandoahHeap : public SharedHeap { - +public: + enum ShenandoahCancelCause { + _oom_evacuation, + _vm_stop, + }; private: static ShenandoahHeap* _pgc; @@ -111,244 +97,160 @@ #ifndef NDEBUG uint _numAllocs; #endif - WorkGangBarrierSync barrierSync; - int _max_parallel_workers; - int _max_conc_workers; - int _max_workers; + uint _max_parallel_workers; + uint _max_conc_workers; + uint _max_workers; - FlexibleWorkGang* _conc_workers; + WorkGang* _conc_workers; + WorkGang* _workers; volatile size_t _used; CMBitMap _mark_bit_map0; CMBitMap _mark_bit_map1; - CMBitMap* _prev_mark_bit_map; + CMBitMap* 
_complete_mark_bit_map; CMBitMap* _next_mark_bit_map; bool* _in_cset_fast_test; bool* _in_cset_fast_test_base; - uint _in_cset_fast_test_length; + size_t _in_cset_fast_test_length; - HeapWord** _top_at_mark_starts; - HeapWord** _top_at_mark_starts_base; + HeapWord** _complete_top_at_mark_starts; + HeapWord** _complete_top_at_mark_starts_base; - bool _cancelled_concgc; + HeapWord** _next_top_at_mark_starts; + HeapWord** _next_top_at_mark_starts_base; - ShenandoahJNICritical* _jni_critical; + volatile jbyte _cancelled_concgc; jbyte _growing_heap; -public: - size_t _bytesAllocSinceCM; + size_t _bytes_allocated_since_cm; size_t _bytes_allocated_during_cm; size_t _bytes_allocated_during_cm_start; size_t _max_allocated_gc; size_t _allocated_last_gc; size_t _used_start_gc; + unsigned int _concurrent_mark_in_progress; + + bool _full_gc_in_progress; + + unsigned int _evacuation_in_progress; + bool _need_update_refs; + bool _need_reset_bitmaps; + + ReferenceProcessor* _ref_processor; + + ShenandoahForwardedIsAliveClosure isAlive; + + ConcurrentGCTimer* _gc_timer; + public: ShenandoahHeap(ShenandoahCollectorPolicy* policy); - inline HeapWord* allocate_from_gclab(Thread* thread, size_t size); - HeapWord* allocate_from_gclab_slow(Thread* thread, size_t size); - HeapWord* allocate_new_tlab(size_t word_size); - HeapWord* allocate_new_gclab(size_t word_size); -private: - HeapWord* allocate_new_tlab(size_t word_size, bool mark); -public: - HeapWord* allocate_memory(size_t word_size, bool evacuating); - // For now we are ignoring eden. - inline bool should_alloc_in_eden(size_t size) { return false;} - void print_on(outputStream* st) const ; + HeapWord *first_region_bottom() { return _first_region_bottom; } - ShenandoahHeap::Name kind() const { + const char* name() const /* override */; + HeapWord* allocate_new_tlab(size_t word_size) /* override */; + void print_on(outputStream* st) const /* override */; + + ShenandoahHeap::Name kind() const /* override */{ return CollectedHeap::ShenandoahHeap; } - static ShenandoahHeap* heap(); - static ShenandoahHeap* heap_no_check(); + jint initialize() /* override */; + void post_initialize() /* override */; + size_t capacity() const /* override */; + size_t used() const /* override */; + bool is_maximal_no_gc() const /* override */; + size_t max_capacity() const /* override */; + size_t min_capacity() const /* override */; + bool is_in(const void* p) const /* override */; + bool is_scavengable(const void* addr) /* override */; + HeapWord* mem_allocate(size_t size, bool* what) /* override */; + bool can_elide_tlab_store_barriers() const /* override */; + oop new_store_pre_barrier(JavaThread* thread, oop new_obj) /* override */; + bool can_elide_initializing_store_barrier(oop new_obj) /* override */; + bool card_mark_must_follow_store() const /* override */; + void collect(GCCause::Cause) /* override */; + void do_full_collection(bool clear_all_soft_refs) /* override */; + AdaptiveSizePolicy* size_policy() /* override */; + CollectorPolicy* collector_policy() const /* override */; + void ensure_parsability(bool retire_tlabs) /* override */; + HeapWord* block_start(const void* addr) const /* override */; + size_t block_size(const HeapWord* addr) const /* override */; + bool block_is_obj(const HeapWord* addr) const /* override */; + jlong millis_since_last_gc() /* override */; + void prepare_for_verify() /* override */; + void print_gc_threads_on(outputStream* st) const /* override */; + void gc_threads_do(ThreadClosure* tcl) const /* override */; + void 
print_tracing_info() const /* override */; + void verify(bool silent, VerifyOption vo) /* override */; + bool supports_tlab_allocation() const /* override */; + size_t tlab_capacity(Thread *thr) const /* override */; + void object_iterate(ObjectClosure* cl) /* override */; + void safe_object_iterate(ObjectClosure* cl) /* override */; + size_t unsafe_max_tlab_alloc(Thread *thread) const /* override */; + size_t max_tlab_size() const /* override */; + void resize_all_tlabs() /* override */; + void accumulate_statistics_all_gclabs() /* override */; + HeapWord* tlab_post_allocation_setup(HeapWord* obj) /* override */; + uint oop_extra_words() /* override */; + size_t tlab_used(Thread* ignored) const /* override */; + void stop() /* override */; + bool is_in_partial_collection(const void* p) /* override */; + bool supports_heap_inspection() const /* override */; - ShenandoahCollectorPolicy *shenandoahPolicy() { return _shenandoah_policy;} - - jint initialize(); - static size_t conservative_max_heap_alignment() { - return HeapRegionBounds::max_size(); - } - - void post_initialize(); - size_t capacity() const; - size_t used() const; - bool is_maximal_no_gc() const; - size_t max_capacity() const; - size_t min_capacity() const; - VirtualSpace* storage() const; - virtual bool is_in(const void* p) const; - bool is_in_partial_collection(const void* p); - bool is_scavengable(const void* addr); - virtual HeapWord* mem_allocate(size_t size, bool* what); - virtual size_t unsafe_max_alloc(); - bool can_elide_tlab_store_barriers() const; - virtual oop new_store_pre_barrier(JavaThread* thread, oop new_obj); - bool can_elide_initializing_store_barrier(oop new_obj); - bool card_mark_must_follow_store() const; - bool supports_heap_inspection() const; - void collect(GCCause::Cause); - void do_full_collection(bool clear_all_soft_refs); - AdaptiveSizePolicy* size_policy(); - ShenandoahCollectorPolicy* collector_policy() const; - - void ensure_parsability(bool retire_tlabs); - - void add_free_region(ShenandoahHeapRegion* r); - void clear_free_regions(); - + void space_iterate(SpaceClosure* scl) /* override */; void oop_iterate(ExtendedOopClosure* cl, bool skip_dirty_regions, bool skip_unreachable_objects); void oop_iterate(ExtendedOopClosure* cl) { oop_iterate(cl, false, false); } - void roots_iterate(OopClosure* cl); - void weak_roots_iterate(OopClosure* cl); - - void object_iterate(ObjectClosure* cl); - void object_iterate_careful(ObjectClosureCareful* cl); - void object_iterate_no_from_space(ObjectClosure* cl); - void safe_object_iterate(ObjectClosure* cl); - - void marked_object_iterate(ShenandoahHeapRegion* region, ObjectClosure* cl); - void marked_object_iterate_careful(ShenandoahHeapRegion* region, ObjectClosure* cl); - - HeapWord *first_region_bottom() { return _first_region_bottom; } -private: - void marked_object_iterate(ShenandoahHeapRegion* region, ObjectClosure* cl, HeapWord* start, HeapWord* limit); - -public: - HeapWord* block_start(const void* addr) const; - size_t block_size(const HeapWord* addr) const; - bool block_is_obj(const HeapWord* addr) const; - jlong millis_since_last_gc(); - void prepare_for_verify(); - void print_gc_threads_on(outputStream* st) const; - void gc_threads_do(ThreadClosure* tcl) const; - void print_tracing_info() const; - void verify(bool silent, VerifyOption vo); - bool supports_tlab_allocation() const; - virtual size_t tlab_capacity(Thread *thr) const; - void oop_iterate(MemRegion mr, ExtendedOopClosure* ecl); - void object_iterate_since_last_GC(ObjectClosure* cl); - void 
space_iterate(SpaceClosure* scl); - virtual size_t unsafe_max_tlab_alloc(Thread *thread) const; - virtual size_t max_tlab_size() const; - - void resize_all_tlabs(); - void accumulate_statistics_all_gclabs(); - - HeapWord* tlab_post_allocation_setup(HeapWord* obj); - - uint oop_extra_words(); + Space* space_containing(const void* oop) const; + void gc_prologue(bool b); + void gc_epilogue(bool b); #ifndef CC_INTERP - void compile_prepare_oop(MacroAssembler* masm, Register obj); + void compile_prepare_oop(MacroAssembler* masm, Register obj) /* override */; #endif void register_nmethod(nmethod* nm); void unregister_nmethod(nmethod* nm); - Space* space_containing(const void* oop) const; - void gc_prologue(bool b); - void gc_epilogue(bool b); + void pin_object(oop o) /* override */; + void unpin_object(oop o) /* override */; - void heap_region_iterate(ShenandoahHeapRegionClosure* blk, bool skip_dirty_regions = false, bool skip_humongous_continuation = false) const; + static ShenandoahHeap* heap(); + static ShenandoahHeap* heap_no_check(); + static size_t conservative_max_heap_alignment(); + static address in_cset_fast_test_addr(); + static address cancelled_concgc_addr(); + + static void pretouch_storage(char* start, char* end, WorkGang* workers); + + ShenandoahCollectorPolicy *shenandoahPolicy() { return _shenandoah_policy;} + inline ShenandoahHeapRegion* heap_region_containing(const void* addr) const; inline uint heap_region_index_containing(const void* addr) const; + inline bool requires_marking(const void* entry) const; + template + inline oop maybe_update_oop_ref(T* p); - volatile unsigned int _concurrent_mark_in_progress; - - bool _full_gc_in_progress; - - volatile unsigned int _evacuation_in_progress; - bool _need_update_refs; - bool _need_reset_bitmaps; + void recycle_dirty_regions(); void start_concurrent_marking(); void stop_concurrent_marking(); - ShenandoahConcurrentMark* concurrentMark() { return _scm;} - ShenandoahConcurrentThread* concurrent_thread() { return _concurrent_gc_thread; } - - ShenandoahMonitoringSupport* monitoring_support(); - - ShenandoahJNICritical* jni_critical(); - - size_t bump_object_age(HeapWord* start, HeapWord* end); - - void swap_mark_bitmaps(); - CMBitMap* prev_mark_bit_map(); - CMBitMap* next_mark_bit_map(); - - inline bool mark_current(oop obj) const; - inline bool mark_current_no_checks(oop obj) const; - inline bool is_marked_current(oop obj) const; - inline bool is_marked_current(oop obj, ShenandoahHeapRegion* r) const; - - inline bool is_marked_prev(oop obj) const; - inline bool is_marked_prev(oop obj, const ShenandoahHeapRegion* r) const; - - ReferenceProcessor* _ref_processor; - - inline bool requires_marking(const void* entry) const; - bool is_obj_dead(const oop obj, const ShenandoahHeapRegion* r) const; - - void reset_mark_bitmap(); - void reset_mark_bitmap_range(HeapWord* from, HeapWord* to); - - bool is_bitmap_clear(); - - void mark_object_live(oop obj, bool enqueue); + inline bool concurrent_mark_in_progress(); + static address concurrent_mark_in_progress_addr(); void prepare_for_concurrent_evacuation(); - void do_evacuation(); - void parallel_evacuate(); - - inline void initialize_brooks_ptr(oop p); - - inline oop maybe_update_oop_ref(oop* p); - inline oop maybe_update_oop_ref_not_null(oop* p, oop obj); - inline oop update_oop_ref_not_null(oop* p, oop obj); - - void evacuate_region(ShenandoahHeapRegion* from_region, ShenandoahHeapRegion* to_region); - void parallel_evacuate_region(ShenandoahHeapRegion* from_region); - void 
verify_evacuated_region(ShenandoahHeapRegion* from_region); - - void print_heap_regions(outputStream* st = tty) const; - - void print_all_refs(const char* prefix); - - void print_heap_objects(HeapWord* start, HeapWord* end); - void print_heap_locations(HeapWord* start, HeapWord* end); - void print_heap_object(oop p); - - inline oop evacuate_object(oop src, Thread* thread); - bool is_in_collection_set(const void* p); - - inline void copy_object(oop p, HeapWord* s, size_t words); - void verify_copy(oop p, oop c); - void verify_heap_size_consistency(); - void verify_heap_after_marking(); - void verify_heap_after_evacuation(); - void verify_heap_after_update_refs(); - void verify_regions_after_update_refs(); - - HeapWord* start_of_heap() { return _first_region_bottom + 1;} - - void cleanup_after_cancelconcgc(); - void increase_used(size_t bytes); - void decrease_used(size_t bytes); - void set_used(size_t bytes); - + void evacuate_and_update_roots(); + inline bool is_evacuation_in_progress(); void set_evacuation_in_progress(bool in_progress); - inline bool is_evacuation_in_progress(); void set_full_gc_in_progress(bool in_progress); bool is_full_gc_in_progress() const; @@ -356,74 +258,155 @@ inline bool need_update_refs() const; void set_need_update_refs(bool update_refs); - ReferenceProcessor* ref_processor() { return _ref_processor;} - virtual void ref_processing_init(); - ShenandoahForwardedIsAliveClosure isAlive; - void evacuate_and_update_roots(); + inline bool region_in_collection_set(size_t region_index) const; - ShenandoahFreeSet* free_regions(); + void set_region_in_collection_set(size_t region_index, bool b); void acquire_pending_refs_lock(); void release_pending_refs_lock(); - int max_workers(); - int max_conc_workers(); - int max_parallel_workers(); - FlexibleWorkGang* conc_workers() const{ return _conc_workers;} + // Mainly there to avoid accidentally calling the templated + // method below with ShenandoahHeapRegion* which would be *wrong*. 
+ inline bool in_collection_set(ShenandoahHeapRegion* r) const; + + template + inline bool in_collection_set(T obj) const; + + void clear_cset_fast_test(); + + inline bool allocated_after_next_mark_start(HeapWord* addr) const; + void set_next_top_at_mark_start(HeapWord* region_base, HeapWord* addr); + HeapWord* next_top_at_mark_start(HeapWord* region_base); + + inline bool allocated_after_complete_mark_start(HeapWord* addr) const; + void set_complete_top_at_mark_start(HeapWord* region_base, HeapWord* addr); + HeapWord* complete_top_at_mark_start(HeapWord* region_base); + + inline oop evacuate_object(oop src, Thread* thread); + inline bool cancelled_concgc() const; + inline bool try_cancel_concgc() const; + void clear_cancelled_concgc(); ShenandoahHeapRegionSet* regions() { return _ordered_regions;} ShenandoahHeapRegionSet* sorted_regions() { return _sorted_regions;} + ShenandoahFreeSet* free_regions(); + void clear_free_regions(); + void add_free_region(ShenandoahHeapRegion* r); + + void increase_used(size_t bytes); + void decrease_used(size_t bytes); + + void set_used(size_t bytes); + size_t calculateUsed(); + + size_t garbage(); + + void reset_next_mark_bitmap(WorkGang* gang); + void reset_complete_mark_bitmap(WorkGang* gang); + + CMBitMap* complete_mark_bit_map(); + CMBitMap* next_mark_bit_map(); + inline bool is_marked_complete(oop obj) const; + inline bool mark_next(oop obj) const; + inline bool is_marked_next(oop obj) const; + bool is_next_bitmap_clear(); + bool is_complete_bitmap_clear_range(HeapWord* start, HeapWord* end); + + void parallel_evacuate_region(ShenandoahHeapRegion* from_region); + + template + inline oop update_oop_ref_not_null(T* p, oop obj); + + void print_heap_regions(outputStream* st = tty) const; + void print_all_refs(const char* prefix); + void print_heap_locations(HeapWord* start, HeapWord* end); + + size_t bytes_allocated_since_cm(); + void set_bytes_allocated_since_cm(size_t bytes); + + size_t max_allocated_gc(); + + void reclaim_humongous_region_at(ShenandoahHeapRegion* r); + + VirtualSpace* storage() const; + + ShenandoahMonitoringSupport* monitoring_support(); + ShenandoahConcurrentMark* concurrentMark() { return _scm;} + + ReferenceProcessor* ref_processor() { return _ref_processor;} + + WorkGang* conc_workers() const { return _conc_workers;} + WorkGang* workers() const { return _workers;} + + uint max_conc_workers(); + uint max_workers(); + uint max_parallel_workers(); + + void do_evacuation(); + ShenandoahHeapRegion* next_compaction_region(const ShenandoahHeapRegion* r); + + void heap_region_iterate(ShenandoahHeapRegionClosure* blk, bool skip_dirty_regions = false, bool skip_humongous_continuation = false) const; + + void verify_heap_after_evacuation(); + + // Delete entries for dead interned string and clean up unreferenced symbols + // in symbol table, possibly in parallel. + void unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool unlink_strings = true, bool unlink_symbols = true); + size_t num_regions(); size_t max_regions(); - ShenandoahHeapRegion* next_compaction_region(const ShenandoahHeapRegion* r); + // TODO: consider moving this into ShenandoahHeapRegion. 
- void recycle_dirty_regions(); + template + inline void marked_object_iterate(ShenandoahHeapRegion* region, T* cl); - void register_region_with_in_cset_fast_test(ShenandoahHeapRegion* r) { - assert(_in_cset_fast_test_base != NULL, "sanity"); - assert(r->is_in_collection_set(), "invariant"); - uint index = r->region_number(); - assert(index < _in_cset_fast_test_length, "invariant"); - assert(!_in_cset_fast_test_base[index], "invariant"); - _in_cset_fast_test_base[index] = true; - } - bool in_cset_fast_test(HeapWord* obj) { - assert(_in_cset_fast_test != NULL, "sanity"); - if (is_in(obj)) { - // no need to subtract the bottom of the heap from obj, - // _in_cset_fast_test is biased - uintx index = ((uintx) obj) >> ShenandoahHeapRegion::RegionSizeShift; - bool ret = _in_cset_fast_test[index]; - // let's make sure the result is consistent with what the slower - // test returns - assert( ret || !is_in_collection_set(obj), "sanity"); - assert(!ret || is_in_collection_set(obj), "sanity"); - return ret; - } else { - return false; - } - } + GCTimer* gc_timer() const; + GCTracer* tracer(); - static address in_cset_fast_test_addr() { - return (address) (ShenandoahHeap::heap()->_in_cset_fast_test); - } - - void clear_cset_fast_test() { - assert(_in_cset_fast_test_base != NULL, "sanity"); - memset(_in_cset_fast_test_base, false, - (size_t) _in_cset_fast_test_length * sizeof(bool)); - } - - - inline bool allocated_after_mark_start(HeapWord* addr) const; - void set_top_at_mark_start(HeapWord* region_base, HeapWord* addr); - - GCTracer* tracer(); - ShenandoahCollectionSet* collection_set() { return _collection_set; } - size_t tlab_used(Thread* ignored) const; + void swap_mark_bitmaps(); private: + HeapWord* allocate_new_tlab(size_t word_size, bool mark); + HeapWord* allocate_memory(size_t word_size, bool evacuating); + // Shenandoah functionality. 
+ inline HeapWord* allocate_from_gclab(Thread* thread, size_t size); + HeapWord* allocate_from_gclab_slow(Thread* thread, size_t size); + HeapWord* allocate_new_gclab(size_t word_size); + + void roots_iterate(OopClosure* cl); + + template + inline void do_marked_object(CMBitMap* bitmap, T* cl, oop obj); + + ShenandoahConcurrentThread* concurrent_thread() { return _concurrent_gc_thread; } + + inline bool mark_next_no_checks(oop obj) const; + + void parallel_evacuate(); + + template + inline oop maybe_update_oop_ref_not_null(T* p, oop obj); + + inline oop atomic_compare_exchange_oop(oop n, narrowOop* addr, oop c); + inline oop atomic_compare_exchange_oop(oop n, oop* addr, oop c); + + void evacuate_region(ShenandoahHeapRegion* from_region, ShenandoahHeapRegion* to_region); + +#ifdef ASSERT + void verify_evacuated_region(ShenandoahHeapRegion* from_region); +#endif + + inline void copy_object(oop p, HeapWord* s, size_t words); + void verify_copy(oop p, oop c); + void verify_heap_size_consistency(); + void verify_heap_after_marking(); + void verify_heap_after_update_refs(); + void verify_regions_after_update_refs(); + + void ref_processing_init(); + + ShenandoahCollectionSet* collection_set() { return _collection_set; } bool call_from_write_barrier(bool evacuating); bool check_grow_heap(); @@ -434,19 +417,10 @@ void set_concurrent_mark_in_progress(bool in_progress); void oom_during_evacuation(); - void cancel_concgc(); -public: - inline bool cancelled_concgc() const; - void clear_cancelled_concgc(); + void cancel_concgc(GCCause::Cause cause); + void cancel_concgc(ShenandoahCancelCause cause); + inline void set_cancelled_concgc(bool v); - void shutdown(); - - inline bool concurrent_mark_in_progress(); - static inline address concurrent_mark_in_progress_addr(); - size_t calculateUsed(); - size_t calculateFree(); - -private: void verify_live(); void verify_liveness_after_concurrent_mark(); @@ -455,12 +429,7 @@ void set_from_region_protection(bool protect); -public: - // Delete entries for dead interned string and clean up unreferenced symbols - // in symbol table, possibly in parallel. 
- void unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool unlink_strings = true, bool unlink_symbols = true); - - void reclaim_humongous_region_at(ShenandoahHeapRegion* r); + const char* cancel_cause_to_string(ShenandoahCancelCause cause); }; diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahHeap.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.inline.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.inline.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -24,15 +24,17 @@ #ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAHHEAP_INLINE_HPP #define SHARE_VM_GC_SHENANDOAH_SHENANDOAHHEAP_INLINE_HPP +#include "gc_implementation/g1/concurrentMark.inline.hpp" #include "memory/threadLocalAllocBuffer.inline.hpp" #include "gc_implementation/shenandoah/brooksPointer.inline.hpp" -#include "gc_implementation/g1/concurrentMark.inline.hpp" #include "gc_implementation/shenandoah/shenandoahBarrierSet.inline.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.hpp" #include "gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp" #include "gc_implementation/shenandoah/shenandoahHeapRegion.inline.hpp" #include "oops/oop.inline.hpp" -#include "runtime/atomic.inline.hpp" +#include "runtime/atomic.hpp" +#include "runtime/prefetch.hpp" +#include "runtime/prefetch.inline.hpp" #include "utilities/copy.hpp" /* @@ -40,7 +42,7 @@ * been marked by this thread. Returns false if the object has already been marked, * or if a competing thread succeeded in marking this object. */ -inline bool ShenandoahHeap::mark_current(oop obj) const { +inline bool ShenandoahHeap::mark_next(oop obj) const { #ifdef ASSERT if (! oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj))) { tty->print_cr("heap region containing obj:"); @@ -53,32 +55,22 @@ #endif assert(oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj)), "only mark forwarded copy of objects"); - return mark_current_no_checks(obj); + return mark_next_no_checks(obj); } -inline bool ShenandoahHeap::mark_current_no_checks(oop obj) const { +inline bool ShenandoahHeap::mark_next_no_checks(oop obj) const { HeapWord* addr = (HeapWord*) obj; - return (! allocated_after_mark_start(addr)) && _next_mark_bit_map->parMark(addr); + return (! 
allocated_after_next_mark_start(addr)) && _next_mark_bit_map->parMark(addr); } -inline bool ShenandoahHeap::is_marked_current(oop obj) const { +inline bool ShenandoahHeap::is_marked_next(oop obj) const { HeapWord* addr = (HeapWord*) obj; - return allocated_after_mark_start(addr) || _next_mark_bit_map->isMarked(addr); + return allocated_after_next_mark_start(addr) || _next_mark_bit_map->isMarked(addr); } -inline bool ShenandoahHeap::is_marked_current(oop obj, ShenandoahHeapRegion* r) const { +inline bool ShenandoahHeap::is_marked_complete(oop obj) const { HeapWord* addr = (HeapWord*) obj; - return _next_mark_bit_map->isMarked(addr) || r->allocated_after_mark_start(addr); -} - -inline bool ShenandoahHeap::is_marked_prev(oop obj) const { - ShenandoahHeapRegion* r = heap_region_containing((void*) obj); - return is_marked_prev(obj, r); -} - -inline bool ShenandoahHeap::is_marked_prev(oop obj, const ShenandoahHeapRegion* r) const { - HeapWord* addr = (HeapWord*) obj; - return _prev_mark_bit_map->isMarked(addr) || r->allocated_after_prev_mark_start(addr); + return allocated_after_complete_mark_start(addr) || _complete_mark_bit_map->isMarked(addr); } inline bool ShenandoahHeap::need_update_refs() const { @@ -90,7 +82,12 @@ uintptr_t index = (region_start - (uintptr_t) _first_region_bottom) >> ShenandoahHeapRegion::RegionSizeShift; #ifdef ASSERT if (!(index < _num_regions)) { - tty->print_cr("heap region does not contain address, first_region_bottom: "PTR_FORMAT", real bottom of first region: "PTR_FORMAT", num_regions: "SIZE_FORMAT", region_size: "SIZE_FORMAT, p2i(_first_region_bottom), p2i(_ordered_regions->get(0)->bottom()), _num_regions, ShenandoahHeapRegion::RegionSizeBytes); + tty->print_cr("heap region does not contain address, first_region_bottom: "PTR_FORMAT \ + ", real bottom of first region: "PTR_FORMAT", num_regions: "SIZE_FORMAT", region_size: "SIZE_FORMAT, + p2i(_first_region_bottom), + p2i(_ordered_regions->get(0)->bottom()), + _num_regions, + ShenandoahHeapRegion::RegionSizeBytes); } #endif assert(index < _num_regions, "heap region index must be in range"); @@ -102,20 +99,24 @@ ShenandoahHeapRegion* result = _ordered_regions->get(index); #ifdef ASSERT if (!(addr >= result->bottom() && addr < result->end())) { - tty->print_cr("heap region does not contain address, first_region_bottom: "PTR_FORMAT", real bottom of first region: "PTR_FORMAT", num_regions: "SIZE_FORMAT, p2i(_first_region_bottom), p2i(_ordered_regions->get(0)->bottom()), _num_regions); + tty->print_cr("heap region does not contain address, first_region_bottom: "PTR_FORMAT \ + ", real bottom of first region: "PTR_FORMAT", num_regions: "SIZE_FORMAT, + p2i(_first_region_bottom), + p2i(_ordered_regions->get(0)->bottom()), + _num_regions); } #endif assert(addr >= result->bottom() && addr < result->end(), "address must be in found region"); return result; } -inline oop ShenandoahHeap::update_oop_ref_not_null(oop* p, oop obj) { - if (in_cset_fast_test((HeapWord*) obj)) { +template +inline oop ShenandoahHeap::update_oop_ref_not_null(T* p, oop obj) { + if (in_collection_set(obj)) { oop forw = ShenandoahBarrierSet::resolve_oop_static_not_null(obj); assert(! oopDesc::unsafe_equals(forw, obj) || is_full_gc_in_progress(), "expect forwarded object"); obj = forw; - assert(obj->is_oop(), "sanity"); - oopDesc::store_heap_oop(p, obj); + oopDesc::encode_store_heap_oop(p, obj); } #ifdef ASSERT else { @@ -125,18 +126,31 @@ return obj; } -inline oop ShenandoahHeap::maybe_update_oop_ref(oop* p) { - oop obj = oopDesc::load_heap_oop(p); - if (! 
oopDesc::is_null(obj)) { +template +inline oop ShenandoahHeap::maybe_update_oop_ref(T* p) { + T o = oopDesc::load_heap_oop(p); + if (! oopDesc::is_null(o)) { + oop obj = oopDesc::decode_heap_oop_not_null(o); return maybe_update_oop_ref_not_null(p, obj); } else { - return obj; + return NULL; } } -inline oop ShenandoahHeap::maybe_update_oop_ref_not_null(oop* p, oop heap_oop) { +inline oop ShenandoahHeap::atomic_compare_exchange_oop(oop n, oop* addr, oop c) { + return (oop) Atomic::cmpxchg_ptr(n, addr, c); +} - assert((! is_in(p)) || (! heap_region_containing(p)->is_in_collection_set()) +inline oop ShenandoahHeap::atomic_compare_exchange_oop(oop n, narrowOop* addr, oop c) { + narrowOop cmp = oopDesc::encode_heap_oop(c); + narrowOop val = oopDesc::encode_heap_oop(n); + return oopDesc::decode_heap_oop((narrowOop) Atomic::cmpxchg(val, addr, cmp)); +} + +template +inline oop ShenandoahHeap::maybe_update_oop_ref_not_null(T* p, oop heap_oop) { + + assert((! is_in(p)) || (! in_collection_set(p)) || is_full_gc_in_progress(), "never update refs in from-space, unless evacuation has been cancelled"); @@ -148,16 +162,19 @@ } #endif assert(is_in(heap_oop), "only ever call this on objects in the heap"); - if (in_cset_fast_test((HeapWord*) heap_oop)) { + if (in_collection_set(heap_oop)) { oop forwarded_oop = ShenandoahBarrierSet::resolve_oop_static_not_null(heap_oop); // read brooks ptr assert(! oopDesc::unsafe_equals(forwarded_oop, heap_oop) || is_full_gc_in_progress(), "expect forwarded object"); - // tty->print_cr("updating old ref: "PTR_FORMAT" pointing to "PTR_FORMAT" to new ref: "PTR_FORMAT, p2i(p), p2i(heap_oop), p2i(forwarded_oop)); + + log_develop_trace(gc)("Updating old ref: "PTR_FORMAT" pointing to "PTR_FORMAT" to new ref: "PTR_FORMAT, + p2i(p), p2i(heap_oop), p2i(forwarded_oop)); + assert(forwarded_oop->is_oop(), "oop required"); assert(is_in(forwarded_oop), "forwardee must be in heap"); assert(oopDesc::bs()->is_safe(forwarded_oop), "forwardee must not be in collection set"); // If this fails, another thread wrote to p before us, it will be logged in SATB and the // reference be updated later. - oop result = (oop) Atomic::cmpxchg_ptr(forwarded_oop, p, heap_oop); + oop result = atomic_compare_exchange_oop(forwarded_oop, p, heap_oop); if (oopDesc::unsafe_equals(result, heap_oop)) { // CAS successful. 
return forwarded_oop; @@ -165,14 +182,22 @@ return NULL; } } else { - assert(oopDesc::unsafe_equals(heap_oop, ShenandoahBarrierSet::resolve_oop_static_not_null(heap_oop)), "expect not forwarded"); + assert(oopDesc::unsafe_equals(heap_oop, ShenandoahBarrierSet::resolve_oop_static_not_null(heap_oop)), + "expect not forwarded"); return heap_oop; } } inline bool ShenandoahHeap::cancelled_concgc() const { - bool cancelled = _cancelled_concgc; - return cancelled; + return (jbyte) OrderAccess::load_acquire((jbyte*) &_cancelled_concgc); +} + +inline bool ShenandoahHeap::try_cancel_concgc() const { + return Atomic::cmpxchg(true, (jbyte*) &_cancelled_concgc, false) == false; +} + +inline void ShenandoahHeap::set_cancelled_concgc(bool v) { + OrderAccess::release_store_fence((jbyte*) &_cancelled_concgc, (jbyte) v); } inline HeapWord* ShenandoahHeap::allocate_from_gclab(Thread* thread, size_t size) { @@ -188,53 +213,38 @@ } } -inline void ShenandoahHeap::initialize_brooks_ptr(oop p) { - BrooksPointer brooks_ptr = BrooksPointer::get(p); - brooks_ptr.set_forwardee(p); -} - inline void ShenandoahHeap::copy_object(oop p, HeapWord* s, size_t words) { - HeapWord* filler = s; assert(s != NULL, "allocation of brooks pointer must not fail"); - HeapWord* copy = s + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; + HeapWord* copy = s + BrooksPointer::word_size(); guarantee(copy != NULL, "allocation of copy object must not fail"); Copy::aligned_disjoint_words((HeapWord*) p, copy, words); - initialize_brooks_ptr(oop(copy)); + BrooksPointer::initialize(oop(copy)); -#ifdef ASSERT - if (ShenandoahTraceEvacuations) { - tty->print_cr("copy object from "PTR_FORMAT" to: "PTR_FORMAT, p2i((HeapWord*) p), p2i(copy)); - } -#endif + log_develop_trace(gc, compaction)("copy object from "PTR_FORMAT" to: "PTR_FORMAT, p2i((HeapWord*) p), p2i(copy)); } inline oop ShenandoahHeap::evacuate_object(oop p, Thread* thread) { size_t required; #ifdef ASSERT - ShenandoahHeapRegion* hr; + ShenandoahHeapRegion* hr = NULL; if (ShenandoahVerifyReadsToFromSpace) { hr = heap_region_containing(p); { hr->memProtectionOff(); - required = BrooksPointer::BROOKS_POINTER_OBJ_SIZE + p->size(); + required = BrooksPointer::word_size() + p->size(); hr->memProtectionOn(); } } else { - required = BrooksPointer::BROOKS_POINTER_OBJ_SIZE + p->size(); + required = BrooksPointer::word_size() + p->size(); } #else - required = BrooksPointer::BROOKS_POINTER_OBJ_SIZE + p->size(); + required = BrooksPointer::word_size() + p->size(); #endif assert(! heap_region_containing(p)->is_humongous(), "never evacuate humongous objects"); - // Don't even attempt to evacuate anything if evacuation has been cancelled. 
- if (_cancelled_concgc) { - return ShenandoahBarrierSet::resolve_oop_static(p); - } - bool alloc_from_gclab = true; HeapWord* filler = allocate_from_gclab(thread, required); if (filler == NULL) { @@ -251,54 +261,73 @@ return resolved; } - HeapWord* copy = filler + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; + HeapWord* copy = filler + BrooksPointer::word_size(); #ifdef ASSERT if (ShenandoahVerifyReadsToFromSpace) { hr->memProtectionOff(); - copy_object(p, filler, required - BrooksPointer::BROOKS_POINTER_OBJ_SIZE); + copy_object(p, filler, required - BrooksPointer::word_size()); hr->memProtectionOn(); } else { - copy_object(p, filler, required - BrooksPointer::BROOKS_POINTER_OBJ_SIZE); + copy_object(p, filler, required - BrooksPointer::word_size()); } #else - copy_object(p, filler, required - BrooksPointer::BROOKS_POINTER_OBJ_SIZE); + copy_object(p, filler, required - BrooksPointer::word_size()); #endif - HeapWord* result = BrooksPointer::get(p).cas_forwardee((HeapWord*) p, copy); + oop copy_val = oop(copy); + oop result = BrooksPointer::try_update_forwardee(p, copy_val); oop return_val; - if (result == (HeapWord*) p) { - return_val = oop(copy); + if (oopDesc::unsafe_equals(result, p)) { + return_val = copy_val; + + log_develop_trace(gc, compaction)("Copy of "PTR_FORMAT" to "PTR_FORMAT" succeeded \n", + p2i((HeapWord*) p), p2i(copy)); #ifdef ASSERT - if (ShenandoahTraceEvacuations) { - tty->print("Copy of "PTR_FORMAT" to "PTR_FORMAT" succeeded \n", p2i((HeapWord*) p), p2i(copy)); - } assert(return_val->is_oop(), "expect oop"); - assert(p->klass() == return_val->klass(), err_msg("Should have the same class p: "PTR_FORMAT", copy: "PTR_FORMAT, p2i((HeapWord*) p), p2i((HeapWord*) copy))); + assert(p->klass() == return_val->klass(), err_msg("Should have the same class p: "PTR_FORMAT", copy: "PTR_FORMAT, + p2i((HeapWord*) p), p2i((HeapWord*) copy))); #endif } else { if (alloc_from_gclab) { thread->gclab().rollback(required); } -#ifdef ASSERT - if (ShenandoahTraceEvacuations) { - tty->print_cr("Copy of "PTR_FORMAT" to "PTR_FORMAT" failed, use other: "PTR_FORMAT, p2i((HeapWord*) p), p2i(copy), p2i((HeapWord*) result)); - } -#endif - return_val = (oopDesc*) result; + log_develop_trace(gc, compaction)("Copy of "PTR_FORMAT" to "PTR_FORMAT" failed, use other: "PTR_FORMAT, + p2i((HeapWord*) p), p2i(copy), p2i((HeapWord*) result)); + return_val = result; } return return_val; } inline bool ShenandoahHeap::requires_marking(const void* entry) const { - return ! is_marked_current(oop(entry)); + return ! 
is_marked_next(oop(entry)); +} + +bool ShenandoahHeap::region_in_collection_set(size_t region_index) const { + return _in_cset_fast_test_base[region_index]; +} + +bool ShenandoahHeap::in_collection_set(ShenandoahHeapRegion* r) const { + return region_in_collection_set(r->region_number()); +} + +template +inline bool ShenandoahHeap::in_collection_set(T p) const { + HeapWord* obj = (HeapWord*) p; + assert(_in_cset_fast_test != NULL, "sanity"); + assert(is_in(obj), "should be in heap"); + + // no need to subtract the bottom of the heap from obj, + // _in_cset_fast_test is biased + uintx index = ((uintx) obj) >> ShenandoahHeapRegion::RegionSizeShift; + return _in_cset_fast_test[index]; } inline bool ShenandoahHeap::concurrent_mark_in_progress() { - return _concurrent_mark_in_progress; + return _concurrent_mark_in_progress != 0; } inline address ShenandoahHeap::concurrent_mark_in_progress_addr() { @@ -306,17 +335,104 @@ } inline bool ShenandoahHeap::is_evacuation_in_progress() { - return _evacuation_in_progress; + return _evacuation_in_progress != 0; } -inline bool ShenandoahHeap::allocated_after_mark_start(HeapWord* addr) const { +inline bool ShenandoahHeap::allocated_after_next_mark_start(HeapWord* addr) const { uintx index = ((uintx) addr) >> ShenandoahHeapRegion::RegionSizeShift; - HeapWord* top_at_mark_start = _top_at_mark_starts[index]; + HeapWord* top_at_mark_start = _next_top_at_mark_starts[index]; bool alloc_after_mark_start = addr >= top_at_mark_start; -#ifdef ASSERT - ShenandoahHeapRegion* r = heap_region_containing(addr); - assert(alloc_after_mark_start == r->allocated_after_mark_start(addr), "sanity"); -#endif return alloc_after_mark_start; } + +inline bool ShenandoahHeap::allocated_after_complete_mark_start(HeapWord* addr) const { + uintx index = ((uintx) addr) >> ShenandoahHeapRegion::RegionSizeShift; + HeapWord* top_at_mark_start = _complete_top_at_mark_starts[index]; + bool alloc_after_mark_start = addr >= top_at_mark_start; + return alloc_after_mark_start; +} + +template +inline void ShenandoahHeap::marked_object_iterate(ShenandoahHeapRegion* region, T* cl) { + assert(BrooksPointer::word_offset() < 0, "skip_delta calculation below assumes the forwarding ptr is before obj"); + + CMBitMap* mark_bit_map = _complete_mark_bit_map; + HeapWord* top_at_mark_start = complete_top_at_mark_start(region->bottom()); + + size_t skip_bitmap_delta = BrooksPointer::word_size() + 1; + size_t skip_objsize_delta = BrooksPointer::word_size() /* + actual obj.size() below */; + HeapWord* start = region->bottom() + BrooksPointer::word_size(); + + HeapWord* limit = region->top(); + HeapWord* end = MIN2(top_at_mark_start + BrooksPointer::word_size(), _ordered_regions->end()); + HeapWord* addr = mark_bit_map->getNextMarkedWordAddress(start, end); + + intx dist = ShenandoahMarkScanPrefetch; + if (dist > 0) { + // Batched scan that prefetches the oop data, anticipating the access to + // either header, oop field, or forwarding pointer. Not that we cannot + // touch anything in oop, while it still being prefetched to get enough + // time for prefetch to work. This is why we try to scan the bitmap linearly, + // disregarding the object size. However, since we know forwarding pointer + // preceeds the object, we can skip over it. Once we cannot trust the bitmap, + // there is no point for prefetching the oop contents, as oop->size() will + // touch it prematurely. 
+ + oop slots[dist]; + bool aborting = false; + int avail; + do { + avail = 0; + for (int c = 0; (c < dist) && (addr < limit); c++) { + Prefetch::read(addr, 1); + oop obj = oop(addr); + slots[avail++] = obj; + if (addr < top_at_mark_start) { + addr += skip_bitmap_delta; + addr = mark_bit_map->getNextMarkedWordAddress(addr, end); + } else { + // cannot trust mark bitmap anymore, finish the current stride, + // and switch to accurate traversal + addr += obj->size() + skip_objsize_delta; + aborting = true; + } + } + + for (int c = 0; c < avail; c++) { + do_marked_object(mark_bit_map, cl, slots[c]); + } + } while (avail > 0 && !aborting); + + // accurate traversal + while (addr < limit) { + oop obj = oop(addr); + int size = obj->size(); + do_marked_object(mark_bit_map, cl, obj); + addr += size + skip_objsize_delta; + } + } else { + while (addr < limit) { + oop obj = oop(addr); + int size = obj->size(); + do_marked_object(mark_bit_map, cl, obj); + addr += size + skip_objsize_delta; + if (addr < top_at_mark_start) { + addr = mark_bit_map->getNextMarkedWordAddress(addr, end); + } + } + } +} + +template +inline void ShenandoahHeap::do_marked_object(CMBitMap* bitmap, T* cl, oop obj) { +#ifdef ASSERT + assert(!oopDesc::is_null(obj), "sanity"); + assert(obj->is_oop(), "sanity"); + assert(is_in(obj), "sanity"); + assert(bitmap == _complete_mark_bit_map, "only iterate completed mark bitmap"); + assert(is_marked_complete(obj), "object expected to be marked"); +#endif + cl->do_object(obj); +} + #endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHHEAP_INLINE_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -22,34 +22,30 @@ */ #include "memory/allocation.hpp" -#include "gc_implementation/shared/liveRange.hpp" #include "gc_implementation/shenandoah/brooksPointer.hpp" #include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" -#include "gc_implementation/g1/heapRegionBounds.inline.hpp" #include "memory/space.inline.hpp" #include "memory/universe.hpp" #include "oops/oop.inline.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/os.hpp" +#include "runtime/safepoint.hpp" +Monitor ShenandoahHeapRegion::_mem_protect_lock(Mutex::special, "ShenandoahMemProtect_lock", true); size_t ShenandoahHeapRegion::RegionSizeShift = 0; size_t ShenandoahHeapRegion::RegionSizeBytes = 0; -jint ShenandoahHeapRegion::initialize_heap_region(HeapWord* start, - size_t regionSizeWords, int index) { - - reserved = MemRegion((HeapWord*) start, regionSizeWords); +jint ShenandoahHeapRegion::initialize_heap_region(ShenandoahHeap* heap, HeapWord* start, + size_t regionSizeWords, size_t index) { + _heap = heap; + reserved = MemRegion(start, regionSizeWords); ContiguousSpace::initialize(reserved, true, false); - liveData = 0; - _is_in_collection_set = false; + _live_data = 0; _region_number = index; #ifdef ASSERT _mem_protection_level = 1; // Off, level 1. 
#endif - _top_at_mark_start = bottom(); - _top_at_prev_mark_start = bottom(); - _top_prev_mark_bitmap = bottom(); return JNI_OK; } @@ -62,26 +58,48 @@ return true; } -void ShenandoahHeapRegion::clearLiveData() { - setLiveData(0); +void ShenandoahHeapRegion::clear_live_data() { + assert(Thread::current()->is_VM_thread(), "by VM thread"); + _live_data = 0; } -void ShenandoahHeapRegion::setLiveData(size_t s) { - Atomic::store_ptr(s, (intptr_t*) &liveData); +void ShenandoahHeapRegion::set_live_data(size_t s) { + assert(Thread::current()->is_VM_thread(), "by VM thread"); + _live_data = s; } -size_t ShenandoahHeapRegion::getLiveData() const { - return liveData; +size_t ShenandoahHeapRegion::get_live_data() const { + assert (sizeof(julong) == sizeof(size_t), "do not read excessively"); + return (size_t)OrderAccess::load_acquire((volatile julong*)&_live_data); } size_t ShenandoahHeapRegion::garbage() const { - assert(used() >= getLiveData() || is_humongous(), err_msg("Live Data must be a subset of used() live: "SIZE_FORMAT" used: "SIZE_FORMAT, getLiveData(), used())); - size_t result = used() - getLiveData(); + assert(used() >= get_live_data() || is_humongous(), err_msg("Live Data must be a subset of used() live: "SIZE_FORMAT" used: "SIZE_FORMAT, + get_live_data(), used())); + size_t result = used() - get_live_data(); return result; } -bool ShenandoahHeapRegion::is_in_collection_set() const { - return _is_in_collection_set; +bool ShenandoahHeapRegion::in_collection_set() const { + return _heap->region_in_collection_set(_region_number); +} + +void ShenandoahHeapRegion::set_in_collection_set(bool b) { + assert(! (is_humongous() && b), "never ever enter a humongous region into the collection set"); + + _heap->set_region_in_collection_set(_region_number, b); + +#ifdef ASSERT + if (ShenandoahVerifyWritesToFromSpace || ShenandoahVerifyReadsToFromSpace) { + if (b) { + memProtectionOn(); + assert(_mem_protection_level == 0, "need to be protected here"); + } else { + assert(_mem_protection_level == 0, "need to be protected here"); + memProtectionOff(); + } + } +#endif } #include @@ -90,10 +108,10 @@ void ShenandoahHeapRegion::memProtectionOn() { /* - tty->print_cr("protect memory on region level: "INT32_FORMAT, _mem_protection_level); + log_develop_trace(gc)("Protect memory on region level: "INT32_FORMAT, _mem_protection_level); print(tty); */ - MutexLockerEx ml(ShenandoahMemProtect_lock, true); + MutexLockerEx ml(&_mem_protect_lock, true); assert(_mem_protection_level >= 1, "invariant"); if (--_mem_protection_level == 0) { @@ -112,7 +130,7 @@ tty->print_cr("unprotect memory on region level: "INT32_FORMAT, _mem_protection_level); print(tty); */ - MutexLockerEx ml(ShenandoahMemProtect_lock, true); + MutexLockerEx ml(&_mem_protect_lock, true); assert(_mem_protection_level >= 0, "invariant"); if (_mem_protection_level++ == 0) { os::protect_memory((char*) bottom(), end() - bottom(), os::MEM_PROT_RW); @@ -121,38 +139,10 @@ #endif -void ShenandoahHeapRegion::set_is_in_collection_set(bool b) { - assert(! 
(is_humongous() && b), "never ever enter a humongous region into the collection set"); +void ShenandoahHeapRegion::print_on(outputStream* st) const { + st->print("ShenandoahHeapRegion: "PTR_FORMAT"/"SIZE_FORMAT, p2i(this), _region_number); - _is_in_collection_set = b; - - if (b) { - // tty->print_cr("registering region in fast-cset"); - // print(); - ShenandoahHeap::heap()->register_region_with_in_cset_fast_test(this); - } - -#ifdef ASSERT - if (ShenandoahVerifyWritesToFromSpace || ShenandoahVerifyReadsToFromSpace) { - if (b) { - memProtectionOn(); - assert(_mem_protection_level == 0, "need to be protected here"); - } else { - assert(_mem_protection_level == 0, "need to be protected here"); - memProtectionOff(); - } - } -#endif -} - -ByteSize ShenandoahHeapRegion::is_in_collection_set_offset() { - return byte_offset_of(ShenandoahHeapRegion, _is_in_collection_set); -} - -void ShenandoahHeapRegion::print_on(outputStream* st) const { - st->print_cr("ShenandoahHeapRegion: "PTR_FORMAT"/"SIZE_FORMAT, p2i(this), _region_number); - - if (is_in_collection_set()) + if (in_collection_set()) st->print("C"); if (is_humongous_start()) { st->print("H"); @@ -164,7 +154,7 @@ st->print(" "); st->print_cr("live = "SIZE_FORMAT" garbage = "SIZE_FORMAT" bottom = "PTR_FORMAT" end = "PTR_FORMAT" top = "PTR_FORMAT, - getLiveData(), garbage(), p2i(bottom()), p2i(end()), p2i(top())); + get_live_data(), garbage(), p2i(bottom()), p2i(end()), p2i(top())); } @@ -179,10 +169,10 @@ void do_object(oop obj) { - if ((! _skip_unreachable_objects) || _heap->is_marked_current(obj)) { + if ((! _skip_unreachable_objects) || _heap->is_marked_complete(obj)) { #ifdef ASSERT if (_skip_unreachable_objects) { - assert(_heap->is_marked_current(obj), "obj must be live"); + assert(_heap->is_marked_complete(obj), "obj must be live"); } #endif obj->oop_iterate(_cl); @@ -192,52 +182,32 @@ }; void ShenandoahHeapRegion::object_iterate_interruptible(ObjectClosure* blk, bool allow_cancel) { - HeapWord* p = bottom() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; - ShenandoahHeap* heap = ShenandoahHeap::heap(); - while (p < top() && !(allow_cancel && heap->cancelled_concgc())) { + HeapWord* p = bottom() + BrooksPointer::word_size(); + while (p < top() && !(allow_cancel && _heap->cancelled_concgc())) { blk->do_object(oop(p)); #ifdef ASSERT if (ShenandoahVerifyReadsToFromSpace) { memProtectionOff(); - p += oop(p)->size() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; + p += oop(p)->size() + BrooksPointer::word_size(); memProtectionOn(); } else { - p += oop(p)->size() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; + p += oop(p)->size() + BrooksPointer::word_size(); } #else - p += oop(p)->size() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; + p += oop(p)->size() + BrooksPointer::word_size(); #endif } } -void ShenandoahHeapRegion::marked_object_iterate(ObjectClosure* blk) { - HeapWord* p = bottom(); - ShenandoahHeap* heap = ShenandoahHeap::heap(); - CMBitMap* bitmap = heap->next_mark_bit_map(); - while (p < top()) { - p += BrooksPointer::BROOKS_POINTER_OBJ_SIZE; - p = bitmap->getNextMarkedWordAddress(p, top()); - if (p < top()) { - oop obj = oop(p); - assert(heap->is_marked_current(obj), "must be marked"); - assert(p >= bottom() && p < top(), "must be within region bounds"); - assert(obj->is_oop(), "sanity"); - size_t size = obj->size(); - blk->do_object(obj); - p += size; - } - } -} - HeapWord* ShenandoahHeapRegion::object_iterate_careful(ObjectClosureCareful* blk) { HeapWord * limit = concurrent_iteration_safe_limit(); assert(limit <= top(), "sanity check"); - for 
(HeapWord* p = bottom() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; p < limit;) { + for (HeapWord* p = bottom() + BrooksPointer::word_size(); p < limit;) { size_t size = blk->do_object_careful(oop(p)); if (size == 0) { return p; // failed at p } else { - p += size + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; + p += size + BrooksPointer::word_size(); } } return NULL; // all done @@ -251,11 +221,11 @@ void ShenandoahHeapRegion::fill_region() { ShenandoahHeap* sh = (ShenandoahHeap*) Universe::heap(); - if (free() > (BrooksPointer::BROOKS_POINTER_OBJ_SIZE + CollectedHeap::min_fill_size())) { - HeapWord* filler = allocate(BrooksPointer::BROOKS_POINTER_OBJ_SIZE); + if (free() > (BrooksPointer::word_size() + CollectedHeap::min_fill_size())) { + HeapWord* filler = allocate(BrooksPointer::word_size()); HeapWord* obj = allocate(end() - top()); sh->fill_with_object(obj, end() - obj); - sh->initialize_brooks_ptr(oop(obj)); + BrooksPointer::initialize(oop(obj)); } } @@ -279,38 +249,31 @@ return _humongous_continuation; } -void ShenandoahHeapRegion::do_reset() { +void ShenandoahHeapRegion::recycle() { ContiguousSpace::initialize(reserved, true, false); - clearLiveData(); + clear_live_data(); _humongous_start = false; _humongous_continuation = false; - // _top_at_mark_start = bottom(); - _top_at_prev_mark_start = bottom(); -} - -void ShenandoahHeapRegion::recycle() { - do_reset(); - set_is_in_collection_set(false); -} - -void ShenandoahHeapRegion::reset() { - assert(_mem_protection_level == 1, "needs to be unprotected here"); - do_reset(); - _is_in_collection_set = false; + set_in_collection_set(false); + // Reset C-TAMS pointer to ensure size-based iteration, everything + // in that regions is going to be new objects. + _heap->set_complete_top_at_mark_start(bottom(), bottom()); + // We can only safely reset the C-TAMS pointer if the bitmap is clear for that region. 
+ assert(_heap->is_complete_bitmap_clear_range(bottom(), end()), "must be clear"); } HeapWord* ShenandoahHeapRegion::block_start_const(const void* p) const { assert(MemRegion(bottom(), end()).contains(p), err_msg("p ("PTR_FORMAT") not in space ["PTR_FORMAT", "PTR_FORMAT")", - p2i(p), p2i(bottom()), p2i(end()))); + p2i(p), p2i(bottom()), p2i(end()))); if (p >= top()) { return top(); } else { - HeapWord* last = bottom() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; + HeapWord* last = bottom() + BrooksPointer::word_size(); HeapWord* cur = last; while (cur <= p) { last = cur; - cur += oop(cur)->size() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; + cur += oop(cur)->size() + BrooksPointer::word_size(); } assert(oop(last)->is_oop(), err_msg(PTR_FORMAT" should be an object start", p2i(last))); @@ -319,11 +282,43 @@ } void ShenandoahHeapRegion::setup_heap_region_size(size_t initial_heap_size, size_t max_heap_size) { - uintx region_size = ShenandoahHeapRegionSize; + uintx region_size; if (FLAG_IS_DEFAULT(ShenandoahHeapRegionSize)) { + if (ShenandoahMinRegionSize > initial_heap_size / MIN_NUM_REGIONS) { + vm_exit_during_initialization("Invalid -XX:ShenandoahMinRegionSize option"); + } + if (ShenandoahMinRegionSize < MIN_REGION_SIZE) { + vm_exit_during_initialization("Invalid -XX:ShenandoahMinRegionSize option"); + } + if (ShenandoahMaxRegionSize < MIN_REGION_SIZE) { + vm_exit_during_initialization("Invalid -XX:ShenandoahMaxRegionSize option"); + } + if (ShenandoahMinRegionSize > ShenandoahMaxRegionSize) { + vm_exit_during_initialization("Invalid -XX:ShenandoahMinRegionSize or -XX:ShenandoahMaxRegionSize"); + } size_t average_heap_size = (initial_heap_size + max_heap_size) / 2; - region_size = MAX2(average_heap_size / HeapRegionBounds::target_number(), - (uintx) HeapRegionBounds::min_size()); + region_size = MAX2(average_heap_size / ShenandoahTargetNumRegions, + ShenandoahMinRegionSize); + + // Now make sure that we don't go over or under our limits. + region_size = MAX2(ShenandoahMinRegionSize, region_size); + region_size = MIN2(ShenandoahMaxRegionSize, region_size); + + } else { + if (ShenandoahHeapRegionSize > initial_heap_size / MIN_NUM_REGIONS) { + vm_exit_during_initialization("Invalid -XX:ShenandoahHeapRegionSize option"); + } + if (ShenandoahHeapRegionSize < MIN_REGION_SIZE) { + vm_exit_during_initialization("Invalid -XX:ShenandoahHeapRegionSize option"); + } + region_size = ShenandoahHeapRegionSize; + } + + // Make sure region size is at least one large page, if enabled. + // Otherwise, mem-protecting one region may falsely protect the adjacent + // regions too. + if (UseLargePages) { + region_size = MAX2(region_size, os::large_page_size()); } int region_size_log = log2_long((jlong) region_size); @@ -332,16 +327,6 @@ // <= what we've calculated so far. region_size = ((uintx)1 << region_size_log); - // Now make sure that we don't go over or under our limits. - if (region_size < HeapRegionBounds::min_size()) { - region_size = HeapRegionBounds::min_size(); - } else if (region_size > HeapRegionBounds::max_size()) { - region_size = HeapRegionBounds::max_size(); - } - - // And recalculate the log. - region_size_log = log2_long((jlong) region_size); - // Now, set up the globals. 
guarantee(RegionSizeShift == 0, "we should only set it once"); RegionSizeShift = region_size_log; @@ -349,51 +334,27 @@ guarantee(RegionSizeBytes == 0, "we should only set it once"); RegionSizeBytes = (size_t)region_size; - if (ShenandoahLogConfig) { - tty->print_cr("Region size in bytes: "SIZE_FORMAT, RegionSizeBytes); - tty->print_cr("Region size shift: "SIZE_FORMAT, RegionSizeShift); - tty->print_cr("Initial number of regions: "SIZE_FORMAT, initial_heap_size / RegionSizeBytes); - tty->print_cr("Maximum number of regions: "SIZE_FORMAT, max_heap_size / RegionSizeBytes); - } + log_info(gc, heap)("Heap region size: " SIZE_FORMAT "M", RegionSizeBytes / M); + log_info(gc, init)("Region size in bytes: "SIZE_FORMAT, RegionSizeBytes); + log_info(gc, init)("Region size shift: "SIZE_FORMAT, RegionSizeShift); + log_info(gc, init)("Initial number of regions: "SIZE_FORMAT, initial_heap_size / RegionSizeBytes); + log_info(gc, init)("Maximum number of regions: "SIZE_FORMAT, max_heap_size / RegionSizeBytes); } -void ShenandoahHeapRegion::init_top_at_mark_start() { - _top_at_mark_start = top(); - ShenandoahHeap::heap()->set_top_at_mark_start(bottom(), top()); +void ShenandoahHeapRegion::pin() { + assert(! SafepointSynchronize::is_at_safepoint(), "only outside safepoints"); + assert(_critical_pins >= 0, "sanity"); + Atomic::inc(&_critical_pins); } -void ShenandoahHeapRegion::set_top_at_mark_start(HeapWord* top) { - _top_at_mark_start = top; - ShenandoahHeap::heap()->set_top_at_mark_start(bottom(), top); +void ShenandoahHeapRegion::unpin() { + assert(! SafepointSynchronize::is_at_safepoint(), "only outside safepoints"); + Atomic::dec(&_critical_pins); + assert(_critical_pins >= 0, "sanity"); } -void ShenandoahHeapRegion::reset_top_at_prev_mark_start() { - _top_at_prev_mark_start = bottom(); +bool ShenandoahHeapRegion::is_pinned() { + assert(_critical_pins >= 0, "sanity"); + assert(SafepointSynchronize::is_at_safepoint(), "only at safepoints"); + return _critical_pins > 0; } - -HeapWord* ShenandoahHeapRegion::top_at_mark_start() { - return _top_at_mark_start; -} - -HeapWord* ShenandoahHeapRegion::top_at_prev_mark_start() { - return _top_at_prev_mark_start; -} - -HeapWord* ShenandoahHeapRegion::top_prev_mark_bitmap() { - return _top_prev_mark_bitmap; -} - -bool ShenandoahHeapRegion::allocated_after_prev_mark_start(HeapWord* addr) const { - return addr >= _top_at_prev_mark_start; -} - -void ShenandoahHeapRegion::swap_top_at_mark_start() { - HeapWord* tmp = _top_at_prev_mark_start; - _top_at_prev_mark_start = _top_at_mark_start; - _top_at_mark_start = tmp; - ShenandoahHeap::heap()->set_top_at_mark_start(bottom(), tmp); -} - -void ShenandoahHeapRegion::set_top_prev_mark_bitmap(HeapWord* top) { - _top_prev_mark_bitmap = top; -} diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -25,29 +25,30 @@ #define SHARE_VM_GC_SHENANDOAH_SHENANDOAHHEAPREGION_HPP #include "memory/space.hpp" -#include "memory/universe.hpp" -#include "utilities/sizes.hpp" class ShenandoahHeapRegion : public ContiguousSpace { +private: + static const size_t MIN_REGION_SIZE = 256*K; + static const size_t MIN_NUM_REGIONS = 10; + + static Monitor _mem_protect_lock; public: static size_t RegionSizeBytes; static size_t RegionSizeShift; private: + ShenandoahHeap* _heap; size_t _region_number; 
- volatile size_t liveData; + volatile size_t _live_data; MemRegion reserved; - bool _is_in_collection_set; bool _humongous_start; bool _humongous_continuation; - HeapWord* _top_at_mark_start; - HeapWord* _top_at_prev_mark_start; - HeapWord* _top_prev_mark_bitmap; + HeapWord* _new_top; - HeapWord* _new_top; + volatile jint _critical_pins; #ifdef ASSERT int _mem_protection_level; @@ -56,7 +57,7 @@ public: static void setup_heap_region_size(size_t initial_heap_size, size_t max_heap_size); - jint initialize_heap_region(HeapWord* start, size_t regionSize, int index); + jint initialize_heap_region(ShenandoahHeap* heap, HeapWord* start, size_t regionSize, size_t index); size_t region_number() const; @@ -65,23 +66,20 @@ // Returns TRUE when successful, FALSE if not successful or not supported. bool rollback_allocation(uint size); - void clearLiveData(); - void setLiveData(size_t s); + void clear_live_data(); + void set_live_data(size_t s); inline void increase_live_data(size_t s); - size_t getLiveData() const; + size_t get_live_data() const; void print_on(outputStream* st) const; size_t garbage() const; void recycle(); - void reset(); void oop_iterate_skip_unreachable(ExtendedOopClosure* cl, bool skip_unreachable_objects); - void marked_object_iterate(ObjectClosure* blk); - void object_iterate_interruptible(ObjectClosure* blk, bool allow_cancel); HeapWord* object_iterate_careful(ObjectClosureCareful* cl); @@ -91,9 +89,9 @@ // Just before GC we need to fill the current region. void fill_region(); - bool is_in_collection_set() const; + bool in_collection_set() const; - void set_is_in_collection_set(bool b); + void set_in_collection_set(bool b); void set_humongous_start(bool start); void set_humongous_continuation(bool continuation); @@ -107,31 +105,18 @@ void memProtectionOff(); #endif - static ByteSize is_in_collection_set_offset(); // The following are for humongous regions. 
We need to save the markOop saved_mark_word; void save_mark_word(oop obj) {saved_mark_word = obj->mark();} markOop mark_word() {return saved_mark_word;} - - void init_top_at_mark_start(); - void set_top_at_mark_start(HeapWord* top); - HeapWord* top_at_mark_start(); - void reset_top_at_prev_mark_start(); - HeapWord* top_at_prev_mark_start(); - HeapWord* top_prev_mark_bitmap(); - - void set_top_prev_mark_bitmap(HeapWord* top); - void swap_top_at_mark_start(); - - inline bool allocated_after_mark_start(HeapWord* addr); - bool allocated_after_prev_mark_start(HeapWord* addr) const; - void set_new_top(HeapWord* new_top) { _new_top = new_top; } HeapWord* new_top() const { return _new_top; } -private: - void do_reset(); + void pin(); + void unpin(); + + bool is_pinned(); }; diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.inline.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.inline.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -25,13 +25,10 @@ #define SHARE_VM_GC_SHENANDOAH_SHENANDOAHHEAPREGION_INLINE_HPP #include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" - -inline bool ShenandoahHeapRegion::allocated_after_mark_start(HeapWord* addr) { - return addr >= _top_at_mark_start; -} +#include "runtime/atomic.hpp" inline void ShenandoahHeapRegion::increase_live_data(size_t s) { - size_t new_live_data = (size_t) Atomic::add((jlong) s, (jlong*) &liveData); + size_t new_live_data = (size_t) Atomic::add_ptr(s, (intptr_t*) &_live_data); assert(new_live_data <= used() || is_humongous(), "can't have more live data than used"); } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -21,6 +21,7 @@ * */ +#include "gc_implementation/shenandoah/shenandoahHeap.hpp" #include "gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp" #include "gc_implementation/shenandoah/shenandoahHeapRegion.inline.hpp" #include "utilities/quickSort.hpp" @@ -83,7 +84,7 @@ if (skip_humongous_continuation && current->is_humongous_continuation()) { continue; } - if (skip_dirty_regions && current->is_in_collection_set()) { + if (skip_dirty_regions && current->in_collection_set()) { continue; } if (blk->doHeapRegion(current)) { @@ -103,7 +104,7 @@ if (skip_humongous_continuation && current->is_humongous_continuation()) { continue; } - if (skip_dirty_regions && current->is_in_collection_set()) { + if (skip_dirty_regions && current->in_collection_set()) { continue; } if (blk->doHeapRegion(current)) { @@ -119,9 +120,25 @@ active_heap_region_iterate(blk, skip_dirty_regions, skip_humongous_continuation); } -void ShenandoahHeapRegionSet::print() { - tty->print_cr("_current_index: "SIZE_FORMAT" current region: %p, _active_end: "SIZE_FORMAT, _current_index, _regions[_current_index], _active_end); - // Unimplemented(); +class PrintHeapRegionsClosure : public + ShenandoahHeapRegionClosure { +private: + outputStream* _st; +public: + PrintHeapRegionsClosure() : _st(tty) {} + PrintHeapRegionsClosure(outputStream* st) : _st(st) {} + + bool doHeapRegion(ShenandoahHeapRegion* r) { + r->print_on(_st); + return false; + } +}; + +void ShenandoahHeapRegionSet::print(outputStream* out) { + 
out->print_cr("_current_index: "SIZE_FORMAT" current region: %p, _active_end: "SIZE_FORMAT, _current_index, _regions[_current_index], _active_end); + + PrintHeapRegionsClosure pc1(out); + heap_region_iterate(&pc1, false, false); } ShenandoahHeapRegion* ShenandoahHeapRegionSet::next() { @@ -135,7 +152,7 @@ } ShenandoahHeapRegion* ShenandoahHeapRegionSet::claim_next() { - size_t next = Atomic::add(1, (jlong*) &_current_index) - 1; + size_t next = (size_t) Atomic::add_ptr(1, (intptr_t*) &_current_index) - 1; if (next < _active_end) { return get(next); } else { @@ -152,11 +169,7 @@ FindRegionClosure(ShenandoahHeapRegion* query) : _query(query), _result(false) {} bool doHeapRegion(ShenandoahHeapRegion* r) { - if (r == _query) { - _result = true; - } else { - _result = false; - } + _result = (r == _query); return _result; } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -30,6 +30,8 @@ class ShenandoahHeapRegion; +extern outputStream* tty; + class ShenandoahHeapRegionClosure : public StackObj { bool _complete; void incomplete() {_complete = false;} @@ -81,7 +83,7 @@ QuickSort::sort(_regions, _active_end, comparator, false); } - void print(); + void print(outputStream* out = tty); public: void heap_region_iterate(ShenandoahHeapRegionClosure* blk, diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahJNICritical.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahJNICritical.cpp Fri Nov 04 07:21:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2015, Red Hat, Inc. and/or its affiliates. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#include "gc_implementation/shenandoah/shenandoahJNICritical.hpp" -#include "gc_implementation/shenandoah/shenandoahHeap.hpp" - -#include "memory/gcLocker.hpp" -#include "runtime/mutexLocker.hpp" -#include "runtime/thread.hpp" -#include "runtime/vmThread.hpp" - -class VM_ShenandoahJNICriticalOperation : public VM_Operation { -private: - VM_Operation* _target; -public: - VM_ShenandoahJNICriticalOperation(VM_Operation* target); - VMOp_Type type() const; - bool doit_prologue(); - void doit_epilogue(); - void doit(); - const char* name() const; -}; - -ShenandoahJNICritical::ShenandoahJNICritical() : - _op_waiting_for_jni_critical(NULL), - _op_ready_for_execution(NULL) -{ -} - -/* - * This is called by the Java thread who leaves the last JNI critical block. - */ -void ShenandoahJNICritical::notify_jni_critical() { - assert(Thread::current()->is_Java_thread(), "call only from Java thread"); - - assert(_op_waiting_for_jni_critical != NULL, "must be waiting for jni critical notification"); - - if (ShenandoahTraceJNICritical) { - tty->print_cr("Shenandoah JNI critical: waiting until task is ready for re-execution"); - } - - { - MonitorLockerEx ml(ShenandoahJNICritical_lock); - while (_op_ready_for_execution == NULL) { - ml.wait(); - } - } - - assert(_op_waiting_for_jni_critical != NULL, "must be waiting for jni critical notification"); - assert(_op_ready_for_execution != NULL, "must be ready for re-execution"); - - if (ShenandoahTraceJNICritical) { - tty->print_cr("Shenandoah JNI critical: re-executing VM task after JNI critical notification"); - } - - VMThread::execute(_op_ready_for_execution); - - { - MonitorLockerEx ml(ShenandoahJNICritical_lock, Mutex::_no_safepoint_check_flag); - _op_waiting_for_jni_critical = NULL; - _op_ready_for_execution = NULL; - ml.notify(); - } - - if (ShenandoahTraceJNICritical) { - tty->print_cr("Shenandoah JNI critical: resuming Java thread after VM task re-execution"); - } - -} - -/* - * This is called by the VM thread, if it determines that the task must wait - * for JNI critical regions to be left. - */ -void ShenandoahJNICritical::set_waiting_for_jni_before_gc(VM_Operation* op) { - assert(Thread::current()->is_VM_thread(), "call only from VM thread"); - _op_waiting_for_jni_critical = op; -} - -/** - * This is called by the Shenandoah concurrent thread in order - * to execute a VM_Operation on the VM thread, that needs to perform - * a JNI critical region check. 
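For reference, the deleted class implemented a two-slot hand-off between the VM thread (which parks a refused operation) and the Java thread that leaves the last JNI critical region (which re-submits it). A much-simplified single-slot model of that protocol, with illustrative names that are not HotSpot API:

    #include <condition_variable>
    #include <functional>
    #include <mutex>

    class JniCriticalHandoffModel {
      std::mutex _m;
      std::condition_variable _cv;
      std::function<void()> _deferred;   // models _op_waiting_for_jni_critical
    public:
      // Called on the VM thread when the GC op is refused (GC_locker active).
      void defer(std::function<void()> op) {
        std::lock_guard<std::mutex> g(_m);
        _deferred = std::move(op);
        _cv.notify_all();
      }
      // Called by the Java thread leaving the last critical region.
      void notify_critical_regions_left() {
        std::function<void()> op;
        {
          std::unique_lock<std::mutex> l(_m);
          _cv.wait(l, [this] { return static_cast<bool>(_deferred); });
          op = std::move(_deferred);
          _deferred = nullptr;
        }
        op();   // re-execute the deferred GC operation
      }
    };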
- */ -void ShenandoahJNICritical::execute_in_vm_thread(VM_Operation* op) { - assert(_op_waiting_for_jni_critical == NULL, "start out with no waiting op"); - assert(_op_ready_for_execution == NULL, "start out with no ready op"); - VM_ShenandoahJNICriticalOperation jni_op(op); - VMThread::execute(&jni_op); - - { - MonitorLockerEx ml(ShenandoahJNICritical_lock, Mutex::_no_safepoint_check_flag); - - if (_op_waiting_for_jni_critical != NULL) { - if (ShenandoahTraceJNICritical) { - tty->print_cr("Shenandoah JNI critical: make task ready for re-execution"); - } - - _op_ready_for_execution = _op_waiting_for_jni_critical; - ml.notify(); - - if (ShenandoahTraceJNICritical) { - tty->print_cr("Shenandoah JNI critical: waiting for task to get re-executed"); - } - - while (_op_ready_for_execution != NULL) { - ml.wait(Mutex::_no_safepoint_check_flag); - } - - if (ShenandoahTraceJNICritical) { - tty->print_cr("Shenandoah JNI critical: resuming concurrent GC thread after task has been re-executed"); - } - } - } - - assert(_op_waiting_for_jni_critical == NULL, "finish with no waiting op"); - assert(_op_ready_for_execution == NULL, "finish with no ready op"); -} - - -VM_ShenandoahJNICriticalOperation::VM_ShenandoahJNICriticalOperation(VM_Operation* target) - : _target(target) { -} - -VM_Operation::VMOp_Type VM_ShenandoahJNICriticalOperation::type() const { - return _target->type(); -} - -const char* VM_ShenandoahJNICriticalOperation::name() const { - return _target->name(); -} - -bool VM_ShenandoahJNICriticalOperation::doit_prologue() { - return _target->doit_prologue(); -} - -void VM_ShenandoahJNICriticalOperation::doit_epilogue() { - _target->doit_epilogue(); -} - -void VM_ShenandoahJNICriticalOperation::doit() { - if (! GC_locker::check_active_before_gc()) { - _target->doit(); - } else { - - if (ShenandoahTraceJNICritical) { - tty->print_cr("Shenandoah JNI critical: Deferring JNI critical op because of active JNI critical regions"); - } - - // This makes the GC background thread wait, and kick off evacuation as - // soon as JNI notifies us that critical regions have all been left. - ShenandoahHeap *sh = ShenandoahHeap::heap(); - sh->jni_critical()->set_waiting_for_jni_before_gc(this); - } -} diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahJNICritical.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahJNICritical.hpp Fri Nov 04 07:21:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2015, Red Hat, Inc. and/or its affiliates. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAHJNICRITICAL_HPP -#define SHARE_VM_GC_SHENANDOAH_SHENANDOAHJNICRITICAL_HPP - -#include "gc_implementation/shared/vmGCOperations.hpp" -#include "memory/allocation.hpp" - -class ShenandoahJNICritical : public CHeapObj { -private: - VM_Operation* _op_waiting_for_jni_critical; - VM_Operation* _op_ready_for_execution; -public: - ShenandoahJNICritical(); - void notify_jni_critical(); - void set_waiting_for_jni_before_gc(VM_Operation* op); - void execute_in_vm_thread(VM_Operation* op); -}; - - -#endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHJNICRITICAL_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahLogging.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahLogging.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -0,0 +1,13 @@ + +#define log_trace(...) if (ShenandoahLogTrace) gclog_or_tty->print_cr +#define log_debug(...) if (ShenandoahLogDebug) gclog_or_tty->print_cr +#define log_info(...) if (ShenandoahLogInfo) gclog_or_tty->print_cr +#define log_warning(...) if (ShenandoahLogInfo) gclog_or_tty->print_cr + +#ifndef PRODUCT +#define log_develop_trace(...) if (ShenandoahLogTrace) gclog_or_tty->print_cr +#define log_develop_debug(...) if (ShenandoahLogDebug) gclog_or_tty->print_cr +#else +#define log_develop_trace(...) +#define log_develop_debug(...) +#endif diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -23,6 +23,7 @@ #include "code/codeCache.hpp" #include "gc_implementation/shared/gcTraceTime.hpp" +#include "gc_implementation/shared/gcTimer.hpp" #include "gc_implementation/shared/isGCActiveMark.hpp" #include "gc_implementation/shenandoah/brooksPointer.hpp" #include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" @@ -56,31 +57,40 @@ return true; } bool is_safe(narrowOop o) { - Unimplemented(); - return true; + oop obj = oopDesc::decode_heap_oop(o); + return is_safe(obj); } #endif }; class ClearInCollectionSetHeapRegionClosure: public ShenandoahHeapRegionClosure { +private: + ShenandoahHeap* _heap; +public: + + ClearInCollectionSetHeapRegionClosure() : _heap(ShenandoahHeap::heap()) { + } + bool doHeapRegion(ShenandoahHeapRegion* r) { - r->set_top_at_mark_start(r->end()); - r->clearLiveData(); + _heap->set_next_top_at_mark_start(r->bottom(), r->top()); + r->clear_live_data(); r->set_concurrent_iteration_safe_limit(r->top()); - r->set_top_prev_mark_bitmap(r->top_at_mark_start()); return false; } }; +STWGCTimer* ShenandoahMarkCompact::_gc_timer = NULL; -void ShenandoahMarkCompact::do_mark_compact() { +void ShenandoahMarkCompact::initialize() { + _gc_timer = new (ResourceObj::C_HEAP, mtGC) STWGCTimer(); +} + +void ShenandoahMarkCompact::do_mark_compact(GCCause::Cause gc_cause) { ShenandoahHeap* _heap = ShenandoahHeap::heap(); + ShenandoahCollectorPolicy* policy = _heap->shenandoahPolicy(); - GCTimer* gc_timer = _heap->shenandoahPolicy()->conc_timer(); - gc_timer->register_gc_start(); - - COMPILER2_PRESENT(DerivedPointerTable::clear()); + _gc_timer->register_gc_start(); _heap->set_full_gc_in_progress(true); @@ -88,19 +98,15 @@ IsGCActiveMark is_active; assert(Thread::current()->is_VM_thread(), "Do full GC only while world is stopped"); - assert(_heap->is_bitmap_clear(), "require cleared 
bitmap"); + assert(_heap->is_next_bitmap_clear(), "require cleared bitmap"); assert(!_heap->concurrent_mark_in_progress(), "can't do full-GC while marking is in progress"); assert(!_heap->is_evacuation_in_progress(), "can't do full-GC while evacuation is in progress"); - _heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::full_gc); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc); ClearInCollectionSetHeapRegionClosure cl; _heap->heap_region_iterate(&cl, false, false); - _heap->clear_cancelled_concgc(); - - assert(_heap->is_bitmap_clear(), "require cleared bitmap"); - /* if (ShenandoahVerify) { // Full GC should only be called between regular concurrent cycles, therefore @@ -115,7 +121,7 @@ oopDesc::set_bs(&bs); { - GCTraceTime time("Pause Init-Mark", ShenandoahTraceFullGC, true, _heap->shenandoahPolicy()->conc_timer(), _heap->tracer()->gc_id()); + GCTraceTime time("Pause Full", ShenandoahLogInfo, true, _gc_timer, _heap->tracer()->gc_id()); if (UseTLAB) { _heap->ensure_parsability(true); @@ -131,21 +137,26 @@ OrderAccess::fence(); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_mark); phase1_mark_heap(); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_mark); OrderAccess::fence(); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_calculate_addresses); ShenandoahHeapRegionSet* copy_queues[_heap->max_parallel_workers()]; phase2_calculate_target_addresses(copy_queues); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_calculate_addresses); OrderAccess::fence(); - // Don't add any more derived pointers during phase3 - COMPILER2_PRESENT(DerivedPointerTable::set_active(false)); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_adjust_pointers); + phase3_update_references(); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_adjust_pointers); - phase3_update_references(); - + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_copy_objects); phase4_compact_objects(copy_queues); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_copy_objects); CodeCache::gc_epilogue(); JvmtiExport::gc_epilogue(); @@ -157,50 +168,55 @@ _heap->verify_heap_after_evacuation(); } - _heap->reset_mark_bitmap(); - _heap->_bytesAllocSinceCM = 0; + _heap->set_bytes_allocated_since_cm(0); _heap->set_need_update_refs(false); _heap->set_full_gc_in_progress(false); } - COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + _gc_timer->register_gc_end(); - gc_timer->register_gc_end(); - - _heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::full_gc); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc); oopDesc::set_bs(old_bs); } #ifdef ASSERT class VerifyNotForwardedPointersClosure : public MetadataAwareOopClosure { - void do_oop(oop* p) { - oop obj = oopDesc::load_heap_oop(p); - if (! oopDesc::is_null(obj)) { +private: + template + inline void do_oop_work(T* p) { + T o = oopDesc::load_heap_oop(p); + if (! oopDesc::is_null(o)) { + oop obj = oopDesc::decode_heap_oop_not_null(o); assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static_not_null(obj)), "expect forwarded oop"); ShenandoahHeap* heap = ShenandoahHeap::heap(); - if (! heap->is_marked_current(obj)) { + if (! heap->is_marked_complete(obj)) { tty->print_cr("ref region humongous? %s", BOOL_TO_STR(heap->heap_region_containing(p)->is_humongous())); } - assert(heap->is_marked_current(obj), "must be marked"); - assert(! 
heap->allocated_after_mark_start((HeapWord*) obj), "must be truly marked"); + assert(heap->is_marked_complete(obj), "must be marked"); + assert(! heap->allocated_after_complete_mark_start((HeapWord*) obj), "must be truly marked"); } } +public: + void do_oop(oop* p) { + do_oop_work(p); + } void do_oop(narrowOop* p) { - Unimplemented(); + do_oop_work(p); } }; class ShenandoahMCVerifyAfterMarkingObjectClosure : public ObjectClosure { +public: void do_object(oop p) { ShenandoahHeap* heap = ShenandoahHeap::heap(); assert(oopDesc::unsafe_equals(p, ShenandoahBarrierSet::resolve_oop_static_not_null(p)), "expect forwarded oop"); - assert(heap->is_marked_current(p), "must be marked"); - assert(! heap->allocated_after_mark_start((HeapWord*) p), "must be truly marked"); + assert(heap->is_marked_complete(p), "must be marked"); + assert(! heap->allocated_after_complete_mark_start((HeapWord*) p), "must be truly marked"); VerifyNotForwardedPointersClosure cl; p->oop_iterate(&cl); } @@ -210,60 +226,40 @@ bool doHeapRegion(ShenandoahHeapRegion* r) { ShenandoahMCVerifyAfterMarkingObjectClosure cl; if (! r->is_humongous_continuation()) { - r->marked_object_iterate(&cl); + ShenandoahHeap::heap()->marked_object_iterate(r, &cl); } return false; } }; -class ShenandoahMCVerifyBeforeMarkingObjectClosure : public ObjectClosure { -public: - bool marked; - ShenandoahMCVerifyBeforeMarkingObjectClosure() : ObjectClosure(), marked(false) { - } - void do_object(oop p) { - marked = true; - } -}; - -class ShenandoahMCVerifyBeforeMarkingRegionClosure : public ShenandoahHeapRegionClosure { -public: - bool doHeapRegion(ShenandoahHeapRegion* r) { - ShenandoahMCVerifyBeforeMarkingObjectClosure cl; - if (! r->is_humongous_continuation()) { - r->marked_object_iterate(&cl); - } - assert(! cl.marked, "must not see marked objects"); - return false; - } -}; - #endif void ShenandoahMarkCompact::phase1_mark_heap() { ShenandoahHeap* _heap = ShenandoahHeap::heap(); - - GCTraceTime time("Phase 1: Mark live objects", ShenandoahTraceFullGC, true, _heap->shenandoahPolicy()->conc_timer(), _heap->tracer()->gc_id()); - -#ifdef ASSERT - ShenandoahMCVerifyBeforeMarkingRegionClosure cl1; - _heap->heap_region_iterate(&cl1); -#endif + GCTraceTime time("Phase 1: Mark live objects", ShenandoahLogInfo, true, _gc_timer, _heap->tracer()->gc_id()); ShenandoahConcurrentMark* cm = _heap->concurrentMark(); - cm->prepare_unmarked_root_objs_no_derived_ptrs(true); - if (ShenandoahProcessReferences) { - ReferenceProcessor* rp = _heap->ref_processor(); - // enable ("weak") refs discovery - rp->enable_discovery(true /*verify_no_refs*/, true); - rp->setup_policy(true); // snapshot the soft ref policy to be used in this cycle - } - cm->shared_finish_mark_from_roots(); + cm->set_process_references(true); + cm->set_unload_classes(true); + + ReferenceProcessor* rp = _heap->ref_processor(); + // enable ("weak") refs discovery + rp->enable_discovery(true /*verify_no_refs*/, true); + rp->setup_policy(true); // snapshot the soft ref policy to be used in this cycle + rp->set_active_mt_degree(_heap->max_parallel_workers()); + + COMPILER2_PRESENT(DerivedPointerTable::clear()); + cm->update_roots(); + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + + cm->mark_roots(); + cm->shared_finish_mark_from_roots(/* full_gc = */ true); + + _heap->swap_mark_bitmaps(); if (VerifyDuringGC) { HandleMark hm; // handle scope - COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact); // Universe::heap()->prepare_for_verify(); _heap->prepare_for_verify(); // Note: we can verify 
only the heap here. When an object is @@ -286,6 +282,25 @@ #endif } +class ShenandoahMCReclaimHumongousRegionClosure : public ShenandoahHeapRegionClosure { +private: + ShenandoahHeap* _heap; +public: + ShenandoahMCReclaimHumongousRegionClosure() : _heap(ShenandoahHeap::heap()) { + } + + bool doHeapRegion(ShenandoahHeapRegion* r) { + if (r->is_humongous_start()) { + oop humongous_obj = oop(r->bottom() + BrooksPointer::word_size()); + if (! _heap->is_marked_complete(humongous_obj)) { + _heap->reclaim_humongous_region_at(r); + } + } + return false; + } +}; + + class ShenandoahPrepareForCompactionObjectClosure : public ObjectClosure { private: @@ -318,10 +333,10 @@ } void do_object(oop p) { assert(_from_region != NULL, "must set before work"); - assert(_heap->is_marked_current(p), "must be marked"); - assert(! _heap->allocated_after_mark_start((HeapWord*) p), "must be truly marked"); + assert(_heap->is_marked_complete(p), "must be marked"); + assert(! _heap->allocated_after_complete_mark_start((HeapWord*) p), "must be truly marked"); size_t size = p->size(); - size_t obj_size = size + BrooksPointer::BROOKS_POINTER_OBJ_SIZE; + size_t obj_size = size + BrooksPointer::word_size(); if (_compact_point + obj_size > _to_region->end()) { // Object doesn't fit. Pick next to-region and start compacting there. _to_region->set_new_top(_compact_point); @@ -335,10 +350,9 @@ _compact_point = _to_region->bottom(); } assert(_compact_point + obj_size <= _to_region->end(), "must fit"); - // tty->print_cr("forwarding %p to %p", p, _compact_point + BrooksPointer::BROOKS_POINTER_OBJ_SIZE); assert(oopDesc::unsafe_equals(p, ShenandoahBarrierSet::resolve_oop_static_not_null(p)), "expect forwarded oop"); - BrooksPointer::get(p).set_forwardee(oop(_compact_point + BrooksPointer::BROOKS_POINTER_OBJ_SIZE)); + BrooksPointer::set_raw(p, _compact_point + BrooksPointer::word_size()); _compact_point += obj_size; } }; @@ -351,12 +365,13 @@ ShenandoahHeapRegion* next_from_region(ShenandoahHeapRegionSet* copy_queue) { ShenandoahHeapRegion* from_region = _from_regions->claim_next(); - while (from_region != NULL && from_region->is_humongous()) { + while (from_region != NULL && (from_region->is_humongous() || from_region->is_pinned())) { from_region = _from_regions->claim_next(); } if (from_region != NULL) { assert(copy_queue != NULL, "sanity"); assert(! from_region->is_humongous(), "must not get humongous regions here"); + assert(! 
from_region->is_pinned(), "no pinned region in mark-compact"); copy_queue->add_region(from_region); } return from_region; @@ -369,6 +384,7 @@ } void work(uint worker_id) { + ShenandoahHeap* heap = ShenandoahHeap::heap(); ShenandoahHeapRegionSet* copy_queue = _copy_queues[worker_id]; ShenandoahHeapRegion* from_region = next_from_region(copy_queue); if (from_region == NULL) return; @@ -377,7 +393,7 @@ while (from_region != NULL) { assert(from_region != NULL, "sanity"); cl.set_from_region(from_region); - from_region->marked_object_iterate(&cl); + heap->marked_object_iterate(from_region, &cl); if (from_region != cl.to_region()) { assert(from_region != NULL, "sanity"); to_regions->add_region(from_region); @@ -400,10 +416,13 @@ void ShenandoahMarkCompact::phase2_calculate_target_addresses(ShenandoahHeapRegionSet** copy_queues) { ShenandoahHeap* heap = ShenandoahHeap::heap(); - GCTraceTime time("Phase 2: Compute new object addresses", ShenandoahTraceFullGC, true, heap->shenandoahPolicy()->conc_timer(), heap->tracer()->gc_id()); + GCTraceTime time("Phase 2: Compute new object addresses", ShenandoahLogInfo, true, _gc_timer, heap->tracer()->gc_id()); + + ShenandoahMCReclaimHumongousRegionClosure cl; + heap->heap_region_iterate(&cl); // Initialize copy queues. - for (int i = 0; i < heap->max_parallel_workers(); i++) { + for (uint i = 0; i < heap->max_parallel_workers(); i++) { copy_queues[i] = new ShenandoahHeapRegionSet(heap->max_regions()); } @@ -422,16 +441,23 @@ ShenandoahAdjustPointersClosure() : _heap(ShenandoahHeap::heap()) { } - void do_oop(oop* p) { - oop obj = oopDesc::load_heap_oop(p); - if (! oopDesc::is_null(obj)) { - assert(_heap->is_marked_current(obj), "must be marked"); - oop forw = oop(BrooksPointer::get(obj).get_forwardee()); - oopDesc::store_heap_oop(p, forw); +private: + template + inline void do_oop_work(T* p) { + T o = oopDesc::load_heap_oop(p); + if (! oopDesc::is_null(o)) { + oop obj = oopDesc::decode_heap_oop_not_null(o); + assert(_heap->is_marked_complete(obj), "must be marked"); + oop forw = oop(BrooksPointer::get_raw(obj)); + oopDesc::encode_store_heap_oop(p, forw); } } +public: + void do_oop(oop* p) { + do_oop_work(p); + } void do_oop(narrowOop* p) { - Unimplemented(); + do_oop_work(p); } }; @@ -444,7 +470,7 @@ _cl(cl), _heap(ShenandoahHeap::heap()) { } void do_object(oop p) { - assert(_heap->is_marked_current(p), "must be marked"); + assert(_heap->is_marked_complete(p), "must be marked"); p->oop_iterate(_cl); } }; @@ -460,12 +486,13 @@ } void work(uint worker_id) { + ShenandoahHeap* heap = ShenandoahHeap::heap(); ShenandoahHeapRegion* r = _regions->claim_next(); ShenandoahAdjustPointersClosure cl; ShenandoahAdjustPointersObjectClosure obj_cl(&cl); while (r != NULL) { if (! 
r->is_humongous_continuation()) {
-        r->marked_object_iterate(&obj_cl);
+        heap->marked_object_iterate(r, &obj_cl);
       }
       r = _regions->claim_next();
     }
@@ -486,38 +513,30 @@
   void work(uint worker_id) {
     ShenandoahAdjustPointersClosure cl;
     CLDToOopClosure adjust_cld_closure(&cl, true);
-    CodeBlobToOopClosure adjust_code_closure(&cl,
+    MarkingCodeBlobClosure adjust_code_closure(&cl,
                                              CodeBlobToOopClosure::FixRelocations);
-    _rp->process_all_roots(&cl,
+    _rp->process_all_roots(&cl, &cl,
                            &adjust_cld_closure,
-                           &adjust_code_closure);
+                           &adjust_code_closure, worker_id);
   }
 };

 void ShenandoahMarkCompact::phase3_update_references() {
   ShenandoahHeap* heap = ShenandoahHeap::heap();
-  GCTraceTime time("Phase 3: Adjust pointers", ShenandoahTraceFullGC, true, heap->shenandoahPolicy()->conc_timer(), heap->tracer()->gc_id());
+  GCTraceTime time("Phase 3: Adjust pointers", ShenandoahLogInfo, true, _gc_timer, heap->tracer()->gc_id());

-  // Need cleared claim bits for the roots processing
+  // Need cleared claim bits for the roots processing
   ClassLoaderDataGraph::clear_claimed_marks();

   {
-    heap->set_par_threads(heap->max_parallel_workers());
+    COMPILER2_PRESENT(DerivedPointerTable::clear());
     ShenandoahRootProcessor rp(heap, heap->max_parallel_workers());
     ShenandoahAdjustRootPointersTask task(&rp);
     heap->workers()->run_task(&task);
-    heap->set_par_threads(0);
+    COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
   }

-  // Now adjust pointers in remaining weak roots. (All of which should
-  // have been cleared if they pointed to non-surviving objects.)
-  ShenandoahAdjustPointersClosure cl;
-  heap->ref_processor()->weak_oops_do(&cl);
-
-  ShenandoahAlwaysTrueClosure always_true;
-  JNIHandles::weak_oops_do(&always_true, &cl);
-
   ShenandoahHeapRegionSet* regions = heap->regions();
   regions->clear_current_index();
   ShenandoahAdjustPointersTask adjust_pointers_task(regions);
@@ -531,16 +550,16 @@
   ShenandoahCompactObjectsClosure() : _heap(ShenandoahHeap::heap()) {
   }
   void do_object(oop p) {
-    assert(_heap->is_marked_current(p), "must be marked");
+    assert(_heap->is_marked_complete(p), "must be marked");
     size_t size = p->size();
-    HeapWord* compact_to = BrooksPointer::get(p).get_forwardee();
+    HeapWord* compact_to = BrooksPointer::get_raw(p);
     HeapWord* compact_from = (HeapWord*) p;
     if (compact_from != compact_to) {
      Copy::aligned_conjoint_words(compact_from, compact_to, size);
     }
     oop new_obj = oop(compact_to);
     // new_obj->init_mark();
-    _heap->initialize_brooks_ptr(new_obj);
+    BrooksPointer::initialize(new_obj);
   }
 };
@@ -552,13 +571,14 @@
     _regions(regions) {
   }
   void work(uint worker_id) {
+    ShenandoahHeap* heap = ShenandoahHeap::heap();
     ShenandoahHeapRegionSet* copy_queue = _regions[worker_id];
     copy_queue->clear_current_index();
     ShenandoahCompactObjectsClosure cl;
     ShenandoahHeapRegion* r = copy_queue->next();
     while (r != NULL) {
       assert(! r->is_humongous(), "must not get humongous regions here");
-      r->marked_object_iterate(&cl);
+      heap->marked_object_iterate(r, &cl);
       r->set_top(r->new_top());
       r = copy_queue->next();
     }
@@ -575,54 +595,52 @@
   }
   bool doHeapRegion(ShenandoahHeapRegion* r) {
-    r->reset_top_at_prev_mark_start();
-    r->set_is_in_collection_set(false);
+    // Need to reset the complete-top-at-mark-start pointer here because
+    // the complete marking bitmap is no longer valid. This ensures
+    // size-based iteration in marked_object_iterate().
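The ShenandoahAdjustPointersClosure in the hunk above rewrites every reference through the forwarding slot that phase 2 filled in. Stripped of oop encoding and closure plumbing, the operation reduces to the following sketch, where plain pointers stand in for oops and the types are illustrative:

    #include <vector>

    struct ObjModel {
      ObjModel* forwardee;              // models the Brooks forwarding slot
      std::vector<ObjModel**> fields;   // models fields visited by oop_iterate()
    };

    // Phase-3 analogue: repoint each reference field at the destination
    // address stored in the referent's forwarding slot (BrooksPointer::get_raw).
    inline void adjust_pointers(ObjModel* o) {
      for (ObjModel** field : o->fields) {
        if (*field != nullptr) {
          *field = (*field)->forwardee;
        }
      }
    }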
+ _heap->set_complete_top_at_mark_start(r->bottom(), r->bottom()); + r->set_in_collection_set(false); if (r->is_humongous()) { - if (r->is_humongous_start()) { - oop humongous_obj = oop(r->bottom() + BrooksPointer::BROOKS_POINTER_OBJ_SIZE); - if (! _heap->is_marked_current(humongous_obj)) { - _heap->reclaim_humongous_region_at(r); - } else { - _live += ShenandoahHeapRegion::RegionSizeBytes; - } - } else { - _live += ShenandoahHeapRegion::RegionSizeBytes; - } - + _live += ShenandoahHeapRegion::RegionSizeBytes; } else { size_t live = r->used(); if (live == 0) { r->recycle(); _heap->add_free_region(r); } - r->setLiveData(live); + r->set_live_data(live); _live += live; } return false; } - size_t getLive() { return _live;} + size_t get_live() { return _live; } }; void ShenandoahMarkCompact::phase4_compact_objects(ShenandoahHeapRegionSet** copy_queues) { ShenandoahHeap* heap = ShenandoahHeap::heap(); - GCTraceTime time("Phase 4: Move objects", ShenandoahTraceFullGC, true, heap->shenandoahPolicy()->conc_timer(), heap->tracer()->gc_id()); + GCTraceTime time("Phase 4: Move objects", ShenandoahLogInfo, true, _gc_timer, heap->tracer()->gc_id()); ShenandoahCompactObjectsTask compact_task(copy_queues); heap->workers()->run_task(&compact_task); heap->clear_cset_fast_test(); + + // Reset complete bitmap. We're about to reset the complete-top-at-mark-start pointer + // and must ensure the bitmap is in sync. + heap->reset_complete_mark_bitmap(heap->workers()); + ShenandoahPostCompactClosure post_compact; heap->heap_region_iterate(&post_compact); - // We just reset the top-at-prev-mark-start pointer. Thus - // we also need to clear the bitmap, otherwise it would make - // a mess later when clearing the prev bitmap. - heap->prev_mark_bit_map()->clearAll(); + heap->clear_cancelled_concgc(); - heap->set_used(post_compact.getLive()); + // Also clear the next bitmap in preparation for next marking. 
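The ShenandoahCompactObjectsClosure earlier in this hunk performs the actual phase-4 move: an overlap-tolerant word copy to the precomputed destination, then re-initialization of the forwarding slot. A minimal stand-in, assuming (as the ascending-order compaction implies) that memmove semantics are sufficient:

    #include <cstddef>
    #include <cstring>

    typedef void* WordModel;   // stand-in for HeapWord

    // Analogue of the do_object() body above: Copy::aligned_conjoint_words
    // tolerates overlapping ranges, like memmove, because objects only
    // slide toward lower addresses.
    inline WordModel* move_object(WordModel* from, WordModel* to, size_t size_in_words) {
      if (from != to) {
        std::memmove(to, from, size_in_words * sizeof(WordModel));
      }
      // The moved object's forwarding slot is then re-pointed at itself,
      // the analogue of BrooksPointer::initialize(new_obj).
      return to;
    }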
+ heap->reset_next_mark_bitmap(heap->workers()); - for (int i = 0; i < heap->max_parallel_workers(); i++) { + heap->set_used(post_compact.get_live()); + + for (uint i = 0; i < heap->max_parallel_workers(); i++) { delete copy_queues[i]; } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -26,7 +26,7 @@ #include "memory/allocation.hpp" -class HeapWord; +class STWGCTimer; class ShenandoahHeapRegionSet; /** @@ -43,10 +43,12 @@ */ class ShenandoahMarkCompact: AllStatic { +private: + static STWGCTimer* _gc_timer; public: - - static void do_mark_compact(); + static void initialize(); + static void do_mark_compact(GCCause::Cause gc_cause); private: @@ -54,7 +56,6 @@ static void phase2_calculate_target_addresses(ShenandoahHeapRegionSet** copy_queues); static void phase3_update_references(); static void phase4_compact_objects(ShenandoahHeapRegionSet** copy_queues); - static void finish_compaction(HeapWord* last_addr); static void allocate_stacks(); }; diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahMonitoringSupport.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMonitoringSupport.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMonitoringSupport.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -55,14 +55,14 @@ return _stw_collection_counters; } +CollectorCounters* ShenandoahMonitoringSupport::full_collection_counters() { + return _full_collection_counters; +} + CollectorCounters* ShenandoahMonitoringSupport::concurrent_collection_counters() { return _concurrent_collection_counters; } -CollectorCounters* ShenandoahMonitoringSupport::full_collection_counters() { - return _full_collection_counters; -} - void ShenandoahMonitoringSupport::update_counters() { MemoryService::track_memory_usage(); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -24,20 +24,22 @@ #ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAHOOPCLOSURES_HPP #define SHARE_VM_GC_SHENANDOAH_SHENANDOAHOOPCLOSURES_HPP +#include "gc_implementation/shenandoah/shenandoahTaskqueue.hpp" + +typedef BufferedOverflowTaskQueue ShenandoahBufferedOverflowTaskQueue; +typedef Padded SCMObjToScanQueue; + class ShenandoahHeap; -class QHolder; class ShenandoahMarkUpdateRefsClosure : public MetadataAwareOopClosure { - QHolder* _queue; + SCMObjToScanQueue* _queue; ShenandoahHeap* _heap; public: - ShenandoahMarkUpdateRefsClosure(QHolder* q); + ShenandoahMarkUpdateRefsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp); - void do_oop_nv(narrowOop* p) { - Unimplemented(); - } - void do_oop_nv(oop* p); + template + void do_oop_nv(T* p); virtual void do_oop(narrowOop* p) { do_oop_nv(p); } virtual void do_oop(oop* p) { do_oop_nv(p); } @@ -45,16 +47,14 @@ }; class ShenandoahMarkRefsClosure : public MetadataAwareOopClosure { - QHolder* _queue; + SCMObjToScanQueue* _queue; ShenandoahHeap* _heap; public: - ShenandoahMarkRefsClosure(QHolder* q); + ShenandoahMarkRefsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp); - void do_oop_nv(narrowOop* p) { - 
Unimplemented(); - } - void do_oop_nv(oop* p); + template + void do_oop_nv(T* p); virtual void do_oop(narrowOop* p) { do_oop_nv(p); } virtual void do_oop(oop* p) { do_oop_nv(p); } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.inline.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.inline.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -27,22 +27,23 @@ #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp" -inline void ShenandoahMarkUpdateRefsClosure::do_oop_nv(oop* p) { +template +inline void ShenandoahMarkUpdateRefsClosure::do_oop_nv(T* p) { // We piggy-back reference updating to the marking tasks. oop obj = _heap->maybe_update_oop_ref(p); assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static(obj)), "need to-space object here"); if (! oopDesc::is_null(obj)) { - ShenandoahConcurrentMark::mark_and_push(obj, _heap, _queue->queue()); + ShenandoahConcurrentMark::mark_and_push(obj, _heap, _queue); } } -inline void ShenandoahMarkRefsClosure::do_oop_nv(oop* p) { - oop obj = oopDesc::load_heap_oop(p); - assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static(obj)), "expect forwarded obj in queue"); +template +inline void ShenandoahMarkRefsClosure::do_oop_nv(T* p) { + oop obj = oopDesc::load_decode_heap_oop(p); + assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static(obj)), "need to-space object here"); if (! oopDesc::is_null(obj)) { - ShenandoahConcurrentMark::mark_and_push(obj, _heap, _queue->queue()); + ShenandoahConcurrentMark::mark_and_push(obj, _heap, _queue); } } - #endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHOOPCLOSURES_INLINE_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahPhaseTimes.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahPhaseTimes.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#include "gc_implementation/shenandoah/shenandoahPhaseTimes.hpp"
+#include "gc_implementation/shenandoah/shenandoahWorkerDataArray.inline.hpp"
+#include "runtime/os.hpp"
+
+ShenandoahPhaseTimes::ShenandoahPhaseTimes(uint max_gc_threads) :
+  _max_gc_threads(max_gc_threads)
+{
+  assert(max_gc_threads > 0, "Must have some GC threads");
+
+  // Root scanning phases
+  _gc_par_phases[ThreadRoots] = new ShenandoahWorkerDataArray<double>(max_gc_threads, "Thread Roots (ms):");
+  _gc_par_phases[CodeCacheRoots] = new ShenandoahWorkerDataArray<double>(max_gc_threads, "CodeCache Roots (ms):");
+  _gc_par_phases[StringTableRoots] = new ShenandoahWorkerDataArray<double>(max_gc_threads, "StringTable Roots (ms):");
+  _gc_par_phases[UniverseRoots] = new ShenandoahWorkerDataArray<double>(max_gc_threads, "Universe Roots (ms):");
+  _gc_par_phases[JNIRoots] = new ShenandoahWorkerDataArray<double>(max_gc_threads, "JNI Handles Roots (ms):");
+  _gc_par_phases[JNIWeakRoots] = new ShenandoahWorkerDataArray<double>(max_gc_threads, "JNI Weak Roots (ms):");
+  _gc_par_phases[ObjectSynchronizerRoots] = new ShenandoahWorkerDataArray<double>(max_gc_threads, "ObjectSynchronizer Roots (ms):");
+  _gc_par_phases[FlatProfilerRoots] = new ShenandoahWorkerDataArray<double>(max_gc_threads, "FlatProfiler Roots (ms):");
+  _gc_par_phases[ManagementRoots] = new ShenandoahWorkerDataArray<double>(max_gc_threads, "Management Roots (ms):");
+  _gc_par_phases[SystemDictionaryRoots] = new ShenandoahWorkerDataArray<double>(max_gc_threads, "SystemDictionary Roots (ms):");
+  _gc_par_phases[CLDGRoots] = new ShenandoahWorkerDataArray<double>(max_gc_threads, "CLDG Roots (ms):");
+  _gc_par_phases[JVMTIRoots] = new ShenandoahWorkerDataArray<double>(max_gc_threads, "JVMTI Roots (ms):");
+}
+
+// record the time a phase took in seconds
+void ShenandoahPhaseTimes::record_time_secs(GCParPhases phase, uint worker_i, double secs) {
+  _gc_par_phases[phase]->set(worker_i, secs);
+}
+
+double ShenandoahPhaseTimes::average(uint i) {
+  return _gc_par_phases[i]->average();
+}
+void ShenandoahPhaseTimes::reset(uint i) {
+  _gc_par_phases[i]->reset();
+}
+
+void ShenandoahPhaseTimes::print() {
+  for (uint i = 0; i < GCParPhasesSentinel; i++) {
+    _gc_par_phases[i]->print_summary_on(tty);
+  }
+}
+
+ShenandoahParPhaseTimesTracker::ShenandoahParPhaseTimesTracker(ShenandoahPhaseTimes* phase_times,
+                                                               ShenandoahPhaseTimes::GCParPhases phase, uint worker_id) :
+    _phase_times(phase_times), _phase(phase), _worker_id(worker_id) {
+  if (_phase_times != NULL) {
+    _start_time = os::elapsedTime();
+  }
+}
+
+ShenandoahParPhaseTimesTracker::~ShenandoahParPhaseTimesTracker() {
+  if (_phase_times != NULL) {
+    _phase_times->record_time_secs(_phase, _worker_id, os::elapsedTime() - _start_time);
+  }
+}
+
diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahPhaseTimes.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shenandoah/shenandoahPhaseTimes.hpp	Wed Dec 07 21:03:02 2016 +0100
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAHPHASETIMES_HPP +#define SHARE_VM_GC_SHENANDOAH_SHENANDOAHPHASETIMES_HPP + +#include "memory/allocation.hpp" + +template class ShenandoahWorkerDataArray; + +class ShenandoahPhaseTimes : public CHeapObj { + public: + enum GCParPhases { + ThreadRoots, + CodeCacheRoots, + StringTableRoots, + UniverseRoots, + JNIRoots, + JNIWeakRoots, + ObjectSynchronizerRoots, + FlatProfilerRoots, + ManagementRoots, + SystemDictionaryRoots, + CLDGRoots, + JVMTIRoots, + GCParPhasesSentinel + }; + + private: + uint _max_gc_threads; + ShenandoahWorkerDataArray* _gc_par_phases[GCParPhasesSentinel]; + + public: + ShenandoahPhaseTimes(uint max_gc_threads); + + // record the time a phase took in seconds + void record_time_secs(GCParPhases phase, uint worker_i, double secs); + + double average(uint i); + void reset(uint i); + void print(); +}; + +class ShenandoahParPhaseTimesTracker : public StackObj { + double _start_time; + ShenandoahPhaseTimes::GCParPhases _phase; + ShenandoahPhaseTimes* _phase_times; + uint _worker_id; +public: + ShenandoahParPhaseTimesTracker(ShenandoahPhaseTimes* phase_times, ShenandoahPhaseTimes::GCParPhases phase, uint worker_id); + ~ShenandoahParPhaseTimesTracker(); +}; + +#endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHPHASETIMES_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -23,121 +23,173 @@ #include "precompiled.hpp" -#include "classfile/symbolTable.hpp" #include "classfile/systemDictionary.hpp" #include "code/codeCache.hpp" #include "gc_implementation/shenandoah/shenandoahRootProcessor.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.hpp" +#include "gc_implementation/shenandoah/shenandoahBarrierSet.hpp" +#include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" +#include "gc_implementation/shenandoah/shenandoahPhaseTimes.hpp" #include "memory/allocation.inline.hpp" #include "runtime/fprofiler.hpp" #include "runtime/mutex.hpp" #include "services/management.hpp" -ShenandoahRootProcessor::ShenandoahRootProcessor(ShenandoahHeap* heap, uint n_workers) : - _process_strong_tasks(new SubTasksDone(SHENANDOAH_RP_PS_NumElements)), - _srs(heap, true) +ShenandoahRootProcessor::ShenandoahRootProcessor(ShenandoahHeap* heap, uint n_workers, + ShenandoahCollectorPolicy::TimingPhase phase) : + _process_strong_tasks(SHENANDOAH_RP_PS_NumElements), + _srs(heap, true), + _phase(phase), + _cld_iterator(ClassLoaderDataGraph::parallel_cld_root_iterator()) , + _om_iterator(ObjectSynchronizer::parallel_iterator()) { - _process_strong_tasks->set_n_threads(n_workers); + heap->shenandoahPolicy()->record_workers_start(_phase); + _process_strong_tasks.set_n_threads(n_workers); + heap->set_par_threads(n_workers); } ShenandoahRootProcessor::~ShenandoahRootProcessor() { - delete _process_strong_tasks; -} - -void ShenandoahRootProcessor::process_roots(OopClosure* strong_oops, - OopClosure* 
weak_oops, - CLDClosure* strong_clds, - CLDClosure* weak_clds, - CLDClosure* thread_stack_clds, - CodeBlobClosure* strong_code, - CodeBlobClosure* weak_code) { - process_java_roots(strong_oops, thread_stack_clds, strong_clds, weak_clds, strong_code, 0); - process_vm_roots(strong_oops, weak_oops, 0); - - if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_CodeCache_oops_do)) { - CodeCache::blobs_do(weak_code); - } - - if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_JNIHandles_weak_oops_do)) { - ShenandoahAlwaysTrueClosure always_true; - JNIHandles::weak_oops_do(&always_true, weak_oops); - } - - _process_strong_tasks->all_tasks_completed(); + ShenandoahHeap::heap()->shenandoahPolicy()->record_workers_end(_phase); } void ShenandoahRootProcessor::process_strong_roots(OopClosure* oops, - CLDClosure* clds, - CodeBlobClosure* blobs) { + OopClosure* weak_oops, + CLDClosure* clds, + CodeBlobClosure* blobs, + uint worker_id) { - process_java_roots(oops, clds, clds, NULL, blobs, 0); - process_vm_roots(oops, NULL, 0); + process_java_roots(oops, clds, clds, NULL, blobs, worker_id); + process_vm_roots(oops, NULL, weak_oops, worker_id); - _process_strong_tasks->all_tasks_completed(); + _process_strong_tasks.all_tasks_completed(); } void ShenandoahRootProcessor::process_all_roots(OopClosure* oops, - CLDClosure* clds, - CodeBlobClosure* blobs) { + OopClosure* weak_oops, + CLDClosure* clds, + CodeBlobClosure* blobs, + uint worker_id) { - process_java_roots(oops, NULL, clds, clds, NULL, 0); - process_vm_roots(oops, oops, 0); + ShenandoahPhaseTimes* phase_times = ShenandoahHeap::heap()->shenandoahPolicy()->phase_times(); + process_java_roots(oops, NULL, clds, clds, NULL, worker_id); + process_vm_roots(oops, oops, weak_oops, worker_id); - if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_CodeCache_oops_do)) { - CodeCache::blobs_do(blobs); + if (blobs != NULL) { + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::CodeCacheRoots, worker_id); + if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_CodeCache_oops_do)) { + CodeCache::blobs_do(blobs); + } } - _process_strong_tasks->all_tasks_completed(); + _process_strong_tasks.all_tasks_completed(); } void ShenandoahRootProcessor::process_java_roots(OopClosure* strong_roots, - CLDClosure* thread_stack_clds, + CLDClosure* thread_clds, CLDClosure* strong_clds, CLDClosure* weak_clds, CodeBlobClosure* strong_code, - uint worker_i) + uint worker_id) { - //assert(thread_stack_clds == NULL || weak_clds == NULL, "There is overlap between those, only one may be set"); + ShenandoahPhaseTimes* phase_times = ShenandoahHeap::heap()->shenandoahPolicy()->phase_times(); // Iterating over the CLDG and the Threads are done early to allow us to // first process the strong CLDs and nmethods and then, after a barrier, // let the thread process the weak CLDs and nmethods. 
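The root-scanning methods below split the work with SubTasksDone-style claiming, so each singleton root group (Universe, JNI handles, Management, and so on) is scanned by exactly one of the parallel workers. The idiom, modeled with std::atomic and an illustrative task count:

    #include <atomic>
    #include <cstddef>

    struct SubTasksDoneModel {
      static const size_t kNumTasks = 10;   // mirrors SHENANDOAH_RP_PS_NumElements
      std::atomic<bool> _claimed[kNumTasks];

      SubTasksDoneModel() {
        for (size_t i = 0; i < kNumTasks; i++) _claimed[i].store(false);
      }
      // Returns true if another worker already claimed task t; the single
      // worker that sees false performs the scan.
      bool is_task_claimed(size_t t) {
        return _claimed[t].exchange(true, std::memory_order_acq_rel);
      }
    };

    // Usage mirroring process_vm_roots() below (task id illustrative):
    //   if (!tasks.is_task_claimed(kUniverseTask)) { /* Universe::oops_do(...) */ }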
- if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_ClassLoaderDataGraph_oops_do)) { - ClassLoaderDataGraph::roots_cld_do(strong_clds, weak_clds); + { + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::CLDGRoots, worker_id); + while(_cld_iterator.root_cld_do(strong_clds, weak_clds)); } - ResourceMark rm; - Threads::possibly_parallel_oops_do(strong_roots, thread_stack_clds, strong_code); + { + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::ThreadRoots, worker_id); + ResourceMark rm; + Threads::possibly_parallel_oops_do(strong_roots, thread_clds, strong_code); + } } void ShenandoahRootProcessor::process_vm_roots(OopClosure* strong_roots, OopClosure* weak_roots, - uint worker_i) + OopClosure* jni_weak_roots, + uint worker_id) { - if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_Universe_oops_do)) { + ShenandoahPhaseTimes* phase_times = ShenandoahHeap::heap()->shenandoahPolicy()->phase_times(); + if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_Universe_oops_do)) { + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::UniverseRoots, worker_id); Universe::oops_do(strong_roots); } - if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_JNIHandles_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_JNIHandles_oops_do)) { + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::JNIRoots, worker_id); JNIHandles::oops_do(strong_roots); } - if (!_process_strong_tasks-> is_task_claimed(SHENANDOAH_RP_PS_ObjectSynchronizer_oops_do)) { - ObjectSynchronizer::oops_do(strong_roots); - } - if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_FlatProfiler_oops_do)) { + + if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_FlatProfiler_oops_do)) { + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::FlatProfilerRoots, worker_id); FlatProfiler::oops_do(strong_roots); } - if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_Management_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_Management_oops_do)) { + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::ManagementRoots, worker_id); Management::oops_do(strong_roots); } - if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_jvmti_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_jvmti_oops_do)) { + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::JVMTIRoots, worker_id); JvmtiExport::oops_do(strong_roots); } - if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_SystemDictionary_oops_do)) { + if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_SystemDictionary_oops_do)) { + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::SystemDictionaryRoots, worker_id); SystemDictionary::roots_oops_do(strong_roots, weak_roots); } + if (jni_weak_roots != NULL) { + if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_JNIHandles_weak_oops_do)) { + ShenandoahAlwaysTrueClosure always_true; + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::JNIWeakRoots, worker_id); + JNIHandles::weak_oops_do(&always_true, jni_weak_roots); + } + } + + { + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::ObjectSynchronizerRoots, worker_id); + while(_om_iterator.parallel_oops_do(strong_roots)); + } // All threads execute the following. A specific chunk of buckets // from the StringTable are the individual tasks. 
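Each claimed group above is wrapped in a ShenandoahParPhaseTimesTracker, whose constructor/destructor pair (defined in shenandoahPhaseTimes.cpp earlier in this changeset) records per-worker wall time for the phase. The RAII shape in miniature, with an illustrative sink interface in place of ShenandoahPhaseTimes:

    #include <chrono>

    struct PhaseTimeSink {
      virtual void record_time_secs(int phase, unsigned worker, double secs) = 0;
      virtual ~PhaseTimeSink() {}
    };

    class ScopedPhaseTimerModel {
      PhaseTimeSink* _sink;
      int _phase;
      unsigned _worker;
      std::chrono::steady_clock::time_point _start;
    public:
      ScopedPhaseTimerModel(PhaseTimeSink* sink, int phase, unsigned worker)
        : _sink(sink), _phase(phase), _worker(worker),
          _start(std::chrono::steady_clock::now()) {}
      // Destructor reports elapsed seconds, like the real tracker; a NULL
      // sink disables timing, matching the _phase_times != NULL checks.
      ~ScopedPhaseTimerModel() {
        if (_sink != nullptr) {
          std::chrono::duration<double> d = std::chrono::steady_clock::now() - _start;
          _sink->record_time_secs(_phase, _worker, d.count());
        }
      }
    };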
if (weak_roots != NULL) { + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::StringTableRoots, worker_id); StringTable::possibly_parallel_oops_do(weak_roots); } } + +ShenandoahRootEvacuator::ShenandoahRootEvacuator(ShenandoahHeap* heap, uint n_workers, ShenandoahCollectorPolicy::TimingPhase phase) : + _process_strong_tasks(SHENANDOAH_RP_PS_NumElements), + _srs(heap, true), + _phase(phase) +{ + _process_strong_tasks.set_n_threads(n_workers); + heap->set_par_threads(n_workers); + heap->shenandoahPolicy()->record_workers_start(_phase); +} + +ShenandoahRootEvacuator::~ShenandoahRootEvacuator() { + ShenandoahHeap::heap()->shenandoahPolicy()->record_workers_end(_phase); +} + +void ShenandoahRootEvacuator::process_evacuate_roots(OopClosure* oops, + CodeBlobClosure* blobs, + uint worker_id) { + + ShenandoahPhaseTimes* phase_times = ShenandoahHeap::heap()->shenandoahPolicy()->phase_times(); + { + ResourceMark rm; + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::ThreadRoots, worker_id); + Threads::possibly_parallel_oops_do(oops, NULL, NULL); + } + + { + ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::CodeCacheRoots, worker_id); + if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_CodeCache_oops_do)) { + CodeCache::blobs_do(blobs); + } + } + + _process_strong_tasks.all_tasks_completed(); +} diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -24,7 +24,11 @@ #ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAHROOTPROCESSOR_HPP #define SHARE_VM_GC_SHENANDOAH_SHENANDOAHROOTPROCESSOR_HPP +#include "classfile/classLoaderData.hpp" +#include "code/codeCache.hpp" #include "memory/sharedHeap.hpp" +#include "gc_implementation/shenandoah/shenandoahHeap.hpp" +#include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" #include "memory/allocation.hpp" #include "runtime/mutex.hpp" @@ -37,29 +41,30 @@ class OopClosure; class SubTasksDone; +enum Shenandoah_process_roots_tasks { + SHENANDOAH_RP_PS_Universe_oops_do, + SHENANDOAH_RP_PS_JNIHandles_oops_do, + SHENANDOAH_RP_PS_JNIHandles_weak_oops_do, + SHENANDOAH_RP_PS_ObjectSynchronizer_oops_do, + SHENANDOAH_RP_PS_FlatProfiler_oops_do, + SHENANDOAH_RP_PS_Management_oops_do, + SHENANDOAH_RP_PS_SystemDictionary_oops_do, + SHENANDOAH_RP_PS_ClassLoaderDataGraph_oops_do, + SHENANDOAH_RP_PS_jvmti_oops_do, + SHENANDOAH_RP_PS_CodeCache_oops_do, + // Leave this one last. + SHENANDOAH_RP_PS_NumElements +}; + class ShenandoahRootProcessor : public StackObj { - SubTasksDone* _process_strong_tasks; + SubTasksDone _process_strong_tasks; SharedHeap::StrongRootsScope _srs; - - enum Shenandoah_process_roots_tasks { - SHENANDOAH_RP_PS_Universe_oops_do, - SHENANDOAH_RP_PS_JNIHandles_oops_do, - SHENANDOAH_RP_PS_JNIHandles_weak_oops_do, - SHENANDOAH_RP_PS_ObjectSynchronizer_oops_do, - SHENANDOAH_RP_PS_FlatProfiler_oops_do, - SHENANDOAH_RP_PS_Management_oops_do, - SHENANDOAH_RP_PS_SystemDictionary_oops_do, - SHENANDOAH_RP_PS_ClassLoaderDataGraph_oops_do, - SHENANDOAH_RP_PS_jvmti_oops_do, - SHENANDOAH_RP_PS_CodeCache_oops_do, - SHENANDOAH_RP_PS_filter_satb_buffers, - SHENANDOAH_RP_PS_refProcessor_oops_do, - // Leave this one last. 
- SHENANDOAH_RP_PS_NumElements - }; + ShenandoahCollectorPolicy::TimingPhase _phase; + ParallelCLDRootIterator _cld_iterator; + ParallelObjectSynchronizerIterator _om_iterator; void process_java_roots(OopClosure* scan_non_heap_roots, - CLDClosure* thread_stack_clds, + CLDClosure* thread_clds, CLDClosure* scan_strong_clds, CLDClosure* scan_weak_clds, CodeBlobClosure* scan_strong_code, @@ -67,30 +72,45 @@ void process_vm_roots(OopClosure* scan_non_heap_roots, OopClosure* scan_non_heap_weak_roots, + OopClosure* weak_jni_roots, uint worker_i); public: - ShenandoahRootProcessor(ShenandoahHeap* heap, uint n_workers); + ShenandoahRootProcessor(ShenandoahHeap* heap, uint n_workers, + ShenandoahCollectorPolicy::TimingPhase phase = ShenandoahCollectorPolicy::_num_phases); ~ShenandoahRootProcessor(); - void process_roots(OopClosure* strong_oops, - OopClosure* weak_oops, - CLDClosure* strong_clds, - CLDClosure* weak_clds, - CLDClosure* thread_stack_clds, - CodeBlobClosure* strong_code, - CodeBlobClosure* weak_code); - // Apply oops, clds and blobs to all strongly reachable roots in the system - void process_strong_roots(OopClosure* oops, + void process_strong_roots(OopClosure* oops, OopClosure* weak_oops, CLDClosure* clds, - CodeBlobClosure* blobs); + CodeBlobClosure* blobs, + uint worker_id); // Apply oops, clds and blobs to strongly and weakly reachable roots in the system - void process_all_roots(OopClosure* oops, + void process_all_roots(OopClosure* oops, OopClosure* weak_oops, CLDClosure* clds, - CodeBlobClosure* blobs); + CodeBlobClosure* blobs, + uint worker_id); + // Number of worker threads used by the root processor. + uint n_workers() const; }; +class ShenandoahRootEvacuator : public StackObj { + SubTasksDone _process_strong_tasks; + SharedHeap::StrongRootsScope _srs; + ShenandoahCollectorPolicy::TimingPhase _phase; + +public: + ShenandoahRootEvacuator(ShenandoahHeap* heap, uint n_workers, + ShenandoahCollectorPolicy::TimingPhase phase = ShenandoahCollectorPolicy::_num_phases); + ~ShenandoahRootEvacuator(); + + void process_evacuate_roots(OopClosure* oops, + CodeBlobClosure* blobs, + uint worker_id); + + // Number of worker threads used by the root processor. + uint n_workers() const; +}; #endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHROOTPROCESSOR_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" + +#include "gc_implementation/shenandoah/shenandoahHeap.hpp" +#include "gc_implementation/shenandoah/shenandoahLogging.hpp" +#include "gc_implementation/shenandoah/shenandoahTaskqueue.hpp" + +void SCMObjToScanQueueSet::clear() { + uint size = GenericTaskQueueSet<SCMObjToScanQueue, mtGC>::size(); + for (uint index = 0; index < size; index ++) { + SCMObjToScanQueue* q = queue(index); + assert(q != NULL, "Sanity"); + q->set_empty(); + q->overflow_stack()->clear(); + q->clear_buffer(); + } +} + + +bool SCMObjToScanQueueSet::is_empty() { + uint size = GenericTaskQueueSet<SCMObjToScanQueue, mtGC>::size(); + for (uint index = 0; index < size; index ++) { + SCMObjToScanQueue* q = queue(index); + assert(q != NULL, "Sanity"); + if (!q->is_empty()) { + return false; + } + } + return true; +} + +bool ShenandoahTaskTerminator::offer_termination(TerminatorTerminator* terminator) { + assert(_n_threads > 0, "Initialization is incorrect"); + assert(_offered_termination < _n_threads, "Invariant"); + assert(_blocker != NULL, "Invariant"); + + // single worker, done + if (_n_threads == 1) { + return true; + } + + _blocker->lock_without_safepoint_check(); + // all arrived, done + if (++ _offered_termination == _n_threads) { + _blocker->notify_all(); + _blocker->unlock(); + return true; + } + + Thread* the_thread = Thread::current(); + while (true) { + if (_spin_master == NULL) { + _spin_master = the_thread; + + _blocker->unlock(); + + if (do_spin_master_work()) { + assert(_offered_termination == _n_threads, "termination condition"); + return true; + } else { + _blocker->lock_without_safepoint_check(); + } + } else { + _blocker->wait(true, WorkStealingSleepMillis); + + if (_offered_termination == _n_threads) { + _blocker->unlock(); + return true; + } + } + + if (peek_in_queue_set() || + (terminator != NULL && terminator->should_exit_termination())) { + _offered_termination --; + _blocker->unlock(); + return false; + } + } +} + +bool ShenandoahTaskTerminator::do_spin_master_work() { + uint yield_count = 0; + // Number of hard spin loops done since last yield + uint hard_spin_count = 0; + // Number of iterations in the hard spin loop. + uint hard_spin_limit = WorkStealingHardSpins; + + // If WorkStealingSpinToYieldRatio is 0, no hard spinning is done. + // If it is greater than 0, then start with a small number + // of spins and increase number with each turn at spinning until + // the count of hard spins exceeds WorkStealingSpinToYieldRatio. + // Then do a yield() call and start spinning afresh. + if (WorkStealingSpinToYieldRatio > 0) { + hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio; + hard_spin_limit = MAX2(hard_spin_limit, 1U); + } + // Remember the initial spin limit. + uint hard_spin_start = hard_spin_limit; + + // Loop waiting for all threads to offer termination or + // more work. + while (true) { + // Look for more work. + // Periodically sleep() instead of yield() to give threads + // waiting on the cores the chance to grab this code + if (yield_count <= WorkStealingYieldsBeforeSleep) { + // Do a yield or hardspin. For purposes of deciding whether + // to sleep, count this as a yield. + yield_count++; + + // Periodically call yield() instead of spinning + // After WorkStealingSpinToYieldRatio spins, do a yield() call + // and reset the counts and starting limit.
+ if (hard_spin_count > WorkStealingSpinToYieldRatio) { + yield(); + hard_spin_count = 0; + hard_spin_limit = hard_spin_start; +#ifdef TRACESPINNING + _total_yields++; +#endif + } else { + // Hard spin this time + // Increase the hard spinning period but only up to a limit. + hard_spin_limit = MIN2(2*hard_spin_limit, + (uint) WorkStealingHardSpins); + for (uint j = 0; j < hard_spin_limit; j++) { + SpinPause(); + } + hard_spin_count++; +#ifdef TRACESPINNING + _total_spins++; +#endif + } + } else { + log_develop_trace(gc, task)("ShenandoahTaskTerminator::do_spin_master_work() thread " PTR_FORMAT " sleeps after %u yields", + p2i(Thread::current()), yield_count); + yield_count = 0; + + MonitorLockerEx locker(_blocker, Mutex::_no_safepoint_check_flag); // no safepoint check + _spin_master = NULL; + locker.wait(Mutex::_no_safepoint_check_flag, WorkStealingSleepMillis); + if (_spin_master == NULL) { + _spin_master = Thread::current(); + } else { + return false; + } + } + +#ifdef TRACESPINNING + _total_peeks++; +#endif + size_t tasks = tasks_in_queue_set(); + if (tasks > 0) { + MonitorLockerEx locker(_blocker, Mutex::_no_safepoint_check_flag); // no safepoint check + + if ((int) tasks >= _offered_termination - 1) { + locker.notify_all(); + } else { + for (; tasks > 1; tasks --) { + locker.notify(); + } + } + _spin_master = NULL; + return false; + } else if (_offered_termination == _n_threads) { + return true; + } + } +} diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAH_TASKQUEUE_HPP +#define SHARE_VM_GC_SHENANDOAH_SHENANDOAH_TASKQUEUE_HPP + +#include "memory/padded.hpp" +#include "utilities/taskqueue.hpp" +#include "runtime/mutex.hpp" + +class Thread; + +template <class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE> +class BufferedOverflowTaskQueue: public OverflowTaskQueue<E, F, N> +{ +public: + typedef OverflowTaskQueue<E, F, N> taskqueue_t; + + BufferedOverflowTaskQueue() : _buf_empty(true) {}; + + TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;) + + // Push task t onto: + // - first, try buffer; + // - then, try the queue; + // - then, overflow stack. + // Return true. + inline bool push(E t); + + // Attempt to pop from the buffer; return true if anything was popped.
+ inline bool pop_buffer(E &t); + + inline void clear_buffer() { _buf_empty = true; } + inline bool buffer_empty() const { return _buf_empty; } + inline bool is_empty() const { + return taskqueue_t::is_empty() && buffer_empty(); + } + +private: + bool _buf_empty; + E _elem; +}; + +class ObjArrayFromToTask +{ +public: + ObjArrayFromToTask(oop o = NULL, int from = 0, int to = 0): _obj(o), _from(from), _to(to) { } + ObjArrayFromToTask(oop o, size_t from, size_t to): _obj(o), _from(int(from)), _to(int(to)) { + assert(from <= size_t(max_jint), "too big"); + assert(to <= size_t(max_jint), "too big"); + assert(from < to, "sanity"); + } + ObjArrayFromToTask(const ObjArrayFromToTask& t): _obj(t._obj), _from(t._from), _to(t._to) { } + + ObjArrayFromToTask& operator =(const ObjArrayFromToTask& t) { + _obj = t._obj; + _from = t._from; + _to = t._to; + return *this; + } + volatile ObjArrayFromToTask& + operator =(const volatile ObjArrayFromToTask& t) volatile { + (void)const_cast<oop&>(_obj = t._obj); + _from = t._from; + _to = t._to; + return *this; + } + + inline oop obj() const { return _obj; } + inline int from() const { return _from; } + inline int to() const { return _to; } + + DEBUG_ONLY(bool is_valid() const); // Tasks to be pushed/popped must be valid. + +private: + oop _obj; + int _from, _to; +}; + +typedef BufferedOverflowTaskQueue<ObjArrayFromToTask, mtGC> ShenandoahBufferedOverflowTaskQueue; +typedef Padded<ShenandoahBufferedOverflowTaskQueue> SCMObjToScanQueue; +// typedef GenericTaskQueueSet<SCMObjToScanQueue, mtGC> SCMObjToScanQueueSet; + + +template <class T, MEMFLAGS F> +class ParallelClaimableQueueSet: public GenericTaskQueueSet<T, F> { +private: + volatile jint _claimed_index; + debug_only(uint _reserved; ) + +public: + using GenericTaskQueueSet<T, F>::size; + +public: + ParallelClaimableQueueSet(int n) : GenericTaskQueueSet<T, F>(n) { + debug_only(_reserved = 0; ) + } + + void clear_claimed() { _claimed_index = 0; } + T* claim_next(); + + // reserve queues that are not for parallel claiming + void reserve(uint n) { + assert(n <= size(), "Sanity"); + _claimed_index = (jint)n; + debug_only(_reserved = n;) + } + + debug_only(uint get_reserved() const { return (uint)_reserved; }) +}; + + +template <class T, MEMFLAGS F> +T* ParallelClaimableQueueSet<T, F>::claim_next() { + jint size = (jint)GenericTaskQueueSet<T, F>::size(); + + if (_claimed_index >= size) { + return NULL; + } + + jint index = Atomic::add(1, &_claimed_index); + + if (index <= size) { + return GenericTaskQueueSet<T, F>::queue((uint)index - 1); + } else { + return NULL; + } +} + +class SCMObjToScanQueueSet: public ParallelClaimableQueueSet<SCMObjToScanQueue, mtGC> { + +public: + SCMObjToScanQueueSet(int n) : ParallelClaimableQueueSet<SCMObjToScanQueue, mtGC>(n) { + } + + bool is_empty(); + + void clear(); +}; + + +/* + * This is an enhanced implementation of Google's work stealing + * protocol, which is described in the paper: + * Understanding and improving JVM GC work stealing at the data center scale + * (http://dl.acm.org/citation.cfm?id=2926706) + * + * Instead of a dedicated spin-master, our implementation lets the spin-master relinquish + * the role before it goes to sleep/wait, allowing a newly arrived thread to compete for the role. + * The intention of this enhancement is to reduce the spin-master's latency in detecting new tasks + * for stealing and the termination condition.
+ */ + +class ShenandoahTaskTerminator: public ParallelTaskTerminator { +private: + Monitor* _blocker; + Thread* _spin_master; + + +public: + ShenandoahTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) : + ParallelTaskTerminator(n_threads, queue_set), _spin_master(NULL) { + _blocker = new Monitor(Mutex::leaf, "ShenandoahTaskTerminator", false); + } + + bool offer_termination(TerminatorTerminator* terminator); + +private: + size_t tasks_in_queue_set() { return _queue_set->tasks(); } + + + /* + * Perform the spin-master task. + * Returns true if the termination condition is detected, + * otherwise returns false. + */ + bool do_spin_master_work(); +}; + +#endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAH_TASKQUEUE_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.inline.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -0,0 +1,21 @@ + +template <class E, MEMFLAGS F, unsigned int N> +bool BufferedOverflowTaskQueue<E, F, N>::pop_buffer(E &t) +{ + if (_buf_empty) return false; + t = _elem; + _buf_empty = true; + return true; +} + +template <class E, MEMFLAGS F, unsigned int N> +inline bool BufferedOverflowTaskQueue<E, F, N>::push(E t) +{ + if (_buf_empty) { + _elem = t; + _buf_empty = false; + return true; + } else { + return taskqueue_t::push(t); + } +} diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahWorkerDataArray.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahWorkerDataArray.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#include "precompiled.hpp" +#include "gc_implementation/shenandoah/shenandoahWorkerDataArray.inline.hpp" +#include "utilities/ostream.hpp" + +template <> +size_t ShenandoahWorkerDataArray<size_t>::uninitialized() { + return (size_t)-1; +} + +template <> +double ShenandoahWorkerDataArray<double>::uninitialized() { + return -1.0; +} + +template <> +void ShenandoahWorkerDataArray<double>::WDAPrinter::summary(outputStream* out, double min, double avg, double max, double diff, double sum, bool print_sum) { + out->print(" Min: %4.1lf, Avg: %4.1lf, Max: %4.1lf, Diff: %4.1lf", min * MILLIUNITS, avg * MILLIUNITS, max * MILLIUNITS, diff* MILLIUNITS); + if (print_sum) { + out->print(", Sum: %4.1lf", sum * MILLIUNITS); + } +} + +template <> +void ShenandoahWorkerDataArray<size_t>::WDAPrinter::summary(outputStream* out, size_t min, double avg, size_t max, size_t diff, size_t sum, bool print_sum) { + out->print(" Min: " SIZE_FORMAT ", Avg: %4.1lf, Max: " SIZE_FORMAT ", Diff: " SIZE_FORMAT, min, avg, max, diff); + if (print_sum) { + out->print(", Sum: " SIZE_FORMAT, sum); + } +} + +template <> +void ShenandoahWorkerDataArray<double>::WDAPrinter::details(const ShenandoahWorkerDataArray<double>* phase, outputStream* out) { + out->print("%-25s", ""); + for (uint i = 0; i < phase->_length; ++i) { + double value = phase->get(i); + if (value != phase->uninitialized()) { + out->print(" %4.1lf", phase->get(i) * 1000.0); + } else { + out->print(" -"); + } + } + out->cr(); +} + +template <> +void ShenandoahWorkerDataArray<size_t>::WDAPrinter::details(const ShenandoahWorkerDataArray<size_t>* phase, outputStream* out) { + out->print("%-25s", ""); + for (uint i = 0; i < phase->_length; ++i) { + size_t value = phase->get(i); + if (value != phase->uninitialized()) { + out->print(" " SIZE_FORMAT, phase->get(i)); + } else { + out->print(" -"); + } + } + out->cr(); +} diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahWorkerDataArray.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahWorkerDataArray.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#ifndef SHARE_VM_GC_G1_WORKERDATAARRAY_HPP +#define SHARE_VM_GC_G1_WORKERDATAARRAY_HPP + +#include "memory/allocation.hpp" +#include "utilities/debug.hpp" + +class outputStream; + +template <typename T> +class ShenandoahWorkerDataArray : public CHeapObj<mtGC> { + friend class WDAPrinter; + T* _data; + uint _length; + const char* _title; + + ShenandoahWorkerDataArray<size_t>* _thread_work_items; + + public: + ShenandoahWorkerDataArray(uint length, const char* title); + ~ShenandoahWorkerDataArray(); + + void link_thread_work_items(ShenandoahWorkerDataArray<size_t>* thread_work_items); + void set_thread_work_item(uint worker_i, size_t value); + ShenandoahWorkerDataArray<size_t>* thread_work_items() const { + return _thread_work_items; + } + + static T uninitialized(); + + void set(uint worker_i, T value); + T get(uint worker_i) const; + + void add(uint worker_i, T value); + + // The sum() and average() methods below consider uninitialized slots to be 0. + double average() const; + T sum() const; + + const char* title() const { + return _title; + } + + void reset(); + void set_all(T value); + + + private: + class WDAPrinter { + public: + static void summary(outputStream* out, double min, double avg, double max, double diff, double sum, bool print_sum); + static void summary(outputStream* out, size_t min, double avg, size_t max, size_t diff, size_t sum, bool print_sum); + + static void details(const ShenandoahWorkerDataArray<double>* phase, outputStream* out); + static void details(const ShenandoahWorkerDataArray<size_t>* phase, outputStream* out); + }; + + public: + void print_summary_on(outputStream* out, bool print_sum = true) const; + void print_details_on(outputStream* out) const; +}; + +#endif // SHARE_VM_GC_G1_WORKERDATAARRAY_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoahWorkerDataArray.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahWorkerDataArray.inline.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#ifndef SHARE_VM_GC_G1_WORKERDATAARRAY_INLINE_HPP +#define SHARE_VM_GC_G1_WORKERDATAARRAY_INLINE_HPP + +#include "gc_implementation/shenandoah/shenandoahWorkerDataArray.hpp" +#include "memory/allocation.inline.hpp" +#include "utilities/ostream.hpp" + +template <typename T> +ShenandoahWorkerDataArray<T>::ShenandoahWorkerDataArray(uint length, const char* title) : + _title(title), + _length(0), + _thread_work_items(NULL) { + assert(length > 0, "Must have some workers to store data for"); + _length = length; + _data = NEW_C_HEAP_ARRAY(T, _length, mtGC); + reset(); +} + +template <typename T> +void ShenandoahWorkerDataArray<T>::set(uint worker_i, T value) { + assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); + assert(_data[worker_i] == uninitialized(), err_msg("Overwriting data for worker %d in %s", worker_i, _title)); + _data[worker_i] = value; +} + +template <typename T> +T ShenandoahWorkerDataArray<T>::get(uint worker_i) const { + assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); + return _data[worker_i]; +} + +template <typename T> +ShenandoahWorkerDataArray<T>::~ShenandoahWorkerDataArray() { + FREE_C_HEAP_ARRAY(T, _data, mtGC); +} + +template <typename T> +void ShenandoahWorkerDataArray<T>::link_thread_work_items(ShenandoahWorkerDataArray<size_t>* thread_work_items) { + _thread_work_items = thread_work_items; +} + +template <typename T> +void ShenandoahWorkerDataArray<T>::set_thread_work_item(uint worker_i, size_t value) { + assert(_thread_work_items != NULL, "No sub count"); + _thread_work_items->set(worker_i, value); +} + +template <typename T> +void ShenandoahWorkerDataArray<T>::add(uint worker_i, T value) { + assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); + assert(_data[worker_i] != uninitialized(), err_msg("No data to add to for worker %d", worker_i)); + _data[worker_i] += value; +} + +template <typename T> +double ShenandoahWorkerDataArray<T>::average() const { + uint contributing_threads = 0; + for (uint i = 0; i < _length; ++i) { + if (get(i) != uninitialized()) { + contributing_threads++; + } + } + if (contributing_threads == 0) { + return 0.0; + } + return sum() / (double) contributing_threads; +} + +template <typename T> +T ShenandoahWorkerDataArray<T>::sum() const { + T s = 0; + for (uint i = 0; i < _length; ++i) { + if (get(i) != uninitialized()) { + s += get(i); + } + } + return s; +} + +template <typename T> +void ShenandoahWorkerDataArray<T>::set_all(T value) { + for (uint i = 0; i < _length; i++) { + _data[i] = value; + } +} + +template <typename T> +void ShenandoahWorkerDataArray<T>::print_summary_on(outputStream* out, bool print_sum) const { + out->print("%-25s", title()); + uint start = 0; + while (start < _length && get(start) == uninitialized()) { + start++; + } + if (start < _length) { + T min = get(start); + T max = min; + T sum = 0; + uint contributing_threads = 0; + for (uint i = start; i < _length; ++i) { + T value = get(i); + if (value != uninitialized()) { + max = MAX2(max, value); + min = MIN2(min, value); + sum += value; + contributing_threads++; + } + } + T diff = max - min; + assert(contributing_threads != 0, "Must be since we found a used value for the start index"); + double avg = sum / (double) contributing_threads; + WDAPrinter::summary(out, min, avg, max, diff, sum, print_sum); + out->print_cr(", Workers: %d", contributing_threads); + } else { + // No data for this phase.
+ out->print_cr(" skipped"); + } +} + +template +void ShenandoahWorkerDataArray::print_details_on(outputStream* out) const { + WDAPrinter::details(this, out); +} + +template +void ShenandoahWorkerDataArray::reset() { + set_all(uninitialized()); + if (_thread_work_items != NULL) { + _thread_work_items->reset(); + } +} + +#endif // SHARE_VM_GC_G1_WORKERDATAARRAY_INLINE_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoah_globals.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -21,11 +21,14 @@ * */ -#include "precompiled.hpp" #include "gc_implementation/shenandoah/shenandoah_globals.hpp" -SHENANDOAH_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, \ - MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, \ - MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_EXPERIMENTAL_FLAG, \ - MATERIALIZE_NOTPRODUCT_FLAG, \ - MATERIALIZE_MANAGEABLE_FLAG, MATERIALIZE_PRODUCT_RW_FLAG) +SHENANDOAH_FLAGS(MATERIALIZE_DEVELOPER_FLAG, \ + MATERIALIZE_PD_DEVELOPER_FLAG, \ + MATERIALIZE_PRODUCT_FLAG, \ + MATERIALIZE_PD_PRODUCT_FLAG, \ + MATERIALIZE_DIAGNOSTIC_FLAG, \ + MATERIALIZE_EXPERIMENTAL_FLAG, \ + MATERIALIZE_NOTPRODUCT_FLAG, \ + MATERIALIZE_MANAGEABLE_FLAG, \ + MATERIALIZE_PRODUCT_RW_FLAG) diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -21,21 +21,139 @@ * */ -#ifndef SHARE_VM_GC_IMPLEMENTATION_SHENANDOAH_SHENANDOAH_GLOBALS_HPP -#define SHARE_VM_GC_IMPLEMENTATION_SHENANDOAH_SHENANDOAH_GLOBALS_HPP +#ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAH_GLOBALS_HPP +#define SHARE_VM_GC_SHENANDOAH_SHENANDOAH_GLOBALS_HPP #include "runtime/globals.hpp" -#define SHENANDOAH_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct, manageable, product_rw) \ +#define SHENANDOAH_FLAGS(develop, \ + develop_pd, \ + product, \ + product_pd, \ + diagnostic, \ + experimental, \ + notproduct, \ + manageable, \ + product_rw) \ \ product(bool, UseShenandoahGC, false, \ "Use the Shenandoah garbage collector") \ \ product(bool, ShenandoahOptimizeFinals, true, \ - "Optimize barriers on final and stable fields/arrays") \ + "Optimize barriers on final and stable fields/arrays. " \ + "Turn it off for maximum compatibility with reflection or JNI " \ + "code that manipulates final fields." \ + "Defaults to true. ") \ \ - product(uintx, ShenandoahHeapRegionSize, 0, \ - "Size of the Shenandoah regions.") \ + product(size_t, ShenandoahHeapRegionSize, 0, \ + "Size of the Shenandoah regions. " \ + "Determined automatically by default.") \ + \ + experimental(size_t, ShenandoahMinRegionSize, 1 * M, \ + "Minimum heap region size. ") \ + \ + experimental(size_t, ShenandoahMaxRegionSize, 32 * M, \ + "Maximum heap region size. ") \ + \ + experimental(size_t, ShenandoahTargetNumRegions, 2048, \ + "Target number of regions. We try to get around that many " \ + "regions, based on ShenandoahMinRegionSize and " \ + "ShenandoahMaxRegionSizeSize. ") \ + \ + product(ccstr, ShenandoahGCHeuristics, "dynamic", \ + "The heuristics to use in Shenandoah GC. Possible values: " \ + "dynamic, adaptive, aggressive." 
\ + "Defauls to dynamic") \ + \ + product(uintx, ShenandoahAllocReserveRegions, 10, \ + "How many regions should be kept as allocation reserve, before " \ + "Shenandoah attempts to grow the heap. Defaults to 10.") \ + \ + product(uintx, ShenandoahRefProcFrequency, 5, \ + "How often should (weak, soft, etc) references be processed. " \ + "References get processed at every Nth GC cycle. " \ + "Set to 0 to disable reference processing. " \ + "Defaults to process references every 5 cycles.") \ + \ + product(uintx, ShenandoahUnloadClassesFrequency, 5, \ + "How often should classes get unloaded. " \ + "Class unloading is performed at every Nth GC cycle. " \ + "Set to 0 to disable concurrent class unloading. " \ + "Defaults to unload classes every 5 cycles.") \ + \ + product(bool, ShenandoahLogTrace, false, \ + "Turns on logging in Shenandoah at trace level. ") \ + \ + product(bool, ShenandoahLogDebug, false, \ + "Turns on logging in Shenandoah at debug level. ") \ + \ + product(bool, ShenandoahLogInfo, false, \ + "Turns on logging in Shenandoah at info level. ") \ + \ + product(bool, ShenandoahLogWarning, false, \ + "Turns on logging in Shenandoah at warning level. ") \ + \ + product(size_t, PreTouchParallelChunkSize, 1 * G, \ + "Per-thread chunk size for parallel memory pre-touch.") \ + \ + product_rw(uintx, ShenandoahGarbageThreshold, 60, \ + "Sets the percentage of garbage a region need to contain before " \ + "it can be marked for collection. Applies to " \ + "Shenandoah GC dynamic Heuristic mode only (ignored otherwise). " \ + "Defaults to 60%.") \ + \ + product_rw(uintx, ShenandoahFreeThreshold, 25, \ + "Set the percentage of free heap at which a GC cycle is started. " \ + "Applies to Shenandoah GC dynamic Heuristic mode only " \ + "(ignored otherwise). Defaults to 25%.") \ + \ + product_rw(uintx, ShenandoahInitialFreeThreshold, 50, \ + "Set the percentage of free heap at which an initial GC cycle " \ + "is started. An initial GC cycle is the first one after VM " \ + "start or after a full GC." \ + "Applies to Shenandoah GC dynamic Heuristic mode only " \ + "(ignored otherwise). Defaults to 50%.") \ + \ + product_rw(uintx, ShenandoahAllocationThreshold, 0, \ + "Set percentage of memory allocated since last GC cycle before " \ + "a new GC cycle is started. " \ + "Applies to Shenandoah GC dynamic Heuristic mode only " \ + "(ignored otherwise). 
Defauls to 0%.") \ + \ + experimental(uint, ShenandoahMarkLoopStride, 1000, \ + "How many items are processed during one marking step") \ + \ + experimental(bool, ShenandoahConcurrentCodeRoots, false, \ + "Scan code roots concurrently, instead of during a pause") \ + \ + experimental(bool, ShenandoahNoBarriersForConst, true, \ + "Constant oops don't need barriers") \ + \ + experimental(bool, ShenandoahDontIncreaseWBFreq, true, \ + "Common 2 WriteBarriers or WriteBarrier and a ReadBarrier only " \ + "if the resulting WriteBarrier isn't executed more frequently") \ + \ + experimental(bool, ShenandoahNoLivenessFullGC, true, \ + "Skip liveness counting for mark during full GC.") \ + \ + experimental(bool, ShenandoahWriteBarrierToIR, true, \ + "Convert write barrier to IR instead of using assembly blob") \ + \ + experimental(bool, UseShenandoahOWST, true, \ + "Use Shenandoah work stealing termination protocol") \ + \ + experimental(size_t, ShenandoahSATBBufferSize, 1 * K, \ + "Number of entries in an SATB log buffer.") \ + \ + diagnostic(bool, ShenandoahWriteBarrier, true, \ + "Turn on/off write barriers in Shenandoah") \ + \ + diagnostic(bool, ShenandoahReadBarrier, true, \ + "Turn on/off read barriers in Shenandoah") \ + \ + diagnostic(bool, ShenandoahStoreCheck, false, \ + "Emit additional code that checks objects are written to only" \ + " in to-space") \ \ develop(bool, ShenandoahDumpHeapBeforeConcurrentMark, false, \ "Dump the ShenanodahHeap Before Each ConcurrentMark") \ @@ -43,111 +161,37 @@ develop(bool, ShenandoahDumpHeapAfterConcurrentMark, false, \ "Dump the ShenanodahHeap After Each Concurrent Mark") \ \ - product(bool, ShenandoahTraceFullGC, false, \ - "Trace Shenandoah full GC") \ - \ - product(bool, ShenandoahTracePhases, false, \ - "Trace Shenandoah GC phases") \ - \ - develop(bool, ShenandoahTraceJNICritical, false, \ - "Trace Shenandoah stalls for JNI critical regions") \ - \ - product(bool, ShenandoahTraceHumongous, false, \ - "Trace Shenandoah humongous objects") \ - \ - develop(bool, ShenandoahTraceAllocations, false, \ - "Trace Shenandoah Allocations") \ - \ - develop(bool, ShenandoahTraceBrooksPointers, false, \ - "Trace Brooks Pointer updates") \ - \ - develop(bool, ShenandoahTraceEvacuations, false, \ - "Trace Shenandoah Evacuations") \ - \ develop(bool, ShenandoahVerifyWritesToFromSpace, false, \ "Use Memory Protection to signal illegal writes to from space") \ \ develop(bool, ShenandoahVerifyReadsToFromSpace, false, \ "Use Memory Protection to signal illegal reads to from space") \ \ - develop(bool, ShenandoahTraceConcurrentMarking, false, \ - "Trace Concurrent Marking") \ - \ - develop(bool, ShenandoahTraceUpdates, false, \ - "Trace Shenandoah Updates") \ - \ - develop(bool, ShenandoahTraceTLabs, false, \ - "Trace TLabs in Shenandoah Heap") \ - \ - product(bool, ShenandoahProcessReferences, true, \ - "Enable processing of (soft/weak/..) 
references in Shenandoah") \ - \ - develop(bool, ShenandoahTraceWeakReferences, false, \ - "Trace Weak Reference Processing in Shenandoah Heap") \ - \ - product(bool, ShenandoahGCVerbose, false, \ - "Verbose information about the Shenandoah garbage collector") \ - \ - product(bool, ShenandoahLogConfig, false, \ - "Log information about Shenandoah's configuration settings") \ - \ develop(bool, ShenandoahVerify, false, \ "Verify the Shenandoah garbage collector") \ \ - product(bool, ShenandoahWriteBarrier, true, \ - "Turn on/off write barriers in Shenandoah") \ - \ - product(bool, ShenandoahReadBarrier, true, \ - "Turn on/off read barriers in Shenandoah") \ - \ - product(ccstr, ShenandoahGCHeuristics, "dynamic", \ - "The heuristics to use in Shenandoah GC; possible values: " \ - "statusquo, aggressive, halfway, lazy, dynamic") \ - \ - product(uintx, ShenandoahGarbageThreshold, 60, \ - "Sets the percentage of garbage a region need to contain before " \ - "it can be marked for collection. Applies to " \ - "Shenandoah GC dynamic Heuristic mode only (ignored otherwise)") \ - \ - product(uintx, ShenandoahFreeThreshold, 25, \ - "Set the percentage of heap free in relation to the total " \ - "capacity before a region can enter the concurrent marking " \ - "phase. Applies to Shenandoah GC dynamic Heuristic mode only " \ - "(ignored otherwise)") \ - \ - product(uintx, ShenandoahInitialFreeThreshold, 50, \ - "Set the percentage of heap free in relation to the total " \ - "capacity before a region can enter the concurrent marking " \ - "phase. Applies to Shenandoah GC dynamic Heuristic mode only " \ - "(ignored otherwise)") \ - \ - product(uintx, ShenandoahAllocationThreshold, 0, \ - "Set the number of bytes allocated since last GC cycle before" \ - "a region can enter the concurrent marking " \ - "phase. Applies to Shenandoah GC dynamic Heuristic mode only " \ - "(ignored otherwise)") \ - \ - product(uintx, ShenandoahTargetHeapOccupancy, 80, \ - "Sets the target maximum percentage occupance of the heap we" \ - "would like to maintain." \ - "Shenandoah GC newadaptive Heuristic mode only.") \ - \ - product(uintx, ShenandoahAllocReserveRegions, 10, \ - "How many regions should be kept as allocation reserve, before " \ - "Shenandoah attempts to grow the heap") \ - \ - product(bool, ShenandoahWarnings, false, \ - "Print Shenandoah related warnings. Useful for Shenandoah devs.") \ - \ - product(bool, ShenandoahPrintCollectionSet, false, \ - "Print the collection set before each GC phase") \ - \ develop(bool, VerifyStrictOopOperations, false, \ "Verify that == and != are not used on oops. Only in fastdebug") \ \ - experimental(bool, ShenandoahTraceStringSymbolTableScrubbing, false, \ - "Trace information string and symbol table scrubbing.") + develop(bool, ShenandoahVerifyOptoBarriers, false, \ + "Verify no missing barriers in c2") \ + \ + product(bool, ShenandoahAlwaysPreTouch, false, \ + "Pre-touch heap memory, overrides global AlwaysPreTouch") \ + \ + experimental(intx, ShenandoahMarkScanPrefetch, 32, \ + "How many objects to prefetch ahead when traversing mark bitmaps." 
\ + "Set to 0 to disable prefetching.") \ -SHENANDOAH_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG) -#endif // SHARE_VM_GC_IMPLEMENTATION_SHENANDOAH_SHENANDOAH_GLOBALS_HPP +SHENANDOAH_FLAGS(DECLARE_DEVELOPER_FLAG, \ + DECLARE_PD_DEVELOPER_FLAG, \ + DECLARE_PRODUCT_FLAG, \ + DECLARE_PD_PRODUCT_FLAG, \ + DECLARE_DIAGNOSTIC_FLAG, \ + DECLARE_EXPERIMENTAL_FLAG, \ + DECLARE_NOTPRODUCT_FLAG, \ + DECLARE_MANAGEABLE_FLAG, \ + DECLARE_PRODUCT_RW_FLAG) + +#endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAH_GLOBALS_HPP diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp --- a/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -22,6 +22,7 @@ */ #include "gc_implementation/shared/gcTraceTime.hpp" +#include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc_implementation/shenandoah/shenandoahMarkCompact.hpp" #include "gc_implementation/shenandoah/vm_operations_shenandoah.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" @@ -36,13 +37,11 @@ void VM_ShenandoahInitMark::doit() { ShenandoahHeap *sh = (ShenandoahHeap*) Universe::heap(); - GCTraceTime time("Pause Init-Mark", ShenandoahTracePhases, true, sh->shenandoahPolicy()->conc_timer(), sh->tracer()->gc_id()); + GCTraceTime time("Pause Init-Mark", ShenandoahLogInfo, true, sh->gc_timer(), sh->tracer()->gc_id()); sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::init_mark); - assert(sh->is_bitmap_clear(), "need clear marking bitmap"); + assert(sh->is_next_bitmap_clear(), "need clear marking bitmap"); - if (ShenandoahGCVerbose) - tty->print("vm_ShenandoahInitMark\n"); sh->start_concurrent_marking(); if (UseTLAB) { sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::resize_tlabs); @@ -58,9 +57,13 @@ return VMOp_ShenandoahFullGC; } +VM_ShenandoahFullGC::VM_ShenandoahFullGC(GCCause::Cause gc_cause) : + _gc_cause(gc_cause) { +} + void VM_ShenandoahFullGC::doit() { - ShenandoahMarkCompact::do_mark_compact(); + ShenandoahMarkCompact::do_mark_compact(_gc_cause); ShenandoahHeap *sh = ShenandoahHeap::heap(); if (UseTLAB) { sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::resize_tlabs); @@ -95,16 +98,12 @@ void VM_ShenandoahStartEvacuation::doit() { - // We need to do the finish mark here, so that a JNI critical region - // can't divide it from evacuation start. It is critical that we + // It is critical that we // evacuate roots right after finishing marking, so that we don't // get unmarked objects in the roots. ShenandoahHeap *sh = ShenandoahHeap::heap(); if (! 
sh->cancelled_concgc()) { - if (ShenandoahGCVerbose) - tty->print("vm_ShenandoahFinalMark\n"); - - GCTraceTime time("Pause Init-Evacuation", ShenandoahTracePhases, true, sh->shenandoahPolicy()->conc_timer(), sh->tracer()->gc_id()); + GCTraceTime time("Pause Final Mark", ShenandoahLogInfo, true, sh->gc_timer(), sh->tracer()->gc_id()); sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::final_mark); sh->concurrentMark()->finish_mark_from_roots(); sh->stop_concurrent_marking(); @@ -124,9 +123,9 @@ sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::init_evac); } else { + GCTraceTime time("Cancel concurrent Mark", ShenandoahLogInfo, true, sh->gc_timer(), sh->tracer()->gc_id()); sh->concurrentMark()->cancel(); sh->stop_concurrent_marking(); - sh->recycle_dirty_regions(); } } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.hpp --- a/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -60,7 +60,10 @@ }; class VM_ShenandoahFullGC : public VM_ShenandoahReferenceOperation { - public: +private: + GCCause::Cause _gc_cause; +public: + VM_ShenandoahFullGC(GCCause::Cause gc_cause); VMOp_Type type() const; void doit(); const char* name() const; diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_interface/collectedHeap.cpp --- a/src/share/vm/gc_interface/collectedHeap.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_interface/collectedHeap.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -132,6 +132,14 @@ assert_locked_or_safepoint(CodeCache_lock); } +void CollectedHeap::pin_object(oop o) { + // Defaults to no-op +} + +void CollectedHeap::unpin_object(oop o) { + // Defaults to no-op +} + void CollectedHeap::trace_heap(GCWhen::Type when, GCTracer* gc_tracer) { const GCHeapSummary& heap_summary = create_heap_summary(); gc_tracer->report_gc_heap_summary(when, heap_summary); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/gc_interface/collectedHeap.hpp --- a/src/share/vm/gc_interface/collectedHeap.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/gc_interface/collectedHeap.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -613,6 +613,14 @@ virtual void register_nmethod(nmethod* nm); virtual void unregister_nmethod(nmethod* nm); + // The following two methods are there to support object pinning for JNI critical + // regions. They are called whenever a thread enters or leaves a JNI critical + // region and requires an object not to move. Notice that there's another + // mechanism for GCs to implement critical regions (see gcLocker.hpp). The default + // implementation does nothing.
+ virtual void pin_object(oop o); + virtual void unpin_object(oop o); + void trace_heap_before_gc(GCTracer* gc_tracer); void trace_heap_after_gc(GCTracer* gc_tracer); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/memory/barrierSet.cpp --- a/src/share/vm/memory/barrierSet.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/memory/barrierSet.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -42,6 +42,29 @@ } // count is number of array elements being written +void BarrierSet::write_ref_array(HeapWord* start, size_t count) { + assert(count <= (size_t)max_intx, "count too large"); + HeapWord* end = (HeapWord*)((char*)start + (count*heapOopSize)); + // In the case of compressed oops, start and end may potentially be misaligned; + // so we need to conservatively align the first downward (this is not + // strictly necessary for current uses, but a case of good hygiene and, + // if you will, aesthetics) and the second upward (this is essential for + // current uses) to a HeapWord boundary, so we mark all cards overlapping + // this write. If this evolves in the future to calling a + // logging barrier of narrow oop granularity, like the pre-barrier for G1 + // (mentioned here merely by way of example), we will need to change this + // interface, so it is "exactly precise" (if i may be allowed the adverbial + // redundancy for emphasis) and does not include narrow oop slots not + // included in the original write interval. + HeapWord* aligned_start = (HeapWord*)align_size_down((uintptr_t)start, HeapWordSize); + HeapWord* aligned_end = (HeapWord*)align_size_up ((uintptr_t)end, HeapWordSize); + // If compressed oops were not being used, these should already be aligned + assert(UseCompressedOops || (aligned_start == start && aligned_end == end), + "Expected heap word alignment of start and end"); + write_ref_array_work(MemRegion(aligned_start, aligned_end)); +} + +// count is number of array elements being written void BarrierSet::static_write_ref_array_post(HeapWord* start, size_t count) { // simply delegate to instance method Universe::heap()->barrier_set()->write_ref_array(start, count); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/memory/barrierSet.hpp --- a/src/share/vm/memory/barrierSet.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/memory/barrierSet.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -141,7 +141,7 @@ bool dest_uninitialized = false) {} // Below count is the # array elements being written, starting // at the address "start", which may not necessarily be HeapWord-aligned - inline void write_ref_array(HeapWord* start, size_t count); + virtual void write_ref_array(HeapWord* start, size_t count); // Static versions, suitable for calling from generated code; // count is # array elements being written, starting with "start", diff -r b1cf900aa021 -r 87059e2365be src/share/vm/memory/barrierSet.inline.hpp --- a/src/share/vm/memory/barrierSet.inline.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/memory/barrierSet.inline.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -48,34 +48,6 @@ } } -// count is number of array elements being written -void BarrierSet::write_ref_array(HeapWord* start, size_t count) { - assert(count <= (size_t)max_intx, "count too large"); - HeapWord* end = (HeapWord*)((char*)start + (count*heapOopSize)); - // In the case of compressed oops, start and end may potentially be misaligned; - // so we need to conservatively align the first downward (this is not - // strictly necessary for current uses, but a case of good hygiene and, - // if you will, aesthetics) and the second upward 
(this is essential for - // current uses) to a HeapWord boundary, so we mark all cards overlapping - // this write. If this evolves in the future to calling a - // logging barrier of narrow oop granularity, like the pre-barrier for G1 - // (mentioned here merely by way of example), we will need to change this - // interface, so it is "exactly precise" (if i may be allowed the adverbial - // redundancy for emphasis) and does not include narrow oop slots not - // included in the original write interval. - HeapWord* aligned_start = (HeapWord*)align_size_down((uintptr_t)start, HeapWordSize); - HeapWord* aligned_end = (HeapWord*)align_size_up ((uintptr_t)end, HeapWordSize); - // If compressed oops were not being used, these should already be aligned - assert(UseCompressedOops || (aligned_start == start && aligned_end == end), - "Expected heap word alignment of start and end"); -#if 0 - warning("Post:\t" INTPTR_FORMAT "[" SIZE_FORMAT "] : [" INTPTR_FORMAT","INTPTR_FORMAT")\t", - start, count, aligned_start, aligned_end); -#endif - write_ref_array_work(MemRegion(aligned_start, aligned_end)); -} - - void BarrierSet::write_region(MemRegion mr) { if (kind() == CardTableModRef) { ((CardTableModRefBS*)this)->inline_write_region(mr); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/memory/genMarkSweep.cpp --- a/src/share/vm/memory/genMarkSweep.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/memory/genMarkSweep.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -179,7 +179,7 @@ void GenMarkSweep::deallocate_stacks() { - if (!UseG1GC && !UseShenandoahGC) { + if (!UseG1GC) { GenCollectedHeap* gch = GenCollectedHeap::heap(); gch->release_scratch(); } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/memory/space.inline.hpp --- a/src/share/vm/memory/space.inline.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/memory/space.inline.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -103,7 +103,7 @@ /* prefetch beyond end */ \ Prefetch::write(end, interval); \ end += block_size(end); \ - } while (end < t && (!block_is_obj(end) || !oop(end)->is_gc_marked())); \ + } while (end < t && (!block_is_obj(end) || !oop(end)->is_gc_marked()));\ \ /* see if we might want to pretend this object is alive so that \ * we don't have to compact quite as often. 
\ diff -r b1cf900aa021 -r 87059e2365be src/share/vm/oops/instanceKlass.cpp --- a/src/share/vm/oops/instanceKlass.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/oops/instanceKlass.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -531,7 +531,6 @@ void InstanceKlass::eager_initialize_impl(instanceKlassHandle this_oop) { EXCEPTION_MARK; oop init_lock = this_oop->init_lock(); - init_lock = oopDesc::bs()->write_barrier(init_lock); ObjectLocker ol(init_lock, THREAD, init_lock != NULL); // abort if someone beat us to the initialization @@ -678,7 +677,6 @@ // verification & rewriting { oop init_lock = this_oop->init_lock(); - init_lock = oopDesc::bs()->write_barrier(init_lock); ObjectLocker ol(init_lock, THREAD, init_lock != NULL); // rewritten will have been set if loader constraint error found // on an earlier link attempt @@ -852,7 +850,6 @@ // Step 1 { oop init_lock = this_oop->init_lock(); - init_lock = oopDesc::bs()->write_barrier(init_lock); ObjectLocker ol(init_lock, THREAD, init_lock != NULL); Thread *self = THREAD; // it's passed the current thread @@ -985,7 +982,6 @@ void InstanceKlass::set_initialization_state_and_notify_impl(instanceKlassHandle this_oop, ClassState state, TRAPS) { oop init_lock = this_oop->init_lock(); - init_lock = oopDesc::bs()->write_barrier(init_lock); ObjectLocker ol(init_lock, THREAD, init_lock != NULL); this_oop->set_init_state(state); this_oop->fence_and_clear_init_lock(); @@ -2589,7 +2585,7 @@ } address InstanceKlass::static_field_addr(int offset) { - return (address)(offset + InstanceMirrorKlass::offset_of_static_fields() + cast_from_oop(java_mirror())); + return (address)(offset + InstanceMirrorKlass::offset_of_static_fields() + cast_from_oop(oopDesc::bs()->write_barrier(java_mirror()))); } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/oops/instanceRefKlass.cpp --- a/src/share/vm/oops/instanceRefKlass.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/oops/instanceRefKlass.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -512,7 +512,7 @@ bool InstanceRefKlass::owns_pending_list_lock(JavaThread* thread) { if (java_lang_ref_Reference::pending_list_lock() == NULL) return false; - Handle h_lock(thread, oopDesc::bs()->write_barrier(java_lang_ref_Reference::pending_list_lock())); + Handle h_lock(thread, java_lang_ref_Reference::pending_list_lock()); return ObjectSynchronizer::current_thread_holds_lock(thread, h_lock); } @@ -525,7 +525,7 @@ // to hold the pending list lock. We want to free this handle. HandleMark hm; - Handle h_lock(THREAD, oopDesc::bs()->write_barrier(java_lang_ref_Reference::pending_list_lock())); + Handle h_lock(THREAD, java_lang_ref_Reference::pending_list_lock()); ObjectSynchronizer::fast_enter(h_lock, pending_list_basic_lock, false, THREAD); assert(ObjectSynchronizer::current_thread_holds_lock( JavaThread::current(), h_lock), @@ -543,7 +543,7 @@ // to hold the pending list lock. We want to free this handle. 
HandleMark hm; - Handle h_lock(THREAD, oopDesc::bs()->write_barrier(java_lang_ref_Reference::pending_list_lock())); + Handle h_lock(THREAD, java_lang_ref_Reference::pending_list_lock()); assert(ObjectSynchronizer::current_thread_holds_lock( JavaThread::current(), h_lock), "Lock should be held"); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/oops/oop.cpp --- a/src/share/vm/oops/oop.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/oops/oop.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -99,7 +99,7 @@ // slow case; we have to acquire the micro lock in order to locate the header ResetNoHandleMark rnm; // Might be called from LEAF/QUICK ENTRY HandleMark hm; - Handle object(oopDesc::bs()->write_barrier(this)); + Handle object(this); return ObjectSynchronizer::identity_hash_value_for(object); } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/oops/oop.hpp --- a/src/share/vm/oops/oop.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/oops/oop.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -69,10 +69,16 @@ static BarrierSet* _bs; public: - markOop mark() const { return _mark; } + markOop mark() const { + oop p = bs()->read_barrier((oop) this); + return p->_mark; + } markOop* mark_addr() const { return (markOop*) &_mark; } - void set_mark(volatile markOop m) { _mark = m; } + void set_mark(volatile markOop m) { + oop p = bs()->write_barrier(this); + p->_mark = m; + } void release_set_mark(markOop m); markOop cas_set_mark(markOop new_mark, markOop old_mark); @@ -152,13 +158,13 @@ inline static bool safe_equals(oop o1, oop o2) { assert(bs()->is_safe(o1), "o1 not safe?"); - assert(bs()->is_safe(o2), "o1 not safe?"); + assert(bs()->is_safe(o2), "o2 not safe?"); return unsafe_equals(o1, o2); } inline static bool safe_equals(narrowOop o1, narrowOop o2) { assert(bs()->is_safe(o1), "o1 not safe?"); - assert(bs()->is_safe(o2), "o1 not safe?"); + assert(bs()->is_safe(o2), "o2 not safe?"); return unsafe_equals(o1, o2); } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/oops/oop.inline.hpp --- a/src/share/vm/oops/oop.inline.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/oops/oop.inline.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -65,11 +65,13 @@ // We need a separate file to avoid circular references inline void oopDesc::release_set_mark(markOop m) { - OrderAccess::release_store_ptr(&_mark, m); + oop p = bs()->write_barrier(this); + OrderAccess::release_store_ptr(&p->_mark, m); } inline markOop oopDesc::cas_set_mark(markOop new_mark, markOop old_mark) { - return (markOop) Atomic::cmpxchg_ptr(new_mark, &_mark, old_mark); + oop p = bs()->write_barrier(this); + return (markOop) Atomic::cmpxchg_ptr(new_mark, &p->_mark, old_mark); } inline Klass* oopDesc::klass() const { @@ -201,7 +203,7 @@ assert(OopEncodingHeapMax > pd, "change encoding max if new encoding"); uint64_t result = pd >> shift; assert((result & CONST64(0xffffffff00000000)) == 0, "narrow oop overflow"); - assert(decode_heap_oop(result) == v, "reversibility"); + assert(oopDesc::unsafe_equals(decode_heap_oop(result), v), "reversibility"); return (narrowOop)result; } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/opto/compile.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -1440,7 +1440,7 @@ tj = TypeInstPtr::MARK; ta = TypeAryPtr::RANGE; // generic ignored junk ptr = TypePtr::BotPTR; - } else if (offset == BrooksPointer::BYTE_OFFSET && UseShenandoahGC) { + } else if (offset == BrooksPointer::byte_offset() && UseShenandoahGC) { // Need to distinguish brooks ptr as 
is. tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,offset); } else { // Random constant offset into array body @@ -1507,7 +1507,7 @@ if (!is_known_inst) { // Do it only for non-instance types tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset); } - } else if ((offset != BrooksPointer::BYTE_OFFSET || !UseShenandoahGC) && (offset < 0 || offset >= k->size_helper() * wordSize)) { + } else if ((offset != BrooksPointer::byte_offset() || !UseShenandoahGC) && (offset < 0 || offset >= k->size_helper() * wordSize)) { // Static fields are in the space above the normal instance // fields in the java.lang.Class instance. if (to->klass() != ciEnv::current()->Class_klass()) { @@ -1606,7 +1606,7 @@ (offset == oopDesc::mark_offset_in_bytes() && tj->base() == Type::AryPtr) || (offset == oopDesc::klass_offset_in_bytes() && tj->base() == Type::AryPtr) || (offset == arrayOopDesc::length_offset_in_bytes() && tj->base() == Type::AryPtr) || - (offset == BrooksPointer::BYTE_OFFSET && tj->base() == Type::AryPtr && UseShenandoahGC), + (offset == BrooksPointer::byte_offset() && tj->base() == Type::AryPtr && UseShenandoahGC), "For oops, klasses, raw offset must be constant; for arrays the offset is never known" ); assert( tj->ptr() != TypePtr::TopPTR && tj->ptr() != TypePtr::AnyNull && diff -r b1cf900aa021 -r 87059e2365be src/share/vm/opto/escape.cpp --- a/src/share/vm/opto/escape.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/opto/escape.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -2043,7 +2043,7 @@ } else if (adr_type->isa_aryptr()) { if (offset == arrayOopDesc::length_offset_in_bytes()) { // Ignore array length load. - } else if (UseShenandoahGC && offset == BrooksPointer::BYTE_OFFSET) { + } else if (UseShenandoahGC && offset == BrooksPointer::byte_offset()) { // Shenandoah read barrier. bt = T_ARRAY; } else if (find_second_addp(n, n->in(AddPNode::Base)) != NULL) { diff -r b1cf900aa021 -r 87059e2365be src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/opto/graphKit.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -4240,7 +4240,7 @@ if (obj_type->higher_equal(TypePtr::NULL_PTR)) { return obj; } - const TypePtr* adr_type = obj_type->is_ptr()->add_offset(BrooksPointer::BYTE_OFFSET); + const TypePtr* adr_type = obj_type->is_ptr()->add_offset(BrooksPointer::byte_offset()); Node* mem = use_mem ? memory(adr_type) : immutable_memory(); if (! ShenandoahBarrierNode::needs_barrier(&_gvn, NULL, obj, mem, use_mem)) { @@ -4294,7 +4294,7 @@ return obj; } const Type* obj_type = obj->bottom_type(); - const TypePtr* adr_type = obj_type->is_ptr()->add_offset(BrooksPointer::BYTE_OFFSET); + const TypePtr* adr_type = obj_type->is_ptr()->add_offset(BrooksPointer::byte_offset()); if (obj_type->meet(TypePtr::NULL_PTR) == obj_type->remove_speculative()) { // We don't know if it's null or not. Need null-check. enum { _not_null_path = 1, _null_path, PATH_LIMIT }; diff -r b1cf900aa021 -r 87059e2365be src/share/vm/opto/macro.cpp --- a/src/share/vm/opto/macro.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/opto/macro.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -1678,7 +1678,7 @@ if (UseShenandoahGC) { // Initialize Shenandoah brooks pointer to point to the object itself. - rawmem = make_store(control, rawmem, object, BrooksPointer::BYTE_OFFSET, object, T_OBJECT); + rawmem = make_store(control, rawmem, object, BrooksPointer::byte_offset(), object, T_OBJECT); } // Clear the object body, if necessary. 
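[Editor's note] The C2 hunks above consistently replace the compile-time constant BrooksPointer::BYTE_OFFSET with the accessor BrooksPointer::byte_offset(), and the macro.cpp hunk initializes the Brooks pointer of every newly allocated object to point back at the object itself. For readers unfamiliar with the scheme, here is a self-contained C++ sketch of the layout these hunks manipulate, assuming a one-word forwarding pointer stored immediately before the object (typically byte offset -8 on 64-bit); the names HeapWord_t, alloc_with_fwd_ptr and resolve are hypothetical and do not appear in the VM:

    #include <cstdint>
    #include <cstdlib>

    typedef uintptr_t HeapWord_t; // stand-in for one heap word

    // Allocate 'size_in_words' words plus one extra word in front for the
    // forwarding pointer, and point that word at the object itself -- the
    // same invariant the make_store() call in macro.cpp establishes.
    HeapWord_t* alloc_with_fwd_ptr(size_t size_in_words) {
      HeapWord_t* base = (HeapWord_t*) calloc(size_in_words + 1, sizeof(HeapWord_t));
      HeapWord_t* obj  = base + 1;     // object body starts after the fwd pointer
      obj[-1] = (HeapWord_t) obj;      // fwd pointer initially points to self
      return obj;
    }

    // A read barrier resolves through the forwarding pointer: it yields either
    // the object itself (not yet evacuated) or its to-space copy.
    inline HeapWord_t* resolve(HeapWord_t* obj) {
      return (HeapWord_t*) obj[-1];
    }

Turning the offset into a function keeps this layout decision in one place (BrooksPointer) instead of baking the raw constant into every C2 type and barrier computation.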
diff -r b1cf900aa021 -r 87059e2365be src/share/vm/opto/memnode.cpp --- a/src/share/vm/opto/memnode.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/opto/memnode.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -1667,7 +1667,7 @@ // as to alignment, which will therefore produce the smallest // possible base offset. const int min_base_off = arrayOopDesc::base_offset_in_bytes(T_BYTE); - const bool off_beyond_header = (off != BrooksPointer::BYTE_OFFSET || !UseShenandoahGC) && ((uint)off >= (uint)min_base_off); + const bool off_beyond_header = (off != BrooksPointer::byte_offset() || !UseShenandoahGC) && ((uint)off >= (uint)min_base_off); // Try to constant-fold a stable array element. if (FoldStableValues && ary->is_stable() && ary->const_oop() != NULL) { diff -r b1cf900aa021 -r 87059e2365be src/share/vm/opto/shenandoahSupport.cpp --- a/src/share/vm/opto/shenandoahSupport.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/opto/shenandoahSupport.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -165,7 +165,7 @@ } else if (current->is_MemBar()) { return false; // TODO: Do we need to stop at *any* membar? } else if (current->is_MergeMem()) { - const TypePtr* adr_type = phase->type(b2)->is_ptr()->add_offset(BrooksPointer::BYTE_OFFSET); + const TypePtr* adr_type = phase->type(b2)->is_ptr()->add_offset(BrooksPointer::byte_offset()); uint alias_idx = phase->C->get_alias_index(adr_type); Node* mem_in = current->as_MergeMem()->memory_at(alias_idx); return dominates_memory_rb_impl(phase, b1, b2, current->in(TypeFunc::Memory), visited); @@ -207,7 +207,7 @@ // If memory input is a MergeMem, take the appropriate slice out of it. Node* mem_in = in(Memory); if (mem_in->isa_MergeMem()) { - const TypePtr* adr_type = bottom_type()->is_ptr()->add_offset(BrooksPointer::BYTE_OFFSET); + const TypePtr* adr_type = bottom_type()->is_ptr()->add_offset(BrooksPointer::byte_offset()); uint alias_idx = phase->C->get_alias_index(adr_type); mem_in = mem_in->as_MergeMem()->memory_at(alias_idx); set_req(Memory, mem_in); @@ -264,7 +264,7 @@ Node* mem_in = in(Memory); if (mem_in->isa_MergeMem()) { - const TypePtr* adr_type = bottom_type()->is_ptr()->add_offset(BrooksPointer::BYTE_OFFSET); + const TypePtr* adr_type = bottom_type()->is_ptr()->add_offset(BrooksPointer::byte_offset()); uint alias_idx = phase->C->get_alias_index(adr_type); mem_in = mem_in->as_MergeMem()->memory_at(alias_idx); set_req(Memory, mem_in); @@ -343,7 +343,7 @@ } else if (current->is_MemBar()) { return dominates_memory_impl(phase, b1, b2, current->in(TypeFunc::Memory), visited); } else if (current->is_MergeMem()) { - const TypePtr* adr_type = phase->type(b2)->is_ptr()->add_offset(BrooksPointer::BYTE_OFFSET); + const TypePtr* adr_type = phase->type(b2)->is_ptr()->add_offset(BrooksPointer::byte_offset()); uint alias_idx = phase->C->get_alias_index(adr_type); Node* mem_in = current->as_MergeMem()->memory_at(alias_idx); return dominates_memory_impl(phase, b1, b2, current->in(TypeFunc::Memory), visited); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/opto/shenandoahSupport.hpp --- a/src/share/vm/opto/shenandoahSupport.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/opto/shenandoahSupport.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -59,8 +59,8 @@ static Node* skip_through_barrier(Node* n); virtual const class TypePtr* adr_type() const { - const TypePtr* adr_type = bottom_type()->is_ptr()->add_offset(BrooksPointer::BYTE_OFFSET); - assert(adr_type->offset() == BrooksPointer::BYTE_OFFSET, "sane offset"); + const TypePtr* adr_type = 
bottom_type()->is_ptr()->add_offset(BrooksPointer::byte_offset()); + assert(adr_type->offset() == BrooksPointer::byte_offset(), "sane offset"); assert(Compile::current()->alias_type(adr_type)->is_rewritable(), "brooks ptr must be rewritable"); return adr_type; } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/prims/jni.cpp --- a/src/share/vm/prims/jni.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/prims/jni.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -391,9 +391,9 @@ if (UsePerfData && !class_loader.is_null()) { // check whether the current caller thread holds the lock or not. // If not, increment the corresponding counter - Handle class_loader1 (THREAD, oopDesc::bs()->write_barrier(class_loader())); + Handle class_loader1 (THREAD, class_loader()); if (ObjectSynchronizer:: - query_lock_ownership((JavaThread*)THREAD, class_loader1) != + query_lock_ownership((JavaThread*)THREAD, class_loader) != ObjectSynchronizer::owner_self) { ClassLoader::sync_JNIDefineClassLockFreeCounter()->inc(); } @@ -4153,7 +4153,7 @@ THROW_(vmSymbols::java_lang_NullPointerException(), JNI_ERR); } - Handle obj(thread, oopDesc::bs()->write_barrier(JNIHandles::resolve_non_null(jobj))); + Handle obj(thread, JNIHandles::resolve_non_null(jobj)); ObjectSynchronizer::jni_enter(obj, CHECK_(JNI_ERR)); ret = JNI_OK; return ret; @@ -4181,7 +4181,7 @@ THROW_(vmSymbols::java_lang_NullPointerException(), JNI_ERR); } - Handle obj(THREAD, oopDesc::bs()->write_barrier(JNIHandles::resolve_non_null(jobj))); + Handle obj(THREAD, JNIHandles::resolve_non_null(jobj)); ObjectSynchronizer::jni_exit(obj(), CHECK_(JNI_ERR)); ret = JNI_OK; @@ -4273,6 +4273,7 @@ } oop a = JNIHandles::resolve_non_null(array); a = oopDesc::bs()->write_barrier(a); + Universe::heap()->pin_object(a); assert(a->is_array(), "just checking"); BasicType type; if (a->is_objArray()) { @@ -4300,6 +4301,21 @@ env, array, carray, mode); #endif /* USDT2 */ // The array, carray and mode arguments are ignored + oop a = JNIHandles::resolve_non_null(array); + a = oopDesc::bs()->read_barrier(a); +#ifdef ASSERT + assert(a->is_array(), "just checking"); + BasicType type; + if (a->is_objArray()) { + type = T_OBJECT; + } else { + type = TypeArrayKlass::cast(a->klass())->element_type(); + } + void* ret = arrayOop(a)->base(type); + assert(ret == carray, "check array not moved"); +#endif + + Universe::heap()->unpin_object(a); GC_locker::unlock_critical(thread); #ifndef USDT2 DTRACE_PROBE(hotspot_jni, ReleasePrimitiveArrayCritical__return); @@ -4323,6 +4339,8 @@ *isCopy = JNI_FALSE; } oop s = JNIHandles::resolve_non_null(string); + s = oopDesc::bs()->write_barrier(s); + Universe::heap()->pin_object(s); int s_len = java_lang_String::length(s); typeArrayOop s_value = java_lang_String::value(s); int s_offset = java_lang_String::offset(s); @@ -4351,6 +4369,9 @@ env, str, (uint16_t *) chars); #endif /* USDT2 */ // The str and chars arguments are ignored + oop s = JNIHandles::resolve_non_null(str); + s = oopDesc::bs()->read_barrier(s); + Universe::heap()->unpin_object(s); GC_locker::unlock_critical(thread); #ifndef USDT2 DTRACE_PROBE(hotspot_jni, ReleaseStringCritical__return); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/prims/jvm.cpp --- a/src/share/vm/prims/jvm.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/prims/jvm.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -542,13 +542,13 @@ JVM_ENTRY(jint, JVM_IHashCode(JNIEnv* env, jobject handle)) JVMWrapper("JVM_IHashCode"); // as implemented in the classic virtual machine; return 0 if object is NULL - return handle == NULL ? 
0 : ObjectSynchronizer::FastHashCode (THREAD, oopDesc::bs()->write_barrier(JNIHandles::resolve_non_null(handle))) ; + return handle == NULL ? 0 : ObjectSynchronizer::FastHashCode (THREAD, JNIHandles::resolve_non_null(handle)) ; JVM_END JVM_ENTRY(void, JVM_MonitorWait(JNIEnv* env, jobject handle, jlong ms)) JVMWrapper("JVM_MonitorWait"); - Handle obj(THREAD, oopDesc::bs()->write_barrier(JNIHandles::resolve_non_null(handle))); + Handle obj(THREAD, JNIHandles::resolve_non_null(handle)); JavaThreadInObjectWaitState jtiows(thread, ms != 0); if (JvmtiExport::should_post_monitor_wait()) { JvmtiExport::post_monitor_wait((JavaThread *)THREAD, (oop)obj(), ms); @@ -565,21 +565,21 @@ JVM_ENTRY(void, JVM_MonitorNotify(JNIEnv* env, jobject handle)) JVMWrapper("JVM_MonitorNotify"); - Handle obj(THREAD, oopDesc::bs()->write_barrier(JNIHandles::resolve_non_null(handle))); + Handle obj(THREAD, JNIHandles::resolve_non_null(handle)); ObjectSynchronizer::notify(obj, CHECK); JVM_END JVM_ENTRY(void, JVM_MonitorNotifyAll(JNIEnv* env, jobject handle)) JVMWrapper("JVM_MonitorNotifyAll"); - Handle obj(THREAD, oopDesc::bs()->write_barrier(JNIHandles::resolve_non_null(handle))); + Handle obj(THREAD, JNIHandles::resolve_non_null(handle)); ObjectSynchronizer::notifyall(obj, CHECK); JVM_END JVM_ENTRY(jobject, JVM_Clone(JNIEnv* env, jobject handle)) JVMWrapper("JVM_Clone"); - Handle obj(THREAD, oopDesc::bs()->read_barrier(JNIHandles::resolve_non_null(handle))); + Handle obj(THREAD, JNIHandles::resolve_non_null(handle)); const KlassHandle klass (THREAD, obj->klass()); JvmtiVMObjectAllocEventCollector oam; @@ -621,7 +621,7 @@ // The same is true of StubRoutines::object_copy and the various oop_copy // variants, and of the code generated by the inline_native_clone intrinsic. assert(MinObjAlignmentInBytes >= BytesPerLong, "objects misaligned"); - Copy::conjoint_jlongs_atomic((jlong*)obj(), (jlong*)new_obj_oop, + Copy::conjoint_jlongs_atomic((jlong*) oopDesc::bs()->read_barrier(obj()), (jlong*)new_obj_oop, (size_t)align_object_size(size) / HeapWordsPerLong); // Clear the header new_obj_oop->init_mark(); @@ -1021,7 +1021,6 @@ ClassFileStream st((u1*) buf, len, (char *)source); Handle class_loader (THREAD, JNIHandles::resolve(loader)); if (UsePerfData) { - Handle class_loader1 (THREAD, oopDesc::bs()->write_barrier(class_loader())); is_lock_held_by_thread(class_loader, ClassLoader::sync_JVMDefineClassLockFreeCounter(), THREAD); @@ -1086,8 +1085,7 @@ // us to pass the NULL as the initiating class loader. Handle h_loader(THREAD, JNIHandles::resolve(loader)); if (UsePerfData) { - Handle h_loader1(THREAD, oopDesc::bs()->write_barrier(h_loader())); - is_lock_held_by_thread(h_loader1, + is_lock_held_by_thread(h_loader, ClassLoader::sync_JVMFindLoadedClassLockFreeCounter(), THREAD); } @@ -3065,7 +3063,7 @@ if (receiver != NULL) { // Check if exception is getting thrown at self (use oop equality, since the // target object might exit) - if (java_thread == thread->threadObj()) { + if (oopDesc::equals(java_thread, thread->threadObj())) { THROW_OOP(java_throwable); } else { // Enques a VM_Operation to stop all threads and then deliver the exception... 
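
The last hunk above swaps a raw pointer comparison for oopDesc::equals(). Under Shenandoah, two oops can denote the same object while one of them still points at the old copy, so equality has to look through the forwarding pointer on both sides. A toy model of that rule, with hypothetical types rather than the HotSpot oopDesc API:

#include <cassert>
#include <cstddef>

struct Obj { Obj* fwd; };  // fwd points to the new copy, or to itself

// Resolve an oop to its current copy (the read-barrier step).
static Obj* resolve(Obj* o) { return o == nullptr ? nullptr : o->fwd; }

// Barrier-aware equality: compare the resolved copies, not the raw pointers.
static bool equals(Obj* a, Obj* b) { return resolve(a) == resolve(b); }

int main() {
  Obj to_copy   = {nullptr};
  to_copy.fwd   = &to_copy;      // not (yet) forwarded
  Obj from_copy = {&to_copy};    // old copy, forwarded to to_copy

  assert(&from_copy != &to_copy);        // raw comparison says "different"
  assert(equals(&from_copy, &to_copy));  // barrier-aware equality says "same"
  return 0;
}
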
@@ -3351,7 +3349,7 @@ if (obj == NULL) { THROW_(vmSymbols::java_lang_NullPointerException(), JNI_FALSE); } - Handle h_obj(THREAD, oopDesc::bs()->write_barrier(JNIHandles::resolve(obj))); + Handle h_obj(THREAD, JNIHandles::resolve(obj)); return ObjectSynchronizer::current_thread_holds_lock((JavaThread*)THREAD, h_obj); JVM_END diff -r b1cf900aa021 -r 87059e2365be src/share/vm/prims/jvmtiEnv.cpp --- a/src/share/vm/prims/jvmtiEnv.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/prims/jvmtiEnv.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -468,7 +468,7 @@ // lock the loader Thread* thread = Thread::current(); HandleMark hm; - Handle loader_lock = Handle(thread, oopDesc::bs()->write_barrier(SystemDictionary::system_loader_lock())); + Handle loader_lock = Handle(thread, SystemDictionary::system_loader_lock()); ObjectLocker ol(loader_lock, thread); @@ -514,7 +514,7 @@ // lock the loader Thread* THREAD = Thread::current(); - Handle loader = Handle(THREAD, oopDesc::bs()->write_barrier(SystemDictionary::java_system_loader())); + Handle loader = Handle(THREAD, SystemDictionary::java_system_loader()); ObjectLocker ol(loader, THREAD); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/prims/unsafe.cpp --- a/src/share/vm/prims/unsafe.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/prims/unsafe.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -375,8 +375,8 @@ return v; } else { - Handle p (THREAD, oopDesc::bs()->read_barrier(JNIHandles::resolve(obj))); - jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset)); + Handle p (THREAD, JNIHandles::resolve(obj)); + jlong* addr = (jlong*)(index_oop_from_field_offset_long(oopDesc::bs()->read_barrier(p()), offset)); MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag); jlong value = Atomic::load(addr); return value; @@ -391,8 +391,8 @@ SET_FIELD_VOLATILE(obj, offset, jlong, x); } else { - Handle p (THREAD, oopDesc::bs()->write_barrier(JNIHandles::resolve(obj))); - jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset)); + Handle p (THREAD, JNIHandles::resolve(obj)); + jlong* addr = (jlong*)(index_oop_from_field_offset_long(oopDesc::bs()->write_barrier(p()), offset)); MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag); Atomic::store(x, addr); } @@ -503,8 +503,8 @@ SET_FIELD_VOLATILE(obj, offset, jlong, x); } else { - Handle p (THREAD, oopDesc::bs()->write_barrier(JNIHandles::resolve(obj))); - jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset)); + Handle p (THREAD, JNIHandles::resolve(obj)); + jlong* addr = (jlong*)(index_oop_from_field_offset_long(oopDesc::bs()->write_barrier(p()), offset)); MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag); Atomic::store(x, addr); } @@ -1193,7 +1193,7 @@ if (jobj == NULL) { THROW(vmSymbols::java_lang_NullPointerException()); } - Handle obj(thread, oopDesc::bs()->write_barrier(JNIHandles::resolve_non_null(jobj))); + Handle obj(thread, JNIHandles::resolve_non_null(jobj)); ObjectSynchronizer::jni_enter(obj, CHECK); } UNSAFE_END @@ -1205,7 +1205,7 @@ if (jobj == NULL) { THROW_(vmSymbols::java_lang_NullPointerException(), JNI_FALSE); } - Handle obj(thread, oopDesc::bs()->write_barrier(JNIHandles::resolve_non_null(jobj))); + Handle obj(thread, JNIHandles::resolve_non_null(jobj)); bool res = ObjectSynchronizer::jni_try_enter(obj, CHECK_0); return (res ? 
JNI_TRUE : JNI_FALSE); } @@ -1218,7 +1218,7 @@ if (jobj == NULL) { THROW(vmSymbols::java_lang_NullPointerException()); } - Handle obj(THREAD, oopDesc::bs()->write_barrier(JNIHandles::resolve_non_null(jobj))); + Handle obj(THREAD, JNIHandles::resolve_non_null(jobj)); ObjectSynchronizer::jni_exit(obj(), CHECK); } UNSAFE_END @@ -1236,26 +1236,32 @@ UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject e_h, jobject x_h)) UnsafeWrapper("Unsafe_CompareAndSwapObject"); - // We are about to write to this entry so check to see if we need to copy it. - oop p = oopDesc::bs()->write_barrier(JNIHandles::resolve(obj)); + oop x = JNIHandles::resolve(x_h); + oop e = JNIHandles::resolve(e_h); + oop p = JNIHandles::resolve(obj); + + p = oopDesc::bs()->write_barrier(p); + x = oopDesc::bs()->read_barrier(x); + HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset); - oop x = JNIHandles::resolve(x_h); - x = oopDesc::bs()->read_barrier(x); - oop old = JNIHandles::resolve(e_h); jboolean success; if (UseShenandoahGC) { oop expected; do { - expected = old; - old = oopDesc::atomic_compare_exchange_oop(x, addr, expected, true); - success = oopDesc::unsafe_equals(old, expected); - } while ((! success) && oopDesc::unsafe_equals(oopDesc::bs()->read_barrier(old), oopDesc::bs()->read_barrier(expected))); + expected = e; + e = oopDesc::atomic_compare_exchange_oop(x, addr, expected, true); + success = oopDesc::unsafe_equals(e, expected); + } while ((! success) && oopDesc::unsafe_equals(oopDesc::bs()->read_barrier(e), oopDesc::bs()->read_barrier(expected))); } else { - success = oopDesc::unsafe_equals(old, oopDesc::atomic_compare_exchange_oop(x, addr, old, true)); + success = oopDesc::unsafe_equals(e, oopDesc::atomic_compare_exchange_oop(x, addr, e, true)); } - if (success) - update_barrier_set((void*)addr, x); - return success; + if (! success) { + return false; + } + + update_barrier_set((void*)addr, x); + + return true; UNSAFE_END UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint e, jint x)) diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/arguments.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -1498,11 +1498,6 @@ // the only value that can override MaxHeapSize if we are // to use UseCompressedOops is InitialHeapSize. size_t max_heap_size = MAX2(MaxHeapSize, InitialHeapSize); - if (UseShenandoahGC && FLAG_IS_DEFAULT(UseCompressedOops)) { - warning("Compressed Oops not supported with ShenandoahGC"); - FLAG_SET_ERGO(bool, UseCompressedOops, false); - FLAG_SET_ERGO(bool, UseCompressedClassPointers, false); - } if (max_heap_size <= max_heap_for_compressed_oops()) { #if !defined(COMPILER1) || defined(TIERED) @@ -1570,8 +1565,6 @@ } else if (UseG1GC) { heap_alignment = G1CollectedHeap::conservative_max_heap_alignment(); } else if (UseShenandoahGC) { - // TODO: This sucks. Can't we have a clean interface to call the GC's collector - // policy for this? 
heap_alignment = ShenandoahHeap::conservative_max_heap_alignment(); } #endif // INCLUDE_ALL_GCS @@ -1730,13 +1723,9 @@ UNSUPPORTED_OPTION(UseShenandoahGC); #endif - FLAG_SET_DEFAULT(UseDynamicNumberOfGCThreads, true); FLAG_SET_DEFAULT(ParallelGCThreads, Abstract_VM_Version::parallel_worker_threads()); - FLAG_SET_DEFAULT(VerifyBeforeExit, false); - FLAG_SET_DEFAULT(ClassUnloadingWithConcurrentMark, false); - if (FLAG_IS_DEFAULT(ConcGCThreads)) { uint conc_threads = MAX2((uintx) 1, ParallelGCThreads); FLAG_SET_DEFAULT(ConcGCThreads, conc_threads); @@ -1745,6 +1734,14 @@ if (FLAG_IS_DEFAULT(ParallelRefProcEnabled)) { FLAG_SET_DEFAULT(ParallelRefProcEnabled, true); } + + if (AlwaysPreTouch) { + // Shenandoah handles pre-touch on its own. It does not let the + // generic storage code to do the pre-touch before Shenandoah has + // a chance to do it on its own. + FLAG_SET_DEFAULT(AlwaysPreTouch, false); + FLAG_SET_DEFAULT(ShenandoahAlwaysPreTouch, true); + } } #if !INCLUDE_ALL_GCS diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/biasedLocking.cpp --- a/src/share/vm/runtime/biasedLocking.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/biasedLocking.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -211,7 +211,7 @@ BasicLock* highest_lock = NULL; for (int i = 0; i < cached_monitor_info->length(); i++) { MonitorInfo* mon_info = cached_monitor_info->at(i); - if (oopDesc::safe_equals(mon_info->owner(), obj)) { + if (oopDesc::equals(mon_info->owner(), obj)) { if (TraceBiasedLocking && Verbose) { tty->print_cr(" mon_info->owner (" PTR_FORMAT ") == obj (" PTR_FORMAT ")", p2i((void *) mon_info->owner()), @@ -543,7 +543,7 @@ // the bias of the object. markOop biased_value = mark; markOop unbiased_prototype = markOopDesc::prototype()->set_age(mark->age()); - markOop res_mark = (markOop) Atomic::cmpxchg_ptr(unbiased_prototype, obj->mark_addr(), mark); + markOop res_mark = obj->cas_set_mark(unbiased_prototype, mark); if (res_mark == biased_value) { return BIAS_REVOKED; } @@ -558,8 +558,8 @@ // by another thread so we simply return and let the caller deal // with it. markOop biased_value = mark; - markOop res_mark = (markOop) Atomic::cmpxchg_ptr(prototype_header, obj->mark_addr(), mark); - assert(!(*(obj->mark_addr()))->has_bias_pattern(), "even if we raced, should still be revoked"); + markOop res_mark = obj->cas_set_mark(prototype_header, mark); + assert(! 
obj->mark()->has_bias_pattern(), "even if we raced, should still be revoked"); return BIAS_REVOKED; } else if (prototype_header->bias_epoch() != mark->bias_epoch()) { // The epoch of this biasing has expired indicating that the @@ -573,14 +573,14 @@ assert(THREAD->is_Java_thread(), ""); markOop biased_value = mark; markOop rebiased_prototype = markOopDesc::encode((JavaThread*) THREAD, mark->age(), prototype_header->bias_epoch()); - markOop res_mark = (markOop) Atomic::cmpxchg_ptr(rebiased_prototype, obj->mark_addr(), mark); + markOop res_mark = obj->cas_set_mark(rebiased_prototype, mark); if (res_mark == biased_value) { return BIAS_REVOKED_AND_REBIASED; } } else { markOop biased_value = mark; markOop unbiased_prototype = markOopDesc::prototype()->set_age(mark->age()); - markOop res_mark = (markOop) Atomic::cmpxchg_ptr(unbiased_prototype, obj->mark_addr(), mark); + markOop res_mark = obj->cas_set_mark(unbiased_prototype, mark); if (res_mark == biased_value) { return BIAS_REVOKED; } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/deoptimization.cpp --- a/src/share/vm/runtime/deoptimization.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/deoptimization.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -990,7 +990,7 @@ if (mon_info->eliminated()) { assert(!mon_info->owner_is_scalar_replaced() || realloc_failures, "reallocation was missed"); if (!mon_info->owner_is_scalar_replaced()) { - Handle obj = Handle(oopDesc::bs()->write_barrier(mon_info->owner())); + Handle obj = Handle(mon_info->owner()); markOop mark = obj->mark(); if (UseBiasedLocking && mark->has_bias_pattern()) { // New allocated objects may have the mark set to anonymously biased. @@ -1113,7 +1113,7 @@ for (int j = 0; j < monitors->number_of_monitors(); j++) { BasicObjectLock* src = monitors->at(j); if (src->obj() != NULL) { - ObjectSynchronizer::fast_exit(oopDesc::bs()->write_barrier(src->obj()), src->lock(), thread); + ObjectSynchronizer::fast_exit(src->obj(), src->lock(), thread); } } array->element(i)->free_monitors(thread); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/fieldDescriptor.hpp --- a/src/share/vm/runtime/fieldDescriptor.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/fieldDescriptor.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -98,6 +98,7 @@ bool is_static() const { return access_flags().is_static(); } bool is_final() const { return access_flags().is_final(); } + bool is_stable() const { return access_flags().is_stable(); } bool is_volatile() const { return access_flags().is_volatile(); } bool is_transient() const { return access_flags().is_transient(); } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/init.cpp --- a/src/share/vm/runtime/init.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/init.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -54,6 +54,7 @@ void os_init_globals(); // depends on VM_Version_init, before universe_init void stubRoutines_init1(); jint universe_init(); // depends on codeCache_init and stubRoutines_init +void stubRoutines_init3(); // note: StubRoutines need 3-phase init void interpreter_init(); // before any methods loaded void invocationCounter_init(); // before any methods loaded void marksweep_init(); @@ -106,6 +107,8 @@ if (status != JNI_OK) return status; + stubRoutines_init3(); + interpreter_init(); // before any methods loaded invocationCounter_init(); // before any methods loaded marksweep_init(); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/mutexLocker.cpp --- a/src/share/vm/runtime/mutexLocker.cpp Fri Nov 04 07:21:01 2016 
-0400 +++ b/src/share/vm/runtime/mutexLocker.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -52,9 +52,6 @@ Mutex* JvmtiThreadState_lock = NULL; Monitor* JvmtiPendingEvent_lock = NULL; Monitor* Heap_lock = NULL; -Monitor* ShenandoahFullGC_lock = NULL; -Monitor* ShenandoahJNICritical_lock = NULL; -Monitor* ShenandoahMemProtect_lock = NULL; Mutex* ExpandHeap_lock = NULL; Mutex* AdapterHandlerLibrary_lock = NULL; Mutex* SignatureHandlerLibrary_lock = NULL; @@ -211,9 +208,6 @@ def(SATB_Q_FL_lock , Mutex , special, true); def(SATB_Q_CBL_mon , Monitor, nonleaf, true); def(Shared_SATB_Q_lock , Mutex, nonleaf, true); - def(ShenandoahFullGC_lock , Monitor, leaf, true); - def(ShenandoahJNICritical_lock , Monitor, leaf, true); - def(ShenandoahMemProtect_lock , Monitor, native, false); } def(ParGCRareEvent_lock , Mutex , leaf , true ); def(DerivedPointerTableGC_lock , Mutex, leaf, true ); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/mutexLocker.hpp --- a/src/share/vm/runtime/mutexLocker.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/mutexLocker.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -60,9 +60,6 @@ extern Mutex* JvmtiThreadState_lock; // a lock on modification of JVMTI thread data extern Monitor* JvmtiPendingEvent_lock; // a lock on the JVMTI pending events list extern Monitor* Heap_lock; // a lock on the heap -extern Monitor* ShenandoahFullGC_lock; // a monitor to wait/notify the Shenandoah background thread on full-GC requests -extern Monitor* ShenandoahJNICritical_lock; // a monitor to wait/notify the Shenandoah background thread on JNI critical events -extern Monitor* ShenandoahMemProtect_lock; // ShenandoahGC uses this for memory protection to verify operations on the heap. extern Mutex* ExpandHeap_lock; // a lock on expanding the heap extern Mutex* AdapterHandlerLibrary_lock; // a lock on the AdapterHandlerLibrary extern Mutex* SignatureHandlerLibrary_lock; // a lock on the SignatureHandlerLibrary diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/objectMonitor.cpp --- a/src/share/vm/runtime/objectMonitor.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/objectMonitor.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -2549,6 +2549,10 @@ InitDone = 1 ; } +void* ObjectMonitor::cas_set_object(void* obj, void* expected) { + return Atomic::cmpxchg_ptr(obj, (volatile void*)&_object, expected); +} + #ifndef PRODUCT void ObjectMonitor::verify() { } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/objectMonitor.hpp --- a/src/share/vm/runtime/objectMonitor.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/objectMonitor.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -185,6 +185,7 @@ void* object() const; void* object_addr(); void set_object(void* obj); + void* cas_set_object(void* obj, void* expected_obj); bool check(TRAPS); // true if the thread owns the monitor. 
void check_slow(TRAPS); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/os.hpp --- a/src/share/vm/runtime/os.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/os.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -28,7 +28,6 @@ #include "jvmtifiles/jvmti.h" #include "runtime/atomic.hpp" #include "runtime/extendedPC.hpp" -#include "runtime/handles.hpp" #include "utilities/top.hpp" #ifdef TARGET_OS_FAMILY_linux # include "jvm_linux.h" @@ -54,6 +53,7 @@ #endif class AgentLibrary; +class methodHandle; // os defines the interface to operating system; this includes traditional // OS services (time, I/O) as well as other functionality with system- diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/safepoint.cpp --- a/src/share/vm/runtime/safepoint.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/safepoint.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -80,7 +80,6 @@ #if INCLUDE_ALL_GCS #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp" #include "gc_implementation/shared/suspendibleThreadSet.hpp" -#include "gc_implementation/shenandoah/shenandoahConcurrentThread.hpp" #endif // INCLUDE_ALL_GCS #ifdef COMPILER1 #include "c1/c1_globals.hpp" diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/sharedRuntime.cpp --- a/src/share/vm/runtime/sharedRuntime.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/sharedRuntime.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -1863,7 +1863,7 @@ int SharedRuntime::_monitor_enter_ctr=0; #endif JRT_ENTRY_NO_ASYNC(void, SharedRuntime::complete_monitor_locking_C(oopDesc* _obj, BasicLock* lock, JavaThread* thread)) - oop obj(oopDesc::bs()->write_barrier(_obj)); + oop obj(_obj); #ifndef PRODUCT _monitor_enter_ctr++; // monitor enter slow #endif @@ -1885,7 +1885,7 @@ #endif // Handles the uncommon cases of monitor unlocking in compiled code JRT_LEAF(void, SharedRuntime::complete_monitor_unlocking_C(oopDesc* _obj, BasicLock* lock)) - oop obj(oopDesc::bs()->write_barrier(_obj)); + oop obj(_obj); #ifndef PRODUCT _monitor_exit_ctr++; // monitor exit slow #endif diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/stubRoutines.cpp --- a/src/share/vm/runtime/stubRoutines.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/stubRoutines.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -43,6 +43,7 @@ BufferBlob* StubRoutines::_code1 = NULL; BufferBlob* StubRoutines::_code2 = NULL; +BufferBlob* StubRoutines::_code3 = NULL; address StubRoutines::_call_stub_return_address = NULL; address StubRoutines::_call_stub_entry = NULL; @@ -158,13 +159,15 @@ address StubRoutines::_safefetchN_continuation_pc = NULL; #endif +address StubRoutines::_shenandoah_wb_C = NULL; + // Initialization // // Note: to break cycle with universe initialization, stubs are generated in two phases. // The first one generates stubs needed during universe init (e.g., _handle_must_compile_first_entry). // The second phase includes all other stubs (which may depend on universe being initialized.) 
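
The stubRoutines.cpp change that follows turns the boolean phase flag of StubGenerator_generate into an explicit phase number and adds a third generation phase, run just before the interpreter is initialized and only when UseShenandoahGC is on. A compressed sketch of the lazy, flag-guarded pattern that initialize3() uses (illustrative names only; the real code allocates a BufferBlob under ResourceMark and TraceTime):

#include <cstdio>

struct Buffer { const char* name; };

static bool use_shenandoah = true;   // stands in for the UseShenandoahGC flag
static Buffer* code3 = nullptr;      // stands in for StubRoutines::_code3

void initialize3() {
  // Generate phase-3 stubs at most once, and only when the feature that
  // needs them is enabled. Like the real blob, the buffer lives forever.
  if (use_shenandoah && code3 == nullptr) {
    code3 = new Buffer{"StubRoutines (3)"};
    std::printf("generated %s\n", code3->name);  // StubGenerator_generate(&buffer, 3)
  }
}

int main() {
  initialize3();
  initialize3();  // second call is a no-op
  return 0;
}
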
-extern void StubGenerator_generate(CodeBuffer* code, bool all); // only interface to generators
+extern void StubGenerator_generate(CodeBuffer* code, int phase); // only interface to generators
 void StubRoutines::initialize1() {
 if (_code1 == NULL) {
@@ -175,7 +178,7 @@
 vm_exit_out_of_memory(code_size1, OOM_MALLOC_ERROR, "CodeCache: no room for StubRoutines (1)");
 }
 CodeBuffer buffer(_code1);
-    StubGenerator_generate(&buffer, false);
+    StubGenerator_generate(&buffer, 1);
 }
 }
@@ -230,7 +233,7 @@
 vm_exit_out_of_memory(code_size2, OOM_MALLOC_ERROR, "CodeCache: no room for StubRoutines (2)");
 }
 CodeBuffer buffer(_code2);
-    StubGenerator_generate(&buffer, true);
+    StubGenerator_generate(&buffer, 2);
 }
 #ifdef ASSERT
@@ -311,9 +314,22 @@
 #endif
 }
+void StubRoutines::initialize3() {
+  if (UseShenandoahGC && _code3 == NULL) {
+    ResourceMark rm;
+    TraceTime timer("StubRoutines generation 3", TraceStartupTime);
+    _code3 = BufferBlob::create("StubRoutines (3)", code_size3);
+    if (_code3 == NULL) {
+      vm_exit_out_of_memory(code_size3, OOM_MALLOC_ERROR, "CodeCache: no room for StubRoutines (3)");
+    }
+    CodeBuffer buffer(_code3);
+    StubGenerator_generate(&buffer, 3);
+  }
+}
 void stubRoutines_init1() { StubRoutines::initialize1(); }
 void stubRoutines_init2() { StubRoutines::initialize2(); }
+void stubRoutines_init3() { StubRoutines::initialize3(); }
 //
 // Default versions of arraycopy functions
diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/stubRoutines.hpp
--- a/src/share/vm/runtime/stubRoutines.hpp	Fri Nov 04 07:21:01 2016 -0400
+++ b/src/share/vm/runtime/stubRoutines.hpp	Wed Dec 07 21:03:02 2016 +0100
@@ -154,6 +154,7 @@
 static BufferBlob* _code1; // code buffer for initial routines
 static BufferBlob* _code2; // code buffer for all other routines
+  static BufferBlob* _code3; // code buffer for phase-3 (Shenandoah) routines
 // Leaf routines which implement arraycopy and their addresses
 // arraycopy operands aligned on element type boundary
@@ -243,21 +244,26 @@
 static address _safefetchN_continuation_pc;
 #endif
+  static address _shenandoah_wb_C;
+
 public:
 // Initialization/Testing
 static void initialize1(); // must happen before universe::genesis
 static void initialize2(); // must happen after universe::genesis
+  static void initialize3(); // must happen before interpreter, after universe::genesis
 static bool is_stub_code(address addr) { return contains(addr); }
 static bool contains(address addr) {
 return
 (_code1 != NULL && _code1->blob_contains(addr)) ||
-    (_code2 != NULL && _code2->blob_contains(addr)) ;
+    (_code2 != NULL && _code2->blob_contains(addr)) ||
+    (_code3 != NULL && _code3->blob_contains(addr)) ;
 }
 static CodeBlob* code1() { return _code1; }
 static CodeBlob* code2() { return _code2; }
+  static CodeBlob* code3() { return _code3; }
 // Debugging
 static jint verify_oop_count() { return _verify_oop_count; }
@@ -462,6 +468,11 @@
 static void arrayof_jlong_copy (HeapWord* src, HeapWord* dest, size_t count);
 static void arrayof_oop_copy (HeapWord* src, HeapWord* dest, size_t count);
 static void arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count);
+
+  static address shenandoah_wb_C()
+  {
+    return _shenandoah_wb_C;
+  }
 };
 #ifndef BUILTIN_SIM
diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/synchronizer.cpp
--- a/src/share/vm/runtime/synchronizer.cpp	Fri Nov 04 07:21:01 2016 -0400
+++ b/src/share/vm/runtime/synchronizer.cpp	Wed Dec 07 21:03:02 2016 +0100
@@ -157,6 +157,7 @@
 static volatile int MonitorFreeCount = 0 ; // # on gFreeList
 static volatile int MonitorPopulation = 0 ;
// # Extant -- in circulation #define CHAINMARKER (cast_to_oop(-1)) +#define CLAIMEDMARKER (cast_to_oop(-2)) // ----------------------------------------------------------------------------- // Fast Monitor Enter/Exit @@ -209,7 +210,7 @@ // swing the displaced header from the box back to the mark. if (mark == (markOop) lock) { assert (dhw->is_neutral(), "invariant") ; - if ((markOop) Atomic::cmpxchg_ptr (dhw, object->mark_addr(), mark) == mark) { + if (object->cas_set_mark(dhw, mark) == mark) { TEVENT (fast_exit: release stacklock) ; return; } @@ -231,7 +232,7 @@ // Anticipate successful CAS -- the ST of the displaced mark must // be visible <= the ST performed by the CAS. lock->set_displaced_header(mark); - if (mark == (markOop) Atomic::cmpxchg_ptr(lock, obj()->mark_addr(), mark)) { + if (mark == obj()->cas_set_mark((markOop) lock, mark)) { TEVENT (slow_enter: release stacklock) ; return ; } @@ -647,7 +648,7 @@ hash = get_next_hash(Self, obj); // allocate a new hash code temp = mark->copy_set_hash(hash); // merge the hash code into header // use (machine word version) atomic operation to install the hash - test = (markOop) Atomic::cmpxchg_ptr(temp, obj->mark_addr(), mark); + test = obj->cas_set_mark(temp, mark); if (test == mark) { return hash; } @@ -844,9 +845,9 @@ // Get the next block in the block list. static inline ObjectMonitor* next(ObjectMonitor* block) { - assert(block->object() == CHAINMARKER, "must be a block header"); + assert(block->object() == CHAINMARKER || block->object() == CLAIMEDMARKER, "must be a valid block header"); block = block->FreeNext ; - assert(block == NULL || block->object() == CHAINMARKER, "must be a block header"); + assert(block == NULL || block->object() == CHAINMARKER || block->object() == CLAIMEDMARKER, "must be a valid block header"); return block; } @@ -1212,7 +1213,7 @@ if (mark->has_monitor()) { ObjectMonitor * inf = mark->monitor() ; assert (inf->header()->is_neutral(), "invariant"); - assert (inf->object() == object, "invariant") ; + assert (oopDesc::equals((oop) inf->object(), object), "invariant") ; assert (ObjectSynchronizer::verify_objmon_isinpool(inf), "monitor is invalid"); return inf ; } @@ -1259,7 +1260,7 @@ m->_recursions = 0 ; m->_SpinDuration = ObjectMonitor::Knob_SpinLimit ; // Consider: maintain by type/class - markOop cmp = (markOop) Atomic::cmpxchg_ptr (markOopDesc::INFLATING(), object->mark_addr(), mark) ; + markOop cmp = object->cas_set_mark(markOopDesc::INFLATING(), mark); if (cmp != mark) { omRelease (Self, m, true) ; continue ; // Interference -- just retry @@ -1352,7 +1353,7 @@ m->_Responsible = NULL ; m->_SpinDuration = ObjectMonitor::Knob_SpinLimit ; // consider: keep metastats by type/class - if (Atomic::cmpxchg_ptr (markOopDesc::encode(m), object->mark_addr(), mark) != mark) { + if (object->cas_set_mark(markOopDesc::encode(m), mark) != mark) { m->set_object (NULL) ; m->set_owner (NULL) ; m->OwnerIsThread = 0 ; @@ -1679,3 +1680,58 @@ } #endif + +ParallelObjectSynchronizerIterator ObjectSynchronizer::parallel_iterator() { + return ParallelObjectSynchronizerIterator(gBlockList); +} + +// ParallelObjectSynchronizerIterator implementation +ParallelObjectSynchronizerIterator::ParallelObjectSynchronizerIterator(ObjectMonitor * head) + : _head(head), _cur(head) { + assert(SafepointSynchronize::is_at_safepoint(), "Must at safepoint"); +} + +ParallelObjectSynchronizerIterator::~ParallelObjectSynchronizerIterator() { + assert(SafepointSynchronize::is_at_safepoint(), "Must at safepoint"); + ObjectMonitor* block = _head; + for (; block 
!= NULL; block = next(block)) { + assert(block->object() == CLAIMEDMARKER, "Must be a claimed block"); + // Restore chainmarker + block->set_object(CHAINMARKER); + } +} + +void* ParallelObjectSynchronizerIterator::claim() { + ObjectMonitor* my_cur = _cur; + ObjectMonitor* next_block; + + while (true) { + if (my_cur == NULL) return NULL; + + if (my_cur->object() == CHAINMARKER) { + if (my_cur->cas_set_object(CLAIMEDMARKER, CHAINMARKER) == CHAINMARKER) { + return (void*)my_cur; + } + } else { + assert(my_cur->object() == CLAIMEDMARKER, "Must be"); + } + + next_block = next(my_cur); + my_cur = (ObjectMonitor*) Atomic::cmpxchg_ptr(next_block, &_cur, my_cur); + } +} + +bool ParallelObjectSynchronizerIterator::parallel_oops_do(OopClosure* f) { + ObjectMonitor* block = (ObjectMonitor*) claim(); + if (block != NULL) { + assert(block->object() == CLAIMEDMARKER, "Must be a claimed block"); + for (int i = 1; i < ObjectSynchronizer::_BLOCKSIZE; i++) { + ObjectMonitor* mid = &block[i]; + if (mid->object() != NULL) { + f->do_oop((oop*) mid->object_addr()); + } + } + return true; + } + return false; +} diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/synchronizer.hpp --- a/src/share/vm/runtime/synchronizer.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/synchronizer.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -34,8 +34,25 @@ class ObjectMonitor; +class ParallelObjectSynchronizerIterator VALUE_OBJ_CLASS_SPEC { + friend class ObjectSynchronizer; + + private: + ObjectMonitor* _head; + ObjectMonitor* volatile _cur; + + private: + ParallelObjectSynchronizerIterator(ObjectMonitor* head); + void* claim(); + + public: + ~ParallelObjectSynchronizerIterator(); + bool parallel_oops_do(OopClosure* f); +}; + class ObjectSynchronizer : AllStatic { friend class VMStructs; + friend class ParallelObjectSynchronizerIterator; public: typedef enum { owner_self, @@ -120,6 +137,9 @@ ObjectMonitor** FreeTailp); static void oops_do(OopClosure* f); + // Parallel GC support + static ParallelObjectSynchronizerIterator parallel_iterator(); + // debugging static void verify() PRODUCT_RETURN; static int verify_objmon_isinpool(ObjectMonitor *addr) PRODUCT_RETURN0; diff -r b1cf900aa021 -r 87059e2365be src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/runtime/thread.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -1712,7 +1712,7 @@ static void ensure_join(JavaThread* thread) { // We do not need to grap the Threads_lock, since we are operating on ourself. - Handle threadObj(thread, oopDesc::bs()->write_barrier(thread->threadObj())); + Handle threadObj(thread, thread->threadObj()); assert(threadObj.not_null(), "java thread object must exist"); ObjectLocker lock(threadObj, thread); // Ignore pending exception (ThreadDeath), since we are exiting anyway @@ -4041,9 +4041,6 @@ thread->exit(true); - // Stop GC threads. - Universe::heap()->shutdown(); - // Stop VM thread. { // 4945125 The vm thread comes to a safepoint during exit. 
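
The ParallelObjectSynchronizerIterator added above lets several GC workers split the global monitor block list between them: a worker claims a block by CAS-ing its header tag from CHAINMARKER to CLAIMEDMARKER, and the destructor restores the tags at the end of the safepoint, so every block is scanned exactly once. A simplified sketch of the claim protocol (hypothetical types; the real code CASes the ObjectMonitor::_object field):

#include <atomic>
#include <cstdio>

enum { CHAINMARKER = 1, CLAIMEDMARKER = 2 };

struct Block {
  std::atomic<int> tag;
  Block* next;
};

// Claim the next unvisited block, or return nullptr when all are taken.
Block* claim(std::atomic<Block*>& cursor) {
  Block* cur = cursor.load();
  while (cur != nullptr) {
    int expected = CHAINMARKER;
    if (cur->tag.compare_exchange_strong(expected, CLAIMEDMARKER)) {
      return cur;                      // this worker now owns the block
    }
    // Lost the race: help advance the shared cursor, then retry.
    cursor.compare_exchange_strong(cur, cur->next);
    cur = cursor.load();
  }
  return nullptr;
}

int main() {
  Block b2{{CHAINMARKER}, nullptr};
  Block b1{{CHAINMARKER}, &b2};
  std::atomic<Block*> cursor(&b1);
  while (Block* b = claim(cursor)) {
    std::printf("claimed block, tag now %d\n", b->tag.load());
  }
  return 0;
}
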
@@ -4214,7 +4211,8 @@ bool is_par = sh->n_par_threads() > 0; assert(!is_par || (SharedHeap::heap()->n_par_threads() == - SharedHeap::heap()->workers()->active_workers()) || UseShenandoahGC, "Mismatch"); + SharedHeap::heap()->workers()->active_workers() + || UseShenandoahGC), "Mismatch"); int cp = SharedHeap::heap()->strong_roots_parity(); ALL_JAVA_THREADS(p) { if (p->claim_oops_do(is_par, cp)) { diff -r b1cf900aa021 -r 87059e2365be src/share/vm/services/attachListener.cpp --- a/src/share/vm/services/attachListener.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/services/attachListener.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -91,6 +91,7 @@ assert(TypeArrayKlass::cast(res->klass())->element_type() == T_BYTE, "just checking"); // copy the bytes to the output stream + res = oopDesc::bs()->read_barrier(res); typeArrayOop ba = typeArrayOop(res); jbyte* addr = typeArrayOop(res)->byte_at_addr(0); out->print_raw((const char*)addr, ba->length()); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/services/diagnosticCommand.cpp --- a/src/share/vm/services/diagnosticCommand.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/services/diagnosticCommand.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -224,6 +224,7 @@ assert(TypeArrayKlass::cast(res->klass())->element_type() == T_BYTE, "just checking"); // copy the bytes to the output stream + res = oopDesc::bs()->read_barrier(res); typeArrayOop ba = typeArrayOop(res); jbyte* addr = typeArrayOop(res)->byte_at_addr(0); output()->print_raw((const char*)addr, ba->length()); diff -r b1cf900aa021 -r 87059e2365be src/share/vm/services/heapDumper.cpp --- a/src/share/vm/services/heapDumper.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/services/heapDumper.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -1036,6 +1036,7 @@ // If the byte ordering is big endian then we can copy most types directly u4 length_in_bytes = (u4)array->length() * type2aelembytes(type); + array = typeArrayOop(oopDesc::bs()->read_barrier(array)); switch (type) { case T_INT : { if (Bytes::is_Java_byte_ordering_different()) { diff -r b1cf900aa021 -r 87059e2365be src/share/vm/services/threadService.cpp --- a/src/share/vm/services/threadService.cpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/services/threadService.cpp Wed Dec 07 21:03:02 2016 +0100 @@ -165,7 +165,6 @@ // If obj == NULL, then ObjectMonitor is raw which doesn't count. } - obj = oopDesc::bs()->write_barrier(obj); Handle h(obj); return h; } diff -r b1cf900aa021 -r 87059e2365be src/share/vm/utilities/growableArray.hpp --- a/src/share/vm/utilities/growableArray.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/utilities/growableArray.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -27,6 +27,7 @@ #include "memory/allocation.hpp" #include "memory/allocation.inline.hpp" +#include "oops/oop.hpp" #include "utilities/debug.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/top.hpp" diff -r b1cf900aa021 -r 87059e2365be src/share/vm/utilities/taskqueue.hpp --- a/src/share/vm/utilities/taskqueue.hpp Fri Nov 04 07:21:01 2016 -0400 +++ b/src/share/vm/utilities/taskqueue.hpp Wed Dec 07 21:03:02 2016 +0100 @@ -492,6 +492,7 @@ public: // Returns "true" if some TaskQueue in the set contains a task. 
 virtual bool peek() = 0;
+  virtual size_t tasks() = 0;
 };
 template <MEMFLAGS F> class TaskQueueSetSuperImpl: public CHeapObj<F>, public TaskQueueSetSuper {
@@ -528,6 +529,9 @@
 bool steal(uint queue_num, int* seed, E& t);
 bool peek();
+  size_t tasks();
+
+  uint size() const { return _n; }
 };
 template<class T, MEMFLAGS F> void
@@ -585,6 +589,15 @@
 return false;
 }
+template<class T, MEMFLAGS F>
+size_t GenericTaskQueueSet<T, F>::tasks() {
+  size_t n = 0;
+  for (uint j = 0; j < _n; j++) {
+    n += _queues[j]->size();
+  }
+  return n;
+}
+
 // When to terminate from the termination protocol.
 class TerminatorTerminator: public CHeapObj<mtInternal> {
 public:
@@ -597,7 +610,7 @@
 #undef TRACESPINNING
 class ParallelTaskTerminator: public StackObj {
-private:
+protected:
 int _n_threads;
 TaskQueueSetSuper* _queue_set;
 int _offered_termination;
@@ -623,7 +636,7 @@
 // else is. If returns "true", all threads are terminated. If returns
 // "false", available work has been observed in one of the task queues,
 // so the global task is not complete.
-  bool offer_termination() {
+  virtual bool offer_termination() {
 return offer_termination(NULL);
 }
diff -r b1cf900aa021 -r 87059e2365be test/TEST.groups
--- a/test/TEST.groups	Fri Nov 04 07:21:01 2016 -0400
+++ b/test/TEST.groups	Wed Dec 07 21:03:02 2016 +0100
@@ -135,6 +135,22 @@
 sanity/ExecuteInternalVMTests.java \
 -gc/g1/TestGreyReclaimedHumongousObjects.java
+hotspot_gc_shenandoah = \
+  gc/stress/TestGCOldWithShenandoah.java \
+  gc/stress/gcbasher/TestGCBasherWithShenandoah.java \
+  gc/shenandoah/
+
+hotspot_fast_gc_shenandoah = \
+  gc/shenandoah/TestRegionSizeArgs.java \
+  gc/shenandoah/compiler/TestNullCheck.java \
+  gc/shenandoah/compiler/TestWriteBarrierClearControl.java \
+  gc/shenandoah/HumongousRegionReclaimTest/TestHumongous.java \
+  gc/shenandoah/AlwaysPreTouch.java \
+  gc/shenandoah/TestAllocLargeObjOOM.java \
+  gc/shenandoah/TestAllocSmallObjOOM.java \
+  gc/shenandoah/TestRegionSizeArgs.java \
+  gc/shenandoah/TestSingleThreadedShenandoah.java
+
 hotspot_runtime = \
 sanity/ExecuteInternalVMTests.java

changeset:   9479:db98996d26b2
user:        rkennke
date:        Thu Dec 08 17:48:03 2016 +0100
summary:     Added dummy arg consumer to pseudo-logging code to be able to build release.

diff -r 87059e2365be -r db98996d26b2 src/share/vm/gc_implementation/shenandoah/shenandoahLogging.hpp
--- a/src/share/vm/gc_implementation/shenandoah/shenandoahLogging.hpp	Wed Dec 07 21:03:02 2016 +0100
+++ b/src/share/vm/gc_implementation/shenandoah/shenandoahLogging.hpp	Thu Dec 08 17:48:03 2016 +0100
@@ -8,6 +8,7 @@
 #define log_develop_trace(...) if (ShenandoahLogTrace) gclog_or_tty->print_cr
 #define log_develop_debug(...) if (ShenandoahLogDebug) gclog_or_tty->print_cr
 #else
-#define log_develop_trace(...)
-#define log_develop_debug(...)
+#define DUMMY_ARGUMENT_CONSUMER(...)
+#define log_develop_trace(...) DUMMY_ARGUMENT_CONSUMER
+#define log_develop_debug(...) DUMMY_ARGUMENT_CONSUMER
 #endif

changeset:   9480:da17b9cffd4f
user:        roland
date:        Thu Dec 08 13:28:52 2016 +0100
summary:     backport shenandoah C2 support from jdk9

diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/adlc/formssel.cpp
--- a/src/share/vm/adlc/formssel.cpp	Thu Dec 08 17:48:03 2016 +0100
+++ b/src/share/vm/adlc/formssel.cpp	Thu Dec 08 13:28:52 2016 +0100
@@ -639,21 +639,6 @@
 }
-bool InstructForm::is_wide_memory_kill(FormDict &globals) const {
-  if( _matrule == NULL ) return false;
-  if( !_matrule->_opType ) return false;
-
-  if( strcmp(_matrule->_opType,"MemBarRelease") == 0 ) return true;
-  if( strcmp(_matrule->_opType,"MemBarAcquire") == 0 ) return true;
-  if( strcmp(_matrule->_opType,"MemBarReleaseLock") == 0 ) return true;
-  if( strcmp(_matrule->_opType,"MemBarAcquireLock") == 0 ) return true;
-  if( strcmp(_matrule->_opType,"MemBarStoreStore") == 0 ) return true;
-  if( strcmp(_matrule->_opType,"StoreFence") == 0 ) return true;
-  if( strcmp(_matrule->_opType,"LoadFence") == 0 ) return true;
-
-  return false;
-}
-
 int InstructForm::memory_operand(FormDict &globals) const {
 // Machine independent loads must be checked for anti-dependences
 // Check if instruction has a USE of a memory operand class, or a def.
@@ -1158,6 +1143,9 @@
 else if (is_ideal_nop()) {
 return "MachNopNode";
 }
+  else if( is_ideal_membar()) {
+    return "MachMemBarNode";
+  }
 else if (is_mach_constant()) {
 return "MachConstantNode";
 }
@@ -4078,7 +4066,8 @@
 !strcmp(_opType,"StoreFence") ||
 !strcmp(_opType,"MemBarVolatile") ||
 !strcmp(_opType,"MemBarCPUOrder") ||
-    !strcmp(_opType,"MemBarStoreStore");
+    !strcmp(_opType,"MemBarStoreStore") ||
+    !strcmp(_opType, "OnSpinWait");
 }
 bool MatchRule::is_ideal_loadPC() const {
diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/adlc/formssel.hpp
--- a/src/share/vm/adlc/formssel.hpp	Thu Dec 08 17:48:03 2016 +0100
+++ b/src/share/vm/adlc/formssel.hpp	Thu Dec 08 13:28:52 2016 +0100
@@ -191,7 +191,6 @@
 // loads from memory, so must check for anti-dependence
 virtual bool needs_anti_dependence_check(FormDict &globals) const;
 virtual int memory_operand(FormDict &globals) const;
-  bool is_wide_memory_kill(FormDict &globals) const;
 enum memory_operand_type {
 NO_MEMORY_OPERAND = -1,
diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/adlc/output_c.cpp
--- a/src/share/vm/adlc/output_c.cpp	Thu Dec 08 17:48:03 2016 +0100
+++ b/src/share/vm/adlc/output_c.cpp	Thu Dec 08 13:28:52 2016 +0100
@@ -3263,10 +3263,6 @@
 // Analyze machine instructions that either USE or DEF memory.
 int memory_operand = instr->memory_operand(_globalNames);
-  // Some guys kill all of memory
-  if ( instr->is_wide_memory_kill(_globalNames) ) {
-    memory_operand = InstructForm::MANY_MEMORY_OPERANDS;
-  }
 if ( memory_operand != InstructForm::NO_MEMORY_OPERAND ) {
 if( memory_operand == InstructForm::MANY_MEMORY_OPERANDS ) {
diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/adlc/output_h.cpp
--- a/src/share/vm/adlc/output_h.cpp	Thu Dec 08 17:48:03 2016 +0100
+++ b/src/share/vm/adlc/output_h.cpp	Thu Dec 08 13:28:52 2016 +0100
@@ -2002,10 +2002,6 @@
 // Analyze machine instructions that either USE or DEF memory.
int memory_operand = instr->memory_operand(_globalNames); - // Some guys kill all of memory - if ( instr->is_wide_memory_kill(_globalNames) ) { - memory_operand = InstructForm::MANY_MEMORY_OPERANDS; - } if ( memory_operand != InstructForm::NO_MEMORY_OPERAND ) { if( memory_operand == InstructForm::MANY_MEMORY_OPERANDS ) { fprintf(fp," virtual const TypePtr *adr_type() const;\n"); diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/ci/ciInstanceKlass.cpp --- a/src/share/vm/ci/ciInstanceKlass.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/ci/ciInstanceKlass.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -704,3 +704,16 @@ ik->do_local_static_fields(&sffp); } } + +#ifdef ASSERT +bool ciInstanceKlass::debug_final_or_stable_field_at(int offset) { + GUARDED_VM_ENTRY( + InstanceKlass* ik = get_instanceKlass(); + fieldDescriptor fd; + if (ik->find_field_from_offset(offset, false, &fd)) { + return fd.is_final() || fd.is_stable(); + } + ); + return false; +} +#endif diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/ci/ciInstanceKlass.hpp --- a/src/share/vm/ci/ciInstanceKlass.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/ci/ciInstanceKlass.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -250,6 +250,10 @@ // Dump the current state of this klass for compilation replay. virtual void dump_replay_data(outputStream* out); + +#ifdef ASSERT + bool debug_final_or_stable_field_at(int offset); +#endif }; #endif // SHARE_VM_CI_CIINSTANCEKLASS_HPP diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/addnode.cpp --- a/src/share/vm/opto/addnode.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/addnode.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -638,6 +638,38 @@ } } + if (UseShenandoahGC && + in(Base) == in(AddPNode::Address) && + phase->type(in(Base)) == TypePtr::NULL_PTR) { + if (can_reshape) { + for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { + Node* u = fast_out(i); + if (u->is_LoadStore()) { + if (u->as_LoadStore()->adr_type() != NULL) { + u->as_LoadStore()->set_adr_type(TypeRawPtr::BOTTOM); + } + } +#ifdef ASSERT + else if (u->is_Mem()) { + assert(u->as_Mem()->raw_adr_type() == TypeOopPtr::BOTTOM, "bad slice"); + u->as_Mem()->set_raw_adr_type(TypeRawPtr::BOTTOM); + } else if (u->Opcode() == Op_CallLeafNoFP && !strcmp(u->as_CallLeaf()->_name, "unsafe_arraycopy")) { + assert(u->in(0) == NULL || u->in(0)->is_top() || u->in(0)->in(0) == NULL || u->in(0)->in(0)->is_top() || + (u->in(0)->is_Proj() && u->in(0)->in(0)->is_MemBar()), "need membar before"); + Node* c = u->unique_ctrl_out(); + assert(c == NULL || c->is_Proj(), "need membar after"); + c = c->unique_ctrl_out(); + assert(c == NULL || c->is_MemBar(), "need membar after"); + } else { + u->dump(); + ShouldNotReachHere(); + } +#endif + } + } + return new (phase->C) CastX2PNode(in(AddPNode::Offset)); + } + return NULL; // No progress } diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/callGenerator.cpp --- a/src/share/vm/opto/callGenerator.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/callGenerator.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -38,6 +38,7 @@ #include "opto/parse.hpp" #include "opto/rootnode.hpp" #include "opto/runtime.hpp" +#include "opto/shenandoahSupport.hpp" #include "opto/subnode.hpp" @@ -343,6 +344,10 @@ Node* mem = MergeMemNode::make(C, map->in(TypeFunc::Memory)); C->initial_gvn()->set_type_bottom(mem); map->set_req(TypeFunc::Memory, mem); + } else if (map->in(TypeFunc::Memory)->outcnt() > 1) { + Node* mem = map->in(TypeFunc::Memory)->clone(); + C->initial_gvn()->set_type_bottom(mem); + 
map->set_req(TypeFunc::Memory, mem); } uint nargs = method()->arg_size(); @@ -786,6 +791,7 @@ { // Get MethodHandle receiver: Node* receiver = kit.argument(0); + assert(!(ShenandoahBarrierNode::skip_through_barrier(receiver)->is_Con() && !receiver->is_Con()), "barrier prevents optimization"); if (receiver->Opcode() == Op_ConP) { input_not_const = false; const TypeOopPtr* oop_ptr = receiver->bottom_type()->is_oopptr(); @@ -807,6 +813,7 @@ { // Get MemberName argument: Node* member_name = kit.argument(callee->arg_size() - 1); + assert(!(ShenandoahBarrierNode::skip_through_barrier(member_name)->is_Con() && !member_name->is_Con()), "barrier prevents optimization"); if (member_name->Opcode() == Op_ConP) { input_not_const = false; const TypeOopPtr* oop_ptr = member_name->bottom_type()->is_oopptr(); diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/callnode.cpp --- a/src/share/vm/opto/callnode.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/callnode.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -37,6 +37,7 @@ #include "opto/regmask.hpp" #include "opto/rootnode.hpp" #include "opto/runtime.hpp" +#include "opto/shenandoahSupport.hpp" // Portions of code courtesy of Clifford Click @@ -807,7 +808,7 @@ } -void CallNode::extract_projections(CallProjections* projs, bool separate_io_proj) { +void CallNode::extract_projections(CallProjections* projs, bool separate_io_proj, bool do_asserts) { projs->fallthrough_proj = NULL; projs->fallthrough_catchproj = NULL; projs->fallthrough_ioproj = NULL; @@ -870,17 +871,18 @@ } } - // The resproj may not exist because the result couuld be ignored + // The resproj may not exist because the result could be ignored // and the exception object may not exist if an exception handler // swallows the exception but all the other must exist and be found. 
assert(projs->fallthrough_proj != NULL, "must be found"); - assert(Compile::current()->inlining_incrementally() || projs->fallthrough_catchproj != NULL, "must be found"); - assert(Compile::current()->inlining_incrementally() || projs->fallthrough_memproj != NULL, "must be found"); - assert(Compile::current()->inlining_incrementally() || projs->fallthrough_ioproj != NULL, "must be found"); - assert(Compile::current()->inlining_incrementally() || projs->catchall_catchproj != NULL, "must be found"); + do_asserts = do_asserts && !Compile::current()->inlining_incrementally(); + assert(!do_asserts || projs->fallthrough_catchproj != NULL, "must be found"); + assert(!do_asserts || projs->fallthrough_memproj != NULL, "must be found"); + assert(!do_asserts || projs->fallthrough_ioproj != NULL, "must be found"); + assert(!do_asserts || projs->catchall_catchproj != NULL, "must be found"); if (separate_io_proj) { - assert(Compile::current()->inlining_incrementally() || projs->catchall_memproj != NULL, "must be found"); - assert(Compile::current()->inlining_incrementally() || projs->catchall_ioproj != NULL, "must be found"); + assert(!do_asserts || projs->catchall_memproj != NULL, "must be found"); + assert(!do_asserts || projs->catchall_ioproj != NULL, "must be found"); } } @@ -906,7 +908,6 @@ return SafePointNode::Ideal(phase, can_reshape); } - //============================================================================= uint CallJavaNode::size_of() const { return sizeof(*this); } uint CallJavaNode::cmp( const Node &n ) const { @@ -998,6 +999,13 @@ Matcher::c_calling_convention( sig_bt, parm_regs, argcnt ); } +bool CallRuntimeNode::is_call_to_arraycopystub() const { + if (_name != NULL && strstr(_name, "arraycopy") != 0) { + return true; + } + return false; +} + //============================================================================= //------------------------------calling_convention----------------------------- diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/callnode.hpp --- a/src/share/vm/opto/callnode.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/callnode.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -622,7 +622,7 @@ // Collect all the interesting edges from a call for use in // replacing the call by something else. Used by macro expansion // and the late inlining support. 
- void extract_projections(CallProjections* projs, bool separate_io_proj); + void extract_projections(CallProjections* projs, bool separate_io_proj, bool do_asserts = true); virtual uint match_edge(uint idx) const; @@ -760,6 +760,8 @@ virtual int Opcode() const; virtual void calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const; + bool is_call_to_arraycopystub() const; + #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/cfgnode.cpp --- a/src/share/vm/opto/cfgnode.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/cfgnode.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -35,6 +35,7 @@ #include "opto/phaseX.hpp" #include "opto/regmask.hpp" #include "opto/runtime.hpp" +#include "opto/shenandoahSupport.hpp" #include "opto/subnode.hpp" // Portions of code courtesy of Clifford Click @@ -588,6 +589,9 @@ if( n->as_Phi()->is_unsafe_data_reference(in) ) in = phase->C->top(); // replaced by top } + if (n->outcnt() == 0) { + in = phase->C->top(); + } igvn->replace_node(n, in); } else if( n->is_Region() ) { // Update all incoming edges @@ -1262,7 +1266,7 @@ } else return NULL; // Build int->bool conversion - Node *n = new (phase->C) Conv2BNode( cmp->in(1) ); + Node *n = new (phase->C) Conv2BNode(ShenandoahBarrierNode::skip_through_barrier(cmp->in(1))); if( flipped ) n = new (phase->C) XorINode( phase->transform(n), phase->intcon(1) ); @@ -1628,7 +1632,12 @@ if (can_reshape && igvn != NULL) { igvn->_worklist.push(r); } - set_req(j, top); // Nuke it down + // Nuke it down + if (can_reshape) { + set_req_X(j, top, igvn); + } else { + set_req(j, top); + } progress = this; // Record progress } } diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/compile.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -433,6 +433,12 @@ remove_expensive_node(n); } } + for (int i = C->shenandoah_barriers_count()-1; i >= 0; i--) { + Node* n = C->shenandoah_barrier(i); + if (!useful.member(n)) { + remove_shenandoah_barrier(n->as_ShenandoahBarrier()); + } + } // clean up the late inline lists remove_useless_late_inlines(&_string_late_inlines, useful); remove_useless_late_inlines(&_boxing_late_inlines, useful); @@ -1196,6 +1202,7 @@ _predicate_opaqs = new(comp_arena()) GrowableArray(comp_arena(), 8, 0, NULL); _expensive_nodes = new(comp_arena()) GrowableArray(comp_arena(), 8, 0, NULL); _range_check_casts = new(comp_arena()) GrowableArray(comp_arena(), 8, 0, NULL); + _shenandoah_barriers = new(comp_arena()) GrowableArray(comp_arena(), 8, 0, NULL); register_library_intrinsics(); } @@ -2292,6 +2299,10 @@ igvn.optimize(); } + if (UseShenandoahGC && ShenandoahVerifyOptoBarriers) { + ShenandoahBarrierNode::verify(C->root()); + } + { NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); ) PhaseMacroExpand mex(igvn); @@ -2301,6 +2312,20 @@ } } + if (ShenandoahWriteBarrierToIR) { + if (shenandoah_barriers_count() > 0) { + C->clear_major_progress(); + PhaseIdealLoop ideal_loop(igvn, false, true); + if (failing()) return; + PhaseIdealLoop::verify(igvn); +#ifdef ASSERT + if (UseShenandoahGC) { + ShenandoahBarrierNode::verify_raw_mem(C->root()); + } +#endif + } + } + } // (End scope of igvn; run destructor if necessary for asserts.) 
dump_inlining(); @@ -2867,9 +2892,38 @@ break; } -#ifdef _LP64 - case Op_CastPP: - if (n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) { + case Op_CastPP: { + // Remove CastPP nodes to gain more freedom during scheduling but + // keep the dependency they encode as control or precedence edges + // (if control is set already) on memory operations. Some CastPP + // nodes don't have a control (don't carry a dependency): skip + // those. + if (n->in(0) != NULL) { + ResourceMark rm; + Unique_Node_List wq; + wq.push(n); + for (uint next = 0; next < wq.size(); ++next) { + Node *m = wq.at(next); + for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) { + Node* use = m->fast_out(i); + if (use->is_Mem() || use->is_EncodeNarrowPtr()) { + use->ensure_control_or_add_prec(n->in(0)); + } else if (use->in(0) == NULL) { + switch(use->Opcode()) { + case Op_AddP: + case Op_DecodeN: + case Op_DecodeNKlass: + case Op_CheckCastPP: + case Op_CastPP: + wq.push(use); + break; + } + } + } + } + } + const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); + if (is_LP64 && n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) { Node* in1 = n->in(1); const Type* t = n->bottom_type(); Node* new_in1 = in1->clone(); @@ -2902,9 +2956,15 @@ if (in1->outcnt() == 0) { in1->disconnect_inputs(NULL, this); } + } else { + n->subsume_by(n->in(1), this); + if (n->outcnt() == 0) { + n->disconnect_inputs(NULL, this); + } } break; - + } +#ifdef _LP64 case Op_CmpP: // Do this transformation here to preserve CmpPNode::sub() and // other TypePtr related Ideal optimizations (for example, ptr nullness). @@ -3173,7 +3233,7 @@ case Op_ShenandoahReadBarrier: break; case Op_ShenandoahWriteBarrier: - n->set_req(ShenandoahBarrierNode::Memory, immutable_memory()); + assert(!ShenandoahWriteBarrierToIR, "should have been expanded already"); break; default: assert( !n->is_Call(), "" ); @@ -3576,9 +3636,7 @@ if (cmp->Opcode() == Op_CmpI && cmp->in(2)->is_Con() && cmp->in(2)->bottom_type()->is_int()->get_con() == 0 && cmp->in(1)->is_Load()) { LoadNode* load = cmp->in(1)->as_Load(); - if (load->Opcode() == Op_LoadB && load->in(2)->is_AddP() && load->in(2)->in(2)->Opcode() == Op_ThreadLocal - && load->in(2)->in(3)->is_Con() - && load->in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == marking_offset) { + if (load->is_g1_marking_load()) { Node* if_ctrl = iff->in(0); Node* load_ctrl = load->in(0); @@ -4070,7 +4128,7 @@ const Type* t_no_spec = t->remove_speculative(); if (t_no_spec != t) { bool in_hash = igvn.hash_delete(n); - assert(in_hash, "node should be in igvn hash table"); + assert(in_hash || n->hash() == Node::NO_HASH, "node should be in igvn hash table"); tn->set_type(t_no_spec); igvn.hash_insert(n); igvn._worklist.push(n); // give it a chance to go away diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/compile.hpp --- a/src/share/vm/opto/compile.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/compile.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -69,6 +69,7 @@ class PhaseCCP_DCE; class RootNode; class relocInfo; +class ShenandoahBarrierNode; class Scope; class StartNode; class SafePointNode; @@ -336,6 +337,7 @@ GrowableArray* _predicate_opaqs; // List of Opaque1 nodes for the loop predicates. 
GrowableArray<Node*>* _expensive_nodes; // List of nodes that are expensive to compute and that we'd better not let the GVN freely common GrowableArray<Node*>* _range_check_casts; // List of CastII nodes with a range check dependency + GrowableArray<ShenandoahBarrierNode*>* _shenandoah_barriers; ConnectionGraph* _congraph; #ifndef PRODUCT IdealGraphPrinter* _printer; @@ -664,9 +666,11 @@ int macro_count() const { return _macro_nodes->length(); } int predicate_count() const { return _predicate_opaqs->length();} int expensive_count() const { return _expensive_nodes->length(); } + int shenandoah_barriers_count() const { return _shenandoah_barriers->length(); } Node* macro_node(int idx) const { return _macro_nodes->at(idx); } Node* predicate_opaque1_node(int idx) const { return _predicate_opaqs->at(idx);} Node* expensive_node(int idx) const { return _expensive_nodes->at(idx); } + ShenandoahBarrierNode* shenandoah_barrier(int idx) const { return _shenandoah_barriers->at(idx); } ConnectionGraph* congraph() { return _congraph;} void set_congraph(ConnectionGraph* congraph) { _congraph = congraph;} void add_macro_node(Node * n) { @@ -690,6 +694,15 @@ _expensive_nodes->remove(n); } } + void add_shenandoah_barrier(ShenandoahBarrierNode * n) { + assert(!_shenandoah_barriers->contains(n), "duplicate entry in barrier list"); + _shenandoah_barriers->append(n); + } + void remove_shenandoah_barrier(ShenandoahBarrierNode * n) { + if (_shenandoah_barriers->contains(n)) { + _shenandoah_barriers->remove(n); + } + } void add_predicate_opaq(Node * n) { assert(!_predicate_opaqs->contains(n), "duplicate entry in predicate opaque1"); assert(_macro_nodes->contains(n), "should have already been in macro list"); @@ -722,6 +735,8 @@ // Sort expensive nodes to locate similar expensive nodes void sort_expensive_nodes(); + GrowableArray<ShenandoahBarrierNode*>* shenandoah_barriers() { return _shenandoah_barriers; } + // Compilation environment. Arena* comp_arena() { return &_comp_arena; } ciEnv* env() const { return _env; } diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/connode.cpp --- a/src/share/vm/opto/connode.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/connode.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -431,16 +431,6 @@ return (in(0) && remove_dead_region(phase, can_reshape)) ?
this : NULL; } -//------------------------------Ideal_DU_postCCP------------------------------- -// Throw away cast after constant propagation -Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { - const Type *t = ccp->type(in(1)); - ccp->hash_delete(this); - set_type(t); // Turn into ID function - ccp->hash_insert(this); - return this; -} - uint CastIINode::size_of() const { return sizeof(*this); } @@ -522,13 +512,6 @@ return res; } -Node *CastIINode::Ideal_DU_postCCP(PhaseCCP *ccp) { - if (_carry_dependency) { - return NULL; - } - return ConstraintCastNode::Ideal_DU_postCCP(ccp); -} - #ifndef PRODUCT void CastIINode::dump_spec(outputStream *st) const { TypeNode::dump_spec(st); @@ -542,20 +525,6 @@ #endif //============================================================================= - -//------------------------------Ideal_DU_postCCP------------------------------- -// If not converting int->oop, throw away cast after constant propagation -Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { - const Type *t = ccp->type(in(1)); - if (!t->isa_oop_ptr() || ((in(1)->is_DecodeN()) && Matcher::gen_narrow_oop_implicit_null_checks())) { - return NULL; // do not transform raw pointers or narrow oops - } - return ConstraintCastNode::Ideal_DU_postCCP(ccp); -} - - - -//============================================================================= //------------------------------Identity--------------------------------------- // If input is already higher or equal to cast type, then this is an identity. Node *CheckCastPPNode::Identity( PhaseTransform *phase ) { @@ -689,10 +658,6 @@ } -Node *EncodeNarrowPtrNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { - return MemNode::Ideal_common_DU_postCCP(ccp, this, in(1)); -} - Node* DecodeNKlassNode::Identity(PhaseTransform* phase) { const Type *t = phase->type( in(1) ); if( t == Type::TOP ) return in(1); diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/connode.hpp --- a/src/share/vm/opto/connode.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/connode.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -235,7 +235,6 @@ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); virtual int Opcode() const; virtual uint ideal_reg() const = 0; - virtual Node *Ideal_DU_postCCP( PhaseCCP * ); }; //------------------------------CastIINode------------------------------------- @@ -260,7 +259,6 @@ virtual uint ideal_reg() const { return Op_RegI; } virtual Node *Identity( PhaseTransform *phase ); virtual const Type *Value( PhaseTransform *phase ) const; - virtual Node *Ideal_DU_postCCP( PhaseCCP * ); const bool has_range_check() { #ifdef _LP64 return _range_check_dependency; @@ -281,7 +279,6 @@ CastPPNode (Node *n, const Type *t ): ConstraintCastNode(n, t) {} virtual int Opcode() const; virtual uint ideal_reg() const { return Op_RegP; } - virtual Node *Ideal_DU_postCCP( PhaseCCP * ); }; //------------------------------CheckCastPPNode-------------------------------- @@ -299,9 +296,6 @@ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); virtual int Opcode() const; virtual uint ideal_reg() const { return Op_RegP; } - // No longer remove CheckCast after CCP as it gives me a place to hang - // the proper address type - which is required to compute anti-deps. 
- //virtual Node *Ideal_DU_postCCP( PhaseCCP * ); }; @@ -316,7 +310,6 @@ } public: virtual uint ideal_reg() const { return Op_RegN; } - virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp ); }; //------------------------------EncodeP-------------------------------- diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/escape.cpp --- a/src/share/vm/opto/escape.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/escape.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -2287,7 +2287,9 @@ assert(opcode == Op_ConP || opcode == Op_ThreadLocal || opcode == Op_CastX2P || uncast_base->is_DecodeNarrowPtr() || (uncast_base->is_Mem() && (uncast_base->bottom_type()->isa_rawptr() != NULL)) || - (uncast_base->is_Proj() && uncast_base->in(0)->is_Allocate()), "sanity"); + (uncast_base->is_Proj() && uncast_base->in(0)->is_Allocate()) || + (uncast_base->is_Phi() && (uncast_base->bottom_type()->isa_rawptr() != NULL)) || + uncast_base->is_ShenandoahBarrier(), "sanity"); } return base; } diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/gcm.cpp --- a/src/share/vm/opto/gcm.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/gcm.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -116,6 +116,9 @@ } } +static bool is_dominator(Block* d, Block* n) { + return d->dom_lca(n) == d; +} //------------------------------schedule_pinned_nodes-------------------------- // Set the basic block for Nodes pinned into blocks @@ -138,6 +141,42 @@ schedule_node_into_block(node, block); } + // If the node has precedence edges (added when CastPP nodes are + // removed in final_graph_reshaping), fix the control of the + // node to cover the precedence edges and remove the + // dependencies. + Node* n = NULL; + for (uint i = node->len()-1; i >= node->req(); i--) { + Node* m = node->in(i); + if (m == NULL) continue; + // Skip the precedence edge if the test that guarded a CastPP: + // - was optimized out during escape analysis + // (OptimizePtrCompare): the CastPP's control isn't an end of + // block. + // - is moved in the branch of a dominating If: the control of + // the CastPP is then a Region. + if (m->is_block_proj() || m->is_block_start()) { + node->rm_prec(i); + if (n == NULL) { + n = m; + } else { + Block* bn = get_block_for_node(n); + Block* bm = get_block_for_node(m); + assert(is_dominator(bn, bm) || is_dominator(bm, bn), "one must dominate the other"); + n = is_dominator(bn, bm) ? m : n; + } + } + } + if (n != NULL) { + assert(node->in(0), "control should have been set"); + Block* bn = get_block_for_node(n); + Block* bnode = get_block_for_node(node->in(0)); + assert(is_dominator(bn, bnode) || is_dominator(bnode, bn), "one must dominate the other"); + if (!is_dominator(bn, bnode)) { + node->set_req(0, n); + } + } + // process all inputs that are non NULL for (int i = node->req() - 1; i >= 0; --i) { if (node->in(i) != NULL) { @@ -250,6 +289,7 @@ int is_visited = visited.test_set(in->_idx); if (!has_block(in)) { if (is_visited) { + assert(false, "graph should be schedulable"); return false; } // Save parent node and next input's index. 
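// Editorial sketch, not HotSpot code: the hunk above asserts that the blocks
// of a pinned node's surviving precedence edges form a chain in the dominator
// tree and then keeps the deepest one as the node's control. The selection
// rule in isolation, as a standalone toy with invented names:
#include <cassert>
struct Blk { Blk* idom; };                 // toy basic block: immediate-dominator link
static bool toy_is_dominator(Blk* d, Blk* n) {
  for (Blk* b = n; b != NULL; b = b->idom) {
    if (b == d) return true;               // d appears on n's idom chain
  }
  return false;
}
static Blk* keep_deeper(Blk* a, Blk* b) {
  assert(toy_is_dominator(a, b) || toy_is_dominator(b, a));
  return toy_is_dominator(a, b) ? b : a;   // the dominated (deeper) block wins
}
int main() {
  Blk root = { NULL }; Blk middle = { &root }; Blk leaf = { &middle };
  assert(keep_deeper(&root, &leaf) == &leaf);
  assert(keep_deeper(&middle, &leaf) == &leaf);
  return 0;
}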
@@ -1064,6 +1104,7 @@ if (LCA == NULL) { // Bailout without retry + assert(false, "graph should be schedulable"); C->record_method_not_compilable("late schedule failed: LCA == NULL"); return least; } @@ -1218,6 +1259,7 @@ C->record_failure(C2Compiler::retry_no_subsuming_loads()); } else { // Bailout without retry when (early->_dom_depth > LCA->_dom_depth) + assert(false, "graph should be schedulable"); C->record_method_not_compilable("late schedule failed: incorrect graph"); } return; diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/graphKit.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -26,8 +26,8 @@ #include "compiler/compileLog.hpp" #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" #include "gc_implementation/g1/heapRegion.hpp" +#include "gc_interface/collectedHeap.hpp" #include "gc_implementation/shenandoah/brooksPointer.hpp" -#include "gc_interface/collectedHeap.hpp" #include "memory/barrierSet.hpp" #include "memory/cardTableModRefBS.hpp" #include "opto/addnode.hpp" @@ -1168,7 +1168,7 @@ array = shenandoah_read_barrier(array); } - Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes()); + Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes()); alen = _gvn.transform( new (C) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS)); } else { alen = alloc->Ideal_length(); @@ -2926,13 +2926,13 @@ } } - // Load the object's klass - Node* obj_klass = load_object_klass(not_null_obj); - if (ShenandoahVerifyReadsToFromSpace) { not_null_obj = shenandoah_read_barrier(not_null_obj); } + // Load the object's klass + Node* obj_klass = load_object_klass(not_null_obj); + // Generate the subtype check Node* not_subtype_ctrl = gen_subtype_check(obj_klass, superklass); @@ -4124,11 +4124,6 @@ false, NULL, 0); const TypePtr* offset_field_type = string_type->add_offset(offset_offset); int offset_field_idx = C->get_alias_index(offset_field_type); - - if (! 
ShenandoahOptimizeFinals) { - str = shenandoah_read_barrier(str); - } - return make_load(ctrl, basic_plus_adr(str, str, offset_offset), TypeInt::INT, T_INT, offset_field_idx, MemNode::unordered); @@ -4186,10 +4181,7 @@ false, NULL, 0); const TypePtr* offset_field_type = string_type->add_offset(offset_offset); int offset_field_idx = C->get_alias_index(offset_field_type); - - str = shenandoah_write_barrier(str); - - store_to_memory(control(), basic_plus_adr(str, offset_offset), + store_to_memory(ctrl, basic_plus_adr(str, offset_offset), value, T_INT, offset_field_idx, MemNode::unordered); } @@ -4199,10 +4191,10 @@ false, NULL, 0); const TypePtr* value_field_type = string_type->add_offset(value_offset); + value = shenandoah_read_barrier_storeval(value); str = shenandoah_write_barrier(str); - value = shenandoah_read_barrier_nomem(value); - - store_oop_to_object(control(), str, basic_plus_adr(str, value_offset), value_field_type, + + store_oop_to_object(ctrl, str, basic_plus_adr(str, value_offset), value_field_type, value, TypeAryPtr::CHARS, T_OBJECT, MemNode::unordered); } @@ -4212,10 +4204,7 @@ false, NULL, 0); const TypePtr* count_field_type = string_type->add_offset(count_offset); int count_field_idx = C->get_alias_index(count_field_type); - - str = shenandoah_write_barrier(str); - - store_to_memory(control(), basic_plus_adr(str, count_offset), + store_to_memory(ctrl, basic_plus_adr(str, count_offset), value, T_INT, count_field_idx, MemNode::unordered); } @@ -4226,24 +4215,24 @@ } Node* GraphKit::shenandoah_read_barrier(Node* obj) { - return shenandoah_read_barrier_impl(obj, false, true); + return shenandoah_read_barrier_impl(obj, false, true, true); } -Node* GraphKit::shenandoah_read_barrier_nomem(Node* obj) { - return shenandoah_read_barrier_impl(obj, false, false); +Node* GraphKit::shenandoah_read_barrier_storeval(Node* obj) { + return shenandoah_read_barrier_impl(obj, true, false, false); } -Node* GraphKit::shenandoah_read_barrier_impl(Node* obj, bool use_ctrl, bool use_mem) { +Node* GraphKit::shenandoah_read_barrier_impl(Node* obj, bool use_ctrl, bool use_mem, bool allow_fromspace) { if (UseShenandoahGC && ShenandoahReadBarrier) { const Type* obj_type = obj->bottom_type(); if (obj_type->higher_equal(TypePtr::NULL_PTR)) { return obj; } - const TypePtr* adr_type = obj_type->is_ptr()->add_offset(BrooksPointer::byte_offset()); + const TypePtr* adr_type = ShenandoahBarrierNode::brooks_pointer_type(obj_type); Node* mem = use_mem ? memory(adr_type) : immutable_memory(); - if (! ShenandoahBarrierNode::needs_barrier(&_gvn, NULL, obj, mem, use_mem)) { + if (! ShenandoahBarrierNode::needs_barrier(&_gvn, NULL, obj, mem, allow_fromspace)) { // We know it is null, no barrier needed. return obj; } @@ -4259,10 +4248,10 @@ Node* not_null_obj = null_check_oop(obj, &null_ctrl); region->init_req(_null_path, null_ctrl); - phi ->init_req(_null_path, obj); + phi ->init_req(_null_path, zerocon(T_OBJECT)); Node* ctrl = use_ctrl ? control() : NULL; - ShenandoahReadBarrierNode* rb = new (C) ShenandoahReadBarrierNode(ctrl, mem, not_null_obj, use_mem); + ShenandoahReadBarrierNode* rb = new (C) ShenandoahReadBarrierNode(ctrl, mem, not_null_obj, allow_fromspace); Node* n = _gvn.transform(rb); region->init_req(_not_null_path, control()); @@ -4275,7 +4264,7 @@ } else { // We know it is not null. Simple barrier is sufficient. Node* ctrl = use_ctrl ? 
control() : NULL; - ShenandoahReadBarrierNode* rb = new (C) ShenandoahReadBarrierNode(ctrl, mem, obj, use_mem); + ShenandoahReadBarrierNode* rb = new (C) ShenandoahReadBarrierNode(ctrl, mem, obj, allow_fromspace); Node* n = _gvn.transform(rb); record_for_igvn(n); return n; @@ -4286,6 +4275,17 @@ } } +static Node* shenandoah_write_barrier_helper(GraphKit& kit, Node* obj, const TypePtr* adr_type) { + ShenandoahWriteBarrierNode* wb = new (kit.C) ShenandoahWriteBarrierNode(kit.C, kit.control(), kit.memory(adr_type), obj); + Node* n = kit.gvn().transform(wb); + if (n == wb) { // New barrier needs memory projection. + Node* proj = kit.gvn().transform(new (kit.C) ShenandoahWBMemProjNode(n)); + kit.set_memory(proj, adr_type); + } + + return n; +} + Node* GraphKit::shenandoah_write_barrier(Node* obj) { if (UseShenandoahGC && ShenandoahWriteBarrier) { @@ -4294,7 +4294,7 @@ return obj; } const Type* obj_type = obj->bottom_type(); - const TypePtr* adr_type = obj_type->is_ptr()->add_offset(BrooksPointer::byte_offset()); + const TypePtr* adr_type = ShenandoahBarrierNode::brooks_pointer_type(obj_type); if (obj_type->meet(TypePtr::NULL_PTR) == obj_type->remove_speculative()) { // We don't know if it's null or not. Need null-check. enum { _not_null_path = 1, _null_path, PATH_LIMIT }; @@ -4307,15 +4307,10 @@ Node* not_null_obj = null_check_oop(obj, &null_ctrl); region->init_req(_null_path, null_ctrl); - phi ->init_req(_null_path, obj); + phi ->init_req(_null_path, zerocon(T_OBJECT)); memphi->init_req(_null_path, prev_mem); - ShenandoahWriteBarrierNode* wb = new (C) ShenandoahWriteBarrierNode(NULL, memory(adr_type), not_null_obj); - Node* n = _gvn.transform(wb); - if (n == wb) { // New barrier needs memory projection. - Node* proj = _gvn.transform(new (C) ShenandoahWBMemProjNode(n)); - set_memory(proj, adr_type); - } + Node* n = shenandoah_write_barrier_helper(*this, not_null_obj, adr_type); region->init_req(_not_null_path, control()); phi ->init_req(_not_null_path, n); @@ -4326,15 +4321,12 @@ set_memory(_gvn.transform(memphi), adr_type); Node* res_val = _gvn.transform(phi); + // replace_in_map(obj, res_val); return res_val; } else { // We know it is not null. Simple barrier is sufficient. - ShenandoahWriteBarrierNode* wb = new (C) ShenandoahWriteBarrierNode(NULL, memory(adr_type), obj); - Node* n = _gvn.transform(wb); - if (n == wb) { - Node* proj = _gvn.transform(new (C) ShenandoahWBMemProjNode(wb)); - set_memory(proj, adr_type); - } + Node* n = shenandoah_write_barrier_helper(*this, obj, adr_type); + // replace_in_map(obj, n); record_for_igvn(n); return n; } @@ -4345,7 +4337,7 @@ } /** - * We need barriers on acmp (and similar instructions that compare two + * In Shenandoah, we need barriers on acmp (and similar instructions that compare two * oops) to avoid false negatives. If it compares a from-space and a to-space * copy of an object, a regular acmp would return false, even though both are * the same. The acmp barrier compares the two objects, and when they are * unequal, resolves both operands through read barriers and compares them * again. When the comparison failed only because of different copies of the * object, we know that the object must already have been evacuated (and * therefore doesn't require a write-barrier). */ -void GraphKit::shenandoah_acmp_barrier(Node*& a, Node*& b) { +Node* GraphKit::cmp_objects(Node* a, Node* b) { + // TODO: Refactor into proper GC interface.
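  // Editorial aside, comments only (not part of the patch): when neither
  // operand is known null or already in to-space, the graph built below has
  // this shape:
  //
  //   cmp = CmpP(b, a)
  //   equal path:    region[_equal]     <- phiA = a,  phiB = b
  //   unequal path:  MemBarAcquire, then a' = ReadBarrier(a), b' = ReadBarrier(b)
  //                  region[_not_equal] <- phiA = a', phiB = b'
  //   result: CmpP(phiB, phiA) over the merged phis
  //
  // If the retried comparison succeeds, the first acmp failed only because
  // the two inputs were different copies of the same object.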
if (UseShenandoahGC) { const Type* a_type = a->bottom_type(); const Type* b_type = b->bottom_type(); if (a_type->higher_equal(TypePtr::NULL_PTR) || b_type->higher_equal(TypePtr::NULL_PTR)) { // We know one arg is gonna be null. No need for barriers. - return; - } - if (a_type->is_oopptr()->const_oop() != NULL && b_type->is_oopptr()->const_oop() != NULL ) { - // We know one arg is inlined constant. No need for barriers. - return; - } - if (a->Opcode() == Op_ShenandoahWriteBarrier && b->Opcode() == Op_ShenandoahWriteBarrier) { - // We know one arg is already write-barrier'd. No need for barriers. - return; + return _gvn.transform(new (C) CmpPNode(b, a)); } if (AllocateNode::Ideal_allocation(a, &_gvn) != NULL || AllocateNode::Ideal_allocation(b, &_gvn) != NULL) { // We know one arg is already in to-space. No need for barriers. - return; + return _gvn.transform(new (C) CmpPNode(b, a)); + } + const TypePtr* a_adr_type = ShenandoahBarrierNode::brooks_pointer_type(a_type); + const TypePtr* b_adr_type = ShenandoahBarrierNode::brooks_pointer_type(b_type); + if ((! ShenandoahBarrierNode::needs_barrier(&_gvn, NULL, a, memory(a_adr_type), false)) && + (! ShenandoahBarrierNode::needs_barrier(&_gvn, NULL, b, memory(b_adr_type), false))) { + // We know both args are in to-space already. No acmp barrier needed. + return _gvn.transform(new (C) CmpPNode(b, a)); + } + + C->set_has_split_ifs(true); + + if (ShenandoahVerifyOptoBarriers) { + a = shenandoah_write_barrier(a); + b = shenandoah_write_barrier(b); + return _gvn.transform(new (C) CmpPNode(b, a)); } enum { _equal = 1, _not_equal, PATH_LIMIT }; RegionNode* region = new (C) RegionNode(PATH_LIMIT); - PhiNode* phiA = PhiNode::make(region, a); - PhiNode* phiB = PhiNode::make(region, b); + PhiNode* phiA = PhiNode::make(region, a, _gvn.type(a)->is_oopptr()->cast_to_nonconst()); + PhiNode* phiB = PhiNode::make(region, b, _gvn.type(b)->is_oopptr()->cast_to_nonconst()); Node* cmp = _gvn.transform(new (C) CmpPNode(b, a)); Node* tst = _gvn.transform(new (C) BoolNode(cmp, BoolTest::eq)); @@ -4392,19 +4392,59 @@ phiA->init_req(_equal, a); phiB->init_req(_equal, b); + uint alias_a = C->get_alias_index(a_adr_type); + uint alias_b = C->get_alias_index(b_adr_type); + PhiNode* mem_phi = NULL; + if (alias_a == alias_b) { + mem_phi = PhiNode::make(region, memory(alias_a), Type::MEMORY, C->get_adr_type(alias_a)); + } else { + mem_phi = PhiNode::make(region, map()->memory(), Type::MEMORY, TypePtr::BOTTOM); + } + // Unequal path: retry after read barriers. 
set_control(iffalse); - a = shenandoah_read_barrier_impl(a, true, true); - b = shenandoah_read_barrier_impl(b, true, true); + if (!iffalse->is_top()) { + Node* mb = NULL; + if (alias_a == alias_b) { + Node* mem = reset_memory(); + mb = MemBarNode::make(C, Op_MemBarAcquire, alias_a); + mb->init_req(TypeFunc::Control, control()); + mb->init_req(TypeFunc::Memory, mem); + Node* membar = _gvn.transform(mb); + set_control(_gvn.transform(new (C) ProjNode(membar, TypeFunc::Control))); + Node* newmem = _gvn.transform(new (C) ProjNode(membar, TypeFunc::Memory)); + set_all_memory(mem); + set_memory(newmem, alias_a); + } else { + mb = insert_mem_bar(Op_MemBarAcquire); + } + } else { + a = top(); + b = top(); + } + + a = shenandoah_read_barrier_impl(a, true, true, false); + b = shenandoah_read_barrier_impl(b, true, true, false); region->init_req(_not_equal, control()); phiA->init_req(_not_equal, a); phiB->init_req(_not_equal, b); - + if (alias_a == alias_b) { + mem_phi->init_req(_not_equal, memory(alias_a)); + set_memory(mem_phi, alias_a); + } else { + mem_phi->init_req(_not_equal, reset_memory()); + set_all_memory(mem_phi); + } + record_for_igvn(mem_phi); + _gvn.set_type(mem_phi, Type::MEMORY); set_control(_gvn.transform(region)); record_for_igvn(region); a = _gvn.transform(phiA); b = _gvn.transform(phiB); + return _gvn.transform(new (C) CmpPNode(b, a)); + } else { + return _gvn.transform(new (C) CmpPNode(b, a)); } } diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/graphKit.hpp --- a/src/share/vm/opto/graphKit.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/graphKit.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -902,11 +902,11 @@ Node* cast_array_to_stable(Node* ary, const TypeAryPtr* ary_type); Node* shenandoah_read_barrier(Node* obj); - Node* shenandoah_read_barrier_nomem(Node* obj); + Node* shenandoah_read_barrier_storeval(Node* obj); Node* shenandoah_write_barrier(Node* obj); - void shenandoah_acmp_barrier(Node*& a, Node*& b); + Node* cmp_objects(Node* a, Node* b); private: - Node* shenandoah_read_barrier_impl(Node* obj, bool use_ctrl, bool use_mem); + Node* shenandoah_read_barrier_impl(Node* obj, bool use_ctrl, bool use_mem, bool allow_fromspace); }; // Helper class to support building of control flow branches. Upon diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/lcm.cpp --- a/src/share/vm/opto/lcm.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/lcm.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -395,7 +395,7 @@ // Should be DU safe because no edge updates. for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) { Node* n = best->fast_out(j); - if( n->is_MachProj() || n->Opcode() == Op_ShenandoahWBMemProj ) { + if( n->is_MachProj() || n->Opcode() == Op_ShenandoahWBMemProj) { get_block_for_node(n)->find_remove(n); block->add_inst(n); map_node_to_block(n, block); @@ -935,6 +935,8 @@ // If this is the first failure, the sentinel string will "stick" // to the Compile object, and the C2Compiler will see it and retry. C->record_failure(C2Compiler::retry_no_subsuming_loads()); + } else { + assert(false, "graph should be schedulable"); } // assert( phi_cnt == end_idx(), "did not schedule all" ); return false; diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/library_call.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -226,7 +226,7 @@ Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y); // This returns Type::AnyPtr, RawPtr, or OopPtr. 
int classify_unsafe_addr(Node* &base, Node* &offset); - Node* make_unsafe_address(Node* base, Node* offset); + Node* make_unsafe_address(Node* base, Node* offset, bool is_store); // Helper for inline_unsafe_access. // Generates the guards that check whether the result of // Unsafe.getObject should be recorded in an SATB log buffer. @@ -1244,6 +1244,7 @@ if (stopped()) { return true; } + set_result(make_string_method_node(Op_StrComp, receiver, arg)); return true; } @@ -1251,19 +1252,9 @@ //------------------------------inline_string_equals------------------------ bool LibraryCallKit::inline_string_equals() { Node* receiver = null_check_receiver(); - - if (ShenandoahVerifyReadsToFromSpace) { - receiver = shenandoah_read_barrier(receiver); - } - // NOTE: Do not null check argument for String.equals() because spec // allows to specify NULL as argument. Node* argument = this->argument(1); - - if (ShenandoahVerifyReadsToFromSpace) { - argument = shenandoah_read_barrier(argument); - } - if (stopped()) { return true; } @@ -1273,7 +1264,7 @@ Node* phi = new (C) PhiNode(region, TypeInt::BOOL); // does source == target string? - Node* cmp = _gvn.transform(new (C) CmpPNode(receiver, argument)); + Node* cmp = cmp_objects(receiver, argument); Node* bol = _gvn.transform(new (C) BoolNode(cmp, BoolTest::eq)); Node* if_eq = generate_slow_guard(bol, NULL); @@ -1320,11 +1311,6 @@ // Get start addr of argument Node* argument_val = load_String_value(no_ctrl, argument); - - if (ShenandoahVerifyReadsToFromSpace) { - argument_val = shenandoah_read_barrier(argument_val); - } - Node* argument_offset = load_String_offset(no_ctrl, argument); Node* argument_start = array_element_address(argument_val, argument_offset, T_CHAR); @@ -1362,10 +1348,8 @@ Node* arg1 = argument(0); Node* arg2 = argument(1); - if (! ShenandoahOptimizeFinals) { - arg1 = shenandoah_read_barrier(arg1); - arg2 = shenandoah_read_barrier(arg2); - } + arg1 = shenandoah_read_barrier(arg1); + arg2 = shenandoah_read_barrier(arg2); set_result(_gvn.transform(new (C) AryEqNode(control(), memory(TypeAryPtr::CHARS), arg1, arg2))); return true; @@ -1622,6 +1606,7 @@ } receiver = null_check(receiver, T_OBJECT); + receiver = shenandoah_read_barrier(receiver); // NOTE: No null check on the argument is needed since it's a constant String oop. 
if (stopped()) { return true; @@ -2387,11 +2372,33 @@ } } -inline Node* LibraryCallKit::make_unsafe_address(Node* base, Node* offset) { +inline Node* LibraryCallKit::make_unsafe_address(Node* base, Node* offset, bool is_store) { int kind = classify_unsafe_addr(base, offset); if (kind == Type::RawPtr) { return basic_plus_adr(top(), base, offset); } else { + if (UseShenandoahGC) { + if (kind == Type::OopPtr) { + base = cast_not_null(base, false); + if (is_store) { + base = shenandoah_write_barrier(base); + } else { + base = shenandoah_read_barrier(base); + } + } else if (kind == Type::AnyPtr) { + if (UseShenandoahGC && + _gvn.type(base)->isa_aryptr()) { + Node* ctrl = top(); + base = null_check_oop(base, &ctrl, true); + } + + if (is_store) { + base = shenandoah_write_barrier(base); + } else { + base = shenandoah_read_barrier(base); + } + } + } return basic_plus_adr(base, offset); } } @@ -2632,11 +2639,6 @@ if (!is_native_ptr) { // The base is either a Java object or a value produced by Unsafe.staticFieldBase Node* base = argument(1); // type: oop - if (is_store) { - base = shenandoah_write_barrier(base); - } else { - base = shenandoah_read_barrier(base); - } // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset offset = argument(2); // type: long // We currently rely on the cookies produced by Unsafe.xxxFieldOffset @@ -2646,13 +2648,13 @@ "fieldOffset must be byte-scaled"); // 32-bit machines ignore the high half! offset = ConvL2X(offset); - adr = make_unsafe_address(base, offset); + adr = make_unsafe_address(base, offset, is_store); heap_base_oop = base; val = is_store ? argument(4) : NULL; } else { Node* ptr = argument(1); // type: long ptr = ConvL2X(ptr); // adjust Java long to machine word - adr = make_unsafe_address(NULL, ptr); + adr = make_unsafe_address(NULL, ptr, is_store); val = is_store ? argument(3) : NULL; } @@ -2661,6 +2663,18 @@ // First guess at the value type. const Type *value_type = Type::get_const_basic_type(type); + if (UseShenandoahGC && adr->is_AddP() && + adr->in(AddPNode::Base) == adr->in(AddPNode::Address)) { + Node* base = ShenandoahBarrierNode::skip_through_barrier(adr->in(AddPNode::Base)); + const TypeInstPtr* base_t = _gvn.type(base)->isa_instptr(); + if (base_t != NULL && + base_t->const_oop() != NULL && + base_t->klass() == ciEnv::current()->Class_klass() && + adr_type->is_instptr()->offset() >= base_t->klass()->as_instance_klass()->size_helper() * wordSize) { + adr_type = base_t->add_offset(adr_type->is_instptr()->offset()); + } + } + // Try to categorize the address. If it comes up as TypeJavaPtr::BOTTOM, // there was not enough information to nail it down. Compile::AliasType* alias_type = C->alias_type(adr_type); @@ -2776,10 +2790,10 @@ if (type != T_OBJECT ) { (void) store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile); } else { - val = shenandoah_read_barrier_nomem(val); // Possibly an oop being stored to Java heap or native memory if (!TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop))) { // oop to Java heap. + val = shenandoah_read_barrier_storeval(val); (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo); } else { // We can't tell at compile time if we are storing in the Java heap or outside @@ -2792,6 +2806,7 @@ __ if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); { // Sync IdealKit and graphKit. 
sync_kit(ideal); + val = shenandoah_read_barrier_storeval(val); Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo); // Update IdealKit memory. __ sync_kit(this); @@ -2867,11 +2882,11 @@ "fieldOffset must be byte-scaled"); // 32-bit machines ignore the high half! offset = ConvL2X(offset); - adr = make_unsafe_address(base, offset); + adr = make_unsafe_address(base, offset, false); } else { Node* ptr = argument(idx + 0); // type: long ptr = ConvL2X(ptr); // adjust Java long to machine word - adr = make_unsafe_address(NULL, ptr); + adr = make_unsafe_address(NULL, ptr, false); } // Generate the read or write prefetch @@ -2973,8 +2988,6 @@ return true; } - base = shenandoah_write_barrier(base); - // Build field offset expression. // We currently rely on the cookies produced by Unsafe.xxxFieldOffset // to be plain byte offsets, which are also the same as those accepted @@ -2982,7 +2995,7 @@ assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled"); // 32-bit machines ignore the high half of long offsets offset = ConvL2X(offset); - Node* adr = make_unsafe_address(base, offset); + Node* adr = make_unsafe_address(base, offset, true); const TypePtr *adr_type = _gvn.type(adr)->isa_ptr(); // For CAS, unlike inline_unsafe_access, there seems no point in @@ -3046,7 +3059,7 @@ if (_gvn.type(newval) == TypePtr::NULL_PTR) newval = _gvn.makecon(TypePtr::NULL_PTR); - newval = shenandoah_read_barrier_nomem(newval); + newval = shenandoah_read_barrier_storeval(newval); // Reference stores need a store barrier. if (kind == LS_xchg) { @@ -3172,13 +3185,11 @@ return true; } - base = shenandoah_write_barrier(base); - // Build field offset expression. assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled"); // 32-bit machines ignore the high half of long offsets offset = ConvL2X(offset); - Node* adr = make_unsafe_address(base, offset); + Node* adr = make_unsafe_address(base, offset, true); const TypePtr *adr_type = _gvn.type(adr)->isa_ptr(); const Type *value_type = Type::get_const_basic_type(type); Compile::AliasType* alias_type = C->alias_type(adr_type); @@ -3188,10 +3199,8 @@ // Ensure that the store is atomic for longs: const bool require_atomic_access = true; Node* store; - if (type == T_OBJECT) { // reference stores need a store barrier. - val = shenandoah_read_barrier_nomem(val); + if (type == T_OBJECT) // reference stores need a store barrier. 
store = store_oop_to_unknown(control(), base, adr, adr_type, val, type, MemNode::release); - } else { store = store_to_memory(control(), adr, val, type, adr_type, MemNode::release, require_atomic_access); } @@ -3370,7 +3379,7 @@ Node* rec_thr = argument(0); Node* tls_ptr = NULL; Node* cur_thr = generate_current_thread(tls_ptr); - Node* cmp_thr = _gvn.transform(new (C) CmpPNode(cur_thr, rec_thr)); + Node* cmp_thr = cmp_objects(cur_thr, rec_thr); Node* bol_thr = _gvn.transform(new (C) BoolNode(cmp_thr, BoolTest::ne)); generate_slow_guard(bol_thr, slow_region); @@ -3723,7 +3732,9 @@ RegionNode* region = new (C) RegionNode(PATH_LIMIT); Node* phi = new (C) PhiNode(region, TypeInt::BOOL); + Node* mem_phi= new (C) PhiNode(region, Type::MEMORY, TypePtr::BOTTOM); record_for_igvn(region); + Node* init_mem = map()->memory(); const TypePtr* adr_type = TypeRawPtr::BOTTOM; // memory type of loads const TypeKlassPtr* kls_type = TypeKlassPtr::OBJECT_OR_NULL; @@ -3742,6 +3753,12 @@ klasses[which_arg] = _gvn.transform(kls); } + if (ShenandoahVerifyOptoBarriers) { + args[0] = shenandoah_write_barrier(args[0]); + args[1] = shenandoah_write_barrier(args[1]); + } + + // Having loaded both klasses, test each for null. bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check); for (which_arg = 0; which_arg <= 1; which_arg++) { @@ -3769,8 +3786,7 @@ set_control(region->in(_prim_0_path)); // go back to first null check if (!stopped()) { // Since superc is primitive, make a guard for the superc==subc case. - shenandoah_acmp_barrier(args[0], args[1]); - Node* cmp_eq = _gvn.transform(new (C) CmpPNode(args[0], args[1])); + Node* cmp_eq = cmp_objects(args[0], args[1]); Node* bol_eq = _gvn.transform(new (C) BoolNode(cmp_eq, BoolTest::eq)); generate_guard(bol_eq, region, PROB_FAIR); if (region->req() == PATH_LIMIT+1) { @@ -3787,18 +3803,24 @@ // pull together the cases: assert(region->req() == PATH_LIMIT, "sane region"); + Node* cur_mem = reset_memory(); for (uint i = 1; i < region->req(); i++) { Node* ctl = region->in(i); if (ctl == NULL || ctl == top()) { region->set_req(i, top()); phi ->set_req(i, top()); - } else if (phi->in(i) == NULL) { + mem_phi->set_req(i, top()); + } else { + if (phi->in(i) == NULL) { phi->set_req(i, intcon(0)); // all other paths produce 'false' } + mem_phi->set_req(i, (i == _prim_0_path || i == _prim_same_path) ? cur_mem : init_mem); + } } set_control(_gvn.transform(region)); set_result(_gvn.transform(phi)); + set_all_memory(_gvn.transform(mem_phi)); return true; } @@ -4466,11 +4488,8 @@ assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled"); - src_ptr = shenandoah_read_barrier(src_ptr); - dst_ptr = shenandoah_write_barrier(dst_ptr); - - Node* src = make_unsafe_address(src_ptr, src_off); - Node* dst = make_unsafe_address(dst_ptr, dst_off); + Node* src = make_unsafe_address(src_ptr, src_off, false); + Node* dst = make_unsafe_address(dst_ptr, dst_off, true); // Conservatively insert a memory barrier on all memory slices. // Do not let writes of the copy source or destination float below the copy. @@ -4888,6 +4907,9 @@ // Do not let writes into the source float below the arraycopy. insert_mem_bar(Op_MemBarCPUOrder); + src = shenandoah_read_barrier(src); + dest = shenandoah_write_barrier(dest); + // Call StubRoutines::generic_arraycopy stub. 
generate_arraycopy(TypeRawPtr::BOTTOM, T_CONFLICT, src, src_offset, dest, dest_offset, length); @@ -4912,6 +4934,10 @@ if (src_elem != dest_elem || dest_elem == T_VOID) { // The component types are not the same or are not recognized. Punt. // (But, avoid the native method wrapper to JVM_ArrayCopy.) + + src = shenandoah_read_barrier(src); + dest = shenandoah_write_barrier(dest); + generate_slow_arraycopy(TypePtr::BOTTOM, src, src_offset, dest, dest_offset, length, /*dest_uninitialized*/false); @@ -4977,6 +5003,9 @@ src = null_check(src, T_ARRAY); dest = null_check(dest, T_ARRAY); + src = shenandoah_read_barrier(src); + dest = shenandoah_write_barrier(dest); + // (4) src_offset must not be negative. generate_negative_guard(src_offset, slow_region); @@ -4999,9 +5028,6 @@ // (9) each element of an oop array must be assignable // The generate_arraycopy subroutine checks this. - src = shenandoah_read_barrier(src); - dest = shenandoah_write_barrier(dest); - // This is where the memory effects are placed: const TypePtr* adr_type = TypeAryPtr::get_array_body_type(dest_elem); generate_arraycopy(adr_type, dest_elem, @@ -5296,7 +5322,7 @@ // At this point we know we do not need type checks on oop stores. // Let's see if we need card marks: - if (alloc != NULL && use_ReduceInitialCardMarks() && ! UseShenandoahGC) { + if (alloc != NULL && use_ReduceInitialCardMarks()) { // If we do not need card marks, copy using the jint or jlong stub. copy_type = LP64_ONLY(UseCompressedOops ? T_INT : T_LONG) NOT_LP64(T_INT); assert(type2aelembytes(basic_elem_type) == type2aelembytes(copy_type), @@ -5828,6 +5854,9 @@ Node *dst_offset = argument(3); Node *length = argument(4); + src = cast_not_null(src, false); + dst = cast_not_null(dst, false); + src = shenandoah_read_barrier(src); dst = shenandoah_write_barrier(dst); @@ -5880,11 +5909,9 @@ Node* ylen = argument(3); Node* z = argument(4); + x = cast_not_null(x, false); x = shenandoah_read_barrier(x); - y = shenandoah_read_barrier(y); - z = shenandoah_write_barrier(z); - - x = shenandoah_read_barrier(x); + y = cast_not_null(y, false); y = shenandoah_read_barrier(y); z = shenandoah_write_barrier(z); @@ -5933,7 +5960,12 @@ } __ else_(); { // Update graphKit memory and control from IdealKit. sync_kit(ideal); - Node* zlen_arg = load_array_length(z); + Node *cast = new (C) CastPPNode(z, TypePtr::NOTNULL); + cast->init_req(0, control()); + _gvn.set_type(cast, cast->bottom_type()); + C->record_for_igvn(cast); + + Node* zlen_arg = load_array_length(cast); // Update IdealKit memory and control from graphKit. 
__ sync_kit(this); __ if_then(zlen_arg, BoolTest::lt, zlen); { @@ -5988,6 +6020,11 @@ Node* z = argument(2); Node* zlen = argument(3); + x = cast_not_null(x, false); + x = shenandoah_read_barrier(x); + z = cast_not_null(z, false); + z = shenandoah_write_barrier(z); + const Type* x_type = x->Value(&_gvn); const Type* z_type = z->Value(&_gvn); const TypeAryPtr* top_x = x_type->isa_aryptr(); @@ -6035,6 +6072,10 @@ Node* len = argument(3); Node* k = argument(4); + in = shenandoah_read_barrier(in); + out = cast_not_null(out, false); + out = shenandoah_write_barrier(out); + const Type* out_type = out->Value(&_gvn); const Type* in_type = in->Value(&_gvn); const TypeAryPtr* top_out = out_type->isa_aryptr(); @@ -6073,7 +6114,7 @@ } assert(UseMontgomeryMultiplyIntrinsic, "not implemented on this platform"); - const char* stubName = "montgomery_square"; + const char* stubName = "montgomery_multiply"; assert(callee()->signature()->size() == 7, "montgomeryMultiply has 7 parameters"); @@ -6084,6 +6125,11 @@ Node* inv = argument(4); Node* m = argument(6); + a = shenandoah_read_barrier(a); + b = shenandoah_read_barrier(b); + n = shenandoah_read_barrier(n); + m = shenandoah_write_barrier(m); + const Type* a_type = a->Value(&_gvn); const TypeAryPtr* top_a = a_type->isa_aryptr(); const Type* b_type = b->Value(&_gvn); @@ -6143,6 +6189,10 @@ Node* inv = argument(3); Node* m = argument(5); + a = shenandoah_read_barrier(a); + n = shenandoah_read_barrier(n); + m = shenandoah_write_barrier(m); + const Type* a_type = a->Value(&_gvn); const TypeAryPtr* top_a = a_type->isa_aryptr(); const Type* n_type = a->Value(&_gvn); @@ -6243,6 +6293,8 @@ } // 'src_start' points to src array + scaled offset + src = cast_not_null(src, false); + src = shenandoah_read_barrier(src); src = shenandoah_read_barrier(src); Node* src_start = array_element_address(src, offset, src_elem); @@ -6345,7 +6397,9 @@ if (field == NULL) return (Node *) NULL; assert (field != NULL, "undefined field"); - fromObj = shenandoah_read_barrier(fromObj); + if (! ShenandoahOptimizeFinals || (! field->is_final() && ! field->is_stable())) { + fromObj = shenandoah_read_barrier(fromObj); + } // Next code copied from Parse::do_get_xxx(): @@ -6479,7 +6533,11 @@ Node* dest = argument(4); Node* dest_offset = argument(5); - // Resolve src and dest arrays for ShenandoahGC. + + // inline_cipherBlockChaining_AESCrypt_predicate() has its own + // barrier. This one should optimize away. + src = cast_not_null(src, false); + dest = cast_not_null(dest, false); src = shenandoah_read_barrier(src); dest = shenandoah_write_barrier(dest); @@ -6578,6 +6636,8 @@ assert (objAESCryptKey != NULL, "wrong version of com.sun.crypto.provider.AESCrypt"); if (objAESCryptKey == NULL) return (Node *) NULL; + objAESCryptKey = shenandoah_read_barrier(objAESCryptKey); + // now have the array, need to get the start address of the lastKey array Node* original_k_start = array_element_address(objAESCryptKey, intcon(0), T_BYTE); return original_k_start; @@ -6596,6 +6656,9 @@ // The receiver was checked for NULL already. Node* objCBC = argument(0); + Node* src = argument(1); + Node* dest = argument(4); + // Load embeddedCipher field of CipherBlockChaining object. Node* embeddedCipherObj = load_field_from_object(objCBC, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false); @@ -6614,6 +6677,15 @@ set_control(top()); // no regular fast path return ctrl; } + + // Resolve src and dest arrays for ShenandoahGC. 
Here because new + // memory state is not handled by predicate logic in + // inline_cipherBlockChaining_AESCrypt itself + src = cast_not_null(src, false); + dest = cast_not_null(dest, false); + src = shenandoah_write_barrier(src); + dest = shenandoah_write_barrier(dest); + ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass(); Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt))); @@ -6631,9 +6703,8 @@ // see the original java code for why. RegionNode* region = new(C) RegionNode(3); region->init_req(1, instof_false); - Node* src = argument(1); - Node* dest = argument(4); - Node* cmp_src_dest = _gvn.transform(new (C) CmpPNode(src, dest)); + + Node* cmp_src_dest = cmp_objects(src, dest); Node* bool_src_dest = _gvn.transform(new (C) BoolNode(cmp_src_dest, BoolTest::eq)); Node* src_dest_conjoint = generate_guard(bool_src_dest, NULL, PROB_MIN); region->init_req(2, src_dest_conjoint); @@ -6672,6 +6743,7 @@ return false; } // 'src_start' points to src array + offset + src = cast_not_null(src, false); src = shenandoah_read_barrier(src); Node* src_start = array_element_address(src, ofs, src_elem); Node* state = NULL; @@ -6739,6 +6811,7 @@ return false; } // 'src_start' points to src array + offset + src = cast_not_null(src, false); src = shenandoah_read_barrier(src); Node* src_start = array_element_address(src, ofs, src_elem); @@ -6907,6 +6980,7 @@ Node* counts = argument(1); const TypeAryPtr* ary = NULL; ciArray* aobj = NULL; + assert(!(ShenandoahBarrierNode::skip_through_barrier(counts)->is_Con() && !counts->is_Con()), "barrier prevents optimization"); if (counts->is_Con() && (ary = counts->bottom_type()->isa_aryptr()) != NULL && (aobj = ary->const_oop()->as_array()) != NULL diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/loopPredicate.cpp --- a/src/share/vm/opto/loopPredicate.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/loopPredicate.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -408,6 +408,9 @@ if (_lpt->is_invariant(n)) { // known invariant _invariant.set(n->_idx); } else if (!n->is_CFG()) { + if (n->Opcode() == Op_ShenandoahWriteBarrier) { + return; + } Node *n_ctrl = _phase->ctrl_or_self(n); Node *u_ctrl = _phase->ctrl_or_self(use); // self if use is a CFG if (_phase->is_dominator(n_ctrl, u_ctrl)) { diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/loopTransform.cpp --- a/src/share/vm/opto/loopTransform.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/loopTransform.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -861,7 +861,11 @@ // Recursive fixup any other input edges into x. // If there are no changes we can just return 'n', otherwise // we need to clone a private copy and change it. - for( uint i = 1; i < n->req(); i++ ) { + uint start = 1; + if (n->Opcode() == Op_ShenandoahWBMemProj) { + start = 0; + } + for( uint i = start; i < n->req(); i++ ) { Node *g = clone_up_backedge_goo( back_ctrl, preheader_ctrl, n->in(i), visited, clones ); if( g != n->in(i) ) { if( !x ) { diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/loopnode.cpp --- a/src/share/vm/opto/loopnode.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/loopnode.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -2333,6 +2333,10 @@ C->set_major_progress(); } + if (!C->major_progress()) { + shenandoah_pin_and_expand_barriers(); + } + // Cleanup any modified bits _igvn.optimize(); @@ -3113,7 +3117,7 @@ ++i; if (in == NULL) continue; if (in->pinned() && !in->is_CFG()) - set_ctrl(in, in->Opcode() == Op_ShenandoahWBMemProj ? 
in->in(0)->in(0) : in->in(0)); + set_ctrl(in, in->in(0)); int is_visited = visited.test_set( in->_idx ); if (!has_node(in)) { // No controlling input yet? assert( !in->is_CFG(), "CFG Node with no controlling input?" ); @@ -3289,7 +3293,7 @@ } while(worklist.size() != 0 && LCA != early) { Node* s = worklist.pop(); - if (s->is_Load()) { + if (s->is_Load() || s->is_ShenandoahBarrier()) { continue; } else if (s->is_MergeMem()) { for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) { @@ -3521,6 +3525,7 @@ case Op_AryEq: case Op_ShenandoahReadBarrier: case Op_ShenandoahWriteBarrier: + case Op_ShenandoahWBMemProj: pinned = false; } if( pinned ) { @@ -3609,6 +3614,16 @@ IdealLoopTree *chosen_loop = get_loop(least); if( !chosen_loop->_child ) // Inner loop? chosen_loop->_body.push(n);// Collect inner loops + + if (n->Opcode() == Op_ShenandoahWriteBarrier) { + // The write barrier and its memory proj must have the same + // control otherwise some loop opts could put nodes (Phis) between + // them + Node* proj = n->find_out_with(Op_ShenandoahWBMemProj); + if (proj != NULL) { + set_ctrl_and_loop(proj, least); + } + } } #ifdef ASSERT diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/loopnode.hpp --- a/src/share/vm/opto/loopnode.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/loopnode.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -1058,6 +1058,38 @@ void sink_use( Node *use, Node *post_loop ); Node *place_near_use( Node *useblock ) const; + Node* try_common_shenandoah_barriers(Node* n, Node *n_ctrl); + MergeMemNode* shenandoah_allocate_merge_mem(Node* mem, int alias, Node* rep_proj, Node* rep_ctrl); + bool shenandoah_should_process_phi(Node* phi, int alias); + MergeMemNode* shenandoah_clone_merge_mem(Node* u, Node* mem, int alias, Node* rep_proj, Node* rep_ctrl, DUIterator& i); + bool shenandoah_is_dominator(Node *d_c, Node *n_c, Node* d, Node* n); + bool shenandoah_is_dominator_same_ctrl(Node* c, Node* d, Node* n); + Node* shenandoah_no_branches(Node* c, Node* dom, bool allow_one_proj); + Node* try_move_shenandoah_barrier_before_loop(Node* n, Node *n_ctrl); + void shenandoah_fix_memory_uses(Node* mem, Node* replacement, Node* rep_proj, Node* rep_ctrl, int alias); +#ifdef ASSERT + bool shenandoah_memory_dominates_all_paths(Node* mem, Node* rep_ctrl, int alias); + void shenandoah_memory_dominates_all_paths_helper(Node* c, Node* rep_ctrl, Unique_Node_List& controls); +#endif + bool shenandoah_fix_mem_phis(Node* mem, Node* mem_ctrl, Node* rep_ctrl, int alias); + bool shenandoah_fix_mem_phis_helper(Node* c, Node* mem, Node* mem_ctrl, Node* rep_ctrl, int alias, VectorSet& controls, GrowableArray<Node*>& phis); + void try_move_shenandoah_read_barrier(Node* n, Node *n_ctrl); + Node* shenandoah_dom_mem(Node* mem, Node*& mem_ctrl, Node* n, Node* rep_ctrl, int alias); + Node* try_move_shenandoah_barrier_before_pre_loop(Node* c, Node* val_ctrl); + Node* try_move_shenandoah_barrier_before_loop_helper(Node* n, Node* cl, Node* val_ctrl, Node* mem); + Node* shenandoah_move_above_predicates(Node* cl, Node* val_ctrl); + void shenandoah_pin_and_expand_barriers(); + CallStaticJavaNode* shenandoah_pin_and_expand_barriers_null_check(ShenandoahBarrierNode* wb); + void shenandoah_pin_and_expand_barriers_move_barrier(ShenandoahBarrierNode* wb); + Node* shenandoah_find_raw_mem(Node* ctrl, Node* wb, const Node_List& memory_nodes, const Node_List& phis, bool strict); + Node* shenandoah_pick_phi(Node* phi1, Node* phi2, Node_Stack& phis, VectorSet& visited); + Node* shenandoah_find_bottom_mem(Node* ctrl); + void
shenandoah_follow_barrier_uses(Node* n, Node* ctrl, Unique_Node_List& uses); + bool shenandoah_already_has_better_phi(Node* region, int alias, Node* m, Node* m_ctrl); + void shenandoah_collect_memory_nodes(int alias, Node_List& memory_nodes, Node_List& phis); + void shenandoah_collect_memory_nodes_helper(Node* n, int alias, GrowableArray<Node*>& inputs, int adj, Node_List& memory_nodes, Node_List& phis, Node*& cur_mem, Unique_Node_List& wq); + void shenandoah_fix_raw_mem(Node* ctrl, Node* region, Node* raw_mem, Node* raw_mem_phi, Node_List& memory_nodes, Node_List& memory_phis, Unique_Node_List& uses); + bool _created_loop_node; public: void set_created_loop_node() { _created_loop_node = true; } diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/loopopts.cpp --- a/src/share/vm/opto/loopopts.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/loopopts.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -119,22 +119,22 @@ // igvn->type(x) is set to x->Value() already. x->raise_bottom_type(t); if (x->Opcode() != Op_ShenandoahWriteBarrier) { - Node *y = x->Identity(&_igvn); - if (y != x) { + Node *y = x->Identity(&_igvn); + if (y != x) { + wins++; + x = y; + } else { + y = _igvn.hash_find(x); + if (y) { wins++; x = y; } else { - y = _igvn.hash_find(x); - if (y) { - wins++; - x = y; - } else { - // Else x is a new node we are keeping - // We do not need register_new_node_with_optimizer - // because set_type has already been called. - _igvn._worklist.push(x); - } + // Else x is a new node we are keeping + // We do not need register_new_node_with_optimizer + // because set_type has already been called. + _igvn._worklist.push(x); } + } } else { _igvn._worklist.push(x); } @@ -221,7 +221,7 @@ for (uint i = 1; i < r->req(); i++) { Node* wb = phi->in(i); if (wb->Opcode() == Op_ShenandoahWriteBarrier) { - assert(! wb->has_out_with(Op_ShenandoahWBMemProj), "new clone does not have mem proj"); + // assert(! wb->has_out_with(Op_ShenandoahWBMemProj), "new clone does not have mem proj"); Node* new_proj = new (C) ShenandoahWBMemProjNode(wb); register_new_node(new_proj, r->in(i)); memphi->set_req(i, new_proj); @@ -233,8 +233,11 @@ } register_new_node(memphi, r); Node* old_mem_out = n->find_out_with(Op_ShenandoahWBMemProj); - assert(old_mem_out != NULL, "expect memory projection"); - _igvn.replace_node(old_mem_out, memphi); + while (old_mem_out != NULL) { + assert(old_mem_out != NULL, "expect memory projection"); + _igvn.replace_node(old_mem_out, memphi); + old_mem_out = n->find_out_with(Op_ShenandoahWBMemProj); + } } assert(! n->has_out_with(Op_ShenandoahWBMemProj), "no more memory outs"); } @@ -729,6 +732,15 @@ Node *n_ctrl = get_ctrl(n); if( !n_ctrl ) return n; // Dead node + try_move_shenandoah_barrier_before_loop(n, n_ctrl); + + Node* res = try_common_shenandoah_barriers(n, n_ctrl); + if (res != NULL) { + return res; + } + + try_move_shenandoah_read_barrier(n, n_ctrl); + // Attempt to remix address expressions for loop invariants Node *m = remix_address_expressions( n ); if( m ) return m; @@ -1046,7 +1058,7 @@ // For inner loop uses get the preheader area. x_ctrl = place_near_use(x_ctrl); - if (n->is_Load()) { + if (n->is_Load() || n->Opcode() == Op_ShenandoahReadBarrier) { // For loads, add a control edge to a CFG node outside of the loop // to force them to not combine and return back inside the loop // during GVN optimization (4641526). // Because we are setting the actual control input, factor in // the result from get_late_ctrl() so we respect any // anti-dependences. (6233005).
+ if (n->is_Load()) { x_ctrl = dom_lca(late_load_ctrl, x_ctrl); + } // Don't allow the control input to be a CFG splitting node. // Such nodes should only have ProjNodes as outs, e.g. IfNode diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/machnode.cpp --- a/src/share/vm/opto/machnode.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/machnode.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -784,6 +784,13 @@ return &jvms_for_throw; } +uint MachMemBarNode::size_of() const { return sizeof(*this); } + +const TypePtr *MachMemBarNode::adr_type() const { + return _adr_type; +} + + //============================================================================= #ifndef PRODUCT void labelOper::int_format(PhaseRegAlloc *ra, const MachNode *node, outputStream *st) const { diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/machnode.hpp --- a/src/share/vm/opto/machnode.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/machnode.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -904,6 +904,19 @@ virtual JVMState* jvms() const; }; +class MachMemBarNode : public MachNode { + virtual uint size_of() const; // Size is bigger +public: + const TypePtr* _adr_type; // memory effects of call or return + MachMemBarNode() : MachNode() { + init_class_id(Class_MachMemBar); + _adr_type = TypePtr::BOTTOM; // the default: all of memory + } + + void set_adr_type(const TypePtr* atp) { _adr_type = atp; } + virtual const TypePtr *adr_type() const; +}; + //------------------------------MachTempNode----------------------------------- // Node used by the adlc to construct inputs to represent temporary registers diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/macro.cpp --- a/src/share/vm/opto/macro.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/macro.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -446,7 +446,7 @@ if (val == mem) { values.at_put(j, mem); } else if (val->is_Store()) { - values.at_put(j, val->in(MemNode::ValueIn)); + values.at_put(j, ShenandoahBarrierNode::skip_through_barrier(val->in(MemNode::ValueIn))); } else if(val->is_Proj() && val->in(0) == alloc) { values.at_put(j, _igvn.zerocon(ft)); } else if (val->is_Phi()) { @@ -548,7 +548,7 @@ // hit a sentinel, return appropriate 0 value return _igvn.zerocon(ft); } else if (mem->is_Store()) { - return mem->in(MemNode::ValueIn); + return ShenandoahBarrierNode::skip_through_barrier(mem->in(MemNode::ValueIn)); } else if (mem->is_Phi()) { // attempt to produce a Phi reflecting the values on the input paths of the Phi Node_Stack value_phis(a, 8); @@ -846,9 +846,6 @@ field_val = transform_later(new (C) DecodeNNode(field_val, field_val->get_ptr_type())); } } - if (field_val->isa_ShenandoahBarrier()) { - field_val = field_val->in(ShenandoahBarrierNode::ValueIn); - } sfpt->add_req(field_val); } JVMState *jvms = sfpt->jvms(); @@ -1291,8 +1288,8 @@ // Add to heap top to get a new heap top if (UseShenandoahGC) { - // Allocate one word more for the Shenandoah brooks pointer. - size_in_bytes = new (C) AddLNode(size_in_bytes, _igvn.MakeConX(8)); + // Allocate several words more for the Shenandoah brooks pointer. + size_in_bytes = new (C) AddLNode(size_in_bytes, _igvn.MakeConX(BrooksPointer::byte_size())); transform_later(size_in_bytes); } @@ -1387,9 +1384,8 @@ } if (UseShenandoahGC) { - // Bump up object by one word. The preceding word is used for - // the Shenandoah brooks pointer. - fast_oop = new (C) AddPNode(top(), fast_oop, _igvn.MakeConX(8)); + // Bump up object for Shenandoah brooks pointer. 
+ fast_oop = new (C) AddPNode(top(), fast_oop, _igvn.MakeConX(BrooksPointer::byte_size())); transform_later(fast_oop); } diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/matcher.cpp --- a/src/share/vm/opto/matcher.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/matcher.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -34,6 +34,7 @@ #include "opto/regmask.hpp" #include "opto/rootnode.hpp" #include "opto/runtime.hpp" +#include "opto/shenandoahSupport.hpp" #include "opto/type.hpp" #include "opto/vectornode.hpp" #include "runtime/atomic.hpp" @@ -1021,6 +1022,9 @@ m = n->is_SafePoint() ? match_sfpt(n->as_SafePoint()):match_tree(n); if (C->failing()) return NULL; if (m == NULL) { Matcher::soft_match_failure(); return NULL; } + if (n->is_MemBar()) { + m->as_MachMemBar()->set_adr_type(n->adr_type()); + } } else { // Nothing the matcher cares about if( n->is_Proj() && n->in(0)->is_Multi()) { // Projections? // Convert to machine-dependent projection @@ -1065,6 +1069,15 @@ mstack.push(m, Visit, n, -1); } + // Handle precedence edges for interior nodes + for (i = n->len()-1; (uint)i >= n->req(); i--) { + Node *m = n->in(i); + if (m == NULL || C->node_arena()->contains(m)) continue; + n->rm_prec(i); + // set -1 to call add_prec() instead of set_req() during Step1 + mstack.push(m, Visit, n, -1); + } + // For constant debug info, I'd rather have unmatched constants. int cnt = n->req(); JVMState* jvms = n->jvms(); @@ -1754,6 +1767,14 @@ return ex; } +void Matcher::handle_precedence_edges(Node* n, MachNode *mach) { + for (uint i = n->req(); i < n->len(); i++) { + if (n->in(i) != NULL) { + mach->add_prec(n->in(i)); + } + } +} + void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach ) { // 'op' is what I am expecting to receive int op = _leftOp[rule]; @@ -1788,6 +1809,8 @@ uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) { + handle_precedence_edges(s->_leaf, mach); + if( s->_leaf->is_Load() ) { Node *mem2 = s->_leaf->in(MemNode::Memory); assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" ); @@ -1870,6 +1893,9 @@ mem = s->_leaf->in(MemNode::Memory); debug_only(_mem_node = s->_leaf;) } + + handle_precedence_edges(s->_leaf, mach); + if( s->_leaf->in(0) && s->_leaf->req() > 1) { if( !mach->in(0) ) mach->set_req(0,s->_leaf->in(0)); @@ -2131,6 +2157,9 @@ mem_op = true; break; case Op_ShenandoahReadBarrier: + if (n->in(ShenandoahBarrierNode::ValueIn)->is_DecodeNarrowPtr()) { + set_shared(n->in(ShenandoahBarrierNode::ValueIn)->in(1)); + } case Op_ShenandoahWriteBarrier: mem_op = true; set_shared(n); diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/matcher.hpp --- a/src/share/vm/opto/matcher.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/matcher.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -124,6 +124,8 @@ // Mach node for ConP #NULL MachNode* _mach_null; + void handle_precedence_edges(Node* n, MachNode *mach); + public: int LabelRootDepth; // Convert ideal machine register to a register mask for spill-loads diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/memnode.cpp --- a/src/share/vm/opto/memnode.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/memnode.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -650,227 +650,6 @@ } } -//------------------------adr_phi_is_loop_invariant---------------------------- -// A helper function for Ideal_DU_postCCP to check if a Phi in a counted -// loop is loop invariant. 
Make a quick traversal of Phi and associated -// CastPP nodes, looking to see if they are a closed group within the loop. -bool MemNode::adr_phi_is_loop_invariant(Node* adr_phi, Node* cast) { - // The idea is that the phi-nest must boil down to only CastPP nodes - // with the same data. This implies that any path into the loop already - // includes such a CastPP, and so the original cast, whatever its input, - // must be covered by an equivalent cast, with an earlier control input. - ResourceMark rm; - - // The loop entry input of the phi should be the unique dominating - // node for every Phi/CastPP in the loop. - Unique_Node_List closure; - closure.push(adr_phi->in(LoopNode::EntryControl)); - - // Add the phi node and the cast to the worklist. - Unique_Node_List worklist; - worklist.push(adr_phi); - if( cast != NULL ){ - if( !cast->is_ConstraintCast() ) return false; - worklist.push(cast); - } - - // Begin recursive walk of phi nodes. - while( worklist.size() ){ - // Take a node off the worklist - Node *n = worklist.pop(); - if( !closure.member(n) ){ - // Add it to the closure. - closure.push(n); - // Make a sanity check to ensure we don't waste too much time here. - if( closure.size() > 20) return false; - // This node is OK if: - // - it is a cast of an identical value - // - or it is a phi node (then we add its inputs to the worklist) - // Otherwise, the node is not OK, and we presume the cast is not invariant - if( n->is_ConstraintCast() ){ - worklist.push(n->in(1)); - } else if( n->is_Phi() ) { - for( uint i = 1; i < n->req(); i++ ) { - worklist.push(n->in(i)); - } - } else { - return false; - } - } - } - - // Quit when the worklist is empty, and we've found no offending nodes. - return true; -} - -//------------------------------Ideal_DU_postCCP------------------------------- -// Find any cast-away of null-ness and keep its control. Null cast-aways are -// going away in this pass and we need to make this memory op depend on the -// gating null check. -Node *MemNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { - return Ideal_common_DU_postCCP(ccp, this, in(MemNode::Address)); -} - -// I tried to leave the CastPP's in. This makes the graph more accurate in -// some sense; we get to keep around the knowledge that an oop is not-null -// after some test. Alas, the CastPP's interfere with GVN (some values are -// the regular oop, some are the CastPP of the oop, all merge at Phi's which -// cannot collapse, etc). This cost us 10% on SpecJVM, even when I removed -// some of the more trivial cases in the optimizer. Removing more useless -// Phi's started allowing Loads to illegally float above null checks. I gave -// up on this approach. CNC 10/20/2000 -// This static method may be called not from MemNode (EncodePNode calls it). -// Only the control edge of the node 'n' might be updated. -Node *MemNode::Ideal_common_DU_postCCP( PhaseCCP *ccp, Node* n, Node* adr ) { - Node *skipped_cast = NULL; - // Need a null check? Regular static accesses do not because they are - // from constant addresses. Array ops are gated by the range check (which - // always includes a NULL check). Just check field ops. - if( n->in(MemNode::Control) == NULL ) { - // Scan upwards for the highest location we can place this memory op. 
- while( true ) { - switch( adr->Opcode() ) { - - case Op_AddP: // No change to NULL-ness, so peek thru AddP's - adr = adr->in(AddPNode::Base); - continue; - - case Op_DecodeN: // No change to NULL-ness, so peek thru - case Op_DecodeNKlass: - adr = adr->in(1); - continue; - - case Op_EncodeP: - case Op_EncodePKlass: - // EncodeP node's control edge could be set by this method - // when EncodeP node depends on CastPP node. - // - // Use its control edge for memory op because EncodeP may go away - // later when it is folded with following or preceding DecodeN node. - if (adr->in(0) == NULL) { - // Keep looking for cast nodes. - adr = adr->in(1); - continue; - } - ccp->hash_delete(n); - n->set_req(MemNode::Control, adr->in(0)); - ccp->hash_insert(n); - return n; - - case Op_ShenandoahReadBarrier: - case Op_ShenandoahWriteBarrier: - if (adr->in(ShenandoahBarrierNode::Control) == NULL) { - // Keep looking for cast nodes. - adr = adr->in(ShenandoahBarrierNode::ValueIn); - continue; - } - ccp->hash_delete(n); - n->set_req(MemNode::Control, adr->in(ShenandoahBarrierNode::Control)); - ccp->hash_insert(n); - return n; - case Op_CastPP: - // If the CastPP is useless, just peek on through it. - if( ccp->type(adr) == ccp->type(adr->in(1)) ) { - // Remember the cast that we've peeked though. If we peek - // through more than one, then we end up remembering the highest - // one, that is, if in a loop, the one closest to the top. - skipped_cast = adr; - adr = adr->in(1); - continue; - } - // CastPP is going away in this pass! We need this memory op to be - // control-dependent on the test that is guarding the CastPP. - ccp->hash_delete(n); - n->set_req(MemNode::Control, adr->in(0)); - ccp->hash_insert(n); - return n; - - case Op_Phi: - // Attempt to float above a Phi to some dominating point. - if (adr->in(0) != NULL && adr->in(0)->is_CountedLoop()) { - // If we've already peeked through a Cast (which could have set the - // control), we can't float above a Phi, because the skipped Cast - // may not be loop invariant. - if (adr_phi_is_loop_invariant(adr, skipped_cast)) { - adr = adr->in(1); - continue; - } - } - - // Intentional fallthrough! - - // No obvious dominating point. The mem op is pinned below the Phi - // by the Phi itself. If the Phi goes away (no true value is merged) - // then the mem op can float, but not indefinitely. It must be pinned - // behind the controls leading to the Phi. - case Op_CheckCastPP: - // These usually stick around to change address type, however a - // useless one can be elided and we still need to pick up a control edge - if (adr->in(0) == NULL) { - // This CheckCastPP node has NO control and is likely useless. But we - // need check further up the ancestor chain for a control input to keep - // the node in place. 4959717. - skipped_cast = adr; - adr = adr->in(1); - continue; - } - ccp->hash_delete(n); - n->set_req(MemNode::Control, adr->in(0)); - ccp->hash_insert(n); - return n; - - // List of "safe" opcodes; those that implicitly block the memory - // op below any null check. 
- case Op_CastX2P: // no null checks on native pointers - case Op_Parm: // 'this' pointer is not null - case Op_LoadP: // Loading from within a klass - case Op_LoadN: // Loading from within a klass - case Op_LoadKlass: // Loading from within a klass - case Op_LoadNKlass: // Loading from within a klass - case Op_ConP: // Loading from a klass - case Op_ConN: // Loading from a klass - case Op_ConNKlass: // Loading from a klass - case Op_CreateEx: // Sucking up the guts of an exception oop - case Op_Con: // Reading from TLS - case Op_CMoveP: // CMoveP is pinned - case Op_CMoveN: // CMoveN is pinned - break; // No progress - - case Op_Proj: // Direct call to an allocation routine - case Op_SCMemProj: // Memory state from store conditional ops -#ifdef ASSERT - { - assert(adr->as_Proj()->_con == TypeFunc::Parms, "must be return value"); - const Node* call = adr->in(0); - if (call->is_CallJava()) { - const CallJavaNode* call_java = call->as_CallJava(); - const TypeTuple *r = call_java->tf()->range(); - assert(r->cnt() > TypeFunc::Parms, "must return value"); - const Type* ret_type = r->field_at(TypeFunc::Parms); - assert(ret_type && ret_type->isa_ptr(), "must return pointer"); - // We further presume that this is one of - // new_instance_Java, new_array_Java, or - // the like, but do not assert for this. - } else if (call->is_Allocate()) { - // similar case to new_instance_Java, etc. - } else if (!call->is_CallLeaf()) { - // Projections from fetch_oop (OSR) are allowed as well. - ShouldNotReachHere(); - } - } -#endif - break; - default: - ShouldNotReachHere(); - } - break; - } - } - - return NULL; // No progress -} - - //============================================================================= // Should LoadNode::Ideal() attempt to remove control edges? bool LoadNode::can_remove_control() const { @@ -1145,9 +924,28 @@ if (!phase->type(value)->higher_equal(phase->type(this))) return this; } + Node* value_no_barrier = ShenandoahBarrierNode::skip_through_barrier(value); + PhaseIterGVN* igvn = phase->is_IterGVN(); + if (UseShenandoahGC && + igvn != NULL && + value->is_Phi() && + value->req() > 2 && + value->in(1) != NULL && + value->in(1)->is_ShenandoahBarrier()) { + if (igvn->_worklist.member(value) || + igvn->_worklist.member(value->in(0)) || + (value->in(0)->in(1) != NULL && + value->in(0)->in(1)->is_IfProj() && + (igvn->_worklist.member(value->in(0)->in(1)) || + value->in(0)->in(1)->in(0) != NULL && + igvn->_worklist.member(value->in(0)->in(1)->in(0))))) { + igvn->_worklist.push(this); + return this; + } + } // (This works even when value is a Con, but LoadNode::Value // usually runs first, producing the singleton type of the Con.) - return value; + return ShenandoahBarrierNode::skip_through_barrier(value); } // Search for an existing data phi which was generated before for the same @@ -1206,7 +1004,7 @@ return NULL; // Complex address } AddPNode* address = base->in(Address)->as_AddP(); - Node* cache_base = address->in(AddPNode::Base); + Node* cache_base = ShenandoahBarrierNode::skip_through_barrier(address->in(AddPNode::Base)); if ((cache_base != NULL) && cache_base->is_DecodeN()) { // Get ConP node which is static 'cache' field. cache_base = cache_base->in(1); @@ -1670,9 +1468,19 @@ const bool off_beyond_header = (off != BrooksPointer::byte_offset() || !UseShenandoahGC) && ((uint)off >= (uint)min_base_off); // Try to constant-fold a stable array element. 
- if (FoldStableValues && ary->is_stable() && ary->const_oop() != NULL) { + if (FoldStableValues && ary->is_stable()) { // Make sure the reference is not into the header and the offset is constant - if (off_beyond_header && adr->is_AddP() && off != Type::OffsetBot) { + ciObject* aobj = NULL; + if (UseShenandoahGC && adr->is_AddP() && !adr->in(AddPNode::Base)->is_top()) { + Node* base = ShenandoahBarrierNode::skip_through_barrier(adr->in(AddPNode::Base)); + if (!base->is_top()) { + ary = phase->type(base)->is_aryptr(); + aobj = ary->const_oop(); + } + } else { + aobj = ary->const_oop(); + } + if (aobj != NULL && off_beyond_header && adr->is_AddP() && off != Type::OffsetBot) { const Type* con_type = fold_stable_ary_elem(ary, off, memory_type()); if (con_type != NULL) { return con_type; @@ -1761,7 +1569,19 @@ } } // Optimizations for constant objects - ciObject* const_oop = tinst->const_oop(); + ciObject* const_oop = NULL; + if (UseShenandoahGC && adr->is_AddP() && !adr->in(AddPNode::Base)->is_top()) { + Node* base = ShenandoahBarrierNode::skip_through_barrier(adr->in(AddPNode::Base)); + if (phase->type(base) != Type::TOP) { + const TypePtr* base_t = phase->type(base)->is_ptr(); + if (base_t != TypePtr::NULL_PTR) { + tinst = base_t->is_instptr(); + const_oop = tinst->const_oop(); + } + } + } else { + const_oop = tinst->const_oop(); + } if (const_oop != NULL) { // For constant Boxed value treat the target field as a compile time constant. if (tinst->is_ptr_to_boxed_value()) { diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/memnode.hpp --- a/src/share/vm/opto/memnode.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/memnode.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -84,10 +84,6 @@ // This one should probably be a phase-specific function: static bool all_controls_dominate(Node* dom, Node* sub); - // Find any cast-away of null-ness and keep its control. - static Node *Ideal_common_DU_postCCP( PhaseCCP *ccp, Node* n, Node* adr ); - virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp ); - virtual const class TypePtr *adr_type() const; // returns bottom_type of address // Shared code for Ideal methods: @@ -106,6 +102,12 @@ #endif } +#ifdef ASSERT + void set_raw_adr_type(const TypePtr *t) { + _adr_type = t; + } +#endif + // Map a load or store opcode to its corresponding store opcode. // (Return -1 if unknown.) 
virtual int store_Opcode() const { return -1; } @@ -289,6 +291,13 @@ virtual const Type *Value(PhaseTransform *phase) const; virtual int store_Opcode() const { return Op_StoreB; } virtual BasicType memory_type() const { return T_BYTE; } + + virtual bool is_g1_marking_load() const { + const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()); + return in(2)->is_AddP() && in(2)->in(2)->Opcode() == Op_ThreadLocal + && in(2)->in(3)->is_Con() + && in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == marking_offset; + } }; //------------------------------LoadUSNode------------------------------------- @@ -779,6 +788,9 @@ virtual const Type *bottom_type() const { return _type; } virtual uint ideal_reg() const; virtual const class TypePtr *adr_type() const { return _adr_type; } // returns bottom_type of address + void set_adr_type(const TypePtr *t) { + _adr_type = t; + } bool result_not_used() const; }; diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/multnode.cpp --- a/src/share/vm/opto/multnode.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/multnode.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -150,59 +150,62 @@ } //-------------------------------is_uncommon_trap_proj---------------------------- -// Return true if proj is the form of "proj->[region->..]call_uct" -bool ProjNode::is_uncommon_trap_proj(Deoptimization::DeoptReason reason) { +// Return uncommon trap call node if proj is for "proj->[region->..]call_uct" +// NULL otherwise +CallStaticJavaNode* ProjNode::is_uncommon_trap_proj(Deoptimization::DeoptReason reason) { int path_limit = 10; Node* out = this; for (int ct = 0; ct < path_limit; ct++) { out = out->unique_ctrl_out(); if (out == NULL) - return false; + return NULL; if (out->is_CallStaticJava()) { - int req = out->as_CallStaticJava()->uncommon_trap_request(); + CallStaticJavaNode* call = out->as_CallStaticJava(); + int req = call->uncommon_trap_request(); if (req != 0) { Deoptimization::DeoptReason trap_reason = Deoptimization::trap_request_reason(req); if (trap_reason == reason || reason == Deoptimization::Reason_none) { - return true; + return call; } } - return false; // don't do further after call + return NULL; // don't do further after call } if (out->Opcode() != Op_Region) - return false; + return NULL; } - return false; + return NULL; } //-------------------------------is_uncommon_trap_if_pattern------------------------- -// Return true for "if(test)-> proj -> ... -// | -// V -// other_proj->[region->..]call_uct" -// +// Return uncommon trap call node for "if(test)-> proj -> ... +// | +// V +// other_proj->[region->..]call_uct" +// NULL otherwise // "must_reason_predicate" means the uct reason must be Reason_predicate -bool ProjNode::is_uncommon_trap_if_pattern(Deoptimization::DeoptReason reason) { +CallStaticJavaNode* ProjNode::is_uncommon_trap_if_pattern(Deoptimization::DeoptReason reason) { Node *in0 = in(0); - if (!in0->is_If()) return false; + if (!in0->is_If()) return NULL; // Variation of a dead If node. - if (in0->outcnt() < 2) return false; + if (in0->outcnt() < 2) return NULL; IfNode* iff = in0->as_If(); // we need "If(Conv2B(Opaque1(...)))" pattern for reason_predicate if (reason != Deoptimization::Reason_none) { if (iff->in(1)->Opcode() != Op_Conv2B || iff->in(1)->in(1)->Opcode() != Op_Opaque1) { - return false; + return NULL; } } ProjNode* other_proj = iff->proj_out(1-_con); if (other_proj == NULL) // Should never happen, but make Parfait happy. 
- return false; - if (other_proj->is_uncommon_trap_proj(reason)) { + return NULL; + CallStaticJavaNode* call = other_proj->is_uncommon_trap_proj(reason); + if (call != NULL) { assert(reason == Deoptimization::Reason_none || Compile::current()->is_predicate_opaq(iff->in(1)->in(1)), "should be on the list"); - return true; + return call; } - return false; + return NULL; } diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/multnode.hpp --- a/src/share/vm/opto/multnode.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/multnode.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -89,13 +89,15 @@ virtual void dump_spec(outputStream *st) const; #endif - // Return true if proj is for "proj->[region->..]call_uct" - bool is_uncommon_trap_proj(Deoptimization::DeoptReason reason); - // Return true for "if(test)-> proj -> ... - // | - // V - // other_proj->[region->..]call_uct" - bool is_uncommon_trap_if_pattern(Deoptimization::DeoptReason reason); + // Return uncommon trap call node if proj is for "proj->[region->..]call_uct" + // NULL otherwise + CallStaticJavaNode* is_uncommon_trap_proj(Deoptimization::DeoptReason reason); + // Return uncommon trap call node for "if(test)-> proj -> ... + // | + // V + // other_proj->[region->..]call_uct" + // NULL otherwise + CallStaticJavaNode* is_uncommon_trap_if_pattern(Deoptimization::DeoptReason reason); }; #endif // SHARE_VM_OPTO_MULTNODE_HPP diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/node.cpp --- a/src/share/vm/opto/node.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/node.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -521,6 +521,10 @@ C->add_macro_node(n); if (is_expensive()) C->add_expensive_node(n); + + if (Opcode() == Op_ShenandoahWriteBarrier) { + C->add_shenandoah_barrier(n->as_ShenandoahBarrier()); + } // If the cloned node is a range check dependent CastII, add it to the list. CastIINode* cast = n->isa_CastII(); if (cast != NULL && cast->has_range_check()) { @@ -654,6 +658,9 @@ if (is_expensive()) { compile->remove_expensive_node(this); } + if (is_ShenandoahBarrier()) { + compile->remove_shenandoah_barrier(this->as_ShenandoahBarrier()); + } CastIINode* cast = isa_CastII(); if (cast != NULL && cast->has_range_check()) { compile->remove_range_check_cast(cast); @@ -925,22 +932,6 @@ return (Node*) this; } -// Find out of current node that matches opcode. -Node* Node::find_out_with(int opcode) { - for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { - Node* use = fast_out(i); - if (use->Opcode() == opcode) { - return use; - } - } - return NULL; -} - -// Return true if the current node has an out that matches opcode. -bool Node::has_out_with(int opcode) { - return (find_out_with(opcode) != NULL); -} - //---------------------------uncast_helper------------------------------------- Node* Node::uncast_helper(const Node* p) { #ifdef ASSERT @@ -970,6 +961,22 @@ return (Node*) p; } +// Find out of current node that matches opcode. +Node* Node::find_out_with(int opcode) { + for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { + Node* use = fast_out(i); + if (use->Opcode() == opcode) { + return use; + } + } + return NULL; +} + +// Return true if the current node has an out that matches opcode. +bool Node::has_out_with(int opcode) { + return (find_out_with(opcode) != NULL); +} + //------------------------------add_prec--------------------------------------- // Add a new precedence input. Precedence inputs are unordered, with // duplicates removed and NULLs packed down at the end. 
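Aside: the multnode.cpp/multnode.hpp hunks above change is_uncommon_trap_proj() and is_uncommon_trap_if_pattern() from boolean predicates into accessors that return the matched CallStaticJavaNode* (or NULL). A minimal caller sketch, with 'proj' standing in for some ProjNode*; the variable names are illustrative and not taken from this patch:

    // Hypothetical use of the new return value: the uncommon-trap call is
    // handed back directly, so its trap request can be inspected without
    // re-walking the graph from the projection.
    CallStaticJavaNode* uct = proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
    if (uct != NULL) {
      int req = uct->uncommon_trap_request();
      Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(req);
      // ... act on 'reason', or record a dependency against 'uct' ...
    }

Callers that only need the old boolean answer simply test the result against NULL.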
@@ -1152,6 +1159,8 @@ } else if( op == Op_SubI || op == Op_SubL ) { // Condition for subI(x,subI(y,z)) ==> subI(addI(x,z),y) return n->Opcode() == op && n->in(2) == this; + } else if (op == Op_ShenandoahWriteBarrier) { + return n->Opcode() == Op_ShenandoahWBMemProj; } return false; }; @@ -1370,6 +1379,9 @@ if (dead->is_expensive()) { igvn->C->remove_expensive_node(dead); } + if (dead->is_ShenandoahBarrier()) { + igvn->C->remove_shenandoah_barrier(dead->as_ShenandoahBarrier()); + } CastIINode* cast = dead->isa_CastII(); if (cast != NULL && cast->has_range_check()) { igvn->C->remove_range_check_cast(cast); @@ -1419,12 +1431,6 @@ return false; } -//------------------------------Ideal_DU_postCCP------------------------------- -// Idealize graph, using DU info. Must clone result into new-space -Node *Node::Ideal_DU_postCCP( PhaseCCP * ) { - return NULL; // Default to no change -} - //------------------------------hash------------------------------------------- // Hash function over Nodes. uint Node::hash() const { @@ -2113,6 +2119,14 @@ return found; } +void Node::ensure_control_or_add_prec(Node* c) { + if (in(0) == NULL) { + set_req(0, c); + } else if (in(0) != c) { + add_prec(c); + } +} + //============================================================================= //------------------------------yank------------------------------------------- // Find and remove diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/node.hpp --- a/src/share/vm/opto/node.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/node.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -71,6 +71,7 @@ class FastLockNode; class FastUnlockNode; class IfNode; +class IfProjNode; class IfFalseNode; class IfTrueNode; class InitializeNode; @@ -100,6 +101,7 @@ class MachSpillCopyNode; class MachTempNode; class MachMergeNode; +class MachMemBarNode; class Matcher; class MemBarNode; class MemBarStoreStoreNode; @@ -444,7 +446,6 @@ bool eqv_uncast(const Node* n) const { return (this->uncast() == n->uncast()); } - // Find out of current node that matches opcode. Node* find_out_with(int opcode); // Return true if the current node has an out that matches opcode. @@ -606,6 +607,7 @@ DEFINE_CLASS_ID(MachConstantBase, Mach, 4) DEFINE_CLASS_ID(MachConstant, Mach, 5) DEFINE_CLASS_ID(MachMerge, Mach, 6) + DEFINE_CLASS_ID(MachMemBar, Mach, 7) DEFINE_CLASS_ID(Type, Node, 2) DEFINE_CLASS_ID(Phi, Type, 0) @@ -625,8 +627,9 @@ DEFINE_CLASS_ID(Proj, Node, 3) DEFINE_CLASS_ID(CatchProj, Proj, 0) DEFINE_CLASS_ID(JumpProj, Proj, 1) - DEFINE_CLASS_ID(IfTrue, Proj, 2) - DEFINE_CLASS_ID(IfFalse, Proj, 3) + DEFINE_CLASS_ID(IfProj, Proj, 2) + DEFINE_CLASS_ID(IfTrue, IfProj, 0) + DEFINE_CLASS_ID(IfFalse, IfProj, 1) DEFINE_CLASS_ID(Parm, Proj, 4) DEFINE_CLASS_ID(MachProj, Proj, 5) @@ -752,6 +755,7 @@ DEFINE_CLASS_QUERY(FastLock) DEFINE_CLASS_QUERY(FastUnlock) DEFINE_CLASS_QUERY(If) + DEFINE_CLASS_QUERY(IfProj) DEFINE_CLASS_QUERY(IfFalse) DEFINE_CLASS_QUERY(IfTrue) DEFINE_CLASS_QUERY(Initialize) @@ -779,6 +783,7 @@ DEFINE_CLASS_QUERY(MachSafePoint) DEFINE_CLASS_QUERY(MachSpillCopy) DEFINE_CLASS_QUERY(MachTemp) + DEFINE_CLASS_QUERY(MachMemBar) DEFINE_CLASS_QUERY(MachMerge) DEFINE_CLASS_QUERY(Mem) DEFINE_CLASS_QUERY(MemBar) @@ -894,13 +899,12 @@ // Check if 'this' node dominates or equal to 'sub'. bool dominates(Node* sub, Node_List &nlist); + virtual bool is_g1_marking_load() const { return false; } + protected: bool remove_dead_region(PhaseGVN *phase, bool can_reshape); public: - // Idealize graph, using DU info. 
Done after constant propagation - virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp ); - // See if there is valid pipeline info static const Pipeline *pipeline_class(); virtual const Pipeline *pipeline() const; @@ -934,6 +938,9 @@ // Return the unique control out if only one. Null if none or more than one. Node* unique_ctrl_out(); + // Set control or add control as precedence edge + void ensure_control_or_add_prec(Node* c); + //----------------- Code Generation // Ideal register class for Matching. Zero means unmatched instruction diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/parse2.cpp --- a/src/share/vm/opto/parse2.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/parse2.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -81,12 +81,6 @@ // Compile-time detect of null-exception? if (stopped()) return top(); - if (is_store) { - ary = shenandoah_write_barrier(ary); - } else { - ary = shenandoah_read_barrier(ary); - } - const TypeAryPtr* arytype = _gvn.type(ary)->is_aryptr(); const TypeInt* sizetype = arytype->size(); const Type* elemtype = arytype->elem(); @@ -164,6 +158,12 @@ // Check for always knowing you are throwing a range-check exception if (stopped()) return top(); + if (is_store) { + ary = shenandoah_write_barrier(ary); + } else { + ary = shenandoah_read_barrier(ary); + } + // Make array address computation control dependent to prevent it // from floating above the range check during loop optimizations. Node* ptr = array_element_address(ary, idx, type, sizetype, control()); @@ -1736,7 +1736,7 @@ // a is not used except for an assert. The address d already has the // write barrier. Adding a barrier on a only results in additional code // being generated. - c = shenandoah_read_barrier_nomem(c); + c = shenandoah_read_barrier_storeval(c); Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT, MemNode::release); break; } @@ -2281,8 +2281,7 @@ maybe_add_safepoint(iter().get_dest()); a = pop(); b = pop(); - shenandoah_acmp_barrier(a, b); - c = _gvn.transform( new (C) CmpPNode(b, a) ); + c = cmp_objects(a, b); c = optimize_cmp_with_klass(c); do_if(btest, c); break; diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/parse3.cpp --- a/src/share/vm/opto/parse3.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/parse3.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -245,10 +245,10 @@ if (UseShenandoahGC && ShenandoahOptimizeFinals && UseImplicitStableValues) { if (field->holder()->name() == ciSymbol::java_lang_String() && field->offset() == java_lang_String::value_offset_in_bytes()) { - const TypeAryPtr* value_type = TypeAryPtr::make(TypePtr::NotNull, - TypeAry::make(TypeInt::CHAR,TypeInt::POS), - ciTypeArrayKlass::make(T_CHAR), true, 0); - ld = cast_array_to_stable(ld, value_type); + const TypeAryPtr* value_type = TypeAryPtr::make(TypePtr::NotNull, + TypeAry::make(TypeInt::CHAR, TypeInt::POS), + ciTypeArrayKlass::make(T_CHAR), true, 0); + ld = cast_array_to_stable(ld, value_type); } } // Adjust Java stack @@ -332,7 +332,7 @@ field_type = TypeOopPtr::make_from_klass(field->type()->as_klass()); } - val = shenandoah_read_barrier_nomem(val); + val = shenandoah_read_barrier_storeval(val); store = store_oop_to_object(control(), obj, adr, adr_type, val, field_type, bt, mo); } else { diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/phaseX.cpp --- a/src/share/vm/opto/phaseX.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/phaseX.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -35,6 +35,7 @@ #include "opto/phaseX.hpp" #include "opto/regalloc.hpp" #include 
"opto/rootnode.hpp" +#include "opto/shenandoahSupport.hpp" //============================================================================= #define NODE_HASH_MINIMUM_SIZE 255 @@ -805,9 +806,7 @@ if( t->singleton() && !k->is_Con() ) { NOT_PRODUCT( set_progress(); ) - if (t == Type::TOP || k->Opcode() != Op_ShenandoahWriteBarrier) { - return makecon(t); // Turn into a constant - } + return makecon(t); // Turn into a constant } // Now check for Identities @@ -1212,9 +1211,7 @@ NOT_PRODUCT( set_progress(); ) Node *con = makecon(t); // Make a constant add_users_to_worklist( k ); - if (k->Opcode() != Op_ShenandoahWriteBarrier || t == Type::TOP) { - subsume_node( k, con ); // Everybody using k now uses con - } + subsume_node( k, con ); // Everybody using k now uses con return con; } @@ -1296,6 +1293,9 @@ } assert(!(i < imax), "sanity"); } + } else if (dead->Opcode() == Op_ShenandoahWBMemProj) { + assert(i == 0 && in->Opcode() == Op_ShenandoahWriteBarrier, "broken graph"); + _worklist.push(in); } if (ReduceFieldZeroing && dead->is_Load() && i == MemNode::Memory && in->is_Proj() && in->in(0) != NULL && in->in(0)->is_Initialize()) { @@ -1343,6 +1343,9 @@ if (dead->is_expensive()) { C->remove_expensive_node(dead); } + if (dead->is_ShenandoahBarrier()) { + C->remove_shenandoah_barrier(dead->as_ShenandoahBarrier()); + } CastIINode* cast = dead->isa_CastII(); if (cast != NULL && cast->has_range_check()) { C->remove_range_check_cast(cast); @@ -1656,6 +1659,16 @@ } } } + if (m->is_ShenandoahBarrier()) { + for (DUIterator_Fast i2max, i2 = m->fast_outs(i2max); i2 < i2max; i2++) { + Node* p = m->fast_out(i2); + if (p->Opcode() == Op_CmpP) { + if(p->bottom_type() != type(p)) { + worklist.push(p); + } + } + } + } } } } @@ -1748,11 +1761,6 @@ _worklist.push(n); // n re-enters the hash table via the worklist } - // Idealize graph using DU info. Must clone() into new-space. - // DU info is generally used to show profitability, progress or safety - // (but generally not needed for correctness). - Node *nn = n->Ideal_DU_postCCP(this); - // TEMPORARY fix to ensure that 2nd GVN pass eliminates NULL checks switch( n->Opcode() ) { case Op_FastLock: // Revisit FastLocks for lock coarsening @@ -1769,12 +1777,6 @@ default: break; } - if( nn ) { - _worklist.push(n); - // Put users of 'n' onto worklist for second igvn transform - add_users_to_worklist(n); - return nn; - } return n; } diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/phaseX.hpp --- a/src/share/vm/opto/phaseX.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/phaseX.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -206,8 +206,6 @@ } public: - virtual PhaseIterGVN *is_IterGVN() { return 0; } - // Get a previously recorded type for the node n. // This type must already have been recorded. 
// If you want the type of a very new (untransformed) node, @@ -329,8 +327,7 @@ const Type* limit_type) const { ShouldNotCallThis(); return NULL; } - // Delayed node rehash if this is an IGVN phase - virtual void igvn_rehash_node_delayed(Node* n) {} + virtual PhaseIterGVN *is_IterGVN() { return 0; } #ifndef PRODUCT void dump_old2new_map() const; @@ -498,10 +495,6 @@ _worklist.push(n); } - void igvn_rehash_node_delayed(Node* n) { - rehash_node_delayed(n); - } - // Replace ith edge of "n" with "in" void replace_input_of(Node* n, int i, Node* in) { rehash_node_delayed(n); diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/runtime.cpp --- a/src/share/vm/opto/runtime.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/runtime.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -34,6 +34,7 @@ #include "compiler/compileBroker.hpp" #include "compiler/compilerOracle.hpp" #include "compiler/oopMap.hpp" +#include "gc_implementation/shenandoah/shenandoahBarrierSet.hpp" #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" #include "gc_implementation/g1/heapRegion.hpp" #include "gc_interface/collectedHeap.hpp" @@ -575,6 +576,19 @@ return TypeFunc::make(domain, range); } +const TypeFunc *OptoRuntime::shenandoah_write_barrier_Type() { + const Type **fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value + const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields); + + // create result type (range) + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; + const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields); + + return TypeFunc::make(domain, range); +} + const TypeFunc *OptoRuntime::uncommon_trap_Type() { // create input type (domain) const Type **fields = TypeTuple::fields(1); @@ -1238,7 +1252,7 @@ // Update the exception cache only when the unwind was not forced // and there didn't happen another exception during the computation of the // compiled exception handler. 
- if (!force_unwind && oopDesc::equals(original_exception(), exception())) { + if (!force_unwind && original_exception() == exception()) { nm->add_handler_for_exception_and_pc(exception,pc,handler_address); } } else { @@ -1385,6 +1399,7 @@ return caller_frame.is_deoptimized_frame(); } + const TypeFunc *OptoRuntime::register_finalizer_Type() { // create input type (domain) const Type **fields = TypeTuple::fields(1); diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/runtime.hpp --- a/src/share/vm/opto/runtime.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/runtime.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -277,6 +277,7 @@ static const TypeFunc* g1_wb_pre_Type(); static const TypeFunc* g1_wb_post_Type(); static const TypeFunc* shenandoah_clone_barrier_Type(); + static const TypeFunc* shenandoah_write_barrier_Type(); static const TypeFunc* complete_monitor_enter_Type(); static const TypeFunc* complete_monitor_exit_Type(); static const TypeFunc* uncommon_trap_Type(); diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/shenandoahSupport.cpp --- a/src/share/vm/opto/shenandoahSupport.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/shenandoahSupport.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -23,15 +23,48 @@ #include "gc_implementation/shenandoah/brooksPointer.hpp" #include "opto/callnode.hpp" +#include "opto/connode.hpp" #include "opto/phaseX.hpp" +#include "opto/rootnode.hpp" +#include "opto/runtime.hpp" #include "opto/shenandoahSupport.hpp" +#include "opto/subnode.hpp" Node* ShenandoahBarrierNode::skip_through_barrier(Node* n) { - if (n->is_ShenandoahBarrier()) { + if (n == NULL) { + return NULL; + } else if (n->is_ShenandoahBarrier()) { return n->in(ValueIn); - } else { - return n; + } else if (n->is_Phi() && + n->req() == 3 && + n->in(1) != NULL && + n->in(1)->is_ShenandoahBarrier() && + n->in(2) != NULL && + n->in(2)->bottom_type() == TypePtr::NULL_PTR && + n->in(0) != NULL && + n->in(0)->in(1) != NULL && + n->in(0)->in(1)->is_IfProj() && + n->in(0)->in(2) != NULL && + n->in(0)->in(2)->is_IfProj() && + n->in(0)->in(1)->in(0) != NULL && + n->in(0)->in(1)->in(0) == n->in(0)->in(2)->in(0) && + n->in(1)->in(ShenandoahBarrierNode::ValueIn)->Opcode() == Op_CastPP) { + Node* iff = n->in(0)->in(1)->in(0); + Node* res = n->in(1)->in(ShenandoahBarrierNode::ValueIn)->in(1); + if (iff->is_If() && + iff->in(1) != NULL && + iff->in(1)->is_Bool() && + iff->in(1)->as_Bool()->_test._test == BoolTest::ne && + iff->in(1)->in(1) != NULL && + iff->in(1)->in(1)->Opcode() == Op_CmpP && + iff->in(1)->in(1)->in(1) != NULL && + iff->in(1)->in(1)->in(1) == res && + iff->in(1)->in(1)->in(2) != NULL && + iff->in(1)->in(1)->in(2)->bottom_type() == TypePtr::NULL_PTR) { + return res; + } } + return n; } bool ShenandoahBarrierNode::needs_barrier(PhaseTransform* phase, ShenandoahBarrierNode* orig, Node* n, Node* rb_mem, bool allow_fromspace) { @@ -47,20 +80,26 @@ visited.push(n); if (n->is_Allocate()) { + // tty->print_cr("killed barrier for newly allocated object"); return false; } - if (n->is_CallJava()) { + if (n->is_CallJava() || n->Opcode() == Op_CallLeafNoFP) { return true; } const Type* type = phase->type(n); - if (type->higher_equal(TypePtr::NULL_PTR)) { + if (type == Type::TOP) { return false; } - if (type->isa_oopptr() && type->is_oopptr()->const_oop() != NULL) { + if (type->make_ptr()->higher_equal(TypePtr::NULL_PTR)) { + // tty->print_cr("killed barrier for NULL object"); return false; } + if (type->make_oopptr() && type->make_oopptr()->const_oop() != NULL) { + // tty->print_cr("killed barrier 
for constant object"); + return !ShenandoahNoBarriersForConst; + } if (ShenandoahOptimizeFinals) { const TypeAryPtr* ary = type->isa_aryptr(); @@ -98,24 +137,32 @@ return true; } if (n->Opcode() == Op_ShenandoahWriteBarrier) { + // tty->print_cr("skipped barrier for chained write barrier object"); return false; } if (n->Opcode() == Op_ShenandoahReadBarrier) { if (rb_mem == n->in(Memory)) { + // tty->print_cr("Eliminated chained read barrier"); return false; } else { return true; } } - if (n->Opcode() == Op_LoadP) { + if (n->Opcode() == Op_LoadP || + n->Opcode() == Op_LoadN || + n->Opcode() == Op_GetAndSetP || + n->Opcode() == Op_GetAndSetN) { return true; } - if (n->Opcode() == Op_GetAndSetP) { - return true; + if (n->Opcode() == Op_DecodeN || + n->Opcode() == Op_EncodeP) { + return needs_barrier_impl(phase, orig, n->in(1), rb_mem, allow_fromspace, visited); } + #ifdef ASSERT tty->print("need barrier on?: "); n->dump(); + ShouldNotReachHere(); #endif return true; } @@ -124,75 +171,129 @@ Node* b1, Node* b2, Node* current, - Unique_Node_List &visited) { - if (current == NULL) { - return false; // Incomplete phi. Try again later. - } else if (visited.member(current)) { - // We have already seen it. + bool linear) { + ResourceMark rm; + VectorSet visited(Thread::current()->resource_area()); + Node_Stack phis(0); + + for(int i = 0; i < 10; i++) { + if (current == NULL) { + return false; + } else if (visited.test_set(current->_idx) || current->is_top() || current == b1) { + current = NULL; + while (phis.is_nonempty() && current == NULL) { + uint idx = phis.index(); + Node* phi = phis.node(); + if (idx >= phi->req()) { + phis.pop(); + } else { + current = phi->in(idx); + phis.set_index(idx+1); + } + } + if (current == NULL) { + return true; + } + } else if (current == phase->C->immutable_memory()) { + return false; + } else if (current->isa_Phi()) { + if (!linear) { + return false; + } + phis.push(current, 2); + current = current->in(1); + } else if (current->Opcode() == Op_ShenandoahWriteBarrier) { + const Type* in_type = current->bottom_type(); + const Type* this_type = b2->bottom_type(); + if (is_independent(in_type, this_type)) { + current = current->in(Memory); + } else { + return false; + } + } else if (current->Opcode() == Op_ShenandoahWBMemProj) { + current = current->in(0); + } else if (current->is_Proj()) { + current = current->in(0); + } else if (current->is_Call()) { + return false; // TODO: Maybe improve by looking at the call's memory effects? + } else if (current->is_MemBar()) { + return false; // TODO: Do we need to stop at *any* membar? 
+ } else if (current->is_MergeMem()) { + // if (true) return false; + // tty->print_cr("current == mergemem: "); current->dump(); + const TypePtr* adr_type = brooks_pointer_type(phase->type(b2)); + uint alias_idx = phase->C->get_alias_index(adr_type); + current = current->as_MergeMem()->memory_at(alias_idx); + } else { + // tty->print_cr("what else can we see here:"); +#ifdef ASSERT + current->dump(); +#endif + ShouldNotReachHere(); + return false; + } + } + return false; +} + +bool ShenandoahReadBarrierNode::is_independent(Node* mem) { + if (mem->is_Phi() || mem->is_Proj() || mem->is_MergeMem()) { return true; - } - visited.push(current); - - if (current == b1) { - return true; - } else if (current == phase->C->immutable_memory()) { - return false; - } else if (current->isa_Phi()) { - bool dominates = true; - for (uint i = 1; i < current->req() && dominates == true; i++) { - Node* in = current->in(i); - dominates = dominates && dominates_memory_rb_impl(phase, b1, b2, in, visited); - } - return dominates; - } else if (current->Opcode() == Op_ShenandoahWriteBarrier) { - const Type* in_type = current->bottom_type(); - const Type* this_type = b2->bottom_type(); - if (is_independent(in_type, this_type)) { - Node* in = current->in(Memory); - return dominates_memory_rb_impl(phase, b1, b2, in, visited); + } else if (mem->Opcode() == Op_ShenandoahWriteBarrier) { + const Type* mem_type = mem->bottom_type(); + const Type* this_type = bottom_type(); + if (is_independent(mem_type, this_type)) { + return true; } else { return false; } - } else if (current->Opcode() == Op_ShenandoahWBMemProj) { - Node* in = current->in(0); - return dominates_memory_rb_impl(phase, b1, b2, in, visited); - } else if (current->is_top()) { - return true; // Dead path - } else if (current->is_Proj()) { - return dominates_memory_rb_impl(phase, b1, b2, current->in(0), visited); - } else if (current->is_Call()) { - return false; // TODO: Maybe improve by looking at the call's memory effects? - } else if (current->is_MemBar()) { - return false; // TODO: Do we need to stop at *any* membar? - } else if (current->is_MergeMem()) { - const TypePtr* adr_type = phase->type(b2)->is_ptr()->add_offset(BrooksPointer::byte_offset()); - uint alias_idx = phase->C->get_alias_index(adr_type); - Node* mem_in = current->as_MergeMem()->memory_at(alias_idx); - return dominates_memory_rb_impl(phase, b1, b2, current->in(TypeFunc::Memory), visited); - } else { -#ifdef ASSERT - current->dump(); -#endif - ShouldNotReachHere(); + } else if (mem->is_Call() || mem->is_MemBar()) { return false; } +#ifdef ASSERT + mem->dump(); +#endif + ShouldNotReachHere(); + return true; } -bool ShenandoahReadBarrierNode::dominates_memory_rb(PhaseTransform* phase, Node* b1, Node* b2) { - Unique_Node_List visited; - return dominates_memory_rb_impl(phase, b1->in(Memory), b2, b2->in(Memory), visited); + +bool ShenandoahReadBarrierNode::dominates_memory_rb(PhaseTransform* phase, Node* b1, Node* b2, bool linear) { + return dominates_memory_rb_impl(phase, b1->in(Memory), b2, b2->in(Memory), linear); } -bool ShenandoahReadBarrierNode::is_independent(const Type* in_type, const Type* this_type) const { +bool ShenandoahReadBarrierNode::is_independent(const Type* in_type, const Type* this_type) { assert(in_type->isa_oopptr(), "expect oop ptr"); assert(this_type->isa_oopptr(), "expect oop ptr"); + /* + if ((! in_type->isa_oopptr()) || (! 
this_type->isa_oopptr())) { +#ifdef ASSERT + tty->print_cr("not oopptr"); + tty->print("in: "); in_type->dump(); tty->print_cr(" "); + tty->print("this: "); this_type->dump(); tty->print_cr(" "); +#endif + return false; + } + */ ciKlass* in_kls = in_type->is_oopptr()->klass(); ciKlass* this_kls = this_type->is_oopptr()->klass(); - if ((!in_kls->is_subclass_of(this_kls)) && + if (in_kls != NULL && this_kls != NULL && + in_kls->is_loaded() && this_kls->is_loaded() && + (!in_kls->is_subclass_of(this_kls)) && (!this_kls->is_subclass_of(in_kls))) { +#ifdef ASSERT + // tty->print_cr("independent: "); + // tty->print("in: "); in_kls->print(); tty->print_cr(" "); + // tty->print("this: "); this_kls->print(); tty->print_cr(" "); +#endif return true; } +#ifdef ASSERT + // tty->print_cr("possibly dependent?"); + // tty->print("in: "); in_type->dump(); tty->print_cr(" "); + // tty->print("this: "); this_type->dump(); tty->print_cr(" "); +#endif return false; } @@ -207,7 +308,7 @@ // If memory input is a MergeMem, take the appropriate slice out of it. Node* mem_in = in(Memory); if (mem_in->isa_MergeMem()) { - const TypePtr* adr_type = bottom_type()->is_ptr()->add_offset(BrooksPointer::byte_offset()); + const TypePtr* adr_type = brooks_pointer_type(bottom_type()); uint alias_idx = phase->C->get_alias_index(adr_type); mem_in = mem_in->as_MergeMem()->memory_at(alias_idx); set_req(Memory, mem_in); @@ -220,9 +321,10 @@ if (wb->is_top()) return NULL; // Dead path. assert(wb->Opcode() == Op_ShenandoahWriteBarrier, "expect write barrier"); const Type* in_type = phase->type(wb); - const Type* this_type = phase->type(this); - if (is_independent(in_type, this_type)) { - phase->igvn_rehash_node_delayed(wb); + if (is_independent(in_type, _type)) { + if (phase->is_IterGVN()) { + phase->is_IterGVN()->rehash_node_delayed(wb); + } set_req(Memory, wb->in(Memory)); if (can_reshape && input->outcnt() == 0) { phase->is_IterGVN()->_worklist.push(input); @@ -233,128 +335,113 @@ return NULL; } -bool ShenandoahBarrierNode::has_barrier_users(Node* n, Unique_Node_List &visited) { - if (visited.member(n)) { - return false; - } - visited.push(n); +Node* ShenandoahWriteBarrierNode::Identity(PhaseTransform* phase) { + assert(in(0) != NULL, "should have control"); + PhaseIterGVN* igvn = phase->is_IterGVN(); + Node* mem_in = in(Memory); + Node* mem_proj = NULL; - bool has_users = false; - for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax && ! has_users; j++) { - Node* o = n->fast_out(j); - if (o->Opcode() == Op_ShenandoahReadBarrier || - o->Opcode() == Op_ShenandoahWriteBarrier) { - has_users = true; - } else if (o->isa_Phi()) { - has_users = has_barrier_users(o, visited); - } else if (o->Opcode() == Op_MergeMem) { - // Not a user. ? - } else { - ShouldNotReachHere(); + if (igvn != NULL) { + mem_proj = find_out_with(Op_ShenandoahWBMemProj); + if (mem_proj == NULL || mem_in == mem_proj) { + return this; } } - return has_users; + + Node* replacement = Identity_impl(phase); + if (igvn != NULL) { + if (replacement != NULL && replacement != this) { + igvn->replace_node(mem_proj, mem_in); + } + } + return replacement; } + Node* ShenandoahWriteBarrierNode::Ideal(PhaseGVN *phase, bool can_reshape) { + assert(in(0) != NULL, "should have control"); + if (!can_reshape) { + return NULL; + } - if (! 
can_reshape) return NULL; + PhaseIterGVN* igvn = phase->is_IterGVN(); + Node* mem_proj = find_out_with(Op_ShenandoahWBMemProj); + Node* mem_in = in(Memory); - if (in(Memory) == phase->C->immutable_memory()) return NULL; + if (mem_in == phase->C->immutable_memory()) return NULL; - Node* mem_in = in(Memory); if (mem_in->isa_MergeMem()) { - const TypePtr* adr_type = bottom_type()->is_ptr()->add_offset(BrooksPointer::byte_offset()); + const TypePtr* adr_type = brooks_pointer_type(bottom_type()); uint alias_idx = phase->C->get_alias_index(adr_type); mem_in = mem_in->as_MergeMem()->memory_at(alias_idx); set_req(Memory, mem_in); return this; } - Node* mem_proj = find_out_with(Op_ShenandoahWBMemProj); - if (mem_proj == NULL) { - set_req(Memory, phase->C->immutable_memory()); - return this; - } - - Unique_Node_List visited; - if (! has_barrier_users(mem_proj, visited)) { - phase->igvn_rehash_node_delayed(in(Memory)); - set_req(Memory, phase->C->immutable_memory()); - return this; - } return NULL; } -bool ShenandoahBarrierNode::dominates_control(PhaseTransform* phase, - Node* c1, - Node* c2) { - if (c1 == c2) { - return true; - } - if (c1 == NULL) { - return true; - } - //ShouldNotReachHere(); - return false; -} - bool ShenandoahBarrierNode::dominates_memory_impl(PhaseTransform* phase, Node* b1, Node* b2, Node* current, - Unique_Node_List &visited) { - if (current == NULL) { - return false; - } else if (visited.member(current)) { - // We have already seen it. - return true; + bool linear) { + ResourceMark rm; + VectorSet visited(Thread::current()->resource_area()); + Node_Stack phis(0); + + + for(int i = 0; i < 10; i++) { + if (current == NULL) { + return false; + } else if (visited.test_set(current->_idx) || current->is_top() || current == b1) { + current = NULL; + while (phis.is_nonempty() && current == NULL) { + uint idx = phis.index(); + Node* phi = phis.node(); + if (idx >= phi->req()) { + phis.pop(); + } else { + current = phi->in(idx); + phis.set_index(idx+1); + } + } + if (current == NULL) { + return true; + } + } else if (current == b2) { + return false; + } else if (current == phase->C->immutable_memory()) { + return false; + } else if (current->isa_Phi()) { + if (!linear) { + return false; + } + phis.push(current, 2); + current = current->in(1); + } else if (current->Opcode() == Op_ShenandoahWriteBarrier) { + current = current->in(Memory); + } else if (current->Opcode() == Op_ShenandoahWBMemProj) { + current = current->in(0); + } else if (current->is_Proj()) { + current = current->in(0); + } else if (current->is_Call()) { + current = current->in(TypeFunc::Memory); + } else if (current->is_MemBar()) { + current = current->in(TypeFunc::Memory); + } else if (current->is_MergeMem()) { + const TypePtr* adr_type = brooks_pointer_type(phase->type(b2)); + uint alias_idx = phase->C->get_alias_index(adr_type); + current = current->as_MergeMem()->memory_at(alias_idx); + } else { +#ifdef ASSERT + current->dump(); +#endif + ShouldNotReachHere(); + return false; + } } - - visited.push(current); - - if (current == b1) { - return true; - } else if (current == b2) { - return false; - } else if (current == phase->C->immutable_memory()) { - return false; - } else if (current->isa_Phi()) { - bool dominates = true; - for (uint i = 1; i < current->req() && dominates == true; i++) { - Node* in = current->in(i); - dominates = dominates && dominates_memory_impl(phase, b1, b2, in, visited); - } - return dominates; - } else if (current->Opcode() == Op_ShenandoahWriteBarrier) { - // Follow through memory input. 
- Node* in = current->in(Memory); - return dominates_memory_impl(phase, b1, b2, in, visited); - } else if (current->Opcode() == Op_ShenandoahWBMemProj) { - // Follow through memory input. - Node* in = current->in(0); - return dominates_memory_impl(phase, b1, b2, in, visited); - } else if (current->is_top()) { - return true; // Dead path - } else if (current->is_Proj()) { - return dominates_memory_impl(phase, b1, b2, current->in(0), visited); - } else if (current->is_Call()) { - return dominates_memory_impl(phase, b1, b2, current->in(TypeFunc::Memory), visited); - } else if (current->is_MemBar()) { - return dominates_memory_impl(phase, b1, b2, current->in(TypeFunc::Memory), visited); - } else if (current->is_MergeMem()) { - const TypePtr* adr_type = phase->type(b2)->is_ptr()->add_offset(BrooksPointer::byte_offset()); - uint alias_idx = phase->C->get_alias_index(adr_type); - Node* mem_in = current->as_MergeMem()->memory_at(alias_idx); - return dominates_memory_impl(phase, b1, b2, current->in(TypeFunc::Memory), visited); - } else { - // tty->print_cr("what else can we see here:"); -#ifdef ASSERT - current->dump(); -#endif - ShouldNotReachHere(); - return false; - } + return false; } /** @@ -364,9 +451,8 @@ * In all other cases, (in particular, if we reach immutable_memory without having seen b1) * we return false. */ -bool ShenandoahBarrierNode::dominates_memory(PhaseTransform* phase, Node* b1, Node* b2) { - Unique_Node_List visited; - return dominates_memory_impl(phase, b1->in(Memory), b2, b2->in(Memory), visited); +bool ShenandoahBarrierNode::dominates_memory(PhaseTransform* phase, Node* b1, Node* b2, bool linear) { + return dominates_memory_impl(phase, b1, b2, b2->in(Memory), linear); } Node* ShenandoahBarrierNode::Identity_impl(PhaseTransform* phase) { @@ -377,45 +463,74 @@ return n; } + // tty->print_cr("find sibling for: "); dump(2); // Try to find a write barrier sibling with identical inputs that we can fold into. for (DUIterator i = n->outs(); n->has_out(i); i++) { Node* sibling = n->out(i); if (sibling == this) { continue; } + /* + assert(sibling->Opcode() != Op_ShenandoahWriteBarrier || + Opcode() != Op_ShenandoahWriteBarrier || hash() == sibling->hash(), + "if this is a write barrier, then sibling can't be write barrier too"); + */ if (sibling->Opcode() != Op_ShenandoahWriteBarrier) { continue; } + /* + if (sibling->outcnt() == 0) { + // Some dead node. + continue; + } + */ assert(sibling->in(ValueIn) == in(ValueIn), "sanity"); assert(sibling->Opcode() == Op_ShenandoahWriteBarrier, "sanity"); + // tty->print_cr("candidate: "); sibling->dump(); - if (dominates_control(phase, sibling->in(Control), in(Control)) && - dominates_memory(phase, sibling, this)) { + if (dominates_memory(phase, sibling, this, phase->is_IterGVN() == NULL)) { + /* + tty->print_cr("matched barrier:"); + sibling->dump(); + tty->print_cr("for: "); + dump(); + */ return sibling; } + /* + tty->print_cr("couldn't match candidate:"); + sibling->dump(2); + */ } + /* + tty->print_cr("couldn't match barrier to any:"); + dump(); + */ return this; } -Node* ShenandoahBarrierNode::Identity(PhaseTransform* phase) { - - Node* replacement = Identity_impl(phase); - if (replacement != this) { - // If we have a memory projection, we first need to make it go away. 
- Node* mem_proj = find_out_with(Op_ShenandoahWBMemProj); - if (mem_proj != NULL) { - phase->igvn_rehash_node_delayed(mem_proj); - return this; - } +#ifndef PRODUCT +void ShenandoahBarrierNode::dump_spec(outputStream *st) const { + const TypePtr* adr = adr_type(); + if (adr == NULL) { + return; } - return replacement; + st->print(" @"); + adr->dump_on(st); + st->print(" ("); + Compile::current()->alias_type(adr)->adr_type()->dump_on(st); + st->print(") "); } +#endif Node* ShenandoahReadBarrierNode::Identity(PhaseTransform* phase) { - Node* id = ShenandoahBarrierNode::Identity(phase); + // if (true) return this; + + // tty->print("optimizing rb: "); dump(); + Node* id = Identity_impl(phase); if (id == this && phase->is_IterGVN()) { Node* n = in(ValueIn); @@ -429,7 +544,14 @@ if (phase->is_IterGVN()->hash_find(sibling) && sibling->bottom_type() == bottom_type() && sibling->in(Control) == in(Control) && - dominates_memory_rb(phase, sibling, this)) { + dominates_memory_rb(phase, sibling, this, phase->is_IterGVN() == NULL)) { + /* + if (in(Memory) != sibling->in(Memory)) { + tty->print_cr("interesting rb-fold"); + dump(); + sibling->dump(); + } + */ return sibling; } } @@ -445,26 +567,10 @@ if( t2 == Type::TOP ) return Type::TOP; Node* input = in(ValueIn); - const Type* type = phase->type(input); - return type; + const Type* type = phase->type(input)->is_oopptr()->cast_to_nonconst(); + return type->filter_speculative(_type); } -#ifdef ASSERT -uint ShenandoahBarrierNode::num_mem_projs() { - uint num_mem_proj = 0; - for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { - Node* use = fast_out(i); - if (use->Opcode() == Op_ShenandoahWBMemProj) { - num_mem_proj++; - } - } - return num_mem_proj; -} - -void ShenandoahBarrierNode::check_invariants() { -} -#endif - uint ShenandoahBarrierNode::hash() const { return TypeNode::hash() + _allow_fromspace; } @@ -484,28 +590,3092 @@ if (wb->is_top()) return phase->C->top(); // Dead path. assert(wb->Opcode() == Op_ShenandoahWriteBarrier, "expect write barrier"); - if (wb->as_ShenandoahBarrier()->Identity_impl(phase) != wb) { - // If the parent write barrier would go away, make this mem proj go away first. - // Poke parent to give it a chance to go away too. - phase->igvn_rehash_node_delayed(wb); - return wb->in(ShenandoahBarrierNode::Memory); - } - + PhaseIterGVN* igvn = phase->is_IterGVN(); // We can't do the below unless the graph is fully constructed. - if (! phase->is_IterGVN()) { + if (igvn == NULL) { return this; } // If the mem projection has no barrier users, it's not needed anymore. Unique_Node_List visited; - if (! 
ShenandoahWriteBarrierNode::has_barrier_users(this, visited)) { - phase->igvn_rehash_node_delayed(wb); + if (wb->outcnt() == 1) { return wb->in(ShenandoahBarrierNode::Memory); } return this; } -Node* ShenandoahBarrierNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { - return MemNode::Ideal_common_DU_postCCP(ccp, this, in(ValueIn)); +#ifdef ASSERT +bool ShenandoahBarrierNode::verify_helper(Node* in, Node_Stack& phis, VectorSet& visited, verify_type t, bool trace, Unique_Node_List& barriers_used) { + assert(phis.size() == 0, ""); + + while (true) { + if (!in->bottom_type()->make_ptr()->isa_oopptr()) { + if (trace) {tty->print_cr("Non oop");} + } else if (t == ShenandoahLoad && ShenandoahOptimizeFinals && + in->bottom_type()->make_ptr()->isa_aryptr() && + in->bottom_type()->make_ptr()->is_aryptr()->is_stable()) { + if (trace) {tty->print_cr("Stable array load");} + } else { + in = in->uncast(); + if (in->is_AddP()) { + assert(!in->in(AddPNode::Address)->is_top(), "no raw memory access"); + in = in->in(AddPNode::Address); + continue; + } else if (in->is_Con()) { + if (trace) {tty->print("Found constant"); in->dump();} + } else if (in->is_ShenandoahBarrier()) { + if (t == ShenandoahStore && in->Opcode() != Op_ShenandoahWriteBarrier) { + return false; + } + barriers_used.push(in); + if (trace) {tty->print("Found barrier"); in->dump();} + } else if (in->is_Proj() && in->in(0)->is_Allocate()) { + if (trace) {tty->print("Found alloc"); in->in(0)->dump();} + } else if (in->is_Phi()) { + if (!visited.test_set(in->_idx)) { + if (trace) {tty->print("Pushed phi:"); in->dump();} + phis.push(in, 2); + in = in->in(1); + continue; + } + if (trace) {tty->print("Already seen phi:"); in->dump();} + } else if (in->Opcode() == Op_CMoveP) { + if (!visited.test_set(in->_idx)) { + if (trace) {tty->print("Pushed cmovep:"); in->dump();} + phis.push(in, CMoveNode::IfTrue); + in = in->in(CMoveNode::IfFalse); + continue; + } + if (trace) {tty->print("Already seen cmovep:"); in->dump();} + } else if (in->Opcode() == Op_EncodeP || in->Opcode() == Op_DecodeN) { + in = in->in(1); + continue; + } else { + return false; + } + } + bool cont = false; + while (phis.is_nonempty()) { + uint idx = phis.index(); + Node* phi = phis.node(); + if (idx >= phi->req()) { + if (trace) {tty->print("Popped phi:"); phi->dump();} + phis.pop(); + continue; + } + if (trace) {tty->print("Next entry(%d) for phi:", idx); phi->dump();} + in = phi->in(idx); + phis.set_index(idx+1); + cont = true; + break; + } + if (!cont) { + break; + } + } + return true; } +#endif + +void ShenandoahBarrierNode::verify(RootNode* root) { +#ifdef ASSERT + ResourceMark rm; + Unique_Node_List wq; + GrowableArray barriers; + Unique_Node_List barriers_used; + Node_Stack phis(0); + VectorSet visited(Thread::current()->resource_area()); + const bool trace = false; + const bool verify_no_useless_barrier = false; + + wq.push(root); + for (uint next = 0; next < wq.size(); next++) { + Node *n = wq.at(next); + if (n->is_Load()) { + const bool trace = false; + if (trace) {tty->print("Verifying"); n->dump();} + if (n->Opcode() == Op_LoadRange || n->Opcode() == Op_LoadKlass || n->Opcode() == Op_LoadNKlass) { + if (trace) {tty->print_cr("Load range/klass");} + } else { + const TypePtr* adr_type = n->as_Load()->adr_type(); + + if (adr_type->isa_oopptr() && adr_type->is_oopptr()->offset() == oopDesc::mark_offset_in_bytes()) { + if (trace) {tty->print_cr("Mark load");} + } else if (adr_type->isa_instptr() && + 
adr_type->is_instptr()->klass()->is_subtype_of(Compile::current()->env()->Reference_klass()) && + adr_type->is_instptr()->offset() == java_lang_ref_Reference::referent_offset) { + if (trace) {tty->print_cr("Reference.get()");} + } else { + bool verify = true; + if (adr_type->isa_instptr() && ShenandoahOptimizeFinals) { + ciKlass* k = adr_type->is_instptr()->klass(); + assert(k->is_instance_klass(), ""); + ciInstanceKlass* ik = (ciInstanceKlass*)k; + int offset = adr_type->offset(); + + if (ik->debug_final_or_stable_field_at(offset)) { + if (trace) {tty->print_cr("Final/stable");} + verify = false; + } + } + + if (verify && !ShenandoahBarrierNode::verify_helper(n->in(MemNode::Address), phis, visited, ShenandoahLoad, trace, barriers_used)) { + n->dump(10); + ShouldNotReachHere(); + } + } + } + } else if (n->is_Store()) { + const bool trace = false; + + if (trace) {tty->print("Verifying"); n->dump();} + if (n->in(MemNode::ValueIn)->bottom_type()->isa_oopptr()) { + Node* adr = n->in(MemNode::Address); + bool verify = true; + + if (adr->is_AddP() && adr->in(AddPNode::Base)->is_top()) { + adr = adr->in(AddPNode::Address); + if (adr->is_AddP()) { + assert(adr->in(AddPNode::Base)->is_top(), ""); + adr = adr->in(AddPNode::Address); + if (adr->Opcode() == Op_LoadP && + adr->in(MemNode::Address)->in(AddPNode::Base)->is_top() && + adr->in(MemNode::Address)->in(AddPNode::Address)->Opcode() == Op_ThreadLocal && + adr->in(MemNode::Address)->in(AddPNode::Offset)->find_intptr_t_con(-1) == in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())) { + if (trace) {tty->print_cr("G1 prebarrier");} + verify = false; + } + } + } + + if (verify && !ShenandoahBarrierNode::verify_helper(n->in(MemNode::ValueIn), phis, visited, ShenandoahValue, trace, barriers_used)) { + n->dump(10); + ShouldNotReachHere(); + } + } + if (!ShenandoahBarrierNode::verify_helper(n->in(MemNode::Address), phis, visited, ShenandoahStore, trace, barriers_used)) { + n->dump(10); + ShouldNotReachHere(); + } + } else if (n->is_ClearArray()) { + if (!ShenandoahBarrierNode::verify_helper(n->in(3), phis, visited, ShenandoahStore, trace, barriers_used)) { + n->dump(10); + ShouldNotReachHere(); + } + } else if (n->Opcode() == Op_CmpP) { + const bool trace = false; + + Node* in1 = n->in(1); + Node* in2 = n->in(2); + if (in1->bottom_type()->isa_oopptr()) { + if (trace) {tty->print("Verifying"); n->dump();} + + bool mark_inputs = false; + if (in1->is_Con() || in2->is_Con()) { + if (trace) {tty->print_cr("Comparison against a constant");} + mark_inputs = true; + } else if ((in1->is_CheckCastPP() && in1->in(1)->is_Proj() && in1->in(1)->in(0)->is_Allocate()) || + (in2->is_CheckCastPP() && in2->in(1)->is_Proj() && in2->in(1)->in(0)->is_Allocate())) { + if (trace) {tty->print_cr("Comparison with newly alloc'ed object");} + mark_inputs = true; + } else { + assert(in2->bottom_type()->isa_oopptr(), ""); + + if (!ShenandoahBarrierNode::verify_helper(in1, phis, visited, ShenandoahStore, trace, barriers_used) || + !ShenandoahBarrierNode::verify_helper(in2, phis, visited, ShenandoahStore, trace, barriers_used)) { + n->dump(10); + ShouldNotReachHere(); + } + } + if (verify_no_useless_barrier && + mark_inputs && + (!ShenandoahBarrierNode::verify_helper(in1, phis, visited, ShenandoahValue, trace, barriers_used) || + !ShenandoahBarrierNode::verify_helper(in2, phis, visited, ShenandoahValue, trace, barriers_used))) { + phis.clear(); + visited.Reset(); + } + } + } else if (n->is_LoadStore()) { + if 
(n->in(MemNode::ValueIn)->bottom_type()->isa_ptr() && + !ShenandoahBarrierNode::verify_helper(n->in(MemNode::ValueIn), phis, visited, ShenandoahLoad, trace, barriers_used)) { + n->dump(10); + ShouldNotReachHere(); + } + + if (n->in(MemNode::Address)->bottom_type()->isa_oopptr() && !ShenandoahBarrierNode::verify_helper(n->in(MemNode::Address), phis, visited, ShenandoahStore, trace, barriers_used)) { + n->dump(10); + ShouldNotReachHere(); + } + } else if (n->Opcode() == Op_CallLeafNoFP || n->Opcode() == Op_CallLeaf) { + CallRuntimeNode* call = n->as_CallRuntime(); + + static struct { + const char* name; + struct { + int pos; + verify_type t; + } args[6]; + } calls[] = { + "aescrypt_encryptBlock", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "aescrypt_decryptBlock", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "multiplyToLen", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+2, ShenandoahLoad }, { TypeFunc::Parms+4, ShenandoahStore }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "squareToLen", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+2, ShenandoahLoad }, { -1, ShenandoahNone}, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "montgomery_multiply", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahLoad }, { TypeFunc::Parms+2, ShenandoahLoad }, + { TypeFunc::Parms+6, ShenandoahStore }, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "montgomery_square", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahLoad }, { TypeFunc::Parms+5, ShenandoahStore }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "mulAdd", + { { TypeFunc::Parms, ShenandoahStore }, { TypeFunc::Parms+1, ShenandoahLoad }, { -1, ShenandoahNone}, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "vectorizedMismatch", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahLoad }, { -1, ShenandoahNone}, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "updateBytesCRC32", + { { TypeFunc::Parms+1, ShenandoahLoad }, { -1, ShenandoahNone}, { -1, ShenandoahNone}, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "updateBytesAdler32", + { { TypeFunc::Parms+1, ShenandoahLoad }, { -1, ShenandoahNone}, { -1, ShenandoahNone}, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "updateBytesCRC32C", + { { TypeFunc::Parms+1, ShenandoahLoad }, { TypeFunc::Parms+3, ShenandoahLoad}, { -1, ShenandoahNone}, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "counterMode_AESCrypt", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad }, + { TypeFunc::Parms+3, ShenandoahStore }, { TypeFunc::Parms+5, ShenandoahStore }, { TypeFunc::Parms+6, ShenandoahStore } }, + "cipherBlockChaining_encryptAESCrypt", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad }, + { TypeFunc::Parms+3, ShenandoahLoad }, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "cipherBlockChaining_decryptAESCrypt", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, 
ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad }, + { TypeFunc::Parms+3, ShenandoahLoad }, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "shenandoah_clone_barrier", + { { TypeFunc::Parms, ShenandoahLoad }, { -1, ShenandoahNone}, { -1, ShenandoahNone}, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "ghash_processBlocks", + { { TypeFunc::Parms, ShenandoahStore }, { TypeFunc::Parms+1, ShenandoahLoad }, { TypeFunc::Parms+2, ShenandoahLoad }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "sha1_implCompress", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { -1, ShenandoahNone }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "sha256_implCompress", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { -1, ShenandoahNone }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "sha512_implCompress", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { -1, ShenandoahNone }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "sha1_implCompressMB", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { -1, ShenandoahNone }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "sha256_implCompressMB", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { -1, ShenandoahNone }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + "sha512_implCompressMB", + { { TypeFunc::Parms, ShenandoahLoad }, { TypeFunc::Parms+1, ShenandoahStore }, { -1, ShenandoahNone }, + { -1, ShenandoahNone}, { -1, ShenandoahNone}, { -1, ShenandoahNone} }, + }; + + if (call->is_call_to_arraycopystub()) { + Node* dest = NULL; + const TypeTuple* args = n->as_Call()->_tf->domain(); + for (uint i = TypeFunc::Parms, j = 0; i < args->cnt(); i++) { + if (args->field_at(i)->isa_ptr()) { + j++; + if (j == 2) { + dest = n->in(i); + break; + } + } + } + if (!ShenandoahBarrierNode::verify_helper(n->in(TypeFunc::Parms), phis, visited, ShenandoahLoad, trace, barriers_used) || + !ShenandoahBarrierNode::verify_helper(dest, phis, visited, ShenandoahStore, trace, barriers_used)) { + n->dump(10); + ShouldNotReachHere(); + } + } else if (strlen(call->_name) > 5 && + !strcmp(call->_name + strlen(call->_name) - 5, "_fill")) { + if (!ShenandoahBarrierNode::verify_helper(n->in(TypeFunc::Parms), phis, visited, ShenandoahStore, trace, barriers_used)) { + n->dump(10); + ShouldNotReachHere(); + } + } else if (!strcmp(call->_name, "g1_wb_pre")) { + // skip + } else { + const int calls_len = sizeof(calls) / sizeof(calls[0]); + int i = 0; + for (; i < calls_len; i++) { + if (!strcmp(calls[i].name, call->_name)) { + break; + } + } + if (i != calls_len) { + const int args_len = sizeof(calls[0].args) / sizeof(calls[0].args[0]); + for (uint j = 0; j < args_len; j++) { + int pos = calls[i].args[j].pos; + if (pos == -1) { + break; + } + if (!ShenandoahBarrierNode::verify_helper(call->in(pos), phis, visited, calls[i].args[j].t, trace, barriers_used)) { + n->dump(10); + ShouldNotReachHere(); + } + } + for (uint j = TypeFunc::Parms; j < call->req(); j++) { + if (call->in(j)->bottom_type()->make_ptr() && + call->in(j)->bottom_type()->make_ptr()->isa_oopptr()) { + uint k = 0; + for (; k < args_len && calls[i].args[k].pos != (int)j; k++); + if (k == args_len) { + fatal(err_msg("arg %d for call %s not covered", j, call->_name)); + } + } + } + } 
else { + for (uint j = TypeFunc::Parms; j < call->req(); j++) { + if (call->in(j)->bottom_type()->make_ptr() && + call->in(j)->bottom_type()->make_ptr()->isa_oopptr()) { + fatal(err_msg("%s not covered", call->_name)); + } + } + } + } + } else if (n->is_ShenandoahBarrier()) { + assert(!barriers.contains(n), ""); + assert(n->Opcode() != Op_ShenandoahWriteBarrier || n->find_out_with(Op_ShenandoahWBMemProj) != NULL, "bad shenandoah write barrier"); + assert(n->Opcode() != Op_ShenandoahWriteBarrier || n->outcnt() > 1, "bad shenandoah write barrier"); + barriers.push(n); + } else if (n->is_AddP() + || n->is_Phi() + || n->Opcode() == Op_CastPP + || n->Opcode() == Op_CheckCastPP + || n->Opcode() == Op_Return + || n->Opcode() == Op_CMoveP + || n->Opcode() == Op_CMoveN + || n->Opcode() == Op_Rethrow + || n->is_MemBar() + || n->Opcode() == Op_Conv2B + || n->Opcode() == Op_SafePoint + || n->is_CallJava() + || n->Opcode() == Op_Unlock + || n->Opcode() == Op_EncodeP + || n->Opcode() == Op_DecodeN) { + // nothing to do + } else { + static struct { + int opcode; + struct { + int pos; + verify_type t; + } inputs[2]; + } others[] = { + Op_FastLock, + { { 1, ShenandoahLoad }, { -1, ShenandoahNone} }, + Op_Lock, + { { TypeFunc::Parms, ShenandoahLoad }, { -1, ShenandoahNone} }, + Op_AryEq, + { { 2, ShenandoahLoad }, { 3, ShenandoahLoad } }, + Op_StrIndexOf, + { { 2, ShenandoahLoad }, { 4, ShenandoahLoad } }, + Op_StrComp, + { { 2, ShenandoahLoad }, { 4, ShenandoahLoad } }, + Op_StrEquals, + { { 2, ShenandoahLoad }, { 3, ShenandoahLoad } }, + Op_EncodeISOArray, + { { 2, ShenandoahLoad }, { 3, ShenandoahStore } }, + Op_CastP2X, + { { 1, ShenandoahLoad }, { -1, ShenandoahNone} }, + }; + + const int others_len = sizeof(others) / sizeof(others[0]); + int i = 0; + for (; i < others_len; i++) { + if (others[i].opcode == n->Opcode()) { + break; + } + } + uint stop = n->is_Call() ? n->as_Call()->tf()->domain()->cnt() : n->req(); + if (i != others_len) { + const int inputs_len = sizeof(others[0].inputs) / sizeof(others[0].inputs[0]); + for (uint j = 0; j < inputs_len; j++) { + int pos = others[i].inputs[j].pos; + if (pos == -1) { + break; + } + if (!ShenandoahBarrierNode::verify_helper(n->in(pos), phis, visited, others[i].inputs[j].t, trace, barriers_used)) { + n->dump(10); + ShouldNotReachHere(); + } + } + for (uint j = 1; j < stop; j++) { + if (n->in(j) != NULL && n->in(j)->bottom_type()->make_ptr() && + n->in(j)->bottom_type()->make_ptr()->isa_oopptr()) { + uint k = 0; + for (; k < inputs_len && others[i].inputs[k].pos != (int)j; k++); + if (k == inputs_len) { + fatal(err_msg("arg %d for node %s not covered", j, n->Name())); + } + } + } + } else { + for (uint j = 1; j < stop; j++) { + if (n->in(j) != NULL && n->in(j)->bottom_type()->make_ptr() && + n->in(j)->bottom_type()->make_ptr()->isa_oopptr()) { + fatal(err_msg("%s not covered", n->Name())); + } + } + } + } + + if (n->is_SafePoint()) { + SafePointNode* sfpt = n->as_SafePoint(); + if (verify_no_useless_barrier && sfpt->jvms() != NULL) { + for (uint i = sfpt->jvms()->scloff(); i < sfpt->jvms()->endoff(); i++) { + if (!ShenandoahBarrierNode::verify_helper(sfpt->in(i), phis, visited, ShenandoahLoad, trace, barriers_used)) { + phis.clear(); + visited.Reset(); + } + } + } + } + for( uint i = 0; i < n->len(); ++i ) { + Node *m = n->in(i); + if (m == NULL) continue; + + // In most cases, inputs should be known to be non null. 
If it's
+    // not the case, it could be a missing cast_not_null() in an
+    // intrinsic or support might be needed in AddPNode::Ideal() to
+    // avoid a NULL+offset input.
+    if (!(n->is_Phi() ||
+          (n->is_SafePoint() && (!n->is_CallRuntime() || !strcmp(n->as_CallRuntime()->_name, "g1_wb_pre") || !strcmp(n->as_CallRuntime()->_name, "unsafe_arraycopy"))) ||
+          n->Opcode() == Op_CmpP ||
+          n->Opcode() == Op_CmpN ||
+          (n->Opcode() == Op_StoreP && i == StoreNode::ValueIn) ||
+          (n->Opcode() == Op_StoreN && i == StoreNode::ValueIn) ||
+          n->Opcode() == Op_CheckCastPP ||
+          n->Opcode() == Op_CastPP ||
+          n->Opcode() == Op_Return ||
+          n->Opcode() == Op_Conv2B ||
+          n->is_AddP() ||
+          n->Opcode() == Op_CMoveP ||
+          n->Opcode() == Op_CMoveN ||
+          n->Opcode() == Op_Rethrow ||
+          n->is_MemBar() ||
+          n->is_Mem() ||
+          n->Opcode() == Op_AryEq ||
+          n->Opcode() == Op_SCMemProj ||
+          n->Opcode() == Op_EncodeP ||
+          n->Opcode() == Op_DecodeN ||
+          (n->is_CallRuntime() && !strcmp(n->as_CallRuntime()->_name, "generic_arraycopy")))) {
+      if (m->bottom_type()->isa_oopptr() && m->bottom_type()->meet(TypePtr::NULL_PTR) == m->bottom_type()) {
+        n->dump();
+        m->dump();
+        ShouldNotReachHere();
+      }
+    }
+
+    wq.push(m);
+  }
+}
+
+if (verify_no_useless_barrier) {
+  for (int i = 0; i < barriers.length(); i++) {
+    Node* n = barriers.at(i);
+    if (!barriers_used.member(n)) {
+      tty->print("XXX useless barrier"); n->dump(-2);
+      ShouldNotReachHere();
+    }
+  }
+}
+#endif
+}
+
+#include "opto/loopnode.hpp"
+
+MergeMemNode* PhaseIdealLoop::shenandoah_allocate_merge_mem(Node* mem, int alias, Node* rep_proj, Node* rep_ctrl) {
+  MergeMemNode* mm = MergeMemNode::make(C, mem);
+  mm->set_memory_at(alias, rep_proj);
+  register_new_node(mm, rep_ctrl);
+  return mm;
+}
+
+MergeMemNode* PhaseIdealLoop::shenandoah_clone_merge_mem(Node* u, Node* mem, int alias, Node* rep_proj, Node* rep_ctrl, DUIterator& i) {
+  MergeMemNode* newmm = NULL;
+  MergeMemNode* u_mm = u->as_MergeMem();
+  Node* c = get_ctrl(u);
+  if (is_dominator(c, rep_ctrl)) {
+    c = rep_ctrl;
+  } else {
+    assert(is_dominator(rep_ctrl, c), "one must dominate the other");
+  }
+  if (u->outcnt() == 1) {
+    if (u->req() > (uint)alias && u->in(alias) == mem) {
+      _igvn.replace_input_of(u, alias, rep_proj);
+      --i;
+    } else {
+      _igvn.rehash_node_delayed(u);
+      u_mm->set_memory_at(alias, rep_proj);
+    }
+    newmm = u_mm;
+    set_ctrl_and_loop(u, c);
+  } else {
+    // can't simply clone u and then change one of its inputs because
+    // it adds and then removes an edge which messes with the
+    // DUIterator
+    newmm = MergeMemNode::make(C, u_mm->base_memory());
+    for (uint j = 0; j < u->req(); j++) {
+      if (j < newmm->req()) {
+        if (j == (uint)alias) {
+          newmm->set_req(j, rep_proj);
+        } else if (newmm->in(j) != u->in(j)) {
+          newmm->set_req(j, u->in(j));
+        }
+      } else if (j == (uint)alias) {
+        newmm->add_req(rep_proj);
+      } else {
+        newmm->add_req(u->in(j));
+      }
+    }
+    if ((uint)alias >= u->req()) {
+      newmm->set_memory_at(alias, rep_proj);
+    }
+    register_new_node(newmm, c);
+  }
+  return newmm;
+}
+
+bool PhaseIdealLoop::shenandoah_should_process_phi(Node* phi, int alias) {
+  if (phi->adr_type() == TypePtr::BOTTOM) {
+    Node* region = phi->in(0);
+    for (DUIterator_Fast jmax, j = region->fast_outs(jmax); j < jmax; j++) {
+      Node* uu = region->fast_out(j);
+      if (uu->is_Phi() && uu != phi && uu->bottom_type() == Type::MEMORY && C->get_alias_index(uu->adr_type()) == alias) {
+        return false;
+      }
+    }
+    return true;
+  }
+  return C->get_alias_index(phi->adr_type()) == alias;
+}
+
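+// Returns true if d can be proven to dominate n when both nodes have the
+// same control c: idom information can't decide this, so follow d's inputs
+// inside the block and fail if they lead back to n.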
+bool PhaseIdealLoop::shenandoah_is_dominator_same_ctrl(Node* c, Node* d, Node* n) {
+  // That both nodes have the same control is not sufficient to prove
+  // domination, verify that there's no path from d to n
+  ResourceMark rm;
+  Unique_Node_List wq;
+  wq.push(d);
+  for (uint next = 0; next < wq.size(); next++) {
+    Node *m = wq.at(next);
+    if (m == n) {
+      return false;
+    }
+    if (m->is_Phi() && m->in(0)->is_Loop()) {
+      assert(ctrl_or_self(m->in(LoopNode::EntryControl)) != c, "following loop entry should lead to new control");
+    } else {
+      for (uint i = 0; i < m->req(); i++) {
+        if (m->in(i) != NULL && ctrl_or_self(m->in(i)) == c) {
+          wq.push(m->in(i));
+        }
+      }
+    }
+  }
+  return true;
+}
+
+bool PhaseIdealLoop::shenandoah_is_dominator(Node *d_c, Node *n_c, Node* d, Node* n) {
+  if (d_c != n_c) {
+    return is_dominator(d_c, n_c);
+  }
+  return shenandoah_is_dominator_same_ctrl(d_c, d, n);
+}
+
+Node* shenandoah_next_mem(Node* mem, int alias) {
+  Node* res = NULL;
+  if (mem->is_Proj()) {
+    res = mem->in(0);
+  } else if (mem->is_SafePoint() || mem->is_MemBar()) {
+    res = mem->in(TypeFunc::Memory);
+  } else if (mem->is_Phi()) {
+    res = mem->in(1);
+  } else if (mem->is_ShenandoahBarrier()) {
+    res = mem->in(ShenandoahBarrierNode::Memory);
+  } else if (mem->is_MergeMem()) {
+    res = mem->as_MergeMem()->memory_at(alias);
+  } else if (mem->is_Store() || mem->is_LoadStore() || mem->is_ClearArray()) {
+    assert(alias == Compile::AliasIdxRaw, "following raw memory can't lead to a barrier");
+    res = mem->in(MemNode::Memory);
+  } else {
+#ifdef ASSERT
+    mem->dump();
+#endif
+    ShouldNotReachHere();
+  }
+  return res;
+}
+
+bool shenandoah_suitable_mem(Node* mem, Node* old_mem, Node* rep_proj) {
+  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
+    Node* u = mem->fast_out(i);
+    if (u->is_MergeMem()) {
+      if (u->has_out_with(Op_MergeMem)) {
+        // too complicated for now
+        return false;
+      }
+      if (old_mem == u && rep_proj->has_out_with(Op_MergeMem)) {
+        return false;
+      }
+    }
+    if (u->Opcode() == Op_Unlock && mem->is_Proj() && mem->in(0)->Opcode() == Op_MemBarReleaseLock) {
+      // would require a merge mem between unlock and the
+      // preceding membar. Would confuse logic that eliminates
+      // lock/unlock nodes.
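+      // Bail out: callers treat an unsuitable memory node as a signal to
+      // skip the transformation.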
+ return false; + } + } + return true; +} + +void PhaseIdealLoop::shenandoah_fix_memory_uses(Node* mem, Node* replacement, Node* rep_proj, Node* rep_ctrl, int alias) { + uint last = C->unique(); + MergeMemNode* mm = NULL; + assert(mem->bottom_type() == Type::MEMORY, ""); + for (DUIterator i = mem->outs(); mem->has_out(i); i++) { + Node* u = mem->out(i); + if (u != replacement && u->_idx < last) { + if (u->is_ShenandoahBarrier() && alias != Compile::AliasIdxRaw) { + if (C->get_alias_index(u->adr_type()) == alias && shenandoah_is_dominator(rep_ctrl, ctrl_or_self(u), replacement, u)) { + _igvn.replace_input_of(u, u->find_edge(mem), rep_proj); + assert(u->find_edge(mem) == -1, "only one edge"); + --i; + } + } else if (u->is_Mem()) { + if (C->get_alias_index(u->adr_type()) == alias && shenandoah_is_dominator(rep_ctrl, ctrl_or_self(u), replacement, u)) { + assert(alias == Compile::AliasIdxRaw , "only raw memory can lead to a memory operation"); + _igvn.replace_input_of(u, u->find_edge(mem), rep_proj); + assert(u->find_edge(mem) == -1, "only one edge"); + --i; + } + } else if (u->is_MergeMem()) { + MergeMemNode* u_mm = u->as_MergeMem(); + if (u_mm->memory_at(alias) == mem) { + MergeMemNode* newmm = NULL; + for (DUIterator_Fast jmax, j = u->fast_outs(jmax); j < jmax; j++) { + Node* uu = u->fast_out(j); + assert(!uu->is_MergeMem(), "chain of MergeMems?"); + if (uu->is_Phi()) { + if (shenandoah_should_process_phi(uu, alias)) { + Node* region = uu->in(0); + int nb = 0; + for (uint k = 1; k < uu->req(); k++) { + if (uu->in(k) == u && is_dominator(rep_ctrl, region->in(k))) { + if (newmm == NULL) { + newmm = shenandoah_clone_merge_mem(u, mem, alias, rep_proj, rep_ctrl, i); + } + if (newmm != u) { + _igvn.replace_input_of(uu, k, newmm); + nb++; + --jmax; + } + } + } + if (nb > 0) { + --j; + } + } + } else { + if (shenandoah_is_dominator(rep_ctrl, ctrl_or_self(uu), replacement, uu)) { + if (newmm == NULL) { + newmm = shenandoah_clone_merge_mem(u, mem, alias, rep_proj, rep_ctrl, i); + } + if (newmm != u) { + _igvn.replace_input_of(uu, uu->find_edge(u), newmm); + --j, --jmax; + } + } + } + } + } + } else if (u->is_Phi()) { + assert(u->bottom_type() == Type::MEMORY, "what else?"); + Node* region = u->in(0); + if (shenandoah_should_process_phi(u, alias)) { + bool replaced = false; + for (uint j = 1; j < u->req(); j++) { + if (u->in(j) == mem && is_dominator(rep_ctrl, region->in(j))) { + Node* nnew = rep_proj; + if (u->adr_type() == TypePtr::BOTTOM) { + if (mm == NULL) { + mm = shenandoah_allocate_merge_mem(mem, alias, rep_proj, rep_ctrl); + } + nnew = mm; + } + _igvn.replace_input_of(u, j, nnew); + replaced = true; + } + } + if (replaced) { + --i; + } + + } + } else if (u->adr_type() == TypePtr::BOTTOM || + u->adr_type() == NULL) { + assert(u->adr_type() != NULL || + u->Opcode() == Op_Rethrow || + u->Opcode() == Op_Return || + u->Opcode() == Op_SafePoint || + (u->is_CallStaticJava() && u->as_CallStaticJava()->uncommon_trap_request() != 0) || + (u->is_CallStaticJava() && u->as_CallStaticJava()->_entry_point == OptoRuntime::rethrow_stub()) || + u->Opcode() == Op_CallLeaf, ""); + if (shenandoah_is_dominator(rep_ctrl, ctrl_or_self(u), replacement, u)) { + if (mm == NULL) { + mm = shenandoah_allocate_merge_mem(mem, alias, rep_proj, rep_ctrl); + } + _igvn.replace_input_of(u, u->find_edge(mem), mm); + --i; + } + } else if (C->get_alias_index(u->adr_type()) == alias) { + if (shenandoah_is_dominator(rep_ctrl, ctrl_or_self(u), replacement, u)) { + _igvn.replace_input_of(u, u->find_edge(mem), rep_proj); + --i; + } + } + 
} + } +} + +Node* PhaseIdealLoop::shenandoah_no_branches(Node* c, Node* dom, bool allow_one_proj) { + Node* iffproj = NULL; + while (c != dom) { + Node* next = idom(c); + assert(next->unique_ctrl_out() == c || c->is_Proj() || c->is_Region(), "multiple control flow out but no proj or region?"); + if (c->is_Region()) { + ResourceMark rm; + Unique_Node_List wq; + wq.push(c); + for (uint i = 0; i < wq.size(); i++) { + Node *n = wq.at(i); + if (n->is_Region()) { + for (uint j = 1; j < n->req(); j++) { + if (n->in(j) != next) { + wq.push(n->in(j)); + } + } + } else { + if (n->in(0) != next) { + wq.push(n->in(0)); + } + } + } + for (DUIterator_Fast imax, i = next->fast_outs(imax); i < imax; i++) { + Node* u = next->fast_out(i); + if (u->is_CFG()) { + if (!wq.member(u)) { + return NodeSentinel; + } + } + } + + } else if (c->is_Proj()) { + if (c->is_IfProj()) { + if (c->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) != NULL) { + // continue; + } else { + if (!allow_one_proj) { + return NodeSentinel; + } + if (iffproj == NULL) { + iffproj = c; + } else { + return NodeSentinel; + } + } + } else if (c->Opcode() == Op_JumpProj) { + return NodeSentinel; // unsupported + } else if (c->Opcode() == Op_CatchProj) { + return NodeSentinel; // unsupported + } else if (c->Opcode() == Op_CProj && next->Opcode() == Op_NeverBranch) { + return NodeSentinel; // unsupported + } else { + assert(next->unique_ctrl_out() == c, "unsupported branch pattern"); + } + } + c = next; + } + return iffproj; +} + +#ifdef ASSERT +void PhaseIdealLoop::shenandoah_memory_dominates_all_paths_helper(Node* c, Node* rep_ctrl, Unique_Node_List& controls) { + const bool trace = false; + if (trace) { tty->print("X control is"); c->dump(); } + + uint start = controls.size(); + controls.push(c); + for (uint i = start; i < controls.size(); i++) { + Node *n = controls.at(i); + + if (trace) { tty->print("X from"); n->dump(); } + + if (n == rep_ctrl) { + continue; + } + + if (n->is_Proj()) { + Node* n_dom = n->in(0); + IdealLoopTree* n_dom_loop = get_loop(n_dom); + if (n->is_IfProj() && n_dom->outcnt() == 2) { + n_dom_loop = get_loop(n_dom->as_If()->proj_out(n->as_Proj()->_con == 0 ? 
1 : 0)); + } + if (n_dom_loop != _ltree_root) { + Node* tail = n_dom_loop->tail(); + if (tail->is_Region()) { + for (uint j = 1; j < tail->req(); j++) { + if (is_dominator(n_dom, tail->in(j)) && !is_dominator(n, tail->in(j))) { + assert(is_dominator(rep_ctrl, tail->in(j)), "why are we here?"); + // entering loop from below, mark backedge + if (trace) { tty->print("X pushing backedge"); tail->in(j)->dump(); } + controls.push(tail->in(j)); + //assert(n->in(0) == n_dom, "strange flow control"); + } + } + } else if (get_loop(n) != n_dom_loop && is_dominator(n_dom, tail)) { + // entering loop from below, mark backedge + if (trace) { tty->print("X pushing backedge"); tail->dump(); } + controls.push(tail); + //assert(n->in(0) == n_dom, "strange flow control"); + } + } + } + + if (n->is_Loop()) { + Node* c = n->in(LoopNode::EntryControl); + if (trace) { tty->print("X pushing"); c->dump(); } + controls.push(c); + } else if (n->is_Region()) { + for (uint i = 1; i < n->req(); i++) { + Node* c = n->in(i); + if (trace) { tty->print("X pushing"); c->dump(); } + controls.push(c); + } + } else { + Node* c = n->in(0); + if (trace) { tty->print("X pushing"); c->dump(); } + controls.push(c); + } + } + +} + +bool PhaseIdealLoop::shenandoah_memory_dominates_all_paths(Node* mem, Node* rep_ctrl, int alias) { + const bool trace = false; + if (trace) { + tty->print("XXX mem is"); mem->dump(); + tty->print("XXX rep ctrl is"); rep_ctrl->dump(); + tty->print_cr("XXX alias is %d", alias); + } + ResourceMark rm; + Unique_Node_List wq; + Unique_Node_List controls; + wq.push(mem); + for (uint next = 0; next < wq.size(); next++) { + Node *nn = wq.at(next); + if (trace) { tty->print("XX from mem"); nn->dump(); } + assert(nn->bottom_type() == Type::MEMORY, "memory only"); + + if (nn->is_Phi()) { + Node* r = nn->in(0); + for (DUIterator_Fast jmax, j = r->fast_outs(jmax); j < jmax; j++) { + Node* u = r->fast_out(j); + if (u->is_Phi() && u->bottom_type() == Type::MEMORY && u != nn && + (u->adr_type() == TypePtr::BOTTOM || C->get_alias_index(u->adr_type()) == alias)) { + if (trace) { tty->print("XX Next mem (other phi)"); u->dump(); } + wq.push(u); + } + } + } + + for (DUIterator_Fast imax, i = nn->fast_outs(imax); i < imax; i++) { + Node* use = nn->fast_out(i); + + if (trace) { tty->print("XX use %p", use->adr_type()); use->dump(); } + if (use->is_CFG()) { + assert(use->in(TypeFunc::Memory) == nn, "bad cfg node"); + Node* c = use->in(0); + if (is_dominator(rep_ctrl, c)) { + shenandoah_memory_dominates_all_paths_helper(c, rep_ctrl, controls); + } else if (use->is_CallStaticJava() && use->as_CallStaticJava()->uncommon_trap_request() != 0 && c->is_Region()) { + Node* region = c; + if (trace) { tty->print("XX unc region"); region->dump(); } + for (uint j = 1; j < region->req(); j++) { + if (is_dominator(rep_ctrl, region->in(j))) { + if (trace) { tty->print("XX unc follows"); region->in(j)->dump(); } + shenandoah_memory_dominates_all_paths_helper(region->in(j), rep_ctrl, controls); + } + } + } + //continue; + } else if (use->is_Phi()) { + assert(use->bottom_type() == Type::MEMORY, "bad phi"); + if ((use->adr_type() == TypePtr::BOTTOM /*&& !shenandoah_has_alias_phi(C, use, alias)*/) || + C->get_alias_index(use->adr_type()) == alias) { + for (uint j = 1; j < use->req(); j++) { + if (use->in(j) == nn) { + Node* c = use->in(0)->in(j); + if (is_dominator(rep_ctrl, c)) { + shenandoah_memory_dominates_all_paths_helper(c, rep_ctrl, controls); + } + } + } + } + // continue; + } + + if (use->is_MergeMem()) { + if 
(use->as_MergeMem()->memory_at(alias) == nn) {
+          if (trace) { tty->print("XX Next mem"); use->dump(); }
+          // follow the memory edges
+          wq.push(use);
+        }
+      } else if (use->is_Phi()) {
+        assert(use->bottom_type() == Type::MEMORY, "bad phi");
+        if ((use->adr_type() == TypePtr::BOTTOM /*&& !shenandoah_has_alias_phi(C, use, alias)*/) ||
+            C->get_alias_index(use->adr_type()) == alias) {
+          if (trace) { tty->print("XX Next mem"); use->dump(); }
+          // follow the memory edges
+          wq.push(use);
+        }
+      } else if (use->bottom_type() == Type::MEMORY &&
+                 (use->adr_type() == TypePtr::BOTTOM || C->get_alias_index(use->adr_type()) == alias)) {
+        if (trace) { tty->print("XX Next mem"); use->dump(); }
+        // follow the memory edges
+        wq.push(use);
+      } else if ((use->is_SafePoint() || use->is_MemBar()) &&
+                 (use->adr_type() == TypePtr::BOTTOM || C->get_alias_index(use->adr_type()) == alias)) {
+        for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
+          Node* u = use->fast_out(j);
+          if (u->bottom_type() == Type::MEMORY) {
+            if (trace) { tty->print("XX Next mem"); u->dump(); }
+            // follow the memory edges
+            wq.push(u);
+          }
+        }
+      } else if (use->Opcode() == Op_ShenandoahWriteBarrier && C->get_alias_index(use->adr_type()) == alias) {
+        Node* m = use->find_out_with(Op_ShenandoahWBMemProj);
+        if (m != NULL) {
+          if (trace) { tty->print("XX Next mem"); m->dump(); }
+          // follow the memory edges
+          wq.push(m);
+        }
+      }
+    }
+  }
+
+  if (controls.size() == 0) {
+    return false;
+  }
+
+  for (uint i = 0; i < controls.size(); i++) {
+    Node *n = controls.at(i);
+
+    if (trace) { tty->print("X checking"); n->dump(); }
+
+    if (n->unique_ctrl_out() != NULL) {
+      continue;
+    }
+
+    if (n->Opcode() == Op_NeverBranch) {
+      Node* taken = n->as_Multi()->proj_out(0);
+      if (!controls.member(taken)) {
+        if (trace) { tty->print("X not seen"); taken->dump(); }
+        return false;
+      }
+      continue;
+    }
+
+    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+      Node* u = n->fast_out(j);
+
+      if (u->is_CFG()) {
+        if (!controls.member(u)) {
+          if (u->is_Proj() && u->as_Proj()->is_uncommon_trap_proj(Deoptimization::Reason_none)) {
+            if (trace) { tty->print("X not seen but unc"); u->dump(); }
+          } else {
+            if (u->unique_ctrl_out() != NULL && u->unique_ctrl_out()->Opcode() == Op_Halt) {
+              if (trace) { tty->print("X not seen but sink in halt"); u->dump(); }
+            } else {
+              if (trace) { tty->print("X not seen"); u->dump(); }
+              return false;
+            }
+          }
+        } else {
+          if (trace) { tty->print("X seen"); u->dump(); }
+        }
+      }
+    }
+  }
+  return true;
+}
+#endif
+
+static bool shenandoah_has_mem_phi(Compile* C, Node* region, int alias) {
+  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
+    Node* use = region->fast_out(i);
+    if (use->is_Phi() && use->bottom_type() == Type::MEMORY &&
+        (C->get_alias_index(use->adr_type()) == alias)) {
+      return true;
+    }
+  }
+  return false;
+}
+
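+// Walk control up from c towards rep_ctrl/mem_ctrl and record, as
+// (region, memory state) pairs in 'regions', every region on the way that
+// has no memory Phi for 'alias' yet: the caller adds the missing Phis.
+// Returns false if no dominating memory state can be found for a region.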
+bool PhaseIdealLoop::shenandoah_fix_mem_phis_helper(Node* c, Node* mem, Node* mem_ctrl, Node* rep_ctrl, int alias, VectorSet& controls, GrowableArray<Node*>& regions) {
+  const bool trace = false;
+  Node_List wq;
+  wq.push(c);
+
+#ifdef ASSERT
+  if (trace) { tty->print("YYY from"); c->dump(); }
+  if (trace) { tty->print("YYY with mem"); mem->dump(); }
+#endif
+
+  while (wq.size() > 0) {
+    c = wq.pop();
+
+    while (!c->is_Region() || c->is_Loop()) {
+#ifdef ASSERT
+      if (trace) { tty->print("YYY"); c->dump(); }
+#endif
+      assert(c->is_CFG(), "node should be control node");
+      if (c == mem_ctrl || is_dominator(c, rep_ctrl)) {
+        c = NULL;
+        break;
+      } else if (c->is_Loop()) {
+        c = c->in(LoopNode::EntryControl);
+      } else {
+        c = c->in(0);
+      }
+    }
+    if (c == NULL) {
+      continue;
+    }
+
+#ifdef ASSERT
+    if (trace) { tty->print("YYY new region"); c->dump(); }
+#endif
+
+    bool has_phi = shenandoah_has_mem_phi(C, c, alias);
+    if (!has_phi) {
+
+      Node* m = mem;
+      Node* m_ctrl = ctrl_or_self(m);
+      {
+        ResourceMark rm;
+        VectorSet wq(Thread::current()->resource_area());
+        wq.set(m->_idx);
+        while (!is_dominator(m_ctrl, c) || m_ctrl == c) {
+          m = shenandoah_next_mem(m, alias);
+          if (wq.test_set(m->_idx)) {
+            return false;
+          }
+          m_ctrl = ctrl_or_self(m);
+        }
+      }
+
+      assert(m->bottom_type() == Type::MEMORY, "");
+
+      if (m->is_MergeMem()) {
+        m = m->as_MergeMem()->memory_at(alias);
+        m_ctrl = ctrl_or_self(m);
+      }
+
+#ifdef ASSERT
+      if (trace) { tty->print("YYY mem "); m->dump(); }
+#endif
+
+      if (controls.test(c->_idx)) {
+        int i = 0;
+        for (; i < regions.length() && regions.at(i) != c; i+=2);
+        assert(i < regions.length(), "missing region");
+        Node* prev_m = regions.at(i+1);
+        if (prev_m == m) {
+          continue;
+        }
+#ifdef ASSERT
+        if (trace) { tty->print("YYY prev mem "); prev_m->dump(); }
+#endif
+        Node* prev_m_ctrl = ctrl_or_self(prev_m);
+        assert(shenandoah_is_dominator(m_ctrl, prev_m_ctrl, m, prev_m) ||
+               shenandoah_is_dominator(prev_m_ctrl, m_ctrl, prev_m, m), "one should dominate the other");
+        if (shenandoah_is_dominator(m_ctrl, prev_m_ctrl, m, prev_m)) {
+          continue;
+        }
+#ifdef ASSERT
+        if (trace) { tty->print("YYY Fixing "); c->dump(); }
+#endif
+        regions.at_put(i+1, m);
+      } else {
+#ifdef ASSERT
+        if (trace) { tty->print("YYY Pushing "); c->dump(); }
+#endif
+        regions.push(c);
+        regions.push(m);
+      }
+    } else {
+      continue;
+    }
+
+    controls.set(c->_idx);
+
+    for (uint i = 1; i < c->req(); i++) {
+      wq.push(c->in(i));
+    }
+  }
+  return true;
+}
+
+
+bool PhaseIdealLoop::shenandoah_fix_mem_phis(Node* mem, Node* mem_ctrl, Node* rep_ctrl, int alias) {
+  //ResourceMark rm; // register_new_node makes an internal grow
+  GrowableArray<Node*> regions;
+  VectorSet controls(Thread::current()->resource_area());
+  const bool trace = false;
+
+#ifdef ASSERT
+  if (trace) { tty->print("YYY mem is "); mem->dump(); }
+  if (trace) { tty->print("YYY mem ctrl is "); mem_ctrl->dump(); }
+  if (trace) { tty->print("YYY rep ctrl is "); rep_ctrl->dump(); }
+  if (trace) { tty->print_cr("YYY alias is %d", alias); }
+#endif
+
+  // Walk memory edges from mem until we hit a memory point where
+  // control is known then follow the control up looking for regions
+  // with no memory Phi for alias
+  Unique_Node_List wq;
+  wq.push(mem);
+
+  for (uint next = 0; next < wq.size(); next++) {
+    Node *n = wq.at(next);
+#ifdef ASSERT
+    if (trace) { tty->print("YYY from (2) "); n->dump(); }
+#endif
+    for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+      Node* u = n->fast_out(i);
+#ifdef ASSERT
+      if (trace) { tty->print("YYY processing "); u->dump(); }
+#endif
+      if (u->is_Phi()) {
+        assert(u->bottom_type() == Type::MEMORY, "strange memory graph");
+        if (shenandoah_should_process_phi(u, alias)) {
+          for (uint j = 1; j < u->req(); j++) {
+            if (u->in(j) == n) {
+              Node *c = u->in(0)->in(j);
+              if (!shenandoah_fix_mem_phis_helper(c, n, mem_ctrl, rep_ctrl, alias, controls, regions)) {
+                return false;
+              }
+            }
+          }
+        }
+#ifdef ASSERT
+      } else if (u->is_CallStaticJava() && u->as_CallStaticJava()->uncommon_trap_request() != 0) {
+        if (!shenandoah_fix_mem_phis_helper(u->in(0), n, mem_ctrl, rep_ctrl, alias, controls, regions)) {
+          return false;
+        }
+#endif
+      } else if ((u->is_CFG() && u->adr_type() == TypePtr::BOTTOM)
|| u->Opcode() == Op_Rethrow || u->Opcode() == Op_Return) { + if (!shenandoah_fix_mem_phis_helper(u->in(0), n, mem_ctrl, rep_ctrl, alias, controls, regions)) { + return false; + } + } else if (u->is_MergeMem() && u->as_MergeMem()->memory_at(alias) == n) { + wq.push(u); + } else if (u->Opcode() == Op_ShenandoahWriteBarrier && C->get_alias_index(u->adr_type()) == alias) { + Node* m = u->find_out_with(Op_ShenandoahWBMemProj); + if (m != NULL) { + wq.push(m); + } + } + } + } +#ifdef ASSERT + if (trace) { + tty->print_cr("XXXXXXXXXXXXXXXXXXXX"); + for (int i = 0; i < regions.length(); i++) { + Node* r = regions.at(i); + tty->print("%d", i); r->dump(); + } + tty->print_cr("XXXXXXXXXXXXXXXXXXXX"); + } +#endif + + if (regions.length() == 0) { + return true; + } + + { + int i = 0; + for (; i < regions.length(); i+=2) { + Node* region = regions.at(i); + bool has_phi = false; + for (DUIterator_Fast jmax, j = region->fast_outs(jmax); j < jmax && !has_phi; j++) { + Node* u = region->fast_out(j); + if (u->is_Phi() && u->bottom_type() == Type::MEMORY && + (u->adr_type() == TypePtr::BOTTOM || C->get_alias_index(u->adr_type()) == alias)) { + has_phi = true; + } + } + if (!has_phi) { + break; + } + } + if (i == regions.length()) { + return true; + } + } + + // Try to restrict the update to path that post dominates rep_ctrl + int k = 0; + int start = 0; + int end = 0; + do { + start = end; + end = k; + for (int i = end; i < regions.length(); i+=2) { + Node* r = regions.at(i); + int prev = k; + for (uint j = 1; j < r->req() && prev == k; j++) { + if (end == 0) { + if (is_dominator(rep_ctrl, r->in(j))) { + Node* mem = regions.at(i+1); + regions.at_put(i, regions.at(k)); + regions.at_put(i+1, regions.at(k+1)); + regions.at_put(k, r); + regions.at_put(k+1, mem); + k+=2; + } + } else { + for (int l = start; l < end && prev == k; l+=2) { + Node* r2 = regions.at(l); + if (is_dominator(r2, r->in(j))) { + Node* mem = regions.at(i+1); + regions.at_put(i, regions.at(k)); + regions.at_put(i+1, regions.at(k+1)); + regions.at_put(k, r); + regions.at_put(k+1, mem); + k+=2; + } + } + } + } + } +#ifdef ASSERT + if (trace) { tty->print_cr("k = %d start = %d end = %d", k, start, end); } +#endif + } while(k != end); + +#ifdef ASSERT + if (end != regions.length()) { + if (trace) { tty->print_cr("Compacting %d -> %d", regions.length(), end); } + } +#endif + regions.trunc_to(end); + +#ifdef ASSERT + if (trace) { + tty->print_cr("XXXXXXXXXXXXXXXXXXXX"); + for (int i = 0; i < regions.length(); i++) { + Node* r = regions.at(i); + tty->print("%d", i); r->dump(); + } + tty->print_cr("XXXXXXXXXXXXXXXXXXXX"); + } +#endif + + // Creating new phis must be done in post order + while (regions.length() > 0) { + int i = 0; + for (; i < regions.length(); i+=2) { + Node* r1 = regions.at(i); + bool is_dom = false; + for (int j = 0; j < regions.length() && !is_dom; j+=2) { + if (i != j) { + Node* r2 = regions.at(j); + for (uint k = 1; k < r2->req() && !is_dom; k++) { + if (is_dominator(r1, r2->in(k))) { + is_dom = true; + } + } + } + } + if (!is_dom) { + break; + } + } + assert(i < regions.length(), "need one"); + Node* r = regions.at(i); + Node* m = regions.at(i+1); + regions.delete_at(i+1); + regions.delete_at(i); + + if (!shenandoah_suitable_mem(m, NULL, NULL)) { + return false; + } + Node* phi = PhiNode::make(r, m, Type::MEMORY, C->get_adr_type(alias)); +#ifdef ASSERT + if (trace) { tty->print("YYY Adding new mem phi "); phi->dump(); } +#endif + register_new_node(phi, r); + + shenandoah_fix_memory_uses(m, phi, phi, r, 
C->get_alias_index(phi->adr_type()));
+    assert(phi->outcnt() != 0, "new proj should have uses");
+    if (phi->outcnt() == 0) {
+      _igvn.remove_dead_node(phi);
+    }
+  }
+
+  return true;
+}
+
+Node* PhaseIdealLoop::shenandoah_dom_mem(Node* mem, Node*& mem_ctrl, Node* n, Node* rep_ctrl, int alias) {
+  ResourceMark rm;
+  VectorSet wq(Thread::current()->resource_area());
+  wq.set(mem->_idx);
+  mem_ctrl = get_ctrl(mem);
+  while (!shenandoah_is_dominator(mem_ctrl, rep_ctrl, mem, n)) {
+    mem = shenandoah_next_mem(mem, alias);
+    if (wq.test_set(mem->_idx)) {
+      return NULL; // hit an unexpected loop
+    }
+    mem_ctrl = ctrl_or_self(mem);
+  }
+  if (mem->is_MergeMem()) {
+    mem = mem->as_MergeMem()->memory_at(alias);
+    mem_ctrl = ctrl_or_self(mem);
+  }
+  return mem;
+}
+
+Node* PhaseIdealLoop::try_common_shenandoah_barriers(Node* n, Node *n_ctrl) {
+  if (n->is_ShenandoahBarrier() && !C->has_irreducible_loop()) {
+    // We look for a write barrier whose memory edge dominates n.
+    // Either the replacement write barrier dominates n or we have,
+    // for instance:
+    // if ( ) {
+    //   read barrier n
+    // } else {
+    //   write barrier
+    // }
+    // in which case replacing n by the write barrier causes the write
+    // barrier to move above the if() and the memory Phi that merges
+    // the memory state for both branches must be updated so both
+    // inputs become the write barrier's memory projection (and the
+    // Phi is optimized out) otherwise we risk losing a memory
+    // dependency.
+    // Once we find a replacement write barrier, the code below fixes
+    // the memory graph in cases like the one above.
+    Node* val = n->in(ShenandoahBarrierNode::ValueIn);
+    Node* val_ctrl = get_ctrl(val);
+    Node* n_proj = n->find_out_with(Op_ShenandoahWBMemProj);
+    Node* replacement = NULL;
+    int alias = C->get_alias_index(n->adr_type());
+    for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax && replacement == NULL; i++) {
+      Node* u = val->fast_out(i);
+      if (u != n && u->Opcode() == Op_ShenandoahWriteBarrier) {
+        Node* u_mem = u->in(ShenandoahBarrierNode::Memory);
+        Node* u_proj = u->find_out_with(Op_ShenandoahWBMemProj);
+        Node* u_ctrl = get_ctrl(u);
+        Node* u_mem_ctrl = get_ctrl(u_mem);
+        IdealLoopTree* n_loop = get_loop(n_ctrl);
+        IdealLoopTree* u_loop = get_loop(u_ctrl);
+
+        Node* ctrl = dom_lca(u_ctrl, n_ctrl);
+
+        if (ctrl->is_Proj() &&
+            ctrl->in(0)->is_Call() &&
+            ctrl->unique_ctrl_out() != NULL &&
+            ctrl->unique_ctrl_out()->Opcode() == Op_Catch &&
+            !is_dominator(val_ctrl, ctrl->in(0)->in(0))) {
+          continue;
+        }
+
+        if (n->Opcode() == Op_ShenandoahWriteBarrier && u_proj == NULL && n_proj != NULL) {
+          continue;
+        }
+
+        IdealLoopTree* loop = get_loop(ctrl);
+
+        // we don't want to move a write barrier in a loop
+        if (loop->is_member(u_loop) || (n->Opcode() == Op_ShenandoahWriteBarrier && loop->is_member(n_loop))) {
+          if (ShenandoahDontIncreaseWBFreq) {
+            Node* u_iffproj = shenandoah_no_branches(u_ctrl, ctrl, true);
+            if (n->Opcode() == Op_ShenandoahWriteBarrier) {
+              Node* n_iffproj = shenandoah_no_branches(n_ctrl, ctrl, true);
+              if (u_iffproj == NULL || n_iffproj == NULL) {
+                replacement = u;
+              } else if (u_iffproj != NodeSentinel && n_iffproj != NodeSentinel && u_iffproj->in(0) == n_iffproj->in(0)) {
+                replacement = u;
+              }
+            } else if (u_iffproj == NULL) {
+              replacement = u;
+            }
+          } else {
+            replacement = u;
+          }
+        }
+      }
+    }
+    if (replacement != NULL) {
+      Node* old_ctrl = get_ctrl(replacement);
+      Node* rep_ctrl = dom_lca(n_ctrl, old_ctrl);
+      if (rep_ctrl->is_Proj() &&
+          rep_ctrl->in(0)->is_Call() &&
+          rep_ctrl->unique_ctrl_out()
!= NULL && + rep_ctrl->unique_ctrl_out()->Opcode() == Op_Catch) { + rep_ctrl = rep_ctrl->in(0)->in(0); + assert(is_dominator(val_ctrl, rep_ctrl), "bad control"); + } else { + Node* c = try_move_shenandoah_barrier_before_pre_loop(rep_ctrl, val_ctrl); + if (c != NULL) { + rep_ctrl = shenandoah_move_above_predicates(c, val_ctrl); + } else { + while (rep_ctrl->is_IfProj()) { + CallStaticJavaNode* unc = rep_ctrl->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none); + if (unc != NULL) { + int req = unc->uncommon_trap_request(); + Deoptimization::DeoptReason trap_reason = Deoptimization::trap_request_reason(req); + if ((trap_reason == Deoptimization::Reason_loop_limit_check || + trap_reason == Deoptimization::Reason_predicate) && is_dominator(val_ctrl, rep_ctrl->in(0)->in(0))) { + rep_ctrl = rep_ctrl->in(0)->in(0); + continue; + } + } + break; + } + } + } + + Node* mem = replacement->in(ShenandoahBarrierNode::Memory); + Node* old_mem = mem; + Node* rep_proj = replacement->find_out_with(Op_ShenandoahWBMemProj); + { + Node* mem_ctrl = NULL; + + mem = shenandoah_dom_mem(mem, mem_ctrl, n, rep_ctrl, alias); + if (mem == NULL) { + return NULL; + } + + // Add a memory Phi for the slice of the write barrier to any + // region that post dominates rep_ctrl and doesn't have one + // already. + if (rep_proj != NULL && !shenandoah_fix_mem_phis(mem, mem_ctrl, rep_ctrl, alias)) { + return NULL; + } + + assert(shenandoah_memory_dominates_all_paths(mem, rep_ctrl, alias), "can't fix the memory graph"); + } + assert(_igvn.type(mem) == Type::MEMORY, "not memory"); + + if (rep_proj != NULL) { + Node* old_mem = replacement->in(ShenandoahBarrierNode::Memory); + if (!shenandoah_suitable_mem(mem, old_mem, rep_proj)) { + return NULL; + } + + if (replacement->in(ShenandoahBarrierNode::Memory) != mem) { + // tty->print("XXX setting memory of"); replacement->dump(); + // tty->print("XXX to"); mem->dump(); + for (DUIterator_Last imin, i = rep_proj->last_outs(imin); i >= imin; ) { + Node* u = rep_proj->last_out(i); + _igvn.rehash_node_delayed(u); + int uses_found = u->replace_edge(rep_proj, old_mem); + i -= uses_found; + } + _igvn.replace_input_of(replacement, ShenandoahBarrierNode::Memory, mem); + } + set_ctrl_and_loop(replacement, rep_ctrl); + _igvn.replace_input_of(replacement, ShenandoahBarrierNode::Control, rep_ctrl); + + shenandoah_fix_memory_uses(mem, replacement, rep_proj, rep_ctrl, C->get_alias_index(replacement->adr_type())); + assert(rep_proj->outcnt() != 0, "new proj should have uses"); + } else { + if (replacement->in(ShenandoahBarrierNode::Memory) != mem) { + _igvn._worklist.push(replacement->in(ShenandoahBarrierNode::Memory)); + _igvn.replace_input_of(replacement, ShenandoahBarrierNode::Memory, mem); + } + set_ctrl_and_loop(replacement, rep_ctrl); + _igvn.replace_input_of(replacement, ShenandoahBarrierNode::Control, rep_ctrl); + } + if (n->Opcode() == Op_ShenandoahWriteBarrier) { + if (n_proj != NULL) { + lazy_replace(n_proj, n->in(ShenandoahBarrierNode::Memory)); + } + } + lazy_replace(n, replacement); + if (rep_proj != NULL) { + set_ctrl_and_loop(rep_proj, rep_ctrl); + } + return replacement; + } + } + + return NULL; +} + +static Node* shenandoah_find_mem_phi(Node* n_loop_head, int alias, Compile* C) { + Node* phi_in = NULL; + Node* phi_bottom = NULL; + + for (DUIterator_Fast imax, i = n_loop_head->fast_outs(imax); i < imax; i++) { + Node* u = n_loop_head->fast_out(i); + if (u->is_Phi() && + u->bottom_type() == Type::MEMORY) { + if (C->get_alias_index(u->adr_type()) == alias) { + if (phi_in != 
NULL && phi_in != u) { + return NULL; + } + phi_in = u; + } else if (u->adr_type() == TypePtr::BOTTOM) { + assert(phi_bottom == NULL, "only one phi"); + phi_bottom = u; + } + } + } + if (phi_in != NULL) { + return phi_in; + } + return phi_bottom; +} + +static void shenandoah_disconnect_barrier_mem(Node* wb, PhaseIterGVN& igvn) { + Node* mem_in = wb->in(ShenandoahBarrierNode::Memory); + Node* proj = wb->find_out_with(Op_ShenandoahWBMemProj); + + for (DUIterator_Last imin, i = proj->last_outs(imin); i >= imin; ) { + Node* u = proj->last_out(i); + igvn.rehash_node_delayed(u); + int nb = u->replace_edge(proj, mem_in); + assert(nb > 0, "no replacement?"); + i -= nb; + } +} + +Node* PhaseIdealLoop::shenandoah_move_above_predicates(Node* cl, Node* val_ctrl) { + Node* entry = cl->in(LoopNode::EntryControl); + Node* above_pred = skip_loop_predicates(entry); + Node* ctrl = entry; + while (ctrl != above_pred) { + Node* next = ctrl->in(0); + if (!is_dominator(val_ctrl, next)) { + break; + } + ctrl = next; + } + return ctrl; +} + +Node* PhaseIdealLoop::try_move_shenandoah_barrier_before_loop_helper(Node* n, Node* cl, Node* val_ctrl, Node* mem) { + assert(cl->is_Loop(), "bad control"); + assert(n->Opcode() == Op_ShenandoahWriteBarrier, "only for shenandoah write barriers"); + Node* ctrl = shenandoah_move_above_predicates(cl, val_ctrl); + Node* mem_ctrl = NULL; + int alias = C->get_alias_index(n->adr_type()); + mem = shenandoah_dom_mem(mem, mem_ctrl, n, ctrl, alias); + if (mem == NULL) { + return NULL; + } + + Node* old_mem = n->in(ShenandoahBarrierNode::Memory); + Node* proj = n->find_out_with(Op_ShenandoahWBMemProj); + if (old_mem != mem && !shenandoah_suitable_mem(mem, old_mem, proj)) { + return NULL; + } + + assert(shenandoah_memory_dominates_all_paths(mem, ctrl, alias), "can't fix the memory graph"); + set_ctrl_and_loop(n, ctrl); + _igvn.replace_input_of(n, ShenandoahBarrierNode::Control, ctrl); + if (old_mem != mem) { + if (proj != NULL) { + shenandoah_disconnect_barrier_mem(n, _igvn); + shenandoah_fix_memory_uses(mem, n, proj, ctrl, C->get_alias_index(n->adr_type())); + assert(proj->outcnt() > 0, "disconnected write barrier"); + } + _igvn.replace_input_of(n, ShenandoahBarrierNode::Memory, mem); + } + if (proj != NULL) { + set_ctrl_and_loop(proj, ctrl); + } + return n; +} + +Node* PhaseIdealLoop::try_move_shenandoah_barrier_before_pre_loop(Node* c, Node* val_ctrl) { + // A write barrier between a pre and main loop can get in the way of + // vectorization. 
Move it above the pre loop if possible + CountedLoopNode* cl = NULL; + if (c->is_IfFalse() && + c->in(0)->is_CountedLoopEnd()) { + cl = c->in(0)->as_CountedLoopEnd()->loopnode(); + } else if (c->is_IfProj() && + c->in(0)->is_If() && + c->in(0)->in(0)->is_IfFalse() && + c->in(0)->in(0)->in(0)->is_CountedLoopEnd()) { + cl = c->in(0)->in(0)->in(0)->as_CountedLoopEnd()->loopnode(); + } + if (cl != NULL && + cl->is_pre_loop() && + val_ctrl != cl && + is_dominator(val_ctrl, cl)) { + return cl; + } + return NULL; +} + +Node* PhaseIdealLoop::try_move_shenandoah_barrier_before_loop(Node* n, Node *n_ctrl) { + if (n->Opcode() == Op_ShenandoahWriteBarrier) { + IdealLoopTree *n_loop = get_loop(n_ctrl); + Node* val = n->in(ShenandoahBarrierNode::ValueIn); + Node* val_ctrl = get_ctrl(val); + if (n_loop != _ltree_root && !n_loop->_irreducible) { + IdealLoopTree *val_loop = get_loop(val_ctrl); + Node* mem = n->in(ShenandoahBarrierNode::Memory); + IdealLoopTree *mem_loop = get_loop(get_ctrl(mem)); + if (!n_loop->is_member(val_loop) && + n_loop->is_member(mem_loop)) { + Node* n_loop_head = n_loop->_head; + + if (n_loop_head->is_Loop()) { + int alias = C->get_alias_index(n->adr_type()); + Node* mem = shenandoah_find_mem_phi(n_loop_head, alias, C); + if (mem == NULL) { + mem = n->in(ShenandoahBarrierNode::Memory); + } + + Node* loop = n_loop_head; + if (n_loop_head->is_CountedLoop() && n_loop_head->as_CountedLoop()->is_main_loop()) { + Node* res = try_move_shenandoah_barrier_before_pre_loop(n_loop_head->in(LoopNode::EntryControl), val_ctrl); + if (res != NULL) { + loop = res; + } + } + + return try_move_shenandoah_barrier_before_loop_helper(n, loop, val_ctrl, mem); + } + } + } + Node* ctrl = try_move_shenandoah_barrier_before_pre_loop(n->in(0), val_ctrl); + if (ctrl != NULL) { + return try_move_shenandoah_barrier_before_loop_helper(n, ctrl, val_ctrl, n->in(ShenandoahBarrierNode::Memory)); + } + } + return NULL; +} + +void PhaseIdealLoop::try_move_shenandoah_read_barrier(Node* n, Node *n_ctrl) { + if (n->Opcode() == Op_ShenandoahReadBarrier) { + ShenandoahReadBarrierNode* rb = (ShenandoahReadBarrierNode*)n; + Node* mem = n->in(MemNode::Memory); + int alias = C->get_alias_index(n->adr_type()); + const bool trace = false; + +#ifdef ASSERT + if (trace) { tty->print("Trying to move mem of"); n->dump(); } +#endif + + Node* new_mem = mem; + + ResourceMark rm; + VectorSet seen(Thread::current()->resource_area()); + Node_List phis; + + for (;;) { +#ifdef ASSERT + if (trace) { tty->print("Looking for dominator from"); mem->dump(); } +#endif + if (mem->is_Proj() && mem->in(0)->is_Start()) { + if (new_mem != n->in(MemNode::Memory)) { +#ifdef ASSERT + if (trace) { tty->print("XXX Setting mem to"); new_mem->dump(); tty->print(" for "); n->dump(); } +#endif + _igvn.replace_input_of(n, MemNode::Memory, new_mem); + } + return; + } + + Node* candidate = mem; + do { + if (!rb->is_independent(mem)) { + if (trace) { tty->print_cr("Not independent"); } + if (new_mem != n->in(MemNode::Memory)) { +#ifdef ASSERT + if (trace) { tty->print("XXX Setting mem to"); new_mem->dump(); tty->print(" for "); n->dump(); } +#endif + _igvn.replace_input_of(n, MemNode::Memory, new_mem); + } + return; + } + if (seen.test_set(mem->_idx)) { + if (trace) { tty->print_cr("Already seen"); } + ShouldNotReachHere(); + // Strange graph + if (new_mem != n->in(MemNode::Memory)) { +#ifdef ASSERT + if (trace) { tty->print("XXX Setting mem to"); new_mem->dump(); tty->print(" for "); n->dump(); } +#endif + _igvn.replace_input_of(n, MemNode::Memory, new_mem); + } + 
return; + } + if (mem->is_Phi()) { + phis.push(mem); + } + mem = shenandoah_next_mem(mem, alias); + if (mem->bottom_type() == Type::MEMORY) { + candidate = mem; + } + assert(shenandoah_is_dominator(ctrl_or_self(mem), n_ctrl, mem, n) == is_dominator(ctrl_or_self(mem), n_ctrl), "strange dominator"); +#ifdef ASSERT + if (trace) { tty->print("Next mem is"); mem->dump(); } +#endif + } while (mem->bottom_type() != Type::MEMORY || !is_dominator(ctrl_or_self(mem), n_ctrl)); + + assert(mem->bottom_type() == Type::MEMORY, "bad mem"); + + bool not_dom = false; + for (uint i = 0; i < phis.size() && !not_dom; i++) { + Node* nn = phis.at(i); + +#ifdef ASSERT + if (trace) { tty->print("Looking from phi"); nn->dump(); } +#endif + assert(nn->is_Phi(), "phis only"); + for (uint j = 2; j < nn->req() && !not_dom; j++) { + Node* m = nn->in(j); +#ifdef ASSERT + if (trace) { tty->print("Input %d is", j); m->dump(); } +#endif + while (m != mem && !seen.test_set(m->_idx)) { + if (shenandoah_is_dominator(ctrl_or_self(m), ctrl_or_self(mem), m, mem)) { + not_dom = true; + // Scheduling anomaly +#ifdef ASSERT + if (trace) { tty->print("Giving up"); m->dump(); } +#endif + break; + } + if (!rb->is_independent(m)) { + if (trace) { tty->print_cr("Not independent"); } + if (new_mem != n->in(MemNode::Memory)) { +#ifdef ASSERT + if (trace) { tty->print("XXX Setting mem to"); new_mem->dump(); tty->print(" for "); n->dump(); } +#endif + _igvn.replace_input_of(n, MemNode::Memory, new_mem); + } + return; + } + if (m->is_Phi()) { + phis.push(m); + } + m = shenandoah_next_mem(m, alias); +#ifdef ASSERT + if (trace) { tty->print("Next mem is"); m->dump(); } +#endif + } + } + } + if (!not_dom) { + new_mem = mem; + phis.clear(); + } else { + seen.Clear(); + } + } + } +} + +CallStaticJavaNode* PhaseIdealLoop::shenandoah_pin_and_expand_barriers_null_check(ShenandoahBarrierNode* wb) { + Node* val = wb->in(ShenandoahBarrierNode::ValueIn); + +#ifdef ASSERT + const Type* val_t = _igvn.type(val); + assert(val_t->meet(TypePtr::NULL_PTR) != val_t, "should be not null"); +#endif + + if (val->Opcode() == Op_CastPP && + val->in(0)->Opcode() == Op_IfTrue && + val->in(0)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) && + val->in(0)->in(0)->is_If() && + val->in(0)->in(0)->in(1)->Opcode() == Op_Bool && + val->in(0)->in(0)->in(1)->as_Bool()->_test._test == BoolTest::ne && + val->in(0)->in(0)->in(1)->in(1)->Opcode() == Op_CmpP && + val->in(0)->in(0)->in(1)->in(1)->in(1) == val->in(1) && + val->in(0)->in(0)->in(1)->in(1)->in(2)->bottom_type() == TypePtr::NULL_PTR) { + assert(val->in(0)->in(0)->in(1)->in(1)->in(1) == val->in(1), ""); + CallStaticJavaNode* unc = val->in(0)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none); + return unc; + } + return NULL; +} + +void PhaseIdealLoop::shenandoah_pin_and_expand_barriers_move_barrier(ShenandoahBarrierNode* wb) { + Node* unc = shenandoah_pin_and_expand_barriers_null_check(wb); + Node* val = wb->in(ShenandoahBarrierNode::ValueIn); + + if (unc != NULL) { + Node* ctrl = get_ctrl(wb); + Node* unc_ctrl = val->in(0); + + Node* branch = shenandoah_no_branches(ctrl, unc_ctrl, false); + assert(branch == NULL || branch == NodeSentinel, "was not looking for a branch"); + if (branch == NodeSentinel) { + return; + } + + Node* mem = wb->in(ShenandoahBarrierNode::Memory); + Node* old_mem = mem; + + Node* mem_ctrl = NULL; + int alias = C->get_alias_index(wb->adr_type()); + mem = shenandoah_dom_mem(mem, mem_ctrl, wb, unc_ctrl, alias); + if (mem == NULL) { + return; + } + + Node* proj = 
wb->find_out_with(Op_ShenandoahWBMemProj); + if (proj != NULL && mem != old_mem && !shenandoah_fix_mem_phis(mem, mem_ctrl, unc_ctrl, alias)) { + return; + } + + assert(proj == NULL || mem == old_mem || shenandoah_memory_dominates_all_paths(mem, unc_ctrl, alias), "can't fix the memory graph"); + set_ctrl_and_loop(wb, unc_ctrl); + if (wb->in(ShenandoahBarrierNode::Control) != NULL) { + _igvn.replace_input_of(wb, ShenandoahBarrierNode::Control, unc_ctrl); + } + if (old_mem != mem) { + if (proj != NULL) { + shenandoah_disconnect_barrier_mem(wb, _igvn); + shenandoah_fix_memory_uses(mem, wb, proj, unc_ctrl, C->get_alias_index(wb->adr_type())); + assert(proj->outcnt() > 0, "disconnected write barrier"); + } + _igvn.replace_input_of(wb, ShenandoahBarrierNode::Memory, mem); + } + if (proj != NULL) { + set_ctrl_and_loop(proj, unc_ctrl); + } + } +} + +Node* PhaseIdealLoop::shenandoah_pick_phi(Node* phi1, Node* phi2, Node_Stack& phis, VectorSet& visited) { + assert(phis.size() == 0, "stack needs to be empty"); + uint i = 1; + int phi_dominates = -1; + for (;;) { + assert(phi1->req() == phi2->req(), "strange pair of phis"); + assert(phis.size() % 2 == 0, ""); + Node* in1 = phi1->in(i); + Node* in2 = phi2->in(i); + + if (in1->is_MergeMem()) { + in1 = in1->as_MergeMem()->base_memory(); + } + if (in2->is_MergeMem()) { + in2 = in2->as_MergeMem()->base_memory(); + } + + if (in1 == in2) { + //continue + } else if (in1->is_Phi() && in2->is_Phi() && in1->in(0) == in2->in(0)) { + assert(!visited.test_set(in1->_idx), "no loop"); + assert(!visited.test_set(in2->_idx), "no loop"); + phis.push(phi1, i+1); + phis.push(phi2, i+1); + phi1 = in1; + phi2 = in2; + i = 1; + } else { + Node* in1_c = get_ctrl(in1); + Node* in2_c = get_ctrl(in2); + if (shenandoah_is_dominator(in1_c, in2_c, in1, in2)) { + assert(!shenandoah_is_dominator(in2_c, in1_c, in2, in1), "one has to dominate the other"); + assert(phi_dominates == -1 || phi_dominates == 1, "all inputs must dominate"); + phi_dominates = 1; + } else { + assert(shenandoah_is_dominator(in2_c, in1_c, in2, in1), "one must dominate the other"); + assert(!shenandoah_is_dominator(in1_c, in2_c, in1, in2), "one has to dominate the other"); + assert(phi_dominates == -1 || phi_dominates == 2, "all inputs must dominate"); + phi_dominates = 2; + } + } + i++; + + while (i >= phi1->req() && phis.size() > 0) { + i = phis.index(); + phi2 = phis.node(); + phis.pop(); + phi1 = phis.node(); + phis.pop(); + } + + if (i >= phi1->req() && phis.size() == 0) { + Node* phi = NULL; + if (phi_dominates == 1) { + return phi2; + } else if (phi_dominates == 2) { + return phi1; + } else { + return phi1; + } + } + } + return NULL; +} + +static Node* memory_for(Node* mem, const Node_List& phis) { + Node *m = mem; + while (m != NULL) { + mem = m; + m = phis[m->_idx]; + } + return mem; +} + +Node* PhaseIdealLoop::shenandoah_find_raw_mem(Node* ctrl, Node* n, const Node_List& memory_nodes, const Node_List& phis, bool strict) { + assert(n == NULL || ctrl_or_self(n) == ctrl, ""); + Node* raw_mem = memory_for(memory_nodes[ctrl->_idx], phis); + Node* c = ctrl; + while (raw_mem == NULL || (strict && get_ctrl(raw_mem) != c && (!c->is_CatchProj() || c->in(0)->in(0)->in(0) != get_ctrl(raw_mem)))) { + c = idom(c); + raw_mem = memory_for(memory_nodes[c->_idx], phis); + } + if (n != NULL && get_ctrl(raw_mem) == ctrl) { + while (!shenandoah_is_dominator_same_ctrl(c, raw_mem, n) && ctrl_or_self(raw_mem) == ctrl) { + raw_mem = shenandoah_next_mem(raw_mem, Compile::AliasIdxRaw); + } + if (raw_mem->is_MergeMem()) { + raw_mem 
= raw_mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
+    }
+    if (get_ctrl(raw_mem) != ctrl) {
+      do {
+        c = idom(c);
+        raw_mem = memory_for(memory_nodes[c->_idx], phis);
+      } while (raw_mem == NULL || (strict && get_ctrl(raw_mem) != c && (!c->is_CatchProj() || c->in(0)->in(0)->in(0) != get_ctrl(raw_mem))));
+    }
+  }
+  assert(raw_mem->bottom_type() == Type::MEMORY, "");
+  return raw_mem;
+}
+
+Node* PhaseIdealLoop::shenandoah_find_bottom_mem(Node* ctrl) {
+  Node* mem = NULL;
+  Node* c = ctrl;
+  do {
+    if (c->is_Region()) {
+      Node* phi_bottom = NULL;
+      for (DUIterator_Fast imax, i = c->fast_outs(imax); i < imax; i++) {
+        Node* u = c->fast_out(i);
+        if (u->is_Phi() && u->bottom_type() == Type::MEMORY) {
+          if (u->adr_type() == TypePtr::BOTTOM) {
+            if (phi_bottom != NULL) {
+              phi_bottom = NodeSentinel;
+            } else {
+              phi_bottom = u;
+            }
+          }
+        }
+      }
+      if (phi_bottom != NULL) {
+        if (phi_bottom != NodeSentinel) {
+          mem = phi_bottom;
+        } else {
+          Node* phi = NULL;
+          ResourceMark rm;
+          Node_Stack phis(0);
+          VectorSet visited(Thread::current()->resource_area());
+          for (DUIterator_Fast imax, i = c->fast_outs(imax); i < imax; i++) {
+            Node* u = c->fast_out(i);
+            if (u->is_Phi() && u->bottom_type() == Type::MEMORY && u->adr_type() == TypePtr::BOTTOM) {
+              if (phi == NULL) {
+                phi = u;
+              } else {
+                phi = shenandoah_pick_phi(phi, u, phis, visited);
+              }
+            }
+          }
+          mem = phi;
+        }
+      }
+    } else {
+      if (c->is_Call() && c->as_Call()->_entry_point != OptoRuntime::rethrow_stub()) {
+        CallProjections projs;
+        c->as_Call()->extract_projections(&projs, true, false);
+        if (projs.fallthrough_memproj != NULL) {
+          if (projs.fallthrough_memproj->adr_type() == TypePtr::BOTTOM) {
+            if (projs.catchall_memproj == NULL) {
+              mem = projs.fallthrough_memproj;
+            } else {
+              if (is_dominator(projs.fallthrough_catchproj, ctrl)) {
+                mem = projs.fallthrough_memproj;
+              } else {
+                assert(is_dominator(projs.catchall_catchproj, ctrl), "one proj must dominate barrier");
+                mem = projs.catchall_memproj;
+              }
+            }
+          }
+        } else {
+          Node* proj = c->as_Call()->proj_out(TypeFunc::Memory);
+          if (proj != NULL &&
+              proj->adr_type() == TypePtr::BOTTOM) {
+            mem = proj;
+          }
+        }
+      } else {
+        for (DUIterator_Fast imax, i = c->fast_outs(imax); i < imax; i++) {
+          Node* u = c->fast_out(i);
+          if (u->is_Proj() &&
+              u->bottom_type() == Type::MEMORY &&
+              u->adr_type() == TypePtr::BOTTOM) {
+            assert(c->is_SafePoint() || c->is_MemBar() || c->is_Start(), "");
+            assert(mem == NULL, "only one proj");
+            mem = u;
+          }
+        }
+      }
+    }
+    c = idom(c);
+  } while (mem == NULL);
+  return mem;
+}
+
+void PhaseIdealLoop::shenandoah_follow_barrier_uses(Node* n, Node* ctrl, Unique_Node_List& uses) {
+  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+    Node* u = n->fast_out(i);
+    if (!u->is_CFG() && !u->is_Phi() && get_ctrl(u) == ctrl) {
+      uses.push(u);
+    }
+  }
+}
+
+static void push_uses(Compile* C, Node* n, Unique_Node_List& wq, int alias) {
+  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+    Node* u = n->fast_out(i);
+    if ((u->is_Phi() && (u->adr_type() == TypePtr::BOTTOM || C->get_alias_index(n->adr_type()) == alias)) ||
+        (u->is_MergeMem() && u->as_MergeMem()->memory_at(alias) == n)) {
+      assert(!u->is_Phi() || u->in(0), "");
+      wq.push(u);
+    }
+  }
+}
+
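+// Helper for the memory-graph cleanup pass: n is a memory Phi and 'inputs'
+// holds the memory state computed for each of its region's predecessors
+// (offset by 'adj'). If the Phi disagrees with those inputs, it is replaced
+// by the unique differing input or by a better-matching Phi.
+void PhaseIdealLoop::shenandoah_collect_memory_nodes_helper(Node* n, int alias, GrowableArray<Node*>& inputs, int adj, Node_List& memory_nodes, Node_List& phis, Node*& cur_mem, Unique_Node_List& wq) {
+  const bool trace = false;
+  DEBUG_ONLY(if (trace) { tty->print("YYY phi post"); n->dump(); })
+  Node*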
r = n->in(0); + Node* self = memory_for(n, phis); + if (self != n) { + if (self->is_Phi()) { + wq.push(self); + } + return; + } + bool differs = false; + for (uint i = 1; i < n->req(); i++) { + Node* mem = memory_for(inputs.at(i + adj), phis); + DEBUG_ONLY(if (trace) { tty->print("YYY phi post input %d %s", i, mem != n->in(i) ? "differs" : "matches"); mem->dump(); }) + if (mem != n->in(i)) { + differs = true; + } + } + Node* mem = NULL; + if (differs) { + Node* unique = NULL; + for (uint i = 1; i < n->req() && unique != NodeSentinel; i++) { + Node* in = memory_for(inputs.at(i + adj), phis); + if (unique == NULL) { + if (in != self) { + unique = in; + } + } else if (in != self && in != unique) { + unique = NodeSentinel; + } + } + assert(unique != NULL, "at least one non backedge entry"); + if (unique != NodeSentinel) { + DEBUG_ONLY(if (trace) { tty->print("YYY phi post: unique input"); unique->dump(); }) + if (phis[n->_idx] != unique) { + phis.map(n->_idx, unique); + push_uses(C, n, wq, alias); + } + mem = unique; + if (C->get_alias_index(n->adr_type()) == alias) { + lazy_replace(n, unique); + } else { + shenandoah_fix_memory_uses(n, unique, unique, r, alias); + } + if (n->outcnt() == 0) { + wq.remove(n); + } + } else { + Node* better_phi = NULL; + for (DUIterator_Fast imax, i = r->fast_outs(imax); i < imax && better_phi == NULL; i++) { + Node* u = r->fast_out(i); + if (u != n && u->is_Phi() && u->bottom_type() == Type::MEMORY && + (u->adr_type() == TypePtr::BOTTOM || C->get_alias_index(u->adr_type()) == alias)) { + uint i = 1; + for (; i < u->req() && u->in(i) == memory_for(inputs.at(i + adj), phis); i++); + if (i == u->req()) { + better_phi = u; + } + } + } + if (better_phi != NULL) { + DEBUG_ONLY(if (trace) { tty->print("YYY phi post: better phi"); better_phi->dump(); }) + } else { + better_phi = new (C) PhiNode(r, Type::MEMORY, C->get_adr_type(alias)); + for (uint i = 1; i < better_phi->req(); i++) { + better_phi->init_req(i, memory_for(inputs.at(i+adj), phis)); + } + DEBUG_ONLY(if (trace) { tty->print("YYY phi post: creating better phi"); better_phi->dump(); }) + register_new_node(better_phi, r); + } + mem = better_phi; + if (phis[n->_idx] != better_phi) { + phis.map(n->_idx, better_phi); + push_uses(C, n, wq, alias); + } + if (C->get_alias_index(n->adr_type()) == alias) { + lazy_replace(n, better_phi); + } else { + shenandoah_fix_memory_uses(n, better_phi, better_phi, r, alias); + } + if (n->outcnt() == 0) { + wq.remove(n); + } + } + } else { + mem = n; + } + Node* other = memory_for(phis[r->_idx], phis); + Node* other2 = memory_for(memory_nodes[r->_idx], phis); + if (other != NULL) { + DEBUG_ONLY(if (trace) { tty->print("YYY phi post: other"); other->dump(); }) + if (other != mem) { + if (other->is_Phi() && other->in(0) == r && mem->is_Phi() && mem->in(0) == r) { + for (uint i = 1; i < mem->req(); i++) { + assert(mem->in(i) == other->in(i) || + (C->get_alias_index(mem->adr_type()) == alias && mem->in(i) == memory_for(get_ctrl(other->in(i)), phis)), ""); + } + if (mem->adr_type() == TypePtr::BOTTOM || C->get_alias_index(other->adr_type()) == alias) { + assert(mem->adr_type() != TypePtr::BOTTOM || mem == n, ""); + DEBUG_ONLY(if (trace) { tty->print("YYY phi post: replacing other with"); mem->dump(); }) + if (phis[other->_idx] != mem) { + phis.map(other->_idx, mem); + push_uses(C, other, wq, alias); + } + lazy_replace(other, mem); + wq.remove(other); + phis.map(r->_idx, mem); + cur_mem = mem; + DEBUG_ONLY(if (trace) { tty->print("YYY setting cur_mem %d", __LINE__); cur_mem->dump(); }) + 
} else { + DEBUG_ONLY(if (trace) { tty->print("YYY phi post: replacing with other"); mem->dump(); }) + assert(other->adr_type() == TypePtr::BOTTOM, ""); + if (phis[mem->_idx] != other) { + phis.map(mem->_idx, other); + push_uses(C, mem, wq, alias); + } + lazy_replace(mem, other); + wq.remove(mem); + cur_mem = other; + DEBUG_ONLY(if (trace) { tty->print("YYY setting cur_mem %d", __LINE__); cur_mem->dump(); }) + } + } else if (mem->is_Phi() && mem->in(0) == r) { + assert (is_dominator(ctrl_or_self(other), ctrl_or_self(mem)), ""); + cur_mem = other; + DEBUG_ONLY(if (trace) { tty->print("YYY setting cur_mem %d", __LINE__); cur_mem->dump(); }) + } else if (other->is_Phi() && other->in(0) == r) { + assert (is_dominator(ctrl_or_self(mem), ctrl_or_self(other)), ""); + cur_mem = mem; + } else { + ShouldNotReachHere(); + } + } else { + cur_mem = mem; + DEBUG_ONLY(if (trace) { tty->print("YYY setting cur_mem %d", __LINE__); cur_mem->dump(); }) + } + } else { + phis.map(r->_idx, mem); + cur_mem = mem; + DEBUG_ONLY(if (trace) { tty->print("YYY setting cur_mem %d", __LINE__); cur_mem->dump(); }) + } + if (other == other2) { + memory_nodes.map(r->_idx, memory_for(phis[r->_idx], phis)); + } else if (get_ctrl(other) != get_ctrl(other2)) { + assert(shenandoah_is_dominator(get_ctrl(other), get_ctrl(other2), other, other2) && !shenandoah_is_dominator(get_ctrl(other2), get_ctrl(other), other2, other), ""); + memory_nodes.map(r->_idx, memory_for(phis[r->_idx], phis)); + } else { + assert(ctrl_or_self(other2) == r && shenandoah_is_dominator_same_ctrl(r, other, other2) && !shenandoah_is_dominator_same_ctrl(r, other2, other), ""); + } +} + + +void PhaseIdealLoop::shenandoah_collect_memory_nodes(int alias, Node_List& memory_nodes, Node_List& phis) { + const bool trace = false; + + Node_Stack stack(0); + VectorSet visited(Thread::current()->resource_area()); + GrowableArray<Node*> inputs; + Node* cur_mem = NULL; + Unique_Node_List wq; + + stack.push(C->root(), 1); + do { + Node* n = stack.node(); + int opc = n->Opcode(); + uint i = stack.index(); + if (i < n->req()) { + Node* mem = NULL; + if (opc == Op_Root) { + Node* in = n->in(i); + int in_opc = in->Opcode(); + if (in_opc == Op_Return || in_opc == Op_Rethrow) { + mem = in->in(TypeFunc::Memory); + } else if (in_opc == Op_Halt) { + if (in->in(0)->is_Region()) { +#ifdef ASSERT + Node* r = in->in(0); + for (uint j = 1; j < r->req(); j++) { + assert(r->in(j)->is_Proj() && r->in(j)->in(0)->Opcode() == Op_NeverBranch, ""); + } +#endif + } else { + Node* proj = in->in(0); + assert(proj->is_Proj(), ""); + Node* in = proj->in(0); + assert(in->is_CallStaticJava() || in->Opcode() == Op_NeverBranch || in->Opcode() == Op_Catch, ""); + if (in->is_CallStaticJava()) { + mem = in->in(TypeFunc::Memory); + } else if (in->Opcode() == Op_Catch) { + Node* call = in->in(0)->in(0); + assert(call->is_Call(), ""); + mem = call->in(TypeFunc::Memory); + } + } + } else { +#ifdef ASSERT + n->dump(); + in->dump(); +#endif + ShouldNotReachHere(); + } + } else { + assert(n->is_Phi() && n->bottom_type() == Type::MEMORY, ""); + assert(n->adr_type() == TypePtr::BOTTOM || C->get_alias_index(n->adr_type()) == alias, ""); + assert(cur_mem != NULL, ""); + DEBUG_ONLY(if (trace) { tty->print("YYY inputs: pushing %d", __LINE__); cur_mem->dump(); }) + inputs.push(cur_mem); + cur_mem = NULL; + mem = n->in(i); + } + i++; + stack.set_index(i); + if (mem == NULL) { + continue; + } + for (;;) { + DEBUG_ONLY(if (trace) { tty->print("Y"); mem->dump(); }) + if (mem->is_Start()) { + break; + } + if
(visited.test(mem->_idx)) { + cur_mem = mem; + DEBUG_ONLY(if (trace) { tty->print("YYY setting cur_mem %d", __LINE__); cur_mem->dump(); }) + break; + } + if (mem->is_Phi()) { + visited.set(mem->_idx); + stack.push(mem, 2); + mem = mem->in(1); + } else if (mem->is_Proj()) { + visited.set(mem->_idx); + stack.push(mem, mem->req()); + mem = mem->in(0); + } else if (mem->is_SafePoint() || mem->is_MemBar()) { + mem = mem->in(TypeFunc::Memory); + } else if (mem->is_MergeMem()) { + mem = mem->as_MergeMem()->memory_at(alias); + } else if (mem->is_Store() || mem->is_LoadStore() || mem->is_ClearArray()) { + visited.set(mem->_idx); + stack.push(mem, mem->req()); + mem = mem->in(MemNode::Memory); + } else { +#ifdef ASSERT + mem->dump(); +#endif + ShouldNotReachHere(); + } + } + } else { + if (n->is_Phi()) { + assert(cur_mem != NULL, ""); + DEBUG_ONLY(if (trace) { tty->print("YYY inputs: pushing %d", __LINE__); cur_mem->dump(); }) + inputs.push(cur_mem); + cur_mem = NULL; + int adj = inputs.length() - n->req(); + shenandoah_collect_memory_nodes_helper(n, alias, inputs, adj, memory_nodes, phis, cur_mem, wq); + inputs.trunc_to(inputs.length() - n->req() + 1); + } else if (!n->is_Root()) { + cur_mem = n; + DEBUG_ONLY(if (trace) { tty->print("YYY setting cur_mem %d", __LINE__); cur_mem->dump(); }) + Node* c = get_ctrl(n); + Node* mem = memory_for(memory_nodes[c->_idx], phis); + DEBUG_ONLY(if (trace) { tty->print("YYY post"); n->dump(); }) + if (n->is_Proj() && n->in(0)->is_Call()) { + assert(c == n->in(0), ""); + CallNode* call = c->as_Call(); + CallProjections projs; + call->extract_projections(&projs, true, false); + if (projs.catchall_memproj != NULL) { + if (projs.fallthrough_memproj == n) { + c = projs.fallthrough_catchproj; + } else { + assert(projs.catchall_memproj == n, ""); + c = projs.catchall_catchproj; + } + } + } + assert(mem == NULL || mem == n || shenandoah_is_dominator_same_ctrl(c, mem, n) && !shenandoah_is_dominator_same_ctrl(c, n, mem), ""); + memory_nodes.map(c->_idx, n); + } + stack.pop(); + } + } while(stack.size() > 0); + assert(inputs.length() == 0, ""); + +#ifdef ASSERT + if (trace) { + tty->print_cr("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); + wq.dump(); + } +#endif + + while (wq.size() > 0) { + Node* n = wq.pop(); + if (n->is_Phi()) { + Node* r = n->in(0); + assert(r != NULL, ""); + for (uint i = 1; i < r->req(); i++) { + inputs.push(shenandoah_find_raw_mem(r->in(i), NULL, memory_nodes, phis, false)); + } + Node* dummy = NULL; + shenandoah_collect_memory_nodes_helper(n, alias, inputs, -1, memory_nodes, phis, dummy, wq); + inputs.clear(); + } else if (n->is_MergeMem()) { + push_uses(C, n, wq, alias); + } + } + +#ifdef ASSERT + if (trace) { + for (uint i = 0; i < memory_nodes.Size(); i++) { + if (memory_nodes[i] != NULL) { + tty->print("XXX %d -> ", i); memory_nodes[i]->dump(); + } + } + } +#endif +} + +void PhaseIdealLoop::shenandoah_fix_raw_mem(Node* ctrl, Node* region, Node* raw_mem, Node* raw_mem_phi, Node_List& memory_nodes, Node_List& memory_phis, Unique_Node_List& uses) { + const bool trace = false; + DEBUG_ONLY(if (trace) { tty->print("ZZZ control is"); ctrl->dump(); }); + DEBUG_ONLY(if (trace) { tty->print("ZZZ mem is"); raw_mem->dump(); }); + GrowableArray<Node*> phis; + Node* old = shenandoah_find_raw_mem(ctrl, NULL, memory_nodes, memory_phis, true); + if (old != raw_mem) { + Node* prev = NULL; + while (old != raw_mem) { + assert(old->is_Store() || old->is_LoadStore() || old->is_ClearArray(), ""); + prev = old; + old = old->in(MemNode::Memory); + } + assert(prev != NULL, ""); +
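// Reroute the pre-existing store chain: prev is the last store found + // between the current raw memory state and the barrier's input memory, + // and it now consumes the merged state raw_mem_phi instead. +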
memory_nodes.map(ctrl->_idx, raw_mem); + memory_nodes.map(region->_idx, old); + _igvn.replace_input_of(prev, MemNode::Memory, raw_mem_phi); + } else { + memory_nodes.map(region->_idx, raw_mem_phi); + uses.clear(); + uses.push(region); + for(uint next = 0; next < uses.size(); next++ ) { + Node *n = uses.at(next); + assert(n->is_CFG(), ""); + DEBUG_ONLY(if (trace) { tty->print("ZZZ ctrl"); n->dump(); }) + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node* u = n->fast_out(i); + if (!u->is_Root() && u->is_CFG() && u != n) { + Node* m = memory_nodes[u->_idx]; + if (u->is_Region() && !shenandoah_has_mem_phi(C, u, Compile::AliasIdxRaw)) { + DEBUG_ONLY(if (trace) { tty->print("ZZZ region"); u->dump(); }); + DEBUG_ONLY(if (trace && m != NULL) { tty->print("ZZZ mem"); m->dump(); }); + + if (m == NULL || !m->is_Phi() || m->in(0) != u) { + bool push = true; + bool create_phi = true; + if (is_dominator(region, u)) { + create_phi = false; + } else { + IdealLoopTree* loop = get_loop(ctrl); + bool do_check = true; + IdealLoopTree* l = loop; + create_phi = false; + while (l != _ltree_root) { + if (is_dominator(l->_head, u) && is_dominator(idom(u), l->_head)) { + create_phi = true; + do_check = false; + break; + } + l = l->_parent; + } + + if (do_check) { + assert(!create_phi, ""); + IdealLoopTree* u_loop = get_loop(u); + if (u_loop != _ltree_root && u_loop->is_member(loop)) { + Node* c = ctrl; + while (!is_dominator(c, u_loop->tail())) { + c = idom(c); + } + if (do_check && !is_dominator(c, u)) { + do_check = false; + } + } + } + + if (do_check && is_dominator(idom(u), region)) { + create_phi = true; + } + } + if (create_phi) { + Node* phi = new (C) PhiNode(u, Type::MEMORY, TypeRawPtr::BOTTOM); + register_new_node(phi, u); + phis.push(phi); + DEBUG_ONLY(if (trace) { tty->print("ZZZ new phi"); phi->dump(); }) + if (!(m != NULL && !m->is_Phi() && get_ctrl(m) == u)) { + DEBUG_ONLY(if (trace) { tty->print("ZZZ setting mem"); phi->dump(); }) + memory_nodes.map(u->_idx, phi); + } else { + DEBUG_ONLY(if (trace) { tty->print("ZZZ NOT setting mem"); m->dump(); }) + for (;;) { + assert(m->is_Mem() || m->is_LoadStore() || m->is_Proj() /*|| m->is_MergeMem()*/, ""); + Node* next = NULL; + if (m->is_Proj()) { + next = m->in(0); + } else { + next = m->in(MemNode::Memory); + } + if (get_ctrl(next) != u) { + break; + } + if (next->is_MergeMem()) { + assert(get_ctrl(next->as_MergeMem()->memory_at(Compile::AliasIdxRaw)) != u, ""); + break; + } + if (next->is_Phi()) { + assert(next->adr_type() == TypePtr::BOTTOM && next->in(0) == u, ""); + break; + } + m = next; + } + + DEBUG_ONLY(if (trace) { tty->print("ZZZ setting to phi"); m->dump(); }) + assert(m->is_Mem() || m->is_LoadStore(), ""); + _igvn.replace_input_of(m, MemNode::Memory, phi); + push = false; + } + } else { + DEBUG_ONLY(if (trace) { tty->print("ZZZ skipping region"); u->dump(); }); + } + if (push) { + uses.push(u); + } + } + } else if (m == NULL) { + uses.push(u); + } + } + } + } + for (int i = 0; i < phis.length(); i++) { + Node* n = phis.at(i); + Node* r = n->in(0); + DEBUG_ONLY(if (trace) { tty->print("ZZZ fixing new phi"); n->dump(); }) + for (uint j = 1; j < n->req(); j++) { + Node* m = shenandoah_find_raw_mem(r->in(j), NULL, memory_nodes, memory_phis, true); + _igvn.replace_input_of(n, j, m); + DEBUG_ONLY(if (trace) { tty->print("ZZZ fixing new phi: %d", j); m->dump(); }) + } + } + } + uint last = C->unique(); + MergeMemNode* mm = NULL; + int alias = Compile::AliasIdxRaw; + DEBUG_ONLY(if (trace) { tty->print("ZZZ raw mem is"); 
raw_mem->dump(); }) + for (DUIterator i = raw_mem->outs(); raw_mem->has_out(i); i++) { + Node* u = raw_mem->out(i); + if (u->_idx < last) { + if (u->is_Mem()) { + if (C->get_alias_index(u->adr_type()) == alias) { + Node* m = shenandoah_find_raw_mem(get_ctrl(u), u, memory_nodes, memory_phis, true); + if (m != raw_mem) { + DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); u->dump(); }) + _igvn.replace_input_of(u, MemNode::Memory, m); + --i; + } + } + } else if (u->is_MergeMem()) { + MergeMemNode* u_mm = u->as_MergeMem(); + if (u_mm->memory_at(alias) == raw_mem) { + MergeMemNode* newmm = NULL; + for (DUIterator_Fast jmax, j = u->fast_outs(jmax); j < jmax; j++) { + Node* uu = u->fast_out(j); + assert(!uu->is_MergeMem(), "chain of MergeMems?"); + if (uu->is_Phi()) { + assert(uu->adr_type() == TypePtr::BOTTOM, ""); + Node* region = uu->in(0); + int nb = 0; + for (uint k = 1; k < uu->req(); k++) { + if (uu->in(k) == u) { + Node* m = shenandoah_find_raw_mem(region->in(k), NULL, memory_nodes, memory_phis, true); + if (m != raw_mem) { + DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of phi %d", k); uu->dump(); }) + if (newmm == NULL || 1) { + newmm = shenandoah_clone_merge_mem(u, raw_mem, alias, m, ctrl_or_self(m), i); + } + if (newmm != u) { + _igvn.replace_input_of(uu, k, newmm); + nb++; + --jmax; + } + } + } + } + if (nb > 0) { + --j; + } + } else { + Node* m = shenandoah_find_raw_mem(ctrl_or_self(uu), uu, memory_nodes, memory_phis, true); + if (m != raw_mem) { + DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); uu->dump(); }) + if (newmm == NULL || 1) { + newmm = shenandoah_clone_merge_mem(u, raw_mem, alias, m, ctrl_or_self(m), i); + } + if (newmm != u) { + _igvn.replace_input_of(uu, uu->find_edge(u), newmm); + --j, --jmax; + } + } + } + } + } + } else if (u->is_Phi()) { + assert(u->bottom_type() == Type::MEMORY, "what else?"); + Node* region = u->in(0); + bool replaced = false; + for (uint j = 1; j < u->req(); j++) { + if (u->in(j) == raw_mem) { + Node* m = shenandoah_find_raw_mem(region->in(j), NULL, memory_nodes, memory_phis, true); + Node* nnew = m; + if (m != raw_mem) { + if (u->adr_type() == TypePtr::BOTTOM) { + if (mm == NULL || 1) { + mm = shenandoah_allocate_merge_mem(raw_mem, alias, m, ctrl_or_self(m)); + } + nnew = mm; + } + DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of phi %d", j); u->dump(); }) + _igvn.replace_input_of(u, j, nnew); + replaced = true; + } + } + } + if (replaced) { + --i; + } + } else if (u->adr_type() == TypePtr::BOTTOM || + u->adr_type() == NULL) { + assert(u->adr_type() != NULL || + u->Opcode() == Op_Rethrow || + u->Opcode() == Op_Return || + u->Opcode() == Op_SafePoint || + (u->is_CallStaticJava() && u->as_CallStaticJava()->uncommon_trap_request() != 0) || + (u->is_CallStaticJava() && u->as_CallStaticJava()->_entry_point == OptoRuntime::rethrow_stub()) || + u->Opcode() == Op_CallLeaf, ""); + Node* m = shenandoah_find_raw_mem(ctrl_or_self(u), u, memory_nodes, memory_phis, true); + if (m != raw_mem) { + if (mm == NULL || 1) { + mm = shenandoah_allocate_merge_mem(raw_mem, alias, m, get_ctrl(m)); + } + _igvn.replace_input_of(u, u->find_edge(raw_mem), mm); + --i; + } + } else if (C->get_alias_index(u->adr_type()) == alias) { + Node* m = shenandoah_find_raw_mem(ctrl_or_self(u), u, memory_nodes, memory_phis, true); + if (m != raw_mem) { + DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); u->dump(); }) + _igvn.replace_input_of(u, u->find_edge(raw_mem), m); + --i; + } + } + } + } +#ifdef ASSERT + 
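// Post-condition of the fixup: raw_mem_phi and every phi created along + // the way must have picked up at least one use. +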
assert(raw_mem_phi->outcnt() > 0, ""); + for (int i = 0; i < phis.length(); i++) { + Node* n = phis.at(i); + assert(n->outcnt() > 0, "new phi must have uses now"); + } +#endif +} + +void PhaseIdealLoop::shenandoah_pin_and_expand_barriers() { + const bool trace = false; + Node_List memory_nodes; + Node_List memory_phis; + + // Collect raw memory state at CFG points in the entire graph and + // record it in memory_nodes. Optimize the raw memory graph in the + // process and record all memory phis replaced by a more specific + // memory state in memory_phis. Optimizing the memory graph also + // makes the memory graph simpler. + shenandoah_collect_memory_nodes(Compile::AliasIdxRaw, memory_nodes, memory_phis); + + // Let's try to common write barriers again + for (int i = C->shenandoah_barriers_count(); i > 0; i--) { + ShenandoahBarrierNode* wb = C->shenandoah_barrier(i-1); + Node* ctrl = get_ctrl(wb); + try_common_shenandoah_barriers(wb, ctrl); + } + + for (int i = 0; i < C->shenandoah_barriers_count(); i++) { + ShenandoahBarrierNode* wb = C->shenandoah_barrier(i); + Node* ctrl = get_ctrl(wb); + + Node* val = wb->in(ShenandoahBarrierNode::ValueIn); + if (ctrl->is_Proj() && ctrl->in(0)->is_CallJava()) { + assert(shenandoah_is_dominator(get_ctrl(val), ctrl->in(0)->in(0), val, ctrl->in(0)), "can't move"); + set_ctrl(wb, ctrl->in(0)->in(0)); + } else if (ctrl->is_CallRuntime()) { + assert(shenandoah_is_dominator(get_ctrl(val), ctrl->in(0), val, ctrl), "can't move"); + set_ctrl(wb, ctrl->in(0)); + } + + assert(wb->Opcode() == Op_ShenandoahWriteBarrier, "only for write barriers"); + // Look for a null check that dominates this barrier and move the + // barrier right after the null check to enable implicit null + // checks + shenandoah_pin_and_expand_barriers_move_barrier(wb); + + ctrl = get_ctrl(wb); + } + + Unique_Node_List uses; + Unique_Node_List uses_to_ignore; + for (int i = C->shenandoah_barriers_count(); i > 0; i--) { + int cnt = C->shenandoah_barriers_count(); + ShenandoahBarrierNode* wb = C->shenandoah_barrier(i-1); + assert(wb->Opcode() == Op_ShenandoahWriteBarrier, "only for write barriers"); + + uint last = C->unique(); + Node* ctrl = get_ctrl(wb); + + Node* raw_mem = shenandoah_find_raw_mem(ctrl, wb, memory_nodes, memory_phis, true); + int alias = C->get_alias_index(wb->adr_type()); + Node* mem = wb->in(ShenandoahBarrierNode::Memory); + + // The slow path stub consumes and produces raw memory in addition + // to the existing memory edges + Node* base = shenandoah_find_bottom_mem(ctrl); + MergeMemNode* mm = MergeMemNode::make(C, base); + mm->set_memory_at(alias, mem); + mm->set_memory_at(Compile::AliasIdxRaw, raw_mem); + register_new_node(mm, ctrl); + + Node* val = wb->in(ShenandoahBarrierNode::ValueIn); + Node* wbproj = wb->find_out_with(Op_ShenandoahWBMemProj); + IdealLoopTree *loop = get_loop(ctrl); + + assert(val->Opcode() != Op_ShenandoahWriteBarrier || C->has_irreducible_loop(), "No chain of write barriers"); + + CallStaticJavaNode* unc = shenandoah_pin_and_expand_barriers_null_check(wb); + Node* unc_ctrl = val->in(0); + if (unc != NULL && val->in(0) != ctrl) { + unc = NULL; + } + + Node* uncasted_val = val; + if (unc != NULL) { + uncasted_val = val->in(1); + } + Node* thread = new (C) ThreadLocalNode(); + register_new_node(thread, ctrl); + Node* offset = _igvn.MakeConX(in_bytes(JavaThread::evacuation_in_progress_offset())); + set_ctrl(offset, C->root()); + Node* evacuation_in_progress_adr = new (C) AddPNode(C->top(), thread, offset); + 
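// Expanded barrier shape from here on: load the thread-local + // evacuation-in-progress byte, fence loads behind an acquire membar, then + // branch; the likely flag-clear path takes a plain read barrier, the + // unlikely flag-set path calls the write barrier slow stub. +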
register_new_node(evacuation_in_progress_adr, ctrl); + uint evacuation_in_progress_idx = Compile::AliasIdxRaw; + const TypePtr* evacuation_in_progress_adr_type = NULL; // debug-mode-only argument + debug_only(evacuation_in_progress_adr_type = C->get_adr_type(evacuation_in_progress_idx)); + + Node* evacuation_in_progress = new (C) LoadUBNode(ctrl, raw_mem, evacuation_in_progress_adr, evacuation_in_progress_adr_type, TypeInt::BOOL, MemNode::unordered); + register_new_node(evacuation_in_progress, ctrl); + + Node* mb = MemBarNode::make(C, Op_MemBarAcquire, Compile::AliasIdxRaw); + mb->init_req(TypeFunc::Control, ctrl); + mb->init_req(TypeFunc::Memory, mm); + register_control(mb, loop, ctrl); + Node* ctrl_proj = new (C) ProjNode(mb,TypeFunc::Control); + register_control(ctrl_proj, loop, mb); + Node* mem_proj = new (C) ProjNode(mb,TypeFunc::Memory); + register_new_node(mem_proj, mb); + + Node* evacuation_in_progress_cmp = new (C) CmpINode(evacuation_in_progress, _igvn.zerocon(T_INT)); + register_new_node(evacuation_in_progress_cmp, ctrl_proj); + Node* evacuation_in_progress_test = new (C) BoolNode(evacuation_in_progress_cmp, BoolTest::ne); + register_new_node(evacuation_in_progress_test, ctrl_proj); + IfNode* evacuation_iff = new (C) IfNode(ctrl_proj, evacuation_in_progress_test, PROB_UNLIKELY(0.999), COUNT_UNKNOWN); + register_control(evacuation_iff, loop, ctrl_proj); + Node* region = new (C) RegionNode(3); + Node* val_phi = PhiNode::make_blank(region, val); + + Node* mem_phi = PhiNode::make(region, mem_proj, Type::MEMORY, C->alias_type(wb->adr_type())->adr_type()); + Node* raw_mem_phi = PhiNode::make(region, mem_proj, Type::MEMORY, TypeRawPtr::BOTTOM); + Node* iffalse = new (C) IfFalseNode(evacuation_iff); + register_control(iffalse, loop, evacuation_iff); + Node* iftrue = new (C) IfTrueNode(evacuation_iff); + register_control(iftrue, loop, evacuation_iff); + + Node* c = iffalse; + Node* v = uncasted_val; + Node* unc_region = NULL; + if (unc != NULL) { + // Clone the null check in this branch to allow implicit null check + Node* iff = unc_ctrl->in(0); + assert(iff->is_If(), "broken"); + Node* new_iff = iff->clone(); + new_iff->set_req(0, c); + register_control(new_iff, loop, c); + Node* iffalse = new (C) IfFalseNode(new_iff->as_If()); + register_control(iffalse, loop, new_iff); + Node* iftrue = new (C) IfTrueNode(new_iff->as_If()); + register_control(iftrue, loop, new_iff); + c = iftrue; + unc_region = new (C) RegionNode(3); + unc_region->init_req(1, iffalse); + const Type *t = _igvn.type(val); + v = new (C) CastPPNode(uncasted_val, t); + v->init_req(0, c); + register_new_node(v, c); + } + region->init_req(1, c); + Node* rbfalse = new (C) ShenandoahReadBarrierNode(c, mem_proj, v); + register_new_node(rbfalse, c); + val_phi->init_req(1, rbfalse); + mem_phi->init_req(1, mem_proj); + raw_mem_phi->init_req(1, mem_proj); + + c = iftrue; + + if (unc != NULL) { + // Clone the null check in this branch to allow implicit null check + Node* iff = unc_ctrl->in(0); + assert(iff->is_If(), "broken"); + Node* new_iff = iff->clone(); + new_iff->set_req(0, c); + register_control(new_iff, loop, c); + Node* iffalse = new (C) IfFalseNode(new_iff->as_If()); + register_control(iffalse, loop, new_iff); + Node* iftrue = new (C) IfTrueNode(new_iff->as_If()); + register_control(iftrue, loop, new_iff); + c = iftrue; + unc_region->init_req(2, iffalse); + + Node* proj = iff->as_If()->proj_out(0); + assert(proj != unc_ctrl, "bad projection"); + Node* use = proj->unique_ctrl_out(); + + assert(use == unc || use->is_Region(), 
"what else?"); + + uses.clear(); + if (use == unc) { + set_idom(use, unc_region, dom_depth(unc_region)+1); + for (uint i = 1; i < unc->req(); i++) { + Node* n = unc->in(i); + if (has_ctrl(n) && get_ctrl(n) == proj) { + uses.push(n); + } + } + } else { + assert(use->is_Region(), "what else?"); + uint idx = 1; + for (; use->in(idx) != proj; idx++); + for (DUIterator_Fast imax, i = use->fast_outs(imax); i < imax; i++) { + Node* u = use->fast_out(i); + if (u->is_Phi() && get_ctrl(u->in(idx)) == proj) { + uses.push(u->in(idx)); + } + } + } + for(uint next = 0; next < uses.size(); next++ ) { + Node *n = uses.at(next); + assert(get_ctrl(n) == proj, "bad control"); + set_ctrl_and_loop(n, unc_region); + if (n->in(0) == proj) { + _igvn.replace_input_of(n, 0, unc_region); + } + for (uint i = 0; i < n->req(); i++) { + Node* m = n->in(i); + if (m != NULL && has_ctrl(m) && get_ctrl(m) == proj) { + uses.push(m); + } + } + } + + _igvn.rehash_node_delayed(use); + int nb = use->replace_edge(proj, unc_region); + assert(nb == 1, "only use expected"); + register_control(unc_region, _ltree_root, evacuation_iff); + + _igvn.replace_input_of(iff, 1, _igvn.intcon(1)); + const Type *t = _igvn.type(val); + v = new (C) CastPPNode(uncasted_val, t); + v->init_req(0, c); + register_new_node(v, c); + } + + Node* rbtrue = new (C) ShenandoahReadBarrierNode(c, mem_proj, v); + register_new_node(rbtrue, c); + + Node* call = new (C) CallLeafNoFPNode(OptoRuntime::shenandoah_write_barrier_Type(), StubRoutines::shenandoah_wb_C(), "shenandoah_write_barrier", TypeRawPtr::BOTTOM); + call->init_req(TypeFunc::Control, c); + call->init_req(TypeFunc::I_O, C->top()); + call->init_req(TypeFunc::Memory, mem_proj); + call->init_req(TypeFunc::FramePtr, C->top()); + call->init_req(TypeFunc::ReturnAdr, C->top()); + call->init_req(TypeFunc::Parms, rbtrue); + register_control(call, loop, c); + ctrl_proj = new (C) ProjNode(call, TypeFunc::Control); + register_control(ctrl_proj, loop, call); + mem_proj = new (C) ProjNode(call, TypeFunc::Memory); + register_new_node(mem_proj, call); + Node* res_proj = new (C) ProjNode(call, TypeFunc::Parms); + register_new_node(res_proj, call); + Node* res = new (C) CheckCastPPNode(ctrl_proj, res_proj, _igvn.type(val)); + register_new_node(res, ctrl_proj); + region->init_req(2, ctrl_proj); + val_phi->init_req(2, res); + mem_phi->init_req(2, mem_proj); + raw_mem_phi->init_req(2, mem_proj); + register_control(region, loop, evacuation_iff); + Node* out_val = val_phi; + register_new_node(val_phi, region); + register_new_node(mem_phi, region); + register_new_node(raw_mem_phi, region); + + // Update the control of all nodes that should be after the + // barrier control flow + uses.clear(); + // Every node that is control dependent on the barrier's input + // control will be after the expanded barrier. The raw memory (if + // its memory is control dependent on the barrier's input control) + // must stay above the barrier. 
+ uses_to_ignore.clear(); + if (has_ctrl(raw_mem) && get_ctrl(raw_mem) == ctrl && !raw_mem->is_Phi()) { + uses_to_ignore.push(raw_mem); + } + for (uint next = 0; next < uses_to_ignore.size(); next++) { + Node *n = uses_to_ignore.at(next); + for (uint i = 0; i < n->req(); i++) { + Node* in = n->in(i); + if (in != NULL && has_ctrl(in) && get_ctrl(in) == ctrl) { + uses_to_ignore.push(in); + } + } + } + for (DUIterator_Fast imax, i = ctrl->fast_outs(imax); i < imax; i++) { + Node* u = ctrl->fast_out(i); + if (u->_idx < last && + u != wb && + !uses_to_ignore.member(u) && + (u->in(0) != ctrl || (!u->is_Region() && !u->is_Phi())) && + (ctrl->Opcode() != Op_CatchProj || u->Opcode() != Op_CreateEx)) { + Node* old_c = ctrl_or_self(u); + Node* c = old_c; + if (c != ctrl || + shenandoah_is_dominator_same_ctrl(old_c, wb, u) || + u->is_g1_marking_load()) { + _igvn.rehash_node_delayed(u); + int nb = u->replace_edge(ctrl, region); + if (u->is_CFG()) { + if (idom(u) == ctrl) { + set_idom(u, region, dom_depth(region)); + } + } else if (get_ctrl(u) == ctrl) { + assert(u != raw_mem, "should leave input raw mem above the barrier"); + uses.push(u); + } + assert(nb == 1, "more than 1 ctrl input?"); + --i, imax -= nb; + } + } + } + + if (wbproj != NULL) { + _igvn.replace_input_of(wbproj, 0, C->top()); + lazy_replace(wbproj, mem_phi); + } + if (unc != NULL) { + for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) { + Node* u = val->fast_out(i); + Node* c = ctrl_or_self(u); + if (u != wb && (c != ctrl || shenandoah_is_dominator_same_ctrl(c, wb, u))) { + _igvn.rehash_node_delayed(u); + int nb = u->replace_edge(val, out_val); + --i, imax -= nb; + } + } + if (val->outcnt() == 0) { + lazy_update(val, out_val); + _igvn._worklist.push(val); + } + } + lazy_replace(wb, out_val); + + shenandoah_follow_barrier_uses(mem_phi, ctrl, uses); + shenandoah_follow_barrier_uses(out_val, ctrl, uses); + + for(uint next = 0; next < uses.size(); next++ ) { + Node *n = uses.at(next); + assert(get_ctrl(n) == ctrl, "bad control"); + assert(n != raw_mem, "should leave input raw mem above the barrier"); + set_ctrl(n, region); + shenandoah_follow_barrier_uses(n, ctrl, uses); + } + + recompute_dom_depth(); + + // The slow path call produces memory: hook the raw memory phi + // from the expanded write barrier with the rest of the graph + // which may require adding memory phis at every post dominated + // region and at enclosing loop heads. Use the memory state + // collected in memory_nodes to fix the memory graph. Update that + // memory state as we go. 
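+ // Each pass through the surrounding loop is expected to expand and retire + // exactly one write barrier; the assert on shenandoah_barriers_count() + // right after this call checks that.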
+ shenandoah_fix_raw_mem(ctrl ,region, raw_mem, raw_mem_phi, memory_nodes, memory_phis, uses); + assert(C->shenandoah_barriers_count() == cnt - 1, "not replaced"); + } + + assert(C->shenandoah_barriers_count() == 0, "all write barrier nodes should have been replaced"); +} + +#ifdef ASSERT +void ShenandoahBarrierNode::verify_raw_mem(RootNode* root) { + const bool trace = false; + ResourceMark rm; + Unique_Node_List nodes; + Unique_Node_List controls; + Unique_Node_List memories; + + nodes.push(root); + for (uint next = 0; next < nodes.size(); next++) { + Node *n = nodes.at(next); + if (n->Opcode() == Op_CallLeafNoFP && n->as_Call()->_entry_point == StubRoutines::shenandoah_wb_C()) { + controls.push(n); + if (trace) { tty->print("XXXXXX verifying"); n->dump(); } + for (uint next2 = 0; next2 < controls.size(); next2++) { + Node *m = controls.at(next2); + if (!m->is_Loop() || controls.member(m->in(LoopNode::EntryControl)) || 1) { + for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) { + Node* u = m->fast_out(i); + if (u->is_CFG() && !u->is_Root()) { + if (trace) { tty->print("XXXXXX pushing control"); u->dump(); } + controls.push(u); + } + } + } + } + memories.push(n->as_Call()->proj_out(TypeFunc::Memory)); + for (uint next2 = 0; next2 < memories.size(); next2++) { + Node *m = memories.at(next2); + assert(m->bottom_type() == Type::MEMORY, ""); + if (!m->is_Phi() || !m->in(0)->is_Loop() || controls.member(m->in(0)->in(LoopNode::EntryControl)) || 1) { + for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) { + Node* u = m->fast_out(i); + if (u->bottom_type() == Type::MEMORY && (u->is_Mem() || u->is_ClearArray())) { + memories.push(u); + } else if (u->is_LoadStore()) { + memories.push(u->find_out_with(Op_SCMemProj)); + } else if (u->is_MergeMem() && u->as_MergeMem()->memory_at(Compile::AliasIdxRaw) == m) { + memories.push(u); + } else if (u->is_Phi()) { + assert(u->bottom_type() == Type::MEMORY, ""); + if (u->adr_type() == TypeRawPtr::BOTTOM || u->adr_type() == TypePtr::BOTTOM) { + assert(controls.member(u->in(0)), ""); + memories.push(u); + } + } else if (u->is_SafePoint() || u->is_MemBar()) { + for (DUIterator_Fast jmax, j = u->fast_outs(jmax); j < jmax; j++) { + Node* uu = u->fast_out(j); + if (uu->bottom_type() == Type::MEMORY) { + memories.push(uu); + } + } + } + } + } + } + for (uint next2 = 0; next2 < controls.size(); next2++) { + Node *m = controls.at(next2); + if (m->is_Region()) { + bool all_in = true; + for (uint i = 1; i < m->req(); i++) { + if (!controls.member(m->in(i))) { + all_in = false; + break; + } + } + if (trace) { tty->print("XXX verifying %s", all_in ? 
"all in" : ""); m->dump(); } + bool found_phi = false; + for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax && !found_phi; j++) { + Node* u = m->fast_out(j); + if (u->is_Phi() && memories.member(u)) { + found_phi = true; + for (uint i = 1; i < u->req() && found_phi; i++) { + Node* k = u->in(i); + if (memories.member(k) != controls.member(m->in(i))) { + found_phi = false; + } + } + } + } + assert(found_phi || all_in, ""); + } + } + controls.clear(); + memories.clear(); + } + for( uint i = 0; i < n->len(); ++i ) { + Node *m = n->in(i); + if (m != NULL) { + nodes.push(m); + } + } + } +} +#endif diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/shenandoahSupport.hpp --- a/src/share/vm/opto/shenandoahSupport.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/shenandoahSupport.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -27,16 +27,29 @@ #include "gc_implementation/shenandoah/brooksPointer.hpp" #include "memory/allocation.hpp" #include "opto/addnode.hpp" +#include "opto/machnode.hpp" #include "opto/memnode.hpp" #include "opto/multnode.hpp" #include "opto/node.hpp" -class PhaseTransform; +class PhaseGVN; class ShenandoahBarrierNode : public TypeNode { private: bool _allow_fromspace; + +#ifdef ASSERT + enum verify_type { + ShenandoahLoad, + ShenandoahStore, + ShenandoahValue, + ShenandoahNone, + }; + + static bool verify_helper(Node* in, Node_Stack& phis, VectorSet& visited, verify_type t, bool trace, Unique_Node_List& barriers_used); +#endif + public: public: @@ -58,8 +71,16 @@ static Node* skip_through_barrier(Node* n); - virtual const class TypePtr* adr_type() const { - const TypePtr* adr_type = bottom_type()->is_ptr()->add_offset(BrooksPointer::byte_offset()); + static const TypeOopPtr* brooks_pointer_type(const Type* t) { + return t->is_oopptr()->cast_to_nonconst()->add_offset(BrooksPointer::byte_offset())->is_oopptr(); + } + + virtual const TypePtr* adr_type() const { + if (bottom_type() == Type::TOP) { + return NULL; + } + //const TypePtr* adr_type = in(MemNode::Address)->bottom_type()->is_ptr(); + const TypePtr* adr_type = brooks_pointer_type(bottom_type()); assert(adr_type->offset() == BrooksPointer::byte_offset(), "sane offset"); assert(Compile::current()->alias_type(adr_type)->is_rewritable(), "brooks ptr must be rewritable"); return adr_type; @@ -70,24 +91,24 @@ return idx >= ValueIn; } - virtual Node* Identity(PhaseTransform* phase); Node* Identity_impl(PhaseTransform* phase); - virtual Node *Ideal_DU_postCCP( PhaseCCP * ); - virtual const Type* Value(PhaseTransform* phase) const; virtual bool depends_only_on_test() const { return true; }; -#ifdef ASSERT - void check_invariants(); - uint num_mem_projs(); -#endif static bool needs_barrier(PhaseTransform* phase, ShenandoahBarrierNode* orig, Node* n, Node* rb_mem, bool allow_fromspace); - static bool has_barrier_users(Node* n, Unique_Node_List &visited); + static void verify(RootNode* root); +#ifdef ASSERT + static void verify_raw_mem(RootNode* root); +#endif +#ifndef PRODUCT + virtual void dump_spec(outputStream *st) const; +#endif +protected: uint hash() const; uint cmp(const Node& n) const; uint size_of() const; @@ -96,9 +117,8 @@ static bool needs_barrier_impl(PhaseTransform* phase, ShenandoahBarrierNode* orig, Node* n, Node* rb_mem, bool allow_fromspace, Unique_Node_List &visited); - bool dominates_control(PhaseTransform* phase, Node* c1, Node* c2); - bool dominates_memory(PhaseTransform* phase, Node* b1, Node* b2); - bool dominates_memory_impl(PhaseTransform* phase, Node* b1, Node* b2, Node* current, Unique_Node_List 
&visisted); + static bool dominates_memory(PhaseTransform* phase, Node* b1, Node* b2, bool linear); + static bool dominates_memory_impl(PhaseTransform* phase, Node* b1, Node* b2, Node* current, bool linear); }; class ShenandoahReadBarrierNode : public ShenandoahBarrierNode { @@ -114,20 +134,31 @@ virtual Node* Identity(PhaseTransform* phase); virtual int Opcode() const; + bool is_independent(Node* mem); + private: - bool is_independent(const Type* in_type, const Type* this_type) const; - bool dominates_memory_rb(PhaseTransform* phase, Node* b1, Node* b2); - bool dominates_memory_rb_impl(PhaseTransform* phase, Node* b1, Node* b2, Node* current, Unique_Node_List &visited); + static bool is_independent(const Type* in_type, const Type* this_type); + static bool dominates_memory_rb(PhaseTransform* phase, Node* b1, Node* b2, bool linear); + static bool dominates_memory_rb_impl(PhaseTransform* phase, Node* b1, Node* b2, Node* current, bool linear); }; class ShenandoahWriteBarrierNode : public ShenandoahBarrierNode { public: - ShenandoahWriteBarrierNode(Node* ctrl, Node* mem, Node* obj) - : ShenandoahBarrierNode(ctrl, mem, obj, true) { + ShenandoahWriteBarrierNode(Compile* C, Node* ctrl, Node* mem, Node* obj) + : ShenandoahBarrierNode(ctrl, mem, obj, false) { + C->add_shenandoah_barrier(this); + //tty->print("new wb: "); dump(); } virtual int Opcode() const; virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); + virtual Node* Identity(PhaseTransform* phase); + virtual bool depends_only_on_test() const { return false; } + + // virtual void set_req( uint i, Node *n ) { + // if (i == MemNode::Memory) { assert(n == Compiler::current()->immutable_memory(), "set only immutable mem on wb"); } + // Node::set_req(i, n); + // } }; class ShenandoahWBMemProjNode : public ProjNode { @@ -135,9 +166,6 @@ enum {SWBMEMPROJCON = (uint)-3}; ShenandoahWBMemProjNode(Node *src) : ProjNode( src, SWBMEMPROJCON) { assert(src->Opcode() == Op_ShenandoahWriteBarrier || src->is_Mach(), "epxect wb"); -#ifdef ASSERT - in(0)->as_ShenandoahBarrier()->check_invariants(); -#endif } virtual Node* Identity(PhaseTransform* phase); @@ -145,14 +173,14 @@ virtual bool is_CFG() const { return false; } virtual const Type *bottom_type() const {return Type::MEMORY;} virtual const TypePtr *adr_type() const { - Node* ctrl = in(0); - if (ctrl == NULL) return NULL; // node is dead - assert(ctrl->Opcode() == Op_ShenandoahWriteBarrier || ctrl->is_Mach(), "expect wb"); - return ctrl->adr_type(); + Node* wb = in(0); + if (wb == NULL || wb->is_top()) return NULL; // node is dead + assert(wb->Opcode() == Op_ShenandoahWriteBarrier || (wb->is_Mach() && wb->as_Mach()->ideal_Opcode() == Op_ShenandoahWriteBarrier), "expect wb"); + return ShenandoahBarrierNode::brooks_pointer_type(wb->bottom_type()); } virtual uint ideal_reg() const { return 0;} // memory projections don't have a register - virtual const Type *Value( PhaseTransform *phase ) const { + virtual const Type *Value(PhaseTransform* phase ) const { return bottom_type(); } #ifndef PRODUCT diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/stringopts.cpp --- a/src/share/vm/opto/stringopts.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/stringopts.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -32,6 +32,7 @@ #include "opto/idealKit.hpp" #include "opto/rootnode.hpp" #include "opto/runtime.hpp" +#include "opto/shenandoahSupport.hpp" #include "opto/stringopts.hpp" #include "opto/subnode.hpp" @@ -1378,7 +1379,10 @@ Node* count = kit.load_String_length(kit.control(), string); Node* value = 
kit.load_String_value (kit.control(), string); + value = kit.shenandoah_read_barrier(value); + // copy the contents + assert(!(ShenandoahBarrierNode::skip_through_barrier(value)->is_Con() && !value->is_Con()), "barrier prevents optimization"); if (offset->is_Con() && count->is_Con() && value->is_Con() && count->get_int() < unroll_string_copy_length) { // For small constant strings just emit individual stores. // A length of 6 seems like a good space/speed tradeof. @@ -1596,7 +1600,6 @@ char_alloc->maybe_set_complete(_gvn); // Now copy the string representations into the final char[] - char_array = __ shenandoah_write_barrier(char_array); Node* start = __ intcon(0); for (int argi = 0; argi < sc->num_arguments(); argi++) { Node* arg = sc->argument(argi); diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/subnode.cpp --- a/src/share/vm/opto/subnode.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/subnode.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -36,6 +36,7 @@ #include "opto/phaseX.hpp" #include "opto/shenandoahSupport.hpp" #include "opto/subnode.hpp" +#include "opto/shenandoahSupport.hpp" #include "runtime/sharedRuntime.hpp" // Portions of code courtesy of Clifford Click @@ -106,8 +107,20 @@ if (t != NULL) { return t; } - const Type* t1 = phase->type(in(1)); - const Type* t2 = phase->type(in(2)); + Node* in1 = in(1); + Node* in2 = in(2); + if (Opcode() == Op_CmpP) { + Node* n = ShenandoahBarrierNode::skip_through_barrier(in1); + if (!n->is_top()) { + in1 = n; + } + n = ShenandoahBarrierNode::skip_through_barrier(in2); + if (!n->is_top()) { + in2 = n; + } + } + const Type* t1 = phase->type(in1); + const Type* t2 = phase->type(in2); return sub(t1,t2); // Local flavor of type subtraction } @@ -906,7 +919,11 @@ if (!in(2)->is_Phi() || region == in(2)->in(0)) { if (region->in(1) != NULL && region->in(2) != NULL && - region->in(1)->in(0) == region->in(2)->in(0) && + region->in(2)->is_Proj() && + region->in(2)->in(0) != NULL && + region->in(2)->in(0)->is_MemBar() && + region->in(2)->in(0)->in(0) != NULL && + region->in(1)->in(0) == region->in(2)->in(0)->in(0)->in(0) && region->in(1)->in(0)->is_If()) { Node* iff = region->in(1)->in(0); if (iff->in(1) != NULL && @@ -918,6 +935,31 @@ (!in(2)->is_Phi() || in(2)->in(1) == cmp->in(2)) && in(1)->in(2)->in(ShenandoahBarrierNode::ValueIn) == cmp->in(1) && (!in(2)->is_Phi() || in(2)->in(2)->in(ShenandoahBarrierNode::ValueIn) == cmp->in(2))) { + MemBarNode* membar = region->in(2)->in(0)->as_MemBar(); + Node* ctrl_proj = membar->proj_out(TypeFunc::Control); + Node* mem_proj = membar->proj_out(TypeFunc::Memory); + Node* rb1 = in(1)->in(2); + Node* rb2 = in(2)->is_Phi() ? in(2)->in(2) : NULL; + uint nb_rb = (rb2 == NULL) ? 
1 : 2; + if (region->in(1)->outcnt() == 1 && + membar->in(0)->outcnt() == 1 && + mem_proj->outcnt() == nb_rb + 1 && + ctrl_proj->outcnt() == nb_rb + 1 && + rb1->in(ShenandoahBarrierNode::Control) == ctrl_proj && + rb1->in(ShenandoahBarrierNode::Memory) == mem_proj && + (rb2 == NULL || (rb2->in(ShenandoahBarrierNode::Control) == ctrl_proj && + rb2->in(ShenandoahBarrierNode::Memory) == mem_proj))) { + if (can_reshape) { + PhaseIterGVN* igvn = phase->is_IterGVN(); + igvn->replace_input_of(region, 2, membar->in(0)); + igvn->replace_input_of(membar, 0, phase->C->top()); + } else { + region->set_req(2, membar->in(0)); + membar->set_req(0, phase->C->top()); + phase->C->record_for_igvn(region); + phase->C->record_for_igvn(membar); + } + } return true; } } @@ -945,8 +987,11 @@ Node* uu = u->fast_out(j); if (uu->is_If() && uu->in(0) != NULL && - uu->in(0)->Opcode() == Op_IfTrue) { - Node* iff = uu->in(0)->in(0); + uu->in(0)->is_Proj() && + uu->in(0)->in(0)->is_MemBar() && + uu->in(0)->in(0)->in(0) != NULL && + uu->in(0)->in(0)->in(0)->Opcode() == Op_IfTrue) { + Node* iff = uu->in(0)->in(0)->in(0)->in(0); if (iff->in(1) != NULL && iff->in(1)->is_Bool() && iff->in(1)->as_Bool()->_test._test == BoolTest::ne && @@ -977,6 +1022,37 @@ // checking to see an unknown klass subtypes a known klass with no subtypes; // this only happens on an exact match. We can shorten this test by 1 load. Node *CmpPNode::Ideal( PhaseGVN *phase, bool can_reshape ) { + if (UseShenandoahGC) { + Node* in1 = in(1); + Node* in2 = in(2); + if (in1->bottom_type() == TypePtr::NULL_PTR || + AllocateNode::Ideal_allocation(in1, phase) != NULL) { + in2 = ShenandoahBarrierNode::skip_through_barrier(in2); + } + if (in2->bottom_type() == TypePtr::NULL_PTR || + AllocateNode::Ideal_allocation(in2, phase) != NULL) { + in1 = ShenandoahBarrierNode::skip_through_barrier(in1); + } + PhaseIterGVN* igvn = phase->is_IterGVN(); + if (in1 != in(1)) { + if (igvn != NULL) { + set_req_X(1, in1, igvn); + } else { + set_req(1, in1); + } + assert(in2 == in(2), "only one change"); + return this; + } + if (in2 != in(2)) { + if (igvn != NULL) { + set_req_X(2, in2, igvn); + } else { + set_req(2, in2); + } + return this; + } + } + // Normalize comparisons between Java mirrors into comparisons of the low- // level klass, where a dependent load could be shortened. // @@ -996,14 +1072,14 @@ if (k1 && (k2 || conk2)) { if (!UseShenandoahGC || shenandoah_optimize_java_mirror_cmp(phase, can_reshape)) { - Node* lhs = k1; - Node* rhs = (k2 != NULL) ? k2 : conk2; - this->set_req(1, lhs); - this->set_req(2, rhs); - return this; - } + Node* lhs = k1; + Node* rhs = (k2 != NULL) ? k2 : conk2; + this->set_req(1, lhs); + this->set_req(2, rhs); + return this; } } + } // Constant pointer on right? 
const TypeKlassPtr* t2 = phase->type(in(2))->isa_klassptr(); diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/superword.cpp --- a/src/share/vm/opto/superword.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/superword.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -1685,7 +1685,7 @@ Node* n_tail = n->in(LoopNode::LoopBackControl); if (n_tail != n->in(LoopNode::EntryControl)) { if (!n_tail->is_Mem()) { - assert(n_tail->is_Mem(), err_msg_res("unexpected node for memory slice: %s", n_tail->Name())); + assert(n_tail->is_Mem() || n_tail == n, err_msg_res("unexpected node for memory slice: %s", n_tail->Name())); return false; // Bailout } _mem_slice_head.push(n); @@ -2311,6 +2311,23 @@ assert(!valid(), "unsafe access"); return; } + // Detect a Shenandoah write barrier between the pre and main loop + // (which could break loop alignment code) + CountedLoopNode *main_head = slp->lp()->as_CountedLoop(); + Node* c = main_head->in(LoopNode::EntryControl)->in(0)->in(0)->in(0); + if (!c->is_CountedLoopEnd()) { + // in case of a reserve copy + c = c->in(0)->in(0); + assert(c->is_CountedLoopEnd(), "where's the pre loop?"); + } + CountedLoopEndNode* pre_end = c->as_CountedLoopEnd(); + CountedLoopNode* pre_loop = pre_end->loopnode(); + assert(pre_loop->is_pre_loop(), "where's the pre loop?"); + + Node* base_c = phase()->get_ctrl(base); + if (!phase()->is_dominator(base_c, pre_loop)) { + return; + } for (int i = 0; i < 3; i++) { if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { assert(!valid(), "too complex"); diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/superword.hpp --- a/src/share/vm/opto/superword.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/superword.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -204,6 +204,7 @@ // -----------------------------SuperWord--------------------------------- // Transforms scalar operations into packed (superword) operations. 
class SuperWord : public ResourceObj { + friend class SWPointer; private: PhaseIdealLoop* _phase; Arena* _arena; diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/type.cpp --- a/src/share/vm/opto/type.cpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/type.cpp Thu Dec 08 13:28:52 2016 +0100 @@ -28,6 +28,7 @@ #include "classfile/symbolTable.hpp" #include "classfile/systemDictionary.hpp" #include "compiler/compileLog.hpp" +#include "gc_implementation/shenandoah/brooksPointer.hpp" #include "libadt/dict.hpp" #include "memory/gcLocker.hpp" #include "memory/oopFactory.hpp" @@ -2516,6 +2517,8 @@ if (_offset != 0) { if (_offset == oopDesc::klass_offset_in_bytes()) { _is_ptr_to_narrowklass = UseCompressedClassPointers; + } else if (_offset == BrooksPointer::byte_offset()) { + // Shenandoah doesn't support compressed oops } else if (klass() == NULL) { // Array with unknown body type assert(this->isa_aryptr(), "only arrays without klass"); @@ -2541,7 +2544,8 @@ assert(this->isa_instptr(), "must be an instance ptr."); _is_ptr_to_narrowoop = false; } else if (klass() == ciEnv::current()->Class_klass() && - _offset >= InstanceMirrorKlass::offset_of_static_fields()) { + _offset >= InstanceMirrorKlass::offset_of_static_fields() && + !UseShenandoahGC) { // Static fields assert(o != NULL, "must be constant"); ciInstanceKlass* k = o->as_instance()->java_lang_Class_klass()->as_instance_klass(); @@ -2597,6 +2601,10 @@ return this; } +const TypeOopPtr *TypeOopPtr::cast_to_nonconst() const { + return this; +} + //-----------------------------cast_to_exactness------------------------------- const Type *TypeOopPtr::cast_to_exactness(bool klass_is_exact) const { // There is no such thing as an exact general oop. @@ -3254,6 +3262,11 @@ return make(_ptr, klass(), _klass_is_exact, const_oop(), _offset, instance_id, _speculative, _inline_depth); } +const TypeOopPtr *TypeInstPtr::cast_to_nonconst() const { + if (const_oop() == NULL) return this; + return make(NotNull, klass(), _klass_is_exact, NULL, _offset, _instance_id, _speculative, _inline_depth); +} + //------------------------------xmeet_unloaded--------------------------------- // Compute the MEET of two InstPtrs when at least one is unloaded. // Assume classes are different since called after check for same name/class-loader @@ -3772,6 +3785,12 @@ return make(_ptr, const_oop(), _ary, klass(), _klass_is_exact, _offset, instance_id, _speculative, _inline_depth); } +const TypeOopPtr *TypeAryPtr::cast_to_nonconst() const { + if (const_oop() == NULL) return this; + return make(NotNull, NULL, _ary, klass(), _klass_is_exact, _offset, _instance_id, _speculative, _inline_depth); +} + + //-----------------------------narrow_size_type------------------------------- // Local cache for arrayOopDesc::max_array_length(etype), // which is kind of slow (and cached elsewhere by other users). 
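Note on the cast_to_nonconst() overrides added above: they serve the barrier's adr_type(). A write barrier applied to a constant oop may return the forwarded to-space copy rather than the constant itself, so the Brooks pointer slice derived from the barrier's type must not remain a constant type. The consumer, quoted from the shenandoahSupport.hpp hunk earlier in this changeset (comment added here for explanation):

  static const TypeOopPtr* brooks_pointer_type(const Type* t) {
    // Drop const-ness before offsetting: the derived slot type must stay
    // rewritable even when the barrier input is a compile-time constant.
    return t->is_oopptr()->cast_to_nonconst()->add_offset(BrooksPointer::byte_offset())->is_oopptr();
  }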
diff -r db98996d26b2 -r da17b9cffd4f src/share/vm/opto/type.hpp --- a/src/share/vm/opto/type.hpp Thu Dec 08 17:48:03 2016 +0100 +++ b/src/share/vm/opto/type.hpp Thu Dec 08 13:28:52 2016 +0100 @@ -970,6 +970,8 @@ virtual const TypeOopPtr *cast_to_instance_id(int instance_id) const; + virtual const TypeOopPtr *cast_to_nonconst() const; + // corresponding pointer to klass, for a given instance const TypeKlassPtr* as_klass_type() const; @@ -1061,6 +1063,8 @@ virtual const TypeOopPtr *cast_to_instance_id(int instance_id) const; + virtual const TypeOopPtr *cast_to_nonconst() const; + virtual const TypePtr *add_offset( intptr_t offset ) const; // Return same type without a speculative part virtual const Type* remove_speculative() const; @@ -1136,6 +1140,8 @@ virtual const TypeOopPtr *cast_to_instance_id(int instance_id) const; + virtual const TypeOopPtr *cast_to_nonconst() const; + virtual const TypeAryPtr* cast_to_size(const TypeInt* size) const; virtual const TypeInt* narrow_size_type(const TypeInt* size) const; @@ -1646,7 +1652,7 @@ } inline const TypeOopPtr* Type::make_oopptr() const { - return (_base == NarrowOop) ? is_narrowoop()->get_ptrtype()->is_oopptr() : is_oopptr(); + return (_base == NarrowOop) ? is_narrowoop()->get_ptrtype()->is_oopptr() : isa_oopptr(); } inline const TypeNarrowOop* Type::make_narrowoop() const { changeset: 9481:88c8ad7d034b user: rkennke date: Mon Dec 12 17:03:12 2016 +0100 summary: Added missing include of oop closures. Fixes linking problem. diff -r da17b9cffd4f -r 88c8ad7d034b src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Thu Dec 08 13:28:52 2016 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Mon Dec 12 17:03:12 2016 +0100 @@ -29,6 +29,7 @@ #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" #include "gc_implementation/shenandoah/shenandoahRootProcessor.hpp" #include "gc_implementation/shenandoah/shenandoah_specialized_oop_closures.hpp" +#include "gc_implementation/shenandoah/shenandoahOopClosures.inline.hpp" #include "gc_implementation/shenandoah/brooksPointer.hpp" #include "memory/referenceProcessor.hpp" #include "gc_implementation/shenandoah/shenandoahTaskqueue.hpp" changeset: 9482:cb8a8ef885c3 user: rkennke date: Thu Dec 15 17:10:37 2016 +0100 summary: Prevent C2 optimization that turns oop arraycopy into int arraycopy and elide the required post-barrier. diff -r 88c8ad7d034b -r cb8a8ef885c3 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Mon Dec 12 17:03:12 2016 +0100 +++ b/src/share/vm/opto/library_call.cpp Thu Dec 15 17:10:37 2016 +0100 @@ -5322,7 +5322,7 @@ // At this point we know we do not need type checks on oop stores. // Let's see if we need card marks: - if (alloc != NULL && use_ReduceInitialCardMarks()) { + if (alloc != NULL && use_ReduceInitialCardMarks() && ! UseShenandoahGC) { // If we do not need card marks, copy using the jint or jlong stub. copy_type = LP64_ONLY(UseCompressedOops ? T_INT : T_LONG) NOT_LP64(T_INT); assert(type2aelembytes(basic_elem_type) == type2aelembytes(copy_type), changeset: 9483:91b6e4811a5f user: rkennke date: Mon Dec 19 12:05:22 2016 +0100 summary: Ensure metadata alive for Shenandoah too. 
diff -r cb8a8ef885c3 -r 91b6e4811a5f src/share/vm/ci/ciObjectFactory.cpp --- a/src/share/vm/ci/ciObjectFactory.cpp Thu Dec 15 17:10:37 2016 +0100 +++ b/src/share/vm/ci/ciObjectFactory.cpp Mon Dec 19 12:05:22 2016 +0100 @@ -403,7 +403,7 @@ ASSERT_IN_VM; // We're handling raw oops here. #if INCLUDE_ALL_GCS - if (!UseG1GC) { + if (!(UseG1GC || UseShenandoahGC)) { return; } Klass* metadata_owner_klass; changeset: 9484:c7ccb4a2b360 user: rkennke date: Mon Dec 19 15:17:46 2016 +0100 summary: Added missing read-barrier to inline_unsafe_ordered_store() in C2 intrinsics. diff -r 91b6e4811a5f -r c7ccb4a2b360 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Mon Dec 19 12:05:22 2016 +0100 +++ b/src/share/vm/opto/library_call.cpp Mon Dec 19 15:17:46 2016 +0100 @@ -3199,9 +3199,10 @@ // Ensure that the store is atomic for longs: const bool require_atomic_access = true; Node* store; - if (type == T_OBJECT) // reference stores need a store barrier. + if (type == T_OBJECT) { // reference stores need a store barrier. + val = shenandoah_read_barrier_storeval(val); store = store_oop_to_unknown(control(), base, adr, adr_type, val, type, MemNode::release); - else { + } else { store = store_to_memory(control(), adr, val, type, adr_type, MemNode::release, require_atomic_access); } insert_mem_bar(Op_MemBarCPUOrder); changeset: 9485:eb39f84890cb user: rkennke date: Mon Dec 19 17:33:12 2016 +0100 summary: Add missing eq barrier in opto runtime. diff -r c7ccb4a2b360 -r eb39f84890cb src/share/vm/opto/runtime.cpp --- a/src/share/vm/opto/runtime.cpp Mon Dec 19 15:17:46 2016 +0100 +++ b/src/share/vm/opto/runtime.cpp Mon Dec 19 17:33:12 2016 +0100 @@ -1252,7 +1252,7 @@ // Update the exception cache only when the unwind was not forced // and there didn't happen another exception during the computation of the // compiled exception handler. - if (!force_unwind && original_exception() == exception()) { + if (!force_unwind && oopDesc::equals(original_exception(), exception())) { nm->add_handler_for_exception_and_pc(exception,pc,handler_address); } } else { changeset: 9486:05f696d8443b user: roland date: Tue Dec 20 11:03:57 2016 +0100 summary: null check bypasses read barrier diff -r eb39f84890cb -r 05f696d8443b src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Mon Dec 19 17:33:12 2016 +0100 +++ b/src/share/vm/opto/compile.cpp Tue Dec 20 11:03:57 2016 +0100 @@ -2906,7 +2906,7 @@ Node *m = wq.at(next); for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) { Node* use = m->fast_out(i); - if (use->is_Mem() || use->is_EncodeNarrowPtr()) { + if (use->is_Mem() || use->is_EncodeNarrowPtr() || use->is_ShenandoahBarrier()) { use->ensure_control_or_add_prec(n->in(0)); } else if (use->in(0) == NULL) { switch(use->Opcode()) { changeset: 9487:b9bba0d6458d user: roland date: Tue Dec 20 12:44:01 2016 +0100 summary: read barrier in unsafe can break C2 graph diff -r 05f696d8443b -r b9bba0d6458d src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Tue Dec 20 11:03:57 2016 +0100 +++ b/src/share/vm/opto/library_call.cpp Tue Dec 20 12:44:01 2016 +0100 @@ -2806,8 +2806,8 @@ __ if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); { // Sync IdealKit and graphKit. 
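// The hunk below binds the barrier result to a fresh local (rb) instead of // reassigning val: a barrier node created under this if_then must not leak // into the off-heap else-branch (per the changeset summary, that reuse was // breaking the C2 graph).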
sync_kit(ideal); - val = shenandoah_read_barrier_storeval(val); - Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo); + Node* rb = shenandoah_read_barrier_storeval(val); + Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, rb, type, mo); // Update IdealKit memory. __ sync_kit(this); } __ else_(); { changeset: 9488:4ba3e50858e2 user: roland date: Tue Dec 20 14:29:46 2016 +0100 summary: add back accidentally dropped write barriers in GraphKit::store_String_* diff -r b9bba0d6458d -r 4ba3e50858e2 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Tue Dec 20 12:44:01 2016 +0100 +++ b/src/share/vm/opto/graphKit.cpp Tue Dec 20 14:29:46 2016 +0100 @@ -4181,6 +4181,9 @@ false, NULL, 0); const TypePtr* offset_field_type = string_type->add_offset(offset_offset); int offset_field_idx = C->get_alias_index(offset_field_type); + + str = shenandoah_write_barrier(str); + store_to_memory(ctrl, basic_plus_adr(str, offset_offset), value, T_INT, offset_field_idx, MemNode::unordered); } @@ -4204,6 +4207,9 @@ false, NULL, 0); const TypePtr* count_field_type = string_type->add_offset(count_offset); int count_field_idx = C->get_alias_index(count_field_type); + + str = shenandoah_write_barrier(str); + store_to_memory(ctrl, basic_plus_adr(str, count_offset), value, T_INT, count_field_idx, MemNode::unordered); } changeset: 9489:9ba353933d12 user: rkennke date: Wed Dec 21 19:27:57 2016 +0100 summary: Fix freeze on OOM-on-evac regarding the PLL. diff -r 4ba3e50858e2 -r 9ba353933d12 src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp Tue Dec 20 14:29:46 2016 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp Wed Dec 21 19:27:57 2016 +0100 @@ -191,5 +191,13 @@ } } + if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_ReferenceProcessor_oops_do)) { + // Evacuate the PLL here so that the SurrogateLockerThread doesn't + // have to. If the SLT runs into OOM during evacuation, the + // ShenandoahConcurrentThread cannot get back from VMThread::execute() + // and therefore never turn off _evacuation_in_progress -> deadlock. + oop pll = java_lang_ref_Reference::pending_list_lock(); + oopDesc::bs()->write_barrier(pll); + } _process_strong_tasks.all_tasks_completed(); } diff -r 4ba3e50858e2 -r 9ba353933d12 src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.hpp Tue Dec 20 14:29:46 2016 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.hpp Wed Dec 21 19:27:57 2016 +0100 @@ -52,6 +52,7 @@ SHENANDOAH_RP_PS_ClassLoaderDataGraph_oops_do, SHENANDOAH_RP_PS_jvmti_oops_do, SHENANDOAH_RP_PS_CodeCache_oops_do, + SHENANDOAH_RP_PS_ReferenceProcessor_oops_do, // Leave this one last. 
SHENANDOAH_RP_PS_NumElements }; diff -r 4ba3e50858e2 -r 9ba353933d12 src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp --- a/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Tue Dec 20 14:29:46 2016 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Wed Dec 21 19:27:57 2016 +0100 @@ -122,6 +122,9 @@ sh->evacuate_and_update_roots(); sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::init_evac); + if (sh->cancelled_concgc()) { + sh->set_evacuation_in_progress(false); + } } else { GCTraceTime time("Cancel concurrent Mark", ShenandoahLogInfo, true, sh->gc_timer(), sh->tracer()->gc_id()); sh->concurrentMark()->cancel(); changeset: 9490:9fe66b8f9d19 user: rkennke date: Wed Jan 04 13:09:12 2017 +0100 summary: Avoid evacuation if concurrent GC was cancelled. Make sure Full GC is able to recover. diff -r 9ba353933d12 -r 9fe66b8f9d19 src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Dec 21 19:27:57 2016 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Jan 04 13:09:12 2017 +0100 @@ -528,6 +528,7 @@ _phase_names[resize_tlabs] = "Resize TLABs"; _phase_names[full_gc] = "Full GC Times"; + _phase_names[full_gc_prepare] = " Prepare"; _phase_names[full_gc_mark] = " Mark"; _phase_names[full_gc_mark_drain_queues] = " Drain Queues"; _phase_names[full_gc_mark_weakrefs] = " Weak References"; diff -r 9ba353933d12 -r 9fe66b8f9d19 src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Wed Dec 21 19:27:57 2016 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Wed Jan 04 13:09:12 2017 +0100 @@ -101,6 +101,7 @@ reset_bitmaps, full_gc, + full_gc_prepare, full_gc_mark, full_gc_mark_drain_queues, full_gc_mark_weakrefs, diff -r 9ba353933d12 -r 9fe66b8f9d19 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Dec 21 19:27:57 2016 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 13:09:12 2017 +0100 @@ -61,75 +61,13 @@ ShenandoahHeap* heap = ShenandoahHeap::heap(); - GCTimer* gc_timer = heap->gc_timer(); - GCTracer* gc_tracer = heap->shenandoahPolicy()->tracer(); while (! 
_should_terminate) { if (_do_full_gc) { - { - if (_full_gc_cause == GCCause::_allocation_failure) { - heap->shenandoahPolicy()->record_allocation_failure_gc(); - } else { - heap->shenandoahPolicy()->record_user_requested_gc(); - } - - TraceCollectorStats tcs(heap->monitoring_support()->full_collection_counters()); - TraceMemoryManagerStats tmms(true, _full_gc_cause); - VM_ShenandoahFullGC full_gc(_full_gc_cause); - VMThread::execute(&full_gc); - } - MonitorLockerEx ml(&_full_gc_lock); - _do_full_gc = false; - ml.notify_all(); - } else if (heap->shenandoahPolicy()->should_start_concurrent_mark(heap->used(), - heap->capacity())) - { - - gc_timer->register_gc_start(); - - heap->shenandoahPolicy()->increase_cycle_counter(); - - TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); - TraceMemoryManagerStats tmms(false, GCCause::_no_cause_specified); - - { - TraceCollectorStats tcs(heap->monitoring_support()->stw_collection_counters()); - VM_ShenandoahInitMark initMark; - heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::init_mark_gross); - VMThread::execute(&initMark); - heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::init_mark_gross); - } - { - // GCTraceTime time("Concurrent marking", ShenandoahLogInfo, true, gc_timer, gc_tracer->gc_id()); - TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); - ShenandoahHeap::heap()->concurrentMark()->mark_from_roots(); - } - - { - TraceCollectorStats tcs(heap->monitoring_support()->stw_collection_counters()); - VM_ShenandoahStartEvacuation finishMark; - heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::final_mark_gross); - VMThread::execute(&finishMark); - heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::final_mark_gross); - } - - if (! _should_terminate) { - // GCTraceTime time("Concurrent evacuation ", ShenandoahLogInfo, true, gc_timer, gc_tracer->gc_id()); - TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); - heap->do_evacuation(); - } - - if (heap->is_evacuation_in_progress()) { - MutexLocker mu(Threads_lock); - heap->set_evacuation_in_progress(false); - } - heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::reset_bitmaps); - heap->reset_next_mark_bitmap(heap->conc_workers()); - heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::reset_bitmaps); - - gc_timer->register_gc_end(); - } else { - Thread::current()->_ParkEvent->park(10) ; - // yield(); + service_fullgc_cycle(); + } else if (heap->shenandoahPolicy()->should_start_concurrent_mark(heap->used(), heap->capacity())) { + service_normal_cycle(); + } else { + Thread::current()->_ParkEvent->park(10); } // Make sure the _do_full_gc flag changes are seen. 
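The second hunk of this diff (below) extracts the normal GC cycle into its own method. Condensed to its control flow, with timers, counters, and policy bookkeeping elided, the new driver looks roughly like this (a sketch only; all names are taken from the patch itself):

    void ShenandoahConcurrentThread::service_normal_cycle() {
      ShenandoahHeap* heap = ShenandoahHeap::heap();

      // Pause 1: initial mark under STW.
      VM_ShenandoahInitMark initMark;
      VMThread::execute(&initMark);
      if (check_cancellation()) return;   // e.g. a Full GC got scheduled meanwhile

      // Concurrent marking, running alongside mutators.
      heap->concurrentMark()->mark_from_roots();
      if (check_cancellation()) return;

      // Pause 2: final mark under STW, then start evacuation.
      VM_ShenandoahStartEvacuation finishMark;
      VMThread::execute(&finishMark);
      if (check_cancellation()) return;

      // Concurrent evacuation, running alongside mutators.
      heap->do_evacuation();
    }

The point of the refactoring is that cancellation is now re-checked at every phase boundary, instead of being handled ad hoc inside individual phases.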
@@ -138,6 +76,83 @@ terminate(); } +void ShenandoahConcurrentThread::service_normal_cycle() { + ShenandoahHeap* heap = ShenandoahHeap::heap(); + + GCTimer* gc_timer = heap->gc_timer(); + GCTracer* gc_tracer = heap->tracer(); + + gc_timer->register_gc_start(); + + heap->shenandoahPolicy()->increase_cycle_counter(); + + TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); + TraceMemoryManagerStats tmms(false, GCCause::_no_cause_specified); + + // Start initial mark under STW: + { + TraceCollectorStats tcs(heap->monitoring_support()->stw_collection_counters()); + VM_ShenandoahInitMark initMark; + heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::init_mark_gross); + VMThread::execute(&initMark); + heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::init_mark_gross); + } + + if (check_cancellation()) return; + + // Continue concurrent mark: + { + // GCTraceTime time("Concurrent marking", ShenandoahLogInfo, true, gc_timer, gc_tracer->gc_id()); + TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); + ShenandoahHeap::heap()->concurrentMark()->mark_from_roots(); + } + + if (check_cancellation()) return; + + // Proceed to complete marking under STW, and start evacuation: + { + TraceCollectorStats tcs(heap->monitoring_support()->stw_collection_counters()); + VM_ShenandoahStartEvacuation finishMark; + heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::final_mark_gross); + VMThread::execute(&finishMark); + heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::final_mark_gross); + } + + if (check_cancellation()) return; + + // Continue concurrent evacuation: + { + // GCTraceTime time("Concurrent evacuation ", ShenandoahLogInfo, true, gc_timer, gc_tracer->gc_id()); + TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); + heap->do_evacuation(); + } + + if (check_cancellation()) return; + + // Prepare for the next normal cycle: + if (heap->is_evacuation_in_progress()) { + MutexLocker mu(Threads_lock); + heap->set_evacuation_in_progress(false); + } + + heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::reset_bitmaps); + heap->reset_next_mark_bitmap(heap->conc_workers()); + heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::reset_bitmaps); + + gc_timer->register_gc_end(); +} + +bool ShenandoahConcurrentThread::check_cancellation() { + ShenandoahHeap* heap = ShenandoahHeap::heap(); + if (heap->cancelled_concgc() || _should_terminate) { + assert (_do_full_gc || _should_terminate, "Either exiting, or impending Full GC"); + heap->gc_timer()->register_gc_end(); + return true; + } + return false; +} + + void ShenandoahConcurrentThread::stop() { { MutexLockerEx ml(Terminator_lock); @@ -157,6 +172,26 @@ } } +void ShenandoahConcurrentThread::service_fullgc_cycle() { + ShenandoahHeap* heap = ShenandoahHeap::heap(); + + { + if (_full_gc_cause == GCCause::_allocation_failure) { + heap->shenandoahPolicy()->record_allocation_failure_gc(); + } else { + heap->shenandoahPolicy()->record_user_requested_gc(); + } + + TraceCollectorStats tcs(heap->monitoring_support()->full_collection_counters()); + TraceMemoryManagerStats tmms(true, _full_gc_cause); + VM_ShenandoahFullGC full_gc(_full_gc_cause); + VMThread::execute(&full_gc); + } + MonitorLockerEx ml(&_full_gc_lock); + _do_full_gc = false; + ml.notify_all(); +} + void ShenandoahConcurrentThread::do_full_gc(GCCause::Cause cause) { 
assert(Thread::current()->is_Java_thread(), "expect Java thread here"); diff -r 9ba353933d12 -r 9fe66b8f9d19 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.hpp Wed Dec 21 19:27:57 2016 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.hpp Wed Jan 04 13:09:12 2017 +0100 @@ -50,7 +50,11 @@ void sleepBeforeNextCycle(); - public: + bool check_cancellation(); + void service_normal_cycle(); + void service_fullgc_cycle(); + +public: // Constructor ShenandoahConcurrentThread(); ~ShenandoahConcurrentThread(); diff -r 9ba353933d12 -r 9fe66b8f9d19 src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Wed Dec 21 19:27:57 2016 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Wed Jan 04 13:09:12 2017 +0100 @@ -98,12 +98,30 @@ IsGCActiveMark is_active; assert(Thread::current()->is_VM_thread(), "Do full GC only while world is stopped"); - assert(_heap->is_next_bitmap_clear(), "require cleared bitmap"); - assert(!_heap->concurrent_mark_in_progress(), "can't do full-GC while marking is in progress"); - assert(!_heap->is_evacuation_in_progress(), "can't do full-GC while evacuation is in progress"); policy->record_phase_start(ShenandoahCollectorPolicy::full_gc); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_prepare); + + // Full GC is supposed to recover from any GC state: + + // a. Cancel concurrent mark, if in progress + if (_heap->concurrent_mark_in_progress()) { + _heap->concurrentMark()->cancel(); + _heap->stop_concurrent_marking(); + } + assert(!_heap->concurrent_mark_in_progress(), "sanity"); + + // b. Cancel evacuation, if in progress + if (_heap->is_evacuation_in_progress()) { + _heap->set_evacuation_in_progress(false); + } + assert(!_heap->is_evacuation_in_progress(), "sanity"); + + // c. Reset the bitmaps for new marking + _heap->reset_next_mark_bitmap(_heap->workers()); + assert(_heap->is_next_bitmap_clear(), "sanity"); + ClearInCollectionSetHeapRegionClosure cl; _heap->heap_region_iterate(&cl, false, false); @@ -120,6 +138,8 @@ ShenandoahMarkCompactBarrierSet bs(_heap); oopDesc::set_bs(&bs); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_prepare); + { GCTraceTime time("Pause Full", ShenandoahLogInfo, true, _gc_timer, _heap->tracer()->gc_id()); diff -r 9ba353933d12 -r 9fe66b8f9d19 src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Wed Dec 21 19:27:57 2016 +0100 +++ b/src/share/vm/runtime/thread.cpp Wed Jan 04 13:09:12 2017 +0100 @@ -1976,7 +1976,7 @@ } void JavaThread::set_evacuation_in_progress_all_threads(bool in_prog) { - assert(Threads_lock->owned_by_self(), "must hold Threads_lock"); + assert_locked_or_safepoint(Threads_lock); _evacuation_in_progress_global = in_prog; for (JavaThread* t = Threads::first(); t != NULL; t = t->next()) { t->set_evacuation_in_progress(in_prog); changeset: 9491:1def7a9a30be user: rkennke date: Wed Jan 04 13:09:48 2017 +0100 summary: Fix TLAB flapping. Do not reply with MinTLABSize if there is no space left in the current region; make the allocator ask for another region.
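To see why the old behavior flapped: the shared TLAB machinery clamps each thread's desired TLAB size against unsafe_max_tlab_alloc(). The sketch below illustrates the interaction; the clamp is a simplification of the ThreadLocalAllocBuffer sizing logic, not a quote of it, and thread_desired_tlab_size() is a hypothetical stand-in:

    // Simplified illustration of TLAB sizing against unsafe_max_tlab_alloc():
    size_t desired = thread_desired_tlab_size();           // e.g. 256 K
    size_t max_ok  = heap->unsafe_max_tlab_alloc(thread);  // old code: MinTLABSize once the
                                                           // current region was exhausted
    size_t size    = MIN2(desired, max_ok);
    // Old behavior: size collapses to MinTLABSize, the tiny TLAB fills up
    // immediately, and the next sizing round collapses again -- "flapping".
    // New behavior: max_ok is a whole region (RegionSizeBytes), so the
    // allocator simply takes the next free region and keeps full-sized TLABs.

The companion hunk in shenandoahHeapRegion.cpp enforces the precondition this relies on: a region must be at least MinTLABSize, otherwise the VM exits during initialization.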
diff -r 9fe66b8f9d19 -r 1def7a9a30be src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:09:12 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -1387,12 +1387,15 @@ size_t ShenandoahHeap::unsafe_max_tlab_alloc(Thread *thread) const { size_t idx = _free_regions->current_index(); ShenandoahHeapRegion* current = _free_regions->get(idx); - if (current == NULL) + if (current == NULL) { return 0; - else if (current->free() > MinTLABSize) { + } else if (current->free() > MinTLABSize) { + // Current region has enough space left, can use it. return current->free(); } else { - return MinTLABSize; + // No more space in current region, we will take next free region + // on the next TLAB allocation. + return ShenandoahHeapRegion::RegionSizeBytes; } } diff -r 9fe66b8f9d19 -r 1def7a9a30be src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp Wed Jan 04 13:09:12 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -290,6 +290,9 @@ if (ShenandoahMinRegionSize < MIN_REGION_SIZE) { vm_exit_during_initialization("Invalid -XX:ShenandoahMinRegionSize option"); } + if (ShenandoahMinRegionSize < MinTLABSize) { + vm_exit_during_initialization("Invalid -XX:ShenandoahMinRegionSize option"); + } if (ShenandoahMaxRegionSize < MIN_REGION_SIZE) { vm_exit_during_initialization("Invalid -XX:ShenandoahMaxRegionSize option"); } changeset: 9492:c07dbebf60f9 user: rkennke date: Wed Jan 04 13:09:48 2017 +0100 summary: Fix object initialization in C2 diff -r 1def7a9a30be -r c07dbebf60f9 src/share/vm/opto/macro.cpp --- a/src/share/vm/opto/macro.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/opto/macro.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -1287,6 +1287,7 @@ transform_later(old_eden_top); // Add to heap top to get a new heap top + Node* init_size_in_bytes = size_in_bytes; if (UseShenandoahGC) { // Allocate several words more for the Shenandoah brooks pointer. size_in_bytes = new (C) AddLNode(size_in_bytes, _igvn.MakeConX(BrooksPointer::byte_size())); @@ -1392,7 +1393,7 @@ InitializeNode* init = alloc->initialization(); fast_oop_rawmem = initialize_object(alloc, fast_oop_ctrl, fast_oop_rawmem, fast_oop, - klass_node, length, size_in_bytes); + klass_node, length, init_size_in_bytes); // If initialization is performed by an array copy, any required // MemBarStoreStore was already added. 
If the object does not changeset: 9493:a4b8d20c15ef user: rkennke date: Wed Jan 04 13:09:48 2017 +0100 summary: C1 cleanup diff -r c07dbebf60f9 -r a4b8d20c15ef src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp --- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -34,9 +34,6 @@ #include "c1/c1_ValueStack.hpp" #include "ci/ciArrayKlass.hpp" #include "ci/ciInstance.hpp" -#include "gc_implementation/shenandoah/brooksPointer.hpp" -#include "gc_implementation/shenandoah/shenandoahHeap.hpp" -#include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" #include "gc_interface/collectedHeap.hpp" #include "memory/barrierSet.hpp" #include "memory/cardTableModRefBS.hpp" @@ -861,7 +858,7 @@ if (UseCompressedOops && !wide) { __ strw(compressed_src, as_Address(to_addr, rscratch2)); } else { - __ str(compressed_src, as_Address(to_addr)); + __ str(compressed_src, as_Address(to_addr)); } break; case T_METADATA: @@ -1688,6 +1685,7 @@ // Return 1 in rscratch1 if the CAS fails. void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + assert(VM_Version::supports_cx8(), "wrong machine"); Register addr = as_reg(op->addr()); Register newval = as_reg(op->new_value()); Register cmpval = as_reg(op->cmp_value()); diff -r c07dbebf60f9 -r a4b8d20c15ef src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp --- a/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -1038,7 +1038,6 @@ LIR_Address::times_1, offset, T_BYTE); - BasicTypeList signature(3); signature.append(T_INT); signature.append(T_ADDRESS); diff -r c07dbebf60f9 -r a4b8d20c15ef src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp --- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -133,7 +133,6 @@ if (UseBiasedLocking) { // load object ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - biased_locking_exit(obj, hdr, done); } diff -r c07dbebf60f9 -r a4b8d20c15ef src/cpu/x86/vm/c1_LIRAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -32,9 +32,6 @@ #include "c1/c1_ValueStack.hpp" #include "ci/ciArrayKlass.hpp" #include "ci/ciInstance.hpp" -#include "gc_implementation/shenandoah/brooksPointer.hpp" -#include "gc_implementation/shenandoah/shenandoahHeap.hpp" -#include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" #include "gc_interface/collectedHeap.hpp" #include "memory/barrierSet.hpp" #include "memory/cardTableModRefBS.hpp" @@ -1523,9 +1520,6 @@ Label done; Register obj = op->in_opr()->as_register(); Register res = op->result_opr()->as_register(); - Register tmp1 = op->tmp1_opr()->as_register(); - Register tmp2 = op->tmp2_opr()->as_register(); - assert_different_registers(res, tmp1, tmp2); if (res != obj) { __ mov(res, obj); diff -r c07dbebf60f9 -r a4b8d20c15ef src/share/vm/c1/c1_LIR.cpp --- a/src/share/vm/c1/c1_LIR.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/c1/c1_LIR.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -1017,8 +1017,6 @@ LIR_OpShenandoahWriteBarrier* opShenandoahWB = (LIR_OpShenandoahWriteBarrier*) op; do_input(opShenandoahWB->_opr); do_output(opShenandoahWB->_result); - do_temp(opShenandoahWB->_tmp1); - do_temp(opShenandoahWB->_tmp2); break; } 
default: @@ -1852,8 +1850,6 @@ void LIR_OpShenandoahWriteBarrier::print_instr(outputStream* out) const { out->print("[obj: "); in_opr()->print(out); out->print("]"); out->print("[res: "); result_opr()->print(out); out->print("]"); - out->print("[tmp1: "); tmp1_opr()->print(out); out->print("]"); - out->print("[tmp2: "); tmp2_opr()->print(out); out->print("]"); } // LIR_OpJavaCall diff -r c07dbebf60f9 -r a4b8d20c15ef src/share/vm/c1/c1_LIR.hpp --- a/src/share/vm/c1/c1_LIR.hpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/c1/c1_LIR.hpp Wed Jan 04 13:09:48 2017 +0100 @@ -1473,14 +1473,10 @@ private: bool _need_null_check; - LIR_Opr _tmp1; - LIR_Opr _tmp2; public: - LIR_OpShenandoahWriteBarrier(LIR_Opr obj, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2, CodeEmitInfo* info, bool need_null_check) : LIR_Op1(lir_shenandoah_wb, obj, result, T_OBJECT, lir_patch_none, info), _tmp1(tmp1), _tmp2(tmp2), _need_null_check(need_null_check) { + LIR_OpShenandoahWriteBarrier(LIR_Opr obj, LIR_Opr result, CodeEmitInfo* info, bool need_null_check) : LIR_Op1(lir_shenandoah_wb, obj, result, T_OBJECT, lir_patch_none, info), _need_null_check(need_null_check) { } - LIR_Opr tmp1_opr() const { return _tmp1; } - LIR_Opr tmp2_opr() const { return _tmp2; } bool need_null_check() const { return _need_null_check; } virtual void emit_code(LIR_Assembler* masm); virtual LIR_OpShenandoahWriteBarrier* as_OpShenandoahWriteBarrier() { return this; } @@ -2175,7 +2171,7 @@ void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); } #endif - void shenandoah_wb(LIR_Opr obj, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2, CodeEmitInfo* info, bool need_null_check) { append(new LIR_OpShenandoahWriteBarrier(obj, result, tmp1, tmp2, info, need_null_check)); } + void shenandoah_wb(LIR_Opr obj, LIR_Opr result, CodeEmitInfo* info, bool need_null_check) { append(new LIR_OpShenandoahWriteBarrier(obj, result, info, need_null_check)); } void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } diff -r c07dbebf60f9 -r a4b8d20c15ef src/share/vm/c1/c1_LIRGenerator.cpp --- a/src/share/vm/c1/c1_LIRGenerator.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -1862,9 +1862,7 @@ if (UseShenandoahGC) { LIR_Opr result = new_register(T_OBJECT); - LIR_Opr tmp1 = new_register(T_INT); - LIR_Opr tmp2 = new_register(T_INT); - __ shenandoah_wb(obj, result, tmp1, tmp2, info ? new CodeEmitInfo(info) : NULL, need_null_check); + __ shenandoah_wb(obj, result, info ? new CodeEmitInfo(info) : NULL, need_null_check); return result; } else { changeset: 9494:6fb2ed4e97b9 user: rkennke date: Wed Jan 04 13:09:48 2017 +0100 summary: Fix shutdown/cancelled races. 
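Condensed, the shutdown handshake this changeset introduces works in three steps; the sketch below is assembled from the hunks that follow (names and memory-ordering primitives are from the patch, everything else elided):

    // Heap side: ShenandoahHeap::stop() drives the handshake.
    void ShenandoahHeap::stop() {
      _concurrent_gc_thread->prepare_for_graceful_shutdown(); // 1. flag shutdown intent
      cancel_concgc(_vm_stop);                                // 2. unblock GC workers
      _concurrent_gc_thread->stop();                          // 3. join the control thread
    }

    // Control thread side: the flag is a lock-free byte with release/acquire
    // semantics, polled at the top of the service loop.
    void ShenandoahConcurrentThread::prepare_for_graceful_shutdown() {
      OrderAccess::release_store_fence(&_graceful_shutdown, 1);
    }

    bool ShenandoahConcurrentThread::in_graceful_shutdown() {
      return OrderAccess::load_acquire(&_graceful_shutdown) == 1;
    }

The ordering matters: flagging shutdown before cancelling lets check_cancellation() tell a graceful exit apart from an impending Full GC, and the final blocking stop() runs only after the workers have already been unblocked.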
diff -r a4b8d20c15ef -r 6fb2ed4e97b9 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -37,7 +37,8 @@ ShenandoahConcurrentThread::ShenandoahConcurrentThread() : ConcurrentGCThread(), _full_gc_lock(Mutex::leaf, "ShenandoahFullGC_lock", true), - _do_full_gc(false) + _do_full_gc(false), + _graceful_shutdown(0) { create_and_start(); } @@ -62,7 +63,9 @@ ShenandoahHeap* heap = ShenandoahHeap::heap(); while (! _should_terminate) { - if (_do_full_gc) { + if (in_graceful_shutdown()) { + break; + } else if (_do_full_gc) { service_fullgc_cycle(); } else if (heap->shenandoahPolicy()->should_start_concurrent_mark(heap->used(), heap->capacity())) { service_normal_cycle(); @@ -73,10 +76,17 @@ // Make sure the _do_full_gc flag changes are seen. OrderAccess::storeload(); } + + // Wait for the actual stop(), can't leave run_service() earlier. + while (! _should_terminate) { + Thread::current()->_ParkEvent->park(10); + } terminate(); } void ShenandoahConcurrentThread::service_normal_cycle() { + if (check_cancellation()) return; + ShenandoahHeap* heap = ShenandoahHeap::heap(); GCTimer* gc_timer = heap->gc_timer(); @@ -144,8 +154,8 @@ bool ShenandoahConcurrentThread::check_cancellation() { ShenandoahHeap* heap = ShenandoahHeap::heap(); - if (heap->cancelled_concgc() || _should_terminate) { - assert (_do_full_gc || _should_terminate, "Either exiting, or impending Full GC"); + if (heap->cancelled_concgc()) { + assert (_do_full_gc || in_graceful_shutdown(), "Cancel GC either for Full GC, or gracefully exiting"); heap->gc_timer()->register_gc_end(); return true; } @@ -199,6 +209,10 @@ MonitorLockerEx ml(&_full_gc_lock); schedule_full_gc(); _full_gc_cause = cause; + + // Now that full GC is scheduled, we can abort everything else + ShenandoahHeap::heap()->cancel_concgc(cause); + while (_do_full_gc) { ml.wait(); OrderAccess::storeload(); @@ -234,3 +248,11 @@ assert(_slt == NULL, "SLT already created"); _slt = SurrogateLockerThread::make(THREAD); } + +void ShenandoahConcurrentThread::prepare_for_graceful_shutdown() { + OrderAccess::release_store_fence(&_graceful_shutdown, 1); +} + +bool ShenandoahConcurrentThread::in_graceful_shutdown() { + return OrderAccess::load_acquire(&_graceful_shutdown) == 1; +} diff -r a4b8d20c15ef -r 6fb2ed4e97b9 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.hpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.hpp Wed Jan 04 13:09:48 2017 +0100 @@ -46,6 +46,7 @@ private: bool _do_full_gc; + volatile jbyte _graceful_shutdown; GCCause::Cause _full_gc_cause; void sleepBeforeNextCycle(); @@ -72,10 +73,9 @@ char* name() const { return (char*)"ShenandoahConcurrentThread";} void start(); - void yield(); - static void safepoint_synchronize(); - static void safepoint_desynchronize(); + void prepare_for_graceful_shutdown(); + bool in_graceful_shutdown(); }; #endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHCONCURRENTTHREAD_HPP diff -r a4b8d20c15ef -r 6fb2ed4e97b9 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:09:48 2017 
+0100 @@ -1271,64 +1271,53 @@ } void ShenandoahHeap::parallel_evacuate() { + log_develop_trace(gc)("starting parallel_evacuate"); - if (! cancelled_concgc()) { + _shenandoah_policy->record_phase_start(ShenandoahCollectorPolicy::conc_evac); - log_develop_trace(gc)("starting parallel_evacuate"); + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + out->print("Printing all available regions"); + print_heap_regions(out); + } - _shenandoah_policy->record_phase_start(ShenandoahCollectorPolicy::conc_evac); + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + out->print("Printing collection set which contains "SIZE_FORMAT" regions:\n", _collection_set->count()); + _collection_set->print(out); - if (ShenandoahLogTrace) { - ResourceMark rm; - outputStream* out = gclog_or_tty; - out->print("Printing all available regions"); - print_heap_regions(out); - } + out->print("Printing free set which contains "SIZE_FORMAT" regions:\n", _free_regions->count()); + _free_regions->print(out); + } - if (ShenandoahLogTrace) { - ResourceMark rm; - outputStream* out = gclog_or_tty; - out->print("Printing collection set which contains "SIZE_FORMAT" regions:\n", _collection_set->count()); - _collection_set->print(out); + ParallelEvacuationTask evacuationTask = ParallelEvacuationTask(this, _collection_set); - out->print("Printing free set which contains "SIZE_FORMAT" regions:\n", _free_regions->count()); - _free_regions->print(out); - } + conc_workers()->run_task(&evacuationTask); - ParallelEvacuationTask evacuationTask = ParallelEvacuationTask(this, _collection_set); + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + out->print("Printing postgc collection set which contains "SIZE_FORMAT" regions:\n", + _collection_set->count()); - conc_workers()->run_task(&evacuationTask); + _collection_set->print(out); - if (ShenandoahLogTrace) { - ResourceMark rm; - outputStream* out = gclog_or_tty; - out->print("Printing postgc collection set which contains "SIZE_FORMAT" regions:\n", - _collection_set->count()); + out->print("Printing postgc free regions which contain "SIZE_FORMAT" free regions:\n", + _free_regions->count()); + _free_regions->print(out); - _collection_set->print(out); + } - out->print("Printing postgc free regions which contain "SIZE_FORMAT" free regions:\n", - _free_regions->count()); - _free_regions->print(out); + if (ShenandoahLogTrace) { + ResourceMark rm; + outputStream* out = gclog_or_tty; + out->print_cr("all regions after evacuation:"); + print_heap_regions(out); + } - } - - if (ShenandoahLogTrace) { - ResourceMark rm; - outputStream* out = gclog_or_tty; - out->print_cr("all regions after evacuation:"); - print_heap_regions(out); - } - - _shenandoah_policy->record_phase_end(ShenandoahCollectorPolicy::conc_evac); - - if (cancelled_concgc()) { - // tty->print("GOTCHA: by thread %d", Thread::current()->osthread()->thread_id()); - concurrent_thread()->schedule_full_gc(); - // tty->print("PostGotcha: by thread %d FullGC should be scheduled\n", - // Thread::current()->osthread()->thread_id()); - } - } + _shenandoah_policy->record_phase_end(ShenandoahCollectorPolicy::conc_evac); } class VerifyEvacuationClosure: public ExtendedOopClosure { @@ -1463,14 +1452,11 @@ assert(cause != GCCause::_gc_locker, "no JNI critical callback"); if (GCCause::is_user_requested_gc(cause)) { if (! 
DisableExplicitGC) { - cancel_concgc(cause); _concurrent_gc_thread->do_full_gc(cause); } } else if (cause == GCCause::_allocation_failure) { - cancel_concgc(cause); collector_policy()->set_should_clear_all_soft_refs(true); - _concurrent_gc_thread->do_full_gc(cause); - + _concurrent_gc_thread->do_full_gc(cause); } } @@ -2068,11 +2054,18 @@ } void ShenandoahHeap::stop() { - // We set this early here, to let GC threads terminate before we ask the concurrent thread - // to terminate, which would otherwise block until all GC threads come to finish normally. - set_cancelled_concgc(true); + // The shutdown sequence should be able to terminate when GC is running. + + // Step 1. Notify control thread that we are in shutdown. + // Note that we cannot do that with stop(), because stop() is blocking and waits for the actual shutdown. + // Doing stop() here would wait for the normal GC cycle to complete, never falling through to cancel below. + _concurrent_gc_thread->prepare_for_graceful_shutdown(); + + // Step 2. Notify GC workers that we are cancelling GC. + cancel_concgc(_vm_stop); + + // Step 3. Wait until GC worker exits normally. _concurrent_gc_thread->stop(); - cancel_concgc(_vm_stop); } void ShenandoahHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols) { diff -r a4b8d20c15ef -r 6fb2ed4e97b9 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Jan 04 13:09:48 2017 +0100 @@ -366,6 +366,9 @@ void swap_mark_bitmaps(); + void cancel_concgc(GCCause::Cause cause); + void cancel_concgc(ShenandoahCancelCause cause); + private: HeapWord* allocate_new_tlab(size_t word_size, bool mark); HeapWord* allocate_memory(size_t word_size, bool evacuating); @@ -417,8 +420,6 @@ void set_concurrent_mark_in_progress(bool in_progress); void oom_during_evacuation(); - void cancel_concgc(GCCause::Cause cause); - void cancel_concgc(ShenandoahCancelCause cause); inline void set_cancelled_concgc(bool v); void verify_live(); changeset: 9495:456fcbf22594 user: rkennke date: Wed Jan 04 13:09:48 2017 +0100 summary: Heap dump support diff -r 6fb2ed4e97b9 -r 456fcbf22594 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -1624,8 +1624,64 @@ heap_region_iterate(&blk, false, true); } +class ShenandoahSafeObjectIterateAdjustPtrsClosure : public MetadataAwareOopClosure { +private: + ShenandoahHeap* _heap; + +public: + ShenandoahSafeObjectIterateAdjustPtrsClosure() : _heap(ShenandoahHeap::heap()) {} + +private: + template <class T> + inline void do_oop_work(T* p) { + T o = oopDesc::load_heap_oop(p); + if (!oopDesc::is_null(o)) { + oop obj = oopDesc::decode_heap_oop_not_null(o); + oopDesc::encode_store_heap_oop(p, BrooksPointer::forwardee(obj)); + } + } +public: + void do_oop(oop* p) { + do_oop_work(p); + } + void do_oop(narrowOop* p) { + do_oop_work(p); + } +}; + +class ShenandoahSafeObjectIterateAndUpdate : public ObjectClosure { +private: + ObjectClosure* _cl; +public: + ShenandoahSafeObjectIterateAndUpdate(ObjectClosure *cl) : _cl(cl) {} + + virtual void do_object(oop obj) { + assert (oopDesc::unsafe_equals(obj, BrooksPointer::forwardee(obj)), "avoid double-counting: only non-forwarded objects here"); + 
// Fix up the ptrs. + ShenandoahSafeObjectIterateAdjustPtrsClosure adjust_ptrs; + obj->oop_iterate(&adjust_ptrs); + + // Can hand the object to the client closure now: + _cl->do_object(obj); + } +}; + void ShenandoahHeap::safe_object_iterate(ObjectClosure* cl) { - Unimplemented(); + assert(SafepointSynchronize::is_at_safepoint(), "safe iteration is only available during safepoints"); + + // Safe iteration does objects only with correct references. + // This is why we skip dirty regions that have stale copies of objects, + // and fix up the pointers in the returned objects. + + ShenandoahSafeObjectIterateAndUpdate safe_cl(cl); + ShenandoahIterateObjectClosureRegionClosure blk(&safe_cl); + heap_region_iterate(&blk, + /* skip_dirty_regions = */ true, + /* skip_humongous_continuations = */ true); + + _need_update_refs = false; // already updated the references } class ShenandoahIterateOopClosureRegionClosure : public ShenandoahHeapRegionClosure { changeset: 9496:135b06fb56f5 user: rkennke date: Wed Jan 04 13:09:48 2017 +0100 summary: Fix another Full GC trigger race diff -r 456fcbf22594 -r 135b06fb56f5 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -65,7 +65,7 @@ while (! _should_terminate) { if (in_graceful_shutdown()) { break; - } else if (_do_full_gc) { + } else if (is_full_gc()) { service_fullgc_cycle(); } else if (heap->shenandoahPolicy()->should_start_concurrent_mark(heap->used(), heap->capacity())) { service_normal_cycle(); @@ -155,7 +155,7 @@ bool ShenandoahConcurrentThread::check_cancellation() { ShenandoahHeap* heap = ShenandoahHeap::heap(); if (heap->cancelled_concgc()) { - assert (_do_full_gc || in_graceful_shutdown(), "Cancel GC either for Full GC, or gracefully exiting"); + assert (is_full_gc() || in_graceful_shutdown(), "Cancel GC either for Full GC, or gracefully exiting"); heap->gc_timer()->register_gc_end(); return true; } @@ -197,31 +197,47 @@ VM_ShenandoahFullGC full_gc(_full_gc_cause); VMThread::execute(&full_gc); } + + reset_full_gc(); + MonitorLockerEx ml(&_full_gc_lock); - _do_full_gc = false; ml.notify_all(); } void ShenandoahConcurrentThread::do_full_gc(GCCause::Cause cause) { - assert(Thread::current()->is_Java_thread(), "expect Java thread here"); + if (try_set_full_gc()) { + _full_gc_cause = cause; + + // Now that full GC is scheduled, we can abort everything else + ShenandoahHeap::heap()->cancel_concgc(cause); + } else { + if (_full_gc_cause != cause) { + log_info(gc)("Full GC is already pending with cause: %s; new cause is %s", + GCCause::to_string(_full_gc_cause), + GCCause::to_string(cause)); + } + } + MonitorLockerEx ml(&_full_gc_lock); - schedule_full_gc(); - _full_gc_cause = cause; - - // Now that full GC is scheduled, we can abort everything else - ShenandoahHeap::heap()->cancel_concgc(cause); - - while (_do_full_gc) { + while (is_full_gc()) { ml.wait(); - OrderAccess::storeload(); } - assert(!_do_full_gc, "expect full GC to have completed"); + assert(!is_full_gc(), "expect full GC to have completed"); } -void ShenandoahConcurrentThread::schedule_full_gc() { - _do_full_gc = true; +void ShenandoahConcurrentThread::reset_full_gc() { + OrderAccess::release_store_fence(&_do_full_gc, 0); +} + +bool ShenandoahConcurrentThread::try_set_full_gc() { + jbyte old = Atomic::cmpxchg(1, &_do_full_gc, 0); + return old == 0; // success +} + +bool
ShenandoahConcurrentThread::is_full_gc() { + return OrderAccess::load_acquire(&_do_full_gc) == 1; } void ShenandoahConcurrentThread::print() const { diff -r 456fcbf22594 -r 135b06fb56f5 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.hpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.hpp Wed Jan 04 13:09:48 2017 +0100 @@ -45,7 +45,7 @@ void stop(); private: - bool _do_full_gc; + volatile jbyte _do_full_gc; volatile jbyte _graceful_shutdown; GCCause::Cause _full_gc_cause; @@ -69,7 +69,9 @@ void do_full_gc(GCCause::Cause cause); - void schedule_full_gc(); + bool try_set_full_gc(); + void reset_full_gc(); + bool is_full_gc(); char* name() const { return (char*)"ShenandoahConcurrentThread";} void start(); changeset: 9497:5d2b541157fa user: rkennke date: Wed Jan 04 13:09:48 2017 +0100 summary: Enable -XX:+HeapDump{Before|After}FullGC. diff -r 135b06fb56f5 -r 5d2b541157fa src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -528,6 +528,7 @@ _phase_names[resize_tlabs] = "Resize TLABs"; _phase_names[full_gc] = "Full GC Times"; + _phase_names[full_gc_heapdumps] = " Heap Dumps"; _phase_names[full_gc_prepare] = " Prepare"; _phase_names[full_gc_mark] = " Mark"; _phase_names[full_gc_mark_drain_queues] = " Drain Queues"; diff -r 135b06fb56f5 -r 5d2b541157fa src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Wed Jan 04 13:09:48 2017 +0100 @@ -101,6 +101,7 @@ reset_bitmaps, full_gc, + full_gc_heapdumps, full_gc_prepare, full_gc_mark, full_gc_mark_drain_queues, diff -r 135b06fb56f5 -r 5d2b541157fa src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -1921,7 +1921,7 @@ // We ran out of memory during evacuation. Cancel evacuation, and schedule a full-GC. collector_policy()->set_should_clear_all_soft_refs(true); - concurrent_thread()->schedule_full_gc(); + concurrent_thread()->try_set_full_gc(); cancel_concgc(_oom_evacuation); if ((! Thread::current()->is_GC_task_thread()) && (! 
Thread::current()->is_ConcurrentGC_thread())) { diff -r 135b06fb56f5 -r 5d2b541157fa src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -101,6 +101,10 @@ policy->record_phase_start(ShenandoahCollectorPolicy::full_gc); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_heapdumps); + _heap->pre_full_gc_dump(_gc_timer); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_heapdumps); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_prepare); // Full GC is supposed to recover from any GC state: @@ -197,6 +201,10 @@ _gc_timer->register_gc_end(); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_heapdumps); + _heap->post_full_gc_dump(_gc_timer); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_heapdumps); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc); oopDesc::set_bs(old_bs); changeset: 9498:268d57171c9f user: rkennke date: Wed Jan 04 13:09:48 2017 +0100 summary: Do more Full GC tries following the allocation failure diff -r 5d2b541157fa -r 268d57171c9f src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -583,10 +583,24 @@ } while (retry && result == NULL); } - if (result == NULL && ! evacuating) { // Allocation failed, try full-GC, then retry allocation. - log_develop_trace(gc)("Failed to allocate " SIZE_FORMAT " bytes, free regions: ", word_size * HeapWordSize); - collect(GCCause::_allocation_failure); - result = allocate_memory_work(word_size); + if (!evacuating) { + // Allocation failed, try full-GC, then retry allocation. + // + // It might happen that one of the threads requesting allocation would unblock + // way later after full-GC happened, only to fail the second allocation, because + // other threads have already depleted the free storage. In this case, a better + // strategy would be to try full-GC again. + // + // Lacking the way to detect progress from "collect" call, we are left with blindly + // retrying for some bounded number of times. + // TODO: Poll if Full GC made enough progress to warrant retry. + int tries = 0; + while ((result == NULL) && (tries++ < ShenandoahFullGCTries)) { + log_debug(gc)("[" PTR_FORMAT " Failed to allocate " SIZE_FORMAT " bytes, doing full GC, try %d", + p2i(Thread::current()), word_size * HeapWordSize, tries); + collect(GCCause::_allocation_failure); + result = allocate_memory_work(word_size); + } } // Only update monitoring counters when not calling from a write-barrier. diff -r 5d2b541157fa -r 268d57171c9f src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Jan 04 13:09:48 2017 +0100 @@ -182,6 +182,10 @@ experimental(intx, ShenandoahMarkScanPrefetch, 32, \ "How many objects to prefetch ahead when traversing mark bitmaps." \ "Set to 0 to disable prefetching.") \ + \ + experimental(intx, ShenandoahFullGCTries, 3, \ + "How many times to try to do Full GC on allocation failure." 
\ + "Set to 0 to never try, and fail instead.") \ SHENANDOAH_FLAGS(DECLARE_DEVELOPER_FLAG, \ changeset: 9499:dbad5da24efa user: rkennke date: Wed Jan 04 13:09:48 2017 +0100 summary: Add remaining unused free space to 'used' counter in free list. Makes heuristics more precise. diff -r 268d57171c9f -r dbad5da24efa src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -223,6 +223,7 @@ size_t result = (size_t) Atomic::cmpxchg((jlong) next, (jlong*) &_current_index, (jlong) idx); if (result == idx) { + increase_used(get(idx)->free()); result = next; } assert (result != _active_end, "don't increase current into active_end"); changeset: 9500:963893176ea7 user: rkennke date: Wed Jan 04 13:09:48 2017 +0100 summary: Fix MXBean Full GC notifications. diff -r dbad5da24efa -r 963893176ea7 src/share/vm/services/memoryManager.cpp --- a/src/share/vm/services/memoryManager.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/services/memoryManager.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -98,8 +98,12 @@ return (GCMemoryManager*) new G1OldGenMemoryManager(); } -GCMemoryManager* MemoryManager::get_shenandoah_memory_manager() { - return (GCMemoryManager*) new ShenandoahMemoryManager(); +GCMemoryManager* MemoryManager::get_shenandoah_minor_memory_manager() { + return (GCMemoryManager*) new ShenandoahMinorMemoryManager(); +} + +GCMemoryManager* MemoryManager::get_shenandoah_major_memory_manager() { + return (GCMemoryManager*) new ShenandoahMajorMemoryManager(); } instanceOop MemoryManager::get_memory_manager_instance(TRAPS) { diff -r dbad5da24efa -r 963893176ea7 src/share/vm/services/memoryManager.hpp --- a/src/share/vm/services/memoryManager.hpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/services/memoryManager.hpp Wed Jan 04 13:09:48 2017 +0100 @@ -65,7 +65,8 @@ PSMarkSweep, G1YoungGen, G1OldGen, - Shenandoah + ShenandoahMinor, + ShenandoahMajor }; MemoryManager(); @@ -99,8 +100,8 @@ static GCMemoryManager* get_psMarkSweep_memory_manager(); static GCMemoryManager* get_g1YoungGen_memory_manager(); static GCMemoryManager* get_g1OldGen_memory_manager(); - static GCMemoryManager* get_shenandoah_memory_manager(); - + static GCMemoryManager* get_shenandoah_minor_memory_manager(); + static GCMemoryManager* get_shenandoah_major_memory_manager(); }; class CodeCacheMemoryManager : public MemoryManager { @@ -286,14 +287,19 @@ const char* name() { return "G1 Old Generation"; } }; -class ShenandoahMemoryManager : public GCMemoryManager { -private: +class ShenandoahMinorMemoryManager : public GCMemoryManager { public: - ShenandoahMemoryManager() : GCMemoryManager() {} + ShenandoahMinorMemoryManager() : GCMemoryManager() {} - MemoryManager::Name kind() { return MemoryManager::Shenandoah; } - const char* name() { return "Shenandoah";} - + MemoryManager::Name kind() { return MemoryManager::ShenandoahMinor; } + const char* name() { return "Shenandoah Minor"; } }; +class ShenandoahMajorMemoryManager : public GCMemoryManager { +public: + ShenandoahMajorMemoryManager() : GCMemoryManager() {} + + MemoryManager::Name kind() { return MemoryManager::ShenandoahMajor; } + const char* name() { return "Shenandoah Major"; } +}; #endif // SHARE_VM_SERVICES_MEMORYMANAGER_HPP diff -r dbad5da24efa -r 963893176ea7 src/share/vm/services/memoryService.cpp --- a/src/share/vm/services/memoryService.cpp Wed Jan 04 13:09:48 2017 +0100 
+++ b/src/share/vm/services/memoryService.cpp Wed Jan 04 13:09:48 2017 +0100 @@ -198,8 +198,12 @@ void MemoryService::add_shenandoah_heap_info(ShenandoahHeap* pgch) { assert(UseShenandoahGC, "sanity"); - _major_gc_manager = MemoryManager::get_shenandoah_memory_manager(); - _minor_gc_manager = MemoryManager::get_shenandoah_memory_manager(); + + // Need to have different names for these managers, because having the same name + // would confuse notification mechanics: it will enable notifications only for + // the first manager with the matching name. + _major_gc_manager = MemoryManager::get_shenandoah_major_memory_manager(); + _minor_gc_manager = MemoryManager::get_shenandoah_minor_memory_manager(); _managers_list->append(_major_gc_manager); _managers_list->append(_minor_gc_manager); add_shenandoah_memory_pool(pgch, _minor_gc_manager, true); changeset: 9501:6b50d518992e user: rkennke date: Wed Jan 04 13:26:34 2017 +0100 summary: JVMStat heap region counters diff -r 963893176ea7 -r 6b50d518992e src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 13:26:34 2017 +0100 @@ -72,6 +72,7 @@ } else { Thread::current()->_ParkEvent->park(10); } + heap->monitoring_support()->update_counters(); // Make sure the _do_full_gc flag changes are seen. OrderAccess::storeload(); diff -r 963893176ea7 -r 6b50d518992e src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.cpp Wed Jan 04 13:26:34 2017 +0100 @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" +#include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" +#include "gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp" +#include "gc_implementation/shenandoah/shenandoahHeapRegionCounters.hpp" +#include "runtime/perfData.hpp" + +ShenandoahHeapRegionCounters::ShenandoahHeapRegionCounters() { + + if (UsePerfData) { + EXCEPTION_MARK; + ResourceMark rm; + ShenandoahHeap* heap = ShenandoahHeap::heap(); + size_t max_regions = heap->max_regions(); + const char* cns = PerfDataManager::name_space("shenandoah", "regions"); + _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1, mtGC); + strcpy(_name_space, cns); + + const char* cname = PerfDataManager::counter_name(_name_space, "max_regions"); + PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_None, max_regions, CHECK); + + cname = PerfDataManager::counter_name(_name_space, "region_size"); + PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_None, ShenandoahHeapRegion::RegionSizeBytes, CHECK); + + cname = PerfDataManager::counter_name(_name_space, "status"); + _status = PerfDataManager::create_long_variable(SUN_GC, cname, + PerfData::U_None, CHECK); + + _regions_data = NEW_C_HEAP_ARRAY(PerfVariable*, max_regions, mtGC); + for (uint i = 0; i < max_regions; i++) { + const char* reg_name = PerfDataManager::name_space(_name_space, "region", i); + const char* data_name = PerfDataManager::counter_name(reg_name, "data"); + const char* ns = PerfDataManager::ns_to_string(SUN_GC); + const char* fullname = PerfDataManager::counter_name(ns, data_name); + assert(!PerfDataManager::exists(fullname), "must not exist"); + _regions_data[i] = PerfDataManager::create_long_variable(SUN_GC, data_name, + PerfData::U_None, CHECK); + + } + } +} + +ShenandoahHeapRegionCounters::~ShenandoahHeapRegionCounters() { + if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space, mtGC); +} + +void ShenandoahHeapRegionCounters::update() { + if (ShenandoahRegionSampling) { + jlong current = os::javaTimeMillis(); + if (current - _last_sample_millis > ShenandoahRegionSamplingRate) { + ShenandoahHeap* heap = ShenandoahHeap::heap(); + jlong status = 0; + if (heap->concurrent_mark_in_progress()) status |= 1; + if (heap->is_evacuation_in_progress()) status |= 2; + _status->set_value(status); + + size_t num_regions = heap->num_regions(); + size_t max_regions = heap->max_regions(); + ShenandoahHeapRegionSet* regions = heap->regions(); + for (uint i = 0; i < max_regions; i++) { + if (i < num_regions) { + ShenandoahHeapRegion* r = regions->get(i); + jlong data = (r->used() & USED_MASK) << USED_SHIFT; + data |= (r->get_live_data() & LIVE_MASK) << LIVE_SHIFT; + jlong flags = 0; + if (r->in_collection_set()) flags |= 1 << 0; + if (r->is_humongous()) flags |= 1 << 1; + data |= (flags & FLAGS_MASK) << FLAGS_SHIFT; + _regions_data[i]->set_value(data); + } else { + jlong flags = 1 << 2; + flags = (flags & FLAGS_MASK) << FLAGS_SHIFT; + _regions_data[i]->set_value(flags); + } + } + _last_sample_millis = current; + } + } +} diff -r 963893176ea7 -r 6b50d518992e src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.hpp Wed Jan 04 13:26:34 2017 +0100 @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAHHEAPREGIONCOUNTERS_HPP +#define SHARE_VM_GC_SHENANDOAH_SHENANDOAHHEAPREGIONCOUNTERS_HPP + +#include "memory/allocation.hpp" + +/** + * This provides the following in JVMStat: + * + * constants: + * - sun.gc.shenandoah.regions.max_regions maximum number of regions + * - sun.gc.shenandoah.regions.region_size size per region, in bytes + * + * variables: + * - sun.gc.shenandoah.regions.status current GC status: + * - bit 0 set when marking in progress + * - bit 1 set when evacuation in progress + * + * one variable counter per region, with $max_regions (see above) counters: + * - sun.gc.shenandoah.regions.region.$i.data + * where $i is the region number from 0 <= i < $max_regions + * + * in the following format: + * - bits 0-29 used memory in bytes + * - bits 30-59 live memory in bytes + * - bits 60-63 status + * - bit 60 set when region in collection set + * - bit 61 set when region is humongous + * - bit 62 set when region is not used yet + */ +class ShenandoahHeapRegionCounters : public CHeapObj<mtGC> { +private: + static const jlong USED_MASK = 0x3fffffff; // bits 0-29 + static const jlong USED_SHIFT = 0; + + static const jlong LIVE_MASK = 0x3fffffff; // bits 30-59 + static const jlong LIVE_SHIFT = 30; + + static const jlong FLAGS_MASK = 0xf; // bits 60-63 + static const jlong FLAGS_SHIFT = 60; // bits 60-63 + + char* _name_space; + PerfLongVariable** _regions_data; + PerfLongVariable* _status; + jlong _last_sample_millis; + +public: + ShenandoahHeapRegionCounters(); + ~ShenandoahHeapRegionCounters(); + void update(); +}; + +#endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHHEAPREGIONCOUNTERS_HPP diff -r 963893176ea7 -r 6b50d518992e src/share/vm/gc_implementation/shenandoah/shenandoahMonitoringSupport.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMonitoringSupport.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMonitoringSupport.cpp Wed Jan 04 13:26:34 2017 +0100 @@ -26,6 +26,7 @@ #include "gc_implementation/shared/generationCounters.hpp" #include "gc_implementation/shenandoah/shenandoahMonitoringSupport.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.hpp" +#include "gc_implementation/shenandoah/shenandoahHeapRegionCounters.hpp" class DummyGenerationCounters : public GenerationCounters { public: @@ -49,6 +50,8 @@ // We report young gen as unused.
_heap_counters = new GenerationCounters("heap", 0, 1, heap->storage()); _space_counters = new HSpaceCounters("heap", 0, heap->max_capacity(), heap->min_capacity(), _heap_counters); + + _heap_region_counters = new ShenandoahHeapRegionCounters(); } CollectorCounters* ShenandoahMonitoringSupport::stw_collection_counters() { @@ -73,5 +76,6 @@ size_t capacity = heap->capacity(); _heap_counters->update_all(); _space_counters->update_all(capacity, used); + _heap_region_counters->update(); } } diff -r 963893176ea7 -r 6b50d518992e src/share/vm/gc_implementation/shenandoah/shenandoahMonitoringSupport.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMonitoringSupport.hpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMonitoringSupport.hpp Wed Jan 04 13:26:34 2017 +0100 @@ -30,6 +30,7 @@ class HSpaceCounters; class ShenandoahHeap; class CollectorCounters; +class ShenandoahHeapRegionCounters; class ShenandoahMonitoringSupport : public CHeapObj<mtGC> { private: @@ -41,6 +42,8 @@ HSpaceCounters* _space_counters; + ShenandoahHeapRegionCounters* _heap_region_counters; + public: ShenandoahMonitoringSupport(ShenandoahHeap* heap); CollectorCounters* stw_collection_counters(); diff -r 963893176ea7 -r 6b50d518992e src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Jan 04 13:26:34 2017 +0100 @@ -145,6 +145,13 @@ experimental(size_t, ShenandoahSATBBufferSize, 1 * K, \ "Number of entries in an SATB log buffer.") \ \ + product_rw(int, ShenandoahRegionSamplingRate, 100, \ + "Sampling rate for heap region sampling. " \ + "Number of milliseconds between samples") \ + \ + product_rw(bool, ShenandoahRegionSampling, false, \ + "Turns on heap region sampling via JVMStat") \ + \ diagnostic(bool, ShenandoahWriteBarrier, true, \ "Turn on/off write barriers in Shenandoah") \ \ diff -r 963893176ea7 -r 6b50d518992e src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Wed Jan 04 13:09:48 2017 +0100 +++ b/src/share/vm/runtime/arguments.cpp Wed Jan 04 13:26:34 2017 +0100 @@ -1735,6 +1735,10 @@ FLAG_SET_DEFAULT(ParallelRefProcEnabled, true); } + if (FLAG_IS_DEFAULT(PerfDataMemorySize)) { + FLAG_SET_DEFAULT(PerfDataMemorySize, 512*K); + } + if (AlwaysPreTouch) { // Shenandoah handles pre-touch on its own.
It does not let the // generic storage code to do the pre-touch before Shenandoah has changeset: 9502:b991fdff1e7f user: rkennke date: Wed Jan 04 13:26:34 2017 +0100 summary: Locked allocation diff -r 6b50d518992e -r b991fdff1e7f src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.cpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.cpp Wed Jan 04 13:26:34 2017 +0100 @@ -28,7 +28,6 @@ ShenandoahFreeSet::ShenandoahFreeSet(size_t max_regions) : ShenandoahHeapRegionSet(max_regions), - _write_index(0), _capacity(0), _used(0) { @@ -38,8 +37,12 @@ } void ShenandoahFreeSet::increase_used(size_t num_bytes) { - assert(_used <= _capacity, "must not use more than we have"); - Atomic::add_ptr(num_bytes, (intptr_t*) &_used); + assert_heaplock_owned_by_current_thread(); + _used += num_bytes; + + assert(_used <= _capacity, err_msg("must not use more than we have: used: "SIZE_FORMAT + ", capacity: "SIZE_FORMAT", num_bytes: "SIZE_FORMAT, + _used, _capacity, num_bytes)); } size_t ShenandoahFreeSet::used() { @@ -56,7 +59,8 @@ * the caller, how many regions to skip (because we know, there * can't start a contiguous range there). */ -size_t ShenandoahFreeSet::is_contiguous(size_t start, size_t end, size_t num) { +size_t ShenandoahFreeSet::is_contiguous(size_t start, size_t num) { + assert_heaplock_owned_by_current_thread(); ShenandoahHeapRegion* r1 = get(start); @@ -65,8 +69,8 @@ } for (size_t i = 1; i < num; i++) { - size_t index = (start + i) % _reserved_end; - if (index == end) { + size_t index = start + i; + if (index == _active_end) { // We reached the end of our free list. ShouldNotReachHere(); // We limit search in find_contiguous() return i; @@ -83,41 +87,33 @@ return 0; } -size_t ShenandoahFreeSet::find_contiguous(size_t start, size_t end, size_t num) { +size_t ShenandoahFreeSet::find_contiguous(size_t start, size_t num) { + assert_heaplock_owned_by_current_thread(); assert(start < _reserved_end, "sanity"); - // The modulo will take care of wrapping around. size_t index = start; - while (index != end && diff_to_end(index, end) > num) { + while (index + num < _active_end) { assert(index < _reserved_end, "sanity"); - size_t j = is_contiguous(index, end, num); + size_t j = is_contiguous(index, num); if (j == 0) { return index; } - index = (index + j) % _reserved_end; + index = index + j; } return SIZE_MAX; } -void ShenandoahFreeSet::push_back_regions(size_t start, size_t end) { - if (start == end) return; // Nothing to do. - for (size_t i = start; i != end; i = (i + 1) % _reserved_end) { - ShenandoahHeapRegion* r = get(i); - // We subtract the capacity here, and add it back in par_add_region. 
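With the wrap-around removed, the reworked is_contiguous()/find_contiguous() pair above is a plain skip-ahead linear scan: when the run breaks at offset i, the search may jump i slots forward, because no run of the required length can start before that point. A standalone sketch of the idea, assuming a hypothetical bool array in place of the region set; run_broken_at() and find_run() are invented names:

#include <stddef.h>
#include <stdint.h>

// Returns 0 if `num` empty slots start at `start`; otherwise the number of
// slots the caller can safely skip before retrying.
static size_t run_broken_at(const bool* is_empty, size_t active_end,
                            size_t start, size_t num) {
  for (size_t i = 0; i < num; i++) {
    if (start + i == active_end) return i;     // ran off the active end
    if (!is_empty[start + i])    return i + 1; // skip past the occupied slot
  }
  return 0; // found a contiguous run of length `num` at `start`
}

static size_t find_run(const bool* is_empty, size_t active_end, size_t num) {
  size_t index = 0;
  while (index + num < active_end) {
    size_t skip = run_broken_at(is_empty, active_end, index, num);
    if (skip == 0) return index;
    index += skip;
  }
  return SIZE_MAX; // no contiguous run available; caller returns NULL
}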
- Atomic::add(-((jlong) r->free()), (jlong*) &_capacity); - } - par_add_regions(_regions, start, diff_to_end(start, end), _reserved_end); -} - void ShenandoahFreeSet::initialize_humongous_regions(size_t first, size_t num) { + assert_heaplock_owned_by_current_thread(); for (size_t i = 0; i < num; i++) { - ShenandoahHeapRegion* current = get((first + i) % _reserved_end); + ShenandoahHeapRegion* current = get(first + i); if (i == 0) current->set_humongous_start(true); else current->set_humongous_continuation(true); + assert(current->is_empty(), "must be empty"); current->set_top(current->end()); current->increase_live_data(ShenandoahHeapRegion::RegionSizeBytes); } @@ -125,107 +121,64 @@ ShenandoahHeap::heap()->increase_used(ShenandoahHeapRegion::RegionSizeBytes * num); } -size_t ShenandoahFreeSet::diff_to_end(size_t i, size_t end) const { - if (end <= i) { - end += _reserved_end; - } - assert(end > i, "sanity"); - return end == i ? _capacity : end - i; -} +ShenandoahHeapRegion* ShenandoahFreeSet::allocate_contiguous(size_t num) { + assert_heaplock_owned_by_current_thread(); + size_t next = _current_index; + while (next + num < _active_end) { + size_t first = find_contiguous(next, num); + if (first == SIZE_MAX) return NULL; + size_t next_current = first + num; + assert(next_current != _active_end, "never set current==end"); -ShenandoahHeapRegion* ShenandoahFreeSet::claim_contiguous(size_t num) { - size_t current_idx = _current_index; - size_t next = (current_idx + 1) % _reserved_end; - size_t end = _active_end; - while (next != end && diff_to_end(next, end) > num) { - size_t first = find_contiguous(next, end, num); - if (first == SIZE_MAX) return NULL; - size_t next_current = (first + num) % _reserved_end; - assert(next_current != end, "never set current==end"); - do { - size_t result = (size_t) Atomic::cmpxchg((jlong) next_current, (jlong*) &_current_index, (jlong) current_idx); - if (result == current_idx) { + initialize_humongous_regions(first, num); - push_back_regions(next, first); + return get(first); - initialize_humongous_regions(first, num); - assert(current_index() != first, "current overlaps with contiguous regions"); - return get(first); - } - - current_idx = result; - assert(current_idx != end, "must not cross active-end"); - next = (current_idx + 1) % _reserved_end; - end = _active_end; - } while (diff_to_end(current_idx, end) > diff_to_end(first, end)); } return NULL; } +void ShenandoahFreeSet::add_region(ShenandoahHeapRegion* r) { + assert_heaplock_owned_by_current_thread(); + assert(! r->in_collection_set(), "Shouldn't be adding those to the free set"); + assert(! contains(r), "We are about to add it, it shouldn't be there already"); + assert(! 
r->is_humongous(), "Don't add to humongous regions"); + + assert(_active_end < _reserved_end, "within bounds"); + + _regions[_active_end] = r; + _active_end++; + _capacity += r->free(); + assert(_used <= _capacity, "must not use more than we have"); +} + void ShenandoahFreeSet::clear() { - _active_end = _current_index; - _write_index = _current_index; + assert_heaplock_owned_by_current_thread(); + ShenandoahHeapRegionSet::clear(); _capacity = 0; _used = 0; } -void ShenandoahFreeSet::par_add_regions(ShenandoahHeapRegion** regions, size_t start, size_t num, size_t max) { - - size_t next = Atomic::add_ptr(num, (intptr_t*) &_write_index); - assert(next >= num, "don't get negative"); - size_t bottom = (next - num) % _reserved_end; - next = next % _reserved_end; - assert(bottom != next, "must be"); - - size_t capacity = 0; - for (size_t i = 0; i < num; i++) { - ShenandoahHeapRegion* r = regions[(start + i) % max]; - capacity += r->free(); - _regions[(bottom + i) % _reserved_end] = r; +ShenandoahHeapRegion* ShenandoahFreeSet::skip_humongous(ShenandoahHeapRegion* r) { + while (r != NULL && r->is_humongous()) { + next(); + r = current(); } - - // loop until we succeed in bringing the active_end up to our - // write index - // active_end gets set to 0 when we start a full gc - while (true) { - size_t test = (size_t) Atomic::cmpxchg((jlong) next, (jlong*) &_active_end, (jlong) bottom); - if (test == bottom) { - Atomic::add_ptr(capacity, (intptr_t*) &_capacity); - return; - } else { - // Don't starve competing threads. - os::NakedYield(); - } - } - + return r; } -void ShenandoahFreeSet::add_region(ShenandoahHeapRegion* r) { - assert(! r->in_collection_set(), "Shouldn't be adding those to the free set"); - assert(!contains(r), "We are about to add it, it shouldn't be there already"); - assert(!r->is_humongous(), "Don't add to humongous regions"); - - assert(_active_end < _reserved_end, "within bounds and no wrapping here"); - - _regions[_active_end] = r; - _active_end = (_active_end + 1) % _reserved_end; - _write_index++; - _capacity += r->free(); - assert(_used <= _capacity, "must not use more than we have"); +ShenandoahHeapRegion* ShenandoahFreeSet::current_no_humongous() { + ShenandoahHeapRegion* r = current(); + return skip_humongous(r); } -size_t ShenandoahFreeSet::par_claim_next(size_t idx) { - size_t next = (idx + 1) % _reserved_end; - if (next == _active_end) { - // Don't increase _current_index up to _active_end. 
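Allocation now walks the free set with a simple cursor held under the heap lock; skip_humongous()/current_no_humongous() above, and next_no_humongous() just below, merely advance that cursor past humongous regions. Roughly, as a free-standing sketch with invented, simplified types:

#include <stddef.h>

struct Region { bool humongous; };

// Hypothetical cursor over the active prefix of a region array.
struct FreeCursor {
  Region** regions;
  size_t   active_end;
  size_t   current;

  Region* current_no_humongous() {
    // Humongous regions are never used for regular allocation; skip them.
    while (current < active_end && regions[current]->humongous) {
      current++;
    }
    return (current < active_end) ? regions[current] : NULL;
  }

  Region* next_no_humongous() {
    current++;
    return current_no_humongous();
  }
};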
- return SIZE_MAX; - } - size_t result = (size_t) Atomic::cmpxchg((jlong) next, (jlong*) &_current_index, (jlong) idx); +ShenandoahHeapRegion* ShenandoahFreeSet::next_no_humongous() { + next(); + return current_no_humongous(); +} - if (result == idx) { - increase_used(get(idx)->free()); - result = next; - } - assert (result != _active_end, "don't increase current into active_end"); - return result; +void ShenandoahFreeSet::assert_heaplock_owned_by_current_thread() { +#ifdef ASSERT + ShenandoahHeap::heap()->assert_heaplock_owned_by_current_thread(); +#endif } diff -r 6b50d518992e -r b991fdff1e7f src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.hpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.hpp Wed Jan 04 13:26:34 2017 +0100 @@ -33,29 +33,29 @@ size_t _capacity; size_t _used; - size_t _write_index; + size_t is_contiguous(size_t start, size_t num); + size_t find_contiguous(size_t start, size_t num); + void initialize_humongous_regions(size_t first, size_t num); + ShenandoahHeapRegion* skip_humongous(ShenandoahHeapRegion* r); - size_t is_contiguous(size_t start, size_t end, size_t num); - size_t find_contiguous(size_t start, size_t end, size_t num); - void push_back_regions(size_t start, size_t end); - void initialize_humongous_regions(size_t first, size_t num); - size_t diff_to_end(size_t i, size_t end) const; + void assert_heaplock_owned_by_current_thread(); public: ShenandoahFreeSet(size_t max_regions); ~ShenandoahFreeSet(); + void add_region(ShenandoahHeapRegion* r); - void par_add_regions(ShenandoahHeapRegion** regions, size_t start, size_t num, size_t max); - - size_t par_claim_next(size_t current); size_t capacity(); - ShenandoahHeapRegion* claim_contiguous(size_t num); - void clear(); size_t used(); + ShenandoahHeapRegion* allocate_contiguous(size_t num); + void clear(); + void increase_used(size_t amount); + ShenandoahHeapRegion* current_no_humongous(); + ShenandoahHeapRegion* next_no_humongous(); }; #endif //SHARE_VM_GC_SHENANDOAH_SHENANDOAHFREESET_HPP diff -r 6b50d518992e -r b991fdff1e7f src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:26:34 2017 +0100 @@ -177,13 +177,16 @@ _next_top_at_mark_starts_base[i] = bottom; } - for (i = 0; i < _num_regions; i++) { - ShenandoahHeapRegion* current = new ShenandoahHeapRegion(); - current->initialize_heap_region(this, (HeapWord*) pgc_rs.base() + - regionSizeWords * i, regionSizeWords, i); - _free_regions->add_region(current); - _ordered_regions->add_region(current); - _sorted_regions->add_region(current); + { + ShenandoahHeapLock lock(this); + for (i = 0; i < _num_regions; i++) { + ShenandoahHeapRegion* current = new ShenandoahHeapRegion(); + current->initialize_heap_region(this, (HeapWord*) pgc_rs.base() + + regionSizeWords * i, regionSizeWords, i); + _free_regions->add_region(current); + _ordered_regions->add_region(current); + _sorted_regions->add_region(current); + } } assert(((size_t) _ordered_regions->active_regions()) == _num_regions, ""); _first_region = _ordered_regions->get(0); @@ -266,7 +269,10 @@ _cancelled_concgc(false), _need_update_refs(false), _need_reset_bitmaps(false), - _growing_heap(0), + _heap_lock(0), +#ifdef ASSERT + _heap_lock_owner(NULL), +#endif _gc_timer(new (ResourceObj::C_HEAP, mtGC) 
ConcurrentGCTimer()) { @@ -570,19 +576,23 @@ return (ShenandoahHeap*) heap; } +HeapWord* ShenandoahHeap::allocate_memory_work(size_t word_size) { + + ShenandoahHeapLock heap_lock(this); + + HeapWord* result = allocate_memory_under_lock(word_size); + while (result == NULL && _num_regions < _max_regions) { + grow_heap_by(1); + result = allocate_memory_under_lock(word_size); + } + + return result; +} + HeapWord* ShenandoahHeap::allocate_memory(size_t word_size, bool evacuating) { HeapWord* result = NULL; result = allocate_memory_work(word_size); - if (result == NULL) { - bool retry; - do { - // Try to grow the heap. - retry = check_grow_heap(); - result = allocate_memory_work(word_size); - } while (retry && result == NULL); - } - if (!evacuating) { // Allocation failed, try full-GC, then retry allocation. // @@ -621,33 +631,9 @@ return evacuating && Thread::current()->is_Java_thread(); } -bool ShenandoahHeap::check_grow_heap() { - - assert(_free_regions->max_regions() >= _free_regions->active_regions(), "don't get negative"); - - size_t available = _max_regions - _num_regions; - if (available == 0) { - return false; // Don't retry. - } - - jbyte growing = Atomic::cmpxchg(1, &_growing_heap, 0); - if (growing == 0) { - // Only one thread succeeds this, and this one gets - // to grow the heap. All other threads can continue - // to allocate from the reserve. - grow_heap_by(MIN2(available, ShenandoahAllocReserveRegions)); - - // Reset it back to 0, so that other threads can take it again. - Atomic::store(0, &_growing_heap); - return true; - } else { - // Let other threads work, then try again. - os::NakedYield(); - return true; - } -} - -HeapWord* ShenandoahHeap::allocate_memory_work(size_t word_size) { +HeapWord* ShenandoahHeap::allocate_memory_under_lock(size_t word_size) { + assert_heaplock_owned_by_current_thread(); + if (word_size * HeapWordSize > ShenandoahHeapRegion::RegionSizeBytes) { return allocate_large_memory(word_size); } @@ -657,8 +643,7 @@ // free region available, so current_index may not be valid. if (word_size * HeapWordSize > _free_regions->capacity()) return NULL; - size_t current_idx = _free_regions->current_index(); - ShenandoahHeapRegion* my_current_region = _free_regions->get(current_idx); + ShenandoahHeapRegion* my_current_region = _free_regions->current_no_humongous(); if (my_current_region == NULL) { return NULL; // No more room to make a new region. OOM. @@ -677,13 +662,14 @@ while (result == NULL) { // 2nd attempt. Try next region. - current_idx = _free_regions->par_claim_next(current_idx); - my_current_region = _free_regions->get(current_idx); + _free_regions->increase_used(my_current_region->free()); + ShenandoahHeapRegion* next_region = _free_regions->next_no_humongous(); + assert(next_region != my_current_region, "must not get current again"); + my_current_region = next_region; if (my_current_region == NULL) { return NULL; // No more room to make a new region. OOM. } - // _free_regions->increase_used(remaining); assert(my_current_region != NULL, "should have a region at this point"); assert(! in_collection_set(my_current_region), "never get targetted regions in free-lists"); assert(! 
my_current_region->is_humongous(), "never attempt to allocate from humongous object regions"); @@ -697,11 +683,12 @@ } HeapWord* ShenandoahHeap::allocate_large_memory(size_t words) { + assert_heaplock_owned_by_current_thread(); uint required_regions = ShenandoahHumongous::required_regions(words * HeapWordSize); if (required_regions > _max_regions) return NULL; - ShenandoahHeapRegion* r = _free_regions->claim_contiguous(required_regions); + ShenandoahHeapRegion* r = _free_regions->allocate_contiguous(required_regions); HeapWord* result = NULL; @@ -863,7 +850,6 @@ _heap->decrease_used(r->used()); _bytes_reclaimed += r->used(); r->recycle(); - _heap->free_regions()->add_region(r); } return false; @@ -1122,22 +1108,23 @@ // and the oop gets evacuated. If both operands have originally been // the same, we get false negatives. - - _collection_set->clear(); - _free_regions->clear(); - - ShenandoahReclaimHumongousRegionsClosure reclaim; - heap_region_iterate(&reclaim); - - // _ordered_regions->print(); + { + ShenandoahHeapLock lock(this); + _collection_set->clear(); + _free_regions->clear(); + + ShenandoahReclaimHumongousRegionsClosure reclaim; + heap_region_iterate(&reclaim); + #ifdef ASSERT - CheckCollectionSetClosure ccsc; - _ordered_regions->heap_region_iterate(&ccsc); + CheckCollectionSetClosure ccsc; + _ordered_regions->heap_region_iterate(&ccsc); #endif - _shenandoah_policy->choose_collection_set(_collection_set); - - _shenandoah_policy->choose_free_set(_free_regions); + _shenandoah_policy->choose_collection_set(_collection_set); + + _shenandoah_policy->choose_free_set(_free_regions); + } /* tty->print("Sorted free regions\n"); @@ -1962,7 +1949,6 @@ size_t base = _num_regions; ensure_new_regions(num_regions); - ShenandoahHeapRegion* regions[num_regions]; for (size_t i = 0; i < num_regions; i++) { ShenandoahHeapRegion* new_region = new ShenandoahHeapRegion(); size_t new_region_index = i + base; @@ -1983,9 +1969,8 @@ _next_top_at_mark_starts_base[new_region_index] = new_region->bottom(); _complete_top_at_mark_starts_base[new_region_index] = new_region->bottom(); - regions[i] = new_region; + _free_regions->add_region(new_region); } - _free_regions->par_add_regions(regions, 0, num_regions, num_regions); } void ShenandoahHeap::ensure_new_regions(size_t new_regions) { @@ -2322,3 +2307,10 @@ heap_region_iterate(&cl); return cl.garbage(); } + +#ifdef ASSERT +void ShenandoahHeap::assert_heaplock_owned_by_current_thread() { + assert(_heap_lock == locked, "must be locked"); + assert(_heap_lock_owner == Thread::current(), "must be owned by current thread"); +} +#endif diff -r 6b50d518992e -r b991fdff1e7f src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Jan 04 13:26:34 2017 +0100 @@ -45,6 +45,7 @@ class ShenandoahForwardedIsAliveClosure: public BoolObjectClosure { + private: ShenandoahHeap* _heap; public: @@ -64,6 +65,36 @@ // // ShenandoahHeap class ShenandoahHeap : public SharedHeap { + enum LockState { unlocked = 0, locked = 1 }; + +public: + class ShenandoahHeapLock : public StackObj { + private: + ShenandoahHeap* _heap; + + public: + ShenandoahHeapLock(ShenandoahHeap* heap) : _heap(heap) { + while (OrderAccess::load_acquire(& _heap->_heap_lock) == locked || Atomic::cmpxchg(locked, &_heap->_heap_lock, unlocked) == locked) { + SpinPause(); + } + assert(_heap->_heap_lock == locked, "sanity"); + +#ifdef ASSERT + 
assert(_heap->_heap_lock_owner == NULL, "must not be owned"); + _heap->_heap_lock_owner = Thread::current(); +#endif + } + + ~ShenandoahHeapLock() { +#ifdef ASSERT + _heap->assert_heaplock_owned_by_current_thread(); + _heap->_heap_lock_owner = NULL; +#endif + OrderAccess::release_store_fence(&_heap->_heap_lock, unlocked); + } + + }; + public: enum ShenandoahCancelCause { _oom_evacuation, @@ -124,8 +155,6 @@ volatile jbyte _cancelled_concgc; - jbyte _growing_heap; - size_t _bytes_allocated_since_cm; size_t _bytes_allocated_during_cm; size_t _bytes_allocated_during_cm_start; @@ -147,6 +176,13 @@ ConcurrentGCTimer* _gc_timer; + // See allocate_memory() + volatile jbyte _heap_lock; + +#ifdef ASSERT + volatile Thread* _heap_lock_owner; +#endif + public: ShenandoahHeap(ShenandoahCollectorPolicy* policy); @@ -369,8 +405,11 @@ void cancel_concgc(GCCause::Cause cause); void cancel_concgc(ShenandoahCancelCause cause); + void assert_heaplock_owned_by_current_thread() PRODUCT_RETURN; + private: HeapWord* allocate_new_tlab(size_t word_size, bool mark); + HeapWord* allocate_memory_under_lock(size_t word_size); HeapWord* allocate_memory(size_t word_size, bool evacuating); // Shenandoah functionality. inline HeapWord* allocate_from_gclab(Thread* thread, size_t size); @@ -412,7 +451,6 @@ ShenandoahCollectionSet* collection_set() { return _collection_set; } bool call_from_write_barrier(bool evacuating); - bool check_grow_heap(); void grow_heap_by(size_t num_regions); void ensure_new_regions(size_t num_new_regions); diff -r 6b50d518992e -r b991fdff1e7f src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.cpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.cpp Wed Jan 04 13:26:34 2017 +0100 @@ -141,13 +141,9 @@ heap_region_iterate(&pc1, false, false); } -ShenandoahHeapRegion* ShenandoahHeapRegionSet::next() { - size_t next = _current_index; - if (next < _active_end) { +void ShenandoahHeapRegionSet::next() { + if (_current_index < _active_end) { _current_index++; - return get(next); - } else { - return NULL; } } @@ -191,9 +187,17 @@ } ShenandoahHeapRegion* ShenandoahHeapRegionSet::get(size_t i) const { - if (i < _reserved_end) { + if (i < _active_end) { return _regions[i]; } else { return NULL; } } + +ShenandoahHeapRegion* ShenandoahHeapRegionSet::current() const { + if (_current_index < _active_end) { + return get(_current_index); + } else { + return NULL; + } +} diff -r 6b50d518992e -r b991fdff1e7f src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp Wed Jan 04 13:26:34 2017 +0100 @@ -75,7 +75,9 @@ virtual void add_region(ShenandoahHeapRegion* r); - ShenandoahHeapRegion* next(); + // Advance the iteration pointer to the next region. + void next(); + // Return the current region, and advance iteration pointer to next one, atomically. 
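ShenandoahHeapLock above is a test-and-test-and-set spin lock dressed up as a scoped stack object. The same pattern in portable C++11 atomics, as a sketch only; the std calls stand in for HotSpot's OrderAccess/Atomic/SpinPause primitives, which are similar but not identical:

#include <atomic>
#include <thread>

class SpinLockGuard {
private:
  std::atomic<int>& _lock; // 0 = unlocked, 1 = locked

public:
  explicit SpinLockGuard(std::atomic<int>& lock) : _lock(lock) {
    int expected = 0;
    // Spin on a plain acquire load first, then attempt the CAS, mirroring
    // the load_acquire + cmpxchg loop in the constructor above.
    while (_lock.load(std::memory_order_acquire) == 1 ||
           !_lock.compare_exchange_strong(expected, 1, std::memory_order_acquire)) {
      expected = 0;              // a failed CAS overwrites expected; reset it
      std::this_thread::yield(); // stands in for SpinPause()
    }
  }

  ~SpinLockGuard() {
    _lock.store(0, std::memory_order_release); // analogue of release_store_fence
  }
};

Making the lock a stack object ties unlock to scope exit, which is why the allocation path and the region-set mutations in this changeset can simply open a block with ShenandoahHeapLock lock(this).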
ShenandoahHeapRegion* claim_next(); template @@ -93,6 +95,8 @@ size_t current_index() { return _current_index;} void clear_current_index() {_current_index = 0; } + ShenandoahHeapRegion* current() const; + protected: bool contains(ShenandoahHeapRegion* r); diff -r 6b50d518992e -r b991fdff1e7f src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Wed Jan 04 13:26:34 2017 +0100 @@ -368,7 +368,8 @@ if (_compact_point + obj_size > _to_region->end()) { // Object doesn't fit. Pick next to-region and start compacting there. _to_region->set_new_top(_compact_point); - ShenandoahHeapRegion* new_to_region = _to_regions->next(); + ShenandoahHeapRegion* new_to_region = _to_regions->current(); + _to_regions->next(); if (new_to_region == NULL) { new_to_region = _from_region; } @@ -431,7 +432,8 @@ assert(cl.to_region() != NULL, "should not happen"); cl.to_region()->set_new_top(cl.compact_point()); while (to_regions->count() > 0) { - ShenandoahHeapRegion* r = to_regions->next(); + ShenandoahHeapRegion* r = to_regions->current(); + to_regions->next(); if (r == NULL) { to_regions->print(); } @@ -603,12 +605,14 @@ ShenandoahHeapRegionSet* copy_queue = _regions[worker_id]; copy_queue->clear_current_index(); ShenandoahCompactObjectsClosure cl; - ShenandoahHeapRegion* r = copy_queue->next(); + ShenandoahHeapRegion* r = copy_queue->current(); + copy_queue->next(); while (r != NULL) { assert(! r->is_humongous(), "must not get humongous regions here"); heap->marked_object_iterate(r, &cl); r->set_top(r->new_top()); - r = copy_queue->next(); + r = copy_queue->current(); + copy_queue->next(); } } }; @@ -658,16 +662,20 @@ // and must ensure the bitmap is in sync. heap->reset_complete_mark_bitmap(heap->workers()); - ShenandoahPostCompactClosure post_compact; - heap->heap_region_iterate(&post_compact); + { + ShenandoahHeap::ShenandoahHeapLock lock(heap); + ShenandoahPostCompactClosure post_compact; + heap->heap_region_iterate(&post_compact); + + heap->set_used(post_compact.get_live()); + + } heap->clear_cancelled_concgc(); // Also clear the next bitmap in preparation for next marking. heap->reset_next_mark_bitmap(heap->workers()); - heap->set_used(post_compact.get_live()); - for (uint i = 0; i < heap->max_parallel_workers(); i++) { delete copy_queues[i]; } diff -r 6b50d518992e -r b991fdff1e7f src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Jan 04 13:26:34 2017 +0100 @@ -65,10 +65,6 @@ "dynamic, adaptive, aggressive." \ "Defauls to dynamic") \ \ - product(uintx, ShenandoahAllocReserveRegions, 10, \ - "How many regions should be kept as allocation reserve, before " \ - "Shenandoah attempts to grow the heap. Defaults to 10.") \ - \ product(uintx, ShenandoahRefProcFrequency, 5, \ "How often should (weak, soft, etc) references be processed. " \ "References get processed at every Nth GC cycle. 
" \ changeset: 9503:352e7275a860 user: rkennke date: Wed Jan 04 13:26:34 2017 +0100 summary: Fix freeze when running OOM during write barrier diff -r b991fdff1e7f -r 352e7275a860 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 13:26:34 2017 +0100 @@ -138,14 +138,14 @@ heap->do_evacuation(); } - if (check_cancellation()) return; - // Prepare for the next normal cycle: if (heap->is_evacuation_in_progress()) { MutexLocker mu(Threads_lock); heap->set_evacuation_in_progress(false); } + if (check_cancellation()) return; + heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::reset_bitmaps); heap->reset_next_mark_bitmap(heap->conc_workers()); heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::reset_bitmaps); changeset: 9504:17e523dc476c user: rkennke date: Wed Jan 04 13:26:34 2017 +0100 summary: More efficient heap expansion diff -r 352e7275a860 -r 17e523dc476c src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:26:34 2017 +0100 @@ -581,8 +581,10 @@ ShenandoahHeapLock heap_lock(this); HeapWord* result = allocate_memory_under_lock(word_size); - while (result == NULL && _num_regions < _max_regions) { - grow_heap_by(1); + int grow_by = (word_size * HeapWordSize + ShenandoahHeapRegion::RegionSizeBytes - 1) / ShenandoahHeapRegion::RegionSizeBytes; + + while (result == NULL && _num_regions + grow_by <= _max_regions) { + grow_heap_by(grow_by); result = allocate_memory_under_lock(word_size); } @@ -1948,7 +1950,6 @@ void ShenandoahHeap::grow_heap_by(size_t num_regions) { size_t base = _num_regions; ensure_new_regions(num_regions); - for (size_t i = 0; i < num_regions; i++) { ShenandoahHeapRegion* new_region = new ShenandoahHeapRegion(); size_t new_region_index = i + base; changeset: 9505:145137908d2f user: rkennke date: Wed Jan 04 13:46:54 2017 +0100 summary: Degenerating concurrent marking diff -r 17e523dc476c -r 145137908d2f src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Jan 04 13:46:54 2017 +0100 @@ -41,6 +41,9 @@ size_t _bytes_allocated_start_CM; size_t _bytes_allocated_during_CM; + uint _cancelled_cm_cycles_in_a_row; + uint _successful_cm_cycles_in_a_row; + public: ShenandoahHeuristics(); @@ -55,6 +58,20 @@ virtual bool should_start_concurrent_mark(size_t used, size_t capacity) const=0; + virtual bool handover_cancelled_marking() { + return _cancelled_cm_cycles_in_a_row <= ShenandoahFullGCThreshold; + } + + virtual void record_cm_cancelled() { + _cancelled_cm_cycles_in_a_row++; + _successful_cm_cycles_in_a_row = 0; + } + + virtual void record_cm_success() { + _cancelled_cm_cycles_in_a_row = 0; + _successful_cm_cycles_in_a_row++; + } + virtual void start_choose_collection_set() { } virtual void end_choose_collection_set() { @@ -88,7 +105,9 @@ _bytes_allocated_since_CM(0), _bytes_reclaimed_this_cycle(0), _bytes_allocated_start_CM(0), - _bytes_allocated_during_CM(0) + _bytes_allocated_during_CM(0), + _cancelled_cm_cycles_in_a_row(0), + 
_successful_cm_cycles_in_a_row(0) { } @@ -328,68 +347,55 @@ class AdaptiveHeuristics : public ShenandoahHeuristics { private: - size_t _max_live_data; - double _used_threshold_factor; - double _garbage_threshold_factor; - double _allocation_threshold_factor; - - uintx _used_threshold; - uintx _garbage_threshold; - uintx _allocation_threshold; - - size_t _garbage; + uintx _free_threshold; public: - AdaptiveHeuristics() : ShenandoahHeuristics() { - _max_live_data = 0; - - _used_threshold = 0; - _garbage_threshold = 0; - _allocation_threshold = 0; - - _used_threshold_factor = 0.; - _garbage_threshold_factor = 0.1; - _allocation_threshold_factor = 0.; + AdaptiveHeuristics() : + ShenandoahHeuristics(), + _free_threshold(ShenandoahInitFreeThreshold) { } virtual ~AdaptiveHeuristics() {} - virtual void start_choose_collection_set() { - _garbage = 0; + virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) { + size_t threshold = ShenandoahHeapRegion::RegionSizeBytes * ShenandoahGarbageThreshold / 100; + return r->garbage() > threshold; } - virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) { - size_t bytes_alloc = ShenandoahHeap::heap()->bytes_allocated_since_cm(); - size_t min_garbage = bytes_alloc/* * 1.1*/; - size_t threshold = ShenandoahHeapRegion::RegionSizeBytes * ShenandoahGarbageThreshold / 100; - if (_garbage + immediate_garbage < min_garbage && r->garbage() > threshold) { - _garbage += r->garbage(); - return true; - } else { - return false; + virtual void record_cm_cancelled() { + ShenandoahHeuristics::record_cm_cancelled(); + if (_free_threshold < ShenandoahMaxFreeThreshold) { + _free_threshold++; + log_debug(gc,ergo)("increasing free threshold to: "UINTX_FORMAT, _free_threshold); + } + } + + virtual void record_cm_success() { + ShenandoahHeuristics::record_cm_success(); + if (_successful_cm_cycles_in_a_row > ShenandoahHappyCyclesThreshold && + _free_threshold > ShenandoahMinFreeThreshold) { + _free_threshold--; + log_debug(gc,ergo)("reducing free threshold to: "UINTX_FORMAT, _free_threshold); + _successful_cm_cycles_in_a_row = 0; } } virtual bool should_start_concurrent_mark(size_t used, size_t capacity) const { + bool shouldStartConcurrentMark = false; - ShenandoahHeap* _heap = ShenandoahHeap::heap(); - bool shouldStartConcurrentMark = false; - OrderAccess::release(); + ShenandoahHeap* heap = ShenandoahHeap::heap(); + size_t free_capacity = heap->free_regions()->capacity(); + size_t free_used = heap->free_regions()->used(); + assert(free_used <= free_capacity, "must use less than capacity"); + size_t available = free_capacity - free_used; + uintx factor = _free_threshold; + size_t targetStartMarking = (capacity * factor) / 100; - size_t max_live_data = _max_live_data; - if (max_live_data == 0) { - max_live_data = capacity * 0.2; // Very generous initial value. - } else { - max_live_data *= 1.3; // Add some wiggle room. - } - size_t max_cycle_allocated = _heap->max_allocated_gc(); - if (max_cycle_allocated == 0) { - max_cycle_allocated = capacity * 0.3; // Very generous. - } else { - max_cycle_allocated *= 1.3; // Add 20% wiggle room. Should be enough. 
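The rewritten AdaptiveHeuristics above drops the old live-data and allocation estimates in favor of a single free threshold, nudged up and down by the cancellation feedback in record_cm_cancelled()/record_cm_success(). The resulting trigger reduces to two comparisons; a condensed standalone sketch, with invented names:

#include <stddef.h>

// Hypothetical condensed form of the adaptive trigger: start a cycle when
// free space falls below a tunable fraction of capacity AND enough has been
// allocated since the last cycle to make another one worthwhile.
struct AdaptiveTrigger {
  unsigned free_threshold_pct;  // moved up/down by cancellation feedback

  bool should_start_mark(size_t capacity, size_t available,
                         size_t allocated_since_cm,
                         unsigned alloc_threshold_pct) const {
    size_t target_free = capacity * free_threshold_pct / 100;
    size_t min_alloc   = capacity * alloc_threshold_pct / 100;
    return available < target_free && allocated_since_cm > min_alloc;
  }
};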
- } - size_t threshold = _heap->capacity() - max_cycle_allocated - max_live_data; - if (used > threshold) + size_t threshold_bytes_allocated = heap->capacity() * ShenandoahAllocationThreshold / 100; + if (available < targetStartMarking && + heap->bytes_allocated_since_cm() > threshold_bytes_allocated) { + // Need to check that an appropriate number of regions have + // been allocated since last concurrent mark too. shouldStartConcurrentMark = true; } @@ -417,7 +423,7 @@ } virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) { - if (_garbage + immediate_garbage < _min_garbage) { + if (_garbage + immediate_garbage < _min_garbage && ! r->is_empty()) { _garbage += r->garbage(); return true; } else { @@ -446,7 +452,7 @@ virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) { size_t min_ratio = 100 - ShenandoahGarbageThreshold; - if (_live * 100 / MAX2(_garbage + immediate_garbage, 1UL) < min_ratio) { + if (_live * 100 / MAX2(_garbage + immediate_garbage, 1UL) < min_ratio && ! r->is_empty()) { _garbage += r->garbage(); _live += r->get_live_data(); return true; @@ -456,7 +462,11 @@ } }; -ShenandoahCollectorPolicy::ShenandoahCollectorPolicy() : _cycle_counter(0) { +ShenandoahCollectorPolicy::ShenandoahCollectorPolicy() : + _cycle_counter(0), + _successful_cm(0), + _degenerated_cm(0) +{ ShenandoahHeapRegion::setup_heap_region_size(initial_heap_byte_size(), max_heap_byte_size()); @@ -630,6 +640,23 @@ return _heuristics->should_start_concurrent_mark(used, capacity); } +bool ShenandoahCollectorPolicy::handover_cancelled_marking() { + return _heuristics->handover_cancelled_marking(); +} + +void ShenandoahCollectorPolicy::record_cm_success() { + _heuristics->record_cm_success(); + _successful_cm++; +} + +void ShenandoahCollectorPolicy::record_cm_degenerated() { + _degenerated_cm++; +} + +void ShenandoahCollectorPolicy::record_cm_cancelled() { + _heuristics->record_cm_cancelled(); +} + void ShenandoahCollectorPolicy::choose_collection_set(ShenandoahCollectionSet* collection_set) { _heuristics->choose_collection_set(collection_set); } @@ -655,6 +682,8 @@ } out->print_cr("User requested GCs: "SIZE_FORMAT, _user_requested_gcs); out->print_cr("Allocation failure GCs: "SIZE_FORMAT, _allocation_failure_gcs); + out->print_cr("Successful concurrent markings: "SIZE_FORMAT, _successful_cm); + out->print_cr("Degenerated concurrent markings: "SIZE_FORMAT, _degenerated_cm); out->print_cr(" "); double total_sum = _timing_data[init_mark_gross]._ms.sum() + diff -r 17e523dc476c -r 145137908d2f src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Wed Jan 04 13:46:54 2017 +0100 @@ -127,6 +127,8 @@ size_t _user_requested_gcs; size_t _allocation_failure_gcs; + size_t _degenerated_cm; + size_t _successful_cm; ShenandoahHeap* _pgc; ShenandoahHeuristics* _heuristics; @@ -175,6 +177,11 @@ void record_bytes_start_CM(size_t bytes); void record_bytes_end_CM(size_t bytes); bool should_start_concurrent_mark(size_t used, size_t capacity); + bool handover_cancelled_marking(); + + void record_cm_cancelled(); + void record_cm_success(); + void record_cm_degenerated(); void choose_collection_set(ShenandoahCollectionSet* collection_set); void choose_free_set(ShenandoahFreeSet* free_set); diff -r 17e523dc476c -r 145137908d2f 
src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Wed Jan 04 13:46:54 2017 +0100 @@ -386,7 +386,7 @@ sh->conc_workers()->run_task(&markingTask, nworkers); } - assert(task_queues()->is_empty(), "Should be empty"); + assert(task_queues()->is_empty() || sh->cancelled_concgc(), "Should be empty when not cancelled"); if (! sh->cancelled_concgc()) { TASKQUEUE_STATS_ONLY(print_taskqueue_stats()); } @@ -897,14 +897,8 @@ while (true) { if (heap->cancelled_concgc()) { - clear_queue(q); - - // Clear other queues for termination - while ((q = queues->claim_next()) != NULL) { - clear_queue(q); - } - - while (! terminator->offer_termination()); + ShenandoahCancelledTerminatorTerminator tt; + while (! terminator->offer_termination(&tt)); return; } diff -r 17e523dc476c -r 145137908d2f src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 13:46:54 2017 +0100 @@ -69,6 +69,10 @@ service_fullgc_cycle(); } else if (heap->shenandoahPolicy()->should_start_concurrent_mark(heap->used(), heap->capacity())) { service_normal_cycle(); + if (heap->is_evacuation_in_progress()) { + MutexLocker mu(Threads_lock); + heap->set_evacuation_in_progress(false); + } } else { Thread::current()->_ParkEvent->park(10); } @@ -118,7 +122,22 @@ ShenandoahHeap::heap()->concurrentMark()->mark_from_roots(); } - if (check_cancellation()) return; + // Possibly hand over remaining marking work to final-mark phase. 
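The hand-over logic that follows counts back-to-back cancellations: a concurrent mark cancelled by allocation failure degenerates into the stop-the-world final-mark pause while the count stays at or below ShenandoahFullGCThreshold, and only escalates to a full GC beyond that. The bookkeeping amounts to a small counter; a hypothetical standalone form, where 3 mirrors the flag's default:

// Counter behind record_cm_cancelled()/record_cm_success() and
// handover_cancelled_marking(); names simplified for illustration.
struct DegenerationPolicy {
  unsigned cancelled_in_a_row;
  unsigned full_gc_threshold; // mirrors ShenandoahFullGCThreshold (default 3)

  bool handover_cancelled_marking() const {
    // Finish marking in the final-mark pause while cancellations are rare;
    // fall back to full GC once they keep happening.
    return cancelled_in_a_row <= full_gc_threshold;
  }
  void record_cm_cancelled() { cancelled_in_a_row++; }
  void record_cm_success()   { cancelled_in_a_row = 0; }
};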
+ bool clear_full_gc = false; + if (heap->cancelled_concgc()) { + heap->shenandoahPolicy()->record_cm_cancelled(); + if (_full_gc_cause == GCCause::_allocation_failure && + heap->shenandoahPolicy()->handover_cancelled_marking()) { + heap->set_cancelled_concgc(false); + clear_full_gc = true; + heap->shenandoahPolicy()->record_cm_degenerated(); + } else { + heap->gc_timer()->register_gc_end(); + return; + } + } else { + heap->shenandoahPolicy()->record_cm_success(); + } // Proceed to complete marking under STW, and start evacuation: { @@ -131,6 +150,11 @@ if (check_cancellation()) return; + // If we handed off remaining marking work above, we need to kick off waiting Java threads + if (clear_full_gc) { + reset_full_gc(); + } + // Continue concurrent evacuation: { // GCTraceTime time("Concurrent evacuation ", ShenandoahLogInfo, true, gc_timer, gc_tracer->gc_id()); @@ -139,11 +163,6 @@ } // Prepare for the next normal cycle: - if (heap->is_evacuation_in_progress()) { - MutexLocker mu(Threads_lock); - heap->set_evacuation_in_progress(false); - } - if (check_cancellation()) return; heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::reset_bitmaps); @@ -200,9 +219,6 @@ } reset_full_gc(); - - MonitorLockerEx ml(&_full_gc_lock); - ml.notify_all(); } void ShenandoahConcurrentThread::do_full_gc(GCCause::Cause cause) { @@ -230,6 +246,8 @@ void ShenandoahConcurrentThread::reset_full_gc() { OrderAccess::release_store_fence(&_do_full_gc, 0); + MonitorLockerEx ml(&_full_gc_lock); + ml.notify_all(); } bool ShenandoahConcurrentThread::try_set_full_gc() { diff -r 17e523dc476c -r 145137908d2f src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 13:46:54 2017 +0100 @@ -1928,6 +1928,7 @@ cancel_concgc(_oom_evacuation); if ((! Thread::current()->is_GC_task_thread()) && (! Thread::current()->is_ConcurrentGC_thread())) { + assert(! Threads_lock->owned_by_self(), "must not hold Threads_lock here"); log_warning(gc)("OOM during evacuation. Let Java thread wait until evacuation finishes."); while (_evacuation_in_progress) { // wait. Thread::current()->_ParkEvent->park(1); diff -r 17e523dc476c -r 145137908d2f src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Jan 04 13:46:54 2017 +0100 @@ -320,6 +320,7 @@ inline oop evacuate_object(oop src, Thread* thread); inline bool cancelled_concgc() const; + inline void set_cancelled_concgc(bool v); inline bool try_cancel_concgc() const; void clear_cancelled_concgc(); @@ -458,7 +459,6 @@ void set_concurrent_mark_in_progress(bool in_progress); void oom_during_evacuation(); - inline void set_cancelled_concgc(bool v); void verify_live(); void verify_liveness_after_concurrent_mark(); diff -r 17e523dc476c -r 145137908d2f src/share/vm/gc_implementation/shenandoah/shenandoahHeap.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.inline.hpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.inline.hpp Wed Jan 04 13:46:54 2017 +0100 @@ -252,6 +252,14 @@ alloc_from_gclab = false; } +#ifdef ASSERT + // Checking that current Java thread does not hold Threads_lock when we get here. + // If that ever be the case, we'd deadlock in oom_during_evacuation. 
+ if ((! Thread::current()->is_GC_task_thread()) && (! Thread::current()->is_ConcurrentGC_thread())) { + assert(! Threads_lock->owned_by_self(), "must not hold Threads_lock here"); + } +#endif + if (filler == NULL) { oom_during_evacuation(); // If this is a Java thread, it should have waited diff -r 17e523dc476c -r 145137908d2f src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.cpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.cpp Wed Jan 04 13:46:54 2017 +0100 @@ -76,7 +76,7 @@ _blocker->unlock(); - if (do_spin_master_work()) { + if (do_spin_master_work(terminator)) { assert(_offered_termination == _n_threads, "termination condition"); return true; } else { @@ -91,7 +91,7 @@ } } - if (peek_in_queue_set() || + if (((terminator == NULL || terminator->should_force_termination()) && peek_in_queue_set()) || (terminator != NULL && terminator->should_exit_termination())) { _offered_termination --; _blocker->unlock(); @@ -100,7 +100,7 @@ } } -bool ShenandoahTaskTerminator::do_spin_master_work() { +bool ShenandoahTaskTerminator::do_spin_master_work(TerminatorTerminator* terminator) { uint yield_count = 0; // Number of hard spin loops done since last yield uint hard_spin_count = 0; @@ -172,7 +172,7 @@ _total_peeks++; #endif size_t tasks = tasks_in_queue_set(); - if (tasks > 0) { + if (tasks > 0 && (terminator == NULL || ! terminator->should_force_termination())) { MonitorLockerEx locker(_blocker, Mutex::_no_safepoint_check_flag); // no safepoint check if ((int) tasks >= _offered_termination - 1) { diff -r 17e523dc476c -r 145137908d2f src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.hpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.hpp Wed Jan 04 13:46:54 2017 +0100 @@ -194,7 +194,16 @@ * return true if termination condition is detected * otherwise, return false */ - bool do_spin_master_work(); + bool do_spin_master_work(TerminatorTerminator* terminator); +}; + +class ShenandoahCancelledTerminatorTerminator : public TerminatorTerminator { + virtual bool should_exit_termination() { + return false; + } + virtual bool should_force_termination() { + return true; + } }; #endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAH_TASKQUEUE_HPP diff -r 17e523dc476c -r 145137908d2f src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Jan 04 13:46:54 2017 +0100 @@ -92,6 +92,11 @@ product(size_t, PreTouchParallelChunkSize, 1 * G, \ "Per-thread chunk size for parallel memory pre-touch.") \ \ + product_rw(uintx, ShenandoahFullGCThreshold, 3, \ + "How many cycles in a row to do degenerated marking on " \ + "cancelled GC before triggering a full-gc" \ + "Defaults to 3") \ + \ product_rw(uintx, ShenandoahGarbageThreshold, 60, \ "Sets the percentage of garbage a region need to contain before " \ "it can be marked for collection. Applies to " \ @@ -116,6 +121,19 @@ "Applies to Shenandoah GC dynamic Heuristic mode only " \ "(ignored otherwise). 
Defaults to 0%.") \ \ + experimental(uintx, ShenandoahInitFreeThreshold, 10, \ + "Initial remaining free threshold for adaptive heuristics") \ + \ + experimental(uintx, ShenandoahMinFreeThreshold, 3, \ + "Minimum remaining free threshold for adaptive heuristics") \ + \ + experimental(uintx, ShenandoahMaxFreeThreshold, 30, \ + "Maximum remaining free threshold for adaptive heuristics") \ + \ + experimental(uintx, ShenandoahHappyCyclesThreshold, 5, \ + "How many successful marking cycles before improving free " \ + "threshold for adaptive heuristics") \ + \ experimental(uint, ShenandoahMarkLoopStride, 1000, \ "How many items are processed during one marking step") \ \ diff -r 17e523dc476c -r 145137908d2f src/share/vm/utilities/taskqueue.cpp --- a/src/share/vm/utilities/taskqueue.cpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/utilities/taskqueue.cpp Wed Jan 04 13:46:54 2017 +0100 @@ -229,7 +229,7 @@ #ifdef TRACESPINNING _total_peeks++; #endif - if (peek_in_queue_set() || + if ((peek_in_queue_set() && (terminator == NULL || ! terminator->should_force_termination())) || (terminator != NULL && terminator->should_exit_termination())) { Atomic::dec(&_offered_termination); assert(_offered_termination < _n_threads, "Invariant"); diff -r 17e523dc476c -r 145137908d2f src/share/vm/utilities/taskqueue.hpp --- a/src/share/vm/utilities/taskqueue.hpp Wed Jan 04 13:26:34 2017 +0100 +++ b/src/share/vm/utilities/taskqueue.hpp Wed Jan 04 13:46:54 2017 +0100 @@ -602,6 +602,7 @@ class TerminatorTerminator: public CHeapObj { public: virtual bool should_exit_termination() = 0; + virtual bool should_force_termination() { return false; } }; // A class to aid in the termination of a set of parallel tasks using @@ -636,6 +637,7 @@ // else is. If returns "true", all threads are terminated. If returns // "false", available work has been observed in one of the task queues, // so the global task is not complete. + // If force is set to true, it terminates even if there's remaining work left virtual bool offer_termination() { return offer_termination(NULL); } @@ -643,6 +645,7 @@ // As above, but it also terminates if the should_exit_termination() // method of the terminator parameter returns true. If terminator is // NULL, then it is ignored. + // If force is set to true, it terminates even if there's remaining work left bool offer_termination(TerminatorTerminator* terminator); // Reset the terminator, so that it may be reused again. changeset: 9506:5fe3f645db28 user: rkennke date: Wed Jan 04 13:46:54 2017 +0100 summary: Enable UseCountedLoopSafepoints with Shenandoah. diff -r 145137908d2f -r 5fe3f645db28 src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Wed Jan 04 13:46:54 2017 +0100 +++ b/src/share/vm/runtime/arguments.cpp Wed Jan 04 13:46:54 2017 +0100 @@ -1739,6 +1739,13 @@ FLAG_SET_DEFAULT(PerfDataMemorySize, 512*K); } + // Shenandoah cares more about pause times, rather than raw throughput. + // Enabling safepoints in counted loops makes it more responsive with + // long loops. + if (FLAG_IS_DEFAULT(UseCountedLoopSafepoints)) { + FLAG_SET_DEFAULT(UseCountedLoopSafepoints, true); + } + if (AlwaysPreTouch) { // Shenandoah handles pre-touch on its own.
It does not let the // generic storage code to do the pre-touch before Shenandoah has changeset: 9507:9e21fa63bbf8 user: rkennke date: Wed Jan 04 14:36:29 2017 +0100 summary: Improve AryEq instruction by avoiding false negatives with a Shenandoah cmp barrier diff -r 5fe3f645db28 -r 9e21fa63bbf8 src/cpu/aarch64/vm/macroAssembler_aarch64.cpp --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed Jan 04 13:46:54 2017 +0100 +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed Jan 04 14:36:29 2017 +0100 @@ -4919,6 +4919,7 @@ // same array? cmp(ary1, ary2); + oopDesc::bs()->asm_acmp_barrier(this, ary1, ary2); br(Assembler::EQ, SAME); // ne if either null diff -r 5fe3f645db28 -r 9e21fa63bbf8 src/cpu/x86/vm/macroAssembler_x86.cpp --- a/src/cpu/x86/vm/macroAssembler_x86.cpp Wed Jan 04 13:46:54 2017 +0100 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Wed Jan 04 14:36:29 2017 +0100 @@ -7173,6 +7173,9 @@ // Check the input args cmpptr(ary1, ary2); + if (is_array_equ) { + oopDesc::bs()->asm_acmp_barrier(this, ary1, ary2); + } jcc(Assembler::equal, TRUE_LABEL); if (is_array_equ) { changeset: 9508:714dea8cd74c user: rkennke date: Wed Jan 04 14:36:38 2017 +0100 summary: Refactor concurrent mark to be more inlineable. diff -r 9e21fa63bbf8 -r 714dea8cd74c src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Wed Jan 04 14:36:29 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Wed Jan 04 14:36:38 2017 +0100 @@ -893,8 +893,15 @@ int seed = 17; uint stride = ShenandoahMarkLoopStride; SCMObjToScanQueueSet* queues = task_queues(); - bool done_queues = false; + // Drain outstanding queues first + if (!concurrent_process_queues(heap, q, cl)) { + ShenandoahCancelledTerminatorTerminator tt; + while (! 
terminator->offer_termination(&tt)); + return; + } + + // Normal loop while (true) { if (heap->cancelled_concgc()) { ShenandoahCancelledTerminatorTerminator tt; @@ -902,18 +909,13 @@ return; } - if (!done_queues) { - done_queues = true; - if (!concurrent_process_queues(heap, q, cl)) { - // concurrent GC cancelled - continue; - } - } - + ObjArrayFromToTask t; for (uint i = 0; i < stride; i++) { - if (!try_queue(q, cl) && - !try_draining_an_satb_buffer(q) && - !try_to_steal(worker_id, cl, &seed)) { + if (try_queue(q, t) || + try_draining_satb_buffer(q, t) || + queues->steal(worker_id, &seed, t)) { + cl->do_object_or_array(t.obj(), t.from(), t.to()); + } else { if (terminator->offer_termination()) return; } } @@ -928,8 +930,11 @@ while (true) { if (heap->cancelled_concgc()) return false; + ObjArrayFromToTask t; for (uint i = 0; i < stride; i++) { - if (!try_queue(q, cl)) { + if (try_queue(q, t)) { + cl->do_object_or_array(t.obj(), t.from(), t.to()); + } else { assert(q->is_empty(), "Must be empty"); q = queues->claim_next(); if (q == NULL) { @@ -947,10 +952,15 @@ SCMObjToScanQueue* q, ParallelTaskTerminator* terminator) { int seed = 17; + SCMObjToScanQueueSet* queues = task_queues(); + + ObjArrayFromToTask t; while (true) { - if (!try_queue(q, cl) && - !try_to_steal(worker_id, cl, &seed)) { - if (terminator->offer_termination()) break; + if (try_queue(q, t) || + queues->steal(worker_id, &seed, t)) { + cl->do_object_or_array(t.obj(), t.from(), t.to()); + } else { + if (terminator->offer_termination()) return; } } } diff -r 9e21fa63bbf8 -r 714dea8cd74c src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Wed Jan 04 14:36:29 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Wed Jan 04 14:36:38 2017 +0100 @@ -106,16 +106,12 @@ template void final_mark_loop(ShenandoahMarkObjsClosure* cl, uint worker_id, SCMObjToScanQueue* q, ParallelTaskTerminator* t); - template - inline bool try_queue(SCMObjToScanQueue* q, ShenandoahMarkObjsClosure* cl); - - template - inline bool try_to_steal(uint worker_id, ShenandoahMarkObjsClosure* cl, int *seed); + inline bool try_queue(SCMObjToScanQueue* q, ObjArrayFromToTask &task); SCMObjToScanQueue* get_queue(uint worker_id); void clear_queue(SCMObjToScanQueue *q); - inline bool try_draining_an_satb_buffer(SCMObjToScanQueue* q); + inline bool try_draining_satb_buffer(SCMObjToScanQueue *q, ObjArrayFromToTask &task); void drain_satb_buffers(uint worker_id, bool remark = false); SCMObjToScanQueueSet* task_queues() { return _task_queues;} diff -r 9e21fa63bbf8 -r 714dea8cd74c src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Wed Jan 04 14:36:29 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Wed Jan 04 14:36:38 2017 +0100 @@ -106,28 +106,10 @@ array->oop_iterate_range(&_mark_refs, from, to); } -template -inline bool ShenandoahConcurrentMark::try_queue(SCMObjToScanQueue* q, ShenandoahMarkObjsClosure* cl) { - ObjArrayFromToTask task; - if (q->pop_buffer(task) || +inline bool ShenandoahConcurrentMark::try_queue(SCMObjToScanQueue* q, ObjArrayFromToTask &task) { + return (q->pop_buffer(task) || q->pop_local(task) || - q->pop_overflow(task)) { - assert(task.obj() != NULL, "Can't mark null"); - cl->do_object_or_array(task.obj(), task.from(), task.to()); - return true; - } else { - return false; 
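The refactoring above funnels every successfully pulled task, whether from the local queue, an SATB buffer, or a steal, through one do_object_or_array() call site instead of three templated helpers, which is what makes the hot loop body inlineable. Its control flow, sketched with stub types; Task, Worker, and Terminator are stand-ins, not the real SCMObjToScanQueue machinery:

#include <deque>

struct Task { void* obj; int from, to; };

// Stub worker: the three task sources are tried in order, cheapest first.
struct Worker {
  std::deque<Task> local;
  bool pop_local(Task& t)  { if (local.empty()) return false; t = local.front(); local.pop_front(); return true; }
  bool drain_satb(Task& t) { (void) t; return false; } // stand-in for SATB buffer drain
  bool steal(Task& t)      { (void) t; return false; } // stand-in for queue-set stealing
  void process(const Task& t) { (void) t; }            // single dispatch point
};

struct Terminator { bool offer_termination() { return true; } };

static void mark_loop(Worker& w, Terminator& term, unsigned stride) {
  Task t;
  for (;;) {
    // stride plays the role of ShenandoahMarkLoopStride above.
    for (unsigned i = 0; i < stride; i++) {
      if (w.pop_local(t) || w.drain_satb(t) || w.steal(t)) {
        w.process(t); // one call site instead of three templated paths
      } else if (term.offer_termination()) {
        return;
      }
    }
  }
}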
- } -} - -template -inline bool ShenandoahConcurrentMark::try_to_steal(uint worker_id, ShenandoahMarkObjsClosure* cl, int *seed) { - ObjArrayFromToTask task; - if (task_queues()->steal(worker_id, seed, task)) { - cl->do_object_or_array(task.obj(), task.from(), task.to()); - return true; - } else - return false; + q->pop_overflow(task)); } class ShenandoahSATBBufferClosure : public SATBBufferClosure { @@ -152,10 +134,11 @@ } }; -inline bool ShenandoahConcurrentMark:: try_draining_an_satb_buffer(SCMObjToScanQueue* q) { +inline bool ShenandoahConcurrentMark:: try_draining_satb_buffer(SCMObjToScanQueue *q, ObjArrayFromToTask &task) { ShenandoahSATBBufferClosure cl(q); SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); - return satb_mq_set.apply_closure_to_completed_buffer(&cl); + bool had_refs = satb_mq_set.apply_closure_to_completed_buffer(&cl); + return had_refs && try_queue(q, task); } inline void ShenandoahConcurrentMark::mark_and_push(oop obj, ShenandoahHeap* heap, SCMObjToScanQueue* q) { diff -r 9e21fa63bbf8 -r 714dea8cd74c src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 14:36:29 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 14:36:38 2017 +0100 @@ -23,6 +23,7 @@ #include "gc_implementation/shared/gcTimer.hpp" #include "gc_implementation/shared/gcTraceTime.hpp" +#include "gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentThread.hpp" #include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" diff -r 9e21fa63bbf8 -r 714dea8cd74c src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 14:36:29 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 14:36:38 2017 +0100 @@ -32,6 +32,7 @@ #include "gc_implementation/shenandoah/shenandoahCollectionSet.hpp" #include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentMark.hpp" +#include "gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentThread.hpp" #include "gc_implementation/shenandoah/shenandoahFreeSet.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" diff -r 9e21fa63bbf8 -r 714dea8cd74c src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Wed Jan 04 14:36:29 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Wed Jan 04 14:36:38 2017 +0100 @@ -27,6 +27,7 @@ #include "gc_implementation/shared/isGCActiveMark.hpp" #include "gc_implementation/shenandoah/brooksPointer.hpp" #include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" +#include "gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp" #include "gc_implementation/shenandoah/shenandoahMarkCompact.hpp" #include "gc_implementation/shenandoah/shenandoahBarrierSet.hpp" #include "gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp" diff -r 9e21fa63bbf8 -r 714dea8cd74c src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp --- a/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Wed Jan 04 14:36:29 2017 
+0100 +++ b/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Wed Jan 04 14:36:38 2017 +0100 @@ -24,6 +24,7 @@ #include "gc_implementation/shared/gcTraceTime.hpp" #include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc_implementation/shenandoah/shenandoahMarkCompact.hpp" +#include "gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp" #include "gc_implementation/shenandoah/vm_operations_shenandoah.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" changeset: 9509:835e79217215 user: rkennke date: Wed Jan 11 15:52:26 2017 +0100 summary: Fix another deadlock with oom_during_evacuation() diff -r 714dea8cd74c -r 835e79217215 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 04 14:36:38 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Jan 11 15:52:26 2017 +0100 @@ -71,8 +71,7 @@ } else if (heap->shenandoahPolicy()->should_start_concurrent_mark(heap->used(), heap->capacity())) { service_normal_cycle(); if (heap->is_evacuation_in_progress()) { - MutexLocker mu(Threads_lock); - heap->set_evacuation_in_progress(false); + heap->set_evacuation_in_progress_concurrently(false); } } else { Thread::current()->_ParkEvent->park(10); diff -r 714dea8cd74c -r 835e79217215 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 04 14:36:38 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 11 15:52:26 2017 +0100 @@ -1900,8 +1900,22 @@ JavaThread::satb_mark_queue_set().set_active_all_threads(in_progress, !in_progress); } +void ShenandoahHeap::set_evacuation_in_progress_concurrently(bool in_progress) { + // Note: it is important to first release the _evacuation_in_progress flag here, + // so that Java threads can get out of oom_during_evacuation() and reach a safepoint, + // in case a VM task is pending. + set_evacuation_in_progress(in_progress); + MutexLocker mu(Threads_lock); + JavaThread::set_evacuation_in_progress_all_threads(in_progress); +} + +void ShenandoahHeap::set_evacuation_in_progress_at_safepoint(bool in_progress) { + assert(SafepointSynchronize::is_at_safepoint(), "Only call this at safepoint"); + set_evacuation_in_progress(in_progress); + JavaThread::set_evacuation_in_progress_all_threads(in_progress); +} + void ShenandoahHeap::set_evacuation_in_progress(bool in_progress) { - JavaThread::set_evacuation_in_progress_all_threads(in_progress); _evacuation_in_progress = in_progress ? 
1 : 0; OrderAccess::fence(); } diff -r 714dea8cd74c -r 835e79217215 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Jan 04 14:36:38 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Jan 11 15:52:26 2017 +0100 @@ -285,8 +285,13 @@ void prepare_for_concurrent_evacuation(); void evacuate_and_update_roots(); + +private: + void set_evacuation_in_progress(bool in_progress); +public: inline bool is_evacuation_in_progress(); - void set_evacuation_in_progress(bool in_progress); + void set_evacuation_in_progress_concurrently(bool in_progress); + void set_evacuation_in_progress_at_safepoint(bool in_progress); void set_full_gc_in_progress(bool in_progress); bool is_full_gc_in_progress() const; diff -r 714dea8cd74c -r 835e79217215 src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Wed Jan 04 14:36:38 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Wed Jan 11 15:52:26 2017 +0100 @@ -119,7 +119,7 @@ // b. Cancel evacuation, if in progress if (_heap->is_evacuation_in_progress()) { - _heap->set_evacuation_in_progress(false); + _heap->set_evacuation_in_progress_at_safepoint(false); } assert(!_heap->is_evacuation_in_progress(), "sanity"); diff -r 714dea8cd74c -r 835e79217215 src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp --- a/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Wed Jan 04 14:36:38 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Wed Jan 11 15:52:26 2017 +0100 @@ -114,7 +114,7 @@ sh->prepare_for_concurrent_evacuation(); sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::prepare_evac); - sh->set_evacuation_in_progress(true); + sh->set_evacuation_in_progress_at_safepoint(true); // From here on, we need to update references. sh->set_need_update_refs(true); @@ -123,9 +123,6 @@ sh->evacuate_and_update_roots(); sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::init_evac); - if (sh->cancelled_concgc()) { - sh->set_evacuation_in_progress(false); - } } else { GCTraceTime time("Cancel concurrent Mark", ShenandoahLogInfo, true, sh->gc_timer(), sh->tracer()->gc_id()); sh->concurrentMark()->cancel(); changeset: 9510:72a422e2fc2e user: roland date: Mon Jul 25 14:31:42 2016 -0700 summary: 8161147: jvm crashes when -XX:+UseCountedLoopSafepoints is enabled diff -r 835e79217215 -r 72a422e2fc2e src/share/vm/opto/loopnode.cpp --- a/src/share/vm/opto/loopnode.cpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/opto/loopnode.cpp Mon Jul 25 14:31:42 2016 -0700 @@ -278,8 +278,16 @@ return false; // Allow funny placement of Safepoint - if (back_control->Opcode() == Op_SafePoint) + if (back_control->Opcode() == Op_SafePoint) { + if (UseCountedLoopSafepoints) { + // Leaving the safepoint on the backedge and creating a + // CountedLoop will confuse optimizations. We can't move the + // safepoint around because its jvm state wouldn't match a new + // location. Give up on that loop. 
+ return false; + } back_control = back_control->in(TypeFunc::Control); + } // Controlling test for loop Node *iftrue = back_control; diff -r 835e79217215 -r 72a422e2fc2e test/compiler/loopopts/TestCountedLoopSafepointBackedge.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/loopopts/TestCountedLoopSafepointBackedge.java Mon Jul 25 14:31:42 2016 -0700 @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8161147 + * @summary Safepoint on backedge breaks UseCountedLoopSafepoints + * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -XX:+UseCountedLoopSafepoints TestCountedLoopSafepointBackedge + * + */ + +public class TestCountedLoopSafepointBackedge { + static void test(int[] arr, int inc) { + int i = 0; + for (;;) { + for (int j = 0; j < 10; j++); + arr[i] = i; + i++; + if (i >= 100) { + break; + } + for (int j = 0; j < 10; j++); + } + } + + static public void main(String[] args) { + int[] arr = new int[100]; + for (int i = 0; i < 20000; i++) { + test(arr, 1); + } + } +} changeset: 9511:55fa9e39143e parent: 9509:835e79217215 user: shade date: Thu Jan 05 12:33:55 2017 +0100 summary: Thread-local buffers for liveness data. diff -r 835e79217215 -r 55fa9e39143e src/share/vm/gc_implementation/shenandoah/shenandoahCollectionSet.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectionSet.cpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectionSet.cpp Thu Jan 05 12:33:55 2017 +0100 @@ -36,7 +36,7 @@ void ShenandoahCollectionSet::add_region(ShenandoahHeapRegion* r) { ShenandoahHeapRegionSet::add_region(r); _garbage += r->garbage(); - _live_data += r->get_live_data(); + _live_data += r->get_live_data_bytes(); } size_t ShenandoahCollectionSet::garbage() { diff -r 835e79217215 -r 55fa9e39143e src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Thu Jan 05 12:33:55 2017 +0100 @@ -148,7 +148,7 @@ ShenandoahHeapRegion* region = sorted_regions->get(i); if (! region->is_humongous() && ! region->is_pinned()) { - if ((! region->is_empty()) && region->get_live_data() == 0) { + if ((! region->is_empty()) && ! region->has_live()) { // We can recycle it right away and put it in the free set. 
immediate_regions++; immediate_garbage += region->garbage(); @@ -156,19 +156,19 @@ region->recycle(); log_develop_trace(gc)("Choose region " SIZE_FORMAT " for immediate reclaim with garbage = " SIZE_FORMAT " and live = " SIZE_FORMAT "\n", - region->region_number(), region->garbage(), region->get_live_data()); + region->region_number(), region->garbage(), region->get_live_data_bytes()); } else if (region_in_collection_set(region, immediate_garbage)) { log_develop_trace(gc)("Choose region " SIZE_FORMAT " with garbage = " SIZE_FORMAT " and live = " SIZE_FORMAT "\n", - region->region_number(), region->garbage(), region->get_live_data()); + region->region_number(), region->garbage(), region->get_live_data_bytes()); collection_set->add_region(region); region->set_in_collection_set(true); } } else { - assert(region->get_live_data() != 0 || region->is_empty() || region->is_pinned() || region->is_humongous(), "check rejected"); + assert(region->has_live() || region->is_empty() || region->is_pinned() || region->is_humongous(), "check rejected"); log_develop_trace(gc)("Rejected region " SIZE_FORMAT " with garbage = " SIZE_FORMAT " and live = " SIZE_FORMAT "\n", - region->region_number(), region->garbage(), region->get_live_data()); + region->region_number(), region->garbage(), region->get_live_data_bytes()); } } @@ -454,7 +454,7 @@ size_t min_ratio = 100 - ShenandoahGarbageThreshold; if (_live * 100 / MAX2(_garbage + immediate_garbage, 1UL) < min_ratio && ! r->is_empty()) { _garbage += r->garbage(); - _live += r->get_live_data(); + _live += r->get_live_data_bytes(); return true; } else { return false; diff -r 835e79217215 -r 55fa9e39143e src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Thu Jan 05 12:33:55 2017 +0100 @@ -105,20 +105,29 @@ // Mark the object and add it to the queue to be scanned template -ShenandoahMarkObjsClosure::ShenandoahMarkObjsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) : +ShenandoahMarkObjsClosure::ShenandoahMarkObjsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp, jushort* live_data) : _heap((ShenandoahHeap*)(Universe::heap())), _queue(q), _mark_refs(T(q, rp)), - _last_region_idx(0), - _live_data(0) + _live_data(live_data) { + if (CL) { + Copy::fill_to_bytes(_live_data, _heap->max_regions() * sizeof(jushort)); + } } template ShenandoahMarkObjsClosure::~ShenandoahMarkObjsClosure() { if (CL) { - ShenandoahHeapRegion *r = _heap->regions()->get(_last_region_idx); - r->increase_live_data(_live_data); + for (uint i = 0; i < _heap->max_regions(); i++) { + ShenandoahHeapRegion* r = _heap->regions()->get(i); + if (r != NULL) { + jushort live = _live_data[i]; + if (live > 0) { + r->increase_live_data_words(live); + } + } + } } } @@ -202,6 +211,7 @@ void work(uint worker_id) { SCMObjToScanQueue* q = _cm->get_queue(worker_id); + jushort* live_data = _cm->get_liveness(worker_id); ReferenceProcessor* rp; if (_cm->process_references()) { rp = ShenandoahHeap::heap()->ref_processor(); @@ -217,10 +227,10 @@ } } if (_update_refs) { - ShenandoahMarkObjsClosure cl(q, rp); + ShenandoahMarkObjsClosure cl(q, rp, live_data); _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); } else { - ShenandoahMarkObjsClosure cl(q, rp); + ShenandoahMarkObjsClosure cl(q, rp, live_data); _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); } } @@ -253,21 +263,23 @@ rp = NULL; } 
SCMObjToScanQueue* q = _cm->get_queue(worker_id); + jushort* live_data = _cm->get_liveness(worker_id); + // Templates need constexprs, so we have to switch by the flags ourselves. if (_update_refs) { if (_count_live) { - ShenandoahMarkObjsClosure cl(q, rp); + ShenandoahMarkObjsClosure cl(q, rp, live_data); _cm->final_mark_loop(&cl, worker_id, q, _terminator); } else { - ShenandoahMarkObjsClosure cl(q, rp); + ShenandoahMarkObjsClosure cl(q, rp, live_data); _cm->final_mark_loop(&cl, worker_id, q, _terminator); } } else { if (_count_live) { - ShenandoahMarkObjsClosure cl(q, rp); + ShenandoahMarkObjsClosure cl(q, rp, live_data); _cm->final_mark_loop(&cl, worker_id, q, _terminator); } else { - ShenandoahMarkObjsClosure cl(q, rp); + ShenandoahMarkObjsClosure cl(q, rp, live_data); _cm->final_mark_loop(&cl, worker_id, q, _terminator); } } @@ -353,6 +365,12 @@ _claimed_codecache = 0; JavaThread::satb_mark_queue_set().set_buffer_size(ShenandoahSATBBufferSize); + + size_t max_regions = ShenandoahHeap::heap()->max_regions(); + _liveness_local = NEW_C_HEAP_ARRAY(jushort*, workers, mtGC); + for (uint worker = 0; worker < workers; worker++) { + _liveness_local[worker] = NEW_C_HEAP_ARRAY(jushort, max_regions, mtGC); + } } void ShenandoahConcurrentMark::mark_from_roots() { @@ -628,11 +646,12 @@ rp = NULL; } SCMObjToScanQueue* q = scm->get_queue(_worker_id); + jushort* live_data = scm->get_liveness(_worker_id); if (sh->need_update_refs()) { - ShenandoahMarkObjsClosure cl(q, rp); + ShenandoahMarkObjsClosure cl(q, rp, live_data); scm->final_mark_loop(&cl, _worker_id, q, _terminator); } else { - ShenandoahMarkObjsClosure cl(q, rp); + ShenandoahMarkObjsClosure cl(q, rp, live_data); scm->final_mark_loop(&cl, _worker_id, q, _terminator); } } @@ -987,6 +1006,10 @@ return old == 0; } +jushort* ShenandoahConcurrentMark::get_liveness(uint worker_id) { + return _liveness_local[worker_id]; +} + void ShenandoahConcurrentMark::clear_claim_codecache() { assert(ShenandoahConcurrentCodeRoots, "must not be called otherwise"); _claimed_codecache = 0; diff -r 835e79217215 -r 55fa9e39143e src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Thu Jan 05 12:33:55 2017 +0100 @@ -50,10 +50,9 @@ ShenandoahHeap* _heap; T _mark_refs; SCMObjToScanQueue* _queue; - uint _last_region_idx; - size_t _live_data; + jushort* _live_data; public: - ShenandoahMarkObjsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp); + ShenandoahMarkObjsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp, jushort* live_data); ~ShenandoahMarkObjsClosure(); inline void do_object_or_array(oop obj, int from, int to); @@ -72,6 +71,16 @@ jbyte _claimed_codecache; + // Used for buffering per-region liveness data. + // Needed since ShenandoahHeapRegion uses atomics to update liveness. + // + // The array has max-workers elements, each of which is an array of + // jushort * max_regions. The choice of jushort is not accidental: + // there is a tradeoff between static/dynamic footprint that translates + // into cache pressure (which is already high during marking), and + // too many atomic updates. size_t/jint is too large, jbyte is too small. + jushort** _liveness_local; + public: // We need to do this later when the heap is already created. 
void initialize(uint workers); @@ -115,6 +124,8 @@ void drain_satb_buffers(uint worker_id, bool remark = false); SCMObjToScanQueueSet* task_queues() { return _task_queues;} + jushort* get_liveness(uint worker_id); + void cancel(); private: diff -r 835e79217215 -r 55fa9e39143e src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Thu Jan 05 12:33:55 2017 +0100 @@ -78,13 +78,22 @@ inline void ShenandoahMarkObjsClosure::count_liveness(oop obj) { if (!CL) return; // no need to count liveness! uint region_idx = _heap->heap_region_index_containing(obj); - if (region_idx == _last_region_idx) { - _live_data += (obj->size() + BrooksPointer::word_size()) * HeapWordSize; + jushort cur = _live_data[region_idx]; + int size = obj->size() + BrooksPointer::word_size(); + int max = (1 << (sizeof(jushort) * 8)) - 1; + if (size >= max) { + // too big, add to region data directly + _heap->regions()->get_fast(region_idx)->increase_live_data_words(size); } else { - ShenandoahHeapRegion* r = _heap->regions()->get(_last_region_idx); - r->increase_live_data(_live_data); - _last_region_idx = region_idx; - _live_data = (obj->size() + BrooksPointer::word_size()) * HeapWordSize; + int new_val = cur + size; + if (new_val >= max) { + // overflow, flush to region data + _heap->regions()->get_fast(region_idx)->increase_live_data_words(new_val); + _live_data[region_idx] = 0; + } else { + // still good, remember in locals + _live_data[region_idx] = (jushort) new_val; + } } } diff -r 835e79217215 -r 55fa9e39143e src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.cpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahFreeSet.cpp Thu Jan 05 12:33:55 2017 +0100 @@ -115,7 +115,7 @@ assert(current->is_empty(), "must be empty"); current->set_top(current->end()); - current->increase_live_data(ShenandoahHeapRegion::RegionSizeBytes); + current->increase_live_data_words(ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize); } increase_used(ShenandoahHeapRegion::RegionSizeBytes * num); ShenandoahHeap::heap()->increase_used(ShenandoahHeapRegion::RegionSizeBytes * num); diff -r 835e79217215 -r 55fa9e39143e src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Thu Jan 05 12:33:55 2017 +0100 @@ -679,7 +679,7 @@ result = my_current_region->par_allocate(word_size); } - my_current_region->increase_live_data(word_size * HeapWordSize); + my_current_region->increase_live_data_words(word_size); increase_used(word_size * HeapWordSize); _free_regions->increase_used(word_size * HeapWordSize); return result; @@ -791,7 +791,7 @@ void ShenandoahHeap::parallel_evacuate_region(ShenandoahHeapRegion* from_region) { - assert(from_region->get_live_data() > 0, "all-garbage regions are reclaimed earlier"); + assert(from_region->has_live(), "all-garbage regions are reclaimed earlier"); ParallelEvacuateRegionObjectClosure evacuate_region(this); @@ -825,7 +825,7 @@ worker_id, from_hr->region_number()); - assert(from_hr->get_live_data() > 0, "all-garbage regions are reclaimed early"); + assert(from_hr->has_live(), "all-garbage regions are reclaimed early"); 
_sh->parallel_evacuate_region(from_hr); if (_sh->cancelled_concgc()) { @@ -1039,7 +1039,7 @@ uint index = r->region_number(); - assert(r->get_live_data() == 0, "liveness must be zero"); + assert(!r->has_live(), "liveness must be zero"); for(size_t i = 0; i < required_regions; i++) { diff -r 835e79217215 -r 55fa9e39143e src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp Thu Jan 05 12:33:55 2017 +0100 @@ -65,18 +65,25 @@ void ShenandoahHeapRegion::set_live_data(size_t s) { assert(Thread::current()->is_VM_thread(), "by VM thread"); - _live_data = s; + _live_data = (jint) (s / HeapWordSize); } -size_t ShenandoahHeapRegion::get_live_data() const { - assert (sizeof(julong) == sizeof(size_t), "do not read excessively"); - return (size_t)OrderAccess::load_acquire((volatile julong*)&_live_data); +size_t ShenandoahHeapRegion::get_live_data_words() const { + return (size_t)OrderAccess::load_acquire((volatile jint*)&_live_data); +} + +size_t ShenandoahHeapRegion::get_live_data_bytes() const { + return get_live_data_words() * HeapWordSize; +} + +bool ShenandoahHeapRegion::has_live() const { + return get_live_data_words() != 0; } size_t ShenandoahHeapRegion::garbage() const { - assert(used() >= get_live_data() || is_humongous(), err_msg("Live Data must be a subset of used() live: "SIZE_FORMAT" used: "SIZE_FORMAT, - get_live_data(), used())); - size_t result = used() - get_live_data(); + assert(used() >= get_live_data_bytes() || is_humongous(), err_msg("Live Data must be a subset of used() live: "SIZE_FORMAT" used: "SIZE_FORMAT, + get_live_data_bytes(), used())); + size_t result = used() - get_live_data_bytes(); return result; } @@ -154,7 +161,7 @@ st->print(" "); st->print_cr("live = "SIZE_FORMAT" garbage = "SIZE_FORMAT" bottom = "PTR_FORMAT" end = "PTR_FORMAT" top = "PTR_FORMAT, - get_live_data(), garbage(), p2i(bottom()), p2i(end()), p2i(top())); + get_live_data_bytes(), garbage(), p2i(bottom()), p2i(end()), p2i(top())); } diff -r 835e79217215 -r 55fa9e39143e src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.hpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.hpp Thu Jan 05 12:33:55 2017 +0100 @@ -40,7 +40,7 @@ private: ShenandoahHeap* _heap; size_t _region_number; - volatile size_t _live_data; + volatile jint _live_data; MemRegion reserved; bool _humongous_start; @@ -68,9 +68,11 @@ void clear_live_data(); void set_live_data(size_t s); - inline void increase_live_data(size_t s); + inline void increase_live_data_words(jint s); - size_t get_live_data() const; + bool has_live() const; + size_t get_live_data_bytes() const; + size_t get_live_data_words() const; void print_on(outputStream* st) const; diff -r 835e79217215 -r 55fa9e39143e src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.inline.hpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.inline.hpp Thu Jan 05 12:33:55 2017 +0100 @@ -27,9 +27,14 @@ #include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" #include "runtime/atomic.hpp" -inline void ShenandoahHeapRegion::increase_live_data(size_t s) { - size_t new_live_data = (size_t) Atomic::add_ptr(s, (intptr_t*) &_live_data); - 
assert(new_live_data <= used() || is_humongous(), "can't have more live data than used"); +inline void ShenandoahHeapRegion::increase_live_data_words(jint s) { + jint new_live_data = Atomic::add(s, &_live_data); +#ifdef ASSERT + size_t live_bytes = (size_t)(new_live_data * HeapWordSize); + size_t used_bytes = used(); + assert(live_bytes <= used_bytes || is_humongous(), + err_msg("can't have more live data than used: " SIZE_FORMAT ", " SIZE_FORMAT, live_bytes, used_bytes)); +#endif } #endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHHEAPREGION_INLINE_HPP diff -r 835e79217215 -r 55fa9e39143e src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.cpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.cpp Thu Jan 05 12:33:55 2017 +0100 @@ -83,7 +83,7 @@ if (i < num_regions) { ShenandoahHeapRegion* r = regions->get(i); jlong data = (r->used() & USED_MASK) << USED_SHIFT; - data |= (r->get_live_data() & LIVE_MASK) << LIVE_SHIFT; + data |= (r->get_live_data_bytes() & LIVE_MASK) << LIVE_SHIFT; jlong flags = 0; if (r->in_collection_set()) flags |= 1 << 0; if (r->is_humongous()) flags |= 1 << 1; diff -r 835e79217215 -r 55fa9e39143e src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp Wed Jan 11 15:52:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp Thu Jan 05 12:33:55 2017 +0100 @@ -72,6 +72,10 @@ size_t count() const; ShenandoahHeapRegion* get(size_t i) const; + inline ShenandoahHeapRegion* get_fast(size_t i) const { + assert (i < _active_end, "sanity"); + return _regions[i]; + } virtual void add_region(ShenandoahHeapRegion* r); changeset: 9512:87ed01562715 user: shade date: Mon Jan 09 14:39:01 2017 +0100 summary: Percentile levels in -Xlog:gc+stats. 
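The HdrSeq added in this changeset keeps NumberSeq's running statistics but additionally bins every sample into a two-level histogram: an outer bucket selects the power-of-ten magnitude, an inner bucket the leading significant digits, so percentile() can be answered from a small table of counters. A minimal standalone sketch of the bucket math, using the constants from the patch (ValBuckets = 512, MagMinimum = -12); bucket_for() is a hypothetical helper for illustration, not part of the patch:

  #include <cstdio>
  #include <utility>

  static const int ValBuckets = 512;
  static const int MagMinimum = -12;

  // Normalize val into v in (0.1, 1] times 10^mag, exactly as HdrSeq::add does,
  // then map (mag, v) to the (bucket, sub_bucket) indices into _hdr[][].
  static std::pair<int, int> bucket_for(double val) {
    double v = val;
    int mag = 0;
    if (v > 0) {
      while (v > 1)   { mag++; v /= 10; }
      while (v < 0.1) { mag--; v *= 10; }
    } else {
      mag = MagMinimum;
    }
    return std::make_pair(-MagMinimum + mag, (int)(v * ValBuckets));
  }

  int main() {
    // A 4352 us phase recorded in seconds: 0.004352 = 0.4352 * 10^-2,
    // so bucket = 12 + (-2) = 10 and sub_bucket = (int)(0.4352 * 512) = 222.
    std::pair<int, int> b = bucket_for(0.004352);
    printf("bucket = %d, sub_bucket = %d\n", b.first, b.second);
    return 0;
  }

Reading a percentile then walks the buckets in order and, where the cumulative count crosses the target, returns pow(10.0, MagMinimum + mag) * val / ValBuckets, i.e. the lower edge of that bucket rather than an exact stored sample; resolution is traded for a fixed, allocation-light footprint.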
diff -r 55fa9e39143e -r 87ed01562715 src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Thu Jan 05 12:33:55 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Mon Jan 09 14:39:01 2017 +0100 @@ -210,7 +210,7 @@ if (phase != _num_phases) { for (uint i = 0; i < ShenandoahPhaseTimes::GCParPhasesSentinel; i++) { double t = _phase_times->average(i); - _timing_data[phase + i]._ms.add(t * 1000.0); + _timing_data[phase + i]._secs.add(t); } } } @@ -223,8 +223,7 @@ void ShenandoahCollectorPolicy::record_phase_end(TimingPhase phase) { double end = os::elapsedTime(); double elapsed = end - _timing_data[phase]._start; - _timing_data[phase]._ms.add(elapsed * 1000); - + _timing_data[phase]._secs.add(elapsed); } void ShenandoahCollectorPolicy::report_concgc_cancelled() { @@ -676,8 +675,8 @@ void ShenandoahCollectorPolicy::print_tracing_info(outputStream* out) { for (uint i = 0; i < _num_phases; i++) { - if (_timing_data[i]._ms.maximum() != 0) { - print_summary_sd(out, _phase_names[i], &(_timing_data[i]._ms)); + if (_timing_data[i]._secs.maximum() != 0) { + print_summary_sd(out, _phase_names[i], &(_timing_data[i]._secs)); } } out->print_cr("User requested GCs: "SIZE_FORMAT, _user_requested_gcs); @@ -686,24 +685,34 @@ out->print_cr("Degenerated concurrent markings: "SIZE_FORMAT, _degenerated_cm); out->print_cr(" "); - double total_sum = _timing_data[init_mark_gross]._ms.sum() + - _timing_data[final_mark_gross]._ms.sum(); - double total_avg = (_timing_data[init_mark_gross]._ms.avg() + - _timing_data[final_mark_gross]._ms.avg()) / 2.0; - double total_max = MAX2(_timing_data[init_mark_gross]._ms.maximum(), - _timing_data[final_mark_gross]._ms.maximum()); + double total_sum = _timing_data[init_mark_gross]._secs.sum() + + _timing_data[final_mark_gross]._secs.sum(); + double total_avg = (_timing_data[init_mark_gross]._secs.avg() + + _timing_data[final_mark_gross]._secs.avg()) / 2.0; + double total_max = MAX2(_timing_data[init_mark_gross]._secs.maximum(), + _timing_data[final_mark_gross]._secs.maximum()); out->print_cr("%-27s = %8.2lf s, avg = %8.2lf ms, max = %8.2lf ms", - "Total", total_sum / 1000.0, total_avg, total_max); + "Total", total_sum, total_avg * 1000.0, total_max * 1000.0); } -void ShenandoahCollectorPolicy::print_summary_sd(outputStream* out, const char* str, const NumberSeq* seq) { - double sum = seq->sum(); - out->print("%-34s = %8.2lf s (avg = %8.2lf ms)", - str, sum / 1000.0, seq->avg()); - out->print_cr(" %s = "INT32_FORMAT_W(5)", std dev = %8.2lf ms, max = %8.2lf ms)", - "(num", seq->num(), seq->sd(), seq->maximum()); +void ShenandoahCollectorPolicy::print_summary_sd(outputStream* out, const char* str, const HdrSeq* seq) { + out->print("%-34s = %8.2lf s (avg = %8.0lf us)", + str, seq->sum(), seq->avg() * 1000000.0); + out->print_cr(" (num = "INT32_FORMAT_W(5)", lvls (10%% step, us) = %8.0lf, %8.0lf, %8.0lf, %8.0lf, %8.0lf, %8.0lf, %8.0lf, %8.0lf, %8.0lf, max = %8.0lf)", + seq->num(), + seq->percentile(10) * 1000000.0, + seq->percentile(20) * 1000000.0, + seq->percentile(30) * 1000000.0, + seq->percentile(40) * 1000000.0, + seq->percentile(50) * 1000000.0, + seq->percentile(60) * 1000000.0, + seq->percentile(70) * 1000000.0, + seq->percentile(80) * 1000000.0, + seq->percentile(90) * 1000000.0, + seq->maximum() * 1000000.0 + ); } void ShenandoahCollectorPolicy::increase_cycle_counter() { diff -r 55fa9e39143e -r 87ed01562715 
src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Thu Jan 05 12:33:55 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Mon Jan 09 14:39:01 2017 +0100 @@ -116,7 +116,7 @@ private: struct TimingData { - NumberSeq _ms; + HdrSeq _secs; double _start; size_t _count; }; @@ -202,7 +202,7 @@ size_t cycle_counter() const; private: - void print_summary_sd(outputStream* out, const char* str, const NumberSeq* seq); + void print_summary_sd(outputStream* out, const char* str, const HdrSeq* seq); }; diff -r 55fa9e39143e -r 87ed01562715 src/share/vm/utilities/numberSeq.cpp --- a/src/share/vm/utilities/numberSeq.cpp Thu Jan 05 12:33:55 2017 +0100 +++ b/src/share/vm/utilities/numberSeq.cpp Mon Jan 09 14:39:01 2017 +0100 @@ -260,3 +260,96 @@ } s->cr(); } + +HdrSeq::HdrSeq() { + _hdr = NEW_C_HEAP_ARRAY(int*, MagBuckets, mtInternal); + for (int c = 0; c < MagBuckets; c++) { + _hdr[c] = NULL; + } +} + +HdrSeq::~HdrSeq() { + for (int c = 0; c < MagBuckets; c++) { + int* sub = _hdr[c]; + if (sub != NULL) { + FREE_C_HEAP_ARRAY(int, sub, mtInternal); + } + } + FREE_C_HEAP_ARRAY(int*, _hdr, mtInternal); +} + +void HdrSeq::add(double val) { + if (val < 0) { + assert (false, err_msg("value (%8.2f) is not negative", val)); + val = 0; + } + + NumberSeq::add(val); + + double v = val; + int mag; + if (v > 0) { + mag = 0; + while (v > 1) { + mag++; + v /= 10; + } + while (v < 0.1) { + mag--; + v *= 10; + } + } else { + mag = MagMinimum; + } + + int bucket = -MagMinimum + mag; + int sub_bucket = (int) (v * ValBuckets); + + // Defensively saturate for product bits: + if (bucket < 0) { + assert (false, err_msg("bucket index (%d) underflow for value (%8.2f)", bucket, val)); + bucket = 0; + } + + if (bucket >= MagBuckets) { + assert (false, err_msg("bucket index (%d) overflow for value (%8.2f)", bucket, val)); + bucket = MagBuckets - 1; + } + + if (sub_bucket < 0) { + assert (false, err_msg("sub-bucket index (%d) underflow for value (%8.2f)", sub_bucket, val)); + sub_bucket = 0; + } + + if (sub_bucket >= ValBuckets) { + assert (false, err_msg("sub-bucket index (%d) overflow for value (%8.2f)", sub_bucket, val)); + sub_bucket = ValBuckets - 1; + } + + int* b = _hdr[bucket]; + if (b == NULL) { + b = NEW_C_HEAP_ARRAY(int, ValBuckets, mtInternal); + for (int c = 0; c < ValBuckets; c++) { + b[c] = 0; + } + _hdr[bucket] = b; + } + b[sub_bucket]++; +} + +double HdrSeq::percentile(double level) const { + int target = (int) (level * num() / 100); + int cnt = 0; + for (int mag = 0; mag < MagBuckets; mag++) { + if (_hdr[mag] != NULL) { + for (int val = 0; val < ValBuckets; val++) { + cnt += _hdr[mag][val]; + if (cnt >= target) { + return pow(10.0, MagMinimum + mag) * val / ValBuckets; + } + } + } + } + return maximum(); +} + diff -r 55fa9e39143e -r 87ed01562715 src/share/vm/utilities/numberSeq.hpp --- a/src/share/vm/utilities/numberSeq.hpp Thu Jan 05 12:33:55 2017 +0100 +++ b/src/share/vm/utilities/numberSeq.hpp Mon Jan 09 14:39:01 2017 +0100 @@ -104,6 +104,31 @@ virtual void dump_on(outputStream* s); }; +// HDR sequence stores the low-resolution high-dynamic-range values. +// It does so by maintaining the double array, where first array defines +// the magnitude of the value being stored, and the second array maintains +// the low resolution histogram within that magnitude. For example, storing +// 4.352819 * 10^3 increments the bucket _hdr[3][435]. 
This allows for +// memory efficient storage of huge amount of samples. +// +// Accepts positive numbers only. +class HdrSeq: public NumberSeq { +private: + enum PrivateConstants { + ValBuckets = 512, + MagBuckets = 24, + MagMinimum = -12, + }; + int** _hdr; + +public: + HdrSeq(); + ~HdrSeq(); + + virtual void add(double val); + double percentile(double level) const; +}; + class TruncatedSeq: public AbsSeq { private: enum PrivateConstants { changeset: 9513:11dc2b01f59e user: shade date: Wed Jan 11 18:30:01 2017 +0100 summary: Avoid double-touching array headers during mark. diff -r 87ed01562715 -r 11dc2b01f59e src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Mon Jan 09 14:39:01 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Wed Jan 11 18:30:01 2017 +0100 @@ -55,12 +55,16 @@ count_liveness(obj); if (obj->is_objArray()) { // Case 1: Array instance and no task bounds set. Must be the first time - // we visit it. Process its metadata, and submit the chunked array task - // with proper bounds. - _mark_refs.do_klass(obj->klass()); + // we visit it. objArrayOop array = objArrayOop(obj); - if (array->length() > 0) { - do_array(array, 0, array->length()); + int len = array->length(); + if (len > 0) { + // Case 1a. Non-empty array. The header would be processed along with the + // chunk that starts at offset=0, see ObjArrayKlass::oop_oop_iterate_range. + do_array(array, 0, len); + } else { + // Case 1b. Empty array. Only need to care about the header. + _mark_refs.do_klass(obj->klass()); } } else { // Case 2: Normal oop, process as usual. changeset: 9514:d1ebf007ae0a user: rkennke date: Wed Jan 11 18:48:38 2017 +0100 summary: Print heap start/end addresses in hs_err. diff -r 11dc2b01f59e -r d1ebf007ae0a src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 11 18:30:01 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 11 18:48:38 2017 +0100 @@ -377,6 +377,9 @@ void ShenandoahHeap::print_on(outputStream* st) const { st->print("Shenandoah Heap"); st->print(" total = " SIZE_FORMAT " K, used " SIZE_FORMAT " K ", capacity()/ K, used() /K); + st->print(" [" PTR_FORMAT ", " PTR_FORMAT ") ", + p2i(reserved_region().start()), + p2i(reserved_region().end())); st->print("Region size = " SIZE_FORMAT "K ", ShenandoahHeapRegion::RegionSizeBytes / K); if (_concurrent_mark_in_progress) { st->print("marking "); @@ -389,6 +392,15 @@ } st->print("\n"); + // Adapted from VirtualSpace::print_on(), which is non-PRODUCT only + st->print ("Virtual space:"); + if (_storage.special()) st->print(" (pinned in memory)"); + st->cr(); + st->print_cr(" - committed: " SIZE_FORMAT, _storage.committed_size()); + st->print_cr(" - reserved: " SIZE_FORMAT, _storage.reserved_size()); + st->print_cr(" - [low, high]: [" INTPTR_FORMAT ", " INTPTR_FORMAT "]", p2i(_storage.low()), p2i(_storage.high())); + st->print_cr(" - [low_b, high_b]: [" INTPTR_FORMAT ", " INTPTR_FORMAT "]", p2i(_storage.low_boundary()), p2i(_storage.high_boundary())); + if (Verbose) { print_heap_regions(st); } changeset: 9515:29b452a5da29 user: shade date: Wed Jan 11 21:25:04 2017 +0100 summary: Replace VirtualSpace-based pretouch with region-based one. 
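The pretouch rework below drops the hand-rolled address-chunk claiming in favor of the region set's claim_next(), so each worker touches whole regions and the task can log per-region progress. claim_next() itself is not shown in this changeset; a minimal sketch of such a claiming scheme, assuming it is backed by an atomic cursor over the dense region array (names and layout are illustrative):

  #include <atomic>
  #include <cstddef>

  template <typename Region>
  class ClaimableRegionSet {
  private:
    Region** _regions;               // dense array of active regions
    size_t _active_end;              // one past the last valid index
    std::atomic<size_t> _cursor;     // next region to hand out

  public:
    ClaimableRegionSet(Region** regions, size_t n)
      : _regions(regions), _active_end(n), _cursor(0) {}

    void clear_current_index() { _cursor.store(0); }

    // Hands out each region exactly once; returns NULL when exhausted,
    // which is the loop-termination condition in ShenandoahPretouchTask::work().
    Region* claim_next() {
      size_t i = _cursor.fetch_add(1, std::memory_order_relaxed);
      return (i < _active_end) ? _regions[i] : NULL;
    }
  };

With that shape, the worker loop in the patch reduces to: for (Region* r = set.claim_next(); r != NULL; r = set.claim_next()) os::pretouch_memory((char*) r->bottom(), (char*) r->end()); and region granularity keeps each unit of work at RegionSizeBytes, large enough to amortize the atomic increment.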
diff -r d1ebf007ae0a -r 29b452a5da29 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 11 18:48:38 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 11 21:25:04 2017 +0100 @@ -74,48 +74,27 @@ class ShenandoahPretouchTask : public AbstractGangTask { private: - char* volatile _cur_addr; - char* const _start_addr; - char* const _end_addr; + ShenandoahHeapRegionSet* _regions; size_t const _page_size; public: - ShenandoahPretouchTask(char* start_address, char* end_address, size_t page_size) : + ShenandoahPretouchTask(ShenandoahHeapRegionSet* regions, size_t page_size) : AbstractGangTask("Shenandoah PreTouch"), - _cur_addr(start_address), - _start_addr(start_address), - _end_addr(end_address), + _regions(regions), _page_size(page_size) { - } + _regions->clear_current_index(); + }; virtual void work(uint worker_id) { - size_t const actual_chunk_size = MAX2(PreTouchParallelChunkSize, _page_size); - while (true) { - char* touch_addr = (char*)Atomic::add_ptr((intptr_t)actual_chunk_size, (volatile void*) &_cur_addr) - actual_chunk_size; - if (touch_addr < _start_addr || touch_addr >= _end_addr) { - break; - } - char* end_addr = touch_addr + MIN2(actual_chunk_size, pointer_delta(_end_addr, touch_addr, sizeof(char))); - os::pretouch_memory(touch_addr, end_addr); + ShenandoahHeapRegion* r = _regions->claim_next(); + while (r != NULL) { + log_trace(gc, heap)("Pretouch region " SIZE_FORMAT ": " PTR_FORMAT " -> " PTR_FORMAT, + r->region_number(), p2i(r->bottom()), p2i(r->end())); + os::pretouch_memory((char*) r->bottom(), (char*) r->end()); + r = _regions->claim_next(); } } }; -void ShenandoahHeap::pretouch_storage(char* start, char* end, WorkGang* workers) { - assert (ShenandoahAlwaysPreTouch, "Sanity"); - assert (!AlwaysPreTouch, "Should have been overridden"); - - size_t size = (size_t)(end - start); - size_t page_size = UseLargePages ? (size_t)os::large_page_size() : (size_t)os::vm_page_size(); - size_t num_chunks = MAX2((size_t)1, size / MAX2(PreTouchParallelChunkSize, page_size)); - uint num_workers = MIN2((uint)num_chunks, workers->active_workers()); - - log_info(gc, heap)("Parallel pretouch with %u workers for " SIZE_FORMAT " work units pre-touching " SIZE_FORMAT " bytes.", - num_workers, num_chunks, size); - - ShenandoahPretouchTask cl(start, end, page_size); - workers->run_task(&cl, num_workers); -} - jint ShenandoahHeap::initialize() { CollectedHeap::pre_initialize(); @@ -139,9 +118,6 @@ set_barrier_set(new ShenandoahBarrierSet(this)); ReservedSpace pgc_rs = heap_rs.first_part(max_byte_size); _storage.initialize(pgc_rs, init_byte_size); - if (ShenandoahAlwaysPreTouch) { - pretouch_storage(_storage.low(), _storage.high(), _workers); - } _num_regions = init_byte_size / ShenandoahHeapRegion::RegionSizeBytes; _max_regions = max_byte_size / ShenandoahHeapRegion::RegionSizeBytes; @@ -240,6 +216,17 @@ ShenandoahMarkCompact::initialize(); + if (ShenandoahAlwaysPreTouch) { + assert (!AlwaysPreTouch, "Should have been overridden"); + + size_t page_size = UseLargePages ? 
os::large_page_size() : (size_t) os::vm_page_size(); + + log_info(gc, heap)("Parallel pretouch " SIZE_FORMAT " regions with " SIZE_FORMAT " byte pages", + _ordered_regions->count(), page_size); + ShenandoahPretouchTask cl(_ordered_regions, page_size); + _workers->run_task(&cl); + } + return JNI_OK; } diff -r d1ebf007ae0a -r 29b452a5da29 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Jan 11 18:48:38 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Jan 11 21:25:04 2017 +0100 @@ -266,8 +266,6 @@ static address in_cset_fast_test_addr(); static address cancelled_concgc_addr(); - static void pretouch_storage(char* start, char* end, WorkGang* workers); - ShenandoahCollectorPolicy *shenandoahPolicy() { return _shenandoah_policy;} inline ShenandoahHeapRegion* heap_region_containing(const void* addr) const; changeset: 9516:3c243692b79a user: shade date: Thu Jan 12 10:33:43 2017 +0100 summary: Alias ObjArrayFromToTask -> SCMTask. diff -r 29b452a5da29 -r 3c243692b79a src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Wed Jan 11 21:25:04 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Thu Jan 12 10:33:43 2017 +0100 @@ -928,12 +928,12 @@ return; } - ObjArrayFromToTask t; + SCMTask t; for (uint i = 0; i < stride; i++) { if (try_queue(q, t) || try_draining_satb_buffer(q, t) || queues->steal(worker_id, &seed, t)) { - cl->do_object_or_array(t.obj(), t.from(), t.to()); + cl->do_task(&t); } else { if (terminator->offer_termination()) return; } @@ -949,10 +949,10 @@ while (true) { if (heap->cancelled_concgc()) return false; - ObjArrayFromToTask t; + SCMTask t; for (uint i = 0; i < stride; i++) { if (try_queue(q, t)) { - cl->do_object_or_array(t.obj(), t.from(), t.to()); + cl->do_task(&t); } else { assert(q->is_empty(), "Must be empty"); q = queues->claim_next(); @@ -973,11 +973,11 @@ int seed = 17; SCMObjToScanQueueSet* queues = task_queues(); - ObjArrayFromToTask t; + SCMTask t; while (true) { if (try_queue(q, t) || queues->steal(worker_id, &seed, t)) { - cl->do_object_or_array(t.obj(), t.from(), t.to()); + cl->do_task(&t); } else { if (terminator->offer_termination()) return; } diff -r 29b452a5da29 -r 3c243692b79a src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Wed Jan 11 21:25:04 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Thu Jan 12 10:33:43 2017 +0100 @@ -28,7 +28,8 @@ #include "utilities/workgroup.hpp" #include "gc_implementation/shenandoah/shenandoahTaskqueue.hpp" -typedef BufferedOverflowTaskQueue ShenandoahBufferedOverflowTaskQueue; +typedef ObjArrayFromToTask SCMTask; +typedef BufferedOverflowTaskQueue ShenandoahBufferedOverflowTaskQueue; typedef Padded SCMObjToScanQueue; class ShenandoahConcurrentMark; @@ -55,8 +56,8 @@ ShenandoahMarkObjsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp, jushort* live_data); ~ShenandoahMarkObjsClosure(); - inline void do_object_or_array(oop obj, int from, int to); - inline void do_array(objArrayOop array, int from, int to); + inline void do_task(SCMTask* task); + inline void do_chunked_array(objArrayOop array, int from, int to); inline void count_liveness(oop obj); }; @@ -115,12 +116,12 @@ template void final_mark_loop(ShenandoahMarkObjsClosure* cl, uint worker_id, 
SCMObjToScanQueue* q, ParallelTaskTerminator* t); - inline bool try_queue(SCMObjToScanQueue* q, ObjArrayFromToTask &task); + inline bool try_queue(SCMObjToScanQueue* q, SCMTask &task); SCMObjToScanQueue* get_queue(uint worker_id); void clear_queue(SCMObjToScanQueue *q); - inline bool try_draining_satb_buffer(SCMObjToScanQueue *q, ObjArrayFromToTask &task); + inline bool try_draining_satb_buffer(SCMObjToScanQueue *q, SCMTask &task); void drain_satb_buffers(uint worker_id, bool remark = false); SCMObjToScanQueueSet* task_queues() { return _task_queues;} diff -r 29b452a5da29 -r 3c243692b79a src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Wed Jan 11 21:25:04 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Thu Jan 12 10:33:43 2017 +0100 @@ -33,7 +33,9 @@ #include "runtime/prefetch.inline.hpp" template -void ShenandoahMarkObjsClosure::do_object_or_array(oop obj, int from, int to) { +void ShenandoahMarkObjsClosure::do_task(SCMTask* task) { + oop obj = task->obj(); + assert(obj != NULL, "expect non-null object"); assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static_not_null(obj)), "expect forwarded obj in queue"); @@ -51,6 +53,7 @@ assert(_heap->is_in(obj), "referenced objects must be in the heap. No?"); assert(_heap->is_marked_next(obj), "only marked objects on task queue"); + int from = task->from(); if (from == -1) { count_liveness(obj); if (obj->is_objArray()) { @@ -61,7 +64,7 @@ if (len > 0) { // Case 1a. Non-empty array. The header would be processed along with the // chunk that starts at offset=0, see ObjArrayKlass::oop_oop_iterate_range. - do_array(array, 0, len); + do_chunked_array(array, 0, len); } else { // Case 1b. Empty array. Only need to care about the header. _mark_refs.do_klass(obj->klass()); @@ -74,7 +77,7 @@ // Case 3: Array chunk, has sensible (from, to) bounds. Process it. assert(obj->is_objArray(), "expect object array"); objArrayOop array = objArrayOop(obj); - do_array(array, from, to); + do_chunked_array(array, from, task->to()); } } @@ -102,7 +105,7 @@ } template -inline void ShenandoahMarkObjsClosure::do_array(objArrayOop array, int from, int to) { +inline void ShenandoahMarkObjsClosure::do_chunked_array(objArrayOop array, int from, int to) { assert (from < to, "sanity"); assert (ObjArrayMarkingStride > 0, "sanity"); @@ -110,7 +113,7 @@ // "stealing" part of the queue, which will seed other workers efficiently. 
while ((to - from) > (int)ObjArrayMarkingStride) { int mid = from + (to - from) / 2; - bool pushed = _queue->push(ObjArrayFromToTask(array, mid, to)); + bool pushed = _queue->push(SCMTask(array, mid, to)); assert(pushed, "overflow queue should always succeed pushing"); to = mid; } @@ -119,7 +122,7 @@ array->oop_iterate_range(&_mark_refs, from, to); } -inline bool ShenandoahConcurrentMark::try_queue(SCMObjToScanQueue* q, ObjArrayFromToTask &task) { +inline bool ShenandoahConcurrentMark::try_queue(SCMObjToScanQueue* q, SCMTask &task) { return (q->pop_buffer(task) || q->pop_local(task) || q->pop_overflow(task)); @@ -147,7 +150,7 @@ } }; -inline bool ShenandoahConcurrentMark:: try_draining_satb_buffer(SCMObjToScanQueue *q, ObjArrayFromToTask &task) { +inline bool ShenandoahConcurrentMark::try_draining_satb_buffer(SCMObjToScanQueue *q, SCMTask &task) { ShenandoahSATBBufferClosure cl(q); SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); bool had_refs = satb_mq_set.apply_closure_to_completed_buffer(&cl); @@ -180,7 +183,7 @@ || oopDesc::bs()->is_safe(obj), "we don't want to mark objects in from-space"); - bool pushed = q->push(ObjArrayFromToTask(obj, -1, -1)); + bool pushed = q->push(SCMTask(obj, -1, -1)); assert(pushed, "overflow queue should always succeed pushing"); } diff -r 29b452a5da29 -r 3c243692b79a src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.hpp Wed Jan 11 21:25:04 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.hpp Thu Jan 12 10:33:43 2017 +0100 @@ -97,10 +97,9 @@ int _from, _to; }; -typedef BufferedOverflowTaskQueue ShenandoahBufferedOverflowTaskQueue; +typedef ObjArrayFromToTask SCMTask; +typedef BufferedOverflowTaskQueue ShenandoahBufferedOverflowTaskQueue; typedef Padded SCMObjToScanQueue; -// typedef GenericTaskQueueSet SCMObjToScanQueueSet; - template class ParallelClaimableQueueSet: public GenericTaskQueueSet { changeset: 9517:c50c9462519a user: shade date: Fri Jan 13 16:52:07 2017 +0100 summary: Cherry-pick the ObjArrayMarkingStride change from JDK-8057003. diff -r 3c243692b79a -r c50c9462519a src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Thu Jan 12 10:33:43 2017 +0100 +++ b/src/share/vm/runtime/globals.hpp Fri Jan 13 16:52:07 2017 +0100 @@ -2023,7 +2023,7 @@ experimental(uintx, WorkStealingSpinToYieldRatio, 10, \ "Ratio of hard spins to calls to yield") \ \ - develop(uintx, ObjArrayMarkingStride, 512, \ + develop(uintx, ObjArrayMarkingStride, 2048, \ "Number of object array elements to push onto the marking stack " \ "before pushing a continuation entry") \ \ changeset: 9518:42938de9bb9e user: shade date: Fri Jan 13 19:30:39 2017 +0100 summary: Reformat GC stats table. 
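Besides aligning the table labels, the changeset below introduces paired total_pause_gross / total_pause phases: the gross timer brackets VMThread::execute() on the requesting thread, while the net timer runs inside the VM operation's doit() at the safepoint, so (gross - net) approximates time-to-safepoint; that is what the new table header means by "gross" pauses including time to safepoint. A toy model of this accounting (standalone illustration with sleeps standing in for real work, not VM code):

  #include <chrono>
  #include <cstdio>
  #include <thread>

  static double now_s() {
    using namespace std::chrono;
    return duration<double>(steady_clock::now().time_since_epoch()).count();
  }

  int main() {
    double gross_start = now_s();                               // record_phase_start(total_pause_gross)
    std::this_thread::sleep_for(std::chrono::milliseconds(3));  // threads rolling to the safepoint
    double net_start = now_s();                                 // record_phase_start(total_pause), in doit()
    std::this_thread::sleep_for(std::chrono::milliseconds(5));  // actual init/final mark work
    double net_end = now_s();                                   // record_phase_end(total_pause)
    double gross_end = now_s();                                 // record_phase_end(total_pause_gross)
    printf("gross = %.1f ms, net = %.1f ms, time-to-safepoint ~ %.1f ms\n",
           (gross_end - gross_start) * 1e3, (net_end - net_start) * 1e3,
           (gross_end - gross_start - (net_end - net_start)) * 1e3);
    return 0;
  }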
diff -r c50c9462519a -r 42938de9bb9e src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Fri Jan 13 16:52:07 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Fri Jan 13 19:30:39 2017 +0100 @@ -478,77 +478,79 @@ _allocation_failure_gcs = 0; _conc_gc_aborted = false; - _phase_names[init_mark] = "Initial Mark Pauses (net)"; - _phase_names[init_mark_gross] = "Initial Mark Pauses (gross)"; - _phase_names[final_mark] = "Final Mark Pauses (net)"; - _phase_names[final_mark_gross] = "Final Mark Pauses (gross)"; - _phase_names[accumulate_stats] = " Accumulate Stats"; - _phase_names[make_parsable] = " Make Parsable"; - _phase_names[clear_liveness] = " Clear Liveness"; - _phase_names[scan_roots] = " Scan Roots"; - _phase_names[update_roots] = " Update Roots"; - _phase_names[drain_satb] = " Drain SATB"; - _phase_names[weakrefs] = " Weak References"; - _phase_names[class_unloading] = " Class Unloading"; - _phase_names[prepare_evac] = " Prepare Evacuation"; - _phase_names[init_evac] = " Initial Evacuation"; + _phase_names[total_pause] = "Total Pauses (net)"; + _phase_names[total_pause_gross] = "Total Pauses (gross)"; + _phase_names[init_mark] = "Initial Mark Pauses (net)"; + _phase_names[init_mark_gross] = "Initial Mark Pauses (gross)"; + _phase_names[final_mark] = "Final Mark Pauses (net)"; + _phase_names[final_mark_gross] = "Final Mark Pauses (gross)"; + _phase_names[accumulate_stats] = " Accumulate Stats"; + _phase_names[make_parsable] = " Make Parsable"; + _phase_names[clear_liveness] = " Clear Liveness"; + _phase_names[scan_roots] = " Scan Roots"; + _phase_names[update_roots] = " Update Roots"; + _phase_names[drain_satb] = " Drain SATB"; + _phase_names[weakrefs] = " Weak References"; + _phase_names[class_unloading] = " Class Unloading"; + _phase_names[prepare_evac] = " Prepare Evacuation"; + _phase_names[init_evac] = " Initial Evacuation"; - _phase_names[scan_thread_roots] = " Scan Thread Roots"; - _phase_names[scan_code_roots] = " Scan Code Cache Roots"; - _phase_names[scan_string_table_roots] = " Scan String Table Roots"; - _phase_names[scan_universe_roots] = " Scan Universe Roots"; - _phase_names[scan_jni_roots] = " Scan JNI Roots"; - _phase_names[scan_jni_weak_roots] = " Scan JNI Weak Roots"; - _phase_names[scan_synchronizer_roots] = " Scan Synchronizer Roots"; - _phase_names[scan_flat_profiler_roots] = " Scan Flat Profiler Roots"; - _phase_names[scan_management_roots] = " Scan Management Roots"; - _phase_names[scan_system_dictionary_roots] = " Scan System Dictionary Roots"; - _phase_names[scan_cldg_roots] = " Scan CLDG Roots"; - _phase_names[scan_jvmti_roots] = " Scan JVMTI Roots"; + _phase_names[scan_thread_roots] = " S: Thread Roots"; + _phase_names[scan_code_roots] = " S: Code Cache Roots"; + _phase_names[scan_string_table_roots] = " S: String Table Roots"; + _phase_names[scan_universe_roots] = " S: Universe Roots"; + _phase_names[scan_jni_roots] = " S: JNI Roots"; + _phase_names[scan_jni_weak_roots] = " S: JNI Weak Roots"; + _phase_names[scan_synchronizer_roots] = " S: Synchronizer Roots"; + _phase_names[scan_flat_profiler_roots] = " S: Flat Profiler Roots"; + _phase_names[scan_management_roots] = " S: Management Roots"; + _phase_names[scan_system_dictionary_roots] = " S: System Dict Roots"; + _phase_names[scan_cldg_roots] = " S: CLDG Roots"; + _phase_names[scan_jvmti_roots] = " S: JVMTI Roots"; - _phase_names[update_thread_roots] = " Update Thread 
Roots"; - _phase_names[update_code_roots] = " Update Code Cache Roots"; - _phase_names[update_string_table_roots] = " Update String Table Roots"; - _phase_names[update_universe_roots] = " Update Universe Roots"; - _phase_names[update_jni_roots] = " Update JNI Roots"; - _phase_names[update_jni_weak_roots] = " Update JNI Weak Roots"; - _phase_names[update_synchronizer_roots] = " Update Synchronizer Roots"; - _phase_names[update_flat_profiler_roots] = " Update Flat Profiler Roots"; - _phase_names[update_management_roots] = " Update Management Roots"; - _phase_names[update_system_dictionary_roots] = " Update System Dictionary Roots"; - _phase_names[update_cldg_roots] = " Update CLDG Roots"; - _phase_names[update_jvmti_roots] = " Update JVMTI Roots"; + _phase_names[update_thread_roots] = " U: Thread Roots"; + _phase_names[update_code_roots] = " U: Code Cache Roots"; + _phase_names[update_string_table_roots] = " U: String Table Roots"; + _phase_names[update_universe_roots] = " U: Universe Roots"; + _phase_names[update_jni_roots] = " U: JNI Roots"; + _phase_names[update_jni_weak_roots] = " U: JNI Weak Roots"; + _phase_names[update_synchronizer_roots] = " U: Synchronizer Roots"; + _phase_names[update_flat_profiler_roots] = " U: Flat Profiler Roots"; + _phase_names[update_management_roots] = " U: Management Roots"; + _phase_names[update_system_dictionary_roots] = " U: System Dict Roots"; + _phase_names[update_cldg_roots] = " U: CLDG Roots"; + _phase_names[update_jvmti_roots] = " U: JVMTI Roots"; - _phase_names[evac_thread_roots] = " Evacuate Thread Roots"; - _phase_names[evac_code_roots] = " Evacuate Code Cache Roots"; - _phase_names[evac_string_table_roots] = " Evacuate String Table Roots"; - _phase_names[evac_universe_roots] = " Evacuate Universe Roots"; - _phase_names[evac_jni_roots] = " Evacuate JNI Roots"; - _phase_names[evac_jni_weak_roots] = " Evacuate JNI Weak Roots"; - _phase_names[evac_synchronizer_roots] = " Evacuate Synchronizer Roots"; - _phase_names[evac_flat_profiler_roots] = " Evacuate Flat Profiler Roots"; - _phase_names[evac_management_roots] = " Evacuate Management Roots"; - _phase_names[evac_system_dictionary_roots] = " Evacuate System Dictionary Roots"; - _phase_names[evac_cldg_roots] = " Evacuate CLDG Roots"; - _phase_names[evac_jvmti_roots] = " Evacuate JVMTI Roots"; + _phase_names[evac_thread_roots] = " E: Thread Roots"; + _phase_names[evac_code_roots] = " E: Code Cache Roots"; + _phase_names[evac_string_table_roots] = " E: String Table Roots"; + _phase_names[evac_universe_roots] = " E: Universe Roots"; + _phase_names[evac_jni_roots] = " E: JNI Roots"; + _phase_names[evac_jni_weak_roots] = " E: JNI Weak Roots"; + _phase_names[evac_synchronizer_roots] = " E: Synchronizer Roots"; + _phase_names[evac_flat_profiler_roots] = " E: Flat Profiler Roots"; + _phase_names[evac_management_roots] = " E: Management Roots"; + _phase_names[evac_system_dictionary_roots] = " E: System Dict Roots"; + _phase_names[evac_cldg_roots] = " E: CLDG Roots"; + _phase_names[evac_jvmti_roots] = " E: JVMTI Roots"; - _phase_names[recycle_regions] = " Recycle regions"; - _phase_names[reset_bitmaps] = "ResetBitmaps"; - _phase_names[resize_tlabs] = "Resize TLABs"; + _phase_names[recycle_regions] = " Recycle regions"; + _phase_names[reset_bitmaps] = "Reset Bitmaps"; + _phase_names[resize_tlabs] = "Resize TLABs"; - _phase_names[full_gc] = "Full GC Times"; - _phase_names[full_gc_heapdumps] = " Heap Dumps"; - _phase_names[full_gc_prepare] = " Prepare"; - _phase_names[full_gc_mark] = " Mark"; - 
_phase_names[full_gc_mark_drain_queues] = " Drain Queues"; - _phase_names[full_gc_mark_weakrefs] = " Weak References"; - _phase_names[full_gc_mark_class_unloading] = " Class Unloading"; - _phase_names[full_gc_calculate_addresses] = " Calculate Addresses"; - _phase_names[full_gc_adjust_pointers] = " Adjust Pointers"; - _phase_names[full_gc_copy_objects] = " Copy Objects"; + _phase_names[full_gc] = "Full GC"; + _phase_names[full_gc_heapdumps] = " Heap Dumps"; + _phase_names[full_gc_prepare] = " Prepare"; + _phase_names[full_gc_mark] = " Mark"; + _phase_names[full_gc_mark_drain_queues] = " Drain Queues"; + _phase_names[full_gc_mark_weakrefs] = " Weak References"; + _phase_names[full_gc_mark_class_unloading] = " Class Unloading"; + _phase_names[full_gc_calculate_addresses] = " Calculate Addresses"; + _phase_names[full_gc_adjust_pointers] = " Adjust Pointers"; + _phase_names[full_gc_copy_objects] = " Copy Objects"; - _phase_names[conc_mark] = "Concurrent Marking Times"; - _phase_names[conc_evac] = "Concurrent Evacuation Times"; + _phase_names[conc_mark] = "Concurrent Marking"; + _phase_names[conc_evac] = "Concurrent Evacuation"; if (ShenandoahGCHeuristics != NULL) { if (strcmp(ShenandoahGCHeuristics, "aggressive") == 0) { @@ -674,43 +676,35 @@ } void ShenandoahCollectorPolicy::print_tracing_info(outputStream* out) { + out->cr(); + out->print_cr("GC STATISTICS:"); + out->print_cr(" \"gross\" pauses include time to safepoint. \"net\" pauses are times spent in GC."); + out->print_cr(" \"a\" is average time for each phase, look at levels to see if average makes sense."); + out->print_cr(" \"lvls\" are 20%% step quantiles, the last level is 100%%, i.e. maximum."); + out->cr(); + for (uint i = 0; i < _num_phases; i++) { if (_timing_data[i]._secs.maximum() != 0) { print_summary_sd(out, _phase_names[i], &(_timing_data[i]._secs)); } } - out->print_cr("User requested GCs: "SIZE_FORMAT, _user_requested_gcs); - out->print_cr("Allocation failure GCs: "SIZE_FORMAT, _allocation_failure_gcs); - out->print_cr("Successful concurrent markings: "SIZE_FORMAT, _successful_cm); - out->print_cr("Degenerated concurrent markings: "SIZE_FORMAT, _degenerated_cm); - out->print_cr(" "); - double total_sum = _timing_data[init_mark_gross]._secs.sum() + - _timing_data[final_mark_gross]._secs.sum(); - double total_avg = (_timing_data[init_mark_gross]._secs.avg() + - _timing_data[final_mark_gross]._secs.avg()) / 2.0; - double total_max = MAX2(_timing_data[init_mark_gross]._secs.maximum(), - _timing_data[final_mark_gross]._secs.maximum()); - - out->print_cr("%-27s = %8.2lf s, avg = %8.2lf ms, max = %8.2lf ms", - "Total", total_sum, total_avg * 1000.0, total_max * 1000.0); - + out->cr(); + out->print_cr("" SIZE_FORMAT " allocation failure and " SIZE_FORMAT " user requested GCs", _allocation_failure_gcs, _user_requested_gcs); + out->print_cr("" SIZE_FORMAT " successful and " SIZE_FORMAT " degenerated concurrent markings", _successful_cm, _degenerated_cm); + out->cr(); } void ShenandoahCollectorPolicy::print_summary_sd(outputStream* out, const char* str, const HdrSeq* seq) { - out->print("%-34s = %8.2lf s (avg = %8.0lf us)", - str, seq->sum(), seq->avg() * 1000000.0); - out->print_cr(" (num = "INT32_FORMAT_W(5)", lvls (10%% step, us) = %8.0lf, %8.0lf, %8.0lf, %8.0lf, %8.0lf, %8.0lf, %8.0lf, %8.0lf, %8.0lf, max = %8.0lf)", + out->print_cr("%-27s = %8.2lf s (a = %8.0lf us) (n = "INT32_FORMAT_W(5)") (lvls, us = %8.0lf, %8.0lf, %8.0lf, %8.0lf, %8.0lf)", + str, + seq->sum(), + seq->avg() * 1000000.0, seq->num(), - seq->percentile(10) * 
1000000.0, seq->percentile(20) * 1000000.0, - seq->percentile(30) * 1000000.0, seq->percentile(40) * 1000000.0, - seq->percentile(50) * 1000000.0, seq->percentile(60) * 1000000.0, - seq->percentile(70) * 1000000.0, seq->percentile(80) * 1000000.0, - seq->percentile(90) * 1000000.0, seq->maximum() * 1000000.0 ); } diff -r c50c9462519a -r 42938de9bb9e src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Fri Jan 13 16:52:07 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Fri Jan 13 19:30:39 2017 +0100 @@ -41,6 +41,9 @@ public: enum TimingPhase { + total_pause_gross, + total_pause, + init_mark_gross, init_mark, accumulate_stats, diff -r c50c9462519a -r 42938de9bb9e src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Fri Jan 13 16:52:07 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Fri Jan 13 19:30:39 2017 +0100 @@ -108,9 +108,11 @@ { TraceCollectorStats tcs(heap->monitoring_support()->stw_collection_counters()); VM_ShenandoahInitMark initMark; + heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::total_pause_gross); heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::init_mark_gross); VMThread::execute(&initMark); heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::init_mark_gross); + heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::total_pause_gross); } if (check_cancellation()) return; @@ -143,9 +145,11 @@ { TraceCollectorStats tcs(heap->monitoring_support()->stw_collection_counters()); VM_ShenandoahStartEvacuation finishMark; + heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::total_pause_gross); heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::final_mark_gross); VMThread::execute(&finishMark); heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::final_mark_gross); + heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::total_pause_gross); } if (check_cancellation()) return; diff -r c50c9462519a -r 42938de9bb9e src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp --- a/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Fri Jan 13 16:52:07 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Fri Jan 13 19:30:39 2017 +0100 @@ -39,6 +39,7 @@ void VM_ShenandoahInitMark::doit() { ShenandoahHeap *sh = (ShenandoahHeap*) Universe::heap(); GCTraceTime time("Pause Init-Mark", ShenandoahLogInfo, true, sh->gc_timer(), sh->tracer()->gc_id()); + sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::total_pause); sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::init_mark); assert(sh->is_next_bitmap_clear(), "need clear marking bitmap"); @@ -51,6 +52,7 @@ } sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::init_mark); + sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::total_pause); } @@ -105,10 +107,12 @@ ShenandoahHeap *sh = ShenandoahHeap::heap(); if (! 
sh->cancelled_concgc()) { GCTraceTime time("Pause Final Mark", ShenandoahLogInfo, true, sh->gc_timer(), sh->tracer()->gc_id()); + sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::total_pause); sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::final_mark); sh->concurrentMark()->finish_mark_from_roots(); sh->stop_concurrent_marking(); sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::final_mark); + sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::total_pause); sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::prepare_evac); sh->prepare_for_concurrent_evacuation(); changeset: 9519:5cc2468e8c44 user: rkennke date: Mon Jan 16 10:33:49 2017 +0100 summary: Fix (over) optimization for cmp-objects. diff -r 42938de9bb9e -r 5cc2468e8c44 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Fri Jan 13 19:30:39 2017 +0100 +++ b/src/share/vm/opto/graphKit.cpp Mon Jan 16 10:33:49 2017 +0100 @@ -4360,10 +4360,7 @@ // We know one arg is gonna be null. No need for barriers. return _gvn.transform(new (C) CmpPNode(b, a)); } - if (AllocateNode::Ideal_allocation(a, &_gvn) != NULL || AllocateNode::Ideal_allocation(b, &_gvn) != NULL) { - // We know one arg is already in to-space. No need for barriers. - return _gvn.transform(new (C) CmpPNode(b, a)); - } + const TypePtr* a_adr_type = ShenandoahBarrierNode::brooks_pointer_type(a_type); const TypePtr* b_adr_type = ShenandoahBarrierNode::brooks_pointer_type(b_type); if ((! ShenandoahBarrierNode::needs_barrier(&_gvn, NULL, a, memory(a_adr_type), false)) && diff -r 42938de9bb9e -r 5cc2468e8c44 src/share/vm/opto/subnode.cpp --- a/src/share/vm/opto/subnode.cpp Fri Jan 13 19:30:39 2017 +0100 +++ b/src/share/vm/opto/subnode.cpp Mon Jan 16 10:33:49 2017 +0100 @@ -1025,12 +1025,10 @@ if (UseShenandoahGC) { Node* in1 = in(1); Node* in2 = in(2); - if (in1->bottom_type() == TypePtr::NULL_PTR || - AllocateNode::Ideal_allocation(in1, phase) != NULL) { + if (in1->bottom_type() == TypePtr::NULL_PTR) { in2 = ShenandoahBarrierNode::skip_through_barrier(in2); } - if (in2->bottom_type() == TypePtr::NULL_PTR || - AllocateNode::Ideal_allocation(in2, phase) != NULL) { + if (in2->bottom_type() == TypePtr::NULL_PTR) { in1 = ShenandoahBarrierNode::skip_through_barrier(in1); } PhaseIterGVN* igvn = phase->is_IterGVN(); changeset: 9520:5222806b5846 user: shade date: Mon Jan 16 17:31:26 2017 +0100 summary: Optimize object/array marking with bit-stealing task encoding. 
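The heart of this changeset is the task encoding itself. The following standalone sketch (plain C++ with illustrative names, not HotSpot code) reproduces the arithmetic that the ObjArrayChunkedTask comment in the diff below describes, under the same bit budget of 10 chunk bits, 5 pow bits, and 49 oop bits: a plain oop leaves the upper bits zero, and a chunk <C, P> covering [(C-1)*2^P; C*2^P) splits into <2C-1, P-1> and <2C, P-1>.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    enum { chunk_bits = 10, pow_bits = 5, oop_bits = 64 - chunk_bits - pow_bits };
    enum { pow_shift = oop_bits, chunk_shift = oop_bits + pow_bits };
    static const uint64_t chunk_mask = (UINT64_C(1) << chunk_bits) - 1;

    // Pack a pointer-sized value with (chunk, pow): |-------oop-------|-pow-|-chunk-|
    static uint64_t encode(uint64_t ptr, unsigned chunk, unsigned pow) {
      assert(ptr < (UINT64_C(1) << oop_bits)); // the pointer must fit in 49 bits
      return ptr | ((uint64_t) pow << pow_shift) | ((uint64_t) chunk << chunk_shift);
    }

    int main() {
      // Common path: a non-array task keeps the upper bits zero, so the
      // "is this chunked?" test is a single mask of the chunk field.
      uint64_t plain = encode(0xCAFE0, 0, 0);
      assert(((plain >> chunk_shift) & chunk_mask) == 0);

      // Chunk <C=1, P=4> covers elements [0; 16). One split step yields
      // <2C-1, P-1> = <1, 3> over [0; 8) and <2C, P-1> = <2, 3> over [8; 16).
      unsigned chunk = 1, pow = 4;
      pow--;
      unsigned lo = 2 * chunk - 1, hi = 2 * chunk;
      printf("push <%u,%u> = [%u; %u)\n", lo, pow, (lo - 1) << pow, lo << pow);
      printf("keep <%u,%u> = [%u; %u)\n", hi, pow, (hi - 1) << pow, hi << pow);
      return 0;
    }

The union of the two halves is exactly the parent interval, which is why workers can keep splitting stolen chunks locally without any upfront serial division of the array.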
diff -r 5cc2468e8c44 -r 5222806b5846 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Mon Jan 16 10:33:49 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Mon Jan 16 17:31:26 2017 +0100 @@ -28,7 +28,7 @@ #include "utilities/workgroup.hpp" #include "gc_implementation/shenandoah/shenandoahTaskqueue.hpp" -typedef ObjArrayFromToTask SCMTask; +typedef ObjArrayChunkedTask SCMTask; typedef BufferedOverflowTaskQueue ShenandoahBufferedOverflowTaskQueue; typedef Padded SCMObjToScanQueue; @@ -57,7 +57,8 @@ ~ShenandoahMarkObjsClosure(); inline void do_task(SCMTask* task); - inline void do_chunked_array(objArrayOop array, int from, int to); + inline void do_chunked_array_start(objArrayOop array, int len); + inline void do_chunked_array(objArrayOop array, int chunk, int pow); inline void count_liveness(oop obj); }; diff -r 5cc2468e8c44 -r 5222806b5846 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Mon Jan 16 10:33:49 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Mon Jan 16 17:31:26 2017 +0100 @@ -53,31 +53,31 @@ assert(_heap->is_in(obj), "referenced objects must be in the heap. No?"); assert(_heap->is_marked_next(obj), "only marked objects on task queue"); - int from = task->from(); - if (from == -1) { + if (task->is_not_chunked()) { count_liveness(obj); - if (obj->is_objArray()) { - // Case 1: Array instance and no task bounds set. Must be the first time + if (!obj->is_objArray()) { + // Case 1: Normal oop, process as usual. + obj->oop_iterate(&_mark_refs); + } else { + // Case 2: Array instance and no chunk is set. Must be the first time // we visit it. objArrayOop array = objArrayOop(obj); int len = array->length(); if (len > 0) { - // Case 1a. Non-empty array. The header would be processed along with the + // Case 2a. Non-empty array. The header would be processed along with the // chunk that starts at offset=0, see ObjArrayKlass::oop_oop_iterate_range. - do_chunked_array(array, 0, len); + do_chunked_array_start(array, len); } else { - // Case 1b. Empty array. Only need to care about the header. + // Case 2b. Empty array. Only need to care about the header. _mark_refs.do_klass(obj->klass()); } - } else { - // Case 2: Normal oop, process as usual. - obj->oop_iterate(&_mark_refs); } } else { - // Case 3: Array chunk, has sensible (from, to) bounds. Process it. + // Case 3: Array chunk, has sensible chunk id. Process it. + int chunk = task->chunk(); assert(obj->is_objArray(), "expect object array"); objArrayOop array = objArrayOop(obj); - do_chunked_array(array, from, task->to()); + do_chunked_array(array, chunk, task->pow()); } } @@ -105,20 +105,87 @@ } template -inline void ShenandoahMarkObjsClosure::do_chunked_array(objArrayOop array, int from, int to) { - assert (from < to, "sanity"); +inline void ShenandoahMarkObjsClosure::do_chunked_array_start(objArrayOop array, int len) { + if (len <= (int) ObjArrayMarkingStride*2) { + // A few slices only, process directly + array->oop_iterate_range(&_mark_refs, 0, len); + } else { + int bits = log2_long(len); + // Compensate for non-power-of-two arrays, cover the array in excess: + if (len != (1 << bits)) bits++; + + // Only allow full chunks on the queue. 
This frees do_chunked_array() from checking from/to + // boundaries against array->length(), touching the array header on every chunk. + // + // To do this, we cut the prefix in full-sized chunks, and submit them on the queue. + // If the array is not divided in chunk sizes, then there would be an irregular tail, + // which we will process separately. + + int last_idx = 0; + + int chunk = 1; + int pow = bits; + + // Handle overflow + if (pow >= 31) { + assert (pow == 31, "sanity"); + pow--; + chunk = 2; + last_idx = (1 << pow); + bool pushed = _queue->push(SCMTask(array, 1, pow)); + assert(pushed, "overflow queue should always succeed pushing"); + } + + // Split out tasks, as suggested in ObjArrayChunkedTask docs. Record the last + // successful right boundary to figure out the irregular tail. + while ((1 << pow) > (int)ObjArrayMarkingStride && + (chunk*2 < SCMTask::chunk_size)) { + pow--; + int left_chunk = chunk*2 - 1; + int right_chunk = chunk*2; + int left_chunk_end = left_chunk * (1 << pow); + if (left_chunk_end < len) { + bool pushed = _queue->push(SCMTask(array, left_chunk, pow)); + assert(pushed, "overflow queue should always succeed pushing"); + chunk = right_chunk; + last_idx = left_chunk_end; + } else { + chunk = left_chunk; + } + } + + // Process the irregular tail, if present + int from = last_idx; + if (from < len) { + array->oop_iterate_range(&_mark_refs, from, len); + } + } +} + +template +inline void ShenandoahMarkObjsClosure::do_chunked_array(objArrayOop array, int chunk, int pow) { assert (ObjArrayMarkingStride > 0, "sanity"); - // Fork out tasks until we hit the leaf task. Larger tasks would go to the - // "stealing" part of the queue, which will seed other workers efficiently. - while ((to - from) > (int)ObjArrayMarkingStride) { - int mid = from + (to - from) / 2; - bool pushed = _queue->push(SCMTask(array, mid, to)); + // Split out tasks, as suggested in ObjArrayChunkedTask docs. Avoid pushing tasks that + // are known to start beyond the array. 
+ while ((1 << pow) > (int)ObjArrayMarkingStride && (chunk*2 < SCMTask::chunk_size)) { + pow--; + chunk *= 2; + bool pushed = _queue->push(SCMTask(array, chunk - 1, pow)); assert(pushed, "overflow queue should always succeed pushing"); - to = mid; } - // Execute the leaf task + int chunk_size = 1 << pow; + + int from = (chunk - 1) * chunk_size; + int to = chunk * chunk_size; + +#ifdef ASSERT + int len = array->length(); + assert (0 <= from && from < len, err_msg("from is sane: %d/%d", from, len)); + assert (0 < to && to <= len, err_msg("to is sane: %d/%d", to, len)); +#endif + array->oop_iterate_range(&_mark_refs, from, to); } @@ -183,7 +250,7 @@ || oopDesc::bs()->is_safe(obj), "we don't want to mark objects in from-space"); - bool pushed = q->push(SCMTask(obj, -1, -1)); + bool pushed = q->push(SCMTask(obj)); assert(pushed, "overflow queue should always succeed pushing"); } diff -r 5cc2468e8c44 -r 5222806b5846 src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp Mon Jan 16 10:33:49 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp Mon Jan 16 17:31:26 2017 +0100 @@ -26,7 +26,7 @@ #include "gc_implementation/shenandoah/shenandoahTaskqueue.hpp" -typedef BufferedOverflowTaskQueue ShenandoahBufferedOverflowTaskQueue; +typedef BufferedOverflowTaskQueue ShenandoahBufferedOverflowTaskQueue; typedef Padded SCMObjToScanQueue; class ShenandoahHeap; diff -r 5cc2468e8c44 -r 5222806b5846 src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.hpp Mon Jan 16 10:33:49 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.hpp Mon Jan 16 17:31:26 2017 +0100 @@ -61,43 +61,174 @@ E _elem; }; -class ObjArrayFromToTask +// ObjArrayChunkedTask +// +// Encodes both regular oops, and the array oops plus chunking data for parallel array processing. +// The design goal is to make the regular oop ops very fast, because that would be the prevailing +// case. On the other hand, it should not block parallel array processing from efficiently dividing +// the array work. +// +// The idea is to steal the bits from the 64-bit oop to encode array data, if needed. For the +// proper divide-and-conquer strategies, we want to encode the "blocking" data. It turns out, the +// most efficient way to do this is to encode the array block as (chunk * 2^pow), where it is assumed +// that the block has the size of 2^pow. This requires for pow to have only 5 bits (2^32) to encode +// all possible arrays. +// +// |---------oop---------|-pow-|--chunk---| +// 0 49 54 64 +// +// By definition, chunk == 0 means "no chunk", i.e. chunking starts from 1. +// +// This encoding gives a few interesting benefits: +// +// a) Encoding/decoding regular oops is very simple, because the upper bits are zero in that task: +// +// |---------oop---------|00000|0000000000| // no chunk data +// +// This helps the most ubiquitous path. The initialization amounts to putting the oop into the word +// with zero padding. Testing for "chunkedness" is testing for zero with chunk mask. +// +// b) Splitting tasks for divide-and-conquer is possible. Suppose we have chunk that covers +// interval [ (C-1)*2^P; C*2^P ). 
We can then split it into two chunks: +// <2*C - 1, P-1>, that covers interval [ (2*C - 2)*2^(P-1); (2*C - 1)*2^(P-1) ) +// <2*C, P-1>, that covers interval [ (2*C - 1)*2^(P-1); 2*C*2^(P-1) ) +// +// Observe that the union of these two intervals is: +// [ (2*C - 2)*2^(P-1); 2*C*2^(P-1) ) +// +// ...which is the original interval: +// [ (C-1)*2^P; C*2^P ) +// +// c) The divide-and-conquer strategy could even start with chunk <1, round-log2-len(arr)>, and split +// down in the parallel threads, which alleviates the upfront (serial) splitting costs. +// +// Encoding limitations caused by current bitscales mean: +// 10 bits for chunk: max 1024 blocks per array +// 5 bits for power: max 2^32 array +// 49 bits for oop: max 512 TB of addressable space +// +// Stealing bits from oop trims down the addressable space. Stealing too few bits for chunk ID limits +// potential parallelism. Stealing too few bits for pow limits the maximum array size that can be handled. +// In future, these might be rebalanced to favor one degree of freedom against another. For example, +// if/when Arrays 2.0 bring 2^64-sized arrays, we might need to steal another bit for power. We could regain +// some bits back if chunks are counted in ObjArrayMarkingStride units. +// +// There is also a fallback version that uses plain fields, when we don't have enough space to steal the +// bits from the native pointer. It is useful to debug the _LP64 version. +// +#ifdef _LP64 +class ObjArrayChunkedTask { public: - ObjArrayFromToTask(oop o = NULL, int from = 0, int to = 0): _obj(o), _from(from), _to(to) { } - ObjArrayFromToTask(oop o, size_t from, size_t to): _obj(o), _from(int(from)), _to(int(to)) { - assert(from <= size_t(max_jint), "too big"); - assert(to <= size_t(max_jint), "too big"); - assert(from < to, "sanity"); + enum { + chunk_bits = 10, + pow_bits = 5, + oop_bits = sizeof(uintptr_t)*8 - chunk_bits - pow_bits, + }; + enum { + chunk_size = nth_bit(chunk_bits), + pow_size = nth_bit(pow_bits), + oop_size = nth_bit(oop_bits), + }; + enum { + oop_shift = 0, + pow_shift = oop_shift + oop_bits, + chunk_shift = pow_shift + pow_bits, + }; + enum { + oop_mask = right_n_bits(oop_bits), + pow_mask = right_n_bits(pow_bits), + chunk_mask = right_n_bits(chunk_bits), + chunk_mask_unshift = ~right_n_bits(oop_bits + pow_bits), + }; + +public: + ObjArrayChunkedTask(oop o = NULL) { + _obj = ((uintptr_t)(void*) o) << oop_shift; } - ObjArrayFromToTask(const ObjArrayFromToTask& t): _obj(t._obj), _from(t._from), _to(t._to) { } + ObjArrayChunkedTask(oop o, int chunk, int mult) { + assert(0 <= chunk && chunk < chunk_size, err_msg("chunk is sane: %d", chunk)); + assert(0 <= mult && mult < pow_size, err_msg("pow is sane: %d", mult)); + uintptr_t t_b = ((uintptr_t) chunk) << chunk_shift; + uintptr_t t_m = ((uintptr_t) mult) << pow_shift; + uintptr_t obj = (uintptr_t)(void*)o; + assert(obj < oop_size, err_msg("obj ref is sane: " PTR_FORMAT, obj)); + intptr_t t_o = obj << oop_shift; + _obj = t_o | t_m | t_b; + } + ObjArrayChunkedTask(const ObjArrayChunkedTask& t): _obj(t._obj) { } - ObjArrayFromToTask& operator =(const ObjArrayFromToTask& t) { + ObjArrayChunkedTask& operator =(const ObjArrayChunkedTask& t) { _obj = t._obj; - _from = t._from; - _to = t._to; return *this; } - volatile ObjArrayFromToTask& - operator =(const volatile ObjArrayFromToTask& t) volatile { + volatile ObjArrayChunkedTask& + operator =(const volatile ObjArrayChunkedTask& t) volatile { + (void)const_cast(_obj = t._obj); + return *this; + } + + inline oop obj() const { return (oop) 
reinterpret_cast((_obj >> oop_shift) & oop_mask); } + inline int chunk() const { return (int) (_obj >> chunk_shift) & chunk_mask; } + inline int pow() const { return (int) ((_obj >> pow_shift) & pow_mask); } + inline bool is_not_chunked() const { return (_obj & chunk_mask_unshift) == 0; } + + DEBUG_ONLY(bool is_valid() const); // Tasks to be pushed/popped must be valid. + +private: + uintptr_t _obj; +}; +#else +class ObjArrayChunkedTask +{ +public: + enum { + chunk_bits = 10, + pow_bits = 5, + }; + enum { + chunk_size = nth_bit(chunk_bits), + pow_size = nth_bit(pow_bits), + }; +public: + ObjArrayChunkedTask(oop o = NULL, int chunk = 0, int pow = 0): _obj(o) { + assert(0 <= chunk && chunk < chunk_size, "chunk is sane: %d", chunk); + assert(0 <= pow && pow < pow_size, "pow is sane: %d", pow); + _chunk = chunk; + _pow = pow; + } + ObjArrayChunkedTask(const ObjArrayChunkedTask& t): _obj(t._obj), _chunk(t._chunk), _pow(t._pow) { } + + ObjArrayChunkedTask& operator =(const ObjArrayChunkedTask& t) { + _obj = t._obj; + _chunk = t._chunk; + _pow = t._pow; + return *this; + } + volatile ObjArrayChunkedTask& + operator =(const volatile ObjArrayChunkedTask& t) volatile { (void)const_cast(_obj = t._obj); - _from = t._from; - _to = t._to; + _chunk = t._chunk; + _pow = t._pow; return *this; } inline oop obj() const { return _obj; } - inline int from() const { return _from; } - inline int to() const { return _to; } + inline int chunk() const { return _chunk; } + inline int pow() const { return _pow; } + + inline bool is_not_chunked() const { return _chunk == 0; } DEBUG_ONLY(bool is_valid() const); // Tasks to be pushed/popped must be valid. private: oop _obj; - int _from, _to; + int _chunk; + int _pow; }; +#endif -typedef ObjArrayFromToTask SCMTask; +typedef ObjArrayChunkedTask SCMTask; typedef BufferedOverflowTaskQueue ShenandoahBufferedOverflowTaskQueue; typedef Padded SCMObjToScanQueue; diff -r 5cc2468e8c44 -r 5222806b5846 src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Mon Jan 16 10:33:49 2017 +0100 +++ b/src/share/vm/runtime/arguments.cpp Mon Jan 16 17:31:26 2017 +0100 @@ -1723,6 +1723,13 @@ UNSUPPORTED_OPTION(UseShenandoahGC); #endif + if (MaxHeapSize >= ObjArrayChunkedTask::oop_size) { + jio_fprintf(defaultStream::error_stream(), + "Shenandoah GC cannot address more than " SIZE_FORMAT " bytes, and " SIZE_FORMAT " bytes heap requested.", + ObjArrayChunkedTask::oop_size, MaxHeapSize); + vm_exit(1); + } + FLAG_SET_DEFAULT(ParallelGCThreads, Abstract_VM_Version::parallel_worker_threads()); changeset: 9521:47184cae7585 user: shade date: Mon Jan 16 19:31:06 2017 +0100 summary: GC stats table should report minimum and median. diff -r 5222806b5846 -r 47184cae7585 src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Mon Jan 16 17:31:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Mon Jan 16 19:31:06 2017 +0100 @@ -680,7 +680,7 @@ out->print_cr("GC STATISTICS:"); out->print_cr(" \"gross\" pauses include time to safepoint. \"net\" pauses are times spent in GC."); out->print_cr(" \"a\" is average time for each phase, look at levels to see if average makes sense."); - out->print_cr(" \"lvls\" are 20%% step quantiles, the last level is 100%%, i.e. 
maximum."); + out->print_cr(" \"lvls\" are quantiles: 0%% (minimum), 25%%, 50%% (median), 75%%, 100%% (maximum)."); out->cr(); for (uint i = 0; i < _num_phases; i++) { @@ -701,10 +701,10 @@ seq->sum(), seq->avg() * 1000000.0, seq->num(), - seq->percentile(20) * 1000000.0, - seq->percentile(40) * 1000000.0, - seq->percentile(60) * 1000000.0, - seq->percentile(80) * 1000000.0, + seq->percentile(0) * 1000000.0, + seq->percentile(25) * 1000000.0, + seq->percentile(50) * 1000000.0, + seq->percentile(75) * 1000000.0, seq->maximum() * 1000000.0 ); } diff -r 5222806b5846 -r 47184cae7585 src/share/vm/utilities/numberSeq.cpp --- a/src/share/vm/utilities/numberSeq.cpp Mon Jan 16 17:31:26 2017 +0100 +++ b/src/share/vm/utilities/numberSeq.cpp Mon Jan 16 19:31:06 2017 +0100 @@ -338,7 +338,8 @@ } double HdrSeq::percentile(double level) const { - int target = (int) (level * num() / 100); + // target should be non-zero to find the first sample + int target = MAX2(1, (int) (level * num() / 100)); int cnt = 0; for (int mag = 0; mag < MagBuckets; mag++) { if (_hdr[mag] != NULL) { changeset: 9522:3d74d2afb6a5 user: shade date: Tue Jan 24 10:58:59 2017 +0100 summary: Avoid touching metadata if class unloading is not requested. diff -r 47184cae7585 -r 3d74d2afb6a5 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Mon Jan 16 19:31:06 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Tue Jan 24 10:58:59 2017 +0100 @@ -131,14 +131,7 @@ } } -ShenandoahMarkUpdateRefsClosure::ShenandoahMarkUpdateRefsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) : - MetadataAwareOopClosure(rp), - _queue(q), - _heap((ShenandoahHeap*) Universe::heap()) -{ -} - -ShenandoahMarkRefsClosure::ShenandoahMarkRefsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) : +ShenandoahMarkRefsSuperClosure::ShenandoahMarkRefsSuperClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) : MetadataAwareOopClosure(rp), _queue(q), _heap((ShenandoahHeap*) Universe::heap()) @@ -227,11 +220,21 @@ } } if (_update_refs) { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); + if (_cm->unload_classes()) { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); + } else { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); + } } else { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); + if (_cm->unload_classes()) { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); + } else { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); + } } } }; @@ -242,10 +245,11 @@ ParallelTaskTerminator* _terminator; bool _update_refs; bool _count_live; + bool _unload_classes; public: - SCMFinalMarkingTask(ShenandoahConcurrentMark* cm, ParallelTaskTerminator* terminator, bool update_refs, bool count_live) : - AbstractGangTask("Shenandoah Final Marking"), _cm(cm), _terminator(terminator), _update_refs(update_refs), _count_live(count_live) { + SCMFinalMarkingTask(ShenandoahConcurrentMark* cm, ParallelTaskTerminator* terminator, bool update_refs, bool count_live, bool unload_classes) : + AbstractGangTask("Shenandoah Final Marking"), _cm(cm), _terminator(terminator), _update_refs(update_refs), 
_count_live(count_live), _unload_classes(unload_classes) { } void work(uint worker_id) { @@ -268,19 +272,39 @@ // Templates need constexprs, so we have to switch by the flags ourselves. if (_update_refs) { if (_count_live) { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->final_mark_loop(&cl, worker_id, q, _terminator); + if (_unload_classes) { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + _cm->final_mark_loop(&cl, worker_id, q, _terminator); + } else { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + _cm->final_mark_loop(&cl, worker_id, q, _terminator); + } } else { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->final_mark_loop(&cl, worker_id, q, _terminator); + if (_unload_classes) { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + _cm->final_mark_loop(&cl, worker_id, q, _terminator); + } else { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + _cm->final_mark_loop(&cl, worker_id, q, _terminator); + } } } else { if (_count_live) { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->final_mark_loop(&cl, worker_id, q, _terminator); + if (_unload_classes) { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + _cm->final_mark_loop(&cl, worker_id, q, _terminator); + } else { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + _cm->final_mark_loop(&cl, worker_id, q, _terminator); + } } else { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->final_mark_loop(&cl, worker_id, q, _terminator); + if (_unload_classes) { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + _cm->final_mark_loop(&cl, worker_id, q, _terminator); + } else { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + _cm->final_mark_loop(&cl, worker_id, q, _terminator); + } } } @@ -467,12 +491,12 @@ SharedHeap::StrongRootsScope scope(sh, true); if (UseShenandoahOWST) { ShenandoahTaskTerminator terminator(nworkers, task_queues()); - SCMFinalMarkingTask markingTask = SCMFinalMarkingTask(this, &terminator, sh->need_update_refs(), count_live); - sh->workers()->run_task(&markingTask); + SCMFinalMarkingTask task(this, &terminator, sh->need_update_refs(), count_live, unload_classes()); + sh->workers()->run_task(&task); } else { ParallelTaskTerminator terminator(nworkers, task_queues()); - SCMFinalMarkingTask markingTask = SCMFinalMarkingTask(this, &terminator, sh->need_update_refs(), count_live); - sh->workers()->run_task(&markingTask); + SCMFinalMarkingTask task(this, &terminator, sh->need_update_refs(), count_live, unload_classes()); + sh->workers()->run_task(&task); } policy->record_phase_end(full_gc ? 
ShenandoahCollectorPolicy::full_gc_mark_drain_queues : @@ -648,11 +672,21 @@ SCMObjToScanQueue* q = scm->get_queue(_worker_id); jushort* live_data = scm->get_liveness(_worker_id); if (sh->need_update_refs()) { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - scm->final_mark_loop(&cl, _worker_id, q, _terminator); + if (scm->unload_classes()) { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + scm->final_mark_loop(&cl, _worker_id, q, _terminator); + } else { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + scm->final_mark_loop(&cl, _worker_id, q, _terminator); + } } else { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - scm->final_mark_loop(&cl, _worker_id, q, _terminator); + if (scm->unload_classes()) { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + scm->final_mark_loop(&cl, _worker_id, q, _terminator); + } else { + ShenandoahMarkObjsClosure cl(q, rp, live_data); + scm->final_mark_loop(&cl, _worker_id, q, _terminator); + } } } }; diff -r 47184cae7585 -r 3d74d2afb6a5 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Mon Jan 16 19:31:06 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Tue Jan 24 10:58:59 2017 +0100 @@ -57,8 +57,8 @@ ~ShenandoahMarkObjsClosure(); inline void do_task(SCMTask* task); - inline void do_chunked_array_start(objArrayOop array, int len); - inline void do_chunked_array(objArrayOop array, int chunk, int pow); + inline void do_chunked_array_start(oop array); + inline void do_chunked_array(oop array, int chunk, int pow); inline void count_liveness(oop obj); }; diff -r 47184cae7585 -r 3d74d2afb6a5 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Mon Jan 16 19:31:06 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Tue Jan 24 10:58:59 2017 +0100 @@ -54,36 +54,23 @@ assert(_heap->is_marked_next(obj), "only marked objects on task queue"); if (task->is_not_chunked()) { - count_liveness(obj); + if (CL) count_liveness(obj); if (!obj->is_objArray()) { // Case 1: Normal oop, process as usual. obj->oop_iterate(&_mark_refs); } else { // Case 2: Array instance and no chunk is set. Must be the first time // we visit it. - objArrayOop array = objArrayOop(obj); - int len = array->length(); - if (len > 0) { - // Case 2a. Non-empty array. The header would be processed along with the - // chunk that starts at offset=0, see ObjArrayKlass::oop_oop_iterate_range. - do_chunked_array_start(array, len); - } else { - // Case 2b. Empty array. Only need to care about the header. - _mark_refs.do_klass(obj->klass()); - } + do_chunked_array_start(obj); } } else { // Case 3: Array chunk, has sensible chunk id. Process it. - int chunk = task->chunk(); - assert(obj->is_objArray(), "expect object array"); - objArrayOop array = objArrayOop(obj); - do_chunked_array(array, chunk, task->pow()); + do_chunked_array(obj, task->chunk(), task->pow()); } } template inline void ShenandoahMarkObjsClosure::count_liveness(oop obj) { - if (!CL) return; // no need to count liveness! 
uint region_idx = _heap->heap_region_index_containing(obj); jushort cur = _live_data[region_idx]; int size = obj->size() + BrooksPointer::word_size(); @@ -105,7 +92,11 @@ } template -inline void ShenandoahMarkObjsClosure::do_chunked_array_start(objArrayOop array, int len) { +inline void ShenandoahMarkObjsClosure::do_chunked_array_start(oop obj) { + assert(obj->is_objArray(), "expect object array"); + objArrayOop array = objArrayOop(obj); + int len = array->length(); + if (len <= (int) ObjArrayMarkingStride*2) { // A few slices only, process directly array->oop_iterate_range(&_mark_refs, 0, len); @@ -163,7 +154,10 @@ } template -inline void ShenandoahMarkObjsClosure::do_chunked_array(objArrayOop array, int chunk, int pow) { +inline void ShenandoahMarkObjsClosure::do_chunked_array(oop obj, int chunk, int pow) { + assert(obj->is_objArray(), "expect object array"); + objArrayOop array = objArrayOop(obj); + assert (ObjArrayMarkingStride > 0, "sanity"); // Split out tasks, as suggested in ObjArrayChunkedTask docs. Avoid pushing tasks that diff -r 47184cae7585 -r 3d74d2afb6a5 src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp Mon Jan 16 19:31:06 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp Tue Jan 24 10:58:59 2017 +0100 @@ -31,34 +31,67 @@ class ShenandoahHeap; -class ShenandoahMarkUpdateRefsClosure : public MetadataAwareOopClosure { +class ShenandoahMarkRefsSuperClosure : public MetadataAwareOopClosure { +private: SCMObjToScanQueue* _queue; ShenandoahHeap* _heap; +public: + ShenandoahMarkRefsSuperClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp); + template + void work(T *p); +}; + +class ShenandoahMarkUpdateRefsClosure : public ShenandoahMarkRefsSuperClosure { public: - ShenandoahMarkUpdateRefsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp); + ShenandoahMarkUpdateRefsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) : + ShenandoahMarkRefsSuperClosure(q, rp) {}; template - void do_oop_nv(T* p); - + inline void do_oop_nv(T* p) { work(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } - virtual void do_oop(oop* p) { do_oop_nv(p); } - + virtual void do_oop(oop* p) { do_oop_nv(p); } + inline bool do_metadata_nv() { return false; } + virtual bool do_metadata() { return false; } }; -class ShenandoahMarkRefsClosure : public MetadataAwareOopClosure { - SCMObjToScanQueue* _queue; - ShenandoahHeap* _heap; - +class ShenandoahMarkUpdateRefsMetadataClosure : public ShenandoahMarkRefsSuperClosure { public: - ShenandoahMarkRefsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp); + ShenandoahMarkUpdateRefsMetadataClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) : + ShenandoahMarkRefsSuperClosure(q, rp) {}; template - void do_oop_nv(T* p); + inline void do_oop_nv(T* p) { work(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } + virtual void do_oop(oop* p) { do_oop_nv(p); } + inline bool do_metadata_nv() { return true; } + virtual bool do_metadata() { return true; } +}; +class ShenandoahMarkRefsClosure : public ShenandoahMarkRefsSuperClosure { +public: + ShenandoahMarkRefsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) : + ShenandoahMarkRefsSuperClosure(q, rp) {}; + + template + inline void do_oop_nv(T* p) { work(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } - virtual void do_oop(oop* p) { do_oop_nv(p); } + virtual void do_oop(oop* p) { do_oop_nv(p); } + inline bool do_metadata_nv() { return false; } + virtual bool do_metadata() { 
return false; } +}; +class ShenandoahMarkRefsMetadataClosure : public ShenandoahMarkRefsSuperClosure { +public: + ShenandoahMarkRefsMetadataClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) : + ShenandoahMarkRefsSuperClosure(q, rp) {}; + + template + inline void do_oop_nv(T* p) { work(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } + virtual void do_oop(oop* p) { do_oop_nv(p); } + inline bool do_metadata_nv() { return true; } + virtual bool do_metadata() { return true; } }; #endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHOOPCLOSURES_HPP diff -r 47184cae7585 -r 3d74d2afb6a5 src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.inline.hpp Mon Jan 16 19:31:06 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.inline.hpp Tue Jan 24 10:58:59 2017 +0100 @@ -27,23 +27,19 @@ #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp" -template -inline void ShenandoahMarkUpdateRefsClosure::do_oop_nv(T* p) { - // We piggy-back reference updating to the marking tasks. - oop obj = _heap->maybe_update_oop_ref(p); +template +inline void ShenandoahMarkRefsSuperClosure::work(T *p) { + oop obj; + if (UPDATE_REFS) { + // We piggy-back reference updating to the marking tasks. + obj = _heap->maybe_update_oop_ref(p); + } else { + obj = oopDesc::load_decode_heap_oop(p); + } assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static(obj)), "need to-space object here"); if (! oopDesc::is_null(obj)) { ShenandoahConcurrentMark::mark_and_push(obj, _heap, _queue); } } -template -inline void ShenandoahMarkRefsClosure::do_oop_nv(T* p) { - oop obj = oopDesc::load_decode_heap_oop(p); - assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static(obj)), "need to-space object here"); - - if (! oopDesc::is_null(obj)) { - ShenandoahConcurrentMark::mark_and_push(obj, _heap, _queue); - } -} #endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHOOPCLOSURES_INLINE_HPP diff -r 47184cae7585 -r 3d74d2afb6a5 src/share/vm/gc_implementation/shenandoah/shenandoah_specialized_oop_closures.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_specialized_oop_closures.hpp Mon Jan 16 19:31:06 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_specialized_oop_closures.hpp Tue Jan 24 10:58:59 2017 +0100 @@ -25,10 +25,14 @@ #define SHARE_VM_GC_SHENANDOAH_SHENANDOAH_SPECIALIZED_OOP_CLOSURES_HPP class ShenandoahMarkUpdateRefsClosure; +class ShenandoahMarkUpdateRefsMetadataClosure; class ShenandoahMarkRefsClosure; +class ShenandoahMarkRefsMetadataClosure; #define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_SHENANDOAH(f) \ f(ShenandoahMarkUpdateRefsClosure,_nv) \ - f(ShenandoahMarkRefsClosure,_nv) + f(ShenandoahMarkUpdateRefsMetadataClosure,_nv) \ + f(ShenandoahMarkRefsClosure,_nv) \ + f(ShenandoahMarkRefsMetadataClosure,_nv) #endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAH_SPECIALIZED_OOP_CLOSURES_HPP changeset: 9523:6cc8a3870cca user: shade date: Wed Jan 25 11:06:23 2017 +0100 summary: Buffered TQ buffer breaks LIFO. 
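The one-line logic change below is subtle: the queue pops from its one-element buffer first, so the buffer must always hold the most recently pushed task. Before the fix, a push with an occupied buffer forwarded the new task to the backing queue and kept the older task buffered, breaking LIFO order. A minimal model of the fixed behavior, with std::vector standing in for the HotSpot task queue and hypothetical names:

    #include <cassert>
    #include <vector>

    struct BufferedQueue {
      std::vector<int> q;       // stand-in for the underlying taskqueue_t
      int elem = 0;
      bool buf_empty = true;

      void push(int t) {
        if (buf_empty) {
          elem = t;
          buf_empty = false;
        } else {
          q.push_back(elem);    // fixed: spill the older buffered task...
          elem = t;             // ...and keep the newest task in the buffer
        }
      }
      int pop() {               // buffer first, then the queue tail: strict LIFO
        if (!buf_empty) { buf_empty = true; return elem; }
        int t = q.back(); q.pop_back(); return t;
      }
    };

    int main() {
      BufferedQueue bq;
      bq.push(1); bq.push(2); bq.push(3);
      assert(bq.pop() == 3 && bq.pop() == 2 && bq.pop() == 1);
      // With the pre-fix logic (buffer keeps the oldest task), the first pop()
      // would have returned 1, the coldest task, hurting locality.
      return 0;
    }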
diff -r 3d74d2afb6a5 -r 6cc8a3870cca src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.inline.hpp Tue Jan 24 10:58:59 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahTaskqueue.inline.hpp Wed Jan 25 11:06:23 2017 +0100 @@ -14,8 +14,10 @@ if (_buf_empty) { _elem = t; _buf_empty = false; - return true; } else { - return taskqueue_t::push(t); + bool pushed = taskqueue_t::push(_elem); + assert(pushed, "overflow queue should always succeed pushing"); + _elem = t; } + return true; } changeset: 9524:b0a4436f09f8 user: shade date: Thu Jan 26 19:57:06 2017 +0100 summary: Sorting the regions for collection set takes a while during pause. diff -r 6cc8a3870cca -r b0a4436f09f8 src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Jan 25 11:06:23 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Thu Jan 26 19:57:06 2017 +0100 @@ -96,9 +96,24 @@ return cycle % ShenandoahUnloadClassesFrequency == 0; } -private: - static int compare_heap_regions_by_garbage(ShenandoahHeapRegion* a, ShenandoahHeapRegion* b); + virtual bool needs_regions_sorted_by_garbage() { + // Most of them do not. + return false; + } +public: + typedef struct { + size_t region_number; + size_t garbage; + } RegionGarbage; + + static int compare_by_garbage(RegionGarbage a, RegionGarbage b) { + if (a.garbage > b.garbage) + return -1; + else if (a.garbage < b.garbage) + return 1; + else return 0; + } }; ShenandoahHeuristics::ShenandoahHeuristics() : @@ -111,41 +126,23 @@ { } -int ShenandoahHeuristics::compare_heap_regions_by_garbage(ShenandoahHeapRegion* a, ShenandoahHeapRegion* b) { - if (a == NULL) { - if (b == NULL) { - return 0; - } else { - return 1; - } - } else if (b == NULL) { - return -1; - } - - size_t garbage_a = a->garbage(); - size_t garbage_b = b->garbage(); - - if (garbage_a > garbage_b) - return -1; - else if (garbage_a < garbage_b) - return 1; - else return 0; -} - void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collection_set) { - ShenandoahHeapRegionSet* sorted_regions = ShenandoahHeap::heap()->sorted_regions(); - sorted_regions->sort(compare_heap_regions_by_garbage); - start_choose_collection_set(); - size_t i = 0; - size_t end = sorted_regions->active_regions(); ShenandoahHeap* heap = ShenandoahHeap::heap(); - size_t total_garbage = heap->garbage(); + + // Step 1. Build up the region candidates we care about, rejecting losers and accepting winners right away. + + ShenandoahHeapRegionSet* regions = heap->regions(); + size_t end = regions->active_regions(); + + RegionGarbage candidates[end]; + size_t cand_idx = 0; + size_t immediate_garbage = 0; size_t immediate_regions = 0; for (size_t i = 0; i < end; i++) { - ShenandoahHeapRegion* region = sorted_regions->get(i); + ShenandoahHeapRegion* region = regions->get(i); if (! region->is_humongous() && ! region->is_pinned()) { if ((! region->is_empty()) && ! 
region->has_live()) { @@ -157,12 +154,11 @@ log_develop_trace(gc)("Choose region " SIZE_FORMAT " for immediate reclaim with garbage = " SIZE_FORMAT " and live = " SIZE_FORMAT "\n", region->region_number(), region->garbage(), region->get_live_data_bytes()); - } else if (region_in_collection_set(region, immediate_garbage)) { - log_develop_trace(gc)("Choose region " SIZE_FORMAT " with garbage = " SIZE_FORMAT - " and live = " SIZE_FORMAT "\n", - region->region_number(), region->garbage(), region->get_live_data_bytes()); - collection_set->add_region(region); - region->set_in_collection_set(true); + } else { + // This is our candidate for later consideration. + candidates[cand_idx].region_number = region->region_number(); + candidates[cand_idx].garbage = region->garbage(); + cand_idx++; } } else { assert(region->has_live() || region->is_empty() || region->is_pinned() || region->is_humongous(), "check rejected"); @@ -172,8 +168,28 @@ } } + // Step 2. Process the remaining candidates, if any. + + if (cand_idx > 0) { + if (needs_regions_sorted_by_garbage()) { + QuickSort::sort(candidates, cand_idx, compare_by_garbage, false); + } + + for (size_t i = 0; i < cand_idx; i++) { + ShenandoahHeapRegion *region = regions->get_fast(candidates[i].region_number); + if (region_in_collection_set(region, immediate_garbage)) { + log_develop_trace(gc)("Choose region " SIZE_FORMAT " with garbage = " SIZE_FORMAT + " and live = " SIZE_FORMAT "\n", + region->region_number(), region->garbage(), region->get_live_data_bytes()); + collection_set->add_region(region); + region->set_in_collection_set(true); + } + } + } + end_choose_collection_set(); + size_t total_garbage = heap->garbage(); log_debug(gc)("Total Garbage: "SIZE_FORMAT, total_garbage); log_debug(gc)("Immediate Garbage: "SIZE_FORMAT, immediate_garbage); log_debug(gc)("Immediate Garbage regions: "SIZE_FORMAT, immediate_regions); @@ -430,6 +446,9 @@ } } + virtual bool needs_regions_sorted_by_garbage() { + return true; + } }; class RatioHeuristics : public DynamicHeuristics { @@ -459,6 +478,10 @@ return false; } } + + virtual bool needs_regions_sorted_by_garbage() { + return true; + } }; ShenandoahCollectorPolicy::ShenandoahCollectorPolicy() : diff -r 6cc8a3870cca -r b0a4436f09f8 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Jan 25 11:06:23 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Thu Jan 26 19:57:06 2017 +0100 @@ -125,7 +125,6 @@ size_t regionSizeWords = ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize; assert(init_byte_size == _initialSize, "tautology"); _ordered_regions = new ShenandoahHeapRegionSet(_max_regions); - _sorted_regions = new ShenandoahHeapRegionSet(_max_regions); _collection_set = new ShenandoahCollectionSet(_max_regions); _free_regions = new ShenandoahFreeSet(_max_regions); @@ -162,7 +161,6 @@ regionSizeWords * i, regionSizeWords, i); _free_regions->add_region(current); _ordered_regions->add_region(current); - _sorted_regions->add_region(current); } } assert(((size_t) _ordered_regions->active_regions()) == _num_regions, ""); @@ -1980,7 +1978,6 @@ assert(_ordered_regions->active_regions() == new_region->region_number(), "must match"); _ordered_regions->add_region(new_region); - _sorted_regions->add_region(new_region); _in_cset_fast_test_base[new_region_index] = false; // Not in cset _next_top_at_mark_starts_base[new_region_index] = new_region->bottom(); _complete_top_at_mark_starts_base[new_region_index] = 
new_region->bottom(); diff -r 6cc8a3870cca -r b0a4436f09f8 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Jan 25 11:06:23 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Thu Jan 26 19:57:06 2017 +0100 @@ -328,7 +328,6 @@ void clear_cancelled_concgc(); ShenandoahHeapRegionSet* regions() { return _ordered_regions;} - ShenandoahHeapRegionSet* sorted_regions() { return _sorted_regions;} ShenandoahFreeSet* free_regions(); void clear_free_regions(); void add_free_region(ShenandoahHeapRegion* r); changeset: 9525:8103fde44729 user: shade date: Fri Jan 27 15:48:46 2017 +0100 summary: Interleave "process references" and "unload classes" to amortize the pause. diff -r b0a4436f09f8 -r 8103fde44729 src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Thu Jan 26 19:57:06 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Fri Jan 27 15:48:46 2017 +0100 @@ -92,8 +92,11 @@ virtual bool unload_classes() { if (ShenandoahUnloadClassesFrequency == 0) return false; size_t cycle = ShenandoahHeap::heap()->shenandoahPolicy()->cycle_counter(); - // Process references every Nth GC cycle. - return cycle % ShenandoahUnloadClassesFrequency == 0; + // Unload classes every Nth GC cycle. + // This should not happen in the same cycle as process_references to amortize costs. + // Offsetting by one is enough to break the rendezvous when periods are equal. + // When periods are not equal, offsetting by one is just as good as any other guess. + return (cycle + 1) % ShenandoahUnloadClassesFrequency == 0; } virtual bool needs_regions_sorted_by_garbage() { changeset: 9526:4e0854067efb user: rkennke date: Mon Jan 30 17:33:21 2017 +0100 summary: Fix double-marking. diff -r 8103fde44729 -r 4e0854067efb src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Fri Jan 27 15:48:46 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Mon Jan 30 17:33:21 2017 +0100 @@ -44,6 +44,8 @@ uint _cancelled_cm_cycles_in_a_row; uint _successful_cm_cycles_in_a_row; + size_t _bytes_in_cset; + public: ShenandoahHeuristics(); @@ -72,6 +74,10 @@ _successful_cm_cycles_in_a_row++; } + virtual void record_full_gc() { + _bytes_in_cset = 0; + } + virtual void start_choose_collection_set() { } virtual void end_choose_collection_set() { @@ -124,6 +130,7 @@ _bytes_reclaimed_this_cycle(0), _bytes_allocated_start_CM(0), _bytes_allocated_during_CM(0), + _bytes_in_cset(0), _cancelled_cm_cycles_in_a_row(0), _successful_cm_cycles_in_a_row(0) { @@ -141,6 +148,7 @@ RegionGarbage candidates[end]; size_t cand_idx = 0; + _bytes_in_cset = 0; size_t immediate_garbage = 0; size_t immediate_regions = 0; @@ -186,6 +194,7 @@ region->region_number(), region->garbage(), region->get_live_data_bytes()); collection_set->add_region(region); region->set_in_collection_set(true); + _bytes_in_cset += region->used(); } } } @@ -339,9 +348,10 @@ size_t free_capacity = heap->free_regions()->capacity(); size_t free_used = heap->free_regions()->used(); assert(free_used <= free_capacity, "must use less than capacity"); - size_t available = free_capacity - free_used; - uintx factor = heap->need_update_refs() ? 
ShenandoahFreeThreshold : ShenandoahInitialFreeThreshold; - size_t targetStartMarking = (capacity * factor) / 100; + size_t cset = MIN2(_bytes_in_cset, (ShenandoahCSetThreshold * capacity) / 100); + size_t available = free_capacity - free_used + cset; + uintx threshold = ShenandoahFreeThreshold + ShenandoahCSetThreshold; + size_t targetStartMarking = (capacity * threshold) / 100; size_t threshold_bytes_allocated = heap->capacity() * ShenandoahAllocationThreshold / 100; if (available < targetStartMarking && @@ -404,8 +414,9 @@ size_t free_capacity = heap->free_regions()->capacity(); size_t free_used = heap->free_regions()->used(); assert(free_used <= free_capacity, "must use less than capacity"); - size_t available = free_capacity - free_used; - uintx factor = _free_threshold; + size_t cset = MIN2(_bytes_in_cset, (ShenandoahCSetThreshold * capacity) / 100); + size_t available = free_capacity - free_used + cset; + uintx factor = _free_threshold + ShenandoahCSetThreshold; size_t targetStartMarking = (capacity * factor) / 100; size_t threshold_bytes_allocated = heap->capacity() * ShenandoahAllocationThreshold / 100; @@ -684,6 +695,10 @@ _heuristics->record_cm_cancelled(); } +void ShenandoahCollectorPolicy::record_full_gc() { + _heuristics->record_full_gc(); +} + void ShenandoahCollectorPolicy::choose_collection_set(ShenandoahCollectionSet* collection_set) { _heuristics->choose_collection_set(collection_set); } diff -r 8103fde44729 -r 4e0854067efb src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Fri Jan 27 15:48:46 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Mon Jan 30 17:33:21 2017 +0100 @@ -185,6 +185,7 @@ void record_cm_cancelled(); void record_cm_success(); void record_cm_degenerated(); + void record_full_gc(); void choose_collection_set(ShenandoahCollectionSet* collection_set); void choose_free_set(ShenandoahFreeSet* free_set); diff -r 8103fde44729 -r 4e0854067efb src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Fri Jan 27 15:48:46 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Mon Jan 30 17:33:21 2017 +0100 @@ -202,6 +202,8 @@ _gc_timer->register_gc_end(); + policy->record_full_gc(); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_heapdumps); _heap->post_full_gc_dump(_gc_timer); policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_heapdumps); diff -r 8103fde44729 -r 4e0854067efb src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Fri Jan 27 15:48:46 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Mon Jan 30 17:33:21 2017 +0100 @@ -103,18 +103,14 @@ "Shenandoah GC dynamic Heuristic mode only (ignored otherwise). " \ "Defaults to 60%.") \ \ - product_rw(uintx, ShenandoahFreeThreshold, 25, \ + product_rw(uintx, ShenandoahFreeThreshold, 10, \ "Set the percentage of free heap at which a GC cycle is started. " \ "Applies to Shenandoah GC dynamic Heuristic mode only " \ - "(ignored otherwise). Defaults to 25%.") \ + "(ignored otherwise). Defaults to 10%.") \ \ - product_rw(uintx, ShenandoahInitialFreeThreshold, 50, \ - "Set the percentage of free heap at which an initial GC cycle " \ - "is started. An initial GC cycle is the first one after VM " \ - "start or after a full GC." 
\ - "Applies to Shenandoah GC dynamic Heuristic mode only " \ - "(ignored otherwise). Defaults to 50%.") \ - \ + product_rw(uintx, ShenandoahCSetThreshold, 40, \ + "Set the approximate target percentage of the heap for the" \ + "collection set. Defaults to 40%.") \ product_rw(uintx, ShenandoahAllocationThreshold, 0, \ "Set percentage of memory allocated since last GC cycle before " \ "a new GC cycle is started. " \ @@ -124,10 +120,10 @@ experimental(uintx, ShenandoahInitFreeThreshold, 10, \ "Initial remaininig free threshold for adaptive heuristics") \ \ - experimental(uintx, ShenandoahMinFreeThreshold, 3, \ + experimental(uintx, ShenandoahMinFreeThreshold, 5, \ "Minimum remaininig free threshold for adaptive heuristics") \ \ - experimental(uintx, ShenandoahMaxFreeThreshold, 30, \ + experimental(uintx, ShenandoahMaxFreeThreshold, 70, \ "Maximum remaininig free threshold for adaptive heuristics") \ \ experimental(uintx, ShenandoahHappyCyclesThreshold, 5, \ changeset: 9527:16e9455fff12 user: shade date: Tue Jan 31 14:50:04 2017 +0100 summary: Enable ShenandoahConcurrentCodeRoots. diff -r 4e0854067efb -r 16e9455fff12 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Mon Jan 30 17:33:21 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Tue Jan 31 14:50:04 2017 +0100 @@ -161,6 +161,19 @@ CLDToOopClosure cldCl(&mark_cl); MarkingCodeBlobClosure blobsCl(&mark_cl, ! CodeBlobToOopClosure::FixRelocations); + // The rationale for selecting the roots to scan is as follows: + // a. With unload_classes = true, we only want to scan the actual strong roots from the + // code cache. This will allow us to identify the dead classes, unload them, *and* + // invalidate the relevant code cache blobs. This could be only done together with + // class unloading. + // b. With unload_classes = false, we have to nominally retain all the references from code + // cache, because there could be the case of embedded class/oop in the generated code, + // which we will never visit during mark. Without code cache invalidation, as in (a), + // we risk executing that code cache blob, and crashing. + // c. With ShenandoahConcurrentCodeRoots, we avoid scanning the entire code cache here, + // and instead do that in concurrent phase under the relevant lock. This saves init mark + // pause time. + ResourceMark m; if (heap->concurrentMark()->unload_classes()) { _rp->process_strong_roots(&mark_cl, _process_refs ? NULL : &mark_cl, &cldCl, &blobsCl, worker_id); diff -r 4e0854067efb -r 16e9455fff12 src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Mon Jan 30 17:33:21 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Tue Jan 31 14:50:04 2017 +0100 @@ -133,7 +133,7 @@ experimental(uint, ShenandoahMarkLoopStride, 1000, \ "How many items are processed during one marking step") \ \ - experimental(bool, ShenandoahConcurrentCodeRoots, false, \ + experimental(bool, ShenandoahConcurrentCodeRoots, true, \ "Scan code roots concurrently, instead of during a pause") \ \ experimental(bool, ShenandoahNoBarriersForConst, true, \ changeset: 9528:6f7575a34072 user: shade date: Tue Jan 31 20:07:22 2017 +0100 summary: Ensure BitMaps clearing is done with memset. 
diff -r 16e9455fff12 -r 6f7575a34072 src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Tue Jan 31 14:50:04 2017 +0100 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Tue Jan 31 20:07:22 2017 +0100 @@ -207,8 +207,16 @@ mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); assert(!mr.is_empty(), "unexpected empty region"); // convert address range into offset range - _bm.at_put_range(heapWordToOffset(mr.start()), - heapWordToOffset(mr.end()), false); + _bm.clear_range(heapWordToOffset(mr.start()), + heapWordToOffset(mr.end())); +} + +void CMBitMap::clear_range_large(MemRegion mr) { + mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); + assert(!mr.is_empty(), "unexpected empty region"); + // convert address range into offset range + _bm.clear_large_range(heapWordToOffset(mr.start()), + heapWordToOffset(mr.end())); } MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, diff -r 16e9455fff12 -r 6f7575a34072 src/share/vm/gc_implementation/g1/concurrentMark.hpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp Tue Jan 31 14:50:04 2017 +0100 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp Tue Jan 31 20:07:22 2017 +0100 @@ -154,6 +154,8 @@ void parMarkRange(MemRegion mr); void clearRange(MemRegion mr); + void clear_range_large(MemRegion mr); + // Starting at the bit corresponding to "addr" (inclusive), find the next // "1" bit, if any. This bit starts some run of consecutive "1"'s; find // the end of this run (stopping at "end_addr"). Return the MemRegion diff -r 16e9455fff12 -r 6f7575a34072 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Tue Jan 31 14:50:04 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Tue Jan 31 20:07:22 2017 +0100 @@ -303,7 +303,7 @@ HeapWord* bottom = region->bottom(); HeapWord* top = heap->next_top_at_mark_start(region->bottom()); if (top > bottom) { - heap->next_mark_bit_map()->clearRange(MemRegion(bottom, top)); + heap->next_mark_bit_map()->clear_range_large(MemRegion(bottom, top)); } region = _regions->claim_next(); } @@ -335,7 +335,7 @@ HeapWord* bottom = region->bottom(); HeapWord* top = heap->complete_top_at_mark_start(region->bottom()); if (top > bottom) { - heap->complete_mark_bit_map()->clearRange(MemRegion(bottom, top)); + heap->complete_mark_bit_map()->clear_range_large(MemRegion(bottom, top)); } region = _regions->claim_next(); } diff -r 16e9455fff12 -r 6f7575a34072 src/share/vm/utilities/bitMap.cpp --- a/src/share/vm/utilities/bitMap.cpp Tue Jan 31 14:50:04 2017 +0100 +++ b/src/share/vm/utilities/bitMap.cpp Tue Jan 31 20:07:22 2017 +0100 @@ -175,8 +175,9 @@ idx_t beg_full_word = word_index_round_up(beg); idx_t end_full_word = word_index(end); - assert(end_full_word - beg_full_word >= 32, - "the range must include at least 32 bytes"); + if (end_full_word - beg_full_word < 32) { + clear_range(beg, end); + } // The range includes at least one full word. clear_range_within_word(beg, bit_index(beg_full_word)); changeset: 9529:c8881cbea37f user: shade date: Wed Feb 01 13:20:33 2017 +0100 summary: Update ShenandoahRegionSampling protocol. 
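The counters change below repacks each per-region sample: used and live sizes are now reported in kilobytes (29 bits each), freeing bits to widen the flags field from four to six. A small sketch of the encoding, with constants mirroring the updated ShenandoahHeapRegionCounters header (assumed layout: used in bits 0-28, live in bits 29-57, flags in bits 58-63):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    static const int64_t USED_MASK  = 0x1fffffff; // 29 bits
    static const int64_t LIVE_MASK  = 0x1fffffff; // 29 bits
    static const int64_t FLAGS_MASK = 0x3f;       // 6 bits
    static const int USED_SHIFT = 0, LIVE_SHIFT = 29, FLAGS_SHIFT = 58;

    // Pack one region sample: sizes are scaled to kilobytes first (>> 10).
    static int64_t encode_region(size_t used_bytes, size_t live_bytes, int64_t flags) {
      int64_t data = ((int64_t)(used_bytes >> 10) & USED_MASK) << USED_SHIFT;
      data |= ((int64_t)(live_bytes >> 10) & LIVE_MASK) << LIVE_SHIFT;
      data |= (flags & FLAGS_MASK) << FLAGS_SHIFT;
      return data;
    }

    int main() {
      // 8 MB used, 1 MB live, "in collection set" flag (bit 1 of the flags field).
      int64_t v = encode_region(8u << 20, 1u << 20, 1 << 1);
      assert(((v >> USED_SHIFT)  & USED_MASK)  == 8 * 1024); // used, in KB
      assert(((v >> LIVE_SHIFT)  & LIVE_MASK)  == 1 * 1024); // live, in KB
      assert(((v >> FLAGS_SHIFT) & FLAGS_MASK) == (1 << 1));
      return 0;
    }

Scaling to kilobytes trades sub-KB precision for headroom: 29 bits of kilobytes cover regions up to 512 GB, far beyond any realistic region size.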
diff -r 6f7575a34072 -r c8881cbea37f src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Tue Jan 31 20:07:22 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Wed Feb 01 13:20:33 2017 +0100 @@ -480,6 +480,15 @@ #endif } +class ResetRecentlyAllocated : public ShenandoahHeapRegionClosure { +public: + bool doHeapRegion(ShenandoahHeapRegion* r) { + ShenandoahHeap* sh = ShenandoahHeap::heap(); + r->set_recently_allocated(false); + return false; + } +}; + void ShenandoahConcurrentMark::shared_finish_mark_from_roots(bool full_gc) { assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint"); @@ -541,11 +550,19 @@ sh->workers()->run_task(&unlink_task, nworkers); ClassLoaderDataGraph::purge(); } + + // Mark finished. All recently allocated regions are not recent anymore. + { + ResetRecentlyAllocated cl; + sh->heap_region_iterate(&cl); + } + policy->record_phase_end(full_gc ? ShenandoahCollectorPolicy::full_gc_mark_class_unloading : ShenandoahCollectorPolicy::class_unloading); assert(task_queues()->is_empty(), "Should be empty"); + } #ifdef ASSERT diff -r 6f7575a34072 -r c8881cbea37f src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp Tue Jan 31 20:07:22 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp Wed Feb 01 13:20:33 2017 +0100 @@ -63,6 +63,14 @@ _live_data = 0; } +void ShenandoahHeapRegion::set_recently_allocated(bool value) { + _recycled = value; +} + +bool ShenandoahHeapRegion::is_recently_allocated() const { + return _recycled && used() > 0; +} + void ShenandoahHeapRegion::set_live_data(size_t s) { assert(Thread::current()->is_VM_thread(), "by VM thread"); _live_data = (jint) (s / HeapWordSize); @@ -261,6 +269,7 @@ clear_live_data(); _humongous_start = false; _humongous_continuation = false; + _recycled = true; set_in_collection_set(false); // Reset C-TAMS pointer to ensure size-based iteration, everything // in that regions is going to be new objects. 
@@ -364,7 +373,7 @@ } bool ShenandoahHeapRegion::is_pinned() { - assert(_critical_pins >= 0, "sanity"); - assert(SafepointSynchronize::is_at_safepoint(), "only at safepoints"); - return _critical_pins > 0; + jint v = OrderAccess::load_acquire(&_critical_pins); + assert(v >= 0, "sanity"); + return v > 0; } diff -r 6f7575a34072 -r c8881cbea37f src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.hpp Tue Jan 31 20:07:22 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.hpp Wed Feb 01 13:20:33 2017 +0100 @@ -46,6 +46,8 @@ bool _humongous_start; bool _humongous_continuation; + bool _recycled; + HeapWord* _new_top; volatile jint _critical_pins; @@ -70,6 +72,9 @@ void set_live_data(size_t s); inline void increase_live_data_words(jint s); + void set_recently_allocated(bool value); + bool is_recently_allocated() const; + bool has_live() const; size_t get_live_data_bytes() const; size_t get_live_data_words() const; diff -r 6f7575a34072 -r c8881cbea37f src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.cpp Tue Jan 31 20:07:22 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.cpp Wed Feb 01 13:20:33 2017 +0100 @@ -38,11 +38,14 @@ _name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1, mtGC); strcpy(_name_space, cns); - const char* cname = PerfDataManager::counter_name(_name_space, "max_regions"); + const char* cname = PerfDataManager::counter_name(_name_space, "timestamp"); + _timestamp = PerfDataManager::create_long_variable(SUN_GC, cname, PerfData::U_None, CHECK); + + cname = PerfDataManager::counter_name(_name_space, "max_regions"); PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_None, max_regions, CHECK); cname = PerfDataManager::counter_name(_name_space, "region_size"); - PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_None, ShenandoahHeapRegion::RegionSizeBytes, CHECK); + PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_None, ShenandoahHeapRegion::RegionSizeBytes >> 10, CHECK); cname = PerfDataManager::counter_name(_name_space, "status"); _status = PerfDataManager::create_long_variable(SUN_GC, cname, @@ -76,21 +79,25 @@ if (heap->is_evacuation_in_progress()) status |= 2; _status->set_value(status); + _timestamp->set_value(os::elapsed_counter()); + size_t num_regions = heap->num_regions(); size_t max_regions = heap->max_regions(); ShenandoahHeapRegionSet* regions = heap->regions(); for (uint i = 0; i < max_regions; i++) { if (i < num_regions) { ShenandoahHeapRegion* r = regions->get(i); - jlong data = (r->used() & USED_MASK) << USED_SHIFT; - data |= (r->get_live_data_bytes() & LIVE_MASK) << LIVE_SHIFT; + jlong data = ((r->used() >> 10) & USED_MASK) << USED_SHIFT; + data |= ((r->get_live_data_bytes() >> 10) & LIVE_MASK) << LIVE_SHIFT; jlong flags = 0; - if (r->in_collection_set()) flags |= 1 << 0; - if (r->is_humongous()) flags |= 1 << 1; + if (r->in_collection_set()) flags |= 1 << 1; + if (r->is_humongous()) flags |= 1 << 2; + if (r->is_recently_allocated()) flags |= 1 << 3; + if (r->is_pinned()) flags |= 1 << 4; data |= (flags & FLAGS_MASK) << FLAGS_SHIFT; _regions_data[i]->set_value(data); } else { - jlong flags = 1 << 2; + jlong flags = 1 << 0; flags = (flags & FLAGS_MASK) << FLAGS_SHIFT; _regions_data[i]->set_value(flags); } diff -r 6f7575a34072 -r c8881cbea37f 
src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.hpp Tue Jan 31 20:07:22 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionCounters.hpp Wed Feb 01 13:20:33 2017 +0100 @@ -30,8 +30,9 @@ * This provides the following in JVMStat: * * constants: + * - sun.gc.shenandoah.regions.timestamp the timestamp for this sample * - sun.gc.shenandoah.regions.max_regions maximum number of regions - * - sun.gc.shenandoah.regions.region_size size per region, in bytes + * - sun.gc.shenandoah.regions.region_size size per region, in kilobytes * * variables: * - sun.gc.shenandoah.regions.status current GC status: @@ -43,26 +44,29 @@ * where $ is the region number from 0 <= i < $max_regions * * in the following format: - * - bits 0-29 used memory in bytes - * - bits 30-59 live memory in bytes - * - bits 60-63 status - * - bit 60 set when region in collection set - * - bit 61 set when region is humongous - * - bit 62 set when region is not used yet + * - bits 0-28 used memory in kilobytes + * - bits 29-57 live memory in kilobytes + * - bits 58-63 status + * - bit 58 set when region is not used yet + * - bit 59 set when region in collection set + * - bit 60 set when region is humongous + * - bit 61 set when region is recently allocated + * - bit 62 set when region is pinned */ class ShenandoahHeapRegionCounters : public CHeapObj { private: - static const jlong USED_MASK = 0x3fffffff; // bits 0-29 - static const jlong USED_SHIFT = 0; + static const jlong USED_MASK = 0x1fffffff; // bits 0-28 + static const jlong USED_SHIFT = 0; - static const jlong LIVE_MASK = 0x3fffffff; // bits 30-59 - static const jlong LIVE_SHIFT = 30; + static const jlong LIVE_MASK = 0x1fffffff; // bits 29-57 + static const jlong LIVE_SHIFT = 29; - static const jlong FLAGS_MASK = 0xf; // bits 60-63 - static const jlong FLAGS_SHIFT = 60; // bits 60-63 + static const jlong FLAGS_MASK = 0x3f; // bits 58-63 + static const jlong FLAGS_SHIFT = 58; // bits 58-63 char* _name_space; PerfLongVariable** _regions_data; + PerfLongVariable* _timestamp; PerfLongVariable* _status; jlong _last_sample_millis; diff -r 6f7575a34072 -r c8881cbea37f src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Tue Jan 31 20:07:22 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Feb 01 13:20:33 2017 +0100 @@ -155,7 +155,7 @@ experimental(size_t, ShenandoahSATBBufferSize, 1 * K, \ "Number of entries in an SATB log buffer.") \ \ - product_rw(int, ShenandoahRegionSamplingRate, 100, \ + product_rw(int, ShenandoahRegionSamplingRate, 40, \ "Sampling rate for heap region sampling. " \ "Number of milliseconds between samples") \ \ changeset: 9530:6b0d158a7bd0 user: rkennke date: Wed Feb 01 16:53:41 2017 +0100 summary: Fix ShenandoahHeapRegion initialization.
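The fix below replaces the old two-step protocol, new ShenandoahHeapRegion() followed by initialize_heap_region(), with a real constructor, so every field (including the _recycled flag introduced above) is set before the region is published; the jint JNI_OK return value that call sites never checked disappears as well. A minimal sketch of the pattern, with hypothetical names:

// Before: two-phase construction. Forgetting the second call leaves fields
// uninitialized, and the jint result was ignored by callers anyway.
//   Region* r = new Region();
//   r->initialize(heap, start, size_words, index);
//
// After: single-phase construction through an initializer list; there is no
// window in which a half-built region exists.
class Region {
  size_t _index;
  bool   _recycled;
public:
  explicit Region(size_t index) : _index(index), _recycled(true) {}
};
// Usage: Region* r = new Region(index);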
diff -r c8881cbea37f -r 6b0d158a7bd0 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Feb 01 13:20:33 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Feb 01 16:53:41 2017 +0100 @@ -156,9 +156,8 @@ { ShenandoahHeapLock lock(this); for (i = 0; i < _num_regions; i++) { - ShenandoahHeapRegion* current = new ShenandoahHeapRegion(); - current->initialize_heap_region(this, (HeapWord*) pgc_rs.base() + - regionSizeWords * i, regionSizeWords, i); + ShenandoahHeapRegion* current = new ShenandoahHeapRegion(this, (HeapWord*) pgc_rs.base() + + regionSizeWords * i, regionSizeWords, i); _free_regions->add_region(current); _ordered_regions->add_region(current); } @@ -1964,10 +1963,9 @@ size_t base = _num_regions; ensure_new_regions(num_regions); for (size_t i = 0; i < num_regions; i++) { - ShenandoahHeapRegion* new_region = new ShenandoahHeapRegion(); size_t new_region_index = i + base; HeapWord* start = _first_region_bottom + (ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize) * new_region_index; - new_region->initialize_heap_region(this, start, ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize, new_region_index); + ShenandoahHeapRegion* new_region = new ShenandoahHeapRegion(this, start, ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize, new_region_index); if (ShenandoahLogTrace) { ResourceMark rm; diff -r c8881cbea37f -r 6b0d158a7bd0 src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp Wed Feb 01 13:20:33 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.cpp Wed Feb 01 16:53:41 2017 +0100 @@ -36,17 +36,22 @@ size_t ShenandoahHeapRegion::RegionSizeShift = 0; size_t ShenandoahHeapRegion::RegionSizeBytes = 0; -jint ShenandoahHeapRegion::initialize_heap_region(ShenandoahHeap* heap, HeapWord* start, - size_t regionSizeWords, size_t index) { - _heap = heap; - reserved = MemRegion(start, regionSizeWords); +ShenandoahHeapRegion::ShenandoahHeapRegion(ShenandoahHeap* heap, HeapWord* start, + size_t regionSizeWords, size_t index) : +#ifdef ASSERT + _mem_protection_level(0), +#endif + _heap(heap), + _region_number(index), + _live_data(0), + reserved(MemRegion(start, regionSizeWords)), + _humongous_start(false), + _humongous_continuation(false), + _recycled(true), + _new_top(NULL), + _critical_pins(0) { + ContiguousSpace::initialize(reserved, true, false); - _live_data = 0; - _region_number = index; -#ifdef ASSERT - _mem_protection_level = 1; // Off, level 1. -#endif - return JNI_OK; } size_t ShenandoahHeapRegion::region_number() const { diff -r c8881cbea37f -r 6b0d158a7bd0 src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.hpp Wed Feb 01 13:20:33 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegion.hpp Wed Feb 01 16:53:41 2017 +0100 @@ -57,11 +57,10 @@ #endif public: + ShenandoahHeapRegion(ShenandoahHeap* heap, HeapWord* start, size_t regionSize, size_t index); + static void setup_heap_region_size(size_t initial_heap_size, size_t max_heap_size); - jint initialize_heap_region(ShenandoahHeap* heap, HeapWord* start, size_t regionSize, size_t index); - - size_t region_number() const; // Roll back the previous allocation of an object with specified size. 
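Before the next changeset, a worked example of the per-region sample word defined in shenandoahHeapRegionCounters.hpp above: used kilobytes in bits 0-28, live kilobytes in bits 29-57, flag bits in 58-63. This is a self-contained sketch in plain C++, with uint64_t standing in for jlong and made-up values; a real consumer would read the word out of the per-region jvmstat variable instead of building it:

#include <cstdint>
#include <cstdio>

// Mirrors the USED/LIVE/FLAGS constants from ShenandoahHeapRegionCounters.
static const uint64_t USED_MASK  = 0x1fffffff;  static const int USED_SHIFT  = 0;
static const uint64_t LIVE_MASK  = 0x1fffffff;  static const int LIVE_SHIFT  = 29;
static const uint64_t FLAGS_MASK = 0x3f;        static const int FLAGS_SHIFT = 58;

int main() {
  // Encode one region: 100 MB used, 40 MB live, humongous (flag 1 << 2).
  uint64_t used_kb = 100 * 1024;
  uint64_t live_kb = 40 * 1024;
  uint64_t flags   = 1 << 2;
  uint64_t data = ((used_kb & USED_MASK) << USED_SHIFT)
                | ((live_kb & LIVE_MASK) << LIVE_SHIFT)
                | ((flags & FLAGS_MASK) << FLAGS_SHIFT);

  // Decode, as a region-visualization client would:
  printf("used = %lluK, live = %lluK, humongous = %d\n",
         (unsigned long long) ((data >> USED_SHIFT) & USED_MASK),
         (unsigned long long) ((data >> LIVE_SHIFT) & LIVE_MASK),
         (int) ((data >> (FLAGS_SHIFT + 2)) & 1));
  return 0;
}

The move from bytes to kilobytes is what makes the packing work: 29 bits of kilobytes cover half a terabyte per field, so used and live fit in a single jlong next to the flags.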
changeset: 9531:a307e6df7440 user: shade date: Thu Feb 02 11:29:50 2017 +0100 summary: Parallel pre-touch marking bitmaps. diff -r 6b0d158a7bd0 -r a307e6df7440 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Feb 01 16:53:41 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Thu Feb 02 11:29:50 2017 +0100 @@ -75,11 +75,19 @@ class ShenandoahPretouchTask : public AbstractGangTask { private: ShenandoahHeapRegionSet* _regions; - size_t const _page_size; + const size_t _bitmap_size; + const size_t _page_size; + char* _bitmap0_base; + char* _bitmap1_base; public: - ShenandoahPretouchTask(ShenandoahHeapRegionSet* regions, size_t page_size) : + ShenandoahPretouchTask(ShenandoahHeapRegionSet* regions, + char* bitmap0_base, char* bitmap1_base, size_t bitmap_size, + size_t page_size) : AbstractGangTask("Shenandoah PreTouch"), + _bitmap0_base(bitmap0_base), + _bitmap1_base(bitmap1_base), _regions(regions), + _bitmap_size(bitmap_size), _page_size(page_size) { _regions->clear_current_index(); }; @@ -90,6 +98,19 @@ log_trace(gc, heap)("Pretouch region " SIZE_FORMAT ": " PTR_FORMAT " -> " PTR_FORMAT, r->region_number(), p2i(r->bottom()), p2i(r->end())); os::pretouch_memory((char*) r->bottom(), (char*) r->end()); + + size_t start = r->region_number() * ShenandoahHeapRegion::RegionSizeBytes / CMBitMap::mark_distance(); + size_t end = (r->region_number() + 1) * ShenandoahHeapRegion::RegionSizeBytes / CMBitMap::mark_distance(); + assert (end <= _bitmap_size, err_msg("end is sane: " SIZE_FORMAT " < " SIZE_FORMAT, end, _bitmap_size)); + + log_trace(gc, heap)("Pretouch bitmap under region " SIZE_FORMAT ": " PTR_FORMAT " -> " PTR_FORMAT, + r->region_number(), p2i(_bitmap0_base + start), p2i(_bitmap0_base + end)); + os::pretouch_memory(_bitmap0_base + start, _bitmap0_base + end); + + log_trace(gc, heap)("Pretouch bitmap under region " SIZE_FORMAT ": " PTR_FORMAT " -> " PTR_FORMAT, + r->region_number(), p2i(_bitmap1_base + start), p2i(_bitmap1_base + end)); + os::pretouch_memory(_bitmap1_base + start, _bitmap1_base + end); + r = _regions->claim_next(); } } @@ -197,13 +218,28 @@ os::commit_memory_or_exit(bitmap0.base(), bitmap0.size(), false, "couldn't allocate mark bitmap"); MemTracker::record_virtual_memory_type(bitmap0.base(), mtGC); MemRegion bitmap_region0 = MemRegion((HeapWord*) bitmap0.base(), bitmap0.size() / HeapWordSize); - _mark_bit_map0.initialize(heap_region, bitmap_region0); - _complete_mark_bit_map = &_mark_bit_map0; ReservedSpace bitmap1(bitmap_size, page_size); os::commit_memory_or_exit(bitmap1.base(), bitmap1.size(), false, "couldn't allocate mark bitmap"); MemTracker::record_virtual_memory_type(bitmap1.base(), mtGC); MemRegion bitmap_region1 = MemRegion((HeapWord*) bitmap1.base(), bitmap1.size() / HeapWordSize); + + if (ShenandoahAlwaysPreTouch) { + assert (!AlwaysPreTouch, "Should have been overridden"); + + // For NUMA, it is important to pre-touch the storage under bitmaps with worker threads, + // before initialize() below zeroes it with initializing thread. For any given region, + // we touch the region and the corresponding bitmaps from the same thread. 
+ + log_info(gc, heap)("Parallel pretouch " SIZE_FORMAT " regions with " SIZE_FORMAT " byte pages", + _ordered_regions->count(), page_size); + ShenandoahPretouchTask cl(_ordered_regions, bitmap0.base(), bitmap1.base(), bitmap_size, page_size); + _workers->run_task(&cl); + } + + _mark_bit_map0.initialize(heap_region, bitmap_region0); + _complete_mark_bit_map = &_mark_bit_map0; + _mark_bit_map1.initialize(heap_region, bitmap_region1); _next_mark_bit_map = &_mark_bit_map1; @@ -213,17 +249,6 @@ ShenandoahMarkCompact::initialize(); - if (ShenandoahAlwaysPreTouch) { - assert (!AlwaysPreTouch, "Should have been overridden"); - - size_t page_size = UseLargePages ? os::large_page_size() : (size_t) os::vm_page_size(); - - log_info(gc, heap)("Parallel pretouch " SIZE_FORMAT " regions with " SIZE_FORMAT " byte pages", - _ordered_regions->count(), page_size); - ShenandoahPretouchTask cl(_ordered_regions, page_size); - _workers->run_task(&cl); - } - return JNI_OK; } changeset: 9532:273b4206fd1e user: zgu date: Mon Feb 06 16:58:31 2017 -0500 summary: Added UseDynamicNumberOfGCThreads support in Shenandoah diff -r a307e6df7440 -r 273b4206fd1e src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Thu Feb 02 11:29:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Mon Feb 06 16:58:31 2017 -0500 @@ -55,6 +55,8 @@ void record_bytes_start_CM(size_t bytes); void record_bytes_end_CM(size_t bytes); + size_t bytes_in_cset() const { return _bytes_in_cset; } + virtual void print_thresholds() { } @@ -762,5 +764,152 @@ return _phase_times; } + +uint ShenandoahCollectorPolicy::calc_workers_for_java_threads(uint application_workers) { + return (uint)ShenandoahGCWorkerPerJavaThread * application_workers; +} + +uint ShenandoahCollectorPolicy::calc_workers_for_live_set(size_t live_data) { + return (uint)(live_data / HeapSizePerGCThread); +} + + +uint ShenandoahCollectorPolicy::calc_default_active_workers( + uint total_workers, + uint min_workers, + uint active_workers, + uint application_workers, + uint workers_by_java_threads, + uint workers_by_liveset) { + // If the user has turned off using a dynamic number of GC threads + // or the user has requested a specific number, set the active + // number of workers to all the workers. + uint new_active_workers = total_workers; + uint prev_active_workers = active_workers; + uint active_workers_by_JT = 0; + uint active_workers_by_liveset = 0; + + active_workers_by_JT = MAX2(workers_by_java_threads, min_workers); + + // Choose a number of GC threads based on the live set. + active_workers_by_liveset = + MAX2((uint) 2U, workers_by_liveset); + + uint max_active_workers = + MAX2(active_workers_by_JT, active_workers_by_liveset); + + new_active_workers = MIN2(max_active_workers, total_workers); + + // Increase GC workers instantly but decrease them more + // slowly. + if (new_active_workers < prev_active_workers) { + new_active_workers = + MAX2(min_workers, (prev_active_workers + new_active_workers) / 2); + } + + if (UseNUMA) { + uint numa_groups = (uint)os::numa_get_groups_num(); + assert(numa_groups <= total_workers, "Not enough workers to cover all numa groups"); + new_active_workers = MAX2(max_active_workers, numa_groups); + } + + // Check once more that the number of workers is within the limits. 
+ assert(min_workers <= total_workers, "Minimum workers not consistent with total workers"); + assert(new_active_workers >= min_workers, "Minimum workers not observed"); + assert(new_active_workers <= total_workers, "Total workers not observed"); + + log_trace(gc, task)("ShenandoahCollectorPolicy::calc_default_active_workers() : " + "active_workers(): " UINTX_FORMAT " new_active_workers: " UINTX_FORMAT " " + "prev_active_workers: " UINTX_FORMAT "\n" + " active_workers_by_JT: " UINTX_FORMAT " active_workers_by_liveset: " UINTX_FORMAT, + (uintx)active_workers, (uintx)new_active_workers, (uintx)prev_active_workers, + (uintx)active_workers_by_JT, (uintx)active_workers_by_liveset); + assert(new_active_workers > 0, "Always need at least 1"); + return new_active_workers; +} + +/** + * The initial marking phase also updates references of live objects from the previous concurrent GC cycle, + * so we take Java threads and live set into account. + */ +uint ShenandoahCollectorPolicy::calc_workers_for_init_marking(uint total_workers, + uint active_workers, + uint application_workers) { + + if (!UseDynamicNumberOfGCThreads || + (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) { + assert(total_workers > 0, "Always need at least 1"); + return total_workers; + } else { + ShenandoahCollectorPolicy* policy = (ShenandoahCollectorPolicy*)ShenandoahHeap::heap()->collector_policy(); + size_t live_data = policy->_heuristics->bytes_in_cset(); + + return calc_default_active_workers(total_workers, (total_workers > 1) ? 2 : 1, + active_workers, application_workers, + calc_workers_for_java_threads(application_workers), + calc_workers_for_live_set(live_data)); + } +} + +uint ShenandoahCollectorPolicy::calc_workers_for_conc_marking(uint total_workers, + uint active_workers, + uint application_workers) { + + if (!UseDynamicNumberOfGCThreads || + (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) { + assert(total_workers > 0, "Always need at least 1"); + return total_workers; + } else { + return calc_default_active_workers(total_workers, + (total_workers > 1 ? 2 : 1), active_workers, + application_workers, calc_workers_for_java_threads(application_workers), 0); + } +} + +uint ShenandoahCollectorPolicy::calc_workers_for_final_marking(uint total_workers, + uint active_workers, + uint application_workers) { + + if (!UseDynamicNumberOfGCThreads || + (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) { + assert(total_workers > 0, "Always need at least 1"); + return total_workers; + } else { + return calc_default_active_workers(total_workers, + (total_workers > 1 ? 
2 : 1), active_workers, + application_workers, calc_workers_for_java_threads(application_workers), 0); + } +} + +uint ShenandoahCollectorPolicy::calc_workers_for_evacuation(uint total_workers, + uint active_workers, + uint application_workers) { + + if (!UseDynamicNumberOfGCThreads || + (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) { + assert(total_workers > 0, "Always need at least 1"); + return total_workers; + } else { + // Calculation based on live set + size_t live_data = 0; + ShenandoahHeap* heap = ShenandoahHeap::heap(); + if (heap->is_full_gc_in_progress()) { + ShenandoahHeapRegionSet* regions = heap->regions(); + for (size_t index = 0; index < regions->active_regions(); index ++) { + live_data += regions->get_fast(index)->get_live_data_bytes(); + } + } else { + ShenandoahCollectorPolicy* policy = (ShenandoahCollectorPolicy*)ShenandoahHeap::heap()->collector_policy(); + live_data = policy->_heuristics->bytes_in_cset(); + } + + uint active_workers_by_liveset = calc_workers_for_live_set(live_data); + return calc_default_active_workers(total_workers, + (total_workers > 1 ? 2 : 1), active_workers, + application_workers, 0, active_workers_by_liveset); + } +} + + GCTimer* ShenandoahCollectorPolicy::conc_timer() {return _conc_timer;} GCTimer* ShenandoahCollectorPolicy::stw_timer() {return _stw_timer;} diff -r a307e6df7440 -r 273b4206fd1e src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Thu Feb 02 11:29:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Mon Feb 06 16:58:31 2017 -0500 @@ -38,6 +38,8 @@ class ConcurrentGCTimer; class ShenandoahCollectorPolicy: public CollectorPolicy { +private: + static const float ShenandoahGCWorkerPerJavaThread = 0.5f; public: enum TimingPhase { @@ -205,7 +207,34 @@ void increase_cycle_counter(); size_t cycle_counter() const; + + static uint calc_workers_for_init_marking(uint total_workers, + uint active_workers, + uint application_workers); + + static uint calc_workers_for_conc_marking(uint total_workers, + uint active_workers, + uint application_workers); + + static uint calc_workers_for_final_marking(uint total_workers, + uint active_workers, + uint application_workers); + + static uint calc_workers_for_evacuation(uint total_workers, + uint active_workers, + uint application_workers); + private: + static uint calc_workers_for_java_threads(uint application_workers); + static uint calc_workers_for_live_set(size_t live_data); + + static uint calc_default_active_workers(uint total_workers, + uint min_workers, + uint active_workers, + uint application_workers, + uint workers_by_java_threads, + uint workers_by_liveset); + void print_summary_sd(outputStream* out, const char* str, const HdrSeq* seq); }; diff -r a307e6df7440 -r 273b4206fd1e src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Thu Feb 02 11:29:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Mon Feb 06 16:58:31 2017 -0500 @@ -332,17 +332,17 @@ ShenandoahHeap* heap = ShenandoahHeap::heap(); ClassLoaderDataGraph::clear_claimed_marks(); + WorkGang* workers = heap->workers(); + uint nworkers = workers->active_workers(); - uint nworkers = heap->max_parallel_workers(); assert(nworkers <= task_queues()->size(), "Just check"); ShenandoahRootProcessor root_proc(heap, nworkers, 
ShenandoahCollectorPolicy::scan_thread_roots); TASKQUEUE_STATS_ONLY(reset_taskqueue_stats()); task_queues()->reserve(nworkers); - assert(heap->workers()->active_workers() == nworkers, "Not expecting other tasks"); ShenandoahInitMarkRootsTask mark_roots(&root_proc, process_references()); - heap->workers()->run_task(&mark_roots, nworkers); + workers->run_task(&mark_roots); if (ShenandoahConcurrentCodeRoots) { clear_claim_codecache(); } @@ -359,6 +359,14 @@ set_process_references(policy->process_references()); set_unload_classes(policy->unload_classes()); + // Set up parallel workers for initial marking + FlexibleWorkGang* workers = heap->workers(); + uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_init_marking( + workers->total_workers(), workers->active_workers(), + Threads::number_of_non_daemon_threads()); + + workers->set_active_workers(nworkers); + mark_roots(); } @@ -367,12 +375,11 @@ ShenandoahHeap* heap = ShenandoahHeap::heap(); ClassLoaderDataGraph::clear_claimed_marks(); - uint nworkers = heap->max_parallel_workers(); - assert(heap->workers()->active_workers() == nworkers, "Not expecting other tasks"); + uint nworkers = heap->workers()->active_workers(); + ShenandoahRootProcessor root_proc(heap, nworkers, ShenandoahCollectorPolicy::update_thread_roots); ShenandoahUpdateRootsTask update_roots(&root_proc); heap->workers()->run_task(&update_roots); - } void ShenandoahConcurrentMark::final_update_roots() { @@ -418,7 +425,14 @@ sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::conc_mark); // Concurrent marking, uses concurrent workers - uint nworkers = sh->max_conc_workers(); + // Setup workers for concurrent marking + FlexibleWorkGang* workers = sh->conc_workers(); + uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_conc_marking( + workers->total_workers(), workers->active_workers(), + Threads::number_of_non_daemon_threads()); + + workers->set_active_workers(nworkers); + if (process_references()) { ReferenceProcessor* rp = sh->ref_processor(); rp->set_active_mt_degree(nworkers); @@ -429,16 +443,15 @@ } task_queues()->reserve(nworkers); - assert(sh->conc_workers()->active_workers() == nworkers, "Not expecting other tasks"); if (UseShenandoahOWST) { ShenandoahTaskTerminator terminator(nworkers, task_queues()); SCMConcurrentMarkingTask markingTask = SCMConcurrentMarkingTask(this, &terminator, update_refs); - sh->conc_workers()->run_task(&markingTask, nworkers); + workers->run_task(&markingTask); } else { ParallelTaskTerminator terminator(nworkers, task_queues()); SCMConcurrentMarkingTask markingTask = SCMConcurrentMarkingTask(this, &terminator, update_refs); - sh->conc_workers()->run_task(&markingTask, nworkers); + workers->run_task(&markingTask); } assert(task_queues()->is_empty() || sh->cancelled_concgc(), "Should be empty when not cancelled"); @@ -458,6 +471,12 @@ ShenandoahHeap* sh = (ShenandoahHeap *) Universe::heap(); + // Setup workers for final marking + FlexibleWorkGang* workers = sh->workers(); + uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_final_marking( + workers->total_workers(), workers->active_workers(), Threads::number_of_non_daemon_threads()); + workers->set_active_workers(nworkers); + TASKQUEUE_STATS_ONLY(reset_taskqueue_stats()); shared_finish_mark_from_roots(/* full_gc = */ false); @@ -495,7 +514,8 @@ ShenandoahHeap* sh = ShenandoahHeap::heap(); ShenandoahCollectorPolicy* policy = sh->shenandoahPolicy(); - uint nworkers = sh->max_parallel_workers(); + uint nworkers = sh->workers()->active_workers(); + // Finally 
mark everything else we've got in our queues during the previous steps. // It does two different things for concurrent vs. mark-compact GC: // - For concurrent GC, it starts with empty task queues, drains the remaining @@ -547,7 +567,7 @@ // Unload classes and purge SystemDictionary. bool purged_class = SystemDictionary::do_unloading(&is_alive, false); ParallelCleaningTask unlink_task(&is_alive, true, true, nworkers, purged_class); - sh->workers()->run_task(&unlink_task, nworkers); + sh->workers()->run_task(&unlink_task); ClassLoaderDataGraph::purge(); } @@ -863,14 +883,16 @@ public: - ShenandoahRefProcTaskExecutor() : _workers(ShenandoahHeap::heap()->workers()) { + ShenandoahRefProcTaskExecutor(WorkGang* workers) : + _workers(workers) { } // Executes a task using worker threads. void execute(ProcessTask& task) { assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint"); - ShenandoahConcurrentMark* cm = ShenandoahHeap::heap()->concurrentMark(); + ShenandoahHeap* heap = ShenandoahHeap::heap(); + ShenandoahConcurrentMark* cm = heap->concurrentMark(); uint nworkers = _workers->active_workers(); cm->task_queues()->reserve(nworkers); if (UseShenandoahOWST) { @@ -895,12 +917,14 @@ assert(process_references(), "sanity"); ShenandoahHeap* sh = (ShenandoahHeap*) Universe::heap(); ReferenceProcessor* rp = sh->ref_processor(); + WorkGang* workers = sh->workers(); + uint nworkers = workers->active_workers(); // Setup collector policy for softref cleaning. bool clear_soft_refs = sh->collector_policy()->use_should_clear_all_soft_refs(true /* bogus arg*/); log_develop_debug(gc, ref)("clearing soft refs: %s", BOOL_TO_STR(clear_soft_refs)); rp->setup_policy(clear_soft_refs); - rp->set_active_mt_degree(sh->max_parallel_workers()); + rp->set_active_mt_degree(nworkers); uint serial_worker_id = 0; ShenandoahForwardedIsAliveClosure is_alive; @@ -909,7 +933,7 @@ ParallelTaskTerminator terminator(1, task_queues()); ShenandoahCMDrainMarkingStackClosure complete_gc(serial_worker_id, &terminator); - ShenandoahRefProcTaskExecutor executor; + ShenandoahRefProcTaskExecutor executor(workers); log_develop_trace(gc, ref)("start processing references"); @@ -1035,14 +1059,35 @@ SCMObjToScanQueue* q, ParallelTaskTerminator* terminator) { int seed = 17; + assert(q != NULL, "Sanity"); SCMObjToScanQueueSet* queues = task_queues(); + SCMObjToScanQueue* worker_queue = q; + SCMTask t; - SCMTask t; + /* + * There can be more queues than workers. + * To deal with the imbalance, we claim extra queues first, + * since marking can push new tasks into the queue associated + * with this worker id, and we come back to process this + * queue at the end. + */ + q = queues->claim_next(); + if (q == NULL) { + q = worker_queue; + } + while (true) { if (try_queue(q, t) || - queues->steal(worker_id, &seed, t)) { + (q == worker_queue && queues->steal(worker_id, &seed, t))) { cl->do_task(&t); } else { + if (q != worker_queue) { + q = queues->claim_next(); + if (q == NULL) { + q = worker_queue; + } + continue; + } if (terminator->offer_termination()) return; } } diff -r a307e6df7440 -r 273b4206fd1e src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Thu Feb 02 11:29:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Mon Feb 06 16:58:31 2017 -0500 @@ -295,10 +295,10 @@ // This is odd. They are concurrent gc threads, but they are also task threads. // Framework doesn't allow both. 
- _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads, + _workers = new FlexibleWorkGang("Parallel GC Threads", ParallelGCThreads, /* are_GC_task_threads */true, /* are_ConcurrentGC_threads */false); - _conc_workers = new WorkGang("Concurrent GC Threads", ConcGCThreads, + _conc_workers = new FlexibleWorkGang("Concurrent GC Threads", ConcGCThreads, /* are_GC_task_threads */true, /* are_ConcurrentGC_threads */false); if ((_workers == NULL) || (_conc_workers == NULL)) { @@ -1319,7 +1319,15 @@ ParallelEvacuationTask evacuationTask = ParallelEvacuationTask(this, _collection_set); + // Setup workers for concurrent evacuation + WorkGang* workers = conc_workers(); + uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_evacuation( + workers->total_workers(), workers->active_workers(), Threads::number_of_non_daemon_threads()); + + uint old_num_workers = conc_workers()->active_workers(); + conc_workers()->set_active_workers(nworkers); conc_workers()->run_task(&evacuationTask); + conc_workers()->set_active_workers(old_num_workers); if (ShenandoahLogTrace) { ResourceMark rm; diff -r a307e6df7440 -r 273b4206fd1e src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Thu Feb 02 11:29:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Mon Feb 06 16:58:31 2017 -0500 @@ -132,8 +132,8 @@ uint _max_conc_workers; uint _max_workers; - WorkGang* _conc_workers; - WorkGang* _workers; + FlexibleWorkGang* _conc_workers; + FlexibleWorkGang* _workers; volatile size_t _used; @@ -374,8 +374,8 @@ ReferenceProcessor* ref_processor() { return _ref_processor;} - WorkGang* conc_workers() const { return _conc_workers;} - WorkGang* workers() const { return _workers;} + FlexibleWorkGang* conc_workers() const { return _conc_workers;} + FlexibleWorkGang* workers() const { return _workers;} uint max_conc_workers(); uint max_workers(); diff -r a307e6df7440 -r 273b4206fd1e src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Thu Feb 02 11:29:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Mon Feb 06 16:58:31 2017 -0500 @@ -160,12 +160,24 @@ _heap->set_need_update_refs(true); + + // Setup workers for phase 1 + FlexibleWorkGang* workers = _heap->workers(); + uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_init_marking( + workers->total_workers(), workers->active_workers(), Threads::number_of_non_daemon_threads()); + workers->set_active_workers(nworkers); + OrderAccess::fence(); policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_mark); phase1_mark_heap(); policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_mark); + // Setup workers for the rest + nworkers = ShenandoahCollectorPolicy::calc_workers_for_evacuation( + workers->total_workers(), workers->active_workers(), Threads::number_of_non_daemon_threads()); + workers->set_active_workers(nworkers); + OrderAccess::fence(); policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_calculate_addresses); @@ -278,7 +290,7 @@ // enable ("weak") refs discovery rp->enable_discovery(true /*verify_no_refs*/, true); rp->setup_policy(true); // snapshot the soft ref policy to be used in this cycle - rp->set_active_mt_degree(_heap->max_parallel_workers()); + rp->set_active_mt_degree(_heap->workers()->active_workers()); COMPILER2_PRESENT(DerivedPointerTable::clear()); cm->update_roots(); @@ -562,18 +574,21 @@ // Need 
cleared claim bits for the roots processing ClassLoaderDataGraph::clear_claimed_marks(); + WorkGang* workers = heap->workers(); + uint nworkers = workers->active_workers(); { COMPILER2_PRESENT(DerivedPointerTable::clear()); - ShenandoahRootProcessor rp(heap, heap->max_parallel_workers()); + + ShenandoahRootProcessor rp(heap, nworkers); ShenandoahAdjustRootPointersTask task(&rp); - heap->workers()->run_task(&task); + workers->run_task(&task); COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); } ShenandoahHeapRegionSet* regions = heap->regions(); regions->clear_current_index(); ShenandoahAdjustPointersTask adjust_pointers_task(regions); - heap->workers()->run_task(&adjust_pointers_task); + workers->run_task(&adjust_pointers_task); } class ShenandoahCompactObjectsClosure : public ObjectClosure { changeset: 9533:90ed8f27e2ca user: rkennke date: Tue Feb 07 10:31:06 2017 +0100 summary: Consolidate oop closures. diff -r 273b4206fd1e -r 90ed8f27e2ca src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Mon Feb 06 16:58:31 2017 -0500 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Tue Feb 07 10:31:06 2017 +0100 @@ -41,38 +41,21 @@ #include "utilities/taskqueue.hpp" class ShenandoahInitMarkRootsClosure : public OopClosure { +private: SCMObjToScanQueue* _queue; ShenandoahHeap* _heap; -public: - ShenandoahInitMarkRootsClosure(SCMObjToScanQueue* q) : - _queue(q), - _heap((ShenandoahHeap*) Universe::heap()) - { - } - -private: template - inline void do_oop_work(T* p) { - T o = oopDesc::load_heap_oop(p); - if (! oopDesc::is_null(o)) { - oop obj = oopDesc::decode_heap_oop_not_null(o); - obj = ShenandoahBarrierSet::resolve_oop_static_not_null(obj); - assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static_not_null(obj)), - "expect forwarded oop"); - ShenandoahConcurrentMark::mark_and_push(obj, _heap, _queue); - } + inline void do_oop_nv(T* p) { + ShenandoahConcurrentMark::mark_through_ref(p, _heap, _queue); } public: - void do_oop(narrowOop* p) { - do_oop_work(p); - } + ShenandoahInitMarkRootsClosure(SCMObjToScanQueue* q) : + _queue(q), _heap(ShenandoahHeap::heap()) {}; - inline void do_oop(oop* p) { - do_oop_work(p); - } - + void do_oop(narrowOop* p) { do_oop_nv(p); } + void do_oop(oop* p) { do_oop_nv(p); } }; class SCMUpdateRefsClosure: public OopClosure { @@ -742,92 +725,40 @@ }; -class ShenandoahCMKeepAliveClosure: public OopClosure { +class ShenandoahCMKeepAliveClosure : public OopClosure { +private: SCMObjToScanQueue* _queue; - ShenandoahHeap* _sh; + ShenandoahHeap* _heap; + + template + inline void do_oop_nv(T* p) { + ShenandoahConcurrentMark::mark_through_ref(p, _heap, _queue); + } public: ShenandoahCMKeepAliveClosure(SCMObjToScanQueue* q) : - _queue(q) { - _sh = (ShenandoahHeap*) Universe::heap(); - } + _queue(q), _heap(ShenandoahHeap::heap()) {}; + void do_oop(narrowOop* p) { do_oop_nv(p); } + void do_oop(oop* p) { do_oop_nv(p); } +}; + +class ShenandoahCMKeepAliveUpdateClosure : public OopClosure { private: + SCMObjToScanQueue* _queue; + ShenandoahHeap* _heap; + template - inline void do_oop_work(T* p) { - - T o = oopDesc::load_heap_oop(p); - if (! 
oopDesc::is_null(o)) { - oop obj = oopDesc::decode_heap_oop_not_null(o); - assert(oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj)), "only get updated oops in weak ref processing"); - -#ifdef ASSERT - if (ShenandoahLogTrace) { - ResourceMark rm; - outputStream* out = gclog_or_tty; - out->print("\twe're looking at location " - "*"PTR_FORMAT" = "PTR_FORMAT, - p2i(p), p2i((void*) obj)); - obj->print_on(out); - } -#endif - ShenandoahConcurrentMark::mark_and_push(obj, _sh, _queue); - } + inline void do_oop_nv(T* p) { + ShenandoahConcurrentMark::mark_through_ref(p, _heap, _queue); } public: - void do_oop(narrowOop* p) { - do_oop_work(p); - } + ShenandoahCMKeepAliveUpdateClosure(SCMObjToScanQueue* q) : + _queue(q), _heap(ShenandoahHeap::heap()) {}; - - void do_oop(oop* p) { - do_oop_work(p); - } - -}; - -class ShenandoahCMKeepAliveUpdateClosure: public OopClosure { - SCMObjToScanQueue* _queue; - ShenandoahHeap* _sh; - -public: - ShenandoahCMKeepAliveUpdateClosure(SCMObjToScanQueue* q) : - _queue(q) { - _sh = (ShenandoahHeap*) Universe::heap(); - } - -private: - template - inline void do_oop_work(T* p) { - T o = oopDesc::load_heap_oop(p); - if (! oopDesc::is_null(o)) { - oop obj = oopDesc::decode_heap_oop_not_null(o); - obj = _sh->update_oop_ref_not_null(p, obj); - assert(oopDesc::unsafe_equals(obj, oopDesc::bs()->read_barrier(obj)), "only get updated oops in weak ref processing"); -#ifdef ASSERT - if (ShenandoahLogTrace) { - ResourceMark rm; - outputStream* out = gclog_or_tty; - out->print("\twe're looking at location " - "*"PTR_FORMAT" = "PTR_FORMAT, - p2i(p), p2i((void*) obj)); - obj->print_on(out); - } -#endif - ShenandoahConcurrentMark::mark_and_push(obj, _sh, _queue); - } - } - -public: - void do_oop(narrowOop* p) { - do_oop_work(p); - } - - void do_oop(oop* p) { - do_oop_work(p); - } - + void do_oop(narrowOop* p) { do_oop_nv(p); } + void do_oop(oop* p) { do_oop_nv(p); } }; class ShenandoahRefProcTaskProxy : public AbstractGangTask { diff -r 273b4206fd1e -r 90ed8f27e2ca src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Mon Feb 06 16:58:31 2017 -0500 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Tue Feb 07 10:31:06 2017 +0100 @@ -96,7 +96,8 @@ bool claim_codecache(); void clear_claim_codecache(); - static inline void mark_and_push(oop obj, ShenandoahHeap* heap, SCMObjToScanQueue* q); + template + static inline void mark_through_ref(T* p, ShenandoahHeap* heap, SCMObjToScanQueue* q); void mark_from_roots(); diff -r 273b4206fd1e -r 90ed8f27e2ca src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Mon Feb 06 16:58:31 2017 -0500 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Tue Feb 07 10:31:06 2017 +0100 @@ -201,12 +201,8 @@ void do_buffer(void** buffer, size_t size) { for (size_t i = 0; i < size; ++i) { - void* entry = buffer[i]; - oop obj = oop(entry); - if (!oopDesc::is_null(obj)) { - obj = ShenandoahBarrierSet::resolve_oop_static_not_null(obj); - ShenandoahConcurrentMark::mark_and_push(obj, _heap, _queue); - } + oop* p = (oop*) &buffer[i]; + ShenandoahConcurrentMark::mark_through_ref(p, _heap, _queue); } } }; @@ -218,42 +214,72 @@ return had_refs && try_queue(q, task); } -inline void ShenandoahConcurrentMark::mark_and_push(oop obj, ShenandoahHeap* heap, SCMObjToScanQueue* q) { +template +inline void 
ShenandoahConcurrentMark::mark_through_ref(T *p, ShenandoahHeap* heap, SCMObjToScanQueue* q) { + T o = oopDesc::load_heap_oop(p); + if (! oopDesc::is_null(o)) { + oop obj = oopDesc::decode_heap_oop_not_null(o); + switch (UPDATE_REFS) { + case NONE: + break; + case RESOLVE: + obj = ShenandoahBarrierSet::resolve_oop_static_not_null(obj); + break; + case SIMPLE: + // We piggy-back reference updating to the marking tasks. + obj = heap->update_oop_ref_not_null(p, obj); + break; + case CONCURRENT: + obj = heap->maybe_update_oop_ref_not_null(p, obj); + break; + default: + ShouldNotReachHere(); + } + assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static(obj)), "need to-space object here"); + + // Note: Only when concurrently updating references can obj become NULL here. + // It happens when a mutator thread beats us by writing another value. In that + // case we don't need to do anything else. + if (UPDATE_REFS != CONCURRENT || ! oopDesc::is_null(obj)) { + + assert(! oopDesc::is_null(obj), "must not be null here"); #ifdef ASSERT - if (! oopDesc::bs()->is_safe(obj)) { - tty->print_cr("obj in cset: %s, obj: "PTR_FORMAT", forw: "PTR_FORMAT, - BOOL_TO_STR(heap->in_collection_set(obj)), - p2i(obj), - p2i(ShenandoahBarrierSet::resolve_oop_static_not_null(obj))); - heap->heap_region_containing((HeapWord*) obj)->print(); + if (! oopDesc::bs()->is_safe(obj)) { + tty->print_cr("obj in cset: %s, obj: "PTR_FORMAT", forw: "PTR_FORMAT, + BOOL_TO_STR(heap->in_collection_set(obj)), + p2i(obj), + p2i(ShenandoahBarrierSet::resolve_oop_static_not_null(obj))); + heap->heap_region_containing((HeapWord*) obj)->print(); + } +#endif + assert(oopDesc::bs()->is_safe(obj), "no ref in cset"); + assert(Universe::heap()->is_in(obj), err_msg("We shouldn't be calling this on objects not in the heap: "PTR_FORMAT, p2i(obj))); + if (heap->mark_next(obj)) { +#ifdef ASSERT + log_develop_trace(gc, marking)("marked obj: "PTR_FORMAT, p2i((HeapWord*) obj)); + + if (! oopDesc::bs()->is_safe(obj)) { + tty->print_cr("trying to mark obj: "PTR_FORMAT" (%s) in dirty region: ", p2i((HeapWord*) obj), BOOL_TO_STR(heap->is_marked_next(obj))); + // _heap->heap_region_containing(obj)->print(); + // _heap->print_heap_regions(); + } +#endif + assert(heap->cancelled_concgc() + || oopDesc::bs()->is_safe(obj), + "we don't want to mark objects in from-space"); + + bool pushed = q->push(SCMTask(obj)); + assert(pushed, "overflow queue should always succeed pushing"); + + } +#ifdef ASSERT + else { + log_develop_trace(gc, marking)("failed to mark obj (already marked): "PTR_FORMAT, p2i((HeapWord*) obj)); + assert(heap->is_marked_next(obj), "make sure object is marked"); + } +#endif + } } -#endif - assert(oopDesc::bs()->is_safe(obj), "no ref in cset"); - assert(Universe::heap()->is_in(obj), err_msg("We shouldn't be calling this on objects not in the heap: "PTR_FORMAT, p2i(obj))); - if (heap->mark_next(obj)) { -#ifdef ASSERT - log_develop_trace(gc, marking)("marked obj: "PTR_FORMAT, p2i((HeapWord*) obj)); - - if (! 
oopDesc::bs()->is_safe(obj)) { - tty->print_cr("trying to mark obj: "PTR_FORMAT" (%s) in dirty region: ", p2i((HeapWord*) obj), BOOL_TO_STR(heap->is_marked_next(obj))); - // _heap->heap_region_containing(obj)->print(); - // _heap->print_heap_regions(); - } -#endif - assert(heap->cancelled_concgc() - || oopDesc::bs()->is_safe(obj), - "we don't want to mark objects in from-space"); - - bool pushed = q->push(SCMTask(obj)); - assert(pushed, "overflow queue should always succeed pushing"); - - } -#ifdef ASSERT - else { - log_develop_trace(gc, marking)("failed to mark obj (already marked): "PTR_FORMAT, p2i((HeapWord*) obj)); - assert(heap->is_marked_next(obj), "make sure object is marked"); - } -#endif } #endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHCONCURRENTMARK_INLINE_HPP diff -r 273b4206fd1e -r 90ed8f27e2ca src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Mon Feb 06 16:58:31 2017 -0500 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Tue Feb 07 10:31:06 2017 +0100 @@ -356,6 +356,9 @@ template inline oop update_oop_ref_not_null(T* p, oop obj); + template + inline oop maybe_update_oop_ref_not_null(T* p, oop obj); + void print_heap_regions(outputStream* st = tty) const; void print_all_refs(const char* prefix); void print_heap_locations(HeapWord* start, HeapWord* end); @@ -430,9 +433,6 @@ void parallel_evacuate(); - template - inline oop maybe_update_oop_ref_not_null(T* p, oop obj); - inline oop atomic_compare_exchange_oop(oop n, narrowOop* addr, oop c); inline oop atomic_compare_exchange_oop(oop n, oop* addr, oop c); diff -r 273b4206fd1e -r 90ed8f27e2ca src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp Mon Feb 06 16:58:31 2017 -0500 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp Tue Feb 07 10:31:06 2017 +0100 @@ -31,6 +31,13 @@ class ShenandoahHeap; +enum UpdateRefsMode { + NONE, // No reference updating + RESOLVE, // Only a read-barrier (no reference updating) + SIMPLE, // Reference updating using simple store + CONCURRENT // Reference updating using CAS +}; + class ShenandoahMarkRefsSuperClosure : public MetadataAwareOopClosure { private: SCMObjToScanQueue* _queue; @@ -38,7 +45,7 @@ public: ShenandoahMarkRefsSuperClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp); - template + template void work(T *p); }; @@ -48,7 +55,7 @@ ShenandoahMarkRefsSuperClosure(q, rp) {}; template - inline void do_oop_nv(T* p) { work(p); } + inline void do_oop_nv(T* p) { work(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } virtual void do_oop(oop* p) { do_oop_nv(p); } inline bool do_metadata_nv() { return false; } @@ -61,7 +68,7 @@ ShenandoahMarkRefsSuperClosure(q, rp) {}; template - inline void do_oop_nv(T* p) { work(p); } + inline void do_oop_nv(T* p) { work(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } virtual void do_oop(oop* p) { do_oop_nv(p); } inline bool do_metadata_nv() { return true; } @@ -74,7 +81,7 @@ ShenandoahMarkRefsSuperClosure(q, rp) {}; template - inline void do_oop_nv(T* p) { work(p); } + inline void do_oop_nv(T* p) { work(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } virtual void do_oop(oop* p) { do_oop_nv(p); } inline bool do_metadata_nv() { return false; } @@ -87,7 +94,7 @@ ShenandoahMarkRefsSuperClosure(q, rp) {}; template - inline void do_oop_nv(T* p) { work(p); } + inline void do_oop_nv(T* p) { work(p); } virtual void do_oop(narrowOop* p) { 
do_oop_nv(p); } virtual void do_oop(oop* p) { do_oop_nv(p); } inline bool do_metadata_nv() { return true; } diff -r 273b4206fd1e -r 90ed8f27e2ca src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.inline.hpp Mon Feb 06 16:58:31 2017 -0500 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.inline.hpp Tue Feb 07 10:31:06 2017 +0100 @@ -27,19 +27,9 @@ #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp" -template +template inline void ShenandoahMarkRefsSuperClosure::work(T *p) { - oop obj; - if (UPDATE_REFS) { - // We piggy-back reference updating to the marking tasks. - obj = _heap->maybe_update_oop_ref(p); - } else { - obj = oopDesc::load_decode_heap_oop(p); - } - assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static(obj)), "need to-space object here"); - if (! oopDesc::is_null(obj)) { - ShenandoahConcurrentMark::mark_and_push(obj, _heap, _queue); - } + ShenandoahConcurrentMark::mark_through_ref(p, _heap, _queue); } #endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAHOOPCLOSURES_INLINE_HPP changeset: 9534:a1cc4a5f553d user: shade date: Tue Feb 07 11:04:59 2017 +0100 summary: Cleanup SCM::mark_through_ref. diff -r 90ed8f27e2ca -r a1cc4a5f553d src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Tue Feb 07 10:31:06 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Tue Feb 07 11:04:59 2017 +0100 @@ -240,44 +240,19 @@ // Note: Only when concurrently updating references can obj become NULL here. // It happens when a mutator thread beats us by writing another value. In that // case we don't need to do anything else. - if (UPDATE_REFS != CONCURRENT || ! oopDesc::is_null(obj)) { - - assert(! oopDesc::is_null(obj), "must not be null here"); -#ifdef ASSERT - if (! oopDesc::bs()->is_safe(obj)) { - tty->print_cr("obj in cset: %s, obj: "PTR_FORMAT", forw: "PTR_FORMAT, - BOOL_TO_STR(heap->in_collection_set(obj)), - p2i(obj), - p2i(ShenandoahBarrierSet::resolve_oop_static_not_null(obj))); - heap->heap_region_containing((HeapWord*) obj)->print(); - } -#endif - assert(oopDesc::bs()->is_safe(obj), "no ref in cset"); - assert(Universe::heap()->is_in(obj), err_msg("We shouldn't be calling this on objects not in the heap: "PTR_FORMAT, p2i(obj))); + if (UPDATE_REFS != CONCURRENT || !oopDesc::is_null(obj)) { + assert(!oopDesc::is_null(obj), "Must not be null here"); + assert(heap->is_in(obj), err_msg("We shouldn't be calling this on objects not in the heap: " PTR_FORMAT, p2i(obj))); + assert(oopDesc::bs()->is_safe(obj), "Only mark objects in from-space"); if (heap->mark_next(obj)) { -#ifdef ASSERT - log_develop_trace(gc, marking)("marked obj: "PTR_FORMAT, p2i((HeapWord*) obj)); - - if (! 
oopDesc::bs()->is_safe(obj)) { - tty->print_cr("trying to mark obj: "PTR_FORMAT" (%s) in dirty region: ", p2i((HeapWord*) obj), BOOL_TO_STR(heap->is_marked_next(obj))); - // _heap->heap_region_containing(obj)->print(); - // _heap->print_heap_regions(); - } -#endif - assert(heap->cancelled_concgc() - || oopDesc::bs()->is_safe(obj), - "we don't want to mark objects in from-space"); + log_develop_trace(gc, marking)("Marked obj: " PTR_FORMAT, p2i((HeapWord*) obj)); bool pushed = q->push(SCMTask(obj)); assert(pushed, "overflow queue should always succeed pushing"); - + } else { + log_develop_trace(gc, marking)("Failed to mark obj (already marked): " PTR_FORMAT, p2i((HeapWord*) obj)); + assert(heap->is_marked_next(obj), "Consistency: should be marked."); } -#ifdef ASSERT - else { - log_develop_trace(gc, marking)("failed to mark obj (already marked): "PTR_FORMAT, p2i((HeapWord*) obj)); - assert(heap->is_marked_next(obj), "make sure object is marked"); - } -#endif } } } changeset: 9535:af7702f87e46 user: rkennke date: Tue Feb 07 11:07:28 2017 +0100 summary: Make adaptive default heuristics. diff -r a1cc4a5f553d -r af7702f87e46 src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Tue Feb 07 11:04:59 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Tue Feb 07 11:07:28 2017 +0100 @@ -60,10 +60,10 @@ "regions, based on ShenandoahMinRegionSize and " \ "ShenandoahMaxRegionSizeSize. ") \ \ - product(ccstr, ShenandoahGCHeuristics, "dynamic", \ + product(ccstr, ShenandoahGCHeuristics, "adaptive", \ "The heuristics to use in Shenandoah GC. Possible values: " \ "dynamic, adaptive, aggressive." \ - "Defauls to dynamic") \ + "Defaults to adaptive") \ \ product(uintx, ShenandoahRefProcFrequency, 5, \ "How often should (weak, soft, etc) references be processed. " \ changeset: 9536:a3615c10ac51 user: zgu date: Wed Feb 08 07:27:04 2017 -0500 summary: Fixed calculation of active workers when NUMA is enabled diff -r af7702f87e46 -r a3615c10ac51 src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Tue Feb 07 11:07:28 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Feb 08 07:27:04 2017 -0500 @@ -810,7 +810,7 @@ if (UseNUMA) { uint numa_groups = (uint)os::numa_get_groups_num(); assert(numa_groups <= total_workers, "Not enough workers to cover all numa groups"); - new_active_workers = MAX2(max_active_workers, numa_groups); + new_active_workers = MAX2(new_active_workers, numa_groups); } // Check once more that the number of workers is within the limits. changeset: 9537:ccb32d242533 user: shade date: Wed Feb 08 15:07:55 2017 +0100 summary: Avoid scanning primitive arrays. diff -r a3615c10ac51 -r ccb32d242533 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Wed Feb 08 07:27:04 2017 -0500 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Wed Feb 08 15:07:55 2017 +0100 @@ -55,16 +55,20 @@ if (task->is_not_chunked()) { if (CL) count_liveness(obj); - if (!obj->is_objArray()) { + if (obj->is_instance()) { // Case 1: Normal oop, process as usual. obj->oop_iterate(&_mark_refs); + } else if (obj->is_objArray()) { + // Case 2: Object array instance and no chunk is set. 
Must be the first + // time we visit it, start the chunked processing. + do_chunked_array_start(obj); } else { - // Case 2: Array instance and no chunk is set. Must be the first time - // we visit it. - do_chunked_array_start(obj); + // Case 3: Primitive array. Do nothing, no oops there. Metadata was + // handled in Universe roots. + assert (obj->is_typeArray(), "should be type array"); } } else { - // Case 3: Array chunk, has sensible chunk id. Process it. + // Case 4: Array chunk, has sensible chunk id. Process it. do_chunked_array(obj, task->chunk(), task->pow()); } } diff -r a3615c10ac51 -r ccb32d242533 src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp Wed Feb 08 07:27:04 2017 -0500 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp Wed Feb 08 15:07:55 2017 +0100 @@ -116,6 +116,15 @@ if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_Universe_oops_do)) { ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::UniverseRoots, worker_id); Universe::oops_do(strong_roots); + KlassToOopClosure kcl(strong_roots); + kcl.do_klass(Universe::boolArrayKlassObj()); + kcl.do_klass(Universe::byteArrayKlassObj()); + kcl.do_klass(Universe::charArrayKlassObj()); + kcl.do_klass(Universe::intArrayKlassObj()); + kcl.do_klass(Universe::shortArrayKlassObj()); + kcl.do_klass(Universe::longArrayKlassObj()); + kcl.do_klass(Universe::singleArrayKlassObj()); + kcl.do_klass(Universe::doubleArrayKlassObj()); } if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_JNIHandles_oops_do)) { changeset: 9538:9a210f2b0b29 user: rkennke date: Wed Feb 08 16:22:26 2017 +0100 summary: Improve adaptive heuristics. diff -r ccb32d242533 -r 9a210f2b0b29 src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Feb 08 15:07:55 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Feb 08 16:22:26 2017 +0100 @@ -378,13 +378,21 @@ class AdaptiveHeuristics : public ShenandoahHeuristics { private: uintx _free_threshold; + TruncatedSeq* _cset_history; + public: AdaptiveHeuristics() : ShenandoahHeuristics(), - _free_threshold(ShenandoahInitFreeThreshold) { + _free_threshold(ShenandoahInitFreeThreshold), + _cset_history(new TruncatedSeq(ShenandoahHappyCyclesThreshold)) { + + _cset_history->add((double) ShenandoahCSetThreshold); + _cset_history->add((double) ShenandoahCSetThreshold); } - virtual ~AdaptiveHeuristics() {} + virtual ~AdaptiveHeuristics() { + delete _cset_history; + } virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) { size_t threshold = ShenandoahHeapRegion::RegionSizeBytes * ShenandoahGarbageThreshold / 100; @@ -395,7 +403,7 @@ ShenandoahHeuristics::record_cm_cancelled(); if (_free_threshold < ShenandoahMaxFreeThreshold) { _free_threshold++; - log_debug(gc,ergo)("increasing free threshold to: "UINTX_FORMAT, _free_threshold); + log_info(gc,ergo)("increasing free threshold to: "UINTX_FORMAT, _free_threshold); } } @@ -404,7 +412,7 @@ if (_successful_cm_cycles_in_a_row > ShenandoahHappyCyclesThreshold && _free_threshold > ShenandoahMinFreeThreshold) { _free_threshold--; - log_debug(gc,ergo)("reducing free threshold to: "UINTX_FORMAT, _free_threshold); + log_info(gc,ergo)("reducing free threshold to: "UINTX_FORMAT, _free_threshold); _successful_cm_cycles_in_a_row = 0; } } @@ -416,9 +424,11 @@ size_t 
free_capacity = heap->free_regions()->capacity(); size_t free_used = heap->free_regions()->used(); assert(free_used <= free_capacity, "must use less than capacity"); - size_t cset = MIN2(_bytes_in_cset, (ShenandoahCSetThreshold * capacity) / 100); + // size_t cset_threshold = (size_t) _cset_history->maximum(); + size_t cset_threshold = (size_t) _cset_history->davg(); + size_t cset = MIN2(_bytes_in_cset, (cset_threshold * capacity) / 100); size_t available = free_capacity - free_used + cset; - uintx factor = _free_threshold + ShenandoahCSetThreshold; + uintx factor = _free_threshold + cset_threshold; size_t targetStartMarking = (capacity * factor) / 100; size_t threshold_bytes_allocated = heap->capacity() * ShenandoahAllocationThreshold / 100; @@ -430,6 +440,11 @@ shouldStartConcurrentMark = true; } + if (shouldStartConcurrentMark) { + log_info(gc,ergo)("predicted cset threshold: "SIZE_FORMAT, cset_threshold); + log_info(gc,ergo)("Starting concurrent mark at "SIZE_FORMAT"K CSet ("SIZE_FORMAT"%%)", _bytes_in_cset / K, _bytes_in_cset * 100 / capacity); + _cset_history->add((double) (_bytes_in_cset * 100 / capacity)); + } return shouldStartConcurrentMark; } diff -r ccb32d242533 -r 9a210f2b0b29 src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Feb 08 15:07:55 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Feb 08 16:22:26 2017 +0100 @@ -120,7 +120,7 @@ experimental(uintx, ShenandoahInitFreeThreshold, 10, \ "Initial remaininig free threshold for adaptive heuristics") \ \ - experimental(uintx, ShenandoahMinFreeThreshold, 5, \ + experimental(uintx, ShenandoahMinFreeThreshold, 3, \ "Minimum remaininig free threshold for adaptive heuristics") \ \ experimental(uintx, ShenandoahMaxFreeThreshold, 70, \ changeset: 9539:b22b5692689d user: shade date: Wed Feb 08 20:24:17 2017 +0100 summary: Avoid touching typeArrayKlass klasses. diff -r 9a210f2b0b29 -r b22b5692689d src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Wed Feb 08 16:22:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Wed Feb 08 20:24:17 2017 +0100 @@ -63,8 +63,10 @@ // time we visit it, start the chunked processing. do_chunked_array_start(obj); } else { - // Case 3: Primitive array. Do nothing, no oops there. Metadata was - // handled in Universe roots. + // Case 3: Primitive array. Do nothing, no oops there. We use the same + // performance tweak TypeArrayKlass::oop_oop_iterate_impl is using: + // We skip iterating over the klass pointer since we know that + // Universe::TypeArrayKlass never moves. 
assert (obj->is_typeArray(), "should be type array"); } } else { diff -r 9a210f2b0b29 -r b22b5692689d src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp Wed Feb 08 16:22:26 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp Wed Feb 08 20:24:17 2017 +0100 @@ -116,15 +116,6 @@ if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_Universe_oops_do)) { ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::UniverseRoots, worker_id); Universe::oops_do(strong_roots); - KlassToOopClosure kcl(strong_roots); - kcl.do_klass(Universe::boolArrayKlassObj()); - kcl.do_klass(Universe::byteArrayKlassObj()); - kcl.do_klass(Universe::charArrayKlassObj()); - kcl.do_klass(Universe::intArrayKlassObj()); - kcl.do_klass(Universe::shortArrayKlassObj()); - kcl.do_klass(Universe::longArrayKlassObj()); - kcl.do_klass(Universe::singleArrayKlassObj()); - kcl.do_klass(Universe::doubleArrayKlassObj()); } if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_JNIHandles_oops_do)) { changeset: 9542:d5cdde4b0dda user: rkennke date: Sun Feb 12 14:27:32 2017 +0100 summary: Fix broken merge: added missing method in connode.cpp diff -r 70dd4b68570b -r d5cdde4b0dda src/share/vm/opto/connode.cpp --- a/src/share/vm/opto/connode.cpp Fri Feb 10 22:22:25 2017 +0100 +++ b/src/share/vm/opto/connode.cpp Sun Feb 12 14:27:32 2017 +0100 @@ -431,6 +431,24 @@ return (in(0) && remove_dead_region(phase, can_reshape)) ? this : NULL; } +//------------------------------Ideal_DU_postCCP------------------------------- + +// Throw away cast after constant propagation + +Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { + + const Type *t = ccp->type(in(1)); + + ccp->hash_delete(this); + + set_type(t); // Turn into ID function + + ccp->hash_insert(this); + + return this; + +} + uint CastIINode::size_of() const { return sizeof(*this); } changeset: 9543:6016028fd420 user: roland date: Mon Feb 13 11:26:09 2017 +0100 summary: Barrier expansion fix (collect raw memory state) diff -r d5cdde4b0dda -r 6016028fd420 src/share/vm/opto/shenandoahSupport.cpp --- a/src/share/vm/opto/shenandoahSupport.cpp Sun Feb 12 14:27:32 2017 +0100 +++ b/src/share/vm/opto/shenandoahSupport.cpp Mon Feb 13 11:26:09 2017 +0100 @@ -2768,18 +2768,24 @@ DEBUG_ONLY(if (trace) { tty->print("YYY phi post: other"); other->dump(); }) if (other != mem) { if (other->is_Phi() && other->in(0) == r && mem->is_Phi() && mem->in(0) == r) { + bool identical = true; for (uint i = 1; i < mem->req(); i++) { + if (mem->in(i) != other->in(i)) { + identical = false; + } assert(mem->in(i) == other->in(i) || (C->get_alias_index(mem->adr_type()) == alias && mem->in(i) == memory_for(get_ctrl(other->in(i)), phis)), ""); } - if (mem->adr_type() == TypePtr::BOTTOM || C->get_alias_index(other->adr_type()) == alias) { + if (mem->adr_type() == TypePtr::BOTTOM || C->get_alias_index(other->adr_type()) == alias || !identical) { assert(mem->adr_type() != TypePtr::BOTTOM || mem == n, ""); DEBUG_ONLY(if (trace) { tty->print("YYY phi post: replacing other with"); mem->dump(); }) if (phis[other->_idx] != mem) { phis.map(other->_idx, mem); push_uses(C, other, wq, alias); } - lazy_replace(other, mem); + if (mem->adr_type() == TypePtr::BOTTOM || C->get_alias_index(other->adr_type()) == alias) { + lazy_replace(other, mem); + } wq.remove(other); phis.map(r->_idx, mem); cur_mem = mem; changeset: 9544:284095470525 user: shade date: Fri Feb 10 15:22:11 2017 
+0100 summary: Non-updateref closures should not update refs. diff -r 6016028fd420 -r 284095470525 src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp Mon Feb 13 11:26:09 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp Fri Feb 10 15:22:11 2017 +0100 @@ -81,7 +81,7 @@ ShenandoahMarkRefsSuperClosure(q, rp) {}; template - inline void do_oop_nv(T* p) { work(p); } + inline void do_oop_nv(T* p) { work(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } virtual void do_oop(oop* p) { do_oop_nv(p); } inline bool do_metadata_nv() { return false; } @@ -94,7 +94,7 @@ ShenandoahMarkRefsSuperClosure(q, rp) {}; template - inline void do_oop_nv(T* p) { work(p); } + inline void do_oop_nv(T* p) { work(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } virtual void do_oop(oop* p) { do_oop_nv(p); } inline bool do_metadata_nv() { return true; } changeset: 9545:aab96e4f2596 user: shade date: Fri Feb 10 17:50:28 2017 +0100 summary: Generic mark loop. diff -r 284095470525 -r aab96e4f2596 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Fri Feb 10 15:22:11 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Fri Feb 10 17:50:28 2017 +0100 @@ -86,34 +86,6 @@ } }; -// Mark the object and add it to the queue to be scanned -template -ShenandoahMarkObjsClosure::ShenandoahMarkObjsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp, jushort* live_data) : - _heap((ShenandoahHeap*)(Universe::heap())), - _queue(q), - _mark_refs(T(q, rp)), - _live_data(live_data) -{ - if (CL) { - Copy::fill_to_bytes(_live_data, _heap->max_regions() * sizeof(jushort)); - } -} - -template -ShenandoahMarkObjsClosure::~ShenandoahMarkObjsClosure() { - if (CL) { - for (uint i = 0; i < _heap->max_regions(); i++) { - ShenandoahHeapRegion* r = _heap->regions()->get(i); - if (r != NULL) { - jushort live = _live_data[i]; - if (live > 0) { - r->increase_live_data_words(live); - } - } - } - } -} - ShenandoahMarkRefsSuperClosure::ShenandoahMarkRefsSuperClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) : MetadataAwareOopClosure(rp), _queue(q), @@ -215,23 +187,13 @@ CodeCache::blobs_do(&blobs); } } - if (_update_refs) { - if (_cm->unload_classes()) { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); - } else { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); - } - } else { - if (_cm->unload_classes()) { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); - } else { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->concurrent_mark_loop(&cl, worker_id, q, _terminator); - } - } + + _cm->mark_loop(worker_id, _terminator, rp, + true, // cancellable + true, // drain SATBs as we go + true, // count liveness + _cm->unload_classes(), + _update_refs); } }; @@ -262,47 +224,13 @@ } else { rp = NULL; } - SCMObjToScanQueue* q = _cm->get_queue(worker_id); - jushort* live_data = _cm->get_liveness(worker_id); - // Templates need constexprs, so we have to switch by the flags ourselves. 
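That removed comment names the constraint the rest of this changeset resolves more systematically: C++ template parameters must be compile-time constants, so dynamic flags have to be translated into template arguments by explicit branching somewhere. The old code spelled out the combinations by hand at each call site; the new mark_loop() entry point (see the shenandoahConcurrentMark.hpp hunk further down) curries one flag per level instead, so each flag is branched on exactly once. A self-contained sketch of the pattern, reduced to two flags with illustrative names:

#include <cstdio>

// Illustrative stand-ins; the real mark loop threads five flags plus a closure type.
template <bool CANCELLABLE, bool DRAIN_SATB>
void mark_loop_work() {
  // These tests fold away: each instantiation sees compile-time constants.
  if (CANCELLABLE) std::printf("poll for cancellation every stride\n");
  if (DRAIN_SATB)  std::printf("drain SATB buffers between queue pops\n");
}

template <bool CANCELLABLE>
void mark_loop_1(bool drain_satb) {
  if (drain_satb) mark_loop_work<CANCELLABLE, true>();
  else            mark_loop_work<CANCELLABLE, false>();
}

void mark_loop(bool cancellable, bool drain_satb) {
  if (cancellable) mark_loop_1<true>(drain_satb);
  else             mark_loop_1<false>(drain_satb);
}

int main() {
  mark_loop(/* cancellable */ true, /* drain_satb */ false);
  return 0;
}

Each currying level costs one branch at loop entry, after which the hot loop runs as a fully specialized instantiation with the flag tests compiled out.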
- if (_update_refs) { - if (_count_live) { - if (_unload_classes) { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->final_mark_loop(&cl, worker_id, q, _terminator); - } else { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->final_mark_loop(&cl, worker_id, q, _terminator); - } - } else { - if (_unload_classes) { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->final_mark_loop(&cl, worker_id, q, _terminator); - } else { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->final_mark_loop(&cl, worker_id, q, _terminator); - } - } - } else { - if (_count_live) { - if (_unload_classes) { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->final_mark_loop(&cl, worker_id, q, _terminator); - } else { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->final_mark_loop(&cl, worker_id, q, _terminator); - } - } else { - if (_unload_classes) { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->final_mark_loop(&cl, worker_id, q, _terminator); - } else { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - _cm->final_mark_loop(&cl, worker_id, q, _terminator); - } - } - } + _cm->mark_loop(worker_id, _terminator, rp, + false, // not cancellable + false, // do not drain SATBs, already drained + _count_live, + _unload_classes, + _update_refs); assert(_cm->task_queues()->is_empty(), "Should be empty"); } @@ -378,6 +306,8 @@ void ShenandoahConcurrentMark::initialize(uint workers) { + _heap = ShenandoahHeap::heap(); + uint num_queues = MAX2(workers, 1U); _task_queues = new SCMObjToScanQueueSet((int) num_queues); @@ -702,25 +632,13 @@ } else { rp = NULL; } - SCMObjToScanQueue* q = scm->get_queue(_worker_id); - jushort* live_data = scm->get_liveness(_worker_id); - if (sh->need_update_refs()) { - if (scm->unload_classes()) { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - scm->final_mark_loop(&cl, _worker_id, q, _terminator); - } else { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - scm->final_mark_loop(&cl, _worker_id, q, _terminator); - } - } else { - if (scm->unload_classes()) { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - scm->final_mark_loop(&cl, _worker_id, q, _terminator); - } else { - ShenandoahMarkObjsClosure cl(q, rp, live_data); - scm->final_mark_loop(&cl, _worker_id, q, _terminator); - } - } + + scm->mark_loop(_worker_id, _terminator, rp, + false, // not cancellable + false, // do not drain SATBs + true, // count liveness + scm->unload_classes(), + sh->need_update_refs()); } }; @@ -922,104 +840,107 @@ q->clear_buffer(); } -template -void ShenandoahConcurrentMark::concurrent_mark_loop(ShenandoahMarkObjsClosure* cl, - uint worker_id, - SCMObjToScanQueue* q, - ParallelTaskTerminator* terminator) { - ShenandoahHeap* heap = ShenandoahHeap::heap(); - int seed = 17; - uint stride = ShenandoahMarkLoopStride; - SCMObjToScanQueueSet* queues = task_queues(); +template +void ShenandoahConcurrentMark::mark_loop_prework(uint w, ParallelTaskTerminator *t, ReferenceProcessor *rp) { + SCMObjToScanQueue* q = get_queue(w); - // Drain outstanding queues first - if (!concurrent_process_queues(heap, q, cl)) { - ShenandoahCancelledTerminatorTerminator tt; - while (! terminator->offer_termination(&tt)); - return; + jushort* ld; + if (COUNT_LIVENESS) { + ld = get_liveness(w); + Copy::fill_to_bytes(ld, _heap->max_regions() * sizeof(jushort)); + } else { + ld = NULL; } - // Normal loop - while (true) { - if (heap->cancelled_concgc()) { - ShenandoahCancelledTerminatorTerminator tt; - while (! 
terminator->offer_termination(&tt)); - return; + // TODO: We can clean up this if we figure out how to do templated oop closures that + // play nice with specialized_oop_iterators. + if (CLASS_UNLOAD) { + if (UPDATE_REFS) { + ShenandoahMarkUpdateRefsMetadataClosure cl(q, rp); + mark_loop_work(&cl, ld, w, t); + } else { + ShenandoahMarkRefsMetadataClosure cl(q, rp); + mark_loop_work(&cl, ld, w, t); } - - SCMTask t; - for (uint i = 0; i < stride; i++) { - if (try_queue(q, t) || - try_draining_satb_buffer(q, t) || - queues->steal(worker_id, &seed, t)) { - cl->do_task(&t); - } else { - if (terminator->offer_termination()) return; - } + } else { + if (UPDATE_REFS) { + ShenandoahMarkUpdateRefsClosure cl(q, rp); + mark_loop_work(&cl, ld, w, t); + } else { + ShenandoahMarkRefsClosure cl(q, rp); + mark_loop_work(&cl, ld, w, t); } } -} -template -bool ShenandoahConcurrentMark::concurrent_process_queues(ShenandoahHeap* heap, - SCMObjToScanQueue* q, ShenandoahMarkObjsClosure* cl) { - SCMObjToScanQueueSet* queues = task_queues(); - uint stride = ShenandoahMarkLoopStride; - while (true) { - if (heap->cancelled_concgc()) return false; - - SCMTask t; - for (uint i = 0; i < stride; i++) { - if (try_queue(q, t)) { - cl->do_task(&t); - } else { - assert(q->is_empty(), "Must be empty"); - q = queues->claim_next(); - if (q == NULL) { - return true; + if (COUNT_LIVENESS) { + for (uint i = 0; i < _heap->max_regions(); i++) { + ShenandoahHeapRegion *r = _heap->regions()->get(i); + if (r != NULL) { + jushort live = ld[i]; + if (live > 0) { + r->increase_live_data_words(live); } } } } } +template +void ShenandoahConcurrentMark::mark_loop_work(T* cl, jushort* live_data, uint worker_id, ParallelTaskTerminator *terminator) { + int seed = 17; + uint stride = CANCELLABLE ? ShenandoahMarkLoopStride : 1; -template -void ShenandoahConcurrentMark::final_mark_loop(ShenandoahMarkObjsClosure* cl, - uint worker_id, - SCMObjToScanQueue* q, - ParallelTaskTerminator* terminator) { - int seed = 17; - assert(q != NULL, "Sanity"); + ShenandoahHeap* heap = ShenandoahHeap::heap(); SCMObjToScanQueueSet* queues = task_queues(); - SCMObjToScanQueue* worker_queue = q; + SCMObjToScanQueue* q; SCMTask t; /* - * There can be more queues than workers. - * To deal with the imbalance, we claim extra queues first, - * since marking can push new tasks into the queue associated - * with this worker id, and we come back to process this - * queue at the end. + * Process outstanding queues, if any. + * + * There can be more queues than workers. To deal with the imbalance, we claim + * extra queues first. Since marking can push new tasks into the queue associated + * with this worker id, we come back to process this queue in the normal loop. 
*/ q = queues->claim_next(); - if (q == NULL) { - q = worker_queue; + while (q != NULL) { + if (CANCELLABLE && heap->cancelled_concgc()) { + ShenandoahCancelledTerminatorTerminator tt; + while (!terminator->offer_termination(&tt)); + return; + } + + for (uint i = 0; i < stride; i++) { + if (try_queue(q, t)) { + do_task(q, cl, live_data, &t); + } else { + assert(q->is_empty(), "Must be empty"); + q = queues->claim_next(); + break; + } + } } + q = get_queue(worker_id); + + /* + * Normal marking loop: + */ while (true) { - if (try_queue(q, t) || - (q == worker_queue && queues->steal(worker_id, &seed, t))) { - cl->do_task(&t); - } else { - if (q != worker_queue) { - q = queues->claim_next(); - if (q == NULL) { - q = worker_queue; - } - continue; + if (CANCELLABLE && heap->cancelled_concgc()) { + ShenandoahCancelledTerminatorTerminator tt; + while (!terminator->offer_termination(&tt)); + return; + } + + for (uint i = 0; i < stride; i++) { + if (try_queue(q, t) || + (DRAIN_SATB && try_draining_satb_buffer(q, t)) || + queues->steal(worker_id, &seed, t)) { + do_task(q, cl, live_data, &t); + } else { + if (terminator->offer_termination()) return; } - if (terminator->offer_termination()) return; } } } diff -r 284095470525 -r aab96e4f2596 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Fri Feb 10 15:22:11 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.hpp Fri Feb 10 17:50:28 2017 +0100 @@ -46,25 +46,11 @@ }; #endif -template -class ShenandoahMarkObjsClosure { - ShenandoahHeap* _heap; - T _mark_refs; - SCMObjToScanQueue* _queue; - jushort* _live_data; -public: - ShenandoahMarkObjsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp, jushort* live_data); - ~ShenandoahMarkObjsClosure(); - - inline void do_task(SCMTask* task); - inline void do_chunked_array_start(oop array); - inline void do_chunked_array(oop array, int chunk, int pow); - inline void count_liveness(oop obj); -}; - class ShenandoahConcurrentMark: public CHeapObj { private: + ShenandoahHeap* _heap; + // The per-worker-thread work queues SCMObjToScanQueueSet* _task_queues; @@ -83,6 +69,65 @@ // too many atomic updates. size_t/jint is too large, jbyte is too small. 
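// (Editor's aside: the count_liveness() hunk in the .inline.hpp diff below
// shows how this buffer is used. Each worker accumulates an object's live
// words into its private jushort slot for the region, and the value is only
// flushed into the shared ShenandoahHeapRegion, via a single
// increase_live_data_words() call, when the 16-bit counter would overflow or
// once at the end of mark_loop_prework(). That keeps the hot path free of
// per-object atomic updates.)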
jushort** _liveness_local; +private: + template + inline void do_task(SCMObjToScanQueue* q, T* cl, jushort* live_data, SCMTask* task); + + template + inline void do_chunked_array_start(SCMObjToScanQueue* q, T* cl, oop array); + + template + inline void do_chunked_array(SCMObjToScanQueue* q, T* cl, oop array, int chunk, int pow); + + inline void count_liveness(jushort* live_data, oop obj); + + // Actual mark loop with closures set up + template + void mark_loop_work(T* cl, jushort* live_data, uint worker_id, ParallelTaskTerminator *t); + + template + void mark_loop_prework(uint worker_id, ParallelTaskTerminator *terminator, ReferenceProcessor *rp); + + // ------------------------ Currying dynamic arguments to template args ---------------------------- + + template + void mark_loop_4(uint w, ParallelTaskTerminator* t, ReferenceProcessor* rp, bool b5) { + if (b5) { + mark_loop_prework(w, t, rp); + } else { + mark_loop_prework(w, t, rp); + } + }; + + template + void mark_loop_3(uint w, ParallelTaskTerminator* t, ReferenceProcessor* rp, bool b4, bool b5) { + if (b4) { + mark_loop_4(w, t, rp, b5); + } else { + mark_loop_4(w, t, rp, b5); + } + }; + + template + void mark_loop_2(uint w, ParallelTaskTerminator* t, ReferenceProcessor* rp, bool b3, bool b4, bool b5) { + if (b3) { + mark_loop_3(w, t, rp, b4, b5); + } else { + mark_loop_3(w, t, rp, b4, b5); + } + }; + + template + void mark_loop_1(uint w, ParallelTaskTerminator* t, ReferenceProcessor* rp, bool b2, bool b3, bool b4, bool b5) { + if (b2) { + mark_loop_2(w, t, rp, b3, b4, b5); + } else { + mark_loop_2(w, t, rp, b3, b4, b5); + } + }; + + // ------------------------ END: Currying dynamic arguments to template args ---------------------------- + public: // We need to do this later when the heap is already created. void initialize(uint workers); @@ -112,11 +157,16 @@ void finish_mark_from_roots(); // Those are only needed public because they're called from closures. - template - void concurrent_mark_loop(ShenandoahMarkObjsClosure* cl, uint worker_id, SCMObjToScanQueue* q, ParallelTaskTerminator* t); - - template - void final_mark_loop(ShenandoahMarkObjsClosure* cl, uint worker_id, SCMObjToScanQueue* q, ParallelTaskTerminator* t); + // Mark loop entry. + // Translates dynamic arguments to template parameters with progressive currying. + void mark_loop(uint worker_id, ParallelTaskTerminator* terminator, ReferenceProcessor *rp, + bool cancellable, bool drain_satb, bool count_liveness, bool class_unload, bool update_refs) { + if (cancellable) { + mark_loop_1(worker_id, terminator, rp, drain_satb, count_liveness, class_unload, update_refs); + } else { + mark_loop_1(worker_id, terminator, rp, drain_satb, count_liveness, class_unload, update_refs); + } + } inline bool try_queue(SCMObjToScanQueue* q, SCMTask &task); @@ -139,13 +189,6 @@ void weak_refs_work(); - /** - * Process assigned queue and others if there are any to be claimed. - * Return false if the process is terminated by concurrent gc cancellation. 
- */ - template - bool concurrent_process_queues(ShenandoahHeap* heap, SCMObjToScanQueue* q, ShenandoahMarkObjsClosure* cl); - #if TASKQUEUE_STATS static void print_taskqueue_stats_hdr(outputStream* const st = tty); void print_taskqueue_stats() const; diff -r 284095470525 -r aab96e4f2596 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Fri Feb 10 15:22:11 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Fri Feb 10 17:50:28 2017 +0100 @@ -32,8 +32,8 @@ #include "oops/oop.inline.hpp" #include "runtime/prefetch.inline.hpp" -template -void ShenandoahMarkObjsClosure::do_task(SCMTask* task) { +template +void ShenandoahConcurrentMark::do_task(SCMObjToScanQueue* q, T* cl, jushort* live_data, SCMTask* task) { oop obj = task->obj(); assert(obj != NULL, "expect non-null object"); @@ -54,14 +54,14 @@ assert(_heap->is_marked_next(obj), "only marked objects on task queue"); if (task->is_not_chunked()) { - if (CL) count_liveness(obj); + if (COUNT_LIVENESS) count_liveness(live_data, obj); if (obj->is_instance()) { // Case 1: Normal oop, process as usual. - obj->oop_iterate(&_mark_refs); + obj->oop_iterate(cl); } else if (obj->is_objArray()) { // Case 2: Object array instance and no chunk is set. Must be the first // time we visit it, start the chunked processing. - do_chunked_array_start(obj); + do_chunked_array_start(q, cl, obj); } else { // Case 3: Primitive array. Do nothing, no oops there. We use the same // performance tweak TypeArrayKlass::oop_oop_iterate_impl is using: @@ -71,14 +71,13 @@ } } else { // Case 4: Array chunk, has sensible chunk id. Process it. - do_chunked_array(obj, task->chunk(), task->pow()); + do_chunked_array(q, cl, obj, task->chunk(), task->pow()); } } -template -inline void ShenandoahMarkObjsClosure::count_liveness(oop obj) { +inline void ShenandoahConcurrentMark::count_liveness(jushort* live_data, oop obj) { uint region_idx = _heap->heap_region_index_containing(obj); - jushort cur = _live_data[region_idx]; + jushort cur = live_data[region_idx]; int size = obj->size() + BrooksPointer::word_size(); int max = (1 << (sizeof(jushort) * 8)) - 1; if (size >= max) { @@ -89,23 +88,23 @@ if (new_val >= max) { // overflow, flush to region data _heap->regions()->get_fast(region_idx)->increase_live_data_words(new_val); - _live_data[region_idx] = 0; + live_data[region_idx] = 0; } else { // still good, remember in locals - _live_data[region_idx] = (jushort) new_val; + live_data[region_idx] = (jushort) new_val; } } } -template -inline void ShenandoahMarkObjsClosure::do_chunked_array_start(oop obj) { +template +inline void ShenandoahConcurrentMark::do_chunked_array_start(SCMObjToScanQueue* q, T* cl, oop obj) { assert(obj->is_objArray(), "expect object array"); objArrayOop array = objArrayOop(obj); int len = array->length(); if (len <= (int) ObjArrayMarkingStride*2) { // A few slices only, process directly - array->oop_iterate_range(&_mark_refs, 0, len); + array->oop_iterate_range(cl, 0, len); } else { int bits = log2_long(len); // Compensate for non-power-of-two arrays, cover the array in excess: @@ -129,7 +128,7 @@ pow--; chunk = 2; last_idx = (1 << pow); - bool pushed = _queue->push(SCMTask(array, 1, pow)); + bool pushed = q->push(SCMTask(array, 1, pow)); assert(pushed, "overflow queue should always succeed pushing"); } @@ -142,7 +141,7 @@ int right_chunk = chunk*2; int left_chunk_end = left_chunk * (1 << pow); if (left_chunk_end < 
len) { - bool pushed = _queue->push(SCMTask(array, left_chunk, pow)); + bool pushed = q->push(SCMTask(array, left_chunk, pow)); assert(pushed, "overflow queue should always succeed pushing"); chunk = right_chunk; last_idx = left_chunk_end; @@ -154,13 +153,13 @@ // Process the irregular tail, if present int from = last_idx; if (from < len) { - array->oop_iterate_range(&_mark_refs, from, len); + array->oop_iterate_range(cl, from, len); } } } -template -inline void ShenandoahMarkObjsClosure::do_chunked_array(oop obj, int chunk, int pow) { +template +inline void ShenandoahConcurrentMark::do_chunked_array(SCMObjToScanQueue* q, T* cl, oop obj, int chunk, int pow) { assert(obj->is_objArray(), "expect object array"); objArrayOop array = objArrayOop(obj); @@ -171,7 +170,7 @@ while ((1 << pow) > (int)ObjArrayMarkingStride && (chunk*2 < SCMTask::chunk_size)) { pow--; chunk *= 2; - bool pushed = _queue->push(SCMTask(array, chunk - 1, pow)); + bool pushed = q->push(SCMTask(array, chunk - 1, pow)); assert(pushed, "overflow queue should always succeed pushing"); } @@ -186,7 +185,7 @@ assert (0 < to && to <= len, err_msg("to is sane: %d/%d", to, len)); #endif - array->oop_iterate_range(&_mark_refs, from, to); + array->oop_iterate_range(cl, from, to); } inline bool ShenandoahConcurrentMark::try_queue(SCMObjToScanQueue* q, SCMTask &task) { changeset: 9546:946c80b0c7f6 user: shade date: Mon Feb 13 10:33:50 2017 +0100 summary: Code cache scan should RESOLVE references. diff -r aab96e4f2596 -r 946c80b0c7f6 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Fri Feb 10 17:50:28 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Mon Feb 13 10:33:50 2017 +0100 @@ -181,7 +181,7 @@ } if (ShenandoahConcurrentCodeRoots && _cm->claim_codecache()) { if (! _cm->unload_classes()) { - ShenandoahMarkRefsClosure cl(q, rp); + ShenandoahMarkResolveRefsClosure cl(q, rp); CodeBlobToOopClosure blobs(&cl, ! CodeBlobToOopClosure::FixRelocations); MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); CodeCache::blobs_do(&blobs); diff -r aab96e4f2596 -r 946c80b0c7f6 src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp Fri Feb 10 17:50:28 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahOopClosures.hpp Mon Feb 13 10:33:50 2017 +0100 @@ -88,6 +88,19 @@ virtual bool do_metadata() { return false; } }; +class ShenandoahMarkResolveRefsClosure : public ShenandoahMarkRefsSuperClosure { +public: + ShenandoahMarkResolveRefsClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) : + ShenandoahMarkRefsSuperClosure(q, rp) {}; + + template + inline void do_oop_nv(T* p) { work(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } + virtual void do_oop(oop* p) { do_oop_nv(p); } + inline bool do_metadata_nv() { return false; } + virtual bool do_metadata() { return false; } +}; + class ShenandoahMarkRefsMetadataClosure : public ShenandoahMarkRefsSuperClosure { public: ShenandoahMarkRefsMetadataClosure(SCMObjToScanQueue* q, ReferenceProcessor* rp) : changeset: 9547:39f068fd713e user: rkennke date: Mon Feb 13 16:36:43 2017 +0100 summary: Use correct number of threads for initial evacuation. 
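A note on the one-line fix that follows: the root evacuator was being constructed with the pool-wide maximum (_max_parallel_workers) while the gang only runs active_workers() tasks, so per-worker bookkeeping was sized for workers that never show up. A reduced sketch of the rule, with illustrative names rather than the JDK classes:

#include <cstddef>
#include <vector>

// The gang invokes task.work(id) for id in [0, active_workers), so any
// per-worker state must be sized with exactly that count.
struct RootProcessorState {
  std::vector<double> phase_times;  // one slot per participating worker
  explicit RootProcessorState(std::size_t n_workers)
      : phase_times(n_workers, 0.0) {}
};

RootProcessorState make_state_for_gang(std::size_t active_workers) {
  return RootProcessorState(active_workers);  // not the gang's maximum
}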
diff -r 946c80b0c7f6 -r 39f068fd713e src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Mon Feb 13 10:33:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Mon Feb 13 16:36:43 2017 +0100 @@ -1266,7 +1266,8 @@ ClassLoaderDataGraph::clear_claimed_marks(); { - ShenandoahRootEvacuator rp(this, _max_parallel_workers, ShenandoahCollectorPolicy::evac_thread_roots); + uint nworkers = _workers->active_workers(); + ShenandoahRootEvacuator rp(this, nworkers, ShenandoahCollectorPolicy::evac_thread_roots); ShenandoahEvacuateUpdateRootsTask roots_task(&rp); workers()->run_task(&roots_task); } changeset: 9548:20dfe5963777 user: rkennke date: Mon Feb 13 16:51:58 2017 +0100 summary: Assert code roots are to-space ptrs when scanning/updating other roots. diff -r 39f068fd713e -r 20dfe5963777 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Mon Feb 13 16:36:43 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Mon Feb 13 16:51:58 2017 +0100 @@ -40,6 +40,23 @@ #include "oops/oop.inline.hpp" #include "utilities/taskqueue.hpp" +#ifdef ASSERT +class AssertToSpaceClosure : public OopClosure { +private: + template + inline void do_oop_nv(T* p) { + T o = oopDesc::load_heap_oop(p); + if (! oopDesc::is_null(o)) { + oop obj = oopDesc::decode_heap_oop_not_null(o); + assert(oopDesc::unsafe_equals(obj, ShenandoahBarrierSet::resolve_oop_static_not_null(obj)), "need to-space object here"); + } + } +public: + void do_oop(narrowOop* p) { do_oop_nv(p); } + void do_oop(oop* p) { do_oop_nv(p); } +}; +#endif + class ShenandoahInitMarkRootsClosure : public OopClosure { private: SCMObjToScanQueue* _queue; @@ -133,7 +150,20 @@ if (heap->concurrentMark()->unload_classes()) { _rp->process_strong_roots(&mark_cl, _process_refs ? NULL : &mark_cl, &cldCl, &blobsCl, worker_id); } else { - _rp->process_all_roots(&mark_cl, _process_refs ? NULL : &mark_cl, &cldCl, ShenandoahConcurrentCodeRoots ? NULL : &blobsCl, worker_id); + if (ShenandoahConcurrentCodeRoots) { + CodeBlobClosure* code_blobs; +#ifdef ASSERT + AssertToSpaceClosure assert_to_space_oops; + CodeBlobToOopClosure assert_to_space(&assert_to_space_oops, + !CodeBlobToOopClosure::FixRelocations); + code_blobs = &assert_to_space; +#else + code_blobs = NULL; +#endif + _rp->process_all_roots(&mark_cl, _process_refs ? NULL : &mark_cl, &cldCl, code_blobs, worker_id); + } else { + _rp->process_all_roots(&mark_cl, _process_refs ? NULL : &mark_cl, &cldCl, &blobsCl, worker_id); + } } } }; @@ -154,7 +184,15 @@ SCMUpdateRefsClosure cl; CLDToOopClosure cldCl(&cl); - _rp->process_all_roots(&cl, &cl, &cldCl, NULL, worker_id); + CodeBlobClosure* code_blobs; +#ifdef ASSERT + AssertToSpaceClosure assert_to_space_oops; + CodeBlobToOopClosure assert_to_space(&assert_to_space_oops, !CodeBlobToOopClosure::FixRelocations); + code_blobs = &assert_to_space; +#else + code_blobs = NULL; +#endif + _rp->process_all_roots(&cl, &cl, &cldCl, code_blobs, worker_id); } }; changeset: 9549:e8f359092e26 user: rkennke date: Tue Feb 14 15:52:50 2017 +0100 summary: Add compatibility with -verbose:gc and -XX:+TraceGenNTime GC options for logging.
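The flags meant here are -verbose:gc (which sets PrintGC) and the generational timing flags -XX:+TraceGen0Time / -XX:+TraceGen1Time. The hunks below route both through Shenandoah's own logging, guarded so that an explicit user setting is never overridden. The aliasing pattern, as a standalone sketch with plain bools standing in for HotSpot's flag macros:

#include <cstdio>

// Stand-ins for HotSpot globals and FLAG_IS_DEFAULT(); illustrative only.
bool PrintGC = false;                     // set by -verbose:gc
bool ShenandoahLogInfo = false;
bool ShenandoahLogInfo_is_default = true;

void apply_shenandoah_flag_aliases() {
  // Derive the collector-specific flag from the generic one, but only when
  // the user has not set the collector flag explicitly on the command line.
  if (PrintGC && ShenandoahLogInfo_is_default) {
    ShenandoahLogInfo = true;
    ShenandoahLogInfo_is_default = false;
  }
}

int main() {
  PrintGC = true;  // as if -verbose:gc had been given
  apply_shenandoah_flag_aliases();
  std::printf("ShenandoahLogInfo = %d\n", (int) ShenandoahLogInfo);
  return 0;
}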
diff -r 20dfe5963777 -r e8f359092e26 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Mon Feb 13 16:51:58 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Tue Feb 14 15:52:50 2017 +0100 @@ -1549,7 +1549,7 @@ } void ShenandoahHeap::print_tracing_info() const { - if (ShenandoahLogInfo) { + if (ShenandoahLogInfo || TraceGen0Time || TraceGen1Time) { ResourceMark rm; outputStream* out = gclog_or_tty; _shenandoah_policy->print_tracing_info(out); diff -r 20dfe5963777 -r e8f359092e26 src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Mon Feb 13 16:51:58 2017 +0100 +++ b/src/share/vm/runtime/arguments.cpp Tue Feb 14 15:52:50 2017 +0100 @@ -1760,6 +1760,10 @@ FLAG_SET_DEFAULT(AlwaysPreTouch, false); FLAG_SET_DEFAULT(ShenandoahAlwaysPreTouch, true); } + + if (PrintGC && FLAG_IS_DEFAULT(ShenandoahLogInfo)) { + FLAG_SET_DEFAULT(ShenandoahLogInfo, true); + } } #if !INCLUDE_ALL_GCS changeset: 9550:215d1315dc60 user: zgu date: Mon Feb 13 16:08:54 2017 -0500 summary: Merge GC worker thread pool and enhance GC thread setup for each GC phase diff -r e8f359092e26 -r 215d1315dc60 src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Tue Feb 14 15:52:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Mon Feb 13 16:08:54 2017 -0500 @@ -847,82 +847,101 @@ * Initial marking phase also update references of live objects from previous concurrent GC cycle, * so we take Java threads and live set into account. */ -uint ShenandoahCollectorPolicy::calc_workers_for_init_marking(uint total_workers, - uint active_workers, +uint ShenandoahCollectorPolicy::calc_workers_for_init_marking(uint active_workers, uint application_workers) { if (!UseDynamicNumberOfGCThreads || (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) { - assert(total_workers > 0, "Always need at least 1"); - return total_workers; + assert(ParallelGCThreads > 0, "Always need at least 1"); + return ParallelGCThreads; } else { ShenandoahCollectorPolicy* policy = (ShenandoahCollectorPolicy*)ShenandoahHeap::heap()->collector_policy(); size_t live_data = policy->_heuristics->bytes_in_cset(); - return calc_default_active_workers(total_workers, (total_workers > 1) ? 2 : 1, + return calc_default_active_workers(ParallelGCThreads, (ParallelGCThreads > 1) ? 2 : 1, active_workers, application_workers, calc_workers_for_java_threads(application_workers), calc_workers_for_live_set(live_data)); } } -uint ShenandoahCollectorPolicy::calc_workers_for_conc_marking(uint total_workers, - uint active_workers, +uint ShenandoahCollectorPolicy::calc_workers_for_conc_marking(uint active_workers, + uint application_workers) { + + if (!UseDynamicNumberOfGCThreads || + (!FLAG_IS_DEFAULT(ConcGCThreads) && !ForceDynamicNumberOfGCThreads)) { + assert(ConcGCThreads > 0, "Always need at least 1"); + return ConcGCThreads; + } else { + return calc_default_active_workers(ConcGCThreads, + (ConcGCThreads > 1 ? 
2 : 1), active_workers, + application_workers, calc_workers_for_java_threads(application_workers), 0); + } +} + +uint ShenandoahCollectorPolicy::calc_workers_for_final_marking(uint active_workers, uint application_workers) { if (!UseDynamicNumberOfGCThreads || (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) { - assert(total_workers > 0, "Always need at least 1"); - return total_workers; + assert(ParallelGCThreads > 0, "Always need at least 1"); + return ParallelGCThreads; } else { - return calc_default_active_workers(total_workers, - (total_workers > 1 ? 2 : 1), active_workers, + return calc_default_active_workers(ParallelGCThreads, + (ParallelGCThreads > 1 ? 2 : 1), active_workers, application_workers, calc_workers_for_java_threads(application_workers), 0); } } -uint ShenandoahCollectorPolicy::calc_workers_for_final_marking(uint total_workers, + // Calculate workers for concurrent evacuation (concurrent GC) +uint ShenandoahCollectorPolicy::calc_workers_for_conc_evacuation(uint active_workers, + uint application_workers) { + if (!UseDynamicNumberOfGCThreads || + (!FLAG_IS_DEFAULT(ConcGCThreads) && !ForceDynamicNumberOfGCThreads)) { + assert(ConcGCThreads > 0, "Always need at least 1"); + return ConcGCThreads; + } else { + return calc_workers_for_evacuation(false, // not a full GC + ConcGCThreads, active_workers, application_workers); + } +} + + // Calculate workers for parallel evacuation (full GC) +uint ShenandoahCollectorPolicy::calc_workers_for_parallel_evacuation(uint active_workers, + uint application_workers) { + if (!UseDynamicNumberOfGCThreads || + (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) { + assert(ParallelGCThreads > 0, "Always need at least 1"); + return ParallelGCThreads; + } else { + return calc_workers_for_evacuation(true, // a full GC + ParallelGCThreads, active_workers, application_workers); + } +} + + +uint ShenandoahCollectorPolicy::calc_workers_for_evacuation(bool full_gc, + uint total_workers, uint active_workers, uint application_workers) { - if (!UseDynamicNumberOfGCThreads || - (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) { - assert(total_workers > 0, "Always need at least 1"); - return total_workers; + // Calculation based on live set + size_t live_data = 0; + ShenandoahHeap* heap = ShenandoahHeap::heap(); + if (full_gc) { + ShenandoahHeapRegionSet* regions = heap->regions(); + for (size_t index = 0; index < regions->active_regions(); index ++) { + live_data += regions->get_fast(index)->get_live_data_bytes(); + } } else { - return calc_default_active_workers(total_workers, - (total_workers > 1 ?
2 : 1), active_workers, - application_workers, calc_workers_for_java_threads(application_workers), 0); + ShenandoahCollectorPolicy* policy = (ShenandoahCollectorPolicy*)heap->collector_policy(); + live_data = policy->_heuristics->bytes_in_cset(); } -} -uint ShenandoahCollectorPolicy::calc_workers_for_evacuation(uint total_workers, - uint active_workers, - uint application_workers) { - - if (!UseDynamicNumberOfGCThreads || - (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) { - assert(total_workers > 0, "Always need at least 1"); - return total_workers; - } else { - // Calculation based on live set - size_t live_data = 0; - ShenandoahHeap* heap = ShenandoahHeap::heap(); - if (heap->is_full_gc_in_progress()) { - ShenandoahHeapRegionSet* regions = heap->regions(); - for (size_t index = 0; index < regions->active_regions(); index ++) { - live_data += regions->get_fast(index)->get_live_data_bytes(); - } - } else { - ShenandoahCollectorPolicy* policy = (ShenandoahCollectorPolicy*)ShenandoahHeap::heap()->collector_policy(); - live_data = policy->_heuristics->bytes_in_cset(); - } - - uint active_workers_by_liveset = calc_workers_for_live_set(live_data); - return calc_default_active_workers(total_workers, + uint active_workers_by_liveset = calc_workers_for_live_set(live_data); + return calc_default_active_workers(total_workers, (total_workers > 1 ? 2 : 1), active_workers, application_workers, 0, active_workers_by_liveset); - } } diff -r e8f359092e26 -r 215d1315dc60 src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Tue Feb 14 15:52:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Mon Feb 13 16:08:54 2017 -0500 @@ -208,21 +208,25 @@ size_t cycle_counter() const; - static uint calc_workers_for_init_marking(uint total_workers, - uint active_workers, + // Calculate the number of workers for initial marking + static uint calc_workers_for_init_marking(uint active_workers, uint application_workers); - static uint calc_workers_for_conc_marking(uint total_workers, - uint active_workers, + // Calculate the number of workers for concurrent marking + static uint calc_workers_for_conc_marking(uint active_workers, uint application_workers); - static uint calc_workers_for_final_marking(uint total_workers, - uint active_workers, - uint application_workers); + // Calculate the number of workers for final marking + static uint calc_workers_for_final_marking(uint active_workers, + uint application_workers); - static uint calc_workers_for_evacuation(uint total_workers, - uint active_workers, - uint application_workers); + // Calculate workers for concurrent evacuation (concurrent GC) + static uint calc_workers_for_conc_evacuation(uint active_workers, + uint application_workers); + + // Calculate workers for parallel evacuation (full GC) + static uint calc_workers_for_parallel_evacuation(uint active_workers, + uint application_workers); private: static uint calc_workers_for_java_threads(uint application_workers); @@ -235,6 +239,11 @@ uint workers_by_java_threads, uint workers_by_liveset); + static uint calc_workers_for_evacuation(bool full_gc, + uint total_workers, + uint active_workers, + uint application_workers); + void print_summary_sd(outputStream* out, const char* str, const HdrSeq* seq); }; diff -r e8f359092e26 -r 215d1315dc60 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp ---
a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Tue Feb 14 15:52:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Mon Feb 13 16:08:54 2017 -0500 @@ -308,14 +308,6 @@ set_process_references(policy->process_references()); set_unload_classes(policy->unload_classes()); - // Set up parallel workers for initial marking - FlexibleWorkGang* workers = heap->workers(); - uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_init_marking( - workers->total_workers(), workers->active_workers(), - Threads::number_of_non_daemon_threads()); - - workers->set_active_workers(nworkers); - mark_roots(); } @@ -370,20 +362,13 @@ void ShenandoahConcurrentMark::mark_from_roots() { ShenandoahHeap* sh = (ShenandoahHeap *) Universe::heap(); + WorkGang* workers = sh->workers(); + uint nworkers = workers->active_workers(); bool update_refs = sh->need_update_refs(); sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::conc_mark); - // Concurrent marking, uses concurrent workers - // Setup workers for concurrent marking - FlexibleWorkGang* workers = sh->conc_workers(); - uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_conc_marking( - workers->total_workers(), workers->active_workers(), - Threads::number_of_non_daemon_threads()); - - workers->set_active_workers(nworkers); - if (process_references()) { ReferenceProcessor* rp = sh->ref_processor(); rp->set_active_mt_degree(nworkers); @@ -422,12 +407,6 @@ ShenandoahHeap* sh = (ShenandoahHeap *) Universe::heap(); - // Setup workers for final marking - FlexibleWorkGang* workers = sh->workers(); - uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_final_marking( - workers->total_workers(), workers->active_workers(), Threads::number_of_non_daemon_threads()); - workers->set_active_workers(nworkers); - TASKQUEUE_STATS_ONLY(reset_taskqueue_stats()); shared_finish_mark_from_roots(/* full_gc = */ false); @@ -940,6 +919,9 @@ * extra queues first. Since marking can push new tasks into the queue associated * with this worker id, we come back to process this queue in the normal loop. 
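 * (Editor's aside: the assert added just below pins the invariant this
 * claiming scheme relies on. claim_next() hands out exactly get_reserved()
 * queues, so the reservation must equal the gang's active worker count;
 * otherwise surplus queues would never be claimed and their outstanding
 * tasks never drained.)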
*/ + assert(queues->get_reserved() == heap->workers()->active_workers(), + "Need to reserve proper number of queues"); + q = queues->claim_next(); while (q != NULL) { if (CANCELLABLE && heap->cancelled_concgc()) { diff -r e8f359092e26 -r 215d1315dc60 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Tue Feb 14 15:52:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Mon Feb 13 16:08:54 2017 -0500 @@ -106,6 +106,7 @@ // Start initial mark under STW: { + // Workers are setup by VM_ShenandoahInitMark TraceCollectorStats tcs(heap->monitoring_support()->stw_collection_counters()); VM_ShenandoahInitMark initMark; heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::total_pause_gross); @@ -119,6 +120,12 @@ // Continue concurrent mark: { + // Setup workers for concurrent marking phase + FlexibleWorkGang* workers = heap->workers(); + uint n_workers = ShenandoahCollectorPolicy::calc_workers_for_conc_marking(workers->active_workers(), + Threads::number_of_non_daemon_threads()); + ShenandoahWorkerScope scope(workers, n_workers); + // GCTraceTime time("Concurrent marking", ShenandoahLogInfo, true, gc_timer, gc_tracer->gc_id()); TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); ShenandoahHeap::heap()->concurrentMark()->mark_from_roots(); @@ -143,6 +150,7 @@ // Proceed to complete marking under STW, and start evacuation: { + // Workers are setup by VM_ShenandoahStartEvacuation TraceCollectorStats tcs(heap->monitoring_support()->stw_collection_counters()); VM_ShenandoahStartEvacuation finishMark; heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::total_pause_gross); @@ -161,6 +169,12 @@ // Continue concurrent evacuation: { + // Setup workers for concurrent evacuation phase + FlexibleWorkGang* workers = heap->workers(); + uint n_workers = ShenandoahCollectorPolicy::calc_workers_for_conc_evacuation(workers->active_workers(), + Threads::number_of_non_daemon_threads()); + ShenandoahWorkerScope scope(workers, n_workers); + // GCTraceTime time("Concurrent evacuation ", ShenandoahLogInfo, true, gc_timer, gc_tracer->gc_id()); TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); heap->do_evacuation(); @@ -170,7 +184,9 @@ if (check_cancellation()) return; heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::reset_bitmaps); - heap->reset_next_mark_bitmap(heap->conc_workers()); + FlexibleWorkGang* workers = heap->workers(); + ShenandoahPushWorkerScope scope(workers, heap->max_workers()); + heap->reset_next_mark_bitmap(workers); heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::reset_bitmaps); gc_timer->register_gc_end(); diff -r e8f359092e26 -r 215d1315dc60 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Tue Feb 14 15:52:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Mon Feb 13 16:08:54 2017 -0500 @@ -265,8 +265,7 @@ _max_allocated_gc(0), _allocated_last_gc(0), _used_start_gc(0), - _max_conc_workers((int) MAX2((uint) ConcGCThreads, 1U)), - _max_parallel_workers((int) MAX2((uint) ParallelGCThreads, 1U)), + _max_workers(MAX2(ConcGCThreads, ParallelGCThreads)), _ref_processor(NULL), _in_cset_fast_test(NULL), _in_cset_fast_test_base(NULL), @@ -293,19 +292,14 @@ _scm = new ShenandoahConcurrentMark(); _used = 0; - // This 
is odd. They are concurrent gc threads, but they are also task threads. - // Framework doesn't allow both. - _workers = new FlexibleWorkGang("Parallel GC Threads", ParallelGCThreads, + _max_workers = MAX2(_max_workers, 1U); + _workers = new FlexibleWorkGang("Shenandoah GC Threads", _max_workers, /* are_GC_task_threads */true, /* are_ConcurrentGC_threads */false); - _conc_workers = new FlexibleWorkGang("Concurrent GC Threads", ConcGCThreads, - /* are_GC_task_threads */true, - /* are_ConcurrentGC_threads */false); - if ((_workers == NULL) || (_conc_workers == NULL)) { + if (_workers == NULL) { vm_exit_during_initialization("Failed necessary allocation."); } else { _workers->initialize_workers(); - _conc_workers->initialize_workers(); } } @@ -434,7 +428,6 @@ } } - _max_workers = MAX(_max_parallel_workers, _max_conc_workers); _scm->initialize(_max_workers); ref_processing_init(); @@ -1266,8 +1259,7 @@ ClassLoaderDataGraph::clear_claimed_marks(); { - uint nworkers = _workers->active_workers(); - ShenandoahRootEvacuator rp(this, nworkers, ShenandoahCollectorPolicy::evac_thread_roots); + ShenandoahRootEvacuator rp(this, workers()->active_workers(), ShenandoahCollectorPolicy::evac_thread_roots); ShenandoahEvacuateUpdateRootsTask roots_task(&rp); workers()->run_task(&roots_task); } @@ -1320,15 +1312,7 @@ ParallelEvacuationTask evacuationTask = ParallelEvacuationTask(this, _collection_set); - // Setup workers for concurrent evacuation - WorkGang* workers = conc_workers(); - uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_evacuation( - workers->total_workers(), workers->active_workers(), Threads::number_of_non_daemon_threads()); - - uint old_num_workers = conc_workers()->active_workers(); - conc_workers()->set_active_workers(nworkers); - conc_workers()->run_task(&evacuationTask); - conc_workers()->set_active_workers(old_num_workers); + workers()->run_task(&evacuationTask); if (ShenandoahLogTrace) { ResourceMark rm; @@ -1540,12 +1524,10 @@ void ShenandoahHeap::print_gc_threads_on(outputStream* st) const { workers()->print_worker_threads_on(st); - conc_workers()->print_worker_threads_on(st); } void ShenandoahHeap::gc_threads_do(ThreadClosure* tcl) const { workers()->threads_do(tcl); - conc_workers()->threads_do(tcl); } void ShenandoahHeap::print_tracing_info() const { @@ -2146,13 +2128,6 @@ return _max_workers; } -uint ShenandoahHeap::max_parallel_workers() { - return _max_parallel_workers; -} -uint ShenandoahHeap::max_conc_workers() { - return _max_conc_workers; -} - void ShenandoahHeap::stop() { // The shutdown sequence should be able to terminate when GC is running. 
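Before the header diff below, it is worth spelling out the model this changeset converges on: rather than one gang for STW parallel work and another for concurrent work, a single gang is sized at startup to the larger of ParallelGCThreads and ConcGCThreads, and each phase narrows or widens the active subset, with the RAII scopes from shenandoahWorkGroup.hpp (added later in this changeset) asserting or restoring the count. A reduced, self-contained sketch with illustrative names:

#include <algorithm>
#include <cassert>

// Stand-ins for FlexibleWorkGang and ShenandoahPushWorkerScope.
struct Gang {
  unsigned total_workers;
  unsigned active_workers;
  explicit Gang(unsigned n) : total_workers(n), active_workers(n) {}
  void set_active(unsigned n) {
    assert(n >= 1 && n <= total_workers);
    active_workers = n;
  }
};

struct PushWorkerScope {
  Gang& gang;
  unsigned saved;
  PushWorkerScope(Gang& g, unsigned n) : gang(g), saved(g.active_workers) {
    gang.set_active(n);
  }
  ~PushWorkerScope() { gang.set_active(saved); }  // restore on phase exit
};

int main() {
  unsigned conc_gc_threads = 2, parallel_gc_threads = 4;
  Gang gang(std::max(1u, std::max(conc_gc_threads, parallel_gc_threads)));
  {
    PushWorkerScope scope(gang, conc_gc_threads);  // e.g. concurrent marking
    assert(gang.active_workers == 2);
  }
  assert(gang.active_workers == 4);  // restored for the next phase
  return 0;
}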
diff -r e8f359092e26 -r 215d1315dc60 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Tue Feb 14 15:52:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Mon Feb 13 16:08:54 2017 -0500 @@ -25,6 +25,7 @@ #define SHARE_VM_GC_SHENANDOAH_SHENANDOAHHEAP_HPP #include "gc_implementation/g1/concurrentMark.hpp" +#include "gc_implementation/shenandoah/shenandoahWorkGroup.hpp" class ConcurrentGCTimer; @@ -128,11 +129,8 @@ #ifndef NDEBUG uint _numAllocs; #endif - uint _max_parallel_workers; - uint _max_conc_workers; uint _max_workers; - FlexibleWorkGang* _conc_workers; FlexibleWorkGang* _workers; @@ -377,12 +375,9 @@ ReferenceProcessor* ref_processor() { return _ref_processor;} - FlexibleWorkGang* conc_workers() const { return _conc_workers;} FlexibleWorkGang* workers() const { return _workers;} - uint max_conc_workers(); uint max_workers(); - uint max_parallel_workers(); void do_evacuation(); ShenandoahHeapRegion* next_compaction_region(const ShenandoahHeapRegion* r); diff -r e8f359092e26 -r 215d1315dc60 src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Tue Feb 14 15:52:50 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Mon Feb 13 16:08:54 2017 -0500 @@ -144,72 +144,76 @@ oopDesc::set_bs(&bs); policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_prepare); - { GCTraceTime time("Pause Full", ShenandoahLogInfo, true, _gc_timer, _heap->tracer()->gc_id()); - if (UseTLAB) { - _heap->ensure_parsability(true); - } + if (UseTLAB) { + _heap->ensure_parsability(true); + } - CodeCache::gc_prologue(); + CodeCache::gc_prologue(); - // We should save the marks of the currently locked biased monitors. - // The marking doesn't preserve the marks of biased objects. - //BiasedLocking::preserve_marks(); + // We should save the marks of the currently locked biased monitors. + // The marking doesn't preserve the marks of biased objects. 
+ //BiasedLocking::preserve_marks(); - _heap->set_need_update_refs(true); + _heap->set_need_update_refs(true); + FlexibleWorkGang* workers = _heap->workers(); + // Setup workers for phase 1 + { + uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_init_marking( + workers->active_workers(), Threads::number_of_non_daemon_threads()); + workers->set_active_workers(nworkers); + ShenandoahWorkerScope scope(workers, nworkers); - // Setup workers for phase 1 - FlexibleWorkGang* workers = _heap->workers(); - uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_init_marking( - workers->total_workers(), workers->active_workers(), Threads::number_of_non_daemon_threads()); - workers->set_active_workers(nworkers); + OrderAccess::fence(); - OrderAccess::fence(); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_mark); + phase1_mark_heap(); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_mark); + } - policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_mark); - phase1_mark_heap(); - policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_mark); + // Setup workers for the rest + { + uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_parallel_evacuation( + workers->active_workers(), Threads::number_of_non_daemon_threads()); - // Setup workers for the rest - nworkers = ShenandoahCollectorPolicy::calc_workers_for_evacuation( - workers->total_workers(), workers->active_workers(), Threads::number_of_non_daemon_threads()); - workers->set_active_workers(nworkers); + ShenandoahWorkerScope scope(workers, nworkers); - OrderAccess::fence(); + OrderAccess::fence(); - policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_calculate_addresses); - ShenandoahHeapRegionSet* copy_queues[_heap->max_parallel_workers()]; - phase2_calculate_target_addresses(copy_queues); - policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_calculate_addresses); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_calculate_addresses); + ShenandoahHeapRegionSet* copy_queues[_heap->max_workers()]; + phase2_calculate_target_addresses(copy_queues); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_calculate_addresses); - OrderAccess::fence(); + OrderAccess::fence(); - policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_adjust_pointers); - phase3_update_references(); - policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_adjust_pointers); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_adjust_pointers); + phase3_update_references(); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_adjust_pointers); - policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_copy_objects); - phase4_compact_objects(copy_queues); - policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_copy_objects); + policy->record_phase_start(ShenandoahCollectorPolicy::full_gc_copy_objects); + phase4_compact_objects(copy_queues); + policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_copy_objects); - CodeCache::gc_epilogue(); - JvmtiExport::gc_epilogue(); + CodeCache::gc_epilogue(); + JvmtiExport::gc_epilogue(); + } - // refs processing: clean slate - // rp.enqueue_discovered_references(); + // refs processing: clean slate + // rp.enqueue_discovered_references(); - if (ShenandoahVerify) { - _heap->verify_heap_after_evacuation(); - } + if (ShenandoahVerify) { + _heap->verify_heap_after_evacuation(); + } - _heap->set_bytes_allocated_since_cm(0); + _heap->set_bytes_allocated_since_cm(0); - 
_heap->set_need_update_refs(false); + _heap->set_need_update_refs(false); - _heap->set_full_gc_in_progress(false); + _heap->set_full_gc_in_progress(false); } _gc_timer->register_gc_end(); @@ -467,7 +471,7 @@ heap->heap_region_iterate(&cl); // Initialize copy queues. - for (uint i = 0; i < heap->max_parallel_workers(); i++) { + for (uint i = 0; i < heap->max_workers(); i++) { copy_queues[i] = new ShenandoahHeapRegionSet(heap->max_regions()); } @@ -694,7 +698,7 @@ // Also clear the next bitmap in preparation for next marking. heap->reset_next_mark_bitmap(heap->workers()); - for (uint i = 0; i < heap->max_parallel_workers(); i++) { + for (uint i = 0; i < heap->max_workers(); i++) { delete copy_queues[i]; } diff -r e8f359092e26 -r 215d1315dc60 src/share/vm/gc_implementation/shenandoah/shenandoahWorkGroup.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahWorkGroup.cpp Mon Feb 13 16:08:54 2017 -0500 @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017, Red Hat, Inc. and/or its affiliates. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "gc_implementation/shenandoah/shenandoahWorkGroup.hpp" + +ShenandoahWorkerScope::ShenandoahWorkerScope(FlexibleWorkGang* workers, uint nworkers) : + _workers(workers), _n_workers(nworkers) { + _workers->set_active_workers(nworkers); +} + +ShenandoahWorkerScope::~ShenandoahWorkerScope() { + assert(_workers->active_workers() == _n_workers, + "Active workers can not be changed within this scope"); +} + +ShenandoahPushWorkerScope::ShenandoahPushWorkerScope(FlexibleWorkGang* workers, uint nworkers) : + _workers(workers), _old_workers(workers->active_workers()), _n_workers(nworkers) { + _workers->set_active_workers(nworkers); +} + +ShenandoahPushWorkerScope::~ShenandoahPushWorkerScope() { + assert(_workers->active_workers() == _n_workers, + "Active workers can not be changed within this scope"); + // Restore old worker value + _workers->set_active_workers(_old_workers); +} diff -r e8f359092e26 -r 215d1315dc60 src/share/vm/gc_implementation/shenandoah/shenandoahWorkGroup.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahWorkGroup.hpp Mon Feb 13 16:08:54 2017 -0500 @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017, Red Hat, Inc. and/or its affiliates. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAH_WORKGROUP_HPP
+#define SHARE_VM_GC_SHENANDOAH_SHENANDOAH_WORKGROUP_HPP
+
+#include "utilities/workgroup.hpp"
+#include "memory/allocation.hpp"
+
+
+class ShenandoahWorkerScope : public StackObj {
+private:
+  uint _n_workers;
+  FlexibleWorkGang* _workers;
+public:
+  ShenandoahWorkerScope(FlexibleWorkGang* workers, uint nworkers);
+  ~ShenandoahWorkerScope();
+};
+
+
+class ShenandoahPushWorkerScope : StackObj {
+private:
+  uint _n_workers;
+  uint _old_workers;
+  FlexibleWorkGang* _workers;
+
+public:
+  ShenandoahPushWorkerScope(FlexibleWorkGang* workers, uint nworkers);
+  ~ShenandoahPushWorkerScope();
+};
+
+#endif // SHARE_VM_GC_SHENANDOAH_SHENANDOAH_WORKGROUP_HPP
diff -r e8f359092e26 -r 215d1315dc60 src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp
--- a/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp  Tue Feb 14 15:52:50 2017 +0100
+++ b/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp  Mon Feb 13 16:08:54 2017 -0500
@@ -25,8 +25,9 @@
 #include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp"
 #include "gc_implementation/shenandoah/shenandoahMarkCompact.hpp"
 #include "gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp"
+#include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp"
+#include "gc_implementation/shenandoah/shenandoahWorkGroup.hpp"
 #include "gc_implementation/shenandoah/vm_operations_shenandoah.hpp"
-#include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp"
 
 VM_Operation::VMOp_Type VM_ShenandoahInitMark::type() const {
   return VMOp_ShenandoahInitMark;
@@ -38,6 +39,14 @@
 
 void VM_ShenandoahInitMark::doit() {
   ShenandoahHeap *sh = (ShenandoahHeap*) Universe::heap();
+  FlexibleWorkGang* workers = sh->workers();
+
+  // Calculate workers for initial marking
+  uint nworkers = ShenandoahCollectorPolicy::calc_workers_for_init_marking(
+    workers->active_workers(), Threads::number_of_non_daemon_threads());
+
+  ShenandoahWorkerScope scope(workers, nworkers);
+
   GCTraceTime time("Pause Init-Mark", ShenandoahLogInfo, true, sh->gc_timer(), sh->tracer()->gc_id());
   sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::total_pause);
   sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::init_mark);
@@ -105,6 +114,12 @@
   // evacuate roots right after finishing marking, so that we don't
   // get unmarked objects in the roots.
   ShenandoahHeap *sh = ShenandoahHeap::heap();
+  // Setup workers for final marking
+  FlexibleWorkGang* workers = sh->workers();
+  uint n_workers = ShenandoahCollectorPolicy::calc_workers_for_final_marking(workers->active_workers(),
+    Threads::number_of_non_daemon_threads());
+  ShenandoahWorkerScope scope(workers, n_workers);
+
   if (! sh->cancelled_concgc()) {
     GCTraceTime time("Pause Final Mark", ShenandoahLogInfo, true, sh->gc_timer(), sh->tracer()->gc_id());
     sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::total_pause);

changeset:   9551:901a3145878f
user:        shade
date:        Wed Feb 15 16:49:46 2017 +0100
summary:     Backport most JDK 9 tests.

diff -r 215d1315dc60 -r 901a3145878f src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp
--- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp  Mon Feb 13 16:08:54 2017 -0500
+++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp  Wed Feb 15 16:49:46 2017 +0100
@@ -45,14 +45,14 @@
           "code that manipulates final fields."                           \
           "Defaults to true. ")                                           \
                                                                           \
-  product(size_t, ShenandoahHeapRegionSize, 0,                            \
+  product(uintx, ShenandoahHeapRegionSize, 0,                             \
           "Size of the Shenandoah regions. "                              \
           "Determined automatically by default.")                         \
                                                                           \
-  experimental(size_t, ShenandoahMinRegionSize, 1 * M,                    \
+  experimental(uintx, ShenandoahMinRegionSize, 1 * M,                     \
           "Minimum heap region size. ")                                   \
                                                                           \
-  experimental(size_t, ShenandoahMaxRegionSize, 32 * M,                   \
+  experimental(uintx, ShenandoahMaxRegionSize, 32 * M,                    \
           "Maximum heap region size. ")                                   \
                                                                           \
   experimental(size_t, ShenandoahTargetNumRegions, 2048,                  \
diff -r 215d1315dc60 -r 901a3145878f test/TEST.groups
--- a/test/TEST.groups  Mon Feb 13 16:08:54 2017 -0500
+++ b/test/TEST.groups  Wed Feb 15 16:49:46 2017 +0100
@@ -136,8 +136,6 @@
   -gc/g1/TestGreyReclaimedHumongousObjects.java
 
 hotspot_gc_shenandoah = \
-  gc/stress/TestGCOldWithShenandoah.java \
-  gc/stress/gcbasher/TestGCBasherWithShenandoah.java \
   gc/shenandoah/
 
 hotspot_fast_gc_shenandoah = \
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/AlwaysPreTouch.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/AlwaysPreTouch.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test AlwaysPreTouch
+ * @summary Check that Shenandoah's AlwaysPreTouch does not fire asserts
+ * @run main/othervm -XX:+UseShenandoahGC -XX:+AlwaysPreTouch -Xmx2g -Xms2g AlwaysPreTouch
+ */
+
+public class AlwaysPreTouch {
+
+    public static void main(String[] args) throws Exception {
+        // checking the initialization before entering main()
+    }
+
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/C1VectorizedMismatch.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/C1VectorizedMismatch.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/* @test
+ * @summary test C1 vectorized mismatch intrinsic
+ * @run main/othervm -XX:TieredStopAtLevel=1 -XX:+UseShenandoahGC -XX:ShenandoahGCHeuristics=aggressive C1VectorizedMismatch
+ */
+
+import java.util.Arrays;
+
+public class C1VectorizedMismatch {
+
+    private static final int NUM_RUNS = 10000;
+    private static final int ARRAY_SIZE = 10000;
+    private static int[] a;
+    private static int[] b;
+
+    public static void main(String[] args) {
+        a = new int[ARRAY_SIZE];
+        b = new int[ARRAY_SIZE];
+        for (int i = 0; i < NUM_RUNS; i++) {
+            test();
+        }
+    }
+
+    private static void test() {
+        int[] a1 = new int[ARRAY_SIZE];
+        int[] b1 = new int[ARRAY_SIZE];
+        fillArray(a);
+        System.arraycopy(a, 0, b, 0, ARRAY_SIZE);
+        if (! Arrays.equals(a, b)) {
+            throw new RuntimeException("arrays not equal");
+        }
+    }
+
+    private static void fillArray(int[] array) {
+        for (int i = 0; i < ARRAY_SIZE; i++) {
+            int val = (int) (Math.random() * Integer.MAX_VALUE);
+            array[i] = val;
+        }
+    }
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/EvilSyncBug.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/EvilSyncBug.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test EvilSyncBug
+ * @summary Tests for crash/assert when attaching init thread during shutdown
+ * @key gc
+ * @library /testlibrary
+ * @modules java.base/jdk.internal.misc
+ *          java.management
+ * @run driver EvilSyncBug
+ */
+
+import java.util.*;
+import java.util.concurrent.*;
+import java.util.concurrent.locks.*;
+
+import com.oracle.java.testlibrary.*;
+
+public class EvilSyncBug {
+
+    private static final int NUM_RUNS = 100;
+
+    static Thread[] hooks = new MyHook[10000];
+
+    public static void main(String[] args) throws Exception {
+        if (args.length > 0) {
+            test();
+        } else {
+            for (int i = 0; i < NUM_RUNS; i++) {
+                ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-Xmx128m",
+                        "-Xmx128m",
+                        "-XX:+UseShenandoahGC",
+                        "-XX:ShenandoahGCHeuristics=aggressive",
+                        "-XX:+UnlockDiagnosticVMOptions",
+                        "-XX:+ShenandoahStoreCheck",
+                        "EvilSyncBug", "test");
+                OutputAnalyzer output = new OutputAnalyzer(pb.start());
+                output.shouldHaveExitValue(0);
+            }
+        }
+    }
+
+    private static void test() throws Exception {
+
+        for (int t = 0; t < hooks.length; t++) {
+            hooks[t] = new MyHook();
+        }
+
+        ExecutorService service = Executors.newFixedThreadPool(
+                2,
+                r -> {
+                    Thread t = new Thread(r);
+                    t.setDaemon(true);
+                    return t;
+                }
+        );
+
+        List<Future<?>> futures = new ArrayList<>();
+        for (int c = 0; c < 100; c++) {
+            Runtime.getRuntime().addShutdownHook(hooks[c]);
+            final Test[] tests = new Test[1000];
+            for (int t = 0; t < tests.length; t++) {
+                tests[t] = new Test();
+            }
+
+            Future<?> f1 = service.submit(() -> {
+                Runtime.getRuntime().addShutdownHook(new MyHook());
+                IntResult2 r = new IntResult2();
+                for (Test test : tests) {
+                    test.RL_Us(r);
+                }
+            });
+            Future<?> f2 = service.submit(() -> {
+                Runtime.getRuntime().addShutdownHook(new MyHook());
+                for (Test test : tests) {
+                    test.WLI_Us();
+                }
+            });
+
+            futures.add(f1);
+            futures.add(f2);
+        }
+
+        for (Future<?> f : futures) {
+            f.get();
+        }
+    }
+
+    public static class IntResult2 {
+        int r1, r2;
+    }
+
+    public static class Test {
+        final StampedLock lock = new StampedLock();
+
+        int x, y;
+
+        public void RL_Us(IntResult2 r) {
+            StampedLock lock = this.lock;
+            long stamp = lock.readLock();
+            r.r1 = x;
+            r.r2 = y;
+            lock.unlock(stamp);
+        }
+
+        public void WLI_Us() {
+            try {
+                StampedLock lock = this.lock;
+                long stamp = lock.writeLockInterruptibly();
+                x = 1;
+                y = 2;
+                lock.unlock(stamp);
+            } catch (InterruptedException e) {
+                throw new RuntimeException(e);
+            }
+        }
+    }
+
+    private static class MyHook extends Thread {
+        @Override
+        public void run() {
+            try {
+                Thread.sleep(10);
+            } catch (Exception e) {}
+        }
+    }
+
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/HumongousRegionReclaimTest/TestHumongous.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/HumongousRegionReclaimTest/TestHumongous.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+/*
+ * @test TestHumongous
+ * @summary test reclaim of humongous object
+ * @run main/othervm -Xint -XX:+UseShenandoahGC -XX:ShenandoahHeapRegionSize=1M TestHumongous
+ */
+
+public class TestHumongous {
+    // Shenandoah heap memory region size = 1 M
+    private static int M = 1024 * 1024;
+    public static void main(String[] args) {
+        // Construct a humongous object (oop) that just fits multiple regions:
+        // 8 bytes for the object header, 8 bytes for the brooks pointer
+        int size = 2 * M - 8 - 8;
+        // The oop spans 2 regions
+        char[] ch = new char[size];
+        for (int index = 0; index < 10; index ++) {
+            ch[index] = 'A';
+        }
+
+        System.out.print(ch[1]);
+        ch = null;
+        // Force a GC to clean up the above object
+        System.gc();
+    }
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/LargeObjectAlignment.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/LargeObjectAlignment.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test LargeObjectAlignment
+ * @summary Shenandoah crashes with -XX:ObjectAlignmentInBytes=16
+ * @run main/othervm -XX:+UseShenandoahGC -XX:ObjectAlignmentInBytes=16 -Xint LargeObjectAlignment
+ * @run main/othervm -XX:+UseShenandoahGC -XX:ObjectAlignmentInBytes=16 -XX:-TieredCompilation LargeObjectAlignment
+ * @run main/othervm -XX:+UseShenandoahGC -XX:ObjectAlignmentInBytes=16 -XX:TieredStopAtLevel=1 LargeObjectAlignment
+ * @run main/othervm -XX:+UseShenandoahGC -XX:ObjectAlignmentInBytes=16 -XX:TieredStopAtLevel=4 LargeObjectAlignment
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ThreadLocalRandom;
+
+public class LargeObjectAlignment {
+
+    static final int SLABS_COUNT = Integer.getInteger("slabs", 10000);
+    static final int NODE_COUNT = Integer.getInteger("nodes", 10000);
+    static final long TIME_NS = 1000L * 1000L * Integer.getInteger("timeMs", 5000);
+
+    static Object[] objects;
+
+    public static void main(String[] args) throws Exception {
+        objects = new Object[SLABS_COUNT];
+
+        long start = System.nanoTime();
+        while (System.nanoTime() - start < TIME_NS) {
+            objects[ThreadLocalRandom.current().nextInt(SLABS_COUNT)] = createSome();
+        }
+    }
+
+    public static Object createSome() {
+        List<Integer> result = new ArrayList<Integer>();
+        for (int c = 0; c < NODE_COUNT; c++) {
+            result.add(new Integer(c));
+        }
+        return result;
+    }
+
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/MXNotificationsFullGC.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/MXNotificationsFullGC.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test MXNotificationsFullGC
+ * @summary Check that full GC notifications are reported on Shenandoah's full GCs
+ * @run main/othervm -XX:+UseShenandoahGC -XX:ShenandoahGCHeuristics=passive -Xmx1g -Xms1g MXNotificationsFullGC
+ */
+
+import javax.management.*;
+import java.lang.management.*;
+
+public class MXNotificationsFullGC {
+
+    static volatile boolean notified;
+    static volatile Object sink;
+
+    public static void main(String[] args) throws Exception {
+        NotificationListener listener = new NotificationListener() {
+            @Override
+            public void handleNotification(Notification n, Object o) {
+                if (n.getType().equals(com.sun.management.GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION)) {
+                    notified = true;
+                }
+            }
+        };
+
+        for (GarbageCollectorMXBean bean : ManagementFactory.getGarbageCollectorMXBeans()) {
+            ((NotificationEmitter) bean).addNotificationListener(listener, null, null);
+        }
+
+        // Allocate 4*100K*10K = 4G, enough to blow the 1G heap into full GC
+        for (int c = 0; c < 10_000; c++) {
+            sink = new int[100_000];
+        }
+
+        // GC notifications are asynchronous, wait a little
+        Thread.sleep(1000);
+
+        if (!notified) {
+            throw new IllegalStateException("Should have been notified");
+        }
+    }
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/TestAllocLargeObjOOM.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/TestAllocLargeObjOOM.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test TestAllocLargeObjOOM
+ * @key gc
+ * @summary Test that allocation of an object larger than the heap results in OOM
+ * @library /testlibrary
+ * @modules java.base/jdk.internal.misc
+ *          java.management
+ * @run main/othervm -Xmx10m -XX:+UseShenandoahGC TestAllocLargeObjOOM
+ */
+import com.oracle.java.testlibrary.*;
+import java.lang.ref.SoftReference;
+import java.util.LinkedList;
+import java.util.Random;
+
+public class TestAllocLargeObjOOM {
+    public static void main(String[] args) {
+        try {
+            Random random = new Random(123);
+            long total_memory = Runtime.getRuntime().totalMemory();
+            int size = (int) ((total_memory + 7) / 8);
+            long[] large_array = new long[size];
+
+            System.out.println(large_array[random.nextInt() % size]);
+            Asserts.assertTrue(false, "Should not reach here");
+        } catch (OutOfMemoryError e) {
+            Asserts.assertTrue(true, "Expected OOM");
+        }
+    }
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/TestAllocSmallObjOOM.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/TestAllocSmallObjOOM.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test TestAllocSmallObjOOM
+ * @summary Test that allocation of small objects results in OOM, but does not crash the JVM
+ * @modules java.base/jdk.internal.misc
+ * @library /testlibrary
+ * @run main/othervm TestAllocSmallObjOOM
+ */
+
+import com.oracle.java.testlibrary.*;
+
+
+public class TestAllocSmallObjOOM {
+
+    public static void main(String[] args) {
+        try {
+            // Small heap size should result in OOM during loading of system classes
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-Xmx5m", "-XX:+UseShenandoahGC");
+            OutputAnalyzer analyzer = new OutputAnalyzer(pb.start());
+            analyzer.shouldHaveExitValue(1);
+            analyzer.shouldContain("java.lang.OutOfMemoryError: Java heap space");
+        } catch (Exception e) {
+        }
+
+    }
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/TestRegionSizeArgs.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/TestRegionSizeArgs.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test TestRegionSizeArgs
+ * @summary Test that Shenandoah region size args are checked
+ * @key gc
+ * @library /testlibrary
+ * @modules java.base/jdk.internal.misc
+ *          java.management
+ * @run driver TestRegionSizeArgs
+ */
+
+import com.oracle.java.testlibrary.*;
+
+public class TestRegionSizeArgs {
+    public static void main(String[] args) throws Exception {
+        testInvalidRegionSizes();
+        testMinRegionSize();
+        testMaxRegionSize();
+    }
+
+    private static void testInvalidRegionSizes() throws Exception {
+
+        {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+                    "-Xms100m",
+                    "-XX:ShenandoahHeapRegionSize=200m",
+                    "-version");
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldMatch("Invalid -XX:ShenandoahHeapRegionSize option");
+            output.shouldHaveExitValue(1);
+        }
+
+        {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+                    "-Xms100m",
+                    "-XX:ShenandoahHeapRegionSize=11m",
+                    "-version");
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldMatch("Invalid -XX:ShenandoahHeapRegionSize option");
+            output.shouldHaveExitValue(1);
+        }
+
+        {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+                    "-Xms100m",
+                    "-XX:ShenandoahHeapRegionSize=9m",
+                    "-version");
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldHaveExitValue(0);
+        }
+
+        {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+                    "-Xms100m",
+                    "-XX:ShenandoahHeapRegionSize=255K",
+                    "-version");
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldMatch("Invalid -XX:ShenandoahHeapRegionSize option");
+            output.shouldHaveExitValue(1);
+        }
+
+        {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+                    "-Xms100m",
+                    "-XX:ShenandoahHeapRegionSize=260K",
+                    "-version");
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldHaveExitValue(0);
+        }
+    }
+
+    private static void testMinRegionSize() throws Exception {
+
+        {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+                    "-Xms100m",
+                    "-XX:+UnlockExperimentalVMOptions",
+                    "-XX:ShenandoahMinRegionSize=255K",
+                    "-version");
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldMatch("Invalid -XX:ShenandoahMinRegionSize option");
+            output.shouldHaveExitValue(1);
+        }
+
+        {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+                    "-Xms100m",
+                    "-XX:+UnlockExperimentalVMOptions",
+                    "-XX:ShenandoahMinRegionSize=1M",
+                    "-XX:ShenandoahMaxRegionSize=260K",
+                    "-version");
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldMatch("Invalid -XX:ShenandoahMinRegionSize or -XX:ShenandoahMaxRegionSize");
+            output.shouldHaveExitValue(1);
+        }
+
+        {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+                    "-Xms100m",
+                    "-XX:+UnlockExperimentalVMOptions",
+                    "-XX:ShenandoahMinRegionSize=200m",
+                    "-version");
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldMatch("Invalid -XX:ShenandoahMinRegionSize option");
+            output.shouldHaveExitValue(1);
+        }
+
+        {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+                    "-Xms100m",
+                    "-XX:+UnlockExperimentalVMOptions",
+                    "-XX:ShenandoahMinRegionSize=11m",
+                    "-version");
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldMatch("Invalid -XX:ShenandoahMinRegionSize option");
+            output.shouldHaveExitValue(1);
+        }
+
+        {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+                    "-Xms100m",
+                    "-XX:+UnlockExperimentalVMOptions",
+                    "-XX:ShenandoahMinRegionSize=9m",
+                    "-version");
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldHaveExitValue(0);
+        }
+
+    }
+
+    private static void testMaxRegionSize() throws Exception {
+
+        {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+                    "-Xms100m",
+                    "-XX:+UnlockExperimentalVMOptions",
+                    "-XX:ShenandoahMaxRegionSize=255K",
+                    "-version");
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldMatch("Invalid -XX:ShenandoahMaxRegionSize option");
+            output.shouldHaveExitValue(1);
+        }
+
+        {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+                    "-Xms100m",
+                    "-XX:+UnlockExperimentalVMOptions",
+                    "-XX:ShenandoahMinRegionSize=1M",
+                    "-XX:ShenandoahMaxRegionSize=260K",
+                    "-version");
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldMatch("Invalid -XX:ShenandoahMinRegionSize or -XX:ShenandoahMaxRegionSize");
+            output.shouldHaveExitValue(1);
+        }
+    }
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/TestSingleThreadedShenandoah.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/TestSingleThreadedShenandoah.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/* @test
+ * @summary test single worker threaded Shenandoah
+ * @run main/othervm -XX:+UseShenandoahGC -XX:ShenandoahGCHeuristics=aggressive -XX:ParallelGCThreads=1 -XX:ConcGCThreads=1 TestSingleThreadedShenandoah
+ */
+
+public class TestSingleThreadedShenandoah {
+
+    public static void main(String[] args) {
+        // If the bug is present, the VM crashes before we get here.
+    }
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/acceptance/AllocIntArrays.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/acceptance/AllocIntArrays.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test AllocIntArrays
+ * @summary Acceptance tests: collector can withstand allocation
+ * @run main/othervm -XX:+UseShenandoahGC AllocIntArrays
+ * @run main/othervm -XX:+UseShenandoahGC -Xmx2g -Xms2g AllocIntArrays
+ */
+
+public class AllocIntArrays {
+
+    static final long TARGET_MB = Long.getLong("target", 20_000); // 20 GB allocation
+
+    static volatile Object sink;
+
+    public static void main(String[] args) throws Exception {
+        final int min = 10;
+        final int max = 10_000_000;
+        for (int s = min; s <= max; s *= 10) {
+            System.out.println("int[" + s + "]");
+            long count = TARGET_MB * 1024 * 1024 / (16 + 4 * s);
+            for (long c = 0; c < count; c++) {
+                sink = new int[s];
+            }
+        }
+    }
+
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/acceptance/AllocObjectArrays.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/acceptance/AllocObjectArrays.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test AllocObjectArrays
+ * @summary Acceptance tests: collector can withstand allocation
+ * @run main/othervm -XX:+UseShenandoahGC AllocObjectArrays
+ * @run main/othervm -XX:+UseShenandoahGC -Xmx2g -Xms2g AllocObjectArrays
+ */
+
+public class AllocObjectArrays {
+
+    static final long TARGET_MB = Long.getLong("target", 20_000); // 20 GB allocation
+
+    static volatile Object sink;
+
+    public static void main(String[] args) throws Exception {
+        final int min = 10;
+        final int max = 10_000_000;
+        for (int s = min; s <= max; s *= 10) {
+            System.out.println("Object[" + s + "]");
+            long count = TARGET_MB * 1024 * 1024 / (16 + 8 * s);
+            for (long c = 0; c < count; c++) {
+                sink = new Object[s];
+            }
+        }
+    }
+
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/acceptance/AllocObjects.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/acceptance/AllocObjects.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test AllocObjects
+ * @summary Acceptance tests: collector can withstand allocation
+ * @run main/othervm -XX:+UseShenandoahGC AllocObjects
+ * @run main/othervm -XX:+UseShenandoahGC -Xmx2g -Xms2g AllocObjects
+ */
+
+public class AllocObjects {
+
+    static final long TARGET_MB = Long.getLong("target", 50_000); // 50 GB allocation
+
+    static volatile Object sink;
+
+    public static void main(String[] args) throws Exception {
+        long count = TARGET_MB * 1024 * 1024 / 16;
+        for (long c = 0; c < count; c++) {
+            sink = new Object();
+        }
+    }
+
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/cas/ReferenceCAS.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/cas/ReferenceCAS.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test Shenandoah reference CAS test
+ *
+ * @run testng/othervm -Diters=20000 -XX:ShenandoahGCHeuristics=aggressive -XX:+UseShenandoahGC ReferenceCAS
+ * @run testng/othervm -Diters=100 -XX:ShenandoahGCHeuristics=aggressive -XX:+UseShenandoahGC -Xint ReferenceCAS
+ * @run testng/othervm -Diters=20000 -XX:ShenandoahGCHeuristics=aggressive -XX:+UseShenandoahGC -XX:-TieredCompilation ReferenceCAS
+ * @run testng/othervm -Diters=20000 -XX:ShenandoahGCHeuristics=aggressive -XX:+UseShenandoahGC -XX:TieredStopAtLevel=1 ReferenceCAS
+ * @run testng/othervm -Diters=20000 -XX:ShenandoahGCHeuristics=aggressive -XX:+UseShenandoahGC -XX:TieredStopAtLevel=4 ReferenceCAS
+ *
+ * @run testng/othervm -Diters=20000 -XX:ShenandoahGCHeuristics=aggressive -XX:+UseShenandoahGC -XX:-UseCompressedOops ReferenceCAS
+ * @run testng/othervm -Diters=100 -XX:ShenandoahGCHeuristics=aggressive -XX:+UseShenandoahGC -XX:-UseCompressedOops -Xint ReferenceCAS
+ * @run testng/othervm -Diters=20000 -XX:ShenandoahGCHeuristics=aggressive -XX:+UseShenandoahGC -XX:-UseCompressedOops -XX:-TieredCompilation ReferenceCAS
+ * @run testng/othervm -Diters=20000 -XX:ShenandoahGCHeuristics=aggressive -XX:+UseShenandoahGC -XX:-UseCompressedOops -XX:TieredStopAtLevel=1 ReferenceCAS
+ * @run testng/othervm -Diters=20000 -XX:ShenandoahGCHeuristics=aggressive -XX:+UseShenandoahGC -XX:-UseCompressedOops -XX:TieredStopAtLevel=4 ReferenceCAS
+ */
+
+import org.testng.annotations.Test;
+
+import java.lang.reflect.Field;
+
+import static org.testng.Assert.*;
+
+public class ReferenceCAS {
+
+    static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
+
+    static final sun.misc.Unsafe UNSAFE;
+    static final long V_OFFSET;
+
+    static {
+        try {
+            Field f = sun.misc.Unsafe.class.getDeclaredField("theUnsafe");
+            f.setAccessible(true);
+            UNSAFE = (sun.misc.Unsafe) f.get(null);
+        } catch (Exception e) {
+            throw new RuntimeException("Unable to get Unsafe instance.", e);
+        }
+
+        try {
+            Field vField = ReferenceCAS.class.getDeclaredField("v");
+            V_OFFSET = UNSAFE.objectFieldOffset(vField);
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    Object v;
+
+    @Test
+    public void testFieldInstance() {
+        ReferenceCAS t = new ReferenceCAS();
+        for (int c = 0; c < ITERS; c++) {
+            testAccess(t, V_OFFSET);
+        }
+    }
+
+    static void testAccess(Object base, long offset) {
+        String foo = new String("foo");
+        String bar = new String("bar");
+        String baz = new String("baz");
+        UNSAFE.putObject(base, offset, "foo");
+        {
+            String newval = bar;
+            boolean r = UNSAFE.compareAndSwapObject(base, offset, "foo", newval);
+            assertEquals(r, true, "success compareAndSwap Object");
+            assertEquals(newval, "bar", "must not destroy newval");
+            Object x = UNSAFE.getObject(base, offset);
+            assertEquals(x, "bar", "success compareAndSwap Object value");
+        }
+
+        {
+            String newval = baz;
+            boolean r = UNSAFE.compareAndSwapObject(base, offset, "foo", newval);
+            assertEquals(r, false, "failing compareAndSwap Object");
+            assertEquals(newval, "baz", "must not destroy newval");
+            Object x = UNSAFE.getObject(base, offset);
+            assertEquals(x, "bar", "failing compareAndSwap Object value");
+        }
+    }
+
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/compiler/TestExpandedWBLostNullCheckDep.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/compiler/TestExpandedWBLostNullCheckDep.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @summary Logic that moves a null check in the expanded barrier may cause a memory access that doesn't depend on the barrier to bypass the null check
+ * @requires vm.flavor == "server"
+ * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -XX:-TieredCompilation -XX:+UseShenandoahGC -XX:+UnlockDiagnosticVMOptions -XX:+StressGCM -XX:+StressLCM TestExpandedWBLostNullCheckDep
+ */
+
+public class TestExpandedWBLostNullCheckDep {
+
+    static void test(int i, int[] arr) {
+        // arr.length depends on a null check for arr
+        if (i < 0 || i >= arr.length) {
+        }
+        // The write barrier here also depends on the null check. The
+        // null check is moved in the barrier to enable implicit null
+        // checks. The null check must not be moved above the arr.length
+        // access.
+        arr[i] = 0x42;
+    }
+
+    static public void main(String[] args) {
+        int[] int_arr = new int[10];
+        for (int i = 0; i < 20000; i++) {
+            test(0, int_arr);
+        }
+        try {
+            test(0, null);
+        } catch (NullPointerException npe) {}
+    }
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/compiler/TestNullCheck.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/compiler/TestNullCheck.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @summary implicit null check on brooks pointer must not cause crash
+ * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -XX:-TieredCompilation -XX:+UseShenandoahGC -Xmx4G -XX:HeapBaseMinAddress=32G TestNullCheck
+ */
+
+// HeapBaseMinAddress above forces compressed oops with a base
+
+public class TestNullCheck {
+
+    int f;
+
+    static int test1(TestNullCheck o) {
+        return o.f;
+    }
+
+    static TestNullCheck static_obj = new TestNullCheck();
+
+    static int test2() {
+        return static_obj.f;
+    }
+
+    static public void main(String[] args) {
+        TestNullCheck o = new TestNullCheck();
+        for (int i = 0; i < 20000; i++) {
+            test1(o);
+            test2();
+        }
+        try {
+            test1(null);
+        } catch (NullPointerException npe) {}
+        static_obj = null;
+        try {
+            test2();
+        } catch (NullPointerException npe) {}
+    }
+}
diff -r 215d1315dc60 -r 901a3145878f test/gc/shenandoah/compiler/TestWriteBarrierClearControl.java
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/shenandoah/compiler/TestWriteBarrierClearControl.java  Wed Feb 15 16:49:46 2017 +0100
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @summary Clearing control during final graph reshape causes memory barrier to lose dependency on null check
+ * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -XX:+UseShenandoahGC -XX:-TieredCompilation -XX:+UnlockDiagnosticVMOptions -XX:+StressLCM -XX:+StressGCM TestWriteBarrierClearControl
+ *
+ */
+public class TestWriteBarrierClearControl {
+
+    int f;
+
+    static void test1(TestWriteBarrierClearControl o) {
+        o.f = 0x42;
+    }
+
+    static TestWriteBarrierClearControl fo = new TestWriteBarrierClearControl();
+
+    static void test2() {
+        TestWriteBarrierClearControl o = fo;
+        o.f = 0x42;
+    }
+
+    static public void main(String[] args) {
+        TestWriteBarrierClearControl o = new TestWriteBarrierClearControl();
+        for (int i = 0; i < 20000; i++) {
+            test1(o);
+            test2();
+        }
+        try {
+            test1(null);
+        } catch (NullPointerException npe) {}
+        fo = null;
+        try {
+            test2();
+        } catch (NullPointerException npe) {}
+    }
+}

changeset:   9552:b822aaaf5b7b
user:        shade
date:        Wed Feb 15 17:13:20 2017 +0100
summary:     Sync up vm_operations_shenandoah.* against JDK 9.
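The sync below folds the scattered out-of-line type()/name() definitions into a single inline class hierarchy rooted at VM_ShenandoahOperation, so the header alone describes the whole operation set. A minimal standalone sketch of that pattern; VMOperation here is a hypothetical stand-in for HotSpot's VM_Operation, which is not reproduced:

    #include <cstdio>

    // Stand-in for HotSpot's VM_Operation base class (assumption,
    // simplified to the two virtuals that matter for this pattern).
    class VMOperation {
    public:
      virtual ~VMOperation() {}
      virtual const char* name() const = 0;
      virtual void doit() = 0;
    };

    // Common root for all Shenandoah VM operations.
    class VM_ShenandoahOperation : public VMOperation {};

    // Leaves define name() inline in the header instead of in the .cpp,
    // which is the shape the diff below moves the real code toward.
    class VM_ShenandoahInitMark : public VM_ShenandoahOperation {
    public:
      const char* name() const { return "Shenandoah Initial Marking"; }
      void doit() { printf("[%s]\n", name()); /* real pause work elided */ }
    };

    int main() {
      VM_ShenandoahInitMark op;
      op.doit();  // prints: [Shenandoah Initial Marking]
      return 0;
    }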
diff -r 901a3145878f -r b822aaaf5b7b src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp
--- a/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp  Wed Feb 15 16:49:46 2017 +0100
+++ b/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp  Wed Feb 15 17:13:20 2017 +0100
@@ -29,14 +29,6 @@
 #include "gc_implementation/shenandoah/shenandoahWorkGroup.hpp"
 #include "gc_implementation/shenandoah/vm_operations_shenandoah.hpp"
 
-VM_Operation::VMOp_Type VM_ShenandoahInitMark::type() const {
-  return VMOp_ShenandoahInitMark;
-}
-
-const char* VM_ShenandoahInitMark::name() const {
-  return "Shenandoah Initial Marking";
-}
-
 void VM_ShenandoahInitMark::doit() {
   ShenandoahHeap *sh = (ShenandoahHeap*) Universe::heap();
   FlexibleWorkGang* workers = sh->workers();
@@ -65,14 +57,6 @@
 }
 
-VM_Operation::VMOp_Type VM_ShenandoahFullGC::type() const {
-  return VMOp_ShenandoahFullGC;
-}
-
-VM_ShenandoahFullGC::VM_ShenandoahFullGC(GCCause::Cause gc_cause) :
-  _gc_cause(gc_cause) {
-}
-
 void VM_ShenandoahFullGC::doit() {
 
   ShenandoahMarkCompact::do_mark_compact(_gc_cause);
@@ -84,11 +68,6 @@
   }
 }
 
-const char* VM_ShenandoahFullGC::name() const {
-  return "Shenandoah Full GC";
-}
-
-
 bool VM_ShenandoahReferenceOperation::doit_prologue() {
   if (Thread::current()->is_Java_thread()) {
     InstanceRefKlass::acquire_pending_list_lock(&_pending_list_basic_lock);
@@ -149,22 +128,6 @@
   }
 }
 
-VM_Operation::VMOp_Type VM_ShenandoahStartEvacuation::type() const {
-  return VMOp_ShenandoahStartEvacuation;
-}
-
-const char* VM_ShenandoahStartEvacuation::name() const {
-  return "Start shenandoah evacuation";
-}
-
-VM_Operation::VMOp_Type VM_ShenandoahVerifyHeapAfterEvacuation::type() const {
-  return VMOp_ShenandoahVerifyHeapAfterEvacuation;
-}
-
-const char* VM_ShenandoahVerifyHeapAfterEvacuation::name() const {
-  return "Shenandoah verify heap after evacuation";
-}
-
 void VM_ShenandoahVerifyHeapAfterEvacuation::doit() {
 
   ShenandoahHeap *sh = ShenandoahHeap::heap();
 
diff -r 901a3145878f -r b822aaaf5b7b src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.hpp
--- a/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.hpp  Wed Feb 15 16:49:46 2017 +0100
+++ b/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.hpp  Wed Feb 15 17:13:20 2017 +0100
@@ -28,55 +28,59 @@
 #include "gc_implementation/shared/vmGCOperations.hpp"
 
 // VM_operations for the Shenandoah Collector.
-// For now we are just doing two pauses. The initial marking pause, and the final finish up marking and perform evacuation pause.
-// VM_ShenandoahInitMark -// VM_ShenandoahFinishMark +// +// VM_ShenandoahOperation +// - VM_ShenandoahInitMark: initiate concurrent marking +// - VM_ShenandoahReferenceOperation: +// - VM_ShenandoahStartEvacuation: finish up concurrent marking, and start evacuation +// - VM_ShenandoahFullGC: do full GC -class VM_ShenandoahInitMark: public VM_Operation { - +class VM_ShenandoahOperation : public VM_Operation { public: - virtual VMOp_Type type() const; - virtual void doit(); - - virtual const char* name() const; + VM_ShenandoahOperation() {}; }; -class VM_ShenandoahReferenceOperation : public VM_Operation { +class VM_ShenandoahReferenceOperation : public VM_ShenandoahOperation { private: BasicLock _pending_list_basic_lock; public: + VM_ShenandoahReferenceOperation() : VM_ShenandoahOperation() {}; bool doit_prologue(); void doit_epilogue(); +}; +class VM_ShenandoahInitMark: public VM_ShenandoahOperation { +public: + VM_ShenandoahInitMark() : VM_ShenandoahOperation() {}; + VM_Operation::VMOp_Type type() const { return VMOp_ShenandoahInitMark; } + const char* name() const { return "Shenandoah Initial Marking"; } + virtual void doit(); }; class VM_ShenandoahStartEvacuation: public VM_ShenandoahReferenceOperation { - - public: - VMOp_Type type() const; - void doit(); - const char* name() const; - +public: + VM_ShenandoahStartEvacuation() : VM_ShenandoahReferenceOperation() {}; + VM_Operation::VMOp_Type type() const { return VMOp_ShenandoahStartEvacuation; } + const char* name() const { return "Start Shenandoah evacuation"; } + virtual void doit(); }; class VM_ShenandoahFullGC : public VM_ShenandoahReferenceOperation { private: GCCause::Cause _gc_cause; public: - VM_ShenandoahFullGC(GCCause::Cause gc_cause); - VMOp_Type type() const; - void doit(); - const char* name() const; + VM_ShenandoahFullGC(GCCause::Cause gc_cause) : VM_ShenandoahReferenceOperation(), _gc_cause(gc_cause) {}; + VM_Operation::VMOp_Type type() const { return VMOp_ShenandoahFullGC; } + const char* name() const { return "Shenandoah Full GC"; } + virtual void doit(); }; -class VM_ShenandoahVerifyHeapAfterEvacuation: public VM_Operation { - - public: - virtual VMOp_Type type() const; +class VM_ShenandoahVerifyHeapAfterEvacuation: public VM_ShenandoahOperation { +public: + VM_ShenandoahVerifyHeapAfterEvacuation() : VM_ShenandoahOperation() {}; + VM_Operation::VMOp_Type type() const { return VMOp_ShenandoahVerifyHeapAfterEvacuation; } + const char* name() const { return "Shenandoah verify heap after evacuation"; } virtual void doit(); - - virtual const char* name() const; - }; #endif //SHARE_VM_GC_SHENANDOAH_VM_OPERATIONS_SHENANDOAH_HPP changeset: 9553:781bddb64ad3 user: rkennke date: Wed Feb 15 17:24:58 2017 +0100 summary: Fix naked object comparisons. diff -r b822aaaf5b7b -r 781bddb64ad3 src/share/vm/code/dependencies.cpp --- a/src/share/vm/code/dependencies.cpp Wed Feb 15 17:13:20 2017 +0100 +++ b/src/share/vm/code/dependencies.cpp Wed Feb 15 17:24:58 2017 +0100 @@ -1481,12 +1481,12 @@ assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "sanity"); if (changes == NULL) { // Validate all CallSites - if (java_lang_invoke_CallSite::target(call_site) != method_handle) + if (! 
oopDesc::equals(java_lang_invoke_CallSite::target(call_site), method_handle)) return call_site->klass(); // assertion failed } else { // Validate the given CallSite - if (call_site == changes->call_site() && java_lang_invoke_CallSite::target(call_site) != changes->method_handle()) { - assert(method_handle != changes->method_handle(), "must be"); + if (oopDesc::equals(call_site, changes->call_site()) && ! oopDesc::equals(java_lang_invoke_CallSite::target(call_site), changes->method_handle())) { + assert(! oopDesc::equals(method_handle, changes->method_handle()), "must be"); return call_site->klass(); // assertion failed } } changeset: 9554:a12b1012b510 user: shade date: Wed Feb 15 18:59:43 2017 +0100 summary: Connection Matrix. diff -r 781bddb64ad3 -r a12b1012b510 src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Feb 15 17:24:58 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.cpp Wed Feb 15 18:59:43 2017 +0100 @@ -86,8 +86,7 @@ } virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) = 0; - void choose_collection_set(ShenandoahCollectionSet* collection_set); - + virtual void choose_collection_set(ShenandoahCollectionSet* collection_set, int* connections=NULL); virtual void choose_free_set(ShenandoahFreeSet* free_set); virtual bool process_references() { @@ -138,7 +137,7 @@ { } -void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collection_set) { +void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collection_set, int* connections) { start_choose_collection_set(); ShenandoahHeap* heap = ShenandoahHeap::heap(); @@ -515,6 +514,106 @@ } }; +class ConnectionHeuristics : public ShenandoahHeuristics { +private: + size_t _max_live_data; + double _used_threshold_factor; + double _garbage_threshold_factor; + double _allocation_threshold_factor; + + uintx _used_threshold; + uintx _garbage_threshold; + uintx _allocation_threshold; + +public: + ConnectionHeuristics() : ShenandoahHeuristics() { + _max_live_data = 0; + + _used_threshold = 0; + _garbage_threshold = 0; + _allocation_threshold = 0; + + _used_threshold_factor = 0.; + _garbage_threshold_factor = 0.1; + _allocation_threshold_factor = 0.; + } + + virtual ~ConnectionHeuristics() {} + + virtual bool should_start_concurrent_mark(size_t used, size_t capacity) const { + size_t half_gig = 64 * 1024 * 1024; + size_t bytes_alloc = ShenandoahHeap::heap()->bytes_allocated_since_cm(); + bool result = bytes_alloc > half_gig; + if (result) tty->print("Starting a concurrent mark"); + return result; + } + + bool maybe_add_heap_region(ShenandoahHeapRegion* hr, ShenandoahCollectionSet* collection_set) { + if (!hr->is_humongous() && hr->has_live() && !collection_set->contains(hr)) { + collection_set->add_region_check_for_duplicates(hr); + hr->set_in_collection_set(true); + return true; + } + return false; + } + + virtual void choose_collection_set(ShenandoahCollectionSet* collection_set, int* connections) { + ShenandoahHeapRegionSet* regions = ShenandoahHeap::heap()->regions(); + size_t end = regions->active_regions(); + RegionGarbage sorted_by_garbage[end]; + for (size_t i = 0; i < end; i++) { + ShenandoahHeapRegion* r = regions->get_fast(i); + sorted_by_garbage[i].region_number = r->region_number(); + sorted_by_garbage[i].garbage = r->garbage(); + } + + QuickSort::sort(sorted_by_garbage, end, compare_by_garbage, false); + + int num = 
ShenandoahHeap::heap()->num_regions(); + // simulate write heuristics by picking best region. + int r = 0; + ShenandoahHeapRegion* choosenOne = regions->get(sorted_by_garbage[0].region_number); + + while (! maybe_add_heap_region(choosenOne, collection_set)) { + choosenOne = regions->get(sorted_by_garbage[++r].region_number); + } + + int region_number = choosenOne->region_number(); + log_develop_trace(gc)("Adding choosen region %d\n", region_number); + + // Add all the regions which point to this region. + for (int i = 0; i < num; i++) { + if (connections[i * num + region_number] > 0) { + ShenandoahHeapRegion* candidate = regions->get(sorted_by_garbage[i].region_number); + if (maybe_add_heap_region(candidate, collection_set)) + log_develop_trace(gc)("Adding region %d which points to the choosen region\n", i); + } + } + + // Add all the regions they point to. + for (size_t ci = 0; ci < collection_set->active_regions(); ci++) { + ShenandoahHeapRegion* cs_heap_region = collection_set->get(ci); + int cs_heap_region_number = cs_heap_region->region_number(); + for (int i = 0; i < num; i++) { + if (connections[i * num + cs_heap_region_number] > 0) { + ShenandoahHeapRegion* candidate = regions->get(sorted_by_garbage[i].region_number); + if (maybe_add_heap_region(candidate, collection_set)) { + log_develop_trace(gc) + ("Adding region %d which is pointed to by region %d\n", i, cs_heap_region_number); + } + } + } + } + _max_live_data = MAX2(_max_live_data, collection_set->live_data()); + collection_set->print(); + } + + virtual bool region_in_collection_set(ShenandoahHeapRegion* r, size_t immediate_garbage) { + assert(false, "Shouldn't get here"); + return false; + } +}; + ShenandoahCollectorPolicy::ShenandoahCollectorPolicy() : _cycle_counter(0), _successful_cm(0), @@ -625,6 +724,9 @@ } else if (strcmp(ShenandoahGCHeuristics, "passive") == 0) { log_info(gc, init)("Shenandoah heuristics: passive"); _heuristics = new PassiveHeuristics(); + } else if (strcmp(ShenandoahGCHeuristics, "connections") == 0) { + log_info(gc, init)("Shenandoah heuristics: connections"); + _heuristics = new ConnectionHeuristics(); } else { vm_exit_during_initialization("Unknown -XX:ShenandoahGCHeuristics option"); } @@ -716,8 +818,8 @@ _heuristics->record_full_gc(); } -void ShenandoahCollectorPolicy::choose_collection_set(ShenandoahCollectionSet* collection_set) { - _heuristics->choose_collection_set(collection_set); +void ShenandoahCollectorPolicy::choose_collection_set(ShenandoahCollectionSet* collection_set, int* connections) { + _heuristics->choose_collection_set(collection_set, connections); } void ShenandoahCollectorPolicy::choose_free_set(ShenandoahFreeSet* free_set) { diff -r 781bddb64ad3 -r a12b1012b510 src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Wed Feb 15 17:24:58 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp Wed Feb 15 18:59:43 2017 +0100 @@ -189,7 +189,7 @@ void record_cm_degenerated(); void record_full_gc(); - void choose_collection_set(ShenandoahCollectionSet* collection_set); + void choose_collection_set(ShenandoahCollectionSet* collection_set, int* connections=NULL); void choose_free_set(ShenandoahFreeSet* free_set); bool process_references(); diff -r 781bddb64ad3 -r a12b1012b510 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Feb 15 17:24:58 2017 +0100 +++ 
b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Feb 15 18:59:43 2017 +0100 @@ -1138,15 +1138,23 @@ _ordered_regions->heap_region_iterate(&ccsc); #endif - _shenandoah_policy->choose_collection_set(_collection_set); + if (UseShenandoahMatrix) { + int num = num_regions(); + int *connections = NEW_C_HEAP_ARRAY(int, num * num, mtGC); + calculate_matrix(connections); + print_matrix(connections); + _shenandoah_policy->choose_collection_set(_collection_set, connections); + FREE_C_HEAP_ARRAY(int,connections,mtGC); + } else { + _shenandoah_policy->choose_collection_set(_collection_set); + } _shenandoah_policy->choose_free_set(_free_regions); } - /* - tty->print("Sorted free regions\n"); - _free_regions->print(); - */ + if (UseShenandoahMatrix) { + _collection_set->print(); + } _bytes_allocated_since_cm = 0; @@ -2303,6 +2311,115 @@ return _gc_timer; } +class RecordAllRefsOopClosure: public ExtendedOopClosure { +private: + int _x; + int *_matrix; + int _num_regions; + oop _p; + +public: + RecordAllRefsOopClosure(int *matrix, int x, size_t num_regions, oop p) : + _matrix(matrix), _x(x), _num_regions(num_regions), _p(p) {} + + template + void do_oop_work(T* p) { + oop o = oopDesc::load_decode_heap_oop(p); + if (o != NULL) { + if (ShenandoahHeap::heap()->is_in(o) && o->is_oop() ) { + int y = ShenandoahHeap::heap()->heap_region_containing(o)->region_number(); + _matrix[_x * _num_regions + y]++; + } + } + } + void do_oop(oop* p) { + do_oop_work(p); + } + + void do_oop(narrowOop* p) { + do_oop_work(p); + } + +}; + +class RecordAllRefsObjectClosure : public ObjectClosure { + int *_matrix; + size_t _num_regions; + +public: + RecordAllRefsObjectClosure(int *matrix, size_t num_regions) : + _matrix(matrix), _num_regions(num_regions) {} + + void do_object(oop p) { + if (ShenandoahHeap::heap()->is_in(p) && ShenandoahHeap::heap()->is_marked_next(p) && p->is_oop()) { + int x = ShenandoahHeap::heap()->heap_region_containing(p)->region_number(); + RecordAllRefsOopClosure cl(_matrix, x, _num_regions, p); + p->oop_iterate(&cl); + } + } +}; +void ShenandoahHeap::calculate_matrix(int* connections) { + log_develop_trace(gc)("calculating matrix"); + ensure_parsability(false); + int num = num_regions(); + + for (int i = 0; i < num; i++) { + for (int j = 0; j < num; j++) { + connections[i * num + j] = 0; + } + } + + RecordAllRefsOopClosure cl(connections, 0, num, NULL); + roots_iterate(&cl); + + RecordAllRefsObjectClosure cl2(connections, num); + object_iterate(&cl2); + +} + +void ShenandoahHeap::print_matrix(int* connections) { + int num = num_regions(); + int cs_regions = 0; + int referenced = 0; + + for (int i = 0; i < num; i++) { + size_t liveData = ShenandoahHeap::heap()->regions()->get(i)->get_live_data_bytes(); + + int numReferencedRegions = 0; + int numReferencedByRegions = 0; + + for (int j = 0; j < num; j++) { + if (connections[i * num + j] > 0) + numReferencedRegions++; + + if (connections [j * num + i] > 0) + numReferencedByRegions++; + + cs_regions++; + referenced += numReferencedByRegions; + } + + if (ShenandoahHeap::heap()->regions()->get(i)->has_live()) { + tty->print("Region %d is referenced by %d regions {", + i, numReferencedByRegions); + int col_count = 0; + for (int j = 0; j < num; j++) { + int foo = connections[j * num + i]; + if (foo > 0) { + col_count++; + if ((col_count % 10) == 0) + tty->print("\n"); + tty->print("%d(%d), ", j,foo); + } + } + tty->print("} \n"); + } + } + + double avg = (double)referenced / (double) cs_regions; + tty->print("Average Number of regions scanned / 
region = %lf\n", avg); +} + class ShenandoahCountGarbageClosure : public ShenandoahHeapRegionClosure { private: size_t _garbage; diff -r 781bddb64ad3 -r a12b1012b510 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Feb 15 17:24:58 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Feb 15 18:59:43 2017 +0100 @@ -361,6 +361,9 @@ void print_all_refs(const char* prefix); void print_heap_locations(HeapWord* start, HeapWord* end); + void calculate_matrix(int* connections); + void print_matrix(int* connections); + size_t bytes_allocated_since_cm(); void set_bytes_allocated_since_cm(size_t bytes); diff -r 781bddb64ad3 -r a12b1012b510 src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.cpp Wed Feb 15 17:24:58 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.cpp Wed Feb 15 18:59:43 2017 +0100 @@ -71,6 +71,14 @@ } } +void ShenandoahHeapRegionSet::add_region_check_for_duplicates(ShenandoahHeapRegion* r) { + // FIXME There's a bug where the zeroth region is not checked, so check it here + if (_active_end < _reserved_end && !contains(r) && _regions[0] != r) { + _regions[_active_end] = r; + _active_end++; + } +} + // Apply blk->doHeapRegion() on all committed regions in address order, // terminating the iteration early if doHeapRegion() returns true. void ShenandoahHeapRegionSet::active_heap_region_iterate(ShenandoahHeapRegionClosure* blk, diff -r 781bddb64ad3 -r a12b1012b510 src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp Wed Feb 15 17:24:58 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.hpp Wed Feb 15 18:59:43 2017 +0100 @@ -78,6 +78,7 @@ } virtual void add_region(ShenandoahHeapRegion* r); + virtual void add_region_check_for_duplicates(ShenandoahHeapRegion* r); // Advance the iteration pointer to the next region. void next(); @@ -99,10 +100,10 @@ size_t current_index() { return _current_index;} void clear_current_index() {_current_index = 0; } + bool contains(ShenandoahHeapRegion* r); ShenandoahHeapRegion* current() const; protected: - bool contains(ShenandoahHeapRegion* r); void active_heap_region_iterate(ShenandoahHeapRegionClosure* blk, bool skip_dirty_regions = false, diff -r 781bddb64ad3 -r a12b1012b510 src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Feb 15 17:24:58 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Feb 15 18:59:43 2017 +0100 @@ -60,6 +60,9 @@ "regions, based on ShenandoahMinRegionSize and " \ "ShenandoahMaxRegionSizeSize. ") \ \ + product(bool, UseShenandoahMatrix, false, \ + "Keep a connection matrix and use this to drive collection sets") \ + \ product(ccstr, ShenandoahGCHeuristics, "adaptive", \ "The heuristics to use in Shenandoah GC. Possible values: " \ "dynamic, adaptive, aggressive." \ changeset: 9555:814ea1e6a91b user: shade date: Wed Feb 15 19:38:29 2017 +0100 summary: Assorted touchups to bring 8u closer to 9. 
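A note on the connection matrix introduced in the changeset above (a12b1012b510): the matrix is a flat num x num array of ints in which cell [from * num + to] counts the references discovered from region "from" into region "to"; calculate_matrix() zeroes it, books references from GC roots into row 0, and then walks marked objects. The following standalone sketch shows the same bookkeeping with a toy region count and a hard-coded reference list in place of the heap walk; all names and values here are illustrative, not the HotSpot code.

#include <cstdio>
#include <vector>

int main() {
  const int num = 4;                           // toy region count
  std::vector<int> connections(num * num, 0);  // zeroed, as calculate_matrix() does

  // One increment per discovered reference: row = source region, column = target region.
  int refs[][2] = { {0, 2}, {1, 2}, {3, 2}, {1, 3} };
  for (const auto& r : refs) {
    connections[r[0] * num + r[1]]++;
  }

  // The query the heuristics rely on: which regions point into region 2?
  const int target = 2;
  for (int i = 0; i < num; i++) {
    if (connections[i * num + target] > 0) {
      std::printf("region %d -> region %d (%d refs)\n",
                  i, target, connections[i * num + target]);
    }
  }
  return 0;
}

ConnectionHeuristics::choose_collection_set() walks exactly such columns: it seeds the collection set with the region holding the most garbage, then adds every region i with a positive entry in the seed's column, and repeats that test against each member of the growing set.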
diff -r a12b1012b510 -r 814ea1e6a91b src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Wed Feb 15 18:59:43 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Wed Feb 15 19:38:29 2017 +0100 @@ -987,11 +987,11 @@ return old == 0; } -jushort* ShenandoahConcurrentMark::get_liveness(uint worker_id) { - return _liveness_local[worker_id]; -} - void ShenandoahConcurrentMark::clear_claim_codecache() { assert(ShenandoahConcurrentCodeRoots, "must not be called otherwise"); _claimed_codecache = 0; } + +jushort* ShenandoahConcurrentMark::get_liveness(uint worker_id) { + return _liveness_local[worker_id]; +} diff -r a12b1012b510 -r 814ea1e6a91b src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Feb 15 18:59:43 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Feb 15 19:38:29 2017 +0100 @@ -329,8 +329,6 @@ }; void ShenandoahHeap::reset_next_mark_bitmap(WorkGang* workers) { - // GCTraceTime time("Concurrent reset bitmaps", ShenandoahLogInfo, true, gc_timer(), tracer()->gc_id()); - ResetNextBitmapTask task = ResetNextBitmapTask(_ordered_regions); workers->run_task(&task); } @@ -361,8 +359,6 @@ }; void ShenandoahHeap::reset_complete_mark_bitmap(WorkGang* workers) { - GCTraceTime time("Concurrent reset bitmaps", ShenandoahLogInfo, true, gc_timer(), tracer()->gc_id()); - ResetCompleteBitmapTask task = ResetCompleteBitmapTask(_ordered_regions); workers->run_task(&task); } @@ -1259,9 +1255,11 @@ COMPILER2_PRESENT(DerivedPointerTable::clear()); +#ifdef ASSERT if (ShenandoahVerifyReadsToFromSpace) { set_from_region_protection(false); } +#endif assert(SafepointSynchronize::is_at_safepoint(), "Only iterate roots while world is stopped"); ClassLoaderDataGraph::clear_claimed_marks(); @@ -1272,9 +1270,11 @@ workers()->run_task(&roots_task); } +#ifdef ASSERT if (ShenandoahVerifyReadsToFromSpace) { set_from_region_protection(true); } +#endif COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); diff -r a12b1012b510 -r 814ea1e6a91b src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Feb 15 18:59:43 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.hpp Wed Feb 15 19:38:29 2017 +0100 @@ -466,7 +466,9 @@ HeapWord* allocate_memory_work(size_t word_size); HeapWord* allocate_large_memory(size_t word_size); +#ifdef ASSERT void set_from_region_protection(bool protect); +#endif const char* cancel_cause_to_string(ShenandoahCancelCause cause); diff -r a12b1012b510 -r 814ea1e6a91b src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.cpp Wed Feb 15 18:59:43 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeapRegionSet.cpp Wed Feb 15 19:38:29 2017 +0100 @@ -105,6 +105,8 @@ bool skip_dirty_regions, bool skip_humongous_continuation) const { size_t i; + + // There's a bug here where the zeroth region is missed --chf for (i = _current_index + 1; i < _active_end; i++) { ShenandoahHeapRegion* current = _regions[i]; assert(current->region_number() <= _reserved_end, "Tautology"); diff -r a12b1012b510 -r 814ea1e6a91b src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Feb 15 18:59:43 2017 +0100 +++ 
b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Wed Feb 15 19:38:29 2017 +0100 @@ -92,9 +92,6 @@ product(bool, ShenandoahLogWarning, false, \ "Turns on logging in Shenandoah at warning level. ") \ \ - product(size_t, PreTouchParallelChunkSize, 1 * G, \ - "Per-thread chunk size for parallel memory pre-touch.") \ - \ product_rw(uintx, ShenandoahFullGCThreshold, 3, \ "How many cycles in a row to do degenerated marking on " \ "cancelled GC before triggering a full-gc" \ changeset: 9556:4ca998fc2ae8 user: shade date: Wed Feb 15 23:46:31 2017 +0100 summary: Bump the inlining limits for concurrent mark. diff -r 814ea1e6a91b -r 4ca998fc2ae8 make/linux/makefiles/gcc.make --- a/make/linux/makefiles/gcc.make Wed Feb 15 19:38:29 2017 +0100 +++ b/make/linux/makefiles/gcc.make Wed Feb 15 23:46:31 2017 +0100 @@ -261,6 +261,9 @@ endif endif +# Need extra inlining to collapse all the templated closures into the hot loop +OPT_CFLAGS/shenandoahConcurrentMark.o += $(OPT_CFLAGS) --param inline-unit-growth=1000 + # Flags for generating make dependency flags. DEPFLAGS = -MMD -MP -MF $(DEP_DIR)/$(@:%=%.d) ifeq ($(USE_CLANG),) diff -r 814ea1e6a91b -r 4ca998fc2ae8 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Wed Feb 15 19:38:29 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.cpp Wed Feb 15 23:46:31 2017 +0100 @@ -26,6 +26,7 @@ #include "gc_implementation/shenandoah/shenandoahBarrierSet.inline.hpp" #include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp" +#include "gc_implementation/shenandoah/shenandoahOopClosures.inline.hpp" #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" #include "gc_implementation/shenandoah/shenandoahRootProcessor.hpp" #include "gc_implementation/shenandoah/shenandoah_specialized_oop_closures.hpp" diff -r 814ea1e6a91b -r 4ca998fc2ae8 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Wed Feb 15 19:38:29 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp Wed Feb 15 23:46:31 2017 +0100 @@ -27,6 +27,7 @@ #include "gc_implementation/shenandoah/brooksPointer.hpp" #include "gc_implementation/shenandoah/shenandoahBarrierSet.inline.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentMark.hpp" +#include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp" #include "gc_implementation/shenandoah/shenandoahTaskqueue.inline.hpp" #include "memory/iterator.inline.hpp" #include "oops/oop.inline.hpp" changeset: 9557:aaf244db1ec5 user: shade date: Thu Feb 16 17:54:05 2017 +0100 summary: Re-implement verbose GC logging. 
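The changeset below reimplements verbose GC logging around a Shenandoah-private ShenandoahGCTraceTime (typedef'd to GCTraceTime). The core of it is an RAII trace scope: the constructor stamps a start tick, registers the phase with the GCTimer, and prints the opening half of the log line; the destructor computes the elapsed time, optionally prints the before/after heap occupancy, and closes the line. A toy model of that pattern, using std::chrono and stdio instead of HotSpot's Ticks, GCTimer and gclog_or_tty (TraceScope is a made-up name, not the real class):

#include <chrono>
#include <cstdio>

class TraceScope {
  const char* _title;
  bool _enabled;
  std::chrono::steady_clock::time_point _start;

public:
  TraceScope(const char* title, bool enabled) : _title(title), _enabled(enabled) {
    if (_enabled) {
      _start = std::chrono::steady_clock::now();
      std::printf("[%s", _title);   // opening half of "[Phase, N ms]"
      std::fflush(stdout);
    }
  }

  ~TraceScope() {                   // runs when the phase's scope closes
    if (_enabled) {
      double ms = std::chrono::duration<double, std::milli>(
                      std::chrono::steady_clock::now() - _start).count();
      std::printf(", %.3f ms]\n", ms);
    }
  }
};

static void concurrent_marking_phase() {
  TraceScope t("Concurrent marking", true);   // prints "[Concurrent marking, X ms]"
  // ... phase work goes here ...
}

int main() {
  concurrent_marking_phase();
  return 0;
}

The real class additionally prefixes GC id and time stamps, and the print_heap flag adds the "used-before->used-after(capacity)" segment visible in its destructor below.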
diff -r 4ca998fc2ae8 -r aaf244db1ec5 src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Wed Feb 15 23:46:31 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahConcurrentThread.cpp Thu Feb 16 17:54:05 2017 +0100 @@ -22,7 +22,7 @@ */ #include "gc_implementation/shared/gcTimer.hpp" -#include "gc_implementation/shared/gcTraceTime.hpp" +#include "gc_implementation/shenandoah/shenandoahGCTraceTime.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentThread.hpp" #include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" @@ -98,6 +98,7 @@ GCTracer* gc_tracer = heap->tracer(); gc_timer->register_gc_start(); + gc_tracer->report_gc_start(GCCause::_no_cause_specified, gc_timer->gc_start()); heap->shenandoahPolicy()->increase_cycle_counter(); @@ -126,7 +127,7 @@ Threads::number_of_non_daemon_threads()); ShenandoahWorkerScope scope(workers, n_workers); - // GCTraceTime time("Concurrent marking", ShenandoahLogInfo, true, gc_timer, gc_tracer->gc_id()); + GCTraceTime time("Concurrent marking", ShenandoahLogInfo, gc_timer, gc_tracer->gc_id(), true); TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); ShenandoahHeap::heap()->concurrentMark()->mark_from_roots(); } @@ -175,7 +176,7 @@ Threads::number_of_non_daemon_threads()); ShenandoahWorkerScope scope(workers, n_workers); - // GCTraceTime time("Concurrent evacuation ", ShenandoahLogInfo, true, gc_timer, gc_tracer->gc_id()); + GCTraceTime time("Concurrent evacuation", ShenandoahLogInfo, gc_timer, gc_tracer->gc_id(), true); TraceCollectorStats tcs(heap->monitoring_support()->concurrent_collection_counters()); heap->do_evacuation(); } @@ -183,13 +184,17 @@ // Prepare for the next normal cycle: if (check_cancellation()) return; - heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::reset_bitmaps); - FlexibleWorkGang* workers = heap->workers(); - ShenandoahPushWorkerScope scope(workers, heap->max_workers()); - heap->reset_next_mark_bitmap(workers); - heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::reset_bitmaps); + { + GCTraceTime time("Concurrent reset bitmaps", ShenandoahLogInfo, gc_timer, gc_tracer->gc_id()); + heap->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::reset_bitmaps); + FlexibleWorkGang* workers = heap->workers(); + ShenandoahPushWorkerScope scope(workers, heap->max_workers()); + heap->reset_next_mark_bitmap(workers); + heap->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::reset_bitmaps); + } gc_timer->register_gc_end(); + gc_tracer->report_gc_end(gc_timer->gc_end(), gc_timer->time_partitions()); } bool ShenandoahConcurrentThread::check_cancellation() { diff -r 4ca998fc2ae8 -r aaf244db1ec5 src/share/vm/gc_implementation/shenandoah/shenandoahGCTraceTime.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahGCTraceTime.cpp Thu Feb 16 17:54:05 2017 +0100 @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc_implementation/shared/gcTimer.hpp" +#include "gc_implementation/shared/gcTrace.hpp" +#include "gc_implementation/shenandoah/shenandoahHeap.hpp" +#include "gc_implementation/shenandoah/shenandoahGCTraceTime.hpp" +#include "runtime/globals.hpp" +#include "runtime/os.hpp" +#include "runtime/safepoint.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "utilities/ostream.hpp" +#include "utilities/ticks.inline.hpp" + + +ShenandoahGCTraceTime::ShenandoahGCTraceTime(const char* title, bool doit, GCTimer* timer, GCId gc_id, bool print_heap) : + _title(title), _doit(doit), _timer(timer), _start_counter(), _heap(ShenandoahHeap::heap()), _print_heap(print_heap) { + if (_doit || _timer != NULL) { + _start_counter.stamp(); + } + + if (_timer != NULL) { + _timer->register_gc_phase_start(title, _start_counter); + } + + if (_doit) { + _bytes_before = _heap->used(); + + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + if (!gc_id.is_undefined()) { + gclog_or_tty->print("#%u: ", gc_id.id()); + } + gclog_or_tty->print("[%s", title); + gclog_or_tty->flush(); + } +} + +ShenandoahGCTraceTime::~ShenandoahGCTraceTime() { + Ticks stop_counter; + + if (_doit || _timer != NULL) { + stop_counter.stamp(); + } + + if (_timer != NULL) { + _timer->register_gc_phase_end(stop_counter); + } + + if (_doit) { + const Tickspan duration = stop_counter - _start_counter; + double secs = TicksToTimeHelper::seconds(duration); + + size_t bytes_after = _heap->used(); + size_t capacity = _heap->capacity(); + + if (_print_heap) { + gclog_or_tty->print(" " SIZE_FORMAT "%s->" SIZE_FORMAT "%s(" SIZE_FORMAT "%s)", + byte_size_in_proper_unit(_bytes_before), + proper_unit_for_byte_size(_bytes_before), + byte_size_in_proper_unit(bytes_after), + proper_unit_for_byte_size(bytes_after), + byte_size_in_proper_unit(capacity), + proper_unit_for_byte_size(capacity)); + } + + gclog_or_tty->print_cr(", %.3f ms]", secs * 1000); + gclog_or_tty->flush(); + } +} diff -r 4ca998fc2ae8 -r aaf244db1ec5 src/share/vm/gc_implementation/shenandoah/shenandoahGCTraceTime.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahGCTraceTime.hpp Thu Feb 16 17:54:05 2017 +0100 @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_SHENANDOAH_SHENANDOAHGCTRACETIME_HPP +#define SHARE_VM_GC_IMPLEMENTATION_SHENANDOAH_SHENANDOAHGCTRACETIME_HPP + +#include "gc_implementation/shared/gcTrace.hpp" +#include "prims/jni_md.h" +#include "utilities/ticks.hpp" + +class GCTimer; + +class ShenandoahGCTraceTime { + ShenandoahHeap* _heap; + const char* _title; + bool _doit; + bool _print_heap; + GCTimer* _timer; + Ticks _start_counter; + size_t _bytes_before; + + public: + ShenandoahGCTraceTime(const char* title, bool doit, GCTimer* timer, GCId gc_id, bool print_heap = false); + ~ShenandoahGCTraceTime(); +}; + +typedef ShenandoahGCTraceTime GCTraceTime; + +#endif // SHARE_VM_GC_IMPLEMENTATION_SHENANDOAH_SHENANDOAHGCTRACETIME_HPP diff -r 4ca998fc2ae8 -r aaf244db1ec5 src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Wed Feb 15 23:46:31 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Thu Feb 16 17:54:05 2017 +0100 @@ -24,7 +24,7 @@ #include "memory/allocation.hpp" #include "gc_implementation/shared/gcTimer.hpp" -#include "gc_implementation/shared/gcTraceTime.hpp" +#include "gc_implementation/shenandoah/shenandoahGCTraceTime.hpp" #include "gc_implementation/shared/parallelCleaning.hpp" #include "gc_implementation/shenandoah/brooksPointer.hpp" diff -r 4ca998fc2ae8 -r aaf244db1ec5 src/share/vm/gc_implementation/shenandoah/shenandoahLogging.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahLogging.hpp Wed Feb 15 23:46:31 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahLogging.hpp Thu Feb 16 17:54:05 2017 +0100 @@ -1,9 +1,15 @@ +#ifndef SHARE_VM_GC_SHENANDOAH_SHENANDOAHLOGGING_HPP +#define SHARE_VM_GC_SHENANDOAH_SHENANDOAHLOGGING_HPP -#define log_trace(...) if (ShenandoahLogTrace) gclog_or_tty->print_cr -#define log_debug(...) if (ShenandoahLogDebug) gclog_or_tty->print_cr -#define log_info(...) if (ShenandoahLogInfo) gclog_or_tty->print_cr +#define log_trace(...) if (ShenandoahLogTrace) gclog_or_tty->print_cr +#define log_debug(...) if (ShenandoahLogDebug) gclog_or_tty->print_cr #define log_warning(...) if (ShenandoahLogInfo) gclog_or_tty->print_cr +// With ShenandoahLogInfo, only print out the single-"gc"-tag messages. +#define log_info(...) if (((strcmp(#__VA_ARGS__, "gc") == 0) && ShenandoahLogInfo) || \ + ((strcmp(#__VA_ARGS__, "gc") != 0) && ShenandoahLogDebug)) \ + gclog_or_tty->print_cr + #ifndef PRODUCT #define log_develop_trace(...) if (ShenandoahLogTrace) gclog_or_tty->print_cr #define log_develop_debug(...) if (ShenandoahLogDebug) gclog_or_tty->print_cr @@ -12,3 +18,5 @@ #define log_develop_trace(...) DUMMY_ARGUMENT_CONSUMER #define log_develop_debug(...) 
DUMMY_ARGUMENT_CONSUMER #endif + +#endif diff -r 4ca998fc2ae8 -r aaf244db1ec5 src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Wed Feb 15 23:46:31 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahMarkCompact.cpp Thu Feb 16 17:54:05 2017 +0100 @@ -22,7 +22,7 @@ */ #include "code/codeCache.hpp" -#include "gc_implementation/shared/gcTraceTime.hpp" +#include "gc_implementation/shenandoah/shenandoahGCTraceTime.hpp" #include "gc_implementation/shared/gcTimer.hpp" #include "gc_implementation/shared/isGCActiveMark.hpp" #include "gc_implementation/shenandoah/brooksPointer.hpp" @@ -92,6 +92,11 @@ ShenandoahCollectorPolicy* policy = _heap->shenandoahPolicy(); _gc_timer->register_gc_start(); + GCTracer* _gc_tracer = _heap->tracer(); + if (_gc_tracer->has_reported_gc_start()) { + _gc_tracer->report_gc_end(_gc_timer->gc_end(), _gc_timer->time_partitions()); + } + _gc_tracer->report_gc_start(gc_cause, _gc_timer->gc_start()); _heap->set_full_gc_in_progress(true); @@ -145,7 +150,7 @@ policy->record_phase_end(ShenandoahCollectorPolicy::full_gc_prepare); { - GCTraceTime time("Pause Full", ShenandoahLogInfo, true, _gc_timer, _heap->tracer()->gc_id()); + GCTraceTime time("Pause Full", ShenandoahLogInfo, _gc_timer, _gc_tracer->gc_id(), true); if (UseTLAB) { _heap->ensure_parsability(true); @@ -217,6 +222,7 @@ } _gc_timer->register_gc_end(); + _gc_tracer->report_gc_end(_gc_timer->gc_end(), _gc_timer->time_partitions()); policy->record_full_gc(); @@ -283,7 +289,7 @@ void ShenandoahMarkCompact::phase1_mark_heap() { ShenandoahHeap* _heap = ShenandoahHeap::heap(); - GCTraceTime time("Phase 1: Mark live objects", ShenandoahLogInfo, true, _gc_timer, _heap->tracer()->gc_id()); + GCTraceTime time("Phase 1: Mark live objects", ShenandoahLogDebug, _gc_timer, _heap->tracer()->gc_id()); ShenandoahConcurrentMark* cm = _heap->concurrentMark(); @@ -465,7 +471,7 @@ void ShenandoahMarkCompact::phase2_calculate_target_addresses(ShenandoahHeapRegionSet** copy_queues) { ShenandoahHeap* heap = ShenandoahHeap::heap(); - GCTraceTime time("Phase 2: Compute new object addresses", ShenandoahLogInfo, true, _gc_timer, heap->tracer()->gc_id()); + GCTraceTime time("Phase 2: Compute new object addresses", ShenandoahLogDebug, _gc_timer, heap->tracer()->gc_id()); ShenandoahMCReclaimHumongousRegionClosure cl; heap->heap_region_iterate(&cl); @@ -573,7 +579,7 @@ void ShenandoahMarkCompact::phase3_update_references() { ShenandoahHeap* heap = ShenandoahHeap::heap(); - GCTraceTime time("Phase 2: Adjust pointers", ShenandoahLogInfo, true, _gc_timer, heap->tracer()->gc_id()); + GCTraceTime time("Phase 3: Adjust pointers", ShenandoahLogDebug, _gc_timer, heap->tracer()->gc_id()); // Need cleared claim bits for the roots processing ClassLoaderDataGraph::clear_claimed_marks(); @@ -674,7 +680,7 @@ void ShenandoahMarkCompact::phase4_compact_objects(ShenandoahHeapRegionSet** copy_queues) { ShenandoahHeap* heap = ShenandoahHeap::heap(); - GCTraceTime time("Phase 4: Move objects", ShenandoahLogInfo, true, _gc_timer, heap->tracer()->gc_id()); + GCTraceTime time("Phase 4: Move objects", ShenandoahLogDebug, _gc_timer, heap->tracer()->gc_id()); ShenandoahCompactObjectsTask compact_task(copy_queues); heap->workers()->run_task(&compact_task); diff -r 4ca998fc2ae8 -r aaf244db1ec5 src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp --- a/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Wed Feb 15 23:46:31 2017 +0100 
+++ b/src/share/vm/gc_implementation/shenandoah/vm_operations_shenandoah.cpp Thu Feb 16 17:54:05 2017 +0100 @@ -21,7 +21,7 @@ * */ -#include "gc_implementation/shared/gcTraceTime.hpp" +#include "gc_implementation/shenandoah/shenandoahGCTraceTime.hpp" #include "gc_implementation/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc_implementation/shenandoah/shenandoahMarkCompact.hpp" #include "gc_implementation/shenandoah/shenandoahConcurrentMark.inline.hpp" @@ -39,7 +39,7 @@ ShenandoahWorkerScope scope(workers, nworkers); - GCTraceTime time("Pause Init-Mark", ShenandoahLogInfo, true, sh->gc_timer(), sh->tracer()->gc_id()); + GCTraceTime time("Pause Init Mark", ShenandoahLogInfo, sh->gc_timer(), sh->tracer()->gc_id()); sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::total_pause); sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::init_mark); @@ -100,7 +100,7 @@ ShenandoahWorkerScope scope(workers, n_workers); if (! sh->cancelled_concgc()) { - GCTraceTime time("Pause Final Mark", ShenandoahLogInfo, true, sh->gc_timer(), sh->tracer()->gc_id()); + GCTraceTime time("Pause Final Mark", ShenandoahLogInfo, sh->gc_timer(), sh->tracer()->gc_id(), true); sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::total_pause); sh->shenandoahPolicy()->record_phase_start(ShenandoahCollectorPolicy::final_mark); sh->concurrentMark()->finish_mark_from_roots(); @@ -122,7 +122,7 @@ sh->shenandoahPolicy()->record_phase_end(ShenandoahCollectorPolicy::init_evac); } else { - GCTraceTime time("Cancel concurrent Mark", ShenandoahLogInfo, true, sh->gc_timer(), sh->tracer()->gc_id()); + GCTraceTime time("Cancel concurrent mark", ShenandoahLogInfo, sh->gc_timer(), sh->tracer()->gc_id()); sh->concurrentMark()->cancel(); sh->stop_concurrent_marking(); } changeset: 9558:52fb36602ecb user: roland date: Thu Feb 16 15:16:19 2017 +0100 summary: in cset fast test in C2 IR diff -r aaf244db1ec5 -r 52fb36602ecb src/cpu/aarch64/vm/stubGenerator_aarch64.cpp --- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Thu Feb 16 17:54:05 2017 +0100 +++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Thu Feb 16 15:16:19 2017 +0100 @@ -562,21 +562,23 @@ // // Trash rscratch1, rscratch2. Preserve everything else. 
- address generate_shenandoah_wb(bool c_abi) { + address generate_shenandoah_wb(bool c_abi, bool do_cset_test) { StubCodeMark mark(this, "StubRoutines", "shenandoah_wb"); __ align(6); address start = __ pc(); - Label work, slow_case, lose, not_an_instance, is_array; - - __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr()); - __ lsr(rscratch1, r0, ShenandoahHeapRegion::RegionSizeShift); - __ ldrb(rscratch2, Address(rscratch2, rscratch1)); - __ tbnz(rscratch2, 0, work); - __ ret(lr); - - __ bind(work); + Label slow_case, lose, not_an_instance, is_array; + + if (do_cset_test) { + Label work; + __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr()); + __ lsr(rscratch1, r0, ShenandoahHeapRegion::RegionSizeShift); + __ ldrb(rscratch2, Address(rscratch2, rscratch1)); + __ tbnz(rscratch2, 0, work); + __ ret(lr); + __ bind(work); + } RegSet saved = RegSet::range(r1, r4); if (!c_abi) { @@ -4485,8 +4487,8 @@ void generate_barriers() { if (UseShenandoahGC) { - StubRoutines::aarch64::_shenandoah_wb = generate_shenandoah_wb(false); - StubRoutines::_shenandoah_wb_C = generate_shenandoah_wb(true); + StubRoutines::aarch64::_shenandoah_wb = generate_shenandoah_wb(false, true); + StubRoutines::_shenandoah_wb_C = generate_shenandoah_wb(true, !ShenandoahWriteBarrierCsetTestInIR); } } diff -r aaf244db1ec5 -r 52fb36602ecb src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Feb 16 17:54:05 2017 +0100 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Feb 16 15:16:19 2017 +0100 @@ -756,7 +756,7 @@ return start; } - address generate_shenandoah_wb(bool c_abi) { + address generate_shenandoah_wb(bool c_abi, bool do_cset_test) { StubCodeMark mark(this, "StubRoutines", "shenandoah_wb"); address start = __ pc(); @@ -781,22 +781,24 @@ } else { __ mov(rax, rdi); } - __ shrptr(rdi, ShenandoahHeapRegion::RegionSizeShift); - // live: r8 - __ movptr(r8, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); - __ movbool(r8, Address(r8, rdi, Address::times_1)); - // unlive: rdi - __ testbool(r8); - // unlive: r8 - __ jccb(Assembler::notZero, not_done); - - if (!c_abi) { - __ pop(r8); - __ pop(rdi); + if (do_cset_test) { + __ shrptr(rdi, ShenandoahHeapRegion::RegionSizeShift); + // live: r8 + __ movptr(r8, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); + __ movbool(r8, Address(r8, rdi, Address::times_1)); + // unlive: rdi + __ testbool(r8); + // unlive: r8 + __ jccb(Assembler::notZero, not_done); + + if (!c_abi) { + __ pop(r8); + __ pop(rdi); + } + __ ret(0); + + __ bind(not_done); } - __ ret(0); - - __ bind(not_done); if (!c_abi) { __ push(rcx); @@ -4292,8 +4294,8 @@ void generate_barriers() { if (UseShenandoahGC) { - StubRoutines::x86::_shenandoah_wb = generate_shenandoah_wb(false); - StubRoutines::_shenandoah_wb_C = generate_shenandoah_wb(true); + StubRoutines::x86::_shenandoah_wb = generate_shenandoah_wb(false, true); + StubRoutines::_shenandoah_wb_C = generate_shenandoah_wb(true, !ShenandoahWriteBarrierCsetTestInIR); } } diff -r aaf244db1ec5 -r 52fb36602ecb src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Thu Feb 16 17:54:05 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoah_globals.hpp Thu Feb 16 15:16:19 2017 +0100 @@ -149,6 +149,9 @@ experimental(bool, ShenandoahWriteBarrierToIR, true, \ "Convert write barrier to IR instead of using assembly blob") \ \ + experimental(bool, ShenandoahWriteBarrierCsetTestInIR, true, \ + "Perform cset test in IR rather than in the 
stub") \ + \ experimental(bool, UseShenandoahOWST, true, \ "Use Shenandoah work stealing termination protocol") \ \ diff -r aaf244db1ec5 -r 52fb36602ecb src/share/vm/opto/loopnode.hpp --- a/src/share/vm/opto/loopnode.hpp Thu Feb 16 17:54:05 2017 +0100 +++ b/src/share/vm/opto/loopnode.hpp Thu Feb 16 15:16:19 2017 +0100 @@ -1087,8 +1087,24 @@ void shenandoah_follow_barrier_uses(Node* n, Node* ctrl, Unique_Node_List& uses); bool shenandoah_already_has_better_phi(Node* region, int alias, Node* m, Node* m_ctrl); void shenandoah_collect_memory_nodes(int alias, Node_List& memory_nodes, Node_List& phis); - void shenandoah_collect_memory_nodes_helper(Node* n, int alias, GrowableArray& inputs, int adj, Node_List& memory_nodes, Node_List& phis, Node*& cur_mem, Unique_Node_List& wq); - void shenandoah_fix_raw_mem(Node* ctrl, Node* region, Node* raw_mem, Node* raw_mem_phi, Node_List& memory_nodes, Node_List& memory_phis, Unique_Node_List& uses); + void shenandoah_collect_memory_nodes_helper(Node* n, int alias, GrowableArray& inputs, int adj, + Node_List& memory_nodes, Node_List& phis, Node*& cur_mem, + Unique_Node_List& wq); + void shenandoah_fix_raw_mem(Node* ctrl, Node* region, Node* raw_mem, Node* raw_mem_phi, Node_List& memory_nodes, + Node_List& memory_phis, Unique_Node_List& uses); + void shenandoah_test_evacuation_in_progress(Node* ctrl, int alias, Node*& raw_mem, Node*& wb_mem, + IfNode*& evacuation_iff, Node*& evac_in_progress, + Node*& evac_not_in_progress); + void shenandoah_evacuation_not_in_progress(Node* c, Node* v, Node* unc_ctrl, Node* raw_mem, Node* wb_mem, Node* region, + Node* val_phi, Node* mem_phi, Node* raw_mem_phi, Node*& unc_region); + void shenandoah_evacuation_in_progress(Node* c, Node* val, Node* evacuation_iff, Node* unc, Node* unc_ctrl, + Node* raw_mem, Node* wb_mem, Node* region, Node* val_phi, Node* mem_phi, + Node* raw_mem_phi, Node* unc_region, int alias, Unique_Node_List& uses); + void shenandoah_evacuation_not_in_progress_null_check(Node*& c, Node*& val, Node* unc_ctrl, Node*& unc_region); + void shenandoah_evacuation_in_progress_null_check(Node*& c, Node*& val, Node* evacuation_iff, Node* unc, Node* unc_ctrl, + Node* unc_region, Unique_Node_List& uses); + void shenandoah_in_cset_fast_test(Node*& c, Node* rbtrue, Node* raw_mem, Node* wb_mem, Node* region, Node* val_phi, + Node* mem_phi, Node* raw_mem_phi); bool _created_loop_node; public: diff -r aaf244db1ec5 -r 52fb36602ecb src/share/vm/opto/shenandoahSupport.cpp --- a/src/share/vm/opto/shenandoahSupport.cpp Thu Feb 16 17:54:05 2017 +0100 +++ b/src/share/vm/opto/shenandoahSupport.cpp Thu Feb 16 15:16:19 2017 +0100 @@ -22,6 +22,8 @@ */ #include "gc_implementation/shenandoah/brooksPointer.hpp" +#include "gc_implementation/shenandoah/shenandoahHeapRegion.hpp" +#include "gc_implementation/shenandoah/shenandoahHeap.hpp" #include "opto/callnode.hpp" #include "opto/connode.hpp" #include "opto/phaseX.hpp" @@ -3251,6 +3253,248 @@ #endif } +void PhaseIdealLoop::shenandoah_test_evacuation_in_progress(Node* ctrl, int alias, Node*& raw_mem, Node*& wb_mem, + IfNode*& evacuation_iff, Node*& evac_in_progress, + Node*& evac_not_in_progress) { + IdealLoopTree *loop = get_loop(ctrl); + Node* thread = new (C) ThreadLocalNode(); + register_new_node(thread, ctrl); + Node* offset = _igvn.MakeConX(in_bytes(JavaThread::evacuation_in_progress_offset())); + set_ctrl(offset, C->root()); + Node* evacuation_in_progress_adr = new (C) AddPNode(C->top(), thread, offset); + register_new_node(evacuation_in_progress_adr, ctrl); + uint 
evacuation_in_progress_idx = Compile::AliasIdxRaw; + const TypePtr* evacuation_in_progress_adr_type = NULL; // debug-mode-only argument + debug_only(evacuation_in_progress_adr_type = C->get_adr_type(evacuation_in_progress_idx)); + + Node* evacuation_in_progress = new (C) LoadUBNode(ctrl, raw_mem, evacuation_in_progress_adr, + evacuation_in_progress_adr_type, TypeInt::BOOL, MemNode::unordered); + register_new_node(evacuation_in_progress, ctrl); + + Node* mb = MemBarNode::make(C, Op_MemBarAcquire, Compile::AliasIdxRaw); + mb->init_req(TypeFunc::Control, ctrl); + mb->init_req(TypeFunc::Memory, raw_mem); + register_control(mb, loop, ctrl); + Node* ctrl_proj = new (C) ProjNode(mb,TypeFunc::Control); + register_control(ctrl_proj, loop, mb); + raw_mem = new (C) ProjNode(mb, TypeFunc::Memory); + register_new_node(raw_mem, mb); + + mb = MemBarNode::make(C, Op_MemBarAcquire, alias); + mb->init_req(TypeFunc::Control, ctrl_proj); + mb->init_req(TypeFunc::Memory, wb_mem); + register_control(mb, loop, ctrl_proj); + ctrl_proj = new (C) ProjNode(mb,TypeFunc::Control); + register_control(ctrl_proj, loop, mb); + wb_mem = new (C) ProjNode(mb,TypeFunc::Memory); + register_new_node(wb_mem, mb); + + Node* evacuation_in_progress_cmp = new (C) CmpINode(evacuation_in_progress, _igvn.zerocon(T_INT)); + register_new_node(evacuation_in_progress_cmp, ctrl_proj); + Node* evacuation_in_progress_test = new (C) BoolNode(evacuation_in_progress_cmp, BoolTest::ne); + register_new_node(evacuation_in_progress_test, ctrl_proj); + evacuation_iff = new (C) IfNode(ctrl_proj, evacuation_in_progress_test, PROB_UNLIKELY(0.999), COUNT_UNKNOWN); + register_control(evacuation_iff, loop, ctrl_proj); + + evac_not_in_progress = new (C) IfFalseNode(evacuation_iff); + register_control(evac_not_in_progress, loop, evacuation_iff); + evac_in_progress = new (C) IfTrueNode(evacuation_iff); + register_control(evac_in_progress, loop, evacuation_iff); +} + +void PhaseIdealLoop::shenandoah_evacuation_not_in_progress_null_check(Node*& c, Node*& val, Node* unc_ctrl, Node*& unc_region) { + if (unc_ctrl != NULL) { + // Clone the null check in this branch to allow implicit null check + IdealLoopTree *loop = get_loop(c); + Node* iff = unc_ctrl->in(0); + assert(iff->is_If(), "broken"); + Node* new_iff = iff->clone(); + new_iff->set_req(0, c); + register_control(new_iff, loop, c); + Node* iffalse = new (C) IfFalseNode(new_iff->as_If()); + register_control(iffalse, loop, new_iff); + Node* iftrue = new (C) IfTrueNode(new_iff->as_If()); + register_control(iftrue, loop, new_iff); + c = iftrue; + unc_region = new (C) RegionNode(3); + unc_region->init_req(1, iffalse); + const Type *t = _igvn.type(val); + assert(val->Opcode() == Op_CastPP, "expect cast to non null here"); + Node* uncasted_val = val->in(1); + val = new (C) CastPPNode(uncasted_val, t); + val->init_req(0, c); + register_new_node(val, c); + } +} + +void PhaseIdealLoop::shenandoah_evacuation_not_in_progress(Node* c, Node* val, Node* unc_ctrl, Node* raw_mem, Node* wb_mem, Node* region, + Node* val_phi, Node* mem_phi, Node* raw_mem_phi, Node*& unc_region) { + shenandoah_evacuation_not_in_progress_null_check(c, val, unc_ctrl, unc_region); + region->init_req(1, c); + Node* rbfalse = new (C) ShenandoahReadBarrierNode(c, wb_mem, val); + register_new_node(rbfalse, c); + val_phi->init_req(1, rbfalse); + mem_phi->init_req(1, wb_mem); + raw_mem_phi->init_req(1, raw_mem); +} + +void PhaseIdealLoop::shenandoah_evacuation_in_progress_null_check(Node*& c, Node*& val, Node* evacuation_iff, Node* unc, Node* unc_ctrl, + 
Node* unc_region, Unique_Node_List& uses) { + if (unc != NULL) { + // Clone the null check in this branch to allow implicit null check + IdealLoopTree *loop = get_loop(c); + Node* iff = unc_ctrl->in(0); + assert(iff->is_If(), "broken"); + Node* new_iff = iff->clone(); + new_iff->set_req(0, c); + register_control(new_iff, loop, c); + Node* iffalse = new (C) IfFalseNode(new_iff->as_If()); + register_control(iffalse, loop, new_iff); + Node* iftrue = new (C) IfTrueNode(new_iff->as_If()); + register_control(iftrue, loop, new_iff); + c = iftrue; + unc_region->init_req(2, iffalse); + + Node* proj = iff->as_If()->proj_out(0); + assert(proj != unc_ctrl, "bad projection"); + Node* use = proj->unique_ctrl_out(); + + assert(use == unc || use->is_Region(), "what else?"); + + uses.clear(); + if (use == unc) { + set_idom(use, unc_region, dom_depth(unc_region)+1); + for (uint i = 1; i < unc->req(); i++) { + Node* n = unc->in(i); + if (has_ctrl(n) && get_ctrl(n) == proj) { + uses.push(n); + } + } + } else { + assert(use->is_Region(), "what else?"); + uint idx = 1; + for (; use->in(idx) != proj; idx++); + for (DUIterator_Fast imax, i = use->fast_outs(imax); i < imax; i++) { + Node* u = use->fast_out(i); + if (u->is_Phi() && get_ctrl(u->in(idx)) == proj) { + uses.push(u->in(idx)); + } + } + } + for(uint next = 0; next < uses.size(); next++ ) { + Node *n = uses.at(next); + assert(get_ctrl(n) == proj, "bad control"); + set_ctrl_and_loop(n, unc_region); + if (n->in(0) == proj) { + _igvn.replace_input_of(n, 0, unc_region); + } + for (uint i = 0; i < n->req(); i++) { + Node* m = n->in(i); + if (m != NULL && has_ctrl(m) && get_ctrl(m) == proj) { + uses.push(m); + } + } + } + + _igvn.rehash_node_delayed(use); + int nb = use->replace_edge(proj, unc_region); + assert(nb == 1, "only use expected"); + register_control(unc_region, _ltree_root, evacuation_iff); + + _igvn.replace_input_of(iff, 1, _igvn.intcon(1)); + const Type *t = _igvn.type(val); + assert(val->Opcode() == Op_CastPP, "expect cast to non null here"); + Node* uncasted_val = val->in(1); + val = new (C) CastPPNode(uncasted_val, t); + val->init_req(0, c); + register_new_node(val, c); + } +} + +void PhaseIdealLoop::shenandoah_in_cset_fast_test(Node*& c, Node* rbtrue, Node* raw_mem, Node* wb_mem, Node* region, Node* val_phi, Node* mem_phi, + Node* raw_mem_phi) { + if (ShenandoahWriteBarrierCsetTestInIR) { + IdealLoopTree *loop = get_loop(c); + Node* raw_rbtrue = new (C) CastP2XNode(c, rbtrue); + register_new_node(raw_rbtrue, c); + Node* cset_offset = new (C) URShiftXNode(raw_rbtrue, _igvn.intcon(ShenandoahHeapRegion::RegionSizeShift)); + register_new_node(cset_offset, c); + Node* in_cset_fast_test_base_addr = _igvn.makecon(TypeRawPtr::make(ShenandoahHeap::in_cset_fast_test_addr())); + set_ctrl(in_cset_fast_test_base_addr, C->root()); + Node* in_cset_fast_test_adr = new (C) AddPNode(C->top(), in_cset_fast_test_base_addr, cset_offset); + register_new_node(in_cset_fast_test_adr, c); + uint in_cset_fast_test_idx = Compile::AliasIdxRaw; + const TypePtr* in_cset_fast_test_adr_type = NULL; // debug-mode-only argument + debug_only(in_cset_fast_test_adr_type = C->get_adr_type(in_cset_fast_test_idx)); + Node* in_cset_fast_test_load = new (C) LoadUBNode(c, raw_mem, in_cset_fast_test_adr, in_cset_fast_test_adr_type, TypeInt::BOOL, MemNode::unordered); + register_new_node(in_cset_fast_test_load, c); + Node* in_cset_fast_test_cmp = new (C) CmpINode(in_cset_fast_test_load, _igvn.zerocon(T_INT)); + register_new_node(in_cset_fast_test_cmp, c); + Node* in_cset_fast_test_test = 
new (C) BoolNode(in_cset_fast_test_cmp, BoolTest::ne); + register_new_node(in_cset_fast_test_test, c); + IfNode* in_cset_fast_test_iff = new (C) IfNode(c, in_cset_fast_test_test, PROB_UNLIKELY(0.999), COUNT_UNKNOWN); + register_control(in_cset_fast_test_iff, loop, c); + + Node* in_cset_fast_test_success = new (C) IfFalseNode(in_cset_fast_test_iff); + register_control(in_cset_fast_test_success, loop, in_cset_fast_test_iff); + + region->init_req(3, in_cset_fast_test_success); + val_phi->init_req(3, rbtrue); + mem_phi->init_req(3, wb_mem); + raw_mem_phi->init_req(3, raw_mem); + + Node* in_cset_fast_test_failure = new (C) IfTrueNode(in_cset_fast_test_iff); + register_control(in_cset_fast_test_failure, loop, in_cset_fast_test_iff); + + c = in_cset_fast_test_failure; + } +} + +void PhaseIdealLoop::shenandoah_evacuation_in_progress(Node* c, Node* val, Node* evacuation_iff, Node* unc, Node* unc_ctrl, + Node* raw_mem, Node* wb_mem, Node* region, Node* val_phi, Node* mem_phi, + Node* raw_mem_phi, Node* unc_region, int alias, Unique_Node_List& uses) { + shenandoah_evacuation_in_progress_null_check(c, val, evacuation_iff, unc, unc_ctrl, unc_region, uses); + + IdealLoopTree *loop = get_loop(c); + Node* rbtrue = new (C) ShenandoahReadBarrierNode(c, raw_mem, val); + register_new_node(rbtrue, c); + + Node* in_cset_fast_test_failure = NULL; + shenandoah_in_cset_fast_test(c, rbtrue, raw_mem, wb_mem, region, val_phi, mem_phi, raw_mem_phi); + + // The slow path stub consumes and produces raw memory in addition + // to the existing memory edges + Node* base = shenandoah_find_bottom_mem(c); + + MergeMemNode* mm = MergeMemNode::make(C, base); + mm->set_memory_at(alias, wb_mem); + mm->set_memory_at(Compile::AliasIdxRaw, raw_mem); + register_new_node(mm, c); + + Node* call = new (C) CallLeafNoFPNode(OptoRuntime::shenandoah_write_barrier_Type(), StubRoutines::shenandoah_wb_C(), "shenandoah_write_barrier", TypeRawPtr::BOTTOM); + call->init_req(TypeFunc::Control, c); + call->init_req(TypeFunc::I_O, C->top()); + call->init_req(TypeFunc::Memory, mm); + call->init_req(TypeFunc::FramePtr, C->top()); + call->init_req(TypeFunc::ReturnAdr, C->top()); + call->init_req(TypeFunc::Parms, rbtrue); + register_control(call, loop, c); + Node* ctrl_proj = new (C) ProjNode(call, TypeFunc::Control); + register_control(ctrl_proj, loop, call); + Node* mem_proj = new (C) ProjNode(call, TypeFunc::Memory); + register_new_node(mem_proj, call); + Node* res_proj = new (C) ProjNode(call, TypeFunc::Parms); + register_new_node(res_proj, call); + Node* res = new (C) CheckCastPPNode(ctrl_proj, res_proj, _igvn.type(val)); + register_new_node(res, ctrl_proj); + region->init_req(2, ctrl_proj); + val_phi->init_req(2, res); + mem_phi->init_req(2, mem_proj); + raw_mem_phi->init_req(2, mem_proj); + register_control(region, loop, evacuation_iff); + +} + void PhaseIdealLoop::shenandoah_pin_and_expand_barriers() { const bool trace = false; Node_List memory_nodes; @@ -3303,16 +3547,9 @@ Node* ctrl = get_ctrl(wb); Node* raw_mem = shenandoah_find_raw_mem(ctrl, wb, memory_nodes, memory_phis, true); + Node* init_raw_mem = raw_mem; int alias = C->get_alias_index(wb->adr_type()); - Node* mem = wb->in(ShenandoahBarrierNode::Memory); - - // The slow path stub consumes and produces raw memory in addition - // to the existing memory edges - Node* base = shenandoah_find_bottom_mem(ctrl); - MergeMemNode* mm = MergeMemNode::make(C, base); - mm->set_memory_at(alias, mem); - mm->set_memory_at(Compile::AliasIdxRaw, raw_mem); - register_new_node(mm, ctrl); + Node* wb_mem = 
wb->in(ShenandoahBarrierNode::Memory); Node* val = wb->in(ShenandoahBarrierNode::ValueIn); Node* wbproj = wb->find_out_with(Op_ShenandoahWBMemProj); @@ -3321,175 +3558,37 @@ assert(val->Opcode() != Op_ShenandoahWriteBarrier || C->has_irreducible_loop(), "No chain of write barriers"); CallStaticJavaNode* unc = shenandoah_pin_and_expand_barriers_null_check(wb); - Node* unc_ctrl = val->in(0); - if (unc != NULL && val->in(0) != ctrl) { - unc = NULL; + Node* unc_ctrl = NULL; + if (unc != NULL) { + if (val->in(0) != ctrl) { + unc = NULL; + } else { + unc_ctrl = val->in(0); + } } Node* uncasted_val = val; if (unc != NULL) { uncasted_val = val->in(1); } - Node* thread = new (C) ThreadLocalNode(); - register_new_node(thread, ctrl); - Node* offset = _igvn.MakeConX(in_bytes(JavaThread::evacuation_in_progress_offset())); - set_ctrl(offset, C->root()); - Node* evacuation_in_progress_adr = new (C) AddPNode(C->top(), thread, offset); - register_new_node(evacuation_in_progress_adr, ctrl); - uint evacuation_in_progress_idx = Compile::AliasIdxRaw; - const TypePtr* evacuation_in_progress_adr_type = NULL; // debug-mode-only argument - debug_only(evacuation_in_progress_adr_type = C->get_adr_type(evacuation_in_progress_idx)); - Node* evacuation_in_progress = new (C) LoadUBNode(ctrl, raw_mem, evacuation_in_progress_adr, evacuation_in_progress_adr_type, TypeInt::BOOL, MemNode::unordered); - register_new_node(evacuation_in_progress, ctrl); + Node* evac_in_progress = NULL; + Node* evac_not_in_progress = NULL; + IfNode* evacuation_iff = NULL; + shenandoah_test_evacuation_in_progress(ctrl, alias, raw_mem, wb_mem, evacuation_iff, evac_in_progress, evac_not_in_progress); - Node* mb = MemBarNode::make(C, Op_MemBarAcquire, Compile::AliasIdxRaw); - mb->init_req(TypeFunc::Control, ctrl); - mb->init_req(TypeFunc::Memory, mm); - register_control(mb, loop, ctrl); - Node* ctrl_proj = new (C) ProjNode(mb,TypeFunc::Control); - register_control(ctrl_proj, loop, mb); - Node* mem_proj = new (C) ProjNode(mb,TypeFunc::Memory); - register_new_node(mem_proj, mb); + Node* region = new (C) RegionNode(4); + Node* val_phi = PhiNode::make_blank(region, val); + Node* mem_phi = PhiNode::make(region, wb_mem, Type::MEMORY, C->alias_type(wb->adr_type())->adr_type()); + Node* raw_mem_phi = PhiNode::make(region, raw_mem, Type::MEMORY, TypeRawPtr::BOTTOM); - Node* evacuation_in_progress_cmp = new (C) CmpINode(evacuation_in_progress, _igvn.zerocon(T_INT)); - register_new_node(evacuation_in_progress_cmp, ctrl_proj); - Node* evacuation_in_progress_test = new (C) BoolNode(evacuation_in_progress_cmp, BoolTest::ne); - register_new_node(evacuation_in_progress_test, ctrl_proj); - IfNode* evacuation_iff = new (C) IfNode(ctrl_proj, evacuation_in_progress_test, PROB_UNLIKELY(0.999), COUNT_UNKNOWN); - register_control(evacuation_iff, loop, ctrl_proj); - Node* region = new (C) RegionNode(3); - Node* val_phi = PhiNode::make_blank(region, val); + Node* unc_region = NULL; + shenandoah_evacuation_not_in_progress(evac_not_in_progress, val, unc_ctrl, raw_mem, wb_mem, + region, val_phi, mem_phi, raw_mem_phi, unc_region); - Node* mem_phi = PhiNode::make(region, mem_proj, Type::MEMORY, C->alias_type(wb->adr_type())->adr_type()); - Node* raw_mem_phi = PhiNode::make(region, mem_proj, Type::MEMORY, TypeRawPtr::BOTTOM); - Node* iffalse = new (C) IfFalseNode(evacuation_iff); - register_control(iffalse, loop, evacuation_iff); - Node* iftrue = new (C) IfTrueNode(evacuation_iff); - register_control(iftrue, loop, evacuation_iff); - - Node* c = iffalse; - Node* v = uncasted_val; 
- Node* unc_region = NULL; - if (unc != NULL) { - // Clone the null check in this branch to allow implicit null check - Node* iff = unc_ctrl->in(0); - assert(iff->is_If(), "broken"); - Node* new_iff = iff->clone(); - new_iff->set_req(0, c); - register_control(new_iff, loop, c); - Node* iffalse = new (C) IfFalseNode(new_iff->as_If()); - register_control(iffalse, loop, new_iff); - Node* iftrue = new (C) IfTrueNode(new_iff->as_If()); - register_control(iftrue, loop, new_iff); - c = iftrue; - unc_region = new (C) RegionNode(3); - unc_region->init_req(1, iffalse); - const Type *t = _igvn.type(val); - v = new (C) CastPPNode(uncasted_val, t); - v->init_req(0, c); - register_new_node(v, c); - } - region->init_req(1, c); - Node* rbfalse = new (C) ShenandoahReadBarrierNode(c, mem_proj, v); - register_new_node(rbfalse, c); - val_phi->init_req(1, rbfalse); - mem_phi->init_req(1, mem_proj); - raw_mem_phi->init_req(1, mem_proj); - - c = iftrue; - - if (unc != NULL) { - // Clone the null check in this branch to allow implicit null check - Node* iff = unc_ctrl->in(0); - assert(iff->is_If(), "broken"); - Node* new_iff = iff->clone(); - new_iff->set_req(0, c); - register_control(new_iff, loop, c); - Node* iffalse = new (C) IfFalseNode(new_iff->as_If()); - register_control(iffalse, loop, new_iff); - Node* iftrue = new (C) IfTrueNode(new_iff->as_If()); - register_control(iftrue, loop, new_iff); - c = iftrue; - unc_region->init_req(2, iffalse); - - Node* proj = iff->as_If()->proj_out(0); - assert(proj != unc_ctrl, "bad projection"); - Node* use = proj->unique_ctrl_out(); - - assert(use == unc || use->is_Region(), "what else?"); - - uses.clear(); - if (use == unc) { - set_idom(use, unc_region, dom_depth(unc_region)+1); - for (uint i = 1; i < unc->req(); i++) { - Node* n = unc->in(i); - if (has_ctrl(n) && get_ctrl(n) == proj) { - uses.push(n); - } - } - } else { - assert(use->is_Region(), "what else?"); - uint idx = 1; - for (; use->in(idx) != proj; idx++); - for (DUIterator_Fast imax, i = use->fast_outs(imax); i < imax; i++) { - Node* u = use->fast_out(i); - if (u->is_Phi() && get_ctrl(u->in(idx)) == proj) { - uses.push(u->in(idx)); - } - } - } - for(uint next = 0; next < uses.size(); next++ ) { - Node *n = uses.at(next); - assert(get_ctrl(n) == proj, "bad control"); - set_ctrl_and_loop(n, unc_region); - if (n->in(0) == proj) { - _igvn.replace_input_of(n, 0, unc_region); - } - for (uint i = 0; i < n->req(); i++) { - Node* m = n->in(i); - if (m != NULL && has_ctrl(m) && get_ctrl(m) == proj) { - uses.push(m); - } - } - } - - _igvn.rehash_node_delayed(use); - int nb = use->replace_edge(proj, unc_region); - assert(nb == 1, "only use expected"); - register_control(unc_region, _ltree_root, evacuation_iff); - - _igvn.replace_input_of(iff, 1, _igvn.intcon(1)); - const Type *t = _igvn.type(val); - v = new (C) CastPPNode(uncasted_val, t); - v->init_req(0, c); - register_new_node(v, c); - } - - Node* rbtrue = new (C) ShenandoahReadBarrierNode(c, mem_proj, v); - register_new_node(rbtrue, c); - - Node* call = new (C) CallLeafNoFPNode(OptoRuntime::shenandoah_write_barrier_Type(), StubRoutines::shenandoah_wb_C(), "shenandoah_write_barrier", TypeRawPtr::BOTTOM); - call->init_req(TypeFunc::Control, c); - call->init_req(TypeFunc::I_O, C->top()); - call->init_req(TypeFunc::Memory, mem_proj); - call->init_req(TypeFunc::FramePtr, C->top()); - call->init_req(TypeFunc::ReturnAdr, C->top()); - call->init_req(TypeFunc::Parms, rbtrue); - register_control(call, loop, c); - ctrl_proj = new (C) ProjNode(call, TypeFunc::Control); - 
register_control(ctrl_proj, loop, call); - mem_proj = new (C) ProjNode(call, TypeFunc::Memory); - register_new_node(mem_proj, call); - Node* res_proj = new (C) ProjNode(call, TypeFunc::Parms); - register_new_node(res_proj, call); - Node* res = new (C) CheckCastPPNode(ctrl_proj, res_proj, _igvn.type(val)); - register_new_node(res, ctrl_proj); - region->init_req(2, ctrl_proj); - val_phi->init_req(2, res); - mem_phi->init_req(2, mem_proj); - raw_mem_phi->init_req(2, mem_proj); - register_control(region, loop, evacuation_iff); + shenandoah_evacuation_in_progress(evac_in_progress, val, evacuation_iff, unc, unc_ctrl, + raw_mem, wb_mem, region, val_phi, mem_phi, raw_mem_phi, + unc_region, alias, uses); Node* out_val = val_phi; register_new_node(val_phi, region); register_new_node(mem_phi, region); @@ -3503,8 +3602,8 @@ // its memory is control dependent on the barrier's input control) // must stay above the barrier. uses_to_ignore.clear(); - if (has_ctrl(raw_mem) && get_ctrl(raw_mem) == ctrl && !raw_mem->is_Phi()) { - uses_to_ignore.push(raw_mem); + if (has_ctrl(init_raw_mem) && get_ctrl(init_raw_mem) == ctrl && !init_raw_mem->is_Phi()) { + uses_to_ignore.push(init_raw_mem); } for (uint next = 0; next < uses_to_ignore.size(); next++) { Node *n = uses_to_ignore.at(next); @@ -3534,7 +3633,7 @@ set_idom(u, region, dom_depth(region)); } } else if (get_ctrl(u) == ctrl) { - assert(u != raw_mem, "should leave input raw mem above the barrier"); + assert(u != init_raw_mem, "should leave input raw mem above the barrier"); uses.push(u); } assert(nb == 1, "more than 1 ctrl input?"); @@ -3570,7 +3669,7 @@ for(uint next = 0; next < uses.size(); next++ ) { Node *n = uses.at(next); assert(get_ctrl(n) == ctrl, "bad control"); - assert(n != raw_mem, "should leave input raw mem above the barrier"); + assert(n != init_raw_mem, "should leave input raw mem above the barrier"); set_ctrl(n, region); shenandoah_follow_barrier_uses(n, ctrl, uses); } @@ -3583,7 +3682,7 @@ // region and at enclosing loop heads. Use the memory state // collected in memory_nodes to fix the memory graph. Update that // memory state as we go. 
- shenandoah_fix_raw_mem(ctrl ,region, raw_mem, raw_mem_phi, memory_nodes, memory_phis, uses); + shenandoah_fix_raw_mem(ctrl ,region, init_raw_mem, raw_mem_phi, memory_nodes, memory_phis, uses); assert(C->shenandoah_barriers_count() == cnt - 1, "not replaced"); } changeset: 9559:d3495160a06b user: roland date: Thu Feb 16 17:02:59 2017 +0100 summary: pre barrier for scalarized objects should be removed diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/callnode.cpp --- a/src/share/vm/opto/callnode.cpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/callnode.cpp Thu Feb 16 17:02:59 2017 +0100 @@ -1019,6 +1019,34 @@ } #endif +Node *CallLeafNode::Ideal(PhaseGVN *phase, bool can_reshape) { + if (is_g1_wb_pre_call()) { + uint cnt = OptoRuntime::g1_wb_pre_Type()->domain()->cnt(); + if (req() > cnt) { + Node* addp = in(cnt); + if (has_only_g1_wb_pre_uses(addp)) { + del_req(cnt); + if (can_reshape) { + phase->is_IterGVN()->_worklist.push(addp); + } + return this; + } + } + } + + return CallNode::Ideal(phase, can_reshape); +} + +bool CallLeafNode::has_only_g1_wb_pre_uses(Node* n) { + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node* u = n->fast_out(i); + if (!u->is_g1_wb_pre_call()) { + return false; + } + } + return n->outcnt() > 0; +} + //============================================================================= void SafePointNode::set_local(JVMState* jvms, uint idx, Node *c) { diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/callnode.hpp --- a/src/share/vm/opto/callnode.hpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/callnode.hpp Thu Feb 16 17:02:59 2017 +0100 @@ -780,6 +780,11 @@ } virtual int Opcode() const; virtual bool guaranteed_safepoint() { return false; } + virtual bool is_g1_wb_pre_call() const { return entry_point() == CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre); } + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); + + static bool has_only_g1_wb_pre_uses(Node* n); + #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/cfgnode.hpp --- a/src/share/vm/opto/cfgnode.hpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/cfgnode.hpp Thu Feb 16 17:02:59 2017 +0100 @@ -356,6 +356,8 @@ // Returns NULL is it couldn't improve the type. 
static const TypeInt* filtered_int_type(PhaseGVN* phase, Node* val, Node* if_proj); + bool is_g1_marking_if(PhaseTransform *phase) const; + #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/compile.cpp Thu Feb 16 17:02:59 2017 +0100 @@ -411,6 +411,11 @@ if (n->outcnt() == 1 && n->has_special_unique_user()) { record_for_igvn(n->unique_out()); } + if (n->Opcode() == Op_AddP && CallLeafNode::has_only_g1_wb_pre_uses(n)) { + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + record_for_igvn(n->fast_out(i)); + } + } } // Remove useless macro and predicate opaq nodes for (int i = C->macro_count()-1; i >= 0; i--) { @@ -2753,6 +2758,15 @@ case Op_CallLeafNoFP: { assert( n->is_Call(), "" ); CallNode *call = n->as_Call(); + if (UseShenandoahGC && call->is_g1_wb_pre_call()) { + uint cnt = OptoRuntime::g1_wb_pre_Type()->domain()->cnt(); + if (call->req() > cnt) { + assert(call->req() == cnt+1, "only one extra input"); + Node* addp = call->in(cnt); + assert(!CallLeafNode::has_only_g1_wb_pre_uses(addp), "useless address computation?"); + call->del_req(cnt); + } + } // Count call sites where the FP mode bit would have to be flipped. // Do not count uncommon runtime calls: // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking, diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/graphKit.cpp Thu Feb 16 17:02:59 2017 +0100 @@ -3869,6 +3869,15 @@ final_sync(ideal); } +static void g1_write_barrier_pre_helper(const GraphKit& kit, Node* adr) { + if (UseShenandoahGC && adr != NULL) { + Node* c = kit.control(); + Node* call = c->in(1)->in(1)->in(1)->in(0); + assert(call->is_g1_wb_pre_call(), "g1_wb_pre call expected"); + call->add_req(adr); + } +} + // G1 pre/post barriers void GraphKit::g1_write_barrier_pre(bool do_load, Node* obj, @@ -3967,6 +3976,7 @@ // Final sync IdealKit and GraphKit. 
final_sync(ideal); + g1_write_barrier_pre_helper(*this, adr); } // diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/ifnode.cpp --- a/src/share/vm/opto/ifnode.cpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/ifnode.cpp Thu Feb 16 17:02:59 2017 +0100 @@ -1196,6 +1196,22 @@ return iff; } +bool IfNode::is_g1_marking_if(PhaseTransform *phase) const { + if (Opcode() != Op_If) { + return false; + } + + Node* bol = in(1); + assert(bol->is_Bool(), ""); + Node* cmpx = bol->in(1); + if (bol->as_Bool()->_test._test == BoolTest::ne && + cmpx->is_Cmp() && cmpx->in(2) == phase->intcon(0) && + cmpx->in(1)->is_g1_marking_load()) { + return true; + } + return false; +} + //------------------------------Identity--------------------------------------- // If the test is constant & we match, then we are the input Control Node *IfFalseNode::Identity( PhaseTransform *phase ) { diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/macro.cpp --- a/src/share/vm/opto/macro.cpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/macro.cpp Thu Feb 16 17:02:59 2017 +0100 @@ -282,22 +282,10 @@ if (!this_region->in(ind)->is_IfFalse()) { ind = 2; } - if (this_region->in(ind)->is_IfFalse()) { - Node* bol = this_region->in(ind)->in(0)->in(1); - assert(bol->is_Bool(), ""); - cmpx = bol->in(1); - if (bol->as_Bool()->_test._test == BoolTest::ne && - cmpx->is_Cmp() && cmpx->in(2) == intcon(0) && - cmpx->in(1)->is_Load()) { - Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address); - const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + - PtrQueue::byte_offset_of_active()); - if (adr->is_AddP() && adr->in(AddPNode::Base) == top() && - adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal && - adr->in(AddPNode::Offset) == MakeConX(marking_offset)) { - _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ)); - } - } + if (this_region->in(ind)->is_IfFalse() && + this_region->in(ind)->in(0)->is_g1_marking_if(&_igvn)) { + Node* cmpx = this_region->in(ind)->in(0)->in(1)->in(1); + _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ)); } } // Now CastP2X can be removed since it is used only on dead path @@ -307,6 +295,26 @@ } } +void PhaseMacroExpand::eliminate_g1_wb_pre(Node* n) { + Node* c = n->as_Call()->proj_out(TypeFunc::Control); + c = c->unique_ctrl_out(); + assert(c->is_Region() && c->req() == 3, "where's the pre barrier control flow?"); + c = c->unique_ctrl_out(); + assert(c->is_Region() && c->req() == 3, "where's the pre barrier control flow?"); + Node* iff = c->in(1)->is_IfProj() ? c->in(1)->in(0) : c->in(2)->in(0); + assert(iff->is_If(), "expect test"); + if (!iff->is_g1_marking_if(&_igvn)) { + c = c->unique_ctrl_out(); + assert(c->is_Region() && c->req() == 3, "where's the pre barrier control flow?"); + iff = c->in(1)->is_IfProj() ? c->in(1)->in(0) : c->in(2)->in(0); + assert(iff->is_g1_marking_if(&_igvn), "expect marking test"); + } + Node* cmpx = iff->in(1)->in(1); + _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ)); + _igvn.rehash_node_delayed(n); + n->del_req(n->req()-1); +} + // Search for a memory operation for the specified memory slice. 
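// Control flow walked by eliminate_g1_wb_pre() above, sketched (the SATB pre
// barrier expands to nested tests, so the runtime call sits behind two or --
// when a buffer-index check is also emitted -- three 3-input merge Regions):
//
//   if (marking active)            <- the If that is_g1_marking_if() matches
//     if (pre_val != NULL)
//       ...
//         CallLeaf g1_wb_pre(...)  <- n
//   Region ... Region              <- reached via unique_ctrl_out() from n
//
// Replacing the marking CmpI with a CC_EQ constant folds the whole barrier
// path away; the final del_req() drops the extra address input that
// g1_write_barrier_pre_helper() appended in graphKit.cpp.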
static Node *scan_mem_chain(Node *mem, int alias_idx, int offset, Node *start_mem, Node *alloc, PhaseGVN *phase) { Node *orig_mem = mem; @@ -615,7 +623,8 @@ for (DUIterator_Fast kmax, k = use->fast_outs(kmax); k < kmax && can_eliminate; k++) { Node* n = use->fast_out(k); - if (!n->is_Store() && n->Opcode() != Op_CastP2X) { + if (!n->is_Store() && n->Opcode() != Op_CastP2X && + !n->is_g1_wb_pre_call()) { DEBUG_ONLY(disq_node = n;) if (n->is_Load() || n->is_LoadStore()) { NOT_PRODUCT(fail_eliminate = "Field load";) @@ -886,11 +895,14 @@ } #endif _igvn.replace_node(n, n->in(MemNode::Memory)); + } else if (n->is_g1_wb_pre_call()) { + eliminate_g1_wb_pre(n); } else { eliminate_card_mark(n); } k -= (oc2 - use->outcnt()); } + _igvn.remove_dead_node(use); } else { eliminate_card_mark(use); } diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/macro.hpp --- a/src/share/vm/opto/macro.hpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/macro.hpp Thu Feb 16 17:02:59 2017 +0100 @@ -95,6 +95,7 @@ void process_users_of_allocation(CallNode *alloc); void eliminate_card_mark(Node *cm); + void eliminate_g1_wb_pre(Node *n); void mark_eliminated_box(Node* box, Node* obj); void mark_eliminated_locking_nodes(AbstractLockNode *alock); bool eliminate_locking_node(AbstractLockNode *alock); diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/memnode.cpp --- a/src/share/vm/opto/memnode.cpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/memnode.cpp Thu Feb 16 17:02:59 2017 +0100 @@ -924,7 +924,6 @@ if (!phase->type(value)->higher_equal(phase->type(this))) return this; } - Node* value_no_barrier = ShenandoahBarrierNode::skip_through_barrier(value); PhaseIterGVN* igvn = phase->is_IterGVN(); if (UseShenandoahGC && igvn != NULL && @@ -945,7 +944,21 @@ } // (This works even when value is a Con, but LoadNode::Value // usually runs first, producing the singleton type of the Con.) - return ShenandoahBarrierNode::skip_through_barrier(value); + if (UseShenandoahGC) { + Node* value_no_barrier = ShenandoahBarrierNode::skip_through_barrier(value->Opcode() == Op_EncodeP ? value->in(1) : value); + if (value->Opcode() == Op_EncodeP) { + if (value_no_barrier != value->in(1)) { + Node* encode = value->clone(); + encode->set_req(1, value_no_barrier); + encode = phase->transform(encode); + return encode; + } + } else { + return value_no_barrier; + } + } + + return value; } // Search for an existing data phi which was generated before for the same diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/node.cpp --- a/src/share/vm/opto/node.cpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/node.cpp Thu Feb 16 17:02:59 2017 +0100 @@ -1404,6 +1404,8 @@ // The restriction (outcnt() <= 2) is the same as in set_req_X() // and remove_globally_dead_node(). 
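// The hooks added here in node.cpp, together with the matching ones in
// compile.cpp and phaseX.cpp, all serve one purpose: whenever an AddP ends up
// with g1_wb_pre calls as its only remaining users (has_only_g1_wb_pre_uses()),
// push those calls back onto the IGVN worklist. That re-runs
// CallLeafNode::Ideal(), which trims the trailing address input, so the
// otherwise-dead address computation cannot linger in the graph and keep the
// barrier's inputs alive.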
igvn->add_users_to_worklist( n ); + } else if (n->Opcode() == Op_AddP && CallLeafNode::has_only_g1_wb_pre_uses(n)) { + igvn->add_users_to_worklist(n); } } } diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/node.hpp --- a/src/share/vm/opto/node.hpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/node.hpp Thu Feb 16 17:02:59 2017 +0100 @@ -900,6 +900,8 @@ bool dominates(Node* sub, Node_List &nlist); virtual bool is_g1_marking_load() const { return false; } + virtual bool is_g1_marking_if(PhaseTransform *phase) const { return false; } + virtual bool is_g1_wb_pre_call() const { return false; } protected: bool remove_dead_region(PhaseGVN *phase, bool can_reshape); diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/phaseX.cpp --- a/src/share/vm/opto/phaseX.cpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/phaseX.cpp Thu Feb 16 17:02:59 2017 +0100 @@ -1296,6 +1296,8 @@ } else if (dead->Opcode() == Op_ShenandoahWBMemProj) { assert(i == 0 && in->Opcode() == Op_ShenandoahWriteBarrier, "broken graph"); _worklist.push(in); + } else if (in->Opcode() == Op_AddP && CallLeafNode::has_only_g1_wb_pre_uses(in)) { + add_users_to_worklist(in); } if (ReduceFieldZeroing && dead->is_Load() && i == MemNode::Memory && in->is_Proj() && in->in(0) != NULL && in->in(0)->is_Initialize()) { @@ -1929,6 +1931,9 @@ default: break; } + if (old->Opcode() == Op_AddP && CallLeafNode::has_only_g1_wb_pre_uses(old)) { + igvn->add_users_to_worklist(old); + } } } diff -r 52fb36602ecb -r d3495160a06b src/share/vm/opto/superword.cpp --- a/src/share/vm/opto/superword.cpp Thu Feb 16 15:16:19 2017 +0100 +++ b/src/share/vm/opto/superword.cpp Thu Feb 16 17:02:59 2017 +0100 @@ -2314,19 +2314,21 @@ // Detect a Shenandoah write barrier between the pre and main loop // (which could break loop alignment code) CountedLoopNode *main_head = slp->lp()->as_CountedLoop(); - Node* c = main_head->in(LoopNode::EntryControl)->in(0)->in(0)->in(0); - if (!c->is_CountedLoopEnd()) { - // in case of a reserve copy - c = c->in(0)->in(0); - assert(c->is_CountedLoopEnd(), "where's the pre loop?"); - } - CountedLoopEndNode* pre_end = c->as_CountedLoopEnd(); - CountedLoopNode* pre_loop = pre_end->loopnode(); - assert(pre_loop->is_pre_loop(), "where's the pre loop?"); + if (main_head->is_main_loop()) { + Node* c = main_head->in(LoopNode::EntryControl)->in(0)->in(0)->in(0); + if (!c->is_CountedLoopEnd()) { + // in case of a reserve copy + c = c->in(0)->in(0); + assert(c->is_CountedLoopEnd(), "where's the pre loop?"); + } + CountedLoopEndNode* pre_end = c->as_CountedLoopEnd(); + CountedLoopNode* pre_loop = pre_end->loopnode(); + assert(pre_loop->is_pre_loop(), "where's the pre loop?"); - Node* base_c = phase()->get_ctrl(base); - if (!phase()->is_dominator(base_c, pre_loop)) { - return; + Node* base_c = phase()->get_ctrl(base); + if (!phase()->is_dominator(base_c, pre_loop)) { + return; + } } for (int i = 0; i < 3; i++) { if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { changeset: 9560:4666aea197d4 user: roland date: Thu Feb 16 17:06:02 2017 +0100 summary: Fixes to write barrier expansion diff -r d3495160a06b -r 4666aea197d4 src/share/vm/opto/loopnode.hpp --- a/src/share/vm/opto/loopnode.hpp Thu Feb 16 17:02:59 2017 +0100 +++ b/src/share/vm/opto/loopnode.hpp Thu Feb 16 17:06:02 2017 +0100 @@ -1090,7 +1090,8 @@ void shenandoah_collect_memory_nodes_helper(Node* n, int alias, GrowableArray& inputs, int adj, Node_List& memory_nodes, Node_List& phis, Node*& cur_mem, Unique_Node_List& wq); - void shenandoah_fix_raw_mem(Node* ctrl, Node* 
region, Node* raw_mem, Node* raw_mem_phi, Node_List& memory_nodes, + void shenandoah_fix_raw_mem(Node* ctrl, Node* region, Node* raw_mem, Node* raw_mem_for_ctrl, + Node* raw_mem_phi, Node_List& memory_nodes, Node_List& memory_phis, Unique_Node_List& uses); void shenandoah_test_evacuation_in_progress(Node* ctrl, int alias, Node*& raw_mem, Node*& wb_mem, IfNode*& evacuation_iff, Node*& evac_in_progress, @@ -1105,6 +1106,7 @@ Node* unc_region, Unique_Node_List& uses); void shenandoah_in_cset_fast_test(Node*& c, Node* rbtrue, Node* raw_mem, Node* wb_mem, Node* region, Node* val_phi, Node* mem_phi, Node* raw_mem_phi); + Node* shenandoah_get_ctrl(Node* n); bool _created_loop_node; public: diff -r d3495160a06b -r 4666aea197d4 src/share/vm/opto/shenandoahSupport.cpp --- a/src/share/vm/opto/shenandoahSupport.cpp Thu Feb 16 17:02:59 2017 +0100 +++ b/src/share/vm/opto/shenandoahSupport.cpp Thu Feb 16 17:06:02 2017 +0100 @@ -2553,22 +2553,22 @@ assert(n == NULL || ctrl_or_self(n) == ctrl, ""); Node* raw_mem = memory_for(memory_nodes[ctrl->_idx], phis); Node* c = ctrl; - while (raw_mem == NULL || (strict && get_ctrl(raw_mem) != c && (!c->is_CatchProj() || c->in(0)->in(0)->in(0) != get_ctrl(raw_mem)))) { + while (raw_mem == NULL || (strict && shenandoah_get_ctrl(raw_mem) != c && (!c->is_CatchProj() || c->in(0)->in(0)->in(0) != shenandoah_get_ctrl(raw_mem)))) { c = idom(c); raw_mem = memory_for(memory_nodes[c->_idx], phis); } - if (n != NULL && get_ctrl(raw_mem) == ctrl) { + if (n != NULL && shenandoah_get_ctrl(raw_mem) == ctrl) { while (!shenandoah_is_dominator_same_ctrl(c, raw_mem, n) && ctrl_or_self(raw_mem) == ctrl) { raw_mem = shenandoah_next_mem(raw_mem, Compile::AliasIdxRaw); } if (raw_mem->is_MergeMem()) { raw_mem = raw_mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw); } - if (get_ctrl(raw_mem) != ctrl) { + if (shenandoah_get_ctrl(raw_mem) != ctrl) { do { c = idom(c); raw_mem = memory_for(memory_nodes[c->_idx], phis); - } while (raw_mem == NULL || (strict && get_ctrl(raw_mem) != c && (!c->is_CatchProj() || c->in(0)->in(0)->in(0) != get_ctrl(raw_mem)))); + } while (raw_mem == NULL || (strict && shenandoah_get_ctrl(raw_mem) != c && (!c->is_CatchProj() || c->in(0)->in(0)->in(0) != shenandoah_get_ctrl(raw_mem)))); } } assert(raw_mem->bottom_type() == Type::MEMORY, ""); @@ -2825,14 +2825,35 @@ } if (other == other2) { memory_nodes.map(r->_idx, memory_for(phis[r->_idx], phis)); - } else if (get_ctrl(other) != get_ctrl(other2)) { + } else if (other == NULL || get_ctrl(other) != get_ctrl(other2)) { + if (other == NULL) { + other = mem; + } assert(shenandoah_is_dominator(get_ctrl(other), get_ctrl(other2), other, other2) && !shenandoah_is_dominator(get_ctrl(other2), get_ctrl(other), other2, other), ""); - memory_nodes.map(r->_idx, memory_for(phis[r->_idx], phis)); + memory_nodes.map(r->_idx, other2); } else { assert(ctrl_or_self(other2) == r && shenandoah_is_dominator_same_ctrl(r, other, other2) && !shenandoah_is_dominator_same_ctrl(r, other2, other), ""); } } +Node* PhaseIdealLoop::shenandoah_get_ctrl(Node* n) { + Node* c = get_ctrl(n); + if (n->is_Proj() && n->in(0)->is_Call()) { + assert(c == n->in(0), ""); + CallNode* call = c->as_Call(); + CallProjections projs; + call->extract_projections(&projs, true, false); + if (projs.catchall_memproj != NULL) { + if (projs.fallthrough_memproj == n) { + c = projs.fallthrough_catchproj; + } else { + assert(projs.catchall_memproj == n, ""); + c = projs.catchall_catchproj; + } + } + } + return c; +} void PhaseIdealLoop::shenandoah_collect_memory_nodes(int alias, 
Node_List& memory_nodes, Node_List& phis) { const bool trace = false; @@ -2942,24 +2963,10 @@ } else if (!n->is_Root()) { cur_mem = n; DEBUG_ONLY(if (trace) { tty->print("YYY setting cur_mem %d", __LINE__); cur_mem->dump(); }) - Node* c = get_ctrl(n); + Node* c = shenandoah_get_ctrl(n); Node* mem = memory_for(memory_nodes[c->_idx], phis); DEBUG_ONLY(if (trace) { tty->print("YYY post"); n->dump(); }) - if (n->is_Proj() && n->in(0)->is_Call()) { - assert(c == n->in(0), ""); - CallNode* call = c->as_Call(); - CallProjections projs; - call->extract_projections(&projs, true, false); - if (projs.catchall_memproj != NULL) { - if (projs.fallthrough_memproj == n) { - c = projs.fallthrough_catchproj; - } else { - assert(projs.catchall_memproj == n, ""); - c = projs.catchall_catchproj; - } - } - } - assert(mem == NULL || mem == n || shenandoah_is_dominator_same_ctrl(c, mem, n) && !shenandoah_is_dominator_same_ctrl(c, n, mem), ""); + assert(mem == NULL || mem == n || shenandoah_is_dominator(get_ctrl(mem), get_ctrl(n), mem, n) && !shenandoah_is_dominator(get_ctrl(n), get_ctrl(mem), n, mem), ""); memory_nodes.map(c->_idx, n); } stack.pop(); @@ -3001,13 +3008,14 @@ #endif } -void PhaseIdealLoop::shenandoah_fix_raw_mem(Node* ctrl, Node* region, Node* raw_mem, Node* raw_mem_phi, Node_List& memory_nodes, Node_List& memory_phis, Unique_Node_List& uses) { +void PhaseIdealLoop::shenandoah_fix_raw_mem(Node* ctrl, Node* region, Node* raw_mem, Node* raw_mem_for_ctrl, Node* raw_mem_phi, + Node_List& memory_nodes, Node_List& memory_phis, Unique_Node_List& uses) { const bool trace = false; DEBUG_ONLY(if (trace) { tty->print("ZZZ control is"); ctrl->dump(); }); DEBUG_ONLY(if (trace) { tty->print("ZZZ mem is"); raw_mem->dump(); }); GrowableArray phis; - Node* old = shenandoah_find_raw_mem(ctrl, NULL, memory_nodes, memory_phis, true); - if (old != raw_mem) { + if (raw_mem_for_ctrl != raw_mem) { + Node* old = raw_mem_for_ctrl; Node* prev = NULL; while (old != raw_mem) { assert(old->is_Store() || old->is_LoadStore() || old->is_ClearArray(), ""); @@ -3016,7 +3024,7 @@ } assert(prev != NULL, ""); memory_nodes.map(ctrl->_idx, raw_mem); - memory_nodes.map(region->_idx, old); + memory_nodes.map(region->_idx, raw_mem_for_ctrl); _igvn.replace_input_of(prev, MemNode::Memory, raw_mem_phi); } else { memory_nodes.map(region->_idx, raw_mem_phi); @@ -3195,27 +3203,29 @@ } } else if (u->is_Phi()) { assert(u->bottom_type() == Type::MEMORY, "what else?"); - Node* region = u->in(0); - bool replaced = false; - for (uint j = 1; j < u->req(); j++) { - if (u->in(j) == raw_mem) { - Node* m = shenandoah_find_raw_mem(region->in(j), NULL, memory_nodes, memory_phis, true); - Node* nnew = m; - if (m != raw_mem) { - if (u->adr_type() == TypePtr::BOTTOM) { - if (mm == NULL || 1) { - mm = shenandoah_allocate_merge_mem(raw_mem, alias, m, ctrl_or_self(m)); + if (u->adr_type() == TypeRawPtr::BOTTOM || u->adr_type() == TypePtr::BOTTOM) { + Node* region = u->in(0); + bool replaced = false; + for (uint j = 1; j < u->req(); j++) { + if (u->in(j) == raw_mem) { + Node* m = shenandoah_find_raw_mem(region->in(j), NULL, memory_nodes, memory_phis, true); + Node* nnew = m; + if (m != raw_mem) { + if (u->adr_type() == TypePtr::BOTTOM) { + if (mm == NULL || 1) { + mm = shenandoah_allocate_merge_mem(raw_mem, alias, m, ctrl_or_self(m)); + } + nnew = mm; } - nnew = mm; + DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of phi %d", j); u->dump(); }) + _igvn.replace_input_of(u, j, nnew); + replaced = true; } - DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory 
of phi %d", j); u->dump(); }) - _igvn.replace_input_of(u, j, nnew); - replaced = true; } } - } - if (replaced) { - --i; + if (replaced) { + --i; + } } } else if (u->adr_type() == TypePtr::BOTTOM || u->adr_type() == NULL) { @@ -3456,7 +3466,7 @@ shenandoah_evacuation_in_progress_null_check(c, val, evacuation_iff, unc, unc_ctrl, unc_region, uses); IdealLoopTree *loop = get_loop(c); - Node* rbtrue = new (C) ShenandoahReadBarrierNode(c, raw_mem, val); + Node* rbtrue = new (C) ShenandoahReadBarrierNode(c, wb_mem, val); register_new_node(rbtrue, c); Node* in_cset_fast_test_failure = NULL; @@ -3548,6 +3558,7 @@ Node* raw_mem = shenandoah_find_raw_mem(ctrl, wb, memory_nodes, memory_phis, true); Node* init_raw_mem = raw_mem; + Node* raw_mem_for_ctrl = shenandoah_find_raw_mem(ctrl, NULL, memory_nodes, memory_phis, true); int alias = C->get_alias_index(wb->adr_type()); Node* wb_mem = wb->in(ShenandoahBarrierNode::Memory); @@ -3682,7 +3693,7 @@ // region and at enclosing loop heads. Use the memory state // collected in memory_nodes to fix the memory graph. Update that // memory state as we go. - shenandoah_fix_raw_mem(ctrl ,region, init_raw_mem, raw_mem_phi, memory_nodes, memory_phis, uses); + shenandoah_fix_raw_mem(ctrl,region, init_raw_mem, raw_mem_for_ctrl, raw_mem_phi, memory_nodes, memory_phis, uses); assert(C->shenandoah_barriers_count() == cnt - 1, "not replaced"); } changeset: 9561:2b9f9739a2e4 user: roland date: Thu Feb 16 14:25:16 2017 +0100 summary: Null check object parameter of unsafe access even if it's known to be non null diff -r 4666aea197d4 -r 2b9f9739a2e4 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Thu Feb 16 17:06:02 2017 +0100 +++ b/src/share/vm/opto/library_call.cpp Thu Feb 16 14:25:16 2017 +0100 @@ -2379,7 +2379,16 @@ } else { if (UseShenandoahGC) { if (kind == Type::OopPtr) { - base = cast_not_null(base, false); + // A cast without a null check should be sufficient here (we + // know base is an oop with a low offset so it can't be null) + // but if there's a dominating null check with both branches + // taken and the cast is pushed in both branches, the cast + // will become top in the null branch but the control flow + // won't go away. Use a null check instead. Worst case, the + // null check becomes an implicit null check with the follow + // barrier and is essentially free. + Node* ctrl = top(); + base = null_check_oop(base, &ctrl, true); if (is_store) { base = shenandoah_write_barrier(base); } else { diff -r 4666aea197d4 -r 2b9f9739a2e4 test/gc/shenandoah/compiler/TestMaybeNullUnsafeAccess.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/shenandoah/compiler/TestMaybeNullUnsafeAccess.java Thu Feb 16 14:25:16 2017 +0100 @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @summary + * @modules java.base/jdk.internal.misc:+open + * + * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:-TieredCompilation TestMaybeNullUnsafeAccess + * + */ + +import jdk.internal.misc.Unsafe; +import java.lang.reflect.Field; + +public class TestMaybeNullUnsafeAccess { + + static final jdk.internal.misc.Unsafe UNSAFE = Unsafe.getUnsafe(); + static final long F_OFFSET; + + static class A { + int f; + } + + static { + try { + Field fField = A.class.getDeclaredField("f"); + F_OFFSET = UNSAFE.objectFieldOffset(fField); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + static A test_helper(Object o) { + return (A)o; + } + + + static int test(Object o) { + int f = 0; + for (int i = 0; i < 100; i++) { + A a = test_helper(o); + f = UNSAFE.getInt(a, F_OFFSET); + } + return f; + } + + static public void main(String[] args) { + A a = new A(); + for (int i = 0; i < 20000; i++) { + test_helper(null); + test_helper(a); + test(a); + } + } + +} changeset: 9562:8d92142d6f87 user: roland date: Thu Feb 16 17:46:02 2017 +0100 summary: g1 marking load can be either a int or boolean load diff -r 2b9f9739a2e4 -r 8d92142d6f87 src/share/vm/opto/memnode.hpp --- a/src/share/vm/opto/memnode.hpp Thu Feb 16 14:25:16 2017 +0100 +++ b/src/share/vm/opto/memnode.hpp Thu Feb 16 17:46:02 2017 +0100 @@ -250,6 +250,14 @@ // Helper function to allow a raw load without control edge for some cases static bool is_immutable_value(Node* adr); #endif + + virtual bool is_g1_marking_load() const { + const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()); + return in(2)->is_AddP() && in(2)->in(2)->Opcode() == Op_ThreadLocal + && in(2)->in(3)->is_Con() + && in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == marking_offset; + } + protected: const Type* load_array_final_field(const TypeKlassPtr *tkls, ciKlass* klass) const; @@ -291,13 +299,6 @@ virtual const Type *Value(PhaseTransform *phase) const; virtual int store_Opcode() const { return Op_StoreB; } virtual BasicType memory_type() const { return T_BYTE; } - - virtual bool is_g1_marking_load() const { - const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()); - return in(2)->is_AddP() && in(2)->in(2)->Opcode() == Op_ThreadLocal - && in(2)->in(3)->is_Con() - && in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == marking_offset; - } }; //------------------------------LoadUSNode------------------------------------- changeset: 9563:2d1b77ba27f1 user: roland date: Thu Feb 16 20:44:40 2017 +0100 summary: fix TestMaybeNullUnsafeAccess for jdk 8 diff -r 8d92142d6f87 -r 2d1b77ba27f1 test/gc/shenandoah/compiler/TestMaybeNullUnsafeAccess.java --- a/test/gc/shenandoah/compiler/TestMaybeNullUnsafeAccess.java Thu Feb 16 17:46:02 2017 +0100 +++ b/test/gc/shenandoah/compiler/TestMaybeNullUnsafeAccess.java Thu Feb 16 20:44:40 2017 +0100 @@ -23,19 +23,20 @@ /** * @test - * @summary - * @modules java.base/jdk.internal.misc:+open + * @summary cast on before unsafe access moved in dominating null check null path causes 
crash * + * @library /testlibrary * @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:-TieredCompilation TestMaybeNullUnsafeAccess * */ -import jdk.internal.misc.Unsafe; +import sun.misc.Unsafe; import java.lang.reflect.Field; +import com.oracle.java.testlibrary.*; public class TestMaybeNullUnsafeAccess { - static final jdk.internal.misc.Unsafe UNSAFE = Unsafe.getUnsafe(); + static final sun.misc.Unsafe UNSAFE = Utils.getUnsafe(); static final long F_OFFSET; static class A { changeset: 9564:7e8f2bbb312c user: shade date: Fri Feb 17 11:18:13 2017 +0100 summary: Backport TestShenandoahArgumentRanges and ShenandoahJNICritical tests. diff -r 2d1b77ba27f1 -r 7e8f2bbb312c src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Thu Feb 16 20:44:40 2017 +0100 +++ b/src/share/vm/runtime/arguments.cpp Fri Feb 17 11:18:13 2017 +0100 @@ -1723,6 +1723,24 @@ UNSUPPORTED_OPTION(UseShenandoahGC); #endif + if (!FLAG_IS_DEFAULT(ShenandoahGarbageThreshold)) { + if (0 > ShenandoahGarbageThreshold || ShenandoahGarbageThreshold > 100) { + vm_exit_during_initialization("The flag -XX:ShenandoahGarbageThreshold is out of range", NULL); + } + } + + if (!FLAG_IS_DEFAULT(ShenandoahAllocationThreshold)) { + if (0 > ShenandoahAllocationThreshold || ShenandoahAllocationThreshold > 100) { + vm_exit_during_initialization("The flag -XX:ShenandoahAllocationThreshold is out of range", NULL); + } + } + + if (!FLAG_IS_DEFAULT(ShenandoahFreeThreshold)) { + if (0 > ShenandoahFreeThreshold || ShenandoahFreeThreshold > 100) { + vm_exit_during_initialization("The flag -XX:ShenandoahFreeThreshold is out of range", NULL); + } + } + if (MaxHeapSize >= ObjArrayChunkedTask::oop_size) { jio_fprintf(defaultStream::error_stream(), "Shenandoah GC cannot address more than " SIZE_FORMAT " bytes, and " SIZE_FORMAT " bytes heap requested.", diff -r 2d1b77ba27f1 -r 7e8f2bbb312c test/gc/shenandoah/ShenandoahJNICritical.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/shenandoah/ShenandoahJNICritical.java Fri Feb 17 11:18:13 2017 +0100 @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016 Red Hat, Inc. and/or its affiliates. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +import java.util.Arrays; + +public class ShenandoahJNICritical { + static { + System.loadLibrary("ShenandoahJNICritical"); + } + + private static final int NUM_RUNS = 10000; + private static final int ARRAY_SIZE=10000; + private static int[] a; + private static int[] b; + private static native void copyAtoB(int[] a, int[] b); + + public static void main(String[] args) { + a = new int[ARRAY_SIZE]; + b = new int[ARRAY_SIZE]; + for (int i = 0; i < NUM_RUNS; i++) { + test(); + } + } + + private static void test() { + int[] a1 = new int[ARRAY_SIZE]; + int[] b1 = new int[ARRAY_SIZE]; + fillArray(a); + copyAtoB(a, b); + copyAtoB(a1, b1); // Don't optimize out garbage arrays. + if (! Arrays.equals(a, b)) { + throw new RuntimeException("arrays not equal"); + } + } + + private static void fillArray(int[] array) { + for (int i = 0; i < ARRAY_SIZE; i++) { + int val = (int) (Math.random() * Integer.MAX_VALUE); + array[i] = val; + } + } +} diff -r 2d1b77ba27f1 -r 7e8f2bbb312c test/gc/shenandoah/ShenandoahJNICritical.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/shenandoah/ShenandoahJNICritical.sh Fri Feb 17 11:18:13 2017 +0100 @@ -0,0 +1,77 @@ +#!/bin/sh + +# +# Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + +## +## @test +## @summary test JNI critical arrays support in Shenandoah +## @run shell/timeout=30 ShenandoahJNICritical.sh +## + +if [ "${TESTSRC}" = "" ] +then + TESTSRC=${PWD} + echo "TESTSRC not set. Using "${TESTSRC}" as default" +fi +echo "TESTSRC=${TESTSRC}" +## Adding common setup Variables for running shell tests. +. ${TESTSRC}/../../test_env.sh + +# set platform-dependent variables +if [ "$VM_OS" = "linux" ]; then + echo "Testing on linux" + gcc_cmd=`which gcc` + if [ "x$gcc_cmd" = "x" ]; then + echo "WARNING: gcc not found. Cannot execute test." 2>&1 + exit 0; + fi +else + echo "Test passed; only valid for linux: $VM_OS" + exit 0; +fi + +THIS_DIR=. + +cp ${TESTSRC}${FS}*.java ${THIS_DIR} +${TESTJAVA}${FS}bin${FS}javac ShenandoahJNICritical.java + +$gcc_cmd -O1 -DLINUX -fPIC -shared \ + -o ${THIS_DIR}${FS}libShenandoahJNICritical.so \ + -I${TESTJAVA}${FS}include \ + -I${TESTJAVA}${FS}include${FS}linux \ + ${TESTSRC}${FS}libShenandoahJNICritical.c + +# run the java test in the background +cmd="${TESTJAVA}${FS}bin${FS}java -XX:+UseShenandoahGC -XX:ShenandoahGCHeuristics=aggressive \ + -Djava.library.path=${THIS_DIR}${FS} ShenandoahJNICritical" + +echo "$cmd" +eval $cmd + +if [ $? 
-ne 0 ] +then + echo "Test Failed" + exit 1 +fi + diff -r 2d1b77ba27f1 -r 7e8f2bbb312c test/gc/shenandoah/TestShenandoahArgumentRanges.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/shenandoah/TestShenandoahArgumentRanges.java Fri Feb 17 11:18:13 2017 +0100 @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016 Red Hat, Inc. and/or its affiliates. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/* + * @test TestShenandoahArgumentRanges + * @summary Test that Shenandoah arguments are checked for ranges where applicable + * @key gc + * @library /testlibrary + * @modules java.base/jdk.internal.misc + * java.management + * @run driver TestShenandoahArgumentRanges + */ + +import com.oracle.java.testlibrary.*; + +public class TestShenandoahArgumentRanges { + public static void main(String[] args) throws Exception { + testRange("ShenandoahGarbageThreshold", 0, 100); + testRange("ShenandoahFreeThreshold", 0, 100); + testRange("ShenandoahAllocationThreshold", 0, 100); + testHeuristics(); + } + + private static void testHeuristics() throws Exception { + + { + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC", + "-XX:ShenandoahGCHeuristics=aggressive", + "-version"); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + output.shouldHaveExitValue(0); + } + { + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC", + "-XX:ShenandoahGCHeuristics=dynamic", + "-version"); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + output.shouldHaveExitValue(0); + } + { + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC", + "-XX:ShenandoahGCHeuristics=fluff", + "-version"); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + output.shouldMatch("Unknown -XX:ShenandoahGCHeuristics option"); + output.shouldHaveExitValue(1); + } + } + + private static void testRange(String option, int min, int max) throws Exception { + { + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC", + "-XX:" + option + "=" + (max + 1), + "-version"); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + output.shouldHaveExitValue(1); + } + { + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC", + "-XX:" + option + "=" + max, + "-version"); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + output.shouldHaveExitValue(0); + } + { + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC", + "-XX:" + option + "=" + (min - 1), + "-version"); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + output.shouldHaveExitValue(1); + } + { + ProcessBuilder pb = 
ProcessTools.createJavaProcessBuilder("-XX:+UseShenandoahGC",
+ "-XX:" + option + "=" + min,
+ "-version");
+ OutputAnalyzer output = new OutputAnalyzer(pb.start());
+ output.shouldHaveExitValue(0);
+ }
+ }
+} diff -r 2d1b77ba27f1 -r 7e8f2bbb312c test/gc/shenandoah/libShenandoahJNICritical.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/shenandoah/libShenandoahJNICritical.c Fri Feb 17 11:18:13 2017 +0100 @@ -0,0 +1,35 @@ +/*
+ * Copyright (c) 2016 Red Hat, Inc. and/or its affiliates.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include <jni.h>
+#include <string.h>
+
+JNIEXPORT void JNICALL
+Java_ShenandoahJNICritical_copyAtoB(JNIEnv *env, jclass unused, jintArray a, jintArray b) {
+ jint len = (*env)->GetArrayLength(env, a);
+ jint* aa = (*env)->GetPrimitiveArrayCritical(env, a, 0);
+ jint* bb = (*env)->GetPrimitiveArrayCritical(env, b, 0);
+ memcpy(bb, aa, len * sizeof(jint));
+ (*env)->ReleasePrimitiveArrayCritical(env, b, bb, 0);
+ (*env)->ReleasePrimitiveArrayCritical(env, a, aa, 0);
+} changeset: 9565:e2f0ae67e95c user: shade date: Fri Feb 17 16:23:33 2017 +0100 summary: Sync up differences in Root{Processor,Evacuator} and Heap.
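The bulk of this changeset, below, turns the SubTasksDone members of ShenandoahRootProcessor and ShenandoahRootEvacuator from by-value fields into heap-allocated pointers freed in the destructors, presumably to match the newer Shenandoah tree this file is being synced against. A minimal standalone sketch of the before/after ownership pattern (Tracker is a placeholder type, not the HotSpot SubTasksDone class):

struct Tracker {                           // stands in for SubTasksDone
  explicit Tracker(unsigned n) : _n(n) {}
  unsigned _n;
};

class ByValueHolder {                      // before: member constructed inline
  Tracker _tasks;
 public:
  explicit ByValueHolder(unsigned n) : _tasks(n) {}
};                                         // member destroyed automatically

class ByPointerHolder {                    // after: explicit new/delete
  Tracker* _tasks;
  ByPointerHolder(const ByPointerHolder&); // not copyable: copy would double-free
 public:
  explicit ByPointerHolder(unsigned n) : _tasks(new Tracker(n)) {}
  ~ByPointerHolder() { delete _tasks; }    // mirrors the 'delete' added below
};

int main() {
  ByValueHolder a(10);                     // both are scope-bound, like StackObj
  ByPointerHolder b(10);
  return 0;
}

Either form releases the task state at scope exit; the pointer form trades automatic lifetime for layout flexibility, at the cost of a manual delete and a copy hazard.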
diff -r 7e8f2bbb312c -r e2f0ae67e95c src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Fri Feb 17 11:18:13 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahHeap.cpp Fri Feb 17 16:23:33 2017 +0100 @@ -2217,7 +2217,7 @@ } size_t ShenandoahHeap::conservative_max_heap_alignment() { - return 32 * M; + return ShenandoahMaxRegionSize; } size_t ShenandoahHeap::bytes_allocated_since_cm() { diff -r 7e8f2bbb312c -r e2f0ae67e95c src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp Fri Feb 17 11:18:13 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.cpp Fri Feb 17 16:23:33 2017 +0100 @@ -37,18 +37,19 @@ ShenandoahRootProcessor::ShenandoahRootProcessor(ShenandoahHeap* heap, uint n_workers, ShenandoahCollectorPolicy::TimingPhase phase) : - _process_strong_tasks(SHENANDOAH_RP_PS_NumElements), + _process_strong_tasks(new SubTasksDone(SHENANDOAH_RP_PS_NumElements)), _srs(heap, true), _phase(phase), _cld_iterator(ClassLoaderDataGraph::parallel_cld_root_iterator()) , _om_iterator(ObjectSynchronizer::parallel_iterator()) { heap->shenandoahPolicy()->record_workers_start(_phase); - _process_strong_tasks.set_n_threads(n_workers); + _process_strong_tasks->set_n_threads(n_workers); heap->set_par_threads(n_workers); } ShenandoahRootProcessor::~ShenandoahRootProcessor() { + delete _process_strong_tasks; ShenandoahHeap::heap()->shenandoahPolicy()->record_workers_end(_phase); } @@ -61,7 +62,7 @@ process_java_roots(oops, clds, clds, NULL, blobs, worker_id); process_vm_roots(oops, NULL, weak_oops, worker_id); - _process_strong_tasks.all_tasks_completed(); + _process_strong_tasks->all_tasks_completed(); } void ShenandoahRootProcessor::process_all_roots(OopClosure* oops, @@ -76,12 +77,12 @@ if (blobs != NULL) { ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::CodeCacheRoots, worker_id); - if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_CodeCache_oops_do)) { + if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_CodeCache_oops_do)) { CodeCache::blobs_do(blobs); } } - _process_strong_tasks.all_tasks_completed(); + _process_strong_tasks->all_tasks_completed(); } void ShenandoahRootProcessor::process_java_roots(OopClosure* strong_roots, @@ -113,34 +114,34 @@ uint worker_id) { ShenandoahPhaseTimes* phase_times = ShenandoahHeap::heap()->shenandoahPolicy()->phase_times(); - if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_Universe_oops_do)) { + if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_Universe_oops_do)) { ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::UniverseRoots, worker_id); Universe::oops_do(strong_roots); } - if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_JNIHandles_oops_do)) { + if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_JNIHandles_oops_do)) { ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::JNIRoots, worker_id); JNIHandles::oops_do(strong_roots); } - if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_FlatProfiler_oops_do)) { + if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_FlatProfiler_oops_do)) { ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::FlatProfilerRoots, worker_id); FlatProfiler::oops_do(strong_roots); } - if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_Management_oops_do)) { + if 
(!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_Management_oops_do)) { ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::ManagementRoots, worker_id); Management::oops_do(strong_roots); } - if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_jvmti_oops_do)) { + if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_jvmti_oops_do)) { ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::JVMTIRoots, worker_id); JvmtiExport::oops_do(strong_roots); } - if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_SystemDictionary_oops_do)) { + if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_SystemDictionary_oops_do)) { ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::SystemDictionaryRoots, worker_id); SystemDictionary::roots_oops_do(strong_roots, weak_roots); } if (jni_weak_roots != NULL) { - if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_JNIHandles_weak_oops_do)) { + if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_JNIHandles_weak_oops_do)) { ShenandoahAlwaysTrueClosure always_true; ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::JNIWeakRoots, worker_id); JNIHandles::weak_oops_do(&always_true, jni_weak_roots); @@ -160,16 +161,17 @@ } ShenandoahRootEvacuator::ShenandoahRootEvacuator(ShenandoahHeap* heap, uint n_workers, ShenandoahCollectorPolicy::TimingPhase phase) : - _process_strong_tasks(SHENANDOAH_RP_PS_NumElements), + _process_strong_tasks(new SubTasksDone(SHENANDOAH_RP_PS_NumElements)), _srs(heap, true), _phase(phase) { - _process_strong_tasks.set_n_threads(n_workers); + _process_strong_tasks->set_n_threads(n_workers); heap->set_par_threads(n_workers); heap->shenandoahPolicy()->record_workers_start(_phase); } ShenandoahRootEvacuator::~ShenandoahRootEvacuator() { + delete _process_strong_tasks; ShenandoahHeap::heap()->shenandoahPolicy()->record_workers_end(_phase); } @@ -186,12 +188,12 @@ { ShenandoahParPhaseTimesTracker timer(phase_times, ShenandoahPhaseTimes::CodeCacheRoots, worker_id); - if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_CodeCache_oops_do)) { + if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_CodeCache_oops_do)) { CodeCache::blobs_do(blobs); } } - if (!_process_strong_tasks.is_task_claimed(SHENANDOAH_RP_PS_ReferenceProcessor_oops_do)) { + if (!_process_strong_tasks->is_task_claimed(SHENANDOAH_RP_PS_ReferenceProcessor_oops_do)) { // Evacuate the PLL here so that the SurrogateLockerThread doesn't // have to. 
If the SLT runs into OOM during evacuation, the // ShenandoahConcurrentThread cannot get back from VMThread::execute() @@ -199,5 +201,5 @@ oop pll = java_lang_ref_Reference::pending_list_lock(); oopDesc::bs()->write_barrier(pll); } - _process_strong_tasks.all_tasks_completed(); + _process_strong_tasks->all_tasks_completed(); } diff -r 7e8f2bbb312c -r e2f0ae67e95c src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.hpp --- a/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.hpp Fri Feb 17 11:18:13 2017 +0100 +++ b/src/share/vm/gc_implementation/shenandoah/shenandoahRootProcessor.hpp Fri Feb 17 16:23:33 2017 +0100 @@ -58,7 +58,7 @@ }; class ShenandoahRootProcessor : public StackObj { - SubTasksDone _process_strong_tasks; + SubTasksDone* _process_strong_tasks; SharedHeap::StrongRootsScope _srs; ShenandoahCollectorPolicy::TimingPhase _phase; ParallelCLDRootIterator _cld_iterator; @@ -98,7 +98,7 @@ }; class ShenandoahRootEvacuator : public StackObj { - SubTasksDone _process_strong_tasks; + SubTasksDone* _process_strong_tasks; SharedHeap::StrongRootsScope _srs; ShenandoahCollectorPolicy::TimingPhase _phase;
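A recurring idiom throughout the ShenandoahRootProcessor code above is the SubTasksDone claim protocol: each serial root group (Universe, JNI handles, Management, and so on) is wrapped in an is_task_claimed() check so that exactly one of the n registered workers processes it, and all_tasks_completed() retires the claim state afterwards. A self-contained sketch of that claim-once idiom using a plain atomic flag array (TaskClaims and the task names are illustrative, not the HotSpot SubTasksDone implementation):

#include <atomic>
#include <cstdio>

class TaskClaims {
  std::atomic<bool>* _claimed;
  int _n;
 public:
  explicit TaskClaims(int n) : _claimed(new std::atomic<bool>[n]), _n(n) {
    for (int i = 0; i < _n; i++) _claimed[i] = false;  // clear all claims
  }
  ~TaskClaims() { delete[] _claimed; }
  // First caller for a given task index wins; every other caller sees false.
  bool try_claim(int t) {
    bool expected = false;
    return _claimed[t].compare_exchange_strong(expected, true);
  }
};

enum { UNIVERSE_ROOTS, JNI_ROOTS, NUM_TASKS };

static void worker(TaskClaims& claims, int worker_id) {
  if (claims.try_claim(UNIVERSE_ROOTS))
    std::printf("worker %d scans Universe roots\n", worker_id);
  if (claims.try_claim(JNI_ROOTS))
    std::printf("worker %d scans JNI handles\n", worker_id);
}

int main() {
  TaskClaims claims(NUM_TASKS);
  for (int w = 0; w < 4; w++) worker(claims, w);  // sequential stand-in for GC workers
  return 0;
}

Each root group is scanned exactly once no matter how many workers race through the list, which is why the real code can hand the same closure set to every worker thread.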