--- old/src/share/vm/gc/g1/dirtyCardQueue.cpp 2016-03-01 21:48:22.694923932 -0500 +++ new/src/share/vm/gc/g1/dirtyCardQueue.cpp 2016-03-01 21:48:22.598923456 -0500 @@ -115,9 +115,8 @@ uint worker_i) { bool res = true; if (_buf != NULL) { - res = apply_closure_to_buffer(cl, _buf, _index, _sz, - consume, - worker_i); + BufferNode* node = BufferNode::make_node_from_buffer(_buf, _index); + res = apply_closure_to_buffer(cl, node, _sz, consume, worker_i); if (res && consume) { _index = _sz; } @@ -126,25 +125,27 @@ } bool DirtyCardQueue::apply_closure_to_buffer(CardTableEntryClosure* cl, - void** buf, - size_t index, size_t sz, + BufferNode* node, size_t sz, bool consume, uint worker_i) { if (cl == NULL) return true; + void** buf = BufferNode::make_buffer_from_node(node); size_t limit = byte_index_to_index(sz); - for (size_t i = byte_index_to_index(index); i < limit; ++i) { + for (size_t i = byte_index_to_index(node->index()); i < limit; ++i) { jbyte* card_ptr = static_cast(buf[i]); - if (card_ptr != NULL) { - // Set the entry to null, so we don't do it again (via the test - // above) if we reconsider this buffer. + assert(card_ptr != NULL, "invariant"); + if (!cl->do_card_ptr(card_ptr, worker_i)) { if (consume) { - buf[i] = NULL; - } - if (!cl->do_card_ptr(card_ptr, worker_i)) { - return false; + size_t new_index = index_to_byte_index(i + 1); + assert(new_index <= sz, "invariant"); + node->set_index(new_index); } + return false; } } + if (consume) { + node->set_index(sz); + } return true; } @@ -188,14 +189,15 @@ t->dirty_card_queue().handle_zero_index(); } -bool DirtyCardQueueSet::mut_process_buffer(void** buf) { +bool DirtyCardQueueSet::mut_process_buffer(BufferNode* node) { guarantee(_free_ids != NULL, "must be"); // claim a par id uint worker_i = _free_ids->claim_par_id(); - bool b = DirtyCardQueue::apply_closure_to_buffer(_mut_process_closure, buf, 0, - _sz, true, worker_i); + bool b = DirtyCardQueue::apply_closure_to_buffer(_mut_process_closure, + node, _sz, + true, worker_i); if (b) { Atomic::inc(&_processed_buffers_mut); } @@ -239,49 +241,30 @@ if (nd == NULL) { return false; } else { - void** buf = BufferNode::make_buffer_from_node(nd); - size_t index = nd->index(); - if (DirtyCardQueue::apply_closure_to_buffer(cl, - buf, index, _sz, - true, worker_i)) { + if (DirtyCardQueue::apply_closure_to_buffer(cl, nd, _sz, true, worker_i)) { // Done with fully processed buffer. - deallocate_buffer(buf); + deallocate_buffer(nd); Atomic::inc(&_processed_buffers_rs_thread); return true; } else { // Return partially processed buffer to the queue. - enqueue_complete_buffer(buf, index); + enqueue_complete_buffer(nd); return false; } } } -void DirtyCardQueueSet::apply_closure_to_all_completed_buffers(CardTableEntryClosure* cl) { - BufferNode* nd = _completed_buffers_head; - while (nd != NULL) { - bool b = - DirtyCardQueue::apply_closure_to_buffer(cl, - BufferNode::make_buffer_from_node(nd), - 0, _sz, false); - guarantee(b, "Should not stop early."); - nd = nd->next(); - } -} - void DirtyCardQueueSet::par_apply_closure_to_all_completed_buffers(CardTableEntryClosure* cl) { BufferNode* nd = _cur_par_buffer_node; while (nd != NULL) { - BufferNode* next = (BufferNode*)nd->next(); - BufferNode* actual = (BufferNode*)Atomic::cmpxchg_ptr((void*)next, (volatile void*)&_cur_par_buffer_node, (void*)nd); + BufferNode* next = nd->next(); + void* actual = Atomic::cmpxchg_ptr(next, &_cur_par_buffer_node, nd); if (actual == nd) { - bool b = - DirtyCardQueue::apply_closure_to_buffer(cl, - BufferNode::make_buffer_from_node(actual), - 0, _sz, false); + bool b = DirtyCardQueue::apply_closure_to_buffer(cl, nd, _sz, false); guarantee(b, "Should not stop early."); nd = next; } else { - nd = actual; + nd = static_cast(actual); } } } @@ -304,7 +287,7 @@ while (buffers_to_delete != NULL) { BufferNode* nd = buffers_to_delete; buffers_to_delete = nd->next(); - deallocate_buffer(BufferNode::make_buffer_from_node(nd)); + deallocate_buffer(nd); } } @@ -320,6 +303,13 @@ shared_dirty_card_queue()->reset(); } +void DirtyCardQueueSet::concatenate_log(DirtyCardQueue& dcq) { + if (!dcq.is_empty()) { + enqueue_complete_buffer( + BufferNode::make_node_from_buffer(dcq.get_buf(), dcq.get_index())); + dcq.reinitialize(); + } +} void DirtyCardQueueSet::concatenate_logs() { // Iterate over all the threads, if we find a partial log add it to @@ -329,23 +319,9 @@ _max_completed_queue = max_jint; assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); for (JavaThread* t = Threads::first(); t; t = t->next()) { - DirtyCardQueue& dcq = t->dirty_card_queue(); - if (dcq.size() != 0) { - void** buf = dcq.get_buf(); - // We must NULL out the unused entries, then enqueue. - size_t limit = dcq.byte_index_to_index(dcq.get_index()); - for (size_t i = 0; i < limit; ++i) { - buf[i] = NULL; - } - enqueue_complete_buffer(dcq.get_buf(), dcq.get_index()); - dcq.reinitialize(); - } - } - if (_shared_dirty_card_queue.size() != 0) { - enqueue_complete_buffer(_shared_dirty_card_queue.get_buf(), - _shared_dirty_card_queue.get_index()); - _shared_dirty_card_queue.reinitialize(); + concatenate_log(t->dirty_card_queue()); } + concatenate_log(_shared_dirty_card_queue); // Restore the completed buffer queue limit. _max_completed_queue = save_max_completed_queue; } --- old/src/share/vm/gc/g1/dirtyCardQueue.hpp 2016-03-01 21:48:23.254926709 -0500 +++ new/src/share/vm/gc/g1/dirtyCardQueue.hpp 2016-03-01 21:48:23.154926213 -0500 @@ -52,21 +52,22 @@ // Process queue entries and release resources. void flush() { flush_impl(); } - // Apply the closure to all elements, and reset the index to make the - // buffer empty. If a closure application returns "false", return - // "false" immediately, halting the iteration. If "consume" is true, - // deletes processed entries from logs. + // Apply the closure to all active elements, from index to size. If + // all closure applications return true, then returns true. Stops + // processing after the first false closure application and returns + // false. If "consume" is true, index is updated to follow the last + // processed element. bool apply_closure(CardTableEntryClosure* cl, bool consume = true, uint worker_i = 0); - // Apply the closure to all elements of "buf", down to "index" - // (inclusive.) If returns "false", then a closure application returned - // "false", and we return immediately. If "consume" is true, entries are - // set to NULL as they are processed, so they will not be processed again - // later. + // Apply the closure to all active elements of "node", from it's + // index to sz. If all closure applications return true, then + // returns true. Stops processing after the first false closure + // application and returns false. If "consume" is true, the node's + // index is updated to follow the last processed element. static bool apply_closure_to_buffer(CardTableEntryClosure* cl, - void** buf, size_t index, size_t sz, + BufferNode* node, size_t sz, bool consume = true, uint worker_i = 0); void **get_buf() { return _buf;} @@ -95,7 +96,7 @@ DirtyCardQueue _shared_dirty_card_queue; // Override. - bool mut_process_buffer(void** buf); + bool mut_process_buffer(BufferNode* node); // Protected by the _cbl_mon. FreeIdSet* _free_ids; @@ -107,6 +108,9 @@ // Current buffer node used for parallel iteration. BufferNode* volatile _cur_par_buffer_node; + + void concatenate_log(DirtyCardQueue& dcq); + public: DirtyCardQueueSet(bool notify_when_complete = true); @@ -126,12 +130,13 @@ static void handle_zero_index_for_thread(JavaThread* t); // If there exists some completed buffer, pop it, then apply the - // specified closure to all its elements, nulling out those elements - // processed. If all elements are processed, returns "true". If no - // completed buffers exist, returns false. If a completed buffer exists, - // but is only partially completed before a "yield" happens, the - // partially completed buffer (with its processed elements set to NULL) - // is returned to the completed buffer set, and this call returns false. + // specified closure to its active elements. If all active elements + // are processed, returns "true". If no completed buffers exist, + // returns false. If a completed buffer exists, but is only + // partially completed before a "yield" happens, the partially + // completed buffer (with its index updated to exclude the processed + // eleemnts) is returned to the completed buffer set, and this call + // returns false. bool apply_closure_to_completed_buffer(CardTableEntryClosure* cl, uint worker_i, size_t stop_at, @@ -139,13 +144,10 @@ BufferNode* get_completed_buffer(size_t stop_at); - // Applies the current closure to all completed buffers, - // non-consumptively. - void apply_closure_to_all_completed_buffers(CardTableEntryClosure* cl); - void reset_for_par_iteration() { _cur_par_buffer_node = _completed_buffers_head; } // Applies the current closure to all completed buffers, non-consumptively. - // Parallel version. + // Can be used in parallel, all callers using the iteration state initialized + // by reset_for_par_iteration. void par_apply_closure_to_all_completed_buffers(CardTableEntryClosure* cl); DirtyCardQueue* shared_dirty_card_queue() { --- old/src/share/vm/gc/g1/ptrQueue.cpp 2016-03-01 21:48:23.806929447 -0500 +++ new/src/share/vm/gc/g1/ptrQueue.cpp 2016-03-01 21:48:23.710928971 -0500 @@ -43,16 +43,12 @@ void PtrQueue::flush_impl() { if (!_permanent && _buf != NULL) { + BufferNode* node = BufferNode::make_node_from_buffer(_buf, _index); if (_index == _sz) { // No work to do. - qset()->deallocate_buffer(_buf); + qset()->deallocate_buffer(node); } else { - // We must NULL out the unused entries, then enqueue. - size_t limit = byte_index_to_index(_index); - for (size_t i = 0; i < limit; ++i) { - _buf[i] = NULL; - } - qset()->enqueue_complete_buffer(_buf); + qset()->enqueue_complete_buffer(node); } _buf = NULL; _index = 0; @@ -74,7 +70,7 @@ assert(_index <= _sz, "Invariant."); } -void PtrQueue::locking_enqueue_completed_buffer(void** buf) { +void PtrQueue::locking_enqueue_completed_buffer(BufferNode* node) { assert(_lock->owned_by_self(), "Required."); // We have to unlock _lock (which may be Shared_DirtyCardQ_lock) before @@ -82,7 +78,7 @@ // have the same rank and we may get the "possible deadlock" message _lock->unlock(); - qset()->enqueue_complete_buffer(buf); + qset()->enqueue_complete_buffer(node); // We must relock only because the caller will unlock, for the normal // case. _lock->lock_without_safepoint_check(); @@ -157,10 +153,9 @@ return BufferNode::make_buffer_from_node(node); } -void PtrQueueSet::deallocate_buffer(void** buf) { +void PtrQueueSet::deallocate_buffer(BufferNode* node) { assert(_sz > 0, "Didn't set a buffer size."); MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); - BufferNode *node = BufferNode::make_node_from_buffer(buf); node->set_next(_fl_owner->_buf_free_list); _fl_owner->_buf_free_list = node; _fl_owner->_buf_free_list_sz++; @@ -211,10 +206,10 @@ // preventing the subsequent the multiple enqueue, and // install a newly allocated buffer below. - void** buf = _buf; // local pointer to completed buffer + BufferNode* node = BufferNode::make_node_from_buffer(_buf, _index); _buf = NULL; // clear shared _buf field - locking_enqueue_completed_buffer(buf); // enqueue completed buffer + locking_enqueue_completed_buffer(node); // enqueue completed buffer // While the current thread was enqueueing the buffer another thread // may have a allocated a new buffer and inserted it into this pointer @@ -224,9 +219,11 @@ if (_buf != NULL) return; } else { - if (qset()->process_or_enqueue_complete_buffer(_buf)) { + BufferNode* node = BufferNode::make_node_from_buffer(_buf, _index); + if (qset()->process_or_enqueue_complete_buffer(node)) { // Recycle the buffer. No allocation. - _sz = qset()->buffer_size(); + assert(_buf == BufferNode::make_buffer_from_node(node), "invariant"); + assert(_sz == qset()->buffer_size(), "invariant"); _index = _sz; return; } @@ -238,12 +235,12 @@ _index = _sz; } -bool PtrQueueSet::process_or_enqueue_complete_buffer(void** buf) { +bool PtrQueueSet::process_or_enqueue_complete_buffer(BufferNode* node) { if (Thread::current()->is_Java_thread()) { // We don't lock. It is fine to be epsilon-precise here. if (_max_completed_queue == 0 || _max_completed_queue > 0 && _n_completed_buffers >= _max_completed_queue + _completed_queue_padding) { - bool b = mut_process_buffer(buf); + bool b = mut_process_buffer(node); if (b) { // True here means that the buffer hasn't been deallocated and the caller may reuse it. return true; @@ -251,14 +248,12 @@ } } // The buffer will be enqueued. The caller will have to get a new one. - enqueue_complete_buffer(buf); + enqueue_complete_buffer(node); return false; } -void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index) { +void PtrQueueSet::enqueue_complete_buffer(BufferNode* cbn) { MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); - BufferNode* cbn = BufferNode::make_node_from_buffer(buf); - cbn->set_index(index); cbn->set_next(NULL); if (_completed_buffers_tail == NULL) { assert(_completed_buffers_head == NULL, "Well-formedness"); --- old/src/share/vm/gc/g1/ptrQueue.hpp 2016-03-01 21:48:24.358932184 -0500 +++ new/src/share/vm/gc/g1/ptrQueue.hpp 2016-03-01 21:48:24.254931668 -0500 @@ -33,6 +33,7 @@ // the addresses of modified old-generation objects. This type supports // this operation. +class BufferNode; class PtrQueueSet; class PtrQueue VALUE_OBJ_CLASS_SPEC { friend class VMStructs; @@ -104,7 +105,7 @@ // get into an infinite loop). virtual bool should_enqueue_buffer() { return true; } void handle_zero_index(); - void locking_enqueue_completed_buffer(void** buf); + void locking_enqueue_completed_buffer(BufferNode* node); void enqueue_known_active(void* ptr); @@ -136,6 +137,10 @@ return ind / sizeof(void*); } + static size_t index_to_byte_index(size_t ind) { + return ind * sizeof(void*); + } + // To support compiler. protected: @@ -186,10 +191,13 @@ // Free a BufferNode. static void deallocate(BufferNode* node); - // Return the BufferNode containing the buffer. - static BufferNode* make_node_from_buffer(void** buffer) { - return reinterpret_cast( - reinterpret_cast(buffer) - buffer_offset()); + // Return the BufferNode containing the buffer, after setting its index. + static BufferNode* make_node_from_buffer(void** buffer, size_t index) { + BufferNode* node = + reinterpret_cast( + reinterpret_cast(buffer) - buffer_offset()); + node->set_index(index); + return node; } // Return the buffer for node. @@ -243,7 +251,7 @@ // A mutator thread does the the work of processing a buffer. // Returns "true" iff the work is complete (and the buffer may be // deallocated). - virtual bool mut_process_buffer(void** buf) { + virtual bool mut_process_buffer(BufferNode* node) { ShouldNotReachHere(); return false; } @@ -267,13 +275,13 @@ // Return an empty buffer to the free list. The "buf" argument is // required to be a pointer to the head of an array of length "_sz". - void deallocate_buffer(void** buf); + void deallocate_buffer(BufferNode* node); // Declares that "buf" is a complete buffer. - void enqueue_complete_buffer(void** buf, size_t index = 0); + void enqueue_complete_buffer(BufferNode* node); // To be invoked by the mutator. - bool process_or_enqueue_complete_buffer(void** buf); + bool process_or_enqueue_complete_buffer(BufferNode* node); bool completed_buffers_exist_dirty() { return _n_completed_buffers > 0; --- old/src/share/vm/gc/g1/satbMarkQueue.cpp 2016-03-01 21:48:24.938935060 -0500 +++ new/src/share/vm/gc/g1/satbMarkQueue.cpp 2016-03-01 21:48:24.842934584 -0500 @@ -100,6 +100,10 @@ return true; } +inline bool retain_entry(const void* entry, G1CollectedHeap* heap) { + return requires_marking(entry, heap) && !heap->isMarkedNext((oop)entry); +} + // This method removes entries from a SATB buffer that will not be // useful to the concurrent marking threads. Entries are retained if // they require marking and are not already marked. Retained entries @@ -114,43 +118,23 @@ return; } - // Used for sanity checking at the end of the loop. - DEBUG_ONLY(size_t entries = 0; size_t retained = 0;) - assert(_index <= _sz, "invariant"); - void** limit = &buf[byte_index_to_index(_index)]; - void** src = &buf[byte_index_to_index(_sz)]; - void** dst = src; - - while (limit < src) { - DEBUG_ONLY(entries += 1;) - --src; + + // Two-fingered compaction toward the end. + void** src = &buf[byte_index_to_index(_index)]; + void** dst = &buf[byte_index_to_index(_sz)]; + for ( ; src < dst; ++src) { + // Search low to high for an entry to keep. void* entry = *src; - // NULL the entry so that unused parts of the buffer contain NULLs - // at the end. If we are going to retain it we will copy it to its - // final place. If we have retained all entries we have visited so - // far, we'll just end up copying it to the same place. - *src = NULL; - - if (requires_marking(entry, g1h) && !g1h->isMarkedNext((oop)entry)) { - --dst; - assert(*dst == NULL, "filtering destination should be clear"); - *dst = entry; - DEBUG_ONLY(retained += 1;); + if (retain_entry(entry, g1h)) { + // Found keeper. Search high to low for an entry to discard. + while ((src < --dst) && retain_entry(*dst, g1h)) { } + if (src >= dst) break; // Done if no discard found. + *dst = entry; // Replace discard with keeper. } } - size_t new_index = pointer_delta(dst, buf, 1); - -#ifdef ASSERT - size_t entries_calc = (_sz - _index) / sizeof(void*); - assert(entries == entries_calc, "the number of entries we counted " - "should match the number of entries we calculated"); - size_t retained_calc = (_sz - new_index) / sizeof(void*); - assert(retained == retained_calc, "the number of retained entries we counted " - "should match the number of retained entries we calculated"); -#endif // ASSERT - - _index = new_index; + assert(src == dst, "invariant"); + _index = pointer_delta(dst, buf, 1); } // This method will first apply the above filtering to the buffer. If @@ -286,19 +270,11 @@ } if (nd != NULL) { void **buf = BufferNode::make_buffer_from_node(nd); - // Skip over NULL entries at beginning (e.g. push end) of buffer. - // Filtering can result in non-full completed buffers; see - // should_enqueue_buffer. - assert(_sz % sizeof(void*) == 0, "invariant"); - size_t limit = SATBMarkQueue::byte_index_to_index(_sz); - for (size_t i = 0; i < limit; ++i) { - if (buf[i] != NULL) { - // Found the end of the block of NULLs; process the remainder. - cl->do_buffer(buf + i, limit - i); - break; - } - } - deallocate_buffer(buf); + size_t index = SATBMarkQueue::byte_index_to_index(nd->index()); + size_t size = SATBMarkQueue::byte_index_to_index(_sz); + assert(index <= size, "invariant"); + cl->do_buffer(buf + index, size - index); + deallocate_buffer(nd); return true; } else { return false; @@ -355,7 +331,7 @@ while (buffers_to_delete != NULL) { BufferNode* nd = buffers_to_delete; buffers_to_delete = nd->next(); - deallocate_buffer(BufferNode::make_buffer_from_node(nd)); + deallocate_buffer(nd); } assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); // So we can safely manipulate these queues. --- old/src/share/vm/gc/g1/satbMarkQueue.hpp 2016-03-01 21:48:25.486937777 -0500 +++ new/src/share/vm/gc/g1/satbMarkQueue.hpp 2016-03-01 21:48:25.390937301 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -115,9 +115,8 @@ // If there exists some completed buffer, pop and process it, and // return true. Otherwise return false. Processing a buffer - // consists of applying the closure to the buffer range starting - // with the first non-NULL entry to the end of the buffer; the - // leading entries may be NULL due to filtering. + // consists of applying the closure to the active range of the + // buffer; the leading entries may be excluded due to filtering. bool apply_closure_to_completed_buffer(SATBBufferClosure* cl); #ifndef PRODUCT