src/share/vm/utilities/taskqueue.hpp

Print this page
rev 3897 : Fix memory ordering in taskqueue for platforms with weak memory ordering (PPC)

Accesses to fields _bottom and _age were not properly ordered for
platforms with weak memory ordering. Volatile is not sufficient to
enforce this, because it depends on what the compiler assumes to be
necessary for volatile variables.

Pull getter/setter routines to TaskQueueSuper and use methods from
OrderAccess to access the fields in _age. Change the code to always
use the getter/setter methods.
Relax constraints for accesses to locals oldAge and newAge.

The OrderAccess routines used do simple load/stores on x86_64.

*** 136,189 **** class TaskQueueSuper: public CHeapObj<F> { protected: // Internal type for indexing the queue; also used for the tag. typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t; // The first free element after the last one pushed (mod N). volatile uint _bottom; enum { MOD_N_MASK = N - 1 }; class Age { public: Age(size_t data = 0) { _data = data; } Age(const Age& age) { _data = age._data; } Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } ! Age get() const volatile { return _data; } ! void set(Age age) volatile { _data = age._data; } ! ! idx_t top() const volatile { return _fields._top; } ! idx_t tag() const volatile { return _fields._tag; } // Increment top; if it wraps, increment tag also. void increment() { _fields._top = increment_index(_fields._top); if (_fields._top == 0) ++_fields._tag; } - Age cmpxchg(const Age new_age, const Age old_age) volatile { - return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data, - (volatile intptr_t *)&_data, - (intptr_t)old_age._data); - } - bool operator ==(const Age& other) const { return _data == other._data; } ! private: struct fields { idx_t _top; idx_t _tag; }; union { size_t _data; fields _fields; }; }; volatile Age _age; // These both operate mod N. static uint increment_index(uint ind) { return (ind + 1) & MOD_N_MASK; } static uint decrement_index(uint ind) { --- 136,216 ---- class TaskQueueSuper: public CHeapObj<F> { protected: // Internal type for indexing the queue; also used for the tag. typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t; + private: // The first free element after the last one pushed (mod N). + // We keep _bottom private and DO NOT USE IT DIRECTLY. volatile uint _bottom; + protected: + // Access to _bottom must be ordered. Use OrderAccess. + inline uint get_bottom() const { + return OrderAccess::load_acquire((volatile juint*)&_bottom); + } + + inline void set_bottom(uint new_bottom) { + OrderAccess::release_store(&_bottom, new_bottom); + } + enum { MOD_N_MASK = N - 1 }; + // Simple field access here, so no OrderAccess and no volatiles. class Age { public: Age(size_t data = 0) { _data = data; } Age(const Age& age) { _data = age._data; } Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } ! idx_t top() const { return _fields._top; } ! idx_t tag() const { return _fields._tag; } // Increment top; if it wraps, increment tag also. void increment() { _fields._top = increment_index(_fields._top); if (_fields._top == 0) ++_fields._tag; } bool operator ==(const Age& other) const { return _data == other._data; } ! public: struct fields { idx_t _top; idx_t _tag; }; union { size_t _data; fields _fields; }; }; + + private: + // Keep _age private and DO NOT USE IT DIRECTLY. volatile Age _age; + protected: + inline idx_t get_age_top() const { + return OrderAccess::load_acquire((volatile idx_t*) &(_age._fields._top)); + } + + inline Age get_age() { + size_t res = OrderAccess::load_ptr_acquire((volatile intptr_t*) &_age); + return *(Age*)(&res); + } + + inline void set_age(Age a) { + OrderAccess::release_store_ptr((volatile intptr_t*) &_age, *(size_t*)(&a)); + } + + Age cmpxchg_age(const Age new_age, const Age old_age) volatile { + return (Age)(size_t)Atomic::cmpxchg_ptr((intptr_t)new_age._data, + (volatile intptr_t*) &(_age._data), + (intptr_t)old_age._data); + } + // These both operate mod N. static uint increment_index(uint ind) { return (ind + 1) & MOD_N_MASK; } static uint decrement_index(uint ind) {
*** 218,244 **** public: TaskQueueSuper() : _bottom(0), _age() {} // Return true if the TaskQueue contains/does not contain any tasks. ! bool peek() const { return _bottom != _age.top(); } bool is_empty() const { return size() == 0; } // Return an estimate of the number of elements in the queue. // The "careful" version admits the possibility of pop_local/pop_global // races. uint size() const { ! return size(_bottom, _age.top()); } uint dirty_size() const { ! return dirty_size(_bottom, _age.top()); } void set_empty() { ! _bottom = 0; ! _age.set(0); } // Maximum number of elements allowed in the queue. This is two less // than the actual queue size, for somewhat complicated reasons. uint max_elems() const { return N - 2; } --- 245,274 ---- public: TaskQueueSuper() : _bottom(0), _age() {} // Return true if the TaskQueue contains/does not contain any tasks. ! // Use getters/setters with OrderAccess when accessing _bottom and _age. ! bool peek() { ! return get_bottom() != get_age_top(); ! } bool is_empty() const { return size() == 0; } // Return an estimate of the number of elements in the queue. // The "careful" version admits the possibility of pop_local/pop_global // races. uint size() const { ! return size(get_bottom(), get_age_top()); } uint dirty_size() const { ! return dirty_size(get_bottom(), get_age_top()); } void set_empty() { ! set_bottom(0); ! set_age(0); } // Maximum number of elements allowed in the queue. This is two less // than the actual queue size, for somewhat complicated reasons. uint max_elems() const { return N - 2; }
*** 255,269 **** class GenericTaskQueue: public TaskQueueSuper<N, F> { protected: typedef typename TaskQueueSuper<N, F>::Age Age; typedef typename TaskQueueSuper<N, F>::idx_t idx_t; - using TaskQueueSuper<N, F>::_bottom; - using TaskQueueSuper<N, F>::_age; using TaskQueueSuper<N, F>::increment_index; using TaskQueueSuper<N, F>::decrement_index; using TaskQueueSuper<N, F>::dirty_size; public: using TaskQueueSuper<N, F>::max_elems; using TaskQueueSuper<N, F>::size; --- 285,299 ---- class GenericTaskQueue: public TaskQueueSuper<N, F> { protected: typedef typename TaskQueueSuper<N, F>::Age Age; typedef typename TaskQueueSuper<N, F>::idx_t idx_t; using TaskQueueSuper<N, F>::increment_index; using TaskQueueSuper<N, F>::decrement_index; using TaskQueueSuper<N, F>::dirty_size; + using TaskQueueSuper<N, F>::get_age_top; + using TaskQueueSuper<N, F>::get_age; public: using TaskQueueSuper<N, F>::max_elems; using TaskQueueSuper<N, F>::size;
*** 319,329 **** template<class E, MEMFLAGS F, unsigned int N> void GenericTaskQueue<E, F, N>::oops_do(OopClosure* f) { // tty->print_cr("START OopTaskQueue::oops_do"); uint iters = size(); ! uint index = _bottom; for (uint i = 0; i < iters; ++i) { index = decrement_index(index); // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, // index, &_elems[index], _elems[index]); E* t = (E*)&_elems[index]; // cast away volatility --- 349,359 ---- template<class E, MEMFLAGS F, unsigned int N> void GenericTaskQueue<E, F, N>::oops_do(OopClosure* f) { // tty->print_cr("START OopTaskQueue::oops_do"); uint iters = size(); ! uint index = this->get_bottom(); for (uint i = 0; i < iters; ++i) { index = decrement_index(index); // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, // index, &_elems[index], _elems[index]); E* t = (E*)&_elems[index]; // cast away volatility
*** 336,349 **** template<class E, MEMFLAGS F, unsigned int N> bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) { if (dirty_n_elems == N - 1) { // Actually means 0, so do the push. ! uint localBot = _bottom; // g++ complains if the volatile result of the assignment is unused. const_cast<E&>(_elems[localBot] = t); ! OrderAccess::release_store(&_bottom, increment_index(localBot)); TASKQUEUE_STATS_ONLY(stats.record_push()); return true; } return false; } --- 366,379 ---- template<class E, MEMFLAGS F, unsigned int N> bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) { if (dirty_n_elems == N - 1) { // Actually means 0, so do the push. ! uint localBot = this->get_bottom(); // g++ complains if the volatile result of the assignment is unused. const_cast<E&>(_elems[localBot] = t); ! set_bottom(increment_index(localBot)); TASKQUEUE_STATS_ONLY(stats.record_push()); return true; } return false; }
*** 369,407 **** // Perhaps a competing pop_global has already incremented "top", in which // case it wins the element. if (localBot == oldAge.top()) { // No competing pop_global has yet incremented "top"; we'll try to // install new_age, thus claiming the element. ! Age tempAge = _age.cmpxchg(newAge, oldAge); if (tempAge == oldAge) { // We win. ! assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); TASKQUEUE_STATS_ONLY(stats.record_pop_slow()); return true; } } // We lose; a completing pop_global gets the element. But the queue is empty // and top is greater than bottom. Fix this representation of the empty queue // to become the canonical one. ! _age.set(newAge); ! assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); return false; } template<class E, MEMFLAGS F, unsigned int N> bool GenericTaskQueue<E, F, N>::pop_global(E& t) { ! Age oldAge = _age.get(); ! uint localBot = _bottom; uint n_elems = size(localBot, oldAge.top()); if (n_elems == 0) { return false; } const_cast<E&>(t = _elems[oldAge.top()]); Age newAge(oldAge); newAge.increment(); ! Age resAge = _age.cmpxchg(newAge, oldAge); // Note that using "_bottom" here might fail, since a pop_local might // have decremented it. assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity"); return resAge == oldAge; --- 399,437 ---- // Perhaps a competing pop_global has already incremented "top", in which // case it wins the element. if (localBot == oldAge.top()) { // No competing pop_global has yet incremented "top"; we'll try to // install new_age, thus claiming the element. ! Age tempAge = cmpxchg_age(newAge, oldAge); if (tempAge == oldAge) { // We win. ! assert(dirty_size(localBot, get_age_top()) != N - 1, "sanity"); TASKQUEUE_STATS_ONLY(stats.record_pop_slow()); return true; } } // We lose; a completing pop_global gets the element. But the queue is empty // and top is greater than bottom. Fix this representation of the empty queue // to become the canonical one. ! set_age(newAge); ! assert(dirty_size(localBot, get_age_top()) != N - 1, "sanity"); return false; } template<class E, MEMFLAGS F, unsigned int N> bool GenericTaskQueue<E, F, N>::pop_global(E& t) { ! Age oldAge = get_age(); ! uint localBot = this->get_bottom(); uint n_elems = size(localBot, oldAge.top()); if (n_elems == 0) { return false; } const_cast<E&>(t = _elems[oldAge.top()]); Age newAge(oldAge); newAge.increment(); ! Age resAge = cmpxchg_age(newAge, oldAge); // Note that using "_bottom" here might fail, since a pop_local might // have decremented it. assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity"); return resAge == oldAge;
*** 631,685 **** #endif }; template<class E, MEMFLAGS F, unsigned int N> inline bool GenericTaskQueue<E, F, N>::push(E t) { ! uint localBot = _bottom; ! assert((localBot >= 0) && (localBot < N), "_bottom out of range."); ! idx_t top = _age.top(); uint dirty_n_elems = dirty_size(localBot, top); assert(dirty_n_elems < N, "n_elems out of range."); if (dirty_n_elems < max_elems()) { // g++ complains if the volatile result of the assignment is unused. const_cast<E&>(_elems[localBot] = t); ! OrderAccess::release_store(&_bottom, increment_index(localBot)); TASKQUEUE_STATS_ONLY(stats.record_push()); return true; } else { return push_slow(t, dirty_n_elems); } } template<class E, MEMFLAGS F, unsigned int N> inline bool GenericTaskQueue<E, F, N>::pop_local(E& t) { ! uint localBot = _bottom; // This value cannot be N-1. That can only occur as a result of // the assignment to bottom in this method. If it does, this method // resets the size to 0 before the next call (which is sequential, // since this is pop_local.) ! uint dirty_n_elems = dirty_size(localBot, _age.top()); assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); if (dirty_n_elems == 0) return false; localBot = decrement_index(localBot); ! _bottom = localBot; // This is necessary to prevent any read below from being reordered // before the store just above. OrderAccess::fence(); const_cast<E&>(t = _elems[localBot]); // This is a second read of "age"; the "size()" above is the first. // If there's still at least one element in the queue, based on the // "_bottom" and "age" we've read, then there can be no interference with // a "pop_global" operation, and we're done. ! idx_t tp = _age.top(); // XXX if (size(localBot, tp) > 0) { assert(dirty_size(localBot, tp) != N - 1, "sanity"); TASKQUEUE_STATS_ONLY(stats.record_pop()); return true; } else { // Otherwise, the queue contained exactly one element; we take the slow // path. ! return pop_local_slow(localBot, _age.get()); } } typedef GenericTaskQueue<oop, mtGC> OopTaskQueue; typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet; --- 661,715 ---- #endif }; template<class E, MEMFLAGS F, unsigned int N> inline bool GenericTaskQueue<E, F, N>::push(E t) { ! uint localBot = this->get_bottom(); ! assert(localBot < N, "_bottom out of range."); ! idx_t top = get_age_top(); uint dirty_n_elems = dirty_size(localBot, top); assert(dirty_n_elems < N, "n_elems out of range."); if (dirty_n_elems < max_elems()) { // g++ complains if the volatile result of the assignment is unused. const_cast<E&>(_elems[localBot] = t); ! set_bottom(increment_index(localBot)); TASKQUEUE_STATS_ONLY(stats.record_push()); return true; } else { return push_slow(t, dirty_n_elems); } } template<class E, MEMFLAGS F, unsigned int N> inline bool GenericTaskQueue<E, F, N>::pop_local(E& t) { ! uint localBot = this->get_bottom(); // This value cannot be N-1. That can only occur as a result of // the assignment to bottom in this method. If it does, this method // resets the size to 0 before the next call (which is sequential, // since this is pop_local.) ! uint dirty_n_elems = dirty_size(localBot, get_age_top()); assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); if (dirty_n_elems == 0) return false; localBot = decrement_index(localBot); ! this->set_bottom(localBot); // This is necessary to prevent any read below from being reordered // before the store just above. OrderAccess::fence(); const_cast<E&>(t = _elems[localBot]); // This is a second read of "age"; the "size()" above is the first. // If there's still at least one element in the queue, based on the // "_bottom" and "age" we've read, then there can be no interference with // a "pop_global" operation, and we're done. ! idx_t tp = get_age_top(); // XXX if (size(localBot, tp) > 0) { assert(dirty_size(localBot, tp) != N - 1, "sanity"); TASKQUEUE_STATS_ONLY(stats.record_pop()); return true; } else { // Otherwise, the queue contained exactly one element; we take the slow // path. ! return pop_local_slow(localBot, get_age()); } } typedef GenericTaskQueue<oop, mtGC> OopTaskQueue; typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet;