src/share/vm/utilities/taskqueue.hpp

Print this page
rev 4231 : Fix memory ordering in taskqueue for platforms with weak memory ordering (PPC).

Add a fence as described in "Correct and Efficient Work-Stealing for Weak Memory
Models" (Lê, Pop, Cohen, Nardelli; PPoPP '13).  With four or more GC threads,
bottom can be older than age in get_bottom().


 373     // install new_age, thus claiming the element.
 374     Age tempAge = _age.cmpxchg(newAge, oldAge);
 375     if (tempAge == oldAge) {
 376       // We win.
 377       assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
 378       TASKQUEUE_STATS_ONLY(stats.record_pop_slow());
 379       return true;
 380     }
 381   }
 382   // We lose; a completing pop_global gets the element.  But the queue is empty
 383   // and top is greater than bottom.  Fix this representation of the empty queue
 384   // to become the canonical one.
 385   _age.set(newAge);
 386   assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
 387   return false;
 388 }
 389 
 390 template<class E, MEMFLAGS F, unsigned int N>
 391 bool GenericTaskQueue<E, F, N>::pop_global(E& t) {
 392   Age oldAge = _age.get();
 393   uint localBot = _bottom;







 394   uint n_elems = size(localBot, oldAge.top());
 395   if (n_elems == 0) {
 396     return false;
 397   }
 398 
 399   const_cast<E&>(t = _elems[oldAge.top()]);
 400   Age newAge(oldAge);
 401   newAge.increment();
 402   Age resAge = _age.cmpxchg(newAge, oldAge);
 403 
 404   // Note that using "_bottom" here might fail, since a pop_local might
 405   // have decremented it.
 406   assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity");
 407   return resAge == oldAge;
 408 }
 409 
 410 template<class E, MEMFLAGS F, unsigned int N>
 411 GenericTaskQueue<E, F, N>::~GenericTaskQueue() {
       // Release the C-heap backing array.  NOTE(review): elements are not
       // individually destroyed — assumes E is trivially destructible
       // (task pointers/ids); confirm against instantiations.
 412   FREE_C_HEAP_ARRAY(E, _elems, F);
 413 }


 617   // Reset the terminator, so that it may be reused again.
 618   // The caller is responsible for ensuring that this is done
 619   // in an MT-safe manner, once the previous round of use of
 620   // the terminator is finished.
 621   void reset_for_reuse();
 622   // Same as above but the number of parallel threads is set to the
 623   // given number.
 624   void reset_for_reuse(int n_threads);
 625 
 626 #ifdef TRACESPINNING
 627   static uint total_yields() { return _total_yields; }
 628   static uint total_spins() { return _total_spins; }
 629   static uint total_peeks() { return _total_peeks; }
 630   static void print_termination_counts();
 631 #endif
 632 };
 633 
 634 template<class E, MEMFLAGS F, unsigned int N> inline bool
 635 GenericTaskQueue<E, F, N>::push(E t) {
 636   uint localBot = _bottom;
 637   assert((localBot >= 0) && (localBot < N), "_bottom out of range.");
 638   idx_t top = _age.top();
 639   uint dirty_n_elems = dirty_size(localBot, top);
 640   assert(dirty_n_elems < N, "n_elems out of range.");
 641   if (dirty_n_elems < max_elems()) {
 642     // g++ complains if the volatile result of the assignment is unused.
 643     const_cast<E&>(_elems[localBot] = t);
 644     OrderAccess::release_store(&_bottom, increment_index(localBot));
 645     TASKQUEUE_STATS_ONLY(stats.record_push());
 646     return true;
 647   } else {
 648     return push_slow(t, dirty_n_elems);
 649   }
 650 }
 651 
 652 template<class E, MEMFLAGS F, unsigned int N> inline bool
 653 GenericTaskQueue<E, F, N>::pop_local(E& t) {
 654   uint localBot = _bottom;
 655   // This value cannot be N-1.  That can only occur as a result of
 656   // the assignment to bottom in this method.  If it does, this method
 657   // resets the size to 0 before the next call (which is sequential,




 373     // install new_age, thus claiming the element.
 374     Age tempAge = _age.cmpxchg(newAge, oldAge);
 375     if (tempAge == oldAge) {
 376       // We win.
 377       assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
 378       TASKQUEUE_STATS_ONLY(stats.record_pop_slow());
 379       return true;
 380     }
 381   }
 382   // We lose; a completing pop_global gets the element.  But the queue is empty
 383   // and top is greater than bottom.  Fix this representation of the empty queue
 384   // to become the canonical one.
 385   _age.set(newAge);
 386   assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
 387   return false;
 388 }
 389 
     // Steal one element from the top (age end) of the queue on behalf of
     // a thread other than the owner.  On success stores it in t and
     // returns true; returns false when the queue appears empty or the
     // cmpxchg on _age loses a race with another stealer or pop_local.
     // Statement order is load-bearing: age must be read before bottom.
 390 template<class E, MEMFLAGS F, unsigned int N>
 391 bool GenericTaskQueue<E, F, N>::pop_global(E& t) {
 392   Age oldAge = _age.get();
 393   // Architectures with weak memory model require a fence here. The
 394   // fence has a cumulative effect on getting age and getting bottom.
 395   // This way it is guaranteed that bottom is not older than age,
 396   // what is crucial for the correctness of the algorithm.
 397 #if (defined ARM || defined IA64 || defined PPC64)
 398   OrderAccess::fence();
 399 #endif
     // load_acquire keeps the following reads (of _elems in particular)
     // from being reordered ahead of the read of _bottom.
 400   uint localBot = OrderAccess::load_acquire((volatile juint*)&_bottom);
 401   uint n_elems = size(localBot, oldAge.top());
 402   if (n_elems == 0) {
 403     return false;
 404   }
 405 
     // Read the element speculatively; the cmpxchg below is what actually
     // claims it.  const_cast silences the unused-volatile-result warning.
 406   const_cast<E&>(t = _elems[oldAge.top()]);
 407   Age newAge(oldAge);
 408   newAge.increment();
 409   Age resAge = _age.cmpxchg(newAge, oldAge);
 410 
 411   // Note that using "_bottom" here might fail, since a pop_local might
 412   // have decremented it.
 413   assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity");
 414   return resAge == oldAge;
 415 }
 416 
 417 template<class E, MEMFLAGS F, unsigned int N>
 418 GenericTaskQueue<E, F, N>::~GenericTaskQueue() {
       // Release the C-heap backing array.  NOTE(review): elements are not
       // individually destroyed — assumes E is trivially destructible
       // (task pointers/ids); confirm against instantiations.
 419   FREE_C_HEAP_ARRAY(E, _elems, F);
 420 }


 624   // Reset the terminator, so that it may be reused again.
 625   // The caller is responsible for ensuring that this is done
 626   // in an MT-safe manner, once the previous round of use of
 627   // the terminator is finished.
 628   void reset_for_reuse();
 629   // Same as above but the number of parallel threads is set to the
 630   // given number.
 631   void reset_for_reuse(int n_threads);
 632 
 633 #ifdef TRACESPINNING
 634   static uint total_yields() { return _total_yields; }
 635   static uint total_spins() { return _total_spins; }
 636   static uint total_peeks() { return _total_peeks; }
 637   static void print_termination_counts();
 638 #endif
 639 };
 640 
     // Owner-side push onto the bottom end of the queue.  Returns true on
     // the fast path; falls back to push_slow (which may fail and return
     // false) once the dirty size reaches the capacity limit.
 641 template<class E, MEMFLAGS F, unsigned int N> inline bool
 642 GenericTaskQueue<E, F, N>::push(E t) {
 643   uint localBot = _bottom;
 644   assert(localBot < N, "_bottom out of range.");
 645   idx_t top = _age.top();
 646   uint dirty_n_elems = dirty_size(localBot, top);
 647   assert(dirty_n_elems < N, "n_elems out of range.");
 648   if (dirty_n_elems < max_elems()) {
 649     // g++ complains if the volatile result of the assignment is unused.
 650     const_cast<E&>(_elems[localBot] = t);
     // release_store orders the element write above before publication of
     // the new bottom, so a pop_global that observes the new bottom also
     // observes the stored element.
 651     OrderAccess::release_store(&_bottom, increment_index(localBot));
 652     TASKQUEUE_STATS_ONLY(stats.record_push());
 653     return true;
 654   } else {
 655     return push_slow(t, dirty_n_elems);
 656   }
 657 }
 658 
 659 template<class E, MEMFLAGS F, unsigned int N> inline bool
 660 GenericTaskQueue<E, F, N>::pop_local(E& t) {
 661   uint localBot = _bottom;
 662   // This value cannot be N-1.  That can only occur as a result of
 663   // the assignment to bottom in this method.  If it does, this method
 664   // resets the size to 0 before the next call (which is sequential,