1 /*
   2  * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_VM_UTILITIES_WORKGROUP_HPP
  26 #define SHARE_VM_UTILITIES_WORKGROUP_HPP
  27 
  28 #include "runtime/thread.inline.hpp"
  29 #include "utilities/taskqueue.hpp"
  30 
  31 // Task class hierarchy:
  32 //   AbstractGangTask
  33 //     AbstractGangTaskWOopQueues
  34 //
  35 // Gang/Group class hierarchy:
  36 //   AbstractWorkGang
  37 //     WorkGang
  38 //       FlexibleWorkGang
  39 //         YieldingFlexibleWorkGang (defined in another file)
  40 //
  41 // Worker class hierarchy:
  42 //   GangWorker (subclass of WorkerThread)
  43 //     YieldingFlexibleGangWorker   (defined in another file)
  44 
  45 // Forward declarations of classes defined here
  46 
  47 class WorkGang;
  48 class GangWorker;
  49 class YieldingFlexibleGangWorker;
  50 class YieldingFlexibleGangTask;
  51 class WorkData;
  52 class AbstractWorkGang;
  53 
  54 // An abstract task to be worked on by a gang.
  55 // You subclass this to supply your own work() method
  56 class AbstractGangTask VALUE_OBJ_CLASS_SPEC {
  57 public:
  58   // The abstract work method.
  59   // The argument tells you which member of the gang you are.
  60   virtual void work(uint worker_id) = 0;
  61 
  62   // This method configures the task for proper termination.
  63   // Some tasks do not have any requirements on termination
  64   // and may inherit this method that does nothing.  Some
  65   // tasks do some coordination on termination and override
  66   // this method to implement that coordination.
  67   virtual void set_for_termination(int active_workers) {};
  68 
  69   // Debugging accessor for the name.
  70   const char* name() const PRODUCT_RETURN_(return NULL;);
  71   int counter() { return _counter; }
  72   void set_counter(int value) { _counter = value; }
  73   int *address_of_counter() { return &_counter; }
  74 
  75   // RTTI
  76   NOT_PRODUCT(virtual bool is_YieldingFlexibleGang_task() const {
  77     return false;
  78   })
  79 
  80 private:
  81   NOT_PRODUCT(const char* _name;)
  82   // ??? Should a task have a priority associated with it?
  83   // ??? Or can the run method adjust priority as needed?
  84   int _counter;
  85 
  86 protected:
  87   // Constructor and desctructor: only construct subclasses.
  88   AbstractGangTask(const char* name)
  89   {
  90     NOT_PRODUCT(_name = name);
  91     _counter = 0;
  92   }
  93   ~AbstractGangTask() { }
  94 
  95 public:
  96 };
  97 
  98 class AbstractGangTaskWOopQueues : public AbstractGangTask {
  99   OopTaskQueueSet*       _queues;
 100   ParallelTaskTerminator _terminator;
 101  public:
 102   AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues) :
 103     AbstractGangTask(name), _queues(queues), _terminator(0, _queues) {}
 104   ParallelTaskTerminator* terminator() { return &_terminator; }
 105   virtual void set_for_termination(int active_workers) {
 106     terminator()->reset_for_reuse(active_workers);
 107   }
 108   OopTaskQueueSet* queues() { return _queues; }
 109 };
 110 
 111 
 112 // Class AbstractWorkGang:
 113 // An abstract class representing a gang of workers.
 114 // You subclass this to supply an implementation of run_task().
 115 class AbstractWorkGang: public CHeapObj<mtInternal> {
 116   // Here's the public interface to this class.
 117 public:
 118   // Constructor and destructor.
 119   AbstractWorkGang(const char* name, bool are_GC_task_threads,
 120                    bool are_ConcurrentGC_threads);
 121   ~AbstractWorkGang();
 122   // Run a task, returns when the task is done (or terminated).
 123   virtual void run_task(AbstractGangTask* task) = 0;
 124   // Stop and terminate all workers.
 125   virtual void stop();
 126   // Return true if more workers should be applied to the task.
 127   virtual bool needs_more_workers() const { return true; }
 128 public:
 129   // Debugging.
 130   const char* name() const;
 131 protected:
 132   // Initialize only instance data.
 133   const bool _are_GC_task_threads;
 134   const bool _are_ConcurrentGC_threads;
 135   // Printing support.
 136   const char* _name;
 137   // The monitor which protects these data,
 138   // and notifies of changes in it.
 139   Monitor*  _monitor;
 140   // The count of the number of workers in the gang.
 141   uint _total_workers;
 142   // Whether the workers should terminate.
 143   bool _terminate;
 144   // The array of worker threads for this gang.
 145   // This is only needed for cleaning up.
 146   GangWorker** _gang_workers;
 147   // The task for this gang.
 148   AbstractGangTask* _task;
 149   // A sequence number for the current task.
 150   int _sequence_number;
 151   // The number of started workers.
 152   uint _started_workers;
 153   // The number of finished workers.
 154   uint _finished_workers;
 155 public:
 156   // Accessors for fields
 157   Monitor* monitor() const {
 158     return _monitor;
 159   }
 160   uint total_workers() const {
 161     return _total_workers;
 162   }
 163   virtual uint active_workers() const {
 164     return _total_workers;
 165   }
 166   bool terminate() const {
 167     return _terminate;
 168   }
 169   GangWorker** gang_workers() const {
 170     return _gang_workers;
 171   }
 172   AbstractGangTask* task() const {
 173     return _task;
 174   }
 175   int sequence_number() const {
 176     return _sequence_number;
 177   }
 178   uint started_workers() const {
 179     return _started_workers;
 180   }
 181   uint finished_workers() const {
 182     return _finished_workers;
 183   }
 184   bool are_GC_task_threads() const {
 185     return _are_GC_task_threads;
 186   }
 187   bool are_ConcurrentGC_threads() const {
 188     return _are_ConcurrentGC_threads;
 189   }
 190   // Predicates.
 191   bool is_idle() const {
 192     return (task() == NULL);
 193   }
 194   // Return the Ith gang worker.
 195   GangWorker* gang_worker(uint i) const;
 196 
 197   void threads_do(ThreadClosure* tc) const;
 198 
 199   // Printing
 200   void print_worker_threads_on(outputStream *st) const;
 201   void print_worker_threads() const {
 202     print_worker_threads_on(tty);
 203   }
 204 
 205 protected:
 206   friend class GangWorker;
 207   friend class YieldingFlexibleGangWorker;
 208   // Note activation and deactivation of workers.
 209   // These methods should only be called with the mutex held.
 210   void internal_worker_poll(WorkData* data) const;
 211   void internal_note_start();
 212   void internal_note_finish();
 213 };
 214 
 215 class WorkData: public StackObj {
 216   // This would be a struct, but I want accessor methods.
 217 private:
 218   bool              _terminate;
 219   AbstractGangTask* _task;
 220   int               _sequence_number;
 221 public:
 222   // Constructor and destructor
 223   WorkData() {
 224     _terminate       = false;
 225     _task            = NULL;
 226     _sequence_number = 0;
 227   }
 228   ~WorkData() {
 229   }
 230   // Accessors and modifiers
 231   bool terminate()                       const { return _terminate;  }
 232   void set_terminate(bool value)               { _terminate = value; }
 233   AbstractGangTask* task()               const { return _task; }
 234   void set_task(AbstractGangTask* value)       { _task = value; }
 235   int sequence_number()                  const { return _sequence_number; }
 236   void set_sequence_number(int value)          { _sequence_number = value; }
 237 
 238   YieldingFlexibleGangTask* yf_task()    const {
 239     return (YieldingFlexibleGangTask*)_task;
 240   }
 241 };
 242 
 243 // Class WorkGang:
 244 class WorkGang: public AbstractWorkGang {
 245 public:
 246   // Constructor
 247   WorkGang(const char* name, uint workers,
 248            bool are_GC_task_threads, bool are_ConcurrentGC_threads);
 249   // Run a task, returns when the task is done (or terminated).
 250   virtual void run_task(AbstractGangTask* task);
 251   void run_task(AbstractGangTask* task, uint no_of_parallel_workers);
 252   // Allocate a worker and return a pointer to it.
 253   virtual GangWorker* allocate_worker(uint which);
 254   // Initialize workers in the gang.  Return true if initialization
 255   // succeeded. The type of the worker can be overridden in a derived
 256   // class with the appropriate implementation of allocate_worker().
 257   bool initialize_workers();
 258 };
 259 
 260 // Class GangWorker:
 261 //   Several instances of this class run in parallel as workers for a gang.
 262 class GangWorker: public WorkerThread {
 263 public:
 264   // Constructors and destructor.
 265   GangWorker(AbstractWorkGang* gang, uint id);
 266 
 267   // The only real method: run a task for the gang.
 268   virtual void run();
 269   // Predicate for Thread
 270   virtual bool is_GC_task_thread() const;
 271   virtual bool is_ConcurrentGC_thread() const;
 272   // Printing
 273   void print_on(outputStream* st) const;
 274   virtual void print() const { print_on(tty); }
 275 protected:
 276   AbstractWorkGang* _gang;
 277 
 278   virtual void initialize();
 279   virtual void loop();
 280 
 281 public:
 282   AbstractWorkGang* gang() const { return _gang; }
 283 };
 284 
 285 // Dynamic number of worker threads
 286 //
 287 // This type of work gang is used to run different numbers of
 288 // worker threads at different times.  The
 289 // number of workers run for a task is "_active_workers"
 290 // instead of "_total_workers" in a WorkGang.  The method
 291 // "needs_more_workers()" returns true until "_active_workers"
 292 // have been started and returns false afterwards.  The
 293 // implementation of "needs_more_workers()" in WorkGang always
 294 // returns true so that all workers are started.  The method
 295 // "loop()" in GangWorker was modified to ask "needs_more_workers()"
 296 // in its loop to decide if it should start working on a task.
 297 // A worker in "loop()" waits for notification on the WorkGang
 298 // monitor and execution of each worker as it checks for work
 299 // is serialized via the same monitor.  The "needs_more_workers()"
 300 // call is serialized and additionally the calculation for the
 301 // "part" (effectively the worker id for executing the task) is
 302 // serialized to give each worker a unique "part".  Workers that
 303 // are not needed for this tasks (i.e., "_active_workers" have
 304 // been started before it, continue to wait for work.
 305 //
 306 // Note on use of FlexibleWorkGang's for GC.
 307 // There are three places where task completion is determined.
 308 // In
 309 //    1) ParallelTaskTerminator::offer_termination() where _n_threads
 310 //    must be set to the correct value so that count of workers that
 311 //    have offered termination will exactly match the number
 312 //    working on the task.  Tasks such as those derived from GCTask
 313 //    use ParallelTaskTerminator's.  Tasks that want load balancing
 314 //    by work stealing use this method to gauge completion.
 315 //    2) SubTasksDone has a variable _n_threads that is used in
 316 //    all_tasks_completed() to determine completion.  all_tasks_complete()
 317 //    counts the number of tasks that have been done and then reset
 318 //    the SubTasksDone so that it can be used again.  When the number of
 319 //    tasks is set to the number of GC workers, then _n_threads must
 320 //    be set to the number of active GC workers. G1RootProcessor and
 321 //    GenCollectedHeap have SubTasksDone.
 322 //    3) SequentialSubTasksDone has an _n_threads that is used in
 323 //    a way similar to SubTasksDone and has the same dependency on the
 324 //    number of active GC workers.  CompactibleFreeListSpace and Space
 325 //    have SequentialSubTasksDone's.
 326 //
 327 // Examples of using SubTasksDone and SequentialSubTasksDone:
 328 //  G1RootProcessor and GenCollectedHeap::process_roots() use
 329 //  SubTasksDone* _process_strong_tasks to claim tasks for workers
 330 //
 331 //  GenCollectedHeap::gen_process_roots() calls
 332 //      rem_set()->younger_refs_iterate()
 333 //  to scan the card table and which eventually calls down into
 334 //  CardTableModRefBS::par_non_clean_card_iterate_work().  This method
 335 //  uses SequentialSubTasksDone* _pst to claim tasks.
 336 //  Both SubTasksDone and SequentialSubTasksDone call their method
 337 //  all_tasks_completed() to count the number of GC workers that have
 338 //  finished their work.  That logic is "when all the workers are
 339 //  finished the tasks are finished".
 340 //
 341 //  The pattern that appears  in the code is to set _n_threads
 342 //  to a value > 1 before a task that you would like executed in parallel
 343 //  and then to set it to 0 after that task has completed.  A value of
 344 //  0 is a "special" value in set_n_threads() which translates to
 345 //  setting _n_threads to 1.
 346 //
 347 //  Some code uses _n_termination to decide if work should be done in
 348 //  parallel.  The notorious possibly_parallel_oops_do() in threads.cpp
 349 //  is an example of such code.  Look for variable "is_par" for other
 350 //  examples.
 351 //
 352 //  The active_workers is not reset to 0 after a parallel phase.  It's
 353 //  value may be used in later phases and in one instance at least
 354 //  (the parallel remark) it has to be used (the parallel remark depends
 355 //  on the partitioning done in the previous parallel scavenge).
 356 
 357 class FlexibleWorkGang: public WorkGang {
 358   // The currently active workers in this gang.
 359   // This is a number that is dynamically adjusted
 360   // and checked in the run_task() method at each invocation.
 361   // As described above _active_workers determines the number
 362   // of threads started on a task.  It must also be used to
 363   // determine completion.
 364 
 365  protected:
 366   uint _active_workers;
 367  public:
 368   // Constructor and destructor.
 369   // Initialize active_workers to a minimum value.  Setting it to
 370   // the parameter "workers" will initialize it to a maximum
 371   // value which is not desirable.
 372   FlexibleWorkGang(const char* name, uint workers,
 373                    bool are_GC_task_threads,
 374                    bool  are_ConcurrentGC_threads) :
 375     WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads),
 376     _active_workers(UseDynamicNumberOfGCThreads ? 1U : ParallelGCThreads) {}
 377   // Accessors for fields
 378   virtual uint active_workers() const { return _active_workers; }
 379   void set_active_workers(uint v) {
 380     assert(v <= _total_workers,
 381            "Trying to set more workers active than there are");
 382     _active_workers = MIN2(v, _total_workers);
 383     assert(v != 0, "Trying to set active workers to 0");
 384     _active_workers = MAX2(1U, _active_workers);
 385     assert(UseDynamicNumberOfGCThreads || _active_workers == _total_workers,
 386            "Unless dynamic should use total workers");
 387   }
 388   virtual void run_task(AbstractGangTask* task);
 389   virtual bool needs_more_workers() const {
 390     return _started_workers < _active_workers;
 391   }
 392 };
 393 
 394 // Work gangs in garbage collectors: 2009-06-10
 395 //
 396 // SharedHeap - work gang for stop-the-world parallel collection.
 397 //   Used by
 398 //     ParNewGeneration
 399 //     CMSParRemarkTask
 400 //     CMSRefProcTaskExecutor
 401 //     G1CollectedHeap
 402 //     G1ParFinalCountTask
 403 // ConcurrentMark
 404 // CMSCollector
 405 
 406 // A class that acts as a synchronisation barrier. Workers enter
 407 // the barrier and must wait until all other workers have entered
 408 // before any of them may leave.
 409 
 410 class WorkGangBarrierSync : public StackObj {
 411 protected:
 412   Monitor _monitor;
 413   uint    _n_workers;
 414   uint    _n_completed;
 415   bool    _should_reset;
 416   bool    _aborted;
 417 
 418   Monitor* monitor()        { return &_monitor; }
 419   uint     n_workers()      { return _n_workers; }
 420   uint     n_completed()    { return _n_completed; }
 421   bool     should_reset()   { return _should_reset; }
 422   bool     aborted()        { return _aborted; }
 423 
 424   void     zero_completed() { _n_completed = 0; }
 425   void     inc_completed()  { _n_completed++; }
 426   void     set_aborted()    { _aborted = true; }
 427   void     set_should_reset(bool v) { _should_reset = v; }
 428 
 429 public:
 430   WorkGangBarrierSync();
 431   WorkGangBarrierSync(uint n_workers, const char* name);
 432 
 433   // Set the number of workers that will use the barrier.
 434   // Must be called before any of the workers start running.
 435   void set_n_workers(uint n_workers);
 436 
 437   // Enter the barrier. A worker that enters the barrier will
 438   // not be allowed to leave until all other threads have
 439   // also entered the barrier or the barrier is aborted.
 440   // Returns false if the barrier was aborted.
 441   bool enter();
 442 
 443   // Aborts the barrier and wakes up any threads waiting for
 444   // the barrier to complete. The barrier will remain in the
 445   // aborted state until the next call to set_n_workers().
 446   void abort();
 447 };
 448 
 449 // A class to manage claiming of subtasks within a group of tasks.  The
 450 // subtasks will be identified by integer indices, usually elements of an
 451 // enumeration type.
 452 
 453 class SubTasksDone: public CHeapObj<mtInternal> {
 454   uint* _tasks;
 455   uint _n_tasks;
 456   // _n_threads is used to determine when a sub task is done.
 457   // It does not control how many threads will execute the subtask
 458   // but must be initialized to the number that do execute the task
 459   // in order to correctly decide when the subtask is done (all the
 460   // threads working on the task have finished).
 461   uint _n_threads;
 462   uint _threads_completed;
 463 #ifdef ASSERT
 464   volatile uint _claimed;
 465 #endif
 466 
 467   // Set all tasks to unclaimed.
 468   void clear();
 469 
 470 public:
 471   // Initializes "this" to a state in which there are "n" tasks to be
 472   // processed, none of the which are originally claimed.  The number of
 473   // threads doing the tasks is initialized 1.
 474   SubTasksDone(uint n);
 475 
 476   // True iff the object is in a valid state.
 477   bool valid();
 478 
 479   // Get/set the number of parallel threads doing the tasks to "t".  Can only
 480   // be called before tasks start or after they are complete.
 481   uint n_threads() { return _n_threads; }
 482   void set_n_threads(uint t);
 483 
 484   // Returns "false" if the task "t" is unclaimed, and ensures that task is
 485   // claimed.  The task "t" is required to be within the range of "this".
 486   bool is_task_claimed(uint t);
 487 
 488   // The calling thread asserts that it has attempted to claim all the
 489   // tasks that it will try to claim.  Every thread in the parallel task
 490   // must execute this.  (When the last thread does so, the task array is
 491   // cleared.)
 492   void all_tasks_completed();
 493 
 494   // Destructor.
 495   ~SubTasksDone();
 496 };
 497 
 498 // As above, but for sequential tasks, i.e. instead of claiming
 499 // sub-tasks from a set (possibly an enumeration), claim sub-tasks
 500 // in sequential order. This is ideal for claiming dynamically
 501 // partitioned tasks (like striding in the parallel remembered
 502 // set scanning). Note that unlike the above class this is
 503 // a stack object - is there any reason for it not to be?
 504 
 505 class SequentialSubTasksDone : public StackObj {
 506 protected:
 507   uint _n_tasks;     // Total number of tasks available.
 508   uint _n_claimed;   // Number of tasks claimed.
 509   // _n_threads is used to determine when a sub task is done.
 510   // See comments on SubTasksDone::_n_threads
 511   uint _n_threads;   // Total number of parallel threads.
 512   uint _n_completed; // Number of completed threads.
 513 
 514   void clear();
 515 
 516 public:
 517   SequentialSubTasksDone() {
 518     clear();
 519   }
 520   ~SequentialSubTasksDone() {}
 521 
 522   // True iff the object is in a valid state.
 523   bool valid();
 524 
 525   // number of tasks
 526   uint n_tasks() const { return _n_tasks; }
 527 
 528   // Get/set the number of parallel threads doing the tasks to t.
 529   // Should be called before the task starts but it is safe
 530   // to call this once a task is running provided that all
 531   // threads agree on the number of threads.
 532   uint n_threads() { return _n_threads; }
 533   void set_n_threads(uint t) { _n_threads = t; }
 534 
 535   // Set the number of tasks to be claimed to t. As above,
 536   // should be called before the tasks start but it is safe
 537   // to call this once a task is running provided all threads
 538   // agree on the number of tasks.
 539   void set_n_tasks(uint t) { _n_tasks = t; }
 540 
 541   // Returns false if the next task in the sequence is unclaimed,
 542   // and ensures that it is claimed. Will set t to be the index
 543   // of the claimed task in the sequence. Will return true if
 544   // the task cannot be claimed and there are none left to claim.
 545   bool is_task_claimed(uint& t);
 546 
 547   // The calling thread asserts that it has attempted to claim
 548   // all the tasks it possibly can in the sequence. Every thread
 549   // claiming tasks must promise call this. Returns true if this
 550   // is the last thread to complete so that the thread can perform
 551   // cleanup if necessary.
 552   bool all_tasks_completed();
 553 };
 554 
 555 // Represents a set of free small integer ids.
 556 class FreeIdSet : public CHeapObj<mtInternal> {
 557   enum {
 558     end_of_list = -1,
 559     claimed = -2
 560   };
 561 
 562   int _sz;
 563   Monitor* _mon;
 564 
 565   int* _ids;
 566   int _hd;
 567   int _waiters;
 568   int _claimed;
 569 
 570   static bool _safepoint;
 571   typedef FreeIdSet* FreeIdSetPtr;
 572   static const int NSets = 10;
 573   static FreeIdSetPtr _sets[NSets];
 574   static bool _stat_init;
 575   int _index;
 576 
 577 public:
 578   FreeIdSet(int sz, Monitor* mon);
 579   ~FreeIdSet();
 580 
 581   static void set_safepoint(bool b);
 582 
 583   // Attempt to claim the given id permanently.  Returns "true" iff
 584   // successful.
 585   bool claim_perm_id(int i);
 586 
 587   // Returns an unclaimed parallel id (waiting for one to be released if
 588   // necessary).  Returns "-1" if a GC wakes up a wait for an id.
 589   int claim_par_id();
 590 
 591   void release_par_id(int id);
 592 };
 593 
 594 #endif // SHARE_VM_UTILITIES_WORKGROUP_HPP