1 /*
   2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.  Oracle designates this
   7  * particular file as subject to the "Classpath" exception as provided
   8  * by Oracle in the LICENSE file that accompanied this code.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  */
  24 
  25 /*
  26  * This file is available under and governed by the GNU General Public
  27  * License version 2 only, as published by the Free Software Foundation.
  28  * However, the following notice accompanied the original version of this
  29  * file:
  30  *
  31  * Written by Doug Lea with assistance from members of JCP JSR-166
  32  * Expert Group and released to the public domain, as explained at
  33  * http://creativecommons.org/publicdomain/zero/1.0/
  34  */
  35 
  36 package java.util;
  37 
  38 import java.util.concurrent.CountedCompleter;
  39 import java.util.concurrent.ForkJoinPool;
  40 import java.util.function.BinaryOperator;
  41 import java.util.function.DoubleBinaryOperator;
  42 import java.util.function.IntBinaryOperator;
  43 import java.util.function.LongBinaryOperator;
  44 
  45 /**
  46  * ForkJoin tasks to perform Arrays.parallelPrefix operations.
  47  *
  48  * @author Doug Lea
  49  * @since 1.8
  50  */
  51 class ArrayPrefixHelpers {
  52     private ArrayPrefixHelpers() {} // non-instantiable
  53 
  54     /*
  55      * Parallel prefix (aka cumulate, scan) task classes
  56      * are based loosely on Guy Blelloch's original
  57      * algorithm (http://www.cs.cmu.edu/~scandal/alg/scan.html):
  58      *  Keep dividing by two to threshold segment size, and then:
  59      *   Pass 1: Create tree of partial sums for each segment
  60      *   Pass 2: For each segment, cumulate with offset of left sibling
  61      *
  62      * This version improves performance within FJ framework mainly by
  63      * allowing the second pass of ready left-hand sides to proceed
  64      * even if some right-hand side first passes are still executing.
  65      * It also combines first and second pass for leftmost segment,
  66      * and skips the first pass for rightmost segment (whose result is
  67      * not needed for second pass).  It similarly manages to avoid
  68      * requiring that users supply an identity basis for accumulations
  69      * by tracking those segments/subtasks for which the first
  70      * existing element is used as base.
  71      *
  72      * Managing this relies on ORing some bits in the pendingCount for
  73      * phases/states: CUMULATE, SUMMED, and FINISHED. CUMULATE is the
  74      * main phase bit. When false, segments compute only their sum.
  75      * When true, they cumulate array elements. CUMULATE is set at
  76      * root at beginning of second pass and then propagated down. But
  77      * it may also be set earlier for subtrees with lo==0 (the left
  78      * spine of tree). SUMMED is a one bit join count. For leafs, it
  79      * is set when summed. For internal nodes, it becomes true when
  80      * one child is summed.  When the second child finishes summing,
  81      * we then moves up tree to trigger the cumulate phase. FINISHED
  82      * is also a one bit join count. For leafs, it is set when
  83      * cumulated. For internal nodes, it becomes true when one child
  84      * is cumulated.  When the second child finishes cumulating, it
  85      * then moves up tree, completing at the root.
  86      *
  87      * To better exploit locality and reduce overhead, the compute
  88      * method loops starting with the current task, moving if possible
  89      * to one of its subtasks rather than forking.
  90      *
  91      * As usual for this sort of utility, there are 4 versions, that
  92      * are simple copy/paste/adapt variants of each other.  (The
  93      * double and int versions differ from long version solely by
  94      * replacing "long" (with case-matching)).
  95      */
  96 
  97     // see above
  98     static final int CUMULATE = 1;
  99     static final int SUMMED   = 2;
 100     static final int FINISHED = 4;
 101 
 102     /** The smallest subtask array partition size to use as threshold */
 103     static final int MIN_PARTITION = 16;
 104 
 105     static final class CumulateTask<T> extends CountedCompleter<Void> {
 106         final T[] array;
 107         final BinaryOperator<T> function;
 108         CumulateTask<T> left, right;
 109         T in, out;
 110         final int lo, hi, origin, fence, threshold;
 111 
 112         /** Root task constructor */
 113         public CumulateTask(CumulateTask<T> parent,
 114                             BinaryOperator<T> function,
 115                             T[] array, int lo, int hi) {
 116             super(parent);
 117             this.function = function; this.array = array;
 118             this.lo = this.origin = lo; this.hi = this.fence = hi;
 119             int p;
 120             this.threshold =
 121                 (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
 122                 <= MIN_PARTITION ? MIN_PARTITION : p;
 123         }
 124 
 125         /** Subtask constructor */
 126         CumulateTask(CumulateTask<T> parent, BinaryOperator<T> function,
 127                      T[] array, int origin, int fence, int threshold,
 128                      int lo, int hi) {
 129             super(parent);
 130             this.function = function; this.array = array;
 131             this.origin = origin; this.fence = fence;
 132             this.threshold = threshold;
 133             this.lo = lo; this.hi = hi;
 134         }
 135 
 136         public final void compute() {
 137             final BinaryOperator<T> fn;
 138             final T[] a;
 139             if ((fn = this.function) == null || (a = this.array) == null)
 140                 throw new NullPointerException();    // hoist checks
 141             int th = threshold, org = origin, fnc = fence, l, h;
 142             CumulateTask<T> t = this;
 143             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
 144                 if (h - l > th) {
 145                     CumulateTask<T> lt = t.left, rt = t.right, f;
 146                     if (lt == null) {                // first pass
 147                         int mid = (l + h) >>> 1;
 148                         f = rt = t.right =
 149                             new CumulateTask<T>(t, fn, a, org, fnc, th, mid, h);
 150                         t = lt = t.left =
 151                             new CumulateTask<T>(t, fn, a, org, fnc, th, l, mid);
 152                     }
 153                     else {                           // possibly refork
 154                         T pin = t.in;
 155                         lt.in = pin;
 156                         f = t = null;
 157                         if (rt != null) {
 158                             T lout = lt.out;
 159                             rt.in = (l == org ? lout :
 160                                      fn.apply(pin, lout));
 161                             for (int c;;) {
 162                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
 163                                     break;
 164                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
 165                                     t = rt;
 166                                     break;
 167                                 }
 168                             }
 169                         }
 170                         for (int c;;) {
 171                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
 172                                 break;
 173                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
 174                                 if (t != null)
 175                                     f = t;
 176                                 t = lt;
 177                                 break;
 178                             }
 179                         }
 180                         if (t == null)
 181                             break;
 182                     }
 183                     if (f != null)
 184                         f.fork();
 185                 }
 186                 else {
 187                     int state; // Transition to sum, cumulate, or both
 188                     for (int b;;) {
 189                         if (((b = t.getPendingCount()) & FINISHED) != 0)
 190                             break outer;                      // already done
 191                         state = ((b & CUMULATE) != 0 ? FINISHED :
 192                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
 193                         if (t.compareAndSetPendingCount(b, b|state))
 194                             break;
 195                     }
 196 
 197                     T sum;
 198                     if (state != SUMMED) {
 199                         int first;
 200                         if (l == org) {                       // leftmost; no in
 201                             sum = a[org];
 202                             first = org + 1;
 203                         }
 204                         else {
 205                             sum = t.in;
 206                             first = l;
 207                         }
 208                         for (int i = first; i < h; ++i)       // cumulate
 209                             a[i] = sum = fn.apply(sum, a[i]);
 210                     }
 211                     else if (h < fnc) {                       // skip rightmost
 212                         sum = a[l];
 213                         for (int i = l + 1; i < h; ++i)       // sum only
 214                             sum = fn.apply(sum, a[i]);
 215                     }
 216                     else
 217                         sum = t.in;
 218                     t.out = sum;
 219                     for (CumulateTask<T> par;;) {             // propagate
 220                         @SuppressWarnings("unchecked") CumulateTask<T> partmp
 221                             = (CumulateTask<T>)t.getCompleter();
 222                         if ((par = partmp) == null) {
 223                             if ((state & FINISHED) != 0)      // enable join
 224                                 t.quietlyComplete();
 225                             break outer;
 226                         }
 227                         int b = par.getPendingCount();
 228                         if ((b & state & FINISHED) != 0)
 229                             t = par;                          // both done
 230                         else if ((b & state & SUMMED) != 0) { // both summed
 231                             int nextState; CumulateTask<T> lt, rt;
 232                             if ((lt = par.left) != null &&
 233                                 (rt = par.right) != null) {
 234                                 T lout = lt.out;
 235                                 par.out = (rt.hi == fnc ? lout :
 236                                            fn.apply(lout, rt.out));
 237                             }
 238                             int refork = (((b & CUMULATE) == 0 &&
 239                                            par.lo == org) ? CUMULATE : 0);
 240                             if ((nextState = b|state|refork) == b ||
 241                                 par.compareAndSetPendingCount(b, nextState)) {
 242                                 state = SUMMED;               // drop finished
 243                                 t = par;
 244                                 if (refork != 0)
 245                                     par.fork();
 246                             }
 247                         }
 248                         else if (par.compareAndSetPendingCount(b, b|state))
 249                             break outer;                      // sib not ready
 250                     }
 251                 }
 252             }
 253         }
 254         private static final long serialVersionUID = 5293554502939613543L;
 255     }
 256 
 257     static final class LongCumulateTask extends CountedCompleter<Void> {
 258         final long[] array;
 259         final LongBinaryOperator function;
 260         LongCumulateTask left, right;
 261         long in, out;
 262         final int lo, hi, origin, fence, threshold;
 263 
 264         /** Root task constructor */
 265         public LongCumulateTask(LongCumulateTask parent,
 266                                 LongBinaryOperator function,
 267                                 long[] array, int lo, int hi) {
 268             super(parent);
 269             this.function = function; this.array = array;
 270             this.lo = this.origin = lo; this.hi = this.fence = hi;
 271             int p;
 272             this.threshold =
 273                 (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
 274                 <= MIN_PARTITION ? MIN_PARTITION : p;
 275         }
 276 
 277         /** Subtask constructor */
 278         LongCumulateTask(LongCumulateTask parent, LongBinaryOperator function,
 279                          long[] array, int origin, int fence, int threshold,
 280                          int lo, int hi) {
 281             super(parent);
 282             this.function = function; this.array = array;
 283             this.origin = origin; this.fence = fence;
 284             this.threshold = threshold;
 285             this.lo = lo; this.hi = hi;
 286         }
 287 
 288         public final void compute() {
 289             final LongBinaryOperator fn;
 290             final long[] a;
 291             if ((fn = this.function) == null || (a = this.array) == null)
 292                 throw new NullPointerException();    // hoist checks
 293             int th = threshold, org = origin, fnc = fence, l, h;
 294             LongCumulateTask t = this;
 295             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
 296                 if (h - l > th) {
 297                     LongCumulateTask lt = t.left, rt = t.right, f;
 298                     if (lt == null) {                // first pass
 299                         int mid = (l + h) >>> 1;
 300                         f = rt = t.right =
 301                             new LongCumulateTask(t, fn, a, org, fnc, th, mid, h);
 302                         t = lt = t.left =
 303                             new LongCumulateTask(t, fn, a, org, fnc, th, l, mid);
 304                     }
 305                     else {                           // possibly refork
 306                         long pin = t.in;
 307                         lt.in = pin;
 308                         f = t = null;
 309                         if (rt != null) {
 310                             long lout = lt.out;
 311                             rt.in = (l == org ? lout :
 312                                      fn.applyAsLong(pin, lout));
 313                             for (int c;;) {
 314                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
 315                                     break;
 316                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
 317                                     t = rt;
 318                                     break;
 319                                 }
 320                             }
 321                         }
 322                         for (int c;;) {
 323                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
 324                                 break;
 325                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
 326                                 if (t != null)
 327                                     f = t;
 328                                 t = lt;
 329                                 break;
 330                             }
 331                         }
 332                         if (t == null)
 333                             break;
 334                     }
 335                     if (f != null)
 336                         f.fork();
 337                 }
 338                 else {
 339                     int state; // Transition to sum, cumulate, or both
 340                     for (int b;;) {
 341                         if (((b = t.getPendingCount()) & FINISHED) != 0)
 342                             break outer;                      // already done
 343                         state = ((b & CUMULATE) != 0 ? FINISHED :
 344                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
 345                         if (t.compareAndSetPendingCount(b, b|state))
 346                             break;
 347                     }
 348 
 349                     long sum;
 350                     if (state != SUMMED) {
 351                         int first;
 352                         if (l == org) {                       // leftmost; no in
 353                             sum = a[org];
 354                             first = org + 1;
 355                         }
 356                         else {
 357                             sum = t.in;
 358                             first = l;
 359                         }
 360                         for (int i = first; i < h; ++i)       // cumulate
 361                             a[i] = sum = fn.applyAsLong(sum, a[i]);
 362                     }
 363                     else if (h < fnc) {                       // skip rightmost
 364                         sum = a[l];
 365                         for (int i = l + 1; i < h; ++i)       // sum only
 366                             sum = fn.applyAsLong(sum, a[i]);
 367                     }
 368                     else
 369                         sum = t.in;
 370                     t.out = sum;
 371                     for (LongCumulateTask par;;) {            // propagate
 372                         if ((par = (LongCumulateTask)t.getCompleter()) == null) {
 373                             if ((state & FINISHED) != 0)      // enable join
 374                                 t.quietlyComplete();
 375                             break outer;
 376                         }
 377                         int b = par.getPendingCount();
 378                         if ((b & state & FINISHED) != 0)
 379                             t = par;                          // both done
 380                         else if ((b & state & SUMMED) != 0) { // both summed
 381                             int nextState; LongCumulateTask lt, rt;
 382                             if ((lt = par.left) != null &&
 383                                 (rt = par.right) != null) {
 384                                 long lout = lt.out;
 385                                 par.out = (rt.hi == fnc ? lout :
 386                                            fn.applyAsLong(lout, rt.out));
 387                             }
 388                             int refork = (((b & CUMULATE) == 0 &&
 389                                            par.lo == org) ? CUMULATE : 0);
 390                             if ((nextState = b|state|refork) == b ||
 391                                 par.compareAndSetPendingCount(b, nextState)) {
 392                                 state = SUMMED;               // drop finished
 393                                 t = par;
 394                                 if (refork != 0)
 395                                     par.fork();
 396                             }
 397                         }
 398                         else if (par.compareAndSetPendingCount(b, b|state))
 399                             break outer;                      // sib not ready
 400                     }
 401                 }
 402             }
 403         }
 404         private static final long serialVersionUID = -5074099945909284273L;
 405     }
 406 
 407     static final class DoubleCumulateTask extends CountedCompleter<Void> {
 408         final double[] array;
 409         final DoubleBinaryOperator function;
 410         DoubleCumulateTask left, right;
 411         double in, out;
 412         final int lo, hi, origin, fence, threshold;
 413 
 414         /** Root task constructor */
 415         public DoubleCumulateTask(DoubleCumulateTask parent,
 416                                   DoubleBinaryOperator function,
 417                                   double[] array, int lo, int hi) {
 418             super(parent);
 419             this.function = function; this.array = array;
 420             this.lo = this.origin = lo; this.hi = this.fence = hi;
 421             int p;
 422             this.threshold =
 423                 (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
 424                 <= MIN_PARTITION ? MIN_PARTITION : p;
 425         }
 426 
 427         /** Subtask constructor */
 428         DoubleCumulateTask(DoubleCumulateTask parent, DoubleBinaryOperator function,
 429                            double[] array, int origin, int fence, int threshold,
 430                            int lo, int hi) {
 431             super(parent);
 432             this.function = function; this.array = array;
 433             this.origin = origin; this.fence = fence;
 434             this.threshold = threshold;
 435             this.lo = lo; this.hi = hi;
 436         }
 437 
 438         public final void compute() {
 439             final DoubleBinaryOperator fn;
 440             final double[] a;
 441             if ((fn = this.function) == null || (a = this.array) == null)
 442                 throw new NullPointerException();    // hoist checks
 443             int th = threshold, org = origin, fnc = fence, l, h;
 444             DoubleCumulateTask t = this;
 445             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
 446                 if (h - l > th) {
 447                     DoubleCumulateTask lt = t.left, rt = t.right, f;
 448                     if (lt == null) {                // first pass
 449                         int mid = (l + h) >>> 1;
 450                         f = rt = t.right =
 451                             new DoubleCumulateTask(t, fn, a, org, fnc, th, mid, h);
 452                         t = lt = t.left =
 453                             new DoubleCumulateTask(t, fn, a, org, fnc, th, l, mid);
 454                     }
 455                     else {                           // possibly refork
 456                         double pin = t.in;
 457                         lt.in = pin;
 458                         f = t = null;
 459                         if (rt != null) {
 460                             double lout = lt.out;
 461                             rt.in = (l == org ? lout :
 462                                      fn.applyAsDouble(pin, lout));
 463                             for (int c;;) {
 464                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
 465                                     break;
 466                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
 467                                     t = rt;
 468                                     break;
 469                                 }
 470                             }
 471                         }
 472                         for (int c;;) {
 473                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
 474                                 break;
 475                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
 476                                 if (t != null)
 477                                     f = t;
 478                                 t = lt;
 479                                 break;
 480                             }
 481                         }
 482                         if (t == null)
 483                             break;
 484                     }
 485                     if (f != null)
 486                         f.fork();
 487                 }
 488                 else {
 489                     int state; // Transition to sum, cumulate, or both
 490                     for (int b;;) {
 491                         if (((b = t.getPendingCount()) & FINISHED) != 0)
 492                             break outer;                      // already done
 493                         state = ((b & CUMULATE) != 0 ? FINISHED :
 494                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
 495                         if (t.compareAndSetPendingCount(b, b|state))
 496                             break;
 497                     }
 498 
 499                     double sum;
 500                     if (state != SUMMED) {
 501                         int first;
 502                         if (l == org) {                       // leftmost; no in
 503                             sum = a[org];
 504                             first = org + 1;
 505                         }
 506                         else {
 507                             sum = t.in;
 508                             first = l;
 509                         }
 510                         for (int i = first; i < h; ++i)       // cumulate
 511                             a[i] = sum = fn.applyAsDouble(sum, a[i]);
 512                     }
 513                     else if (h < fnc) {                       // skip rightmost
 514                         sum = a[l];
 515                         for (int i = l + 1; i < h; ++i)       // sum only
 516                             sum = fn.applyAsDouble(sum, a[i]);
 517                     }
 518                     else
 519                         sum = t.in;
 520                     t.out = sum;
 521                     for (DoubleCumulateTask par;;) {            // propagate
 522                         if ((par = (DoubleCumulateTask)t.getCompleter()) == null) {
 523                             if ((state & FINISHED) != 0)      // enable join
 524                                 t.quietlyComplete();
 525                             break outer;
 526                         }
 527                         int b = par.getPendingCount();
 528                         if ((b & state & FINISHED) != 0)
 529                             t = par;                          // both done
 530                         else if ((b & state & SUMMED) != 0) { // both summed
 531                             int nextState; DoubleCumulateTask lt, rt;
 532                             if ((lt = par.left) != null &&
 533                                 (rt = par.right) != null) {
 534                                 double lout = lt.out;
 535                                 par.out = (rt.hi == fnc ? lout :
 536                                            fn.applyAsDouble(lout, rt.out));
 537                             }
 538                             int refork = (((b & CUMULATE) == 0 &&
 539                                            par.lo == org) ? CUMULATE : 0);
 540                             if ((nextState = b|state|refork) == b ||
 541                                 par.compareAndSetPendingCount(b, nextState)) {
 542                                 state = SUMMED;               // drop finished
 543                                 t = par;
 544                                 if (refork != 0)
 545                                     par.fork();
 546                             }
 547                         }
 548                         else if (par.compareAndSetPendingCount(b, b|state))
 549                             break outer;                      // sib not ready
 550                     }
 551                 }
 552             }
 553         }
 554         private static final long serialVersionUID = -586947823794232033L;
 555     }
 556 
 557     static final class IntCumulateTask extends CountedCompleter<Void> {
 558         final int[] array;
 559         final IntBinaryOperator function;
 560         IntCumulateTask left, right;
 561         int in, out;
 562         final int lo, hi, origin, fence, threshold;
 563 
 564         /** Root task constructor */
 565         public IntCumulateTask(IntCumulateTask parent,
 566                                IntBinaryOperator function,
 567                                int[] array, int lo, int hi) {
 568             super(parent);
 569             this.function = function; this.array = array;
 570             this.lo = this.origin = lo; this.hi = this.fence = hi;
 571             int p;
 572             this.threshold =
 573                 (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
 574                 <= MIN_PARTITION ? MIN_PARTITION : p;
 575         }
 576 
 577         /** Subtask constructor */
 578         IntCumulateTask(IntCumulateTask parent, IntBinaryOperator function,
 579                         int[] array, int origin, int fence, int threshold,
 580                         int lo, int hi) {
 581             super(parent);
 582             this.function = function; this.array = array;
 583             this.origin = origin; this.fence = fence;
 584             this.threshold = threshold;
 585             this.lo = lo; this.hi = hi;
 586         }
 587 
 588         public final void compute() {
 589             final IntBinaryOperator fn;
 590             final int[] a;
 591             if ((fn = this.function) == null || (a = this.array) == null)
 592                 throw new NullPointerException();    // hoist checks
 593             int th = threshold, org = origin, fnc = fence, l, h;
 594             IntCumulateTask t = this;
 595             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
 596                 if (h - l > th) {
 597                     IntCumulateTask lt = t.left, rt = t.right, f;
 598                     if (lt == null) {                // first pass
 599                         int mid = (l + h) >>> 1;
 600                         f = rt = t.right =
 601                             new IntCumulateTask(t, fn, a, org, fnc, th, mid, h);
 602                         t = lt = t.left =
 603                             new IntCumulateTask(t, fn, a, org, fnc, th, l, mid);
 604                     }
 605                     else {                           // possibly refork
 606                         int pin = t.in;
 607                         lt.in = pin;
 608                         f = t = null;
 609                         if (rt != null) {
 610                             int lout = lt.out;
 611                             rt.in = (l == org ? lout :
 612                                      fn.applyAsInt(pin, lout));
 613                             for (int c;;) {
 614                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
 615                                     break;
 616                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
 617                                     t = rt;
 618                                     break;
 619                                 }
 620                             }
 621                         }
 622                         for (int c;;) {
 623                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
 624                                 break;
 625                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
 626                                 if (t != null)
 627                                     f = t;
 628                                 t = lt;
 629                                 break;
 630                             }
 631                         }
 632                         if (t == null)
 633                             break;
 634                     }
 635                     if (f != null)
 636                         f.fork();
 637                 }
 638                 else {
 639                     int state; // Transition to sum, cumulate, or both
 640                     for (int b;;) {
 641                         if (((b = t.getPendingCount()) & FINISHED) != 0)
 642                             break outer;                      // already done
 643                         state = ((b & CUMULATE) != 0 ? FINISHED :
 644                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
 645                         if (t.compareAndSetPendingCount(b, b|state))
 646                             break;
 647                     }
 648 
 649                     int sum;
 650                     if (state != SUMMED) {
 651                         int first;
 652                         if (l == org) {                       // leftmost; no in
 653                             sum = a[org];
 654                             first = org + 1;
 655                         }
 656                         else {
 657                             sum = t.in;
 658                             first = l;
 659                         }
 660                         for (int i = first; i < h; ++i)       // cumulate
 661                             a[i] = sum = fn.applyAsInt(sum, a[i]);
 662                     }
 663                     else if (h < fnc) {                       // skip rightmost
 664                         sum = a[l];
 665                         for (int i = l + 1; i < h; ++i)       // sum only
 666                             sum = fn.applyAsInt(sum, a[i]);
 667                     }
 668                     else
 669                         sum = t.in;
 670                     t.out = sum;
 671                     for (IntCumulateTask par;;) {            // propagate
 672                         if ((par = (IntCumulateTask)t.getCompleter()) == null) {
 673                             if ((state & FINISHED) != 0)      // enable join
 674                                 t.quietlyComplete();
 675                             break outer;
 676                         }
 677                         int b = par.getPendingCount();
 678                         if ((b & state & FINISHED) != 0)
 679                             t = par;                          // both done
 680                         else if ((b & state & SUMMED) != 0) { // both summed
 681                             int nextState; IntCumulateTask lt, rt;
 682                             if ((lt = par.left) != null &&
 683                                 (rt = par.right) != null) {
 684                                 int lout = lt.out;
 685                                 par.out = (rt.hi == fnc ? lout :
 686                                            fn.applyAsInt(lout, rt.out));
 687                             }
 688                             int refork = (((b & CUMULATE) == 0 &&
 689                                            par.lo == org) ? CUMULATE : 0);
 690                             if ((nextState = b|state|refork) == b ||
 691                                 par.compareAndSetPendingCount(b, nextState)) {
 692                                 state = SUMMED;               // drop finished
 693                                 t = par;
 694                                 if (refork != 0)
 695                                     par.fork();
 696                             }
 697                         }
 698                         else if (par.compareAndSetPendingCount(b, b|state))
 699                             break outer;                      // sib not ready
 700                     }
 701                 }
 702             }
 703         }
 704         private static final long serialVersionUID = 3731755594596840961L;
 705     }
 706 }