1 /*
   2  * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package java.util;
  26 
  27 /*
  28  * Written by Doug Lea with assistance from members of JCP JSR-166
  29  * Expert Group and released to the public domain, as explained at
  30  * http://creativecommons.org/publicdomain/zero/1.0/
  31  */
  32 
  33 import java.util.concurrent.ForkJoinPool;
  34 import java.util.concurrent.CountedCompleter;
  35 import java.util.function.BinaryOperator;
  36 import java.util.function.IntBinaryOperator;
  37 import java.util.function.LongBinaryOperator;
  38 import java.util.function.DoubleBinaryOperator;
  39 
  40 /**
  41  * ForkJoin tasks to perform Arrays.parallelPrefix operations.
  42  *
  43  * @author Doug Lea
  44  * @since 1.8
  45  */
  46 class ArrayPrefixHelpers {
  47     private ArrayPrefixHelpers() {}; // non-instantiable
  48 
  49     /*
  50      * Parallel prefix (aka cumulate, scan) task classes
  51      * are based loosely on Guy Blelloch's original
  52      * algorithm (http://www.cs.cmu.edu/~scandal/alg/scan.html):
  53      *  Keep dividing by two to threshold segment size, and then:
  54      *   Pass 1: Create tree of partial sums for each segment
  55      *   Pass 2: For each segment, cumulate with offset of left sibling
  56      *
  57      * This version improves performance within FJ framework mainly by
  58      * allowing the second pass of ready left-hand sides to proceed
  59      * even if some right-hand side first passes are still executing.
  60      * It also combines first and second pass for leftmost segment,
  61      * and skips the first pass for rightmost segment (whose result is
  62      * not needed for second pass).  It similarly manages to avoid
  63      * requiring that users supply an identity basis for accumulations
  64      * by tracking those segments/subtasks for which the first
  65      * existing element is used as base.
  66      *
  67      * Managing this relies on ORing some bits in the pendingCount for
  68      * phases/states: CUMULATE, SUMMED, and FINISHED. CUMULATE is the
  69      * main phase bit. When false, segments compute only their sum.
  70      * When true, they cumulate array elements. CUMULATE is set at
  71      * root at beginning of second pass and then propagated down. But
  72      * it may also be set earlier for subtrees with lo==0 (the left
  73      * spine of tree). SUMMED is a one bit join count. For leafs, it
  74      * is set when summed. For internal nodes, it becomes true when
  75      * one child is summed.  When the second child finishes summing,
  76      * we then moves up tree to trigger the cumulate phase. FINISHED
  77      * is also a one bit join count. For leafs, it is set when
  78      * cumulated. For internal nodes, it becomes true when one child
  79      * is cumulated.  When the second child finishes cumulating, it
  80      * then moves up tree, completing at the root.
  81      *
  82      * To better exploit locality and reduce overhead, the compute
  83      * method loops starting with the current task, moving if possible
  84      * to one of its subtasks rather than forking.
  85      *
  86      * As usual for this sort of utility, there are 4 versions, that
  87      * are simple copy/paste/adapt variants of each other.  (The
  88      * double and int versions differ from long version soley by
  89      * replacing "long" (with case-matching)).
  90      */
  91 
  92     // see above
  93     static final int CUMULATE = 1;
  94     static final int SUMMED   = 2;
  95     static final int FINISHED = 4;
  96 
  97     /** The smallest subtask array partition size to use as threshold */
  98     static final int MIN_PARTITION = 16;
  99 
 100     static final class CumulateTask<T> extends CountedCompleter<Void> {
 101         final T[] array;
 102         final BinaryOperator<T> function;
 103         CumulateTask<T> left, right;
 104         T in, out;
 105         final int lo, hi, origin, fence, threshold;
 106 
 107         /** Root task constructor */
 108         public CumulateTask(CumulateTask<T> parent,
 109                             BinaryOperator<T> function,
 110                             T[] array, int lo, int hi) {
 111             super(parent);
 112             this.function = function; this.array = array;
 113             this.lo = this.origin = lo; this.hi = this.fence = hi;
 114             int p;
 115             this.threshold =
 116                     (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
 117                     <= MIN_PARTITION ? MIN_PARTITION : p;
 118         }
 119 
 120         /** Subtask constructor */
 121         CumulateTask(CumulateTask<T> parent, BinaryOperator<T> function,
 122                      T[] array, int origin, int fence, int threshold,
 123                      int lo, int hi) {
 124             super(parent);
 125             this.function = function; this.array = array;
 126             this.origin = origin; this.fence = fence;
 127             this.threshold = threshold;
 128             this.lo = lo; this.hi = hi;
 129         }
 130 
 131         public final void compute() {
 132             final BinaryOperator<T> fn;
 133             final T[] a;
 134             if ((fn = this.function) == null || (a = this.array) == null)
 135                 throw new NullPointerException();    // hoist checks
 136             int th = threshold, org = origin, fnc = fence, l, h;
 137             CumulateTask<T> t = this;
 138             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
 139                 if (h - l > th) {
 140                     CumulateTask<T> lt = t.left, rt = t.right, f;
 141                     if (lt == null) {                // first pass
 142                         int mid = (l + h) >>> 1;
 143                         f = rt = t.right =
 144                                 new CumulateTask<T>(t, fn, a, org, fnc, th, mid, h);
 145                         t = lt = t.left  =
 146                                 new CumulateTask<T>(t, fn, a, org, fnc, th, l, mid);
 147                     }
 148                     else {                           // possibly refork
 149                         T pin = t.in;
 150                         lt.in = pin;
 151                         f = t = null;
 152                         if (rt != null) {
 153                             T lout = lt.out;
 154                             rt.in = (l == org ? lout :
 155                                      fn.apply(pin, lout));
 156                             for (int c;;) {
 157                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
 158                                     break;
 159                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
 160                                     t = rt;
 161                                     break;
 162                                 }
 163                             }
 164                         }
 165                         for (int c;;) {
 166                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
 167                                 break;
 168                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
 169                                 if (t != null)
 170                                     f = t;
 171                                 t = lt;
 172                                 break;
 173                             }
 174                         }
 175                         if (t == null)
 176                             break;
 177                     }
 178                     if (f != null)
 179                         f.fork();
 180                 }
 181                 else {
 182                     int state; // Transition to sum, cumulate, or both
 183                     for (int b;;) {
 184                         if (((b = t.getPendingCount()) & FINISHED) != 0)
 185                             break outer;                      // already done
 186                         state = ((b & CUMULATE) != 0? FINISHED :
 187                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
 188                         if (t.compareAndSetPendingCount(b, b|state))
 189                             break;
 190                     }
 191 
 192                     T sum;
 193                     if (state != SUMMED) {
 194                         int first;
 195                         if (l == org) {                       // leftmost; no in
 196                             sum = a[org];
 197                             first = org + 1;
 198                         }
 199                         else {
 200                             sum = t.in;
 201                             first = l;
 202                         }
 203                         for (int i = first; i < h; ++i)       // cumulate
 204                             a[i] = sum = fn.apply(sum, a[i]);
 205                     }
 206                     else if (h < fnc) {                       // skip rightmost
 207                         sum = a[l];
 208                         for (int i = l + 1; i < h; ++i)       // sum only
 209                             sum = fn.apply(sum, a[i]);
 210                     }
 211                     else
 212                         sum = t.in;
 213                     t.out = sum;
 214                     for (CumulateTask<T> par;;) {             // propagate
 215                         @SuppressWarnings("unchecked") CumulateTask<T> partmp
 216                             = (CumulateTask<T>)t.getCompleter();
 217                         if ((par = partmp) == null) {
 218                             if ((state & FINISHED) != 0)      // enable join
 219                                 t.quietlyComplete();
 220                             break outer;
 221                         }
 222                         int b = par.getPendingCount();
 223                         if ((b & state & FINISHED) != 0)
 224                             t = par;                          // both done
 225                         else if ((b & state & SUMMED) != 0) { // both summed
 226                             int nextState; CumulateTask<T> lt, rt;
 227                             if ((lt = par.left) != null &&
 228                                 (rt = par.right) != null) {
 229                                 T lout = lt.out;
 230                                 par.out = (rt.hi == fnc ? lout :
 231                                            fn.apply(lout, rt.out));
 232                             }
 233                             int refork = (((b & CUMULATE) == 0 &&
 234                                            par.lo == org) ? CUMULATE : 0);
 235                             if ((nextState = b|state|refork) == b ||
 236                                 par.compareAndSetPendingCount(b, nextState)) {
 237                                 state = SUMMED;               // drop finished
 238                                 t = par;
 239                                 if (refork != 0)
 240                                     par.fork();
 241                             }
 242                         }
 243                         else if (par.compareAndSetPendingCount(b, b|state))
 244                             break outer;                      // sib not ready
 245                     }
 246                 }
 247             }
 248         }
 249         private static final long serialVersionUID = 5293554502939613543L;
 250     }
 251 
 252     static final class LongCumulateTask extends CountedCompleter<Void> {
 253         final long[] array;
 254         final LongBinaryOperator function;
 255         LongCumulateTask left, right;
 256         long in, out;
 257         final int lo, hi, origin, fence, threshold;
 258 
 259         /** Root task constructor */
 260         public LongCumulateTask(LongCumulateTask parent,
 261                                 LongBinaryOperator function,
 262                                 long[] array, int lo, int hi) {
 263             super(parent);
 264             this.function = function; this.array = array;
 265             this.lo = this.origin = lo; this.hi = this.fence = hi;
 266             int p;
 267             this.threshold =
 268                     (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
 269                     <= MIN_PARTITION ? MIN_PARTITION : p;
 270         }
 271 
 272         /** Subtask constructor */
 273         LongCumulateTask(LongCumulateTask parent, LongBinaryOperator function,
 274                          long[] array, int origin, int fence, int threshold,
 275                          int lo, int hi) {
 276             super(parent);
 277             this.function = function; this.array = array;
 278             this.origin = origin; this.fence = fence;
 279             this.threshold = threshold;
 280             this.lo = lo; this.hi = hi;
 281         }
 282 
 283         public final void compute() {
 284             final LongBinaryOperator fn;
 285             final long[] a;
 286             if ((fn = this.function) == null || (a = this.array) == null)
 287                 throw new NullPointerException();    // hoist checks
 288             int th = threshold, org = origin, fnc = fence, l, h;
 289             LongCumulateTask t = this;
 290             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
 291                 if (h - l > th) {
 292                     LongCumulateTask lt = t.left, rt = t.right, f;
 293                     if (lt == null) {                // first pass
 294                         int mid = (l + h) >>> 1;
 295                         f = rt = t.right =
 296                                 new LongCumulateTask(t, fn, a, org, fnc, th, mid, h);
 297                         t = lt = t.left  =
 298                                 new LongCumulateTask(t, fn, a, org, fnc, th, l, mid);
 299                     }
 300                     else {                           // possibly refork
 301                         long pin = t.in;
 302                         lt.in = pin;
 303                         f = t = null;
 304                         if (rt != null) {
 305                             long lout = lt.out;
 306                             rt.in = (l == org ? lout :
 307                                      fn.applyAsLong(pin, lout));
 308                             for (int c;;) {
 309                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
 310                                     break;
 311                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
 312                                     t = rt;
 313                                     break;
 314                                 }
 315                             }
 316                         }
 317                         for (int c;;) {
 318                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
 319                                 break;
 320                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
 321                                 if (t != null)
 322                                     f = t;
 323                                 t = lt;
 324                                 break;
 325                             }
 326                         }
 327                         if (t == null)
 328                             break;
 329                     }
 330                     if (f != null)
 331                         f.fork();
 332                 }
 333                 else {
 334                     int state; // Transition to sum, cumulate, or both
 335                     for (int b;;) {
 336                         if (((b = t.getPendingCount()) & FINISHED) != 0)
 337                             break outer;                      // already done
 338                         state = ((b & CUMULATE) != 0? FINISHED :
 339                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
 340                         if (t.compareAndSetPendingCount(b, b|state))
 341                             break;
 342                     }
 343 
 344                     long sum;
 345                     if (state != SUMMED) {
 346                         int first;
 347                         if (l == org) {                       // leftmost; no in
 348                             sum = a[org];
 349                             first = org + 1;
 350                         }
 351                         else {
 352                             sum = t.in;
 353                             first = l;
 354                         }
 355                         for (int i = first; i < h; ++i)       // cumulate
 356                             a[i] = sum = fn.applyAsLong(sum, a[i]);
 357                     }
 358                     else if (h < fnc) {                       // skip rightmost
 359                         sum = a[l];
 360                         for (int i = l + 1; i < h; ++i)       // sum only
 361                             sum = fn.applyAsLong(sum, a[i]);
 362                     }
 363                     else
 364                         sum = t.in;
 365                     t.out = sum;
 366                     for (LongCumulateTask par;;) {            // propagate
 367                         if ((par = (LongCumulateTask)t.getCompleter()) == null) {
 368                             if ((state & FINISHED) != 0)      // enable join
 369                                 t.quietlyComplete();
 370                             break outer;
 371                         }
 372                         int b = par.getPendingCount();
 373                         if ((b & state & FINISHED) != 0)
 374                             t = par;                          // both done
 375                         else if ((b & state & SUMMED) != 0) { // both summed
 376                             int nextState; LongCumulateTask lt, rt;
 377                             if ((lt = par.left) != null &&
 378                                 (rt = par.right) != null) {
 379                                 long lout = lt.out;
 380                                 par.out = (rt.hi == fnc ? lout :
 381                                            fn.applyAsLong(lout, rt.out));
 382                             }
 383                             int refork = (((b & CUMULATE) == 0 &&
 384                                            par.lo == org) ? CUMULATE : 0);
 385                             if ((nextState = b|state|refork) == b ||
 386                                 par.compareAndSetPendingCount(b, nextState)) {
 387                                 state = SUMMED;               // drop finished
 388                                 t = par;
 389                                 if (refork != 0)
 390                                     par.fork();
 391                             }
 392                         }
 393                         else if (par.compareAndSetPendingCount(b, b|state))
 394                             break outer;                      // sib not ready
 395                     }
 396                 }
 397             }
 398         }
 399         private static final long serialVersionUID = -5074099945909284273L;
 400     }
 401 
 402     static final class DoubleCumulateTask extends CountedCompleter<Void> {
 403         final double[] array;
 404         final DoubleBinaryOperator function;
 405         DoubleCumulateTask left, right;
 406         double in, out;
 407         final int lo, hi, origin, fence, threshold;
 408 
 409         /** Root task constructor */
 410         public DoubleCumulateTask(DoubleCumulateTask parent,
 411                                   DoubleBinaryOperator function,
 412                                   double[] array, int lo, int hi) {
 413             super(parent);
 414             this.function = function; this.array = array;
 415             this.lo = this.origin = lo; this.hi = this.fence = hi;
 416             int p;
 417             this.threshold =
 418                     (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
 419                     <= MIN_PARTITION ? MIN_PARTITION : p;
 420         }
 421 
 422         /** Subtask constructor */
 423         DoubleCumulateTask(DoubleCumulateTask parent, DoubleBinaryOperator function,
 424                            double[] array, int origin, int fence, int threshold,
 425                            int lo, int hi) {
 426             super(parent);
 427             this.function = function; this.array = array;
 428             this.origin = origin; this.fence = fence;
 429             this.threshold = threshold;
 430             this.lo = lo; this.hi = hi;
 431         }
 432 
 433         public final void compute() {
 434             final DoubleBinaryOperator fn;
 435             final double[] a;
 436             if ((fn = this.function) == null || (a = this.array) == null)
 437                 throw new NullPointerException();    // hoist checks
 438             int th = threshold, org = origin, fnc = fence, l, h;
 439             DoubleCumulateTask t = this;
 440             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
 441                 if (h - l > th) {
 442                     DoubleCumulateTask lt = t.left, rt = t.right, f;
 443                     if (lt == null) {                // first pass
 444                         int mid = (l + h) >>> 1;
 445                         f = rt = t.right =
 446                                 new DoubleCumulateTask(t, fn, a, org, fnc, th, mid, h);
 447                         t = lt = t.left  =
 448                                 new DoubleCumulateTask(t, fn, a, org, fnc, th, l, mid);
 449                     }
 450                     else {                           // possibly refork
 451                         double pin = t.in;
 452                         lt.in = pin;
 453                         f = t = null;
 454                         if (rt != null) {
 455                             double lout = lt.out;
 456                             rt.in = (l == org ? lout :
 457                                      fn.applyAsDouble(pin, lout));
 458                             for (int c;;) {
 459                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
 460                                     break;
 461                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
 462                                     t = rt;
 463                                     break;
 464                                 }
 465                             }
 466                         }
 467                         for (int c;;) {
 468                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
 469                                 break;
 470                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
 471                                 if (t != null)
 472                                     f = t;
 473                                 t = lt;
 474                                 break;
 475                             }
 476                         }
 477                         if (t == null)
 478                             break;
 479                     }
 480                     if (f != null)
 481                         f.fork();
 482                 }
 483                 else {
 484                     int state; // Transition to sum, cumulate, or both
 485                     for (int b;;) {
 486                         if (((b = t.getPendingCount()) & FINISHED) != 0)
 487                             break outer;                      // already done
 488                         state = ((b & CUMULATE) != 0? FINISHED :
 489                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
 490                         if (t.compareAndSetPendingCount(b, b|state))
 491                             break;
 492                     }
 493 
 494                     double sum;
 495                     if (state != SUMMED) {
 496                         int first;
 497                         if (l == org) {                       // leftmost; no in
 498                             sum = a[org];
 499                             first = org + 1;
 500                         }
 501                         else {
 502                             sum = t.in;
 503                             first = l;
 504                         }
 505                         for (int i = first; i < h; ++i)       // cumulate
 506                             a[i] = sum = fn.applyAsDouble(sum, a[i]);
 507                     }
 508                     else if (h < fnc) {                       // skip rightmost
 509                         sum = a[l];
 510                         for (int i = l + 1; i < h; ++i)       // sum only
 511                             sum = fn.applyAsDouble(sum, a[i]);
 512                     }
 513                     else
 514                         sum = t.in;
 515                     t.out = sum;
 516                     for (DoubleCumulateTask par;;) {            // propagate
 517                         if ((par = (DoubleCumulateTask)t.getCompleter()) == null) {
 518                             if ((state & FINISHED) != 0)      // enable join
 519                                 t.quietlyComplete();
 520                             break outer;
 521                         }
 522                         int b = par.getPendingCount();
 523                         if ((b & state & FINISHED) != 0)
 524                             t = par;                          // both done
 525                         else if ((b & state & SUMMED) != 0) { // both summed
 526                             int nextState; DoubleCumulateTask lt, rt;
 527                             if ((lt = par.left) != null &&
 528                                 (rt = par.right) != null) {
 529                                 double lout = lt.out;
 530                                 par.out = (rt.hi == fnc ? lout :
 531                                            fn.applyAsDouble(lout, rt.out));
 532                             }
 533                             int refork = (((b & CUMULATE) == 0 &&
 534                                            par.lo == org) ? CUMULATE : 0);
 535                             if ((nextState = b|state|refork) == b ||
 536                                 par.compareAndSetPendingCount(b, nextState)) {
 537                                 state = SUMMED;               // drop finished
 538                                 t = par;
 539                                 if (refork != 0)
 540                                     par.fork();
 541                             }
 542                         }
 543                         else if (par.compareAndSetPendingCount(b, b|state))
 544                             break outer;                      // sib not ready
 545                     }
 546                 }
 547             }
 548         }
 549         private static final long serialVersionUID = -586947823794232033L;
 550     }
 551 
 552     static final class IntCumulateTask extends CountedCompleter<Void> {
 553         final int[] array;
 554         final IntBinaryOperator function;
 555         IntCumulateTask left, right;
 556         int in, out;
 557         final int lo, hi, origin, fence, threshold;
 558 
 559         /** Root task constructor */
 560         public IntCumulateTask(IntCumulateTask parent,
 561                                IntBinaryOperator function,
 562                                int[] array, int lo, int hi) {
 563             super(parent);
 564             this.function = function; this.array = array;
 565             this.lo = this.origin = lo; this.hi = this.fence = hi;
 566             int p;
 567             this.threshold =
 568                     (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
 569                     <= MIN_PARTITION ? MIN_PARTITION : p;
 570         }
 571 
 572         /** Subtask constructor */
 573         IntCumulateTask(IntCumulateTask parent, IntBinaryOperator function,
 574                         int[] array, int origin, int fence, int threshold,
 575                         int lo, int hi) {
 576             super(parent);
 577             this.function = function; this.array = array;
 578             this.origin = origin; this.fence = fence;
 579             this.threshold = threshold;
 580             this.lo = lo; this.hi = hi;
 581         }
 582 
 583         public final void compute() {
 584             final IntBinaryOperator fn;
 585             final int[] a;
 586             if ((fn = this.function) == null || (a = this.array) == null)
 587                 throw new NullPointerException();    // hoist checks
 588             int th = threshold, org = origin, fnc = fence, l, h;
 589             IntCumulateTask t = this;
 590             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
 591                 if (h - l > th) {
 592                     IntCumulateTask lt = t.left, rt = t.right, f;
 593                     if (lt == null) {                // first pass
 594                         int mid = (l + h) >>> 1;
 595                         f = rt = t.right =
 596                                 new IntCumulateTask(t, fn, a, org, fnc, th, mid, h);
 597                         t = lt = t.left  =
 598                                 new IntCumulateTask(t, fn, a, org, fnc, th, l, mid);
 599                     }
 600                     else {                           // possibly refork
 601                         int pin = t.in;
 602                         lt.in = pin;
 603                         f = t = null;
 604                         if (rt != null) {
 605                             int lout = lt.out;
 606                             rt.in = (l == org ? lout :
 607                                      fn.applyAsInt(pin, lout));
 608                             for (int c;;) {
 609                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
 610                                     break;
 611                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
 612                                     t = rt;
 613                                     break;
 614                                 }
 615                             }
 616                         }
 617                         for (int c;;) {
 618                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
 619                                 break;
 620                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
 621                                 if (t != null)
 622                                     f = t;
 623                                 t = lt;
 624                                 break;
 625                             }
 626                         }
 627                         if (t == null)
 628                             break;
 629                     }
 630                     if (f != null)
 631                         f.fork();
 632                 }
 633                 else {
 634                     int state; // Transition to sum, cumulate, or both
 635                     for (int b;;) {
 636                         if (((b = t.getPendingCount()) & FINISHED) != 0)
 637                             break outer;                      // already done
 638                         state = ((b & CUMULATE) != 0? FINISHED :
 639                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
 640                         if (t.compareAndSetPendingCount(b, b|state))
 641                             break;
 642                     }
 643 
 644                     int sum;
 645                     if (state != SUMMED) {
 646                         int first;
 647                         if (l == org) {                       // leftmost; no in
 648                             sum = a[org];
 649                             first = org + 1;
 650                         }
 651                         else {
 652                             sum = t.in;
 653                             first = l;
 654                         }
 655                         for (int i = first; i < h; ++i)       // cumulate
 656                             a[i] = sum = fn.applyAsInt(sum, a[i]);
 657                     }
 658                     else if (h < fnc) {                       // skip rightmost
 659                         sum = a[l];
 660                         for (int i = l + 1; i < h; ++i)       // sum only
 661                             sum = fn.applyAsInt(sum, a[i]);
 662                     }
 663                     else
 664                         sum = t.in;
 665                     t.out = sum;
 666                     for (IntCumulateTask par;;) {            // propagate
 667                         if ((par = (IntCumulateTask)t.getCompleter()) == null) {
 668                             if ((state & FINISHED) != 0)      // enable join
 669                                 t.quietlyComplete();
 670                             break outer;
 671                         }
 672                         int b = par.getPendingCount();
 673                         if ((b & state & FINISHED) != 0)
 674                             t = par;                          // both done
 675                         else if ((b & state & SUMMED) != 0) { // both summed
 676                             int nextState; IntCumulateTask lt, rt;
 677                             if ((lt = par.left) != null &&
 678                                 (rt = par.right) != null) {
 679                                 int lout = lt.out;
 680                                 par.out = (rt.hi == fnc ? lout :
 681                                            fn.applyAsInt(lout, rt.out));
 682                             }
 683                             int refork = (((b & CUMULATE) == 0 &&
 684                                            par.lo == org) ? CUMULATE : 0);
 685                             if ((nextState = b|state|refork) == b ||
 686                                 par.compareAndSetPendingCount(b, nextState)) {
 687                                 state = SUMMED;               // drop finished
 688                                 t = par;
 689                                 if (refork != 0)
 690                                     par.fork();
 691                             }
 692                         }
 693                         else if (par.compareAndSetPendingCount(b, b|state))
 694                             break outer;                      // sib not ready
 695                     }
 696                 }
 697             }
 698         }
 699         private static final long serialVersionUID = 3731755594596840961L;
 700     }
 701 }