1 /*
   2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.  Oracle designates this
   7  * particular file as subject to the "Classpath" exception as provided
   8  * by Oracle in the LICENSE file that accompanied this code.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  */
  24 
  25 /*
  26  * This file is available under and governed by the GNU General Public
  27  * License version 2 only, as published by the Free Software Foundation.
  28  * However, the following notice accompanied the original version of this
  29  * file:
  30  *
  31  * Written by Doug Lea with assistance from members of JCP JSR-166
  32  * Expert Group and released to the public domain, as explained at
  33  * http://creativecommons.org/publicdomain/zero/1.0/
  34  */
  35 
  36 package java.util;
  37 
  38 import java.util.concurrent.CountedCompleter;
  39 import java.util.concurrent.ForkJoinPool;
  40 import java.util.function.BinaryOperator;
  41 import java.util.function.DoubleBinaryOperator;
  42 import java.util.function.IntBinaryOperator;
  43 import java.util.function.LongBinaryOperator;
  44 
  45 /**
  46  * ForkJoin tasks to perform Arrays.parallelPrefix operations.
  47  *
  48  * @author Doug Lea
  49  * @since 1.8
  50  */
  51 class ArrayPrefixHelpers {
  52     private ArrayPrefixHelpers() {} // non-instantiable
  53 
  54     /*
  55      * Parallel prefix (aka cumulate, scan) task classes
  56      * are based loosely on Guy Blelloch's original
  57      * algorithm (http://www.cs.cmu.edu/~scandal/alg/scan.html):
  58      *  Keep dividing by two to threshold segment size, and then:
  59      *   Pass 1: Create tree of partial sums for each segment
  60      *   Pass 2: For each segment, cumulate with offset of left sibling
  61      *
  62      * This version improves performance within FJ framework mainly by
  63      * allowing the second pass of ready left-hand sides to proceed
  64      * even if some right-hand side first passes are still executing.
  65      * It also combines first and second pass for leftmost segment,
  66      * and skips the first pass for rightmost segment (whose result is
  67      * not needed for second pass).  It similarly manages to avoid
  68      * requiring that users supply an identity basis for accumulations
  69      * by tracking those segments/subtasks for which the first
  70      * existing element is used as base.
  71      *
  72      * Managing this relies on ORing some bits in the pendingCount for
  73      * phases/states: CUMULATE, SUMMED, and FINISHED. CUMULATE is the
  74      * main phase bit. When false, segments compute only their sum.
  75      * When true, they cumulate array elements. CUMULATE is set at
  76      * root at beginning of second pass and then propagated down. But
  77      * it may also be set earlier for subtrees with lo==0 (the left
  78      * spine of tree). SUMMED is a one bit join count. For leafs, it
  79      * is set when summed. For internal nodes, it becomes true when
  80      * one child is summed.  When the second child finishes summing,
  81      * we then moves up tree to trigger the cumulate phase. FINISHED
  82      * is also a one bit join count. For leafs, it is set when
  83      * cumulated. For internal nodes, it becomes true when one child
  84      * is cumulated.  When the second child finishes cumulating, it
  85      * then moves up tree, completing at the root.
  86      *
  87      * To better exploit locality and reduce overhead, the compute
  88      * method loops starting with the current task, moving if possible
  89      * to one of its subtasks rather than forking.
  90      *
  91      * As usual for this sort of utility, there are 4 versions, that
  92      * are simple copy/paste/adapt variants of each other.  (The
  93      * double and int versions differ from long version solely by
  94      * replacing "long" (with case-matching)).
  95      */
  96 
  97     // see above
  98     static final int CUMULATE = 1;
  99     static final int SUMMED   = 2;
 100     static final int FINISHED = 4;
 101 
 102     /** The smallest subtask array partition size to use as threshold */
 103     static final int MIN_PARTITION = 16;
 104 
 105     static final class CumulateTask<T> extends CountedCompleter<Void> {
 106         @SuppressWarnings("serial") // Not statically typed as Serializable
 107         final T[] array;
 108         @SuppressWarnings("serial") // Not statically typed as Serializable
 109         final BinaryOperator<T> function;
 110         CumulateTask<T> left, right;
 111         @SuppressWarnings("serial") // Not statically typed as Serializable
 112         T in;
 113         @SuppressWarnings("serial") // Not statically typed as Serializable
 114         T out;
 115         final int lo, hi, origin, fence, threshold;
 116 
 117         /** Root task constructor */
 118         public CumulateTask(CumulateTask<T> parent,
 119                             BinaryOperator<T> function,
 120                             T[] array, int lo, int hi) {
 121             super(parent);
 122             this.function = function; this.array = array;
 123             this.lo = this.origin = lo; this.hi = this.fence = hi;
 124             int p;
 125             this.threshold =
 126                 (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
 127                 <= MIN_PARTITION ? MIN_PARTITION : p;
 128         }
 129 
 130         /** Subtask constructor */
 131         CumulateTask(CumulateTask<T> parent, BinaryOperator<T> function,
 132                      T[] array, int origin, int fence, int threshold,
 133                      int lo, int hi) {
 134             super(parent);
 135             this.function = function; this.array = array;
 136             this.origin = origin; this.fence = fence;
 137             this.threshold = threshold;
 138             this.lo = lo; this.hi = hi;
 139         }
 140 
 141         public final void compute() {
 142             final BinaryOperator<T> fn;
 143             final T[] a;
 144             if ((fn = this.function) == null || (a = this.array) == null)
 145                 throw new NullPointerException();    // hoist checks
 146             int th = threshold, org = origin, fnc = fence, l, h;
 147             CumulateTask<T> t = this;
 148             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
 149                 if (h - l > th) {
 150                     CumulateTask<T> lt = t.left, rt = t.right, f;
 151                     if (lt == null) {                // first pass
 152                         int mid = (l + h) >>> 1;
 153                         f = rt = t.right =
 154                             new CumulateTask<T>(t, fn, a, org, fnc, th, mid, h);
 155                         t = lt = t.left =
 156                             new CumulateTask<T>(t, fn, a, org, fnc, th, l, mid);
 157                     }
 158                     else {                           // possibly refork
 159                         T pin = t.in;
 160                         lt.in = pin;
 161                         f = t = null;
 162                         if (rt != null) {
 163                             T lout = lt.out;
 164                             rt.in = (l == org ? lout :
 165                                      fn.apply(pin, lout));
 166                             for (int c;;) {
 167                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
 168                                     break;
 169                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
 170                                     t = rt;
 171                                     break;
 172                                 }
 173                             }
 174                         }
 175                         for (int c;;) {
 176                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
 177                                 break;
 178                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
 179                                 if (t != null)
 180                                     f = t;
 181                                 t = lt;
 182                                 break;
 183                             }
 184                         }
 185                         if (t == null)
 186                             break;
 187                     }
 188                     if (f != null)
 189                         f.fork();
 190                 }
 191                 else {
 192                     int state; // Transition to sum, cumulate, or both
 193                     for (int b;;) {
 194                         if (((b = t.getPendingCount()) & FINISHED) != 0)
 195                             break outer;                      // already done
 196                         state = ((b & CUMULATE) != 0 ? FINISHED :
 197                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
 198                         if (t.compareAndSetPendingCount(b, b|state))
 199                             break;
 200                     }
 201 
 202                     T sum;
 203                     if (state != SUMMED) {
 204                         int first;
 205                         if (l == org) {                       // leftmost; no in
 206                             sum = a[org];
 207                             first = org + 1;
 208                         }
 209                         else {
 210                             sum = t.in;
 211                             first = l;
 212                         }
 213                         for (int i = first; i < h; ++i)       // cumulate
 214                             a[i] = sum = fn.apply(sum, a[i]);
 215                     }
 216                     else if (h < fnc) {                       // skip rightmost
 217                         sum = a[l];
 218                         for (int i = l + 1; i < h; ++i)       // sum only
 219                             sum = fn.apply(sum, a[i]);
 220                     }
 221                     else
 222                         sum = t.in;
 223                     t.out = sum;
 224                     for (CumulateTask<T> par;;) {             // propagate
 225                         @SuppressWarnings("unchecked") CumulateTask<T> partmp
 226                             = (CumulateTask<T>)t.getCompleter();
 227                         if ((par = partmp) == null) {
 228                             if ((state & FINISHED) != 0)      // enable join
 229                                 t.quietlyComplete();
 230                             break outer;
 231                         }
 232                         int b = par.getPendingCount();
 233                         if ((b & state & FINISHED) != 0)
 234                             t = par;                          // both done
 235                         else if ((b & state & SUMMED) != 0) { // both summed
 236                             int nextState; CumulateTask<T> lt, rt;
 237                             if ((lt = par.left) != null &&
 238                                 (rt = par.right) != null) {
 239                                 T lout = lt.out;
 240                                 par.out = (rt.hi == fnc ? lout :
 241                                            fn.apply(lout, rt.out));
 242                             }
 243                             int refork = (((b & CUMULATE) == 0 &&
 244                                            par.lo == org) ? CUMULATE : 0);
 245                             if ((nextState = b|state|refork) == b ||
 246                                 par.compareAndSetPendingCount(b, nextState)) {
 247                                 state = SUMMED;               // drop finished
 248                                 t = par;
 249                                 if (refork != 0)
 250                                     par.fork();
 251                             }
 252                         }
 253                         else if (par.compareAndSetPendingCount(b, b|state))
 254                             break outer;                      // sib not ready
 255                     }
 256                 }
 257             }
 258         }
 259         @java.io.Serial
 260         private static final long serialVersionUID = 5293554502939613543L;
 261     }
 262 
 263     static final class LongCumulateTask extends CountedCompleter<Void> {
 264         final long[] array;
 265         @SuppressWarnings("serial") // Not statically typed as Serializable
 266         final LongBinaryOperator function;
 267         LongCumulateTask left, right;
 268         long in, out;
 269         final int lo, hi, origin, fence, threshold;
 270 
 271         /** Root task constructor */
 272         public LongCumulateTask(LongCumulateTask parent,
 273                                 LongBinaryOperator function,
 274                                 long[] array, int lo, int hi) {
 275             super(parent);
 276             this.function = function; this.array = array;
 277             this.lo = this.origin = lo; this.hi = this.fence = hi;
 278             int p;
 279             this.threshold =
 280                 (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
 281                 <= MIN_PARTITION ? MIN_PARTITION : p;
 282         }
 283 
 284         /** Subtask constructor */
 285         LongCumulateTask(LongCumulateTask parent, LongBinaryOperator function,
 286                          long[] array, int origin, int fence, int threshold,
 287                          int lo, int hi) {
 288             super(parent);
 289             this.function = function; this.array = array;
 290             this.origin = origin; this.fence = fence;
 291             this.threshold = threshold;
 292             this.lo = lo; this.hi = hi;
 293         }
 294 
 295         public final void compute() {
 296             final LongBinaryOperator fn;
 297             final long[] a;
 298             if ((fn = this.function) == null || (a = this.array) == null)
 299                 throw new NullPointerException();    // hoist checks
 300             int th = threshold, org = origin, fnc = fence, l, h;
 301             LongCumulateTask t = this;
 302             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
 303                 if (h - l > th) {
 304                     LongCumulateTask lt = t.left, rt = t.right, f;
 305                     if (lt == null) {                // first pass
 306                         int mid = (l + h) >>> 1;
 307                         f = rt = t.right =
 308                             new LongCumulateTask(t, fn, a, org, fnc, th, mid, h);
 309                         t = lt = t.left =
 310                             new LongCumulateTask(t, fn, a, org, fnc, th, l, mid);
 311                     }
 312                     else {                           // possibly refork
 313                         long pin = t.in;
 314                         lt.in = pin;
 315                         f = t = null;
 316                         if (rt != null) {
 317                             long lout = lt.out;
 318                             rt.in = (l == org ? lout :
 319                                      fn.applyAsLong(pin, lout));
 320                             for (int c;;) {
 321                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
 322                                     break;
 323                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
 324                                     t = rt;
 325                                     break;
 326                                 }
 327                             }
 328                         }
 329                         for (int c;;) {
 330                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
 331                                 break;
 332                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
 333                                 if (t != null)
 334                                     f = t;
 335                                 t = lt;
 336                                 break;
 337                             }
 338                         }
 339                         if (t == null)
 340                             break;
 341                     }
 342                     if (f != null)
 343                         f.fork();
 344                 }
 345                 else {
 346                     int state; // Transition to sum, cumulate, or both
 347                     for (int b;;) {
 348                         if (((b = t.getPendingCount()) & FINISHED) != 0)
 349                             break outer;                      // already done
 350                         state = ((b & CUMULATE) != 0 ? FINISHED :
 351                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
 352                         if (t.compareAndSetPendingCount(b, b|state))
 353                             break;
 354                     }
 355 
 356                     long sum;
 357                     if (state != SUMMED) {
 358                         int first;
 359                         if (l == org) {                       // leftmost; no in
 360                             sum = a[org];
 361                             first = org + 1;
 362                         }
 363                         else {
 364                             sum = t.in;
 365                             first = l;
 366                         }
 367                         for (int i = first; i < h; ++i)       // cumulate
 368                             a[i] = sum = fn.applyAsLong(sum, a[i]);
 369                     }
 370                     else if (h < fnc) {                       // skip rightmost
 371                         sum = a[l];
 372                         for (int i = l + 1; i < h; ++i)       // sum only
 373                             sum = fn.applyAsLong(sum, a[i]);
 374                     }
 375                     else
 376                         sum = t.in;
 377                     t.out = sum;
 378                     for (LongCumulateTask par;;) {            // propagate
 379                         if ((par = (LongCumulateTask)t.getCompleter()) == null) {
 380                             if ((state & FINISHED) != 0)      // enable join
 381                                 t.quietlyComplete();
 382                             break outer;
 383                         }
 384                         int b = par.getPendingCount();
 385                         if ((b & state & FINISHED) != 0)
 386                             t = par;                          // both done
 387                         else if ((b & state & SUMMED) != 0) { // both summed
 388                             int nextState; LongCumulateTask lt, rt;
 389                             if ((lt = par.left) != null &&
 390                                 (rt = par.right) != null) {
 391                                 long lout = lt.out;
 392                                 par.out = (rt.hi == fnc ? lout :
 393                                            fn.applyAsLong(lout, rt.out));
 394                             }
 395                             int refork = (((b & CUMULATE) == 0 &&
 396                                            par.lo == org) ? CUMULATE : 0);
 397                             if ((nextState = b|state|refork) == b ||
 398                                 par.compareAndSetPendingCount(b, nextState)) {
 399                                 state = SUMMED;               // drop finished
 400                                 t = par;
 401                                 if (refork != 0)
 402                                     par.fork();
 403                             }
 404                         }
 405                         else if (par.compareAndSetPendingCount(b, b|state))
 406                             break outer;                      // sib not ready
 407                     }
 408                 }
 409             }
 410         }
 411         @java.io.Serial
 412         private static final long serialVersionUID = -5074099945909284273L;
 413     }
 414 
 415     static final class DoubleCumulateTask extends CountedCompleter<Void> {
 416         final double[] array;
 417         @SuppressWarnings("serial") // Not statically typed as Serializable
 418         final DoubleBinaryOperator function;
 419         DoubleCumulateTask left, right;
 420         double in, out;
 421         final int lo, hi, origin, fence, threshold;
 422 
 423         /** Root task constructor */
 424         public DoubleCumulateTask(DoubleCumulateTask parent,
 425                                   DoubleBinaryOperator function,
 426                                   double[] array, int lo, int hi) {
 427             super(parent);
 428             this.function = function; this.array = array;
 429             this.lo = this.origin = lo; this.hi = this.fence = hi;
 430             int p;
 431             this.threshold =
 432                 (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
 433                 <= MIN_PARTITION ? MIN_PARTITION : p;
 434         }
 435 
 436         /** Subtask constructor */
 437         DoubleCumulateTask(DoubleCumulateTask parent, DoubleBinaryOperator function,
 438                            double[] array, int origin, int fence, int threshold,
 439                            int lo, int hi) {
 440             super(parent);
 441             this.function = function; this.array = array;
 442             this.origin = origin; this.fence = fence;
 443             this.threshold = threshold;
 444             this.lo = lo; this.hi = hi;
 445         }
 446 
 447         public final void compute() {
 448             final DoubleBinaryOperator fn;
 449             final double[] a;
 450             if ((fn = this.function) == null || (a = this.array) == null)
 451                 throw new NullPointerException();    // hoist checks
 452             int th = threshold, org = origin, fnc = fence, l, h;
 453             DoubleCumulateTask t = this;
 454             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
 455                 if (h - l > th) {
 456                     DoubleCumulateTask lt = t.left, rt = t.right, f;
 457                     if (lt == null) {                // first pass
 458                         int mid = (l + h) >>> 1;
 459                         f = rt = t.right =
 460                             new DoubleCumulateTask(t, fn, a, org, fnc, th, mid, h);
 461                         t = lt = t.left =
 462                             new DoubleCumulateTask(t, fn, a, org, fnc, th, l, mid);
 463                     }
 464                     else {                           // possibly refork
 465                         double pin = t.in;
 466                         lt.in = pin;
 467                         f = t = null;
 468                         if (rt != null) {
 469                             double lout = lt.out;
 470                             rt.in = (l == org ? lout :
 471                                      fn.applyAsDouble(pin, lout));
 472                             for (int c;;) {
 473                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
 474                                     break;
 475                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
 476                                     t = rt;
 477                                     break;
 478                                 }
 479                             }
 480                         }
 481                         for (int c;;) {
 482                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
 483                                 break;
 484                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
 485                                 if (t != null)
 486                                     f = t;
 487                                 t = lt;
 488                                 break;
 489                             }
 490                         }
 491                         if (t == null)
 492                             break;
 493                     }
 494                     if (f != null)
 495                         f.fork();
 496                 }
 497                 else {
 498                     int state; // Transition to sum, cumulate, or both
 499                     for (int b;;) {
 500                         if (((b = t.getPendingCount()) & FINISHED) != 0)
 501                             break outer;                      // already done
 502                         state = ((b & CUMULATE) != 0 ? FINISHED :
 503                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
 504                         if (t.compareAndSetPendingCount(b, b|state))
 505                             break;
 506                     }
 507 
 508                     double sum;
 509                     if (state != SUMMED) {
 510                         int first;
 511                         if (l == org) {                       // leftmost; no in
 512                             sum = a[org];
 513                             first = org + 1;
 514                         }
 515                         else {
 516                             sum = t.in;
 517                             first = l;
 518                         }
 519                         for (int i = first; i < h; ++i)       // cumulate
 520                             a[i] = sum = fn.applyAsDouble(sum, a[i]);
 521                     }
 522                     else if (h < fnc) {                       // skip rightmost
 523                         sum = a[l];
 524                         for (int i = l + 1; i < h; ++i)       // sum only
 525                             sum = fn.applyAsDouble(sum, a[i]);
 526                     }
 527                     else
 528                         sum = t.in;
 529                     t.out = sum;
 530                     for (DoubleCumulateTask par;;) {            // propagate
 531                         if ((par = (DoubleCumulateTask)t.getCompleter()) == null) {
 532                             if ((state & FINISHED) != 0)      // enable join
 533                                 t.quietlyComplete();
 534                             break outer;
 535                         }
 536                         int b = par.getPendingCount();
 537                         if ((b & state & FINISHED) != 0)
 538                             t = par;                          // both done
 539                         else if ((b & state & SUMMED) != 0) { // both summed
 540                             int nextState; DoubleCumulateTask lt, rt;
 541                             if ((lt = par.left) != null &&
 542                                 (rt = par.right) != null) {
 543                                 double lout = lt.out;
 544                                 par.out = (rt.hi == fnc ? lout :
 545                                            fn.applyAsDouble(lout, rt.out));
 546                             }
 547                             int refork = (((b & CUMULATE) == 0 &&
 548                                            par.lo == org) ? CUMULATE : 0);
 549                             if ((nextState = b|state|refork) == b ||
 550                                 par.compareAndSetPendingCount(b, nextState)) {
 551                                 state = SUMMED;               // drop finished
 552                                 t = par;
 553                                 if (refork != 0)
 554                                     par.fork();
 555                             }
 556                         }
 557                         else if (par.compareAndSetPendingCount(b, b|state))
 558                             break outer;                      // sib not ready
 559                     }
 560                 }
 561             }
 562         }
 563         @java.io.Serial
 564         private static final long serialVersionUID = -586947823794232033L;
 565     }
 566 
 567     static final class IntCumulateTask extends CountedCompleter<Void> {
 568         final int[] array;
 569         @SuppressWarnings("serial") // Not statically typed as Serializable
 570         final IntBinaryOperator function;
 571         IntCumulateTask left, right;
 572         int in, out;
 573         final int lo, hi, origin, fence, threshold;
 574 
 575         /** Root task constructor */
 576         public IntCumulateTask(IntCumulateTask parent,
 577                                IntBinaryOperator function,
 578                                int[] array, int lo, int hi) {
 579             super(parent);
 580             this.function = function; this.array = array;
 581             this.lo = this.origin = lo; this.hi = this.fence = hi;
 582             int p;
 583             this.threshold =
 584                 (p = (hi - lo) / (ForkJoinPool.getCommonPoolParallelism() << 3))
 585                 <= MIN_PARTITION ? MIN_PARTITION : p;
 586         }
 587 
 588         /** Subtask constructor */
 589         IntCumulateTask(IntCumulateTask parent, IntBinaryOperator function,
 590                         int[] array, int origin, int fence, int threshold,
 591                         int lo, int hi) {
 592             super(parent);
 593             this.function = function; this.array = array;
 594             this.origin = origin; this.fence = fence;
 595             this.threshold = threshold;
 596             this.lo = lo; this.hi = hi;
 597         }
 598 
 599         public final void compute() {
 600             final IntBinaryOperator fn;
 601             final int[] a;
 602             if ((fn = this.function) == null || (a = this.array) == null)
 603                 throw new NullPointerException();    // hoist checks
 604             int th = threshold, org = origin, fnc = fence, l, h;
 605             IntCumulateTask t = this;
 606             outer: while ((l = t.lo) >= 0 && (h = t.hi) <= a.length) {
 607                 if (h - l > th) {
 608                     IntCumulateTask lt = t.left, rt = t.right, f;
 609                     if (lt == null) {                // first pass
 610                         int mid = (l + h) >>> 1;
 611                         f = rt = t.right =
 612                             new IntCumulateTask(t, fn, a, org, fnc, th, mid, h);
 613                         t = lt = t.left =
 614                             new IntCumulateTask(t, fn, a, org, fnc, th, l, mid);
 615                     }
 616                     else {                           // possibly refork
 617                         int pin = t.in;
 618                         lt.in = pin;
 619                         f = t = null;
 620                         if (rt != null) {
 621                             int lout = lt.out;
 622                             rt.in = (l == org ? lout :
 623                                      fn.applyAsInt(pin, lout));
 624                             for (int c;;) {
 625                                 if (((c = rt.getPendingCount()) & CUMULATE) != 0)
 626                                     break;
 627                                 if (rt.compareAndSetPendingCount(c, c|CUMULATE)){
 628                                     t = rt;
 629                                     break;
 630                                 }
 631                             }
 632                         }
 633                         for (int c;;) {
 634                             if (((c = lt.getPendingCount()) & CUMULATE) != 0)
 635                                 break;
 636                             if (lt.compareAndSetPendingCount(c, c|CUMULATE)) {
 637                                 if (t != null)
 638                                     f = t;
 639                                 t = lt;
 640                                 break;
 641                             }
 642                         }
 643                         if (t == null)
 644                             break;
 645                     }
 646                     if (f != null)
 647                         f.fork();
 648                 }
 649                 else {
 650                     int state; // Transition to sum, cumulate, or both
 651                     for (int b;;) {
 652                         if (((b = t.getPendingCount()) & FINISHED) != 0)
 653                             break outer;                      // already done
 654                         state = ((b & CUMULATE) != 0 ? FINISHED :
 655                                  (l > org) ? SUMMED : (SUMMED|FINISHED));
 656                         if (t.compareAndSetPendingCount(b, b|state))
 657                             break;
 658                     }
 659 
 660                     int sum;
 661                     if (state != SUMMED) {
 662                         int first;
 663                         if (l == org) {                       // leftmost; no in
 664                             sum = a[org];
 665                             first = org + 1;
 666                         }
 667                         else {
 668                             sum = t.in;
 669                             first = l;
 670                         }
 671                         for (int i = first; i < h; ++i)       // cumulate
 672                             a[i] = sum = fn.applyAsInt(sum, a[i]);
 673                     }
 674                     else if (h < fnc) {                       // skip rightmost
 675                         sum = a[l];
 676                         for (int i = l + 1; i < h; ++i)       // sum only
 677                             sum = fn.applyAsInt(sum, a[i]);
 678                     }
 679                     else
 680                         sum = t.in;
 681                     t.out = sum;
 682                     for (IntCumulateTask par;;) {            // propagate
 683                         if ((par = (IntCumulateTask)t.getCompleter()) == null) {
 684                             if ((state & FINISHED) != 0)      // enable join
 685                                 t.quietlyComplete();
 686                             break outer;
 687                         }
 688                         int b = par.getPendingCount();
 689                         if ((b & state & FINISHED) != 0)
 690                             t = par;                          // both done
 691                         else if ((b & state & SUMMED) != 0) { // both summed
 692                             int nextState; IntCumulateTask lt, rt;
 693                             if ((lt = par.left) != null &&
 694                                 (rt = par.right) != null) {
 695                                 int lout = lt.out;
 696                                 par.out = (rt.hi == fnc ? lout :
 697                                            fn.applyAsInt(lout, rt.out));
 698                             }
 699                             int refork = (((b & CUMULATE) == 0 &&
 700                                            par.lo == org) ? CUMULATE : 0);
 701                             if ((nextState = b|state|refork) == b ||
 702                                 par.compareAndSetPendingCount(b, nextState)) {
 703                                 state = SUMMED;               // drop finished
 704                                 t = par;
 705                                 if (refork != 0)
 706                                     par.fork();
 707                             }
 708                         }
 709                         else if (par.compareAndSetPendingCount(b, b|state))
 710                             break outer;                      // sib not ready
 711                     }
 712                 }
 713             }
 714         }
 715         @java.io.Serial
 716         private static final long serialVersionUID = 3731755594596840961L;
 717     }
 718 }