1 /*
   2  * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.util;
  27 
  28 import java.util.concurrent.CountedCompleter;
  29 import java.util.concurrent.RecursiveTask;
  30 
  31 /**
  32  * This class implements powerful and fully optimized versions, both
  33  * sequential and parallel, of the Dual-Pivot Quicksort algorithm by
  34  * Vladimir Yaroslavskiy, Jon Bentley and Josh Bloch. This algorithm
  35  * offers O(n log(n)) performance on all data sets, and is typically
  36  * faster than traditional (one-pivot) Quicksort implementations.
  37  *
  38  * There are also additional algorithms, invoked from the Dual-Pivot
  39  * Quicksort, such as mixed insertion sort, merging of runs and heap
  40  * sort, counting sort and parallel merge sort.
  41  *
  42  * @author Vladimir Yaroslavskiy
  43  * @author Jon Bentley
  44  * @author Josh Bloch
  45  * @author Doug Lea
  46  *
  47  * @version 2018.08.18
  48  *
  49  * @since 1.7 * 14
  50  */
  51 final class DualPivotQuicksort {
  52 
  53     /**
  54      * Prevents instantiation.
  55      */
  56     private DualPivotQuicksort() {}
  57 
  58     /**
  59      * Max array size to use mixed insertion sort.
  60      */
  61     private static final int MAX_MIXED_INSERTION_SORT_SIZE = 114;
  62 
  63     /**
  64      * Max array size to use insertion sort.
  65      */
  66     private static final int MAX_INSERTION_SORT_SIZE = 41;
  67 
  68     /**
  69      * Min array size to perform sorting in parallel.
  70      */
  71     private static final int MIN_PARALLEL_SORT_SIZE = 4 << 10;
  72 
  73     /**
  74      * Min array size to try merging of runs.
  75      */
  76     private static final int MIN_TRY_MERGE_SIZE = 4 << 10;
  77 
  78     /**
  79      * Min size of the first run to continue with scanning.
  80      */
  81     private static final int MIN_FIRST_RUN_SIZE = 16;
  82 
  83     /**
  84      * Min factor for the first runs to continue scanning.
  85      */
  86     private static final int MIN_FIRST_RUNS_FACTOR = 7;
  87 
  88     /**
  89      * Max capacity of the index array for tracking runs.
  90      */
  91     private static final int MAX_RUN_CAPACITY = 5 << 10;
  92 
  93     /**
  94      * Min number of runs, required by parallel merging.
  95      */
  96     private static final int MIN_RUN_COUNT = 4;
  97 
  98     /**
  99      * Min array size to use parallel merging of parts.
 100      */
 101     private static final int MIN_PARALLEL_MERGE_PARTS_SIZE = 4 << 10;
 102 
 103     /**
 104      * Min size of a byte array to use counting sort.
 105      */
 106     private static final int MIN_BYTE_COUNTING_SORT_SIZE = 64;
 107 
 108     /**
 109      * Min size of a short or char array to use counting sort.
 110      */
 111     private static final int MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE = 1750;
 112 
 113     /**
 114      * Max double recursive partitioning depth before using heap sort.
 115      */
 116     private static final int MAX_RECURSION_DEPTH = 64 << 1;
 117 
 118     /**
 119      * Calculates the double depth of parallel merging.
 120      * Depth is negative, if tasks split before sorting.
 121      *
 122      * @param parallelism the parallelism level
 123      * @param size the target size
 124      * @return the depth of parallel merging
 125      */
 126     private static int getDepth(int parallelism, int size) {
 127         int depth = 0;
 128 
 129         while ((parallelism >>= 3) > 0 && (size >>= 2) > 0) {
 130             depth -= 2;
 131         }
 132         return depth;
 133     }
 134 
 135     /**
 136      * Sorts the specified range of the array using parallel merge
 137      * sort and/or Dual-Pivot Quicksort.
 138      *
 139      * To balance the faster splitting and parallelism of merge sort
 140      * with the faster element partitioning of Quicksort, ranges are
 141      * subdivided in tiers such that, if there is enough parallelism,
 142      * the four-way parallel merge is started, still ensuring enough
 143      * parallelism to process the partitions.
 144      *
 145      * @param a the array to be sorted
 146      * @param parallelism the parallelism level
 147      * @param low the index of the first element, inclusive, to be sorted
 148      * @param high the index of the last element, exclusive, to be sorted
 149      */
 150     static void sort(int[] a, int parallelism, int low, int high) {
 151         int size = high - low;
 152 
 153         if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) {
 154             int depth = getDepth(parallelism, size >> 12);
 155             int[] b = depth == 0 ? null : new int[size];
 156             new Sorter(null, a, b, low, size, low, depth).invoke();
 157         } else {
 158             sort(null, a, 0, low, high);
 159         }
 160     }
 161 
 162     /**
 163      * Sorts the specified array using the Dual-Pivot Quicksort and/or
 164      * other sorts in special-cases, possibly with parallel partitions.
 165      *
 166      * @param sorter parallel context
 167      * @param a the array to be sorted
 168      * @param bits the combination of recursion depth and bit flag, where
 169      *        the right bit "0" indicates that array is the leftmost part
 170      * @param low the index of the first element, inclusive, to be sorted
 171      * @param high the index of the last element, exclusive, to be sorted
 172      */
 173     static void sort(Sorter sorter, int[] a, int bits, int low, int high) {
 174         while (true) {
 175             int end = high - 1, size = high - low;
 176 
 177             /*
 178              * Run mixed insertion sort on small non-leftmost parts.
 179              */
 180             if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) {
 181                 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high);
 182                 return;
 183             }
 184 
 185             /*
 186              * Invoke insertion sort on small leftmost part.
 187              */
 188             if (size < MAX_INSERTION_SORT_SIZE) {
 189                 insertionSort(a, low, high);
 190                 return;
 191             }
 192 
 193             /*
 194              * Check if the whole array or large non-leftmost
 195              * parts are nearly sorted and then merge runs.
 196              */
 197             if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0)
 198                     && tryMergeRuns(sorter, a, low, size)) {
 199                 return;
 200             }
 201 
 202             /*
 203              * Switch to heap sort if execution
 204              * time is becoming quadratic.
 205              */
 206             if ((bits += 2) > MAX_RECURSION_DEPTH) {
 207                 heapSort(a, low, high);
 208                 return;
 209             }
 210 
 211             /*
 212              * Use an inexpensive approximation of the golden ratio
 213              * to select five sample elements and determine pivots.
 214              */
 215             int step = (size >> 3) * 3 + 3;
 216 
 217             /*
 218              * Five elements around (and including) the central element
 219              * will be used for pivot selection as described below. The
 220              * unequal choice of spacing these elements was empirically
 221              * determined to work well on a wide variety of inputs.
 222              */
 223             int e1 = low + step;
 224             int e5 = end - step;
 225             int e3 = (e1 + e5) >>> 1;
 226             int e2 = (e1 + e3) >>> 1;
 227             int e4 = (e3 + e5) >>> 1;
 228             int a3 = a[e3];
 229 
 230             /*
 231              * Sort these elements in place by the combination
 232              * of 4-element sorting network and insertion sort.
 233              *
 234              *    5 ------o-----------o------------
 235              *            |           |
 236              *    4 ------|-----o-----o-----o------
 237              *            |     |           |
 238              *    2 ------o-----|-----o-----o------
 239              *                  |     |
 240              *    1 ------------o-----o------------
 241              */
 242             if (a[e5] < a[e2]) { int t = a[e5]; a[e5] = a[e2]; a[e2] = t; }
 243             if (a[e4] < a[e1]) { int t = a[e4]; a[e4] = a[e1]; a[e1] = t; }
 244             if (a[e5] < a[e4]) { int t = a[e5]; a[e5] = a[e4]; a[e4] = t; }
 245             if (a[e2] < a[e1]) { int t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
 246             if (a[e4] < a[e2]) { int t = a[e4]; a[e4] = a[e2]; a[e2] = t; }
 247 
 248             if (a3 < a[e2]) {
 249                 if (a3 < a[e1]) {
 250                     a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3;
 251                 } else {
 252                     a[e3] = a[e2]; a[e2] = a3;
 253                 }
 254             } else if (a3 > a[e4]) {
 255                 if (a3 > a[e5]) {
 256                     a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3;
 257                 } else {
 258                     a[e3] = a[e4]; a[e4] = a3;
 259                 }
 260             }
 261 
 262             // Pointers
 263             int lower = low; // The index of the last element of the left part
 264             int upper = end; // The index of the first element of the right part
 265 
 266             /*
 267              * Partitioning with 2 pivots in case of different elements.
 268              */
 269             if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) {
 270 
 271                 /*
 272                  * Use the first and fifth of the five sorted elements as
 273                  * the pivots. These values are inexpensive approximation
 274                  * of tertiles. Note, that pivot1 < pivot2.
 275                  */
 276                 int pivot1 = a[e1];
 277                 int pivot2 = a[e5];
 278 
 279                 /*
 280                  * The first and the last elements to be sorted are moved
 281                  * to the locations formerly occupied by the pivots. When
 282                  * partitioning is completed, the pivots are swapped back
 283                  * into their final positions, and excluded from the next
 284                  * subsequent sorting.
 285                  */
 286                 a[e1] = a[lower];
 287                 a[e5] = a[upper];
 288 
 289                 /*
 290                  * Skip elements, which are less or greater than the pivots.
 291                  */
 292                 while (a[++lower] < pivot1);
 293                 while (a[--upper] > pivot2);
 294 
 295                 /*
 296                  * Backward 3-interval partitioning
 297                  *
 298                  *   left part                 central part          right part
 299                  * +------------------------------------------------------------+
 300                  * |  < pivot1  |   ?   |  pivot1 <= && <= pivot2  |  > pivot2  |
 301                  * +------------------------------------------------------------+
 302                  *             ^       ^                            ^
 303                  *             |       |                            |
 304                  *           lower     k                          upper
 305                  *
 306                  * Invariants:
 307                  *
 308                  *              all in (low, lower] < pivot1
 309                  *    pivot1 <= all in (k, upper)  <= pivot2
 310                  *              all in [upper, end) > pivot2
 311                  *
 312                  * Pointer k is the last index of ?-part
 313                  */
 314                 for (int unused = --lower, k = ++upper; --k > lower; ) {
 315                     int ak = a[k];
 316 
 317                     if (ak < pivot1) { // Move a[k] to the left side
 318                         while (lower < k) {
 319                             if (a[++lower] >= pivot1) {
 320                                 if (a[lower] > pivot2) {
 321                                     a[k] = a[--upper];
 322                                     a[upper] = a[lower];
 323                                 } else {
 324                                     a[k] = a[lower];
 325                                 }
 326                                 a[lower] = ak;
 327                                 break;
 328                             }
 329                         }
 330                     } else if (ak > pivot2) { // Move a[k] to the right side
 331                         a[k] = a[--upper];
 332                         a[upper] = ak;
 333                     }
 334                 }
 335 
 336                 /*
 337                  * Swap the pivots into their final positions.
 338                  */
 339                 a[low] = a[lower]; a[lower] = pivot1;
 340                 a[end] = a[upper]; a[upper] = pivot2;
 341 
 342                 /*
 343                  * Sort non-left parts recursively (possibly in parallel),
 344                  * excluding known pivots.
 345                  */
 346                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
 347                     sorter.forkSorter(bits | 1, lower + 1, upper);
 348                     sorter.forkSorter(bits | 1, upper + 1, high);
 349                 } else {
 350                     sort(sorter, a, bits | 1, lower + 1, upper);
 351                     sort(sorter, a, bits | 1, upper + 1, high);
 352                 }
 353 
 354             } else { // Use single pivot in case of many equal elements
 355 
 356                 /*
 357                  * Use the third of the five sorted elements as the pivot.
 358                  * This value is inexpensive approximation of the median.
 359                  */
 360                 int pivot = a[e3];
 361 
 362                 /*
 363                  * The first element to be sorted is moved to the
 364                  * location formerly occupied by the pivot. After
 365                  * completion of partitioning the pivot is swapped
 366                  * back into its final position, and excluded from
 367                  * the next subsequent sorting.
 368                  */
 369                 a[e3] = a[lower];
 370 
 371                 /*
 372                  * Traditional 3-way (Dutch National Flag) partitioning
 373                  *
 374                  *   left part                 central part    right part
 375                  * +------------------------------------------------------+
 376                  * |   < pivot   |     ?     |   == pivot   |   > pivot   |
 377                  * +------------------------------------------------------+
 378                  *              ^           ^                ^
 379                  *              |           |                |
 380                  *            lower         k              upper
 381                  *
 382                  * Invariants:
 383                  *
 384                  *   all in (low, lower] < pivot
 385                  *   all in (k, upper)  == pivot
 386                  *   all in [upper, end] > pivot
 387                  *
 388                  * Pointer k is the last index of ?-part
 389                  */
 390                 for (int k = ++upper; --k > lower; ) {
 391                     int ak = a[k];
 392 
 393                     if (ak != pivot) {
 394                         a[k] = pivot;
 395 
 396                         if (ak < pivot) { // Move a[k] to the left side
 397                             while (a[++lower] < pivot);
 398 
 399                             if (a[lower] > pivot) {
 400                                 a[--upper] = a[lower];
 401                             }
 402                             a[lower] = ak;
 403                         } else { // ak > pivot - Move a[k] to the right side
 404                             a[--upper] = ak;
 405                         }
 406                     }
 407                 }
 408 
 409                 /*
 410                  * Swap the pivot into its final position.
 411                  */
 412                 a[low] = a[lower]; a[lower] = pivot;
 413 
 414                 /*
 415                  * Sort the right part (possibly in parallel), excluding
 416                  * known pivot. All elements from the central part are
 417                  * equal and therefore already sorted.
 418                  */
 419                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
 420                     sorter.forkSorter(bits | 1, upper, high);
 421                 } else {
 422                     sort(sorter, a, bits | 1, upper, high);
 423                 }
 424             }
 425             high = lower; // Iterate along the left part
 426         }
 427     }
 428 
 429     /**
 430      * Sorts the specified range of the array using mixed insertion sort.
 431      *
 432      * Mixed insertion sort is combination of simple insertion sort,
 433      * pin insertion sort and pair insertion sort.
 434      *
 435      * In the context of Dual-Pivot Quicksort, the pivot element
 436      * from the left part plays the role of sentinel, because it
 437      * is less than any elements from the given part. Therefore,
 438      * expensive check of the left range can be skipped on each
 439      * iteration unless it is the leftmost call.
 440      *
 441      * @param a the array to be sorted
 442      * @param low the index of the first element, inclusive, to be sorted
 443      * @param end the index of the last element for simple insertion sort
 444      * @param high the index of the last element, exclusive, to be sorted
 445      */
 446     private static void mixedInsertionSort(int[] a, int low, int end, int high) {
 447         if (end == high) {
 448 
 449             /*
 450              * Invoke simple insertion sort on tiny array.
 451              */
 452             for (int i; ++low < end; ) {
 453                 int ai = a[i = low];
 454 
 455                 while (ai < a[--i]) {
 456                     a[i + 1] = a[i];
 457                 }
 458                 a[i + 1] = ai;
 459             }
 460         } else {
 461 
 462             /*
 463              * Start with pin insertion sort on small part.
 464              *
 465              * Pin insertion sort is extended simple insertion sort.
 466              * The main idea of this sort is to put elements larger
 467              * than an element called pin to the end of array (the
 468              * proper area for such elements). It avoids expensive
 469              * movements of these elements through the whole array.
 470              */
 471             int pin = a[end];
 472 
 473             for (int i, p = high; ++low < end; ) {
 474                 int ai = a[i = low];
 475 
 476                 if (ai < a[i - 1]) { // Small element
 477 
 478                     /*
 479                      * Insert small element into sorted part.
 480                      */
 481                     a[i] = a[--i];
 482 
 483                     while (ai < a[--i]) {
 484                         a[i + 1] = a[i];
 485                     }
 486                     a[i + 1] = ai;
 487 
 488                 } else if (p > i && ai > pin) { // Large element
 489 
 490                     /*
 491                      * Find element smaller than pin.
 492                      */
 493                     while (a[--p] > pin);
 494 
 495                     /*
 496                      * Swap it with large element.
 497                      */
 498                     if (p > i) {
 499                         ai = a[p];
 500                         a[p] = a[i];
 501                     }
 502 
 503                     /*
 504                      * Insert small element into sorted part.
 505                      */
 506                     while (ai < a[--i]) {
 507                         a[i + 1] = a[i];
 508                     }
 509                     a[i + 1] = ai;
 510                 }
 511             }
 512 
 513             /*
 514              * Continue with pair insertion sort on remain part.
 515              */
 516             for (int i; low < high; ++low) {
 517                 int a1 = a[i = low], a2 = a[++low];
 518 
 519                 /*
 520                  * Insert two elements per iteration: at first, insert the
 521                  * larger element and then insert the smaller element, but
 522                  * from the position where the larger element was inserted.
 523                  */
 524                 if (a1 > a2) {
 525 
 526                     while (a1 < a[--i]) {
 527                         a[i + 2] = a[i];
 528                     }
 529                     a[++i + 1] = a1;
 530 
 531                     while (a2 < a[--i]) {
 532                         a[i + 1] = a[i];
 533                     }
 534                     a[i + 1] = a2;
 535 
 536                 } else if (a1 < a[i - 1]) {
 537 
 538                     while (a2 < a[--i]) {
 539                         a[i + 2] = a[i];
 540                     }
 541                     a[++i + 1] = a2;
 542 
 543                     while (a1 < a[--i]) {
 544                         a[i + 1] = a[i];
 545                     }
 546                     a[i + 1] = a1;
 547                 }
 548             }
 549         }
 550     }
 551 
 552     /**
 553      * Sorts the specified range of the array using insertion sort.
 554      *
 555      * @param a the array to be sorted
 556      * @param low the index of the first element, inclusive, to be sorted
 557      * @param high the index of the last element, exclusive, to be sorted
 558      */
 559     private static void insertionSort(int[] a, int low, int high) {
 560         for (int i, k = low; ++k < high; ) {
 561             int ai = a[i = k];
 562 
 563             if (ai < a[i - 1]) {
 564                 while (--i >= low && ai < a[i]) {
 565                     a[i + 1] = a[i];
 566                 }
 567                 a[i + 1] = ai;
 568             }
 569         }
 570     }
 571 
 572     /**
 573      * Sorts the specified range of the array using heap sort.
 574      *
 575      * @param a the array to be sorted
 576      * @param low the index of the first element, inclusive, to be sorted
 577      * @param high the index of the last element, exclusive, to be sorted
 578      */
 579     private static void heapSort(int[] a, int low, int high) {
 580         for (int k = (low + high) >>> 1; k > low; ) {
 581             pushDown(a, --k, a[k], low, high);
 582         }
 583         while (--high > low) {
 584             int max = a[low];
 585             pushDown(a, low, a[high], low, high);
 586             a[high] = max;
 587         }
 588     }
 589 
 590     /**
 591      * Pushes specified element down during heap sort.
 592      *
 593      * @param a the given array
 594      * @param p the start index
 595      * @param value the given element
 596      * @param low the index of the first element, inclusive, to be sorted
 597      * @param high the index of the last element, exclusive, to be sorted
 598      */
 599     private static void pushDown(int[] a, int p, int value, int low, int high) {
 600         for (int k ;; a[p] = a[p = k]) {
 601             k = (p << 1) - low + 2; // Index of the right child
 602 
 603             if (k > high) {
 604                 break;
 605             }
 606             if (k == high || a[k] < a[k - 1]) {
 607                 --k;
 608             }
 609             if (a[k] <= value) {
 610                 break;
 611             }
 612         }
 613         a[p] = value;
 614     }
 615 
 616     /**
 617      * Tries to sort the specified range of the array.
 618      *
 619      * @param sorter parallel context
 620      * @param a the array to be sorted
 621      * @param low the index of the first element to be sorted
 622      * @param size the array size
 623      * @return true if finally sorted, false otherwise
 624      */
 625     private static boolean tryMergeRuns(Sorter sorter, int[] a, int low, int size) {
 626 
 627         /*
 628          * The run array is constructed only if initial runs are
 629          * long enough to continue, run[i] then holds start index
 630          * of the i-th sequence of elements in non-descending order.
 631          */
 632         int[] run = null;
 633         int high = low + size;
 634         int count = 1, last = low;
 635 
 636         /*
 637          * Identify all possible runs.
 638          */
 639         for (int k = low + 1; k < high; ) {
 640 
 641             /*
 642              * Find the end index of the current run.
 643              */
 644             if (a[k - 1] < a[k]) {
 645 
 646                 // Identify ascending sequence
 647                 while (++k < high && a[k - 1] <= a[k]);
 648 
 649             } else if (a[k - 1] > a[k]) {
 650 
 651                 // Identify descending sequence
 652                 while (++k < high && a[k - 1] >= a[k]);
 653 
 654                 // Reverse into ascending order
 655                 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) {
 656                     int ai = a[i]; a[i] = a[j]; a[j] = ai;
 657                 }
 658             } else { // Identify constant sequence
 659                 for (int ak = a[k]; ++k < high && ak == a[k]; );
 660 
 661                 if (k < high) {
 662                     continue;
 663                 }
 664             }
 665 
 666             /*
 667              * Check special cases.
 668              */
 669             if (run == null) {
 670                 if (k == high) {
 671 
 672                     /*
 673                      * The array is monotonous sequence,
 674                      * and therefore already sorted.
 675                      */
 676                     return true;
 677                 }
 678 
 679                 if (k - low < MIN_FIRST_RUN_SIZE) {
 680 
 681                     /*
 682                      * The first run is too small
 683                      * to proceed with scanning.
 684                      */
 685                     return false;
 686                 }
 687 
 688                 run = new int[((size >> 10) | 0x7F) & 0x3FF];
 689                 run[0] = low;
 690 
 691             } else if (a[last - 1] > a[last]) {
 692 
 693                 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) {
 694 
 695                     /*
 696                      * The first runs are not long
 697                      * enough to continue scanning.
 698                      */
 699                     return false;
 700                 }
 701 
 702                 if (++count == MAX_RUN_CAPACITY) {
 703 
 704                     /*
 705                      * Array is not highly structured.
 706                      */
 707                     return false;
 708                 }
 709 
 710                 if (count == run.length) {
 711 
 712                     /*
 713                      * Increase capacity of index array.
 714                      */
 715                     run = Arrays.copyOf(run, count << 1);
 716                 }
 717             }
 718             run[count] = (last = k);
 719         }
 720 
 721         /*
 722          * Merge runs of highly structured array.
 723          */
 724         if (count > 1) {
 725             int[] b; int offset = low;
 726 
 727             if (sorter == null || (b = (int[]) sorter.b) == null) {
 728                 b = new int[size];
 729             } else {
 730                 offset = sorter.offset;
 731             }
 732             mergeRuns(a, b, offset, 1, sorter != null, run, 0, count);
 733         }
 734         return true;
 735     }
 736 
 737     /**
 738      * Merges the specified runs.
 739      *
 740      * @param a the source array
 741      * @param b the temporary buffer used in merging
 742      * @param offset the start index in the source, inclusive
 743      * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0)
 744      * @param parallel indicates whether merging is performed in parallel
 745      * @param run the start indexes of the runs, inclusive
 746      * @param lo the start index of the first run, inclusive
 747      * @param hi the start index of the last run, inclusive
 748      * @return the destination where runs are merged
 749      */
 750     private static int[] mergeRuns(int[] a, int[] b, int offset,
 751             int aim, boolean parallel, int[] run, int lo, int hi) {
 752 
 753         if (hi - lo == 1) {
 754             if (aim >= 0) {
 755                 return a;
 756             }
 757             for (int i = run[hi], j = i - offset, low = run[lo]; i > low;
 758                 b[--j] = a[--i]
 759             );
 760             return b;
 761         }
 762 
 763         /*
 764          * Split into approximately equal parts.
 765          */
 766         int mi = lo, rmi = (run[lo] + run[hi]) >>> 1;
 767         while (run[++mi + 1] <= rmi);
 768 
 769         /*
 770          * Merge the left and right parts.
 771          */
 772         int[] a1, a2;
 773 
 774         if (parallel && hi - lo > MIN_RUN_COUNT) {
 775             RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe();
 776             a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi);
 777             a2 = (int[]) merger.getDestination();
 778         } else {
 779             a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi);
 780             a2 = mergeRuns(a, b, offset,    0, false, run, mi, hi);
 781         }
 782 
 783         int[] dst = a1 == a ? b : a;
 784 
 785         int k   = a1 == a ? run[lo] - offset : run[lo];
 786         int lo1 = a1 == b ? run[lo] - offset : run[lo];
 787         int hi1 = a1 == b ? run[mi] - offset : run[mi];
 788         int lo2 = a2 == b ? run[mi] - offset : run[mi];
 789         int hi2 = a2 == b ? run[hi] - offset : run[hi];
 790 
 791         if (parallel) {
 792             new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke();
 793         } else {
 794             mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2);
 795         }
 796         return dst;
 797     }
 798 
 799     /**
 800      * Merges the sorted parts.
 801      *
 802      * @param merger parallel context
 803      * @param dst the destination where parts are merged
 804      * @param k the start index of the destination, inclusive
 805      * @param a1 the first part
 806      * @param lo1 the start index of the first part, inclusive
 807      * @param hi1 the end index of the first part, exclusive
 808      * @param a2 the second part
 809      * @param lo2 the start index of the second part, inclusive
 810      * @param hi2 the end index of the second part, exclusive
 811      */
 812     private static void mergeParts(Merger merger, int[] dst, int k,
 813             int[] a1, int lo1, int hi1, int[] a2, int lo2, int hi2) {
 814 
 815         if (merger != null && a1 == a2) {
 816 
 817             while (true) {
 818 
 819                 /*
 820                  * The first part must be larger.
 821                  */
 822                 if (hi1 - lo1 < hi2 - lo2) {
 823                     int lo = lo1; lo1 = lo2; lo2 = lo;
 824                     int hi = hi1; hi1 = hi2; hi2 = hi;
 825                 }
 826 
 827                 /*
 828                  * Small parts will be merged sequentially.
 829                  */
 830                 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) {
 831                     break;
 832                 }
 833 
 834                 /*
 835                  * Find the median of the larger part.
 836                  */
 837                 int mi1 = (lo1 + hi1) >>> 1;
 838                 int key = a1[mi1];
 839                 int mi2 = hi2;
 840 
 841                 /*
 842                  * Partition the smaller part.
 843                  */
 844                 for (int loo = lo2; loo < mi2; ) {
 845                     int t = (loo + mi2) >>> 1;
 846 
 847                     if (key > a2[t]) {
 848                         loo = t + 1;
 849                     } else {
 850                         mi2 = t;
 851                     }
 852                 }
 853 
 854                 int d = mi2 - lo2 + mi1 - lo1;
 855 
 856                 /*
 857                  * Merge the right sub-parts in parallel.
 858                  */
 859                 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2);
 860 
 861                 /*
 862                  * Process the sub-left parts.
 863                  */
 864                 hi1 = mi1;
 865                 hi2 = mi2;
 866             }
 867         }
 868 
 869         /*
 870          * Merge small parts sequentially.
 871          */
 872         while (lo1 < hi1 && lo2 < hi2) {
 873             dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++];
 874         }
 875         if (dst != a1 || k < lo1) {
 876             while (lo1 < hi1) {
 877                 dst[k++] = a1[lo1++];
 878             }
 879         }
 880         if (dst != a2 || k < lo2) {
 881             while (lo2 < hi2) {
 882                 dst[k++] = a2[lo2++];
 883             }
 884         }
 885     }
 886 
 887 // [long]
 888 
 889     /**
 890      * Sorts the specified range of the array using parallel merge
 891      * sort and/or Dual-Pivot Quicksort.
 892      *
 893      * To balance the faster splitting and parallelism of merge sort
 894      * with the faster element partitioning of Quicksort, ranges are
 895      * subdivided in tiers such that, if there is enough parallelism,
 896      * the four-way parallel merge is started, still ensuring enough
 897      * parallelism to process the partitions.
 898      *
 899      * @param a the array to be sorted
 900      * @param parallelism the parallelism level
 901      * @param low the index of the first element, inclusive, to be sorted
 902      * @param high the index of the last element, exclusive, to be sorted
 903      */
 904     static void sort(long[] a, int parallelism, int low, int high) {
 905         int size = high - low;
 906 
 907         if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) {
 908             int depth = getDepth(parallelism, size >> 12);
 909             long[] b = depth == 0 ? null : new long[size];
 910             new Sorter(null, a, b, low, size, low, depth).invoke();
 911         } else {
 912             sort(null, a, 0, low, high);
 913         }
 914     }
 915 
 916     /**
 917      * Sorts the specified array using the Dual-Pivot Quicksort and/or
 918      * other sorts in special-cases, possibly with parallel partitions.
 919      *
 920      * @param sorter parallel context
 921      * @param a the array to be sorted
 922      * @param bits the combination of recursion depth and bit flag, where
 923      *        the right bit "0" indicates that array is the leftmost part
 924      * @param low the index of the first element, inclusive, to be sorted
 925      * @param high the index of the last element, exclusive, to be sorted
 926      */
 927     static void sort(Sorter sorter, long[] a, int bits, int low, int high) {
 928         while (true) {
 929             int end = high - 1, size = high - low;
 930 
 931             /*
 932              * Run mixed insertion sort on small non-leftmost parts.
 933              */
 934             if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) {
 935                 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high);
 936                 return;
 937             }
 938 
 939             /*
 940              * Invoke insertion sort on small leftmost part.
 941              */
 942             if (size < MAX_INSERTION_SORT_SIZE) {
 943                 insertionSort(a, low, high);
 944                 return;
 945             }
 946 
 947             /*
 948              * Check if the whole array or large non-leftmost
 949              * parts are nearly sorted and then merge runs.
 950              */
 951             if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0)
 952                     && tryMergeRuns(sorter, a, low, size)) {
 953                 return;
 954             }
 955 
 956             /*
 957              * Switch to heap sort if execution
 958              * time is becoming quadratic.
 959              */
 960             if ((bits += 2) > MAX_RECURSION_DEPTH) {
 961                 heapSort(a, low, high);
 962                 return;
 963             }
 964 
 965             /*
 966              * Use an inexpensive approximation of the golden ratio
 967              * to select five sample elements and determine pivots.
 968              */
 969             int step = (size >> 3) * 3 + 3;
 970 
 971             /*
 972              * Five elements around (and including) the central element
 973              * will be used for pivot selection as described below. The
 974              * unequal choice of spacing these elements was empirically
 975              * determined to work well on a wide variety of inputs.
 976              */
 977             int e1 = low + step;
 978             int e5 = end - step;
 979             int e3 = (e1 + e5) >>> 1;
 980             int e2 = (e1 + e3) >>> 1;
 981             int e4 = (e3 + e5) >>> 1;
 982             long a3 = a[e3];
 983 
 984             /*
 985              * Sort these elements in place by the combination
 986              * of 4-element sorting network and insertion sort.
 987              *
 988              *    5 ------o-----------o------------
 989              *            |           |
 990              *    4 ------|-----o-----o-----o------
 991              *            |     |           |
 992              *    2 ------o-----|-----o-----o------
 993              *                  |     |
 994              *    1 ------------o-----o------------
 995              */
 996             if (a[e5] < a[e2]) { long t = a[e5]; a[e5] = a[e2]; a[e2] = t; }
 997             if (a[e4] < a[e1]) { long t = a[e4]; a[e4] = a[e1]; a[e1] = t; }
 998             if (a[e5] < a[e4]) { long t = a[e5]; a[e5] = a[e4]; a[e4] = t; }
 999             if (a[e2] < a[e1]) { long t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
1000             if (a[e4] < a[e2]) { long t = a[e4]; a[e4] = a[e2]; a[e2] = t; }
1001 
1002             if (a3 < a[e2]) {
1003                 if (a3 < a[e1]) {
1004                     a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3;
1005                 } else {
1006                     a[e3] = a[e2]; a[e2] = a3;
1007                 }
1008             } else if (a3 > a[e4]) {
1009                 if (a3 > a[e5]) {
1010                     a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3;
1011                 } else {
1012                     a[e3] = a[e4]; a[e4] = a3;
1013                 }
1014             }
1015 
1016             // Pointers
1017             int lower = low; // The index of the last element of the left part
1018             int upper = end; // The index of the first element of the right part
1019 
1020             /*
1021              * Partitioning with 2 pivots in case of different elements.
1022              */
1023             if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) {
1024 
1025                 /*
1026                  * Use the first and fifth of the five sorted elements as
1027                  * the pivots. These values are inexpensive approximation
1028                  * of tertiles. Note, that pivot1 < pivot2.
1029                  */
1030                 long pivot1 = a[e1];
1031                 long pivot2 = a[e5];
1032 
1033                 /*
1034                  * The first and the last elements to be sorted are moved
1035                  * to the locations formerly occupied by the pivots. When
1036                  * partitioning is completed, the pivots are swapped back
1037                  * into their final positions, and excluded from the next
1038                  * subsequent sorting.
1039                  */
1040                 a[e1] = a[lower];
1041                 a[e5] = a[upper];
1042 
1043                 /*
1044                  * Skip elements, which are less or greater than the pivots.
1045                  */
1046                 while (a[++lower] < pivot1);
1047                 while (a[--upper] > pivot2);
1048 
1049                 /*
1050                  * Backward 3-interval partitioning
1051                  *
1052                  *   left part                 central part          right part
1053                  * +------------------------------------------------------------+
1054                  * |  < pivot1  |   ?   |  pivot1 <= && <= pivot2  |  > pivot2  |
1055                  * +------------------------------------------------------------+
1056                  *             ^       ^                            ^
1057                  *             |       |                            |
1058                  *           lower     k                          upper
1059                  *
1060                  * Invariants:
1061                  *
1062                  *              all in (low, lower] < pivot1
1063                  *    pivot1 <= all in (k, upper)  <= pivot2
1064                  *              all in [upper, end) > pivot2
1065                  *
1066                  * Pointer k is the last index of ?-part
1067                  */
1068                 for (int unused = --lower, k = ++upper; --k > lower; ) {
1069                     long ak = a[k];
1070 
1071                     if (ak < pivot1) { // Move a[k] to the left side
1072                         while (lower < k) {
1073                             if (a[++lower] >= pivot1) {
1074                                 if (a[lower] > pivot2) {
1075                                     a[k] = a[--upper];
1076                                     a[upper] = a[lower];
1077                                 } else {
1078                                     a[k] = a[lower];
1079                                 }
1080                                 a[lower] = ak;
1081                                 break;
1082                             }
1083                         }
1084                     } else if (ak > pivot2) { // Move a[k] to the right side
1085                         a[k] = a[--upper];
1086                         a[upper] = ak;
1087                     }
1088                 }
1089 
1090                 /*
1091                  * Swap the pivots into their final positions.
1092                  */
1093                 a[low] = a[lower]; a[lower] = pivot1;
1094                 a[end] = a[upper]; a[upper] = pivot2;
1095 
1096                 /*
1097                  * Sort non-left parts recursively (possibly in parallel),
1098                  * excluding known pivots.
1099                  */
1100                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
1101                     sorter.forkSorter(bits | 1, lower + 1, upper);
1102                     sorter.forkSorter(bits | 1, upper + 1, high);
1103                 } else {
1104                     sort(sorter, a, bits | 1, lower + 1, upper);
1105                     sort(sorter, a, bits | 1, upper + 1, high);
1106                 }
1107 
1108             } else { // Use single pivot in case of many equal elements
1109 
1110                 /*
1111                  * Use the third of the five sorted elements as the pivot.
1112                  * This value is inexpensive approximation of the median.
1113                  */
1114                 long pivot = a[e3];
1115 
1116                 /*
1117                  * The first element to be sorted is moved to the
1118                  * location formerly occupied by the pivot. After
1119                  * completion of partitioning the pivot is swapped
1120                  * back into its final position, and excluded from
1121                  * the next subsequent sorting.
1122                  */
1123                 a[e3] = a[lower];
1124 
1125                 /*
1126                  * Traditional 3-way (Dutch National Flag) partitioning
1127                  *
1128                  *   left part                 central part    right part
1129                  * +------------------------------------------------------+
1130                  * |   < pivot   |     ?     |   == pivot   |   > pivot   |
1131                  * +------------------------------------------------------+
1132                  *              ^           ^                ^
1133                  *              |           |                |
1134                  *            lower         k              upper
1135                  *
1136                  * Invariants:
1137                  *
1138                  *   all in (low, lower] < pivot
1139                  *   all in (k, upper)  == pivot
1140                  *   all in [upper, end] > pivot
1141                  *
1142                  * Pointer k is the last index of ?-part
1143                  */
1144                 for (int k = ++upper; --k > lower; ) {
1145                     long ak = a[k];
1146 
1147                     if (ak != pivot) {
1148                         a[k] = pivot;
1149 
1150                         if (ak < pivot) { // Move a[k] to the left side
1151                             while (a[++lower] < pivot);
1152 
1153                             if (a[lower] > pivot) {
1154                                 a[--upper] = a[lower];
1155                             }
1156                             a[lower] = ak;
1157                         } else { // ak > pivot - Move a[k] to the right side
1158                             a[--upper] = ak;
1159                         }
1160                     }
1161                 }
1162 
1163                 /*
1164                  * Swap the pivot into its final position.
1165                  */
1166                 a[low] = a[lower]; a[lower] = pivot;
1167 
1168                 /*
1169                  * Sort the right part (possibly in parallel), excluding
1170                  * known pivot. All elements from the central part are
1171                  * equal and therefore already sorted.
1172                  */
1173                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
1174                     sorter.forkSorter(bits | 1, upper, high);
1175                 } else {
1176                     sort(sorter, a, bits | 1, upper, high);
1177                 }
1178             }
1179             high = lower; // Iterate along the left part
1180         }
1181     }
1182 
1183     /**
1184      * Sorts the specified range of the array using mixed insertion sort.
1185      *
1186      * Mixed insertion sort is combination of simple insertion sort,
1187      * pin insertion sort and pair insertion sort.
1188      *
1189      * In the context of Dual-Pivot Quicksort, the pivot element
1190      * from the left part plays the role of sentinel, because it
1191      * is less than any elements from the given part. Therefore,
1192      * expensive check of the left range can be skipped on each
1193      * iteration unless it is the leftmost call.
1194      *
1195      * @param a the array to be sorted
1196      * @param low the index of the first element, inclusive, to be sorted
1197      * @param end the index of the last element for simple insertion sort
1198      * @param high the index of the last element, exclusive, to be sorted
1199      */
1200     private static void mixedInsertionSort(long[] a, int low, int end, int high) {
1201         if (end == high) {
1202 
1203             /*
1204              * Invoke simple insertion sort on tiny array.
1205              */
1206             for (int i; ++low < end; ) {
1207                 long ai = a[i = low];
1208 
1209                 while (ai < a[--i]) {
1210                     a[i + 1] = a[i];
1211                 }
1212                 a[i + 1] = ai;
1213             }
1214         } else {
1215 
1216             /*
1217              * Start with pin insertion sort on small part.
1218              *
1219              * Pin insertion sort is extended simple insertion sort.
1220              * The main idea of this sort is to put elements larger
1221              * than an element called pin to the end of array (the
1222              * proper area for such elements). It avoids expensive
1223              * movements of these elements through the whole array.
1224              */
1225             long pin = a[end];
1226 
1227             for (int i, p = high; ++low < end; ) {
1228                 long ai = a[i = low];
1229 
1230                 if (ai < a[i - 1]) { // Small element
1231 
1232                     /*
1233                      * Insert small element into sorted part.
1234                      */
1235                     a[i] = a[--i];
1236 
1237                     while (ai < a[--i]) {
1238                         a[i + 1] = a[i];
1239                     }
1240                     a[i + 1] = ai;
1241 
1242                 } else if (p > i && ai > pin) { // Large element
1243 
1244                     /*
1245                      * Find element smaller than pin.
1246                      */
1247                     while (a[--p] > pin);
1248 
1249                     /*
1250                      * Swap it with large element.
1251                      */
1252                     if (p > i) {
1253                         ai = a[p];
1254                         a[p] = a[i];
1255                     }
1256 
1257                     /*
1258                      * Insert small element into sorted part.
1259                      */
1260                     while (ai < a[--i]) {
1261                         a[i + 1] = a[i];
1262                     }
1263                     a[i + 1] = ai;
1264                 }
1265             }
1266 
1267             /*
1268              * Continue with pair insertion sort on remain part.
1269              */
1270             for (int i; low < high; ++low) {
1271                 long a1 = a[i = low], a2 = a[++low];
1272 
1273                 /*
1274                  * Insert two elements per iteration: at first, insert the
1275                  * larger element and then insert the smaller element, but
1276                  * from the position where the larger element was inserted.
1277                  */
1278                 if (a1 > a2) {
1279 
1280                     while (a1 < a[--i]) {
1281                         a[i + 2] = a[i];
1282                     }
1283                     a[++i + 1] = a1;
1284 
1285                     while (a2 < a[--i]) {
1286                         a[i + 1] = a[i];
1287                     }
1288                     a[i + 1] = a2;
1289 
1290                 } else if (a1 < a[i - 1]) {
1291 
1292                     while (a2 < a[--i]) {
1293                         a[i + 2] = a[i];
1294                     }
1295                     a[++i + 1] = a2;
1296 
1297                     while (a1 < a[--i]) {
1298                         a[i + 1] = a[i];
1299                     }
1300                     a[i + 1] = a1;
1301                 }
1302             }
1303         }
1304     }
1305 
1306     /**
1307      * Sorts the specified range of the array using insertion sort.
1308      *
1309      * @param a the array to be sorted
1310      * @param low the index of the first element, inclusive, to be sorted
1311      * @param high the index of the last element, exclusive, to be sorted
1312      */
1313     private static void insertionSort(long[] a, int low, int high) {
1314         for (int i, k = low; ++k < high; ) {
1315             long ai = a[i = k];
1316 
1317             if (ai < a[i - 1]) {
1318                 while (--i >= low && ai < a[i]) {
1319                     a[i + 1] = a[i];
1320                 }
1321                 a[i + 1] = ai;
1322             }
1323         }
1324     }
1325 
1326     /**
1327      * Sorts the specified range of the array using heap sort.
1328      *
1329      * @param a the array to be sorted
1330      * @param low the index of the first element, inclusive, to be sorted
1331      * @param high the index of the last element, exclusive, to be sorted
1332      */
1333     private static void heapSort(long[] a, int low, int high) {
1334         for (int k = (low + high) >>> 1; k > low; ) {
1335             pushDown(a, --k, a[k], low, high);
1336         }
1337         while (--high > low) {
1338             long max = a[low];
1339             pushDown(a, low, a[high], low, high);
1340             a[high] = max;
1341         }
1342     }
1343 
1344     /**
1345      * Pushes specified element down during heap sort.
1346      *
1347      * @param a the given array
1348      * @param p the start index
1349      * @param value the given element
1350      * @param low the index of the first element, inclusive, to be sorted
1351      * @param high the index of the last element, exclusive, to be sorted
1352      */
1353     private static void pushDown(long[] a, int p, long value, int low, int high) {
1354         for (int k ;; a[p] = a[p = k]) {
1355             k = (p << 1) - low + 2; // Index of the right child
1356 
1357             if (k > high) {
1358                 break;
1359             }
1360             if (k == high || a[k] < a[k - 1]) {
1361                 --k;
1362             }
1363             if (a[k] <= value) {
1364                 break;
1365             }
1366         }
1367         a[p] = value;
1368     }
1369 
1370     /**
1371      * Tries to sort the specified range of the array.
1372      *
1373      * @param sorter parallel context
1374      * @param a the array to be sorted
1375      * @param low the index of the first element to be sorted
1376      * @param size the array size
1377      * @return true if finally sorted, false otherwise
1378      */
1379     private static boolean tryMergeRuns(Sorter sorter, long[] a, int low, int size) {
1380 
1381         /*
1382          * The run array is constructed only if initial runs are
1383          * long enough to continue, run[i] then holds start index
1384          * of the i-th sequence of elements in non-descending order.
1385          */
1386         int[] run = null;
1387         int high = low + size;
1388         int count = 1, last = low;
1389 
1390         /*
1391          * Identify all possible runs.
1392          */
1393         for (int k = low + 1; k < high; ) {
1394 
1395             /*
1396              * Find the end index of the current run.
1397              */
1398             if (a[k - 1] < a[k]) {
1399 
1400                 // Identify ascending sequence
1401                 while (++k < high && a[k - 1] <= a[k]);
1402 
1403             } else if (a[k - 1] > a[k]) {
1404 
1405                 // Identify descending sequence
1406                 while (++k < high && a[k - 1] >= a[k]);
1407 
1408                 // Reverse into ascending order
1409                 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) {
1410                     long ai = a[i]; a[i] = a[j]; a[j] = ai;
1411                 }
1412             } else { // Identify constant sequence
1413                 for (long ak = a[k]; ++k < high && ak == a[k]; );
1414 
1415                 if (k < high) {
1416                     continue;
1417                 }
1418             }
1419 
1420             /*
1421              * Check special cases.
1422              */
1423             if (run == null) {
1424                 if (k == high) {
1425 
1426                     /*
1427                      * The array is monotonous sequence,
1428                      * and therefore already sorted.
1429                      */
1430                     return true;
1431                 }
1432 
1433                 if (k - low < MIN_FIRST_RUN_SIZE) {
1434 
1435                     /*
1436                      * The first run is too small
1437                      * to proceed with scanning.
1438                      */
1439                     return false;
1440                 }
1441 
1442                 run = new int[((size >> 10) | 0x7F) & 0x3FF];
1443                 run[0] = low;
1444 
1445             } else if (a[last - 1] > a[last]) {
1446 
1447                 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) {
1448 
1449                     /*
1450                      * The first runs are not long
1451                      * enough to continue scanning.
1452                      */
1453                     return false;
1454                 }
1455 
1456                 if (++count == MAX_RUN_CAPACITY) {
1457 
1458                     /*
1459                      * Array is not highly structured.
1460                      */
1461                     return false;
1462                 }
1463 
1464                 if (count == run.length) {
1465 
1466                     /*
1467                      * Increase capacity of index array.
1468                      */
1469                     run = Arrays.copyOf(run, count << 1);
1470                 }
1471             }
1472             run[count] = (last = k);
1473         }
1474 
1475         /*
1476          * Merge runs of highly structured array.
1477          */
1478         if (count > 1) {
1479             long[] b; int offset = low;
1480 
1481             if (sorter == null || (b = (long[]) sorter.b) == null) {
1482                 b = new long[size];
1483             } else {
1484                 offset = sorter.offset;
1485             }
1486             mergeRuns(a, b, offset, 1, sorter != null, run, 0, count);
1487         }
1488         return true;
1489     }
1490 
1491     /**
1492      * Merges the specified runs.
1493      *
1494      * @param a the source array
1495      * @param b the temporary buffer used in merging
1496      * @param offset the start index in the source, inclusive
1497      * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0)
1498      * @param parallel indicates whether merging is performed in parallel
1499      * @param run the start indexes of the runs, inclusive
1500      * @param lo the start index of the first run, inclusive
1501      * @param hi the start index of the last run, inclusive
1502      * @return the destination where runs are merged
1503      */
1504     private static long[] mergeRuns(long[] a, long[] b, int offset,
1505             int aim, boolean parallel, int[] run, int lo, int hi) {
1506 
1507         if (hi - lo == 1) {
1508             if (aim >= 0) {
1509                 return a;
1510             }
1511             for (int i = run[hi], j = i - offset, low = run[lo]; i > low;
1512                 b[--j] = a[--i]
1513             );
1514             return b;
1515         }
1516 
1517         /*
1518          * Split into approximately equal parts.
1519          */
1520         int mi = lo, rmi = (run[lo] + run[hi]) >>> 1;
1521         while (run[++mi + 1] <= rmi);
1522 
1523         /*
1524          * Merge the left and right parts.
1525          */
1526         long[] a1, a2;
1527 
1528         if (parallel && hi - lo > MIN_RUN_COUNT) {
1529             RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe();
1530             a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi);
1531             a2 = (long[]) merger.getDestination();
1532         } else {
1533             a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi);
1534             a2 = mergeRuns(a, b, offset,    0, false, run, mi, hi);
1535         }
1536 
1537         long[] dst = a1 == a ? b : a;
1538 
1539         int k   = a1 == a ? run[lo] - offset : run[lo];
1540         int lo1 = a1 == b ? run[lo] - offset : run[lo];
1541         int hi1 = a1 == b ? run[mi] - offset : run[mi];
1542         int lo2 = a2 == b ? run[mi] - offset : run[mi];
1543         int hi2 = a2 == b ? run[hi] - offset : run[hi];
1544 
1545         if (parallel) {
1546             new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke();
1547         } else {
1548             mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2);
1549         }
1550         return dst;
1551     }
1552 
1553     /**
1554      * Merges the sorted parts.
1555      *
1556      * @param merger parallel context
1557      * @param dst the destination where parts are merged
1558      * @param k the start index of the destination, inclusive
1559      * @param a1 the first part
1560      * @param lo1 the start index of the first part, inclusive
1561      * @param hi1 the end index of the first part, exclusive
1562      * @param a2 the second part
1563      * @param lo2 the start index of the second part, inclusive
1564      * @param hi2 the end index of the second part, exclusive
1565      */
1566     private static void mergeParts(Merger merger, long[] dst, int k,
1567             long[] a1, int lo1, int hi1, long[] a2, int lo2, int hi2) {
1568 
1569         if (merger != null && a1 == a2) {
1570 
1571             while (true) {
1572 
1573                 /*
1574                  * The first part must be larger.
1575                  */
1576                 if (hi1 - lo1 < hi2 - lo2) {
1577                     int lo = lo1; lo1 = lo2; lo2 = lo;
1578                     int hi = hi1; hi1 = hi2; hi2 = hi;
1579                 }
1580 
1581                 /*
1582                  * Small parts will be merged sequentially.
1583                  */
1584                 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) {
1585                     break;
1586                 }
1587 
1588                 /*
1589                  * Find the median of the larger part.
1590                  */
1591                 int mi1 = (lo1 + hi1) >>> 1;
1592                 long key = a1[mi1];
1593                 int mi2 = hi2;
1594 
1595                 /*
1596                  * Partition the smaller part.
1597                  */
1598                 for (int loo = lo2; loo < mi2; ) {
1599                     int t = (loo + mi2) >>> 1;
1600 
1601                     if (key > a2[t]) {
1602                         loo = t + 1;
1603                     } else {
1604                         mi2 = t;
1605                     }
1606                 }
1607 
1608                 int d = mi2 - lo2 + mi1 - lo1;
1609 
1610                 /*
1611                  * Merge the right sub-parts in parallel.
1612                  */
1613                 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2);
1614 
1615                 /*
1616                  * Process the sub-left parts.
1617                  */
1618                 hi1 = mi1;
1619                 hi2 = mi2;
1620             }
1621         }
1622 
1623         /*
1624          * Merge small parts sequentially.
1625          */
1626         while (lo1 < hi1 && lo2 < hi2) {
1627             dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++];
1628         }
1629         if (dst != a1 || k < lo1) {
1630             while (lo1 < hi1) {
1631                 dst[k++] = a1[lo1++];
1632             }
1633         }
1634         if (dst != a2 || k < lo2) {
1635             while (lo2 < hi2) {
1636                 dst[k++] = a2[lo2++];
1637             }
1638         }
1639     }
1640 
1641 // [byte]
1642 
1643     /**
1644      * Sorts the specified range of the array using
1645      * counting sort or insertion sort.
1646      *
1647      * @param a the array to be sorted
1648      * @param low the index of the first element, inclusive, to be sorted
1649      * @param high the index of the last element, exclusive, to be sorted
1650      */
1651     static void sort(byte[] a, int low, int high) {
1652         if (high - low > MIN_BYTE_COUNTING_SORT_SIZE) {
1653             countingSort(a, low, high);
1654         } else {
1655             insertionSort(a, low, high);
1656         }
1657     }
1658 
1659     /**
1660      * Sorts the specified range of the array using insertion sort.
1661      *
1662      * @param a the array to be sorted
1663      * @param low the index of the first element, inclusive, to be sorted
1664      * @param high the index of the last element, exclusive, to be sorted
1665      */
1666     private static void insertionSort(byte[] a, int low, int high) {
1667         for (int i, k = low; ++k < high; ) {
1668             byte ai = a[i = k];
1669 
1670             if (ai < a[i - 1]) {
1671                 while (--i >= low && ai < a[i]) {
1672                     a[i + 1] = a[i];
1673                 }
1674                 a[i + 1] = ai;
1675             }
1676         }
1677     }
1678 
1679     /**
1680      * The number of distinct byte values.
1681      */
1682     private static final int NUM_BYTE_VALUES = 1 << 8;
1683 
1684     /**
1685      * Max index of byte counter.
1686      */
1687     private static final int MAX_BYTE_INDEX = Byte.MAX_VALUE + NUM_BYTE_VALUES + 1;
1688 
1689     /**
1690      * Sorts the specified range of the array using counting sort.
1691      *
1692      * @param a the array to be sorted
1693      * @param low the index of the first element, inclusive, to be sorted
1694      * @param high the index of the last element, exclusive, to be sorted
1695      */
1696     private static void countingSort(byte[] a, int low, int high) {
1697         int[] count = new int[NUM_BYTE_VALUES];
1698 
1699         /*
1700          * Compute a histogram with the number of each values.
1701          */
1702         for (int i = high; i > low; ++count[a[--i] & 0xFF]);
1703 
1704         /*
1705          * Place values on their final positions.
1706          */
1707         if (high - low > NUM_BYTE_VALUES) {
1708             for (int i = MAX_BYTE_INDEX; --i > Byte.MAX_VALUE; ) {
1709                 int value = i & 0xFF;
1710 
1711                 for (low = high - count[value]; high > low;
1712                     a[--high] = (byte) value
1713                 );
1714             }
1715         } else {
1716             for (int i = MAX_BYTE_INDEX; high > low; ) {
1717                 while (count[--i & 0xFF] == 0);
1718 
1719                 int value = i & 0xFF;
1720                 int c = count[value];
1721 
1722                 do {
1723                     a[--high] = (byte) value;
1724                 } while (--c > 0);
1725             }
1726         }
1727     }
1728 
1729 // [char]
1730 
1731     /**
1732      * Sorts the specified range of the array using
1733      * counting sort or Dual-Pivot Quicksort.
1734      *
1735      * @param a the array to be sorted
1736      * @param low the index of the first element, inclusive, to be sorted
1737      * @param high the index of the last element, exclusive, to be sorted
1738      */
1739     static void sort(char[] a, int low, int high) {
1740         if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) {
1741             countingSort(a, low, high);
1742         } else {
1743             sort(a, 0, low, high);
1744         }
1745     }
1746 
1747     /**
1748      * Sorts the specified array using the Dual-Pivot Quicksort and/or
1749      * other sorts in special-cases, possibly with parallel partitions.
1750      *
1751      * @param a the array to be sorted
1752      * @param bits the combination of recursion depth and bit flag, where
1753      *        the right bit "0" indicates that array is the leftmost part
1754      * @param low the index of the first element, inclusive, to be sorted
1755      * @param high the index of the last element, exclusive, to be sorted
1756      */
1757     static void sort(char[] a, int bits, int low, int high) {
1758         while (true) {
1759             int end = high - 1, size = high - low;
1760 
1761             /*
1762              * Invoke insertion sort on small leftmost part.
1763              */
1764             if (size < MAX_INSERTION_SORT_SIZE) {
1765                 insertionSort(a, low, high);
1766                 return;
1767             }
1768 
1769             /*
1770              * Switch to counting sort if execution
1771              * time is becoming quadratic.
1772              */
1773             if ((bits += 2) > MAX_RECURSION_DEPTH) {
1774                 countingSort(a, low, high);
1775                 return;
1776             }
1777 
1778             /*
1779              * Use an inexpensive approximation of the golden ratio
1780              * to select five sample elements and determine pivots.
1781              */
1782             int step = (size >> 3) * 3 + 3;
1783 
1784             /*
1785              * Five elements around (and including) the central element
1786              * will be used for pivot selection as described below. The
1787              * unequal choice of spacing these elements was empirically
1788              * determined to work well on a wide variety of inputs.
1789              */
1790             int e1 = low + step;
1791             int e5 = end - step;
1792             int e3 = (e1 + e5) >>> 1;
1793             int e2 = (e1 + e3) >>> 1;
1794             int e4 = (e3 + e5) >>> 1;
1795             char a3 = a[e3];
1796 
1797             /*
1798              * Sort these elements in place by the combination
1799              * of 4-element sorting network and insertion sort.
1800              *
1801              *    5 ------o-----------o------------
1802              *            |           |
1803              *    4 ------|-----o-----o-----o------
1804              *            |     |           |
1805              *    2 ------o-----|-----o-----o------
1806              *                  |     |
1807              *    1 ------------o-----o------------
1808              */
1809             if (a[e5] < a[e2]) { char t = a[e5]; a[e5] = a[e2]; a[e2] = t; }
1810             if (a[e4] < a[e1]) { char t = a[e4]; a[e4] = a[e1]; a[e1] = t; }
1811             if (a[e5] < a[e4]) { char t = a[e5]; a[e5] = a[e4]; a[e4] = t; }
1812             if (a[e2] < a[e1]) { char t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
1813             if (a[e4] < a[e2]) { char t = a[e4]; a[e4] = a[e2]; a[e2] = t; }
1814 
1815             if (a3 < a[e2]) {
1816                 if (a3 < a[e1]) {
1817                     a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3;
1818                 } else {
1819                     a[e3] = a[e2]; a[e2] = a3;
1820                 }
1821             } else if (a3 > a[e4]) {
1822                 if (a3 > a[e5]) {
1823                     a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3;
1824                 } else {
1825                     a[e3] = a[e4]; a[e4] = a3;
1826                 }
1827             }
1828 
1829             // Pointers
1830             int lower = low; // The index of the last element of the left part
1831             int upper = end; // The index of the first element of the right part
1832 
1833             /*
1834              * Partitioning with 2 pivots in case of different elements.
1835              */
1836             if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) {
1837 
1838                 /*
1839                  * Use the first and fifth of the five sorted elements as
1840                  * the pivots. These values are inexpensive approximation
1841                  * of tertiles. Note, that pivot1 < pivot2.
1842                  */
1843                 char pivot1 = a[e1];
1844                 char pivot2 = a[e5];
1845 
1846                 /*
1847                  * The first and the last elements to be sorted are moved
1848                  * to the locations formerly occupied by the pivots. When
1849                  * partitioning is completed, the pivots are swapped back
1850                  * into their final positions, and excluded from the next
1851                  * subsequent sorting.
1852                  */
1853                 a[e1] = a[lower];
1854                 a[e5] = a[upper];
1855 
1856                 /*
1857                  * Skip elements, which are less or greater than the pivots.
1858                  */
1859                 while (a[++lower] < pivot1);
1860                 while (a[--upper] > pivot2);
1861 
1862                 /*
1863                  * Backward 3-interval partitioning
1864                  *
1865                  *   left part                 central part          right part
1866                  * +------------------------------------------------------------+
1867                  * |  < pivot1  |   ?   |  pivot1 <= && <= pivot2  |  > pivot2  |
1868                  * +------------------------------------------------------------+
1869                  *             ^       ^                            ^
1870                  *             |       |                            |
1871                  *           lower     k                          upper
1872                  *
1873                  * Invariants:
1874                  *
1875                  *              all in (low, lower] < pivot1
1876                  *    pivot1 <= all in (k, upper)  <= pivot2
1877                  *              all in [upper, end) > pivot2
1878                  *
1879                  * Pointer k is the last index of ?-part
1880                  */
1881                 for (int unused = --lower, k = ++upper; --k > lower; ) {
1882                     char ak = a[k];
1883 
1884                     if (ak < pivot1) { // Move a[k] to the left side
1885                         while (lower < k) {
1886                             if (a[++lower] >= pivot1) {
1887                                 if (a[lower] > pivot2) {
1888                                     a[k] = a[--upper];
1889                                     a[upper] = a[lower];
1890                                 } else {
1891                                     a[k] = a[lower];
1892                                 }
1893                                 a[lower] = ak;
1894                                 break;
1895                             }
1896                         }
1897                     } else if (ak > pivot2) { // Move a[k] to the right side
1898                         a[k] = a[--upper];
1899                         a[upper] = ak;
1900                     }
1901                 }
1902 
1903                 /*
1904                  * Swap the pivots into their final positions.
1905                  */
1906                 a[low] = a[lower]; a[lower] = pivot1;
1907                 a[end] = a[upper]; a[upper] = pivot2;
1908 
1909                 /*
1910                  * Sort non-left parts recursively,
1911                  * excluding known pivots.
1912                  */
1913                 sort(a, bits | 1, lower + 1, upper);
1914                 sort(a, bits | 1, upper + 1, high);
1915 
1916             } else { // Use single pivot in case of many equal elements
1917 
1918                 /*
1919                  * Use the third of the five sorted elements as the pivot.
1920                  * This value is inexpensive approximation of the median.
1921                  */
1922                 char pivot = a[e3];
1923 
1924                 /*
1925                  * The first element to be sorted is moved to the
1926                  * location formerly occupied by the pivot. After
1927                  * completion of partitioning the pivot is swapped
1928                  * back into its final position, and excluded from
1929                  * the next subsequent sorting.
1930                  */
1931                 a[e3] = a[lower];
1932 
1933                 /*
1934                  * Traditional 3-way (Dutch National Flag) partitioning
1935                  *
1936                  *   left part                 central part    right part
1937                  * +------------------------------------------------------+
1938                  * |   < pivot   |     ?     |   == pivot   |   > pivot   |
1939                  * +------------------------------------------------------+
1940                  *              ^           ^                ^
1941                  *              |           |                |
1942                  *            lower         k              upper
1943                  *
1944                  * Invariants:
1945                  *
1946                  *   all in (low, lower] < pivot
1947                  *   all in (k, upper)  == pivot
1948                  *   all in [upper, end] > pivot
1949                  *
1950                  * Pointer k is the last index of ?-part
1951                  */
1952                 for (int k = ++upper; --k > lower; ) {
1953                     char ak = a[k];
1954 
1955                     if (ak != pivot) {
1956                         a[k] = pivot;
1957 
1958                         if (ak < pivot) { // Move a[k] to the left side
1959                             while (a[++lower] < pivot);
1960 
1961                             if (a[lower] > pivot) {
1962                                 a[--upper] = a[lower];
1963                             }
1964                             a[lower] = ak;
1965                         } else { // ak > pivot - Move a[k] to the right side
1966                             a[--upper] = ak;
1967                         }
1968                     }
1969                 }
1970 
1971                 /*
1972                  * Swap the pivot into its final position.
1973                  */
1974                 a[low] = a[lower]; a[lower] = pivot;
1975 
1976                 /*
1977                  * Sort the right part, excluding known pivot.
1978                  * All elements from the central part are
1979                  * equal and therefore already sorted.
1980                  */
1981                 sort(a, bits | 1, upper, high);
1982             }
1983             high = lower; // Iterate along the left part
1984         }
1985     }
1986 
1987     /**
1988      * Sorts the specified range of the array using insertion sort.
1989      *
1990      * @param a the array to be sorted
1991      * @param low the index of the first element, inclusive, to be sorted
1992      * @param high the index of the last element, exclusive, to be sorted
1993      */
1994     private static void insertionSort(char[] a, int low, int high) {
1995         for (int i, k = low; ++k < high; ) {
1996             char ai = a[i = k];
1997 
1998             if (ai < a[i - 1]) {
1999                 while (--i >= low && ai < a[i]) {
2000                     a[i + 1] = a[i];
2001                 }
2002                 a[i + 1] = ai;
2003             }
2004         }
2005     }
2006 
2007     /**
2008      * The number of distinct char values.
2009      */
2010     private static final int NUM_CHAR_VALUES = 1 << 16;
2011 
2012     /**
2013      * Sorts the specified range of the array using counting sort.
2014      *
2015      * @param a the array to be sorted
2016      * @param low the index of the first element, inclusive, to be sorted
2017      * @param high the index of the last element, exclusive, to be sorted
2018      */
2019     private static void countingSort(char[] a, int low, int high) {
2020         int[] count = new int[NUM_CHAR_VALUES];
2021 
2022         /*
2023          * Compute a histogram with the number of each values.
2024          */
2025         for (int i = high; i > low; ++count[a[--i]]);
2026 
2027         /*
2028          * Place values on their final positions.
2029          */
2030         if (high - low > NUM_CHAR_VALUES) {
2031             for (int i = NUM_CHAR_VALUES; i > 0; ) {
2032                 for (low = high - count[--i]; high > low;
2033                     a[--high] = (char) i
2034                 );
2035             }
2036         } else {
2037             for (int i = NUM_CHAR_VALUES; high > low; ) {
2038                 while (count[--i] == 0);
2039                 int c = count[i];
2040 
2041                 do {
2042                     a[--high] = (char) i;
2043                 } while (--c > 0);
2044             }
2045         }
2046     }
2047 
2048 // [short]
2049 
2050     /**
2051      * Sorts the specified range of the array using
2052      * counting sort or Dual-Pivot Quicksort.
2053      *
2054      * @param a the array to be sorted
2055      * @param low the index of the first element, inclusive, to be sorted
2056      * @param high the index of the last element, exclusive, to be sorted
2057      */
2058     static void sort(short[] a, int low, int high) {
2059         if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) {
2060             countingSort(a, low, high);
2061         } else {
2062             sort(a, 0, low, high);
2063         }
2064     }
2065 
2066     /**
2067      * Sorts the specified array using the Dual-Pivot Quicksort and/or
2068      * other sorts in special-cases, possibly with parallel partitions.
2069      *
2070      * @param a the array to be sorted
2071      * @param bits the combination of recursion depth and bit flag, where
2072      *        the right bit "0" indicates that array is the leftmost part
2073      * @param low the index of the first element, inclusive, to be sorted
2074      * @param high the index of the last element, exclusive, to be sorted
2075      */
2076     static void sort(short[] a, int bits, int low, int high) {
2077         while (true) {
2078             int end = high - 1, size = high - low;
2079 
2080             /*
2081              * Invoke insertion sort on small leftmost part.
2082              */
2083             if (size < MAX_INSERTION_SORT_SIZE) {
2084                 insertionSort(a, low, high);
2085                 return;
2086             }
2087 
2088             /*
2089              * Switch to counting sort if execution
2090              * time is becoming quadratic.
2091              */
2092             if ((bits += 2) > MAX_RECURSION_DEPTH) {
2093                 countingSort(a, low, high);
2094                 return;
2095             }
2096 
2097             /*
2098              * Use an inexpensive approximation of the golden ratio
2099              * to select five sample elements and determine pivots.
2100              */
2101             int step = (size >> 3) * 3 + 3;
2102 
2103             /*
2104              * Five elements around (and including) the central element
2105              * will be used for pivot selection as described below. The
2106              * unequal choice of spacing these elements was empirically
2107              * determined to work well on a wide variety of inputs.
2108              */
2109             int e1 = low + step;
2110             int e5 = end - step;
2111             int e3 = (e1 + e5) >>> 1;
2112             int e2 = (e1 + e3) >>> 1;
2113             int e4 = (e3 + e5) >>> 1;
2114             short a3 = a[e3];
2115 
2116             /*
2117              * Sort these elements in place by the combination
2118              * of 4-element sorting network and insertion sort.
2119              *
2120              *    5 ------o-----------o------------
2121              *            |           |
2122              *    4 ------|-----o-----o-----o------
2123              *            |     |           |
2124              *    2 ------o-----|-----o-----o------
2125              *                  |     |
2126              *    1 ------------o-----o------------
2127              */
2128             if (a[e5] < a[e2]) { short t = a[e5]; a[e5] = a[e2]; a[e2] = t; }
2129             if (a[e4] < a[e1]) { short t = a[e4]; a[e4] = a[e1]; a[e1] = t; }
2130             if (a[e5] < a[e4]) { short t = a[e5]; a[e5] = a[e4]; a[e4] = t; }
2131             if (a[e2] < a[e1]) { short t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
2132             if (a[e4] < a[e2]) { short t = a[e4]; a[e4] = a[e2]; a[e2] = t; }
2133 
2134             if (a3 < a[e2]) {
2135                 if (a3 < a[e1]) {
2136                     a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3;
2137                 } else {
2138                     a[e3] = a[e2]; a[e2] = a3;
2139                 }
2140             } else if (a3 > a[e4]) {
2141                 if (a3 > a[e5]) {
2142                     a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3;
2143                 } else {
2144                     a[e3] = a[e4]; a[e4] = a3;
2145                 }
2146             }
2147 
2148             // Pointers
2149             int lower = low; // The index of the last element of the left part
2150             int upper = end; // The index of the first element of the right part
2151 
2152             /*
2153              * Partitioning with 2 pivots in case of different elements.
2154              */
2155             if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) {
2156 
2157                 /*
2158                  * Use the first and fifth of the five sorted elements as
2159                  * the pivots. These values are inexpensive approximation
2160                  * of tertiles. Note, that pivot1 < pivot2.
2161                  */
2162                 short pivot1 = a[e1];
2163                 short pivot2 = a[e5];
2164 
2165                 /*
2166                  * The first and the last elements to be sorted are moved
2167                  * to the locations formerly occupied by the pivots. When
2168                  * partitioning is completed, the pivots are swapped back
2169                  * into their final positions, and excluded from the next
2170                  * subsequent sorting.
2171                  */
2172                 a[e1] = a[lower];
2173                 a[e5] = a[upper];
2174 
2175                 /*
2176                  * Skip elements, which are less or greater than the pivots.
2177                  */
2178                 while (a[++lower] < pivot1);
2179                 while (a[--upper] > pivot2);
2180 
2181                 /*
2182                  * Backward 3-interval partitioning
2183                  *
2184                  *   left part                 central part          right part
2185                  * +------------------------------------------------------------+
2186                  * |  < pivot1  |   ?   |  pivot1 <= && <= pivot2  |  > pivot2  |
2187                  * +------------------------------------------------------------+
2188                  *             ^       ^                            ^
2189                  *             |       |                            |
2190                  *           lower     k                          upper
2191                  *
2192                  * Invariants:
2193                  *
2194                  *              all in (low, lower] < pivot1
2195                  *    pivot1 <= all in (k, upper)  <= pivot2
2196                  *              all in [upper, end) > pivot2
2197                  *
2198                  * Pointer k is the last index of ?-part
2199                  */
2200                 for (int unused = --lower, k = ++upper; --k > lower; ) {
2201                     short ak = a[k];
2202 
2203                     if (ak < pivot1) { // Move a[k] to the left side
2204                         while (lower < k) {
2205                             if (a[++lower] >= pivot1) {
2206                                 if (a[lower] > pivot2) {
2207                                     a[k] = a[--upper];
2208                                     a[upper] = a[lower];
2209                                 } else {
2210                                     a[k] = a[lower];
2211                                 }
2212                                 a[lower] = ak;
2213                                 break;
2214                             }
2215                         }
2216                     } else if (ak > pivot2) { // Move a[k] to the right side
2217                         a[k] = a[--upper];
2218                         a[upper] = ak;
2219                     }
2220                 }
2221 
2222                 /*
2223                  * Swap the pivots into their final positions.
2224                  */
2225                 a[low] = a[lower]; a[lower] = pivot1;
2226                 a[end] = a[upper]; a[upper] = pivot2;
2227 
2228                 /*
2229                  * Sort non-left parts recursively,
2230                  * excluding known pivots.
2231                  */
2232                 sort(a, bits | 1, lower + 1, upper);
2233                 sort(a, bits | 1, upper + 1, high);
2234 
2235             } else { // Use single pivot in case of many equal elements
2236 
2237                 /*
2238                  * Use the third of the five sorted elements as the pivot.
2239                  * This value is inexpensive approximation of the median.
2240                  */
2241                 short pivot = a[e3];
2242 
2243                 /*
2244                  * The first element to be sorted is moved to the
2245                  * location formerly occupied by the pivot. After
2246                  * completion of partitioning the pivot is swapped
2247                  * back into its final position, and excluded from
2248                  * the next subsequent sorting.
2249                  */
2250                 a[e3] = a[lower];
2251 
2252                 /*
2253                  * Traditional 3-way (Dutch National Flag) partitioning
2254                  *
2255                  *   left part                 central part    right part
2256                  * +------------------------------------------------------+
2257                  * |   < pivot   |     ?     |   == pivot   |   > pivot   |
2258                  * +------------------------------------------------------+
2259                  *              ^           ^                ^
2260                  *              |           |                |
2261                  *            lower         k              upper
2262                  *
2263                  * Invariants:
2264                  *
2265                  *   all in (low, lower] < pivot
2266                  *   all in (k, upper)  == pivot
2267                  *   all in [upper, end] > pivot
2268                  *
2269                  * Pointer k is the last index of ?-part
2270                  */
2271                 for (int k = ++upper; --k > lower; ) {
2272                     short ak = a[k];
2273 
2274                     if (ak != pivot) {
2275                         a[k] = pivot;
2276 
2277                         if (ak < pivot) { // Move a[k] to the left side
2278                             while (a[++lower] < pivot);
2279 
2280                             if (a[lower] > pivot) {
2281                                 a[--upper] = a[lower];
2282                             }
2283                             a[lower] = ak;
2284                         } else { // ak > pivot - Move a[k] to the right side
2285                             a[--upper] = ak;
2286                         }
2287                     }
2288                 }
2289 
2290                 /*
2291                  * Swap the pivot into its final position.
2292                  */
2293                 a[low] = a[lower]; a[lower] = pivot;
2294 
2295                 /*
2296                  * Sort the right part, excluding known pivot.
2297                  * All elements from the central part are
2298                  * equal and therefore already sorted.
2299                  */
2300                 sort(a, bits | 1, upper, high);
2301             }
2302             high = lower; // Iterate along the left part
2303         }
2304     }
2305 
2306     /**
2307      * Sorts the specified range of the array using insertion sort.
2308      *
2309      * @param a the array to be sorted
2310      * @param low the index of the first element, inclusive, to be sorted
2311      * @param high the index of the last element, exclusive, to be sorted
2312      */
2313     private static void insertionSort(short[] a, int low, int high) {
2314         for (int i, k = low; ++k < high; ) {
2315             short ai = a[i = k];
2316 
2317             if (ai < a[i - 1]) {
2318                 while (--i >= low && ai < a[i]) {
2319                     a[i + 1] = a[i];
2320                 }
2321                 a[i + 1] = ai;
2322             }
2323         }
2324     }
2325 
2326     /**
2327      * The number of distinct short values.
2328      */
2329     private static final int NUM_SHORT_VALUES = 1 << 16;
2330 
2331     /**
2332      * Max index of short counter.
2333      */
2334     private static final int MAX_SHORT_INDEX = Short.MAX_VALUE + NUM_SHORT_VALUES + 1;
2335 
2336     /**
2337      * Sorts the specified range of the array using counting sort.
2338      *
2339      * @param a the array to be sorted
2340      * @param low the index of the first element, inclusive, to be sorted
2341      * @param high the index of the last element, exclusive, to be sorted
2342      */
2343     private static void countingSort(short[] a, int low, int high) {
2344         int[] count = new int[NUM_SHORT_VALUES];
2345 
2346         /*
2347          * Compute a histogram with the number of each values.
2348          */
2349         for (int i = high; i > low; ++count[a[--i] & 0xFFFF]);
2350 
2351         /*
2352          * Place values on their final positions.
2353          */
2354         if (high - low > NUM_SHORT_VALUES) {
2355             for (int i = MAX_SHORT_INDEX; --i > Short.MAX_VALUE; ) {
2356                 int value = i & 0xFFFF;
2357 
2358                 for (low = high - count[value]; high > low;
2359                     a[--high] = (short) value
2360                 );
2361             }
2362         } else {
2363             for (int i = MAX_SHORT_INDEX; high > low; ) {
2364                 while (count[--i & 0xFFFF] == 0);
2365 
2366                 int value = i & 0xFFFF;
2367                 int c = count[value];
2368 
2369                 do {
2370                     a[--high] = (short) value;
2371                 } while (--c > 0);
2372             }
2373         }
2374     }
2375 
2376 // [float]
2377 
2378     /**
2379      * Sorts the specified range of the array using parallel merge
2380      * sort and/or Dual-Pivot Quicksort.
2381      *
2382      * To balance the faster splitting and parallelism of merge sort
2383      * with the faster element partitioning of Quicksort, ranges are
2384      * subdivided in tiers such that, if there is enough parallelism,
2385      * the four-way parallel merge is started, still ensuring enough
2386      * parallelism to process the partitions.
2387      *
2388      * @param a the array to be sorted
2389      * @param parallelism the parallelism level
2390      * @param low the index of the first element, inclusive, to be sorted
2391      * @param high the index of the last element, exclusive, to be sorted
2392      */
2393     static void sort(float[] a, int parallelism, int low, int high) {
2394         /*
2395          * Phase 1. Count the number of negative zero -0.0f,
2396          * turn them into positive zero, and move all NaNs
2397          * to the end of the array.
2398          */
2399         int numNegativeZero = 0;
2400 
2401         for (int k = high; k > low; ) {
2402             float ak = a[--k];
2403 
2404             if (ak == 0.0f && Float.floatToRawIntBits(ak) < 0) { // ak is -0.0f
2405                 numNegativeZero += 1;
2406                 a[k] = 0.0f;
2407             } else if (ak != ak) { // ak is NaN
2408                 a[k] = a[--high];
2409                 a[high] = ak;
2410             }
2411         }
2412 
2413         /*
2414          * Phase 2. Sort everything except NaNs,
2415          * which are already in place.
2416          */
2417         int size = high - low;
2418 
2419         if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) {
2420             int depth = getDepth(parallelism, size >> 12);
2421             float[] b = depth == 0 ? null : new float[size];
2422             new Sorter(null, a, b, low, size, low, depth).invoke();
2423         } else {
2424             sort(null, a, 0, low, high);
2425         }
2426 
2427         /*
2428          * Phase 3. Turn positive zero 0.0f
2429          * back into negative zero -0.0f.
2430          */
2431         if (++numNegativeZero == 1) {
2432             return;
2433         }
2434 
2435         /*
2436          * Find the position one less than
2437          * the index of the first zero.
2438          */
2439         while (low <= high) {
2440             int middle = (low + high) >>> 1;
2441 
2442             if (a[middle] < 0) {
2443                 low = middle + 1;
2444             } else {
2445                 high = middle - 1;
2446             }
2447         }
2448 
2449         /*
2450          * Replace the required number of 0.0f by -0.0f.
2451          */
2452         while (--numNegativeZero > 0) {
2453             a[++high] = -0.0f;
2454         }
2455     }
2456 
2457     /**
2458      * Sorts the specified array using the Dual-Pivot Quicksort and/or
2459      * other sorts in special-cases, possibly with parallel partitions.
2460      *
2461      * @param sorter parallel context
2462      * @param a the array to be sorted
2463      * @param bits the combination of recursion depth and bit flag, where
2464      *        the right bit "0" indicates that array is the leftmost part
2465      * @param low the index of the first element, inclusive, to be sorted
2466      * @param high the index of the last element, exclusive, to be sorted
2467      */
2468     static void sort(Sorter sorter, float[] a, int bits, int low, int high) {
2469         while (true) {
2470             int end = high - 1, size = high - low;
2471 
2472             /*
2473              * Run mixed insertion sort on small non-leftmost parts.
2474              */
2475             if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) {
2476                 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high);
2477                 return;
2478             }
2479 
2480             /*
2481              * Invoke insertion sort on small leftmost part.
2482              */
2483             if (size < MAX_INSERTION_SORT_SIZE) {
2484                 insertionSort(a, low, high);
2485                 return;
2486             }
2487 
2488             /*
2489              * Check if the whole array or large non-leftmost
2490              * parts are nearly sorted and then merge runs.
2491              */
2492             if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0)
2493                     && tryMergeRuns(sorter, a, low, size)) {
2494                 return;
2495             }
2496 
2497             /*
2498              * Switch to heap sort if execution
2499              * time is becoming quadratic.
2500              */
2501             if ((bits += 2) > MAX_RECURSION_DEPTH) {
2502                 heapSort(a, low, high);
2503                 return;
2504             }
2505 
2506             /*
2507              * Use an inexpensive approximation of the golden ratio
2508              * to select five sample elements and determine pivots.
2509              */
2510             int step = (size >> 3) * 3 + 3;
2511 
2512             /*
2513              * Five elements around (and including) the central element
2514              * will be used for pivot selection as described below. The
2515              * unequal choice of spacing these elements was empirically
2516              * determined to work well on a wide variety of inputs.
2517              */
2518             int e1 = low + step;
2519             int e5 = end - step;
2520             int e3 = (e1 + e5) >>> 1;
2521             int e2 = (e1 + e3) >>> 1;
2522             int e4 = (e3 + e5) >>> 1;
2523             float a3 = a[e3];
2524 
2525             /*
2526              * Sort these elements in place by the combination
2527              * of 4-element sorting network and insertion sort.
2528              *
2529              *    5 ------o-----------o------------
2530              *            |           |
2531              *    4 ------|-----o-----o-----o------
2532              *            |     |           |
2533              *    2 ------o-----|-----o-----o------
2534              *                  |     |
2535              *    1 ------------o-----o------------
2536              */
2537             if (a[e5] < a[e2]) { float t = a[e5]; a[e5] = a[e2]; a[e2] = t; }
2538             if (a[e4] < a[e1]) { float t = a[e4]; a[e4] = a[e1]; a[e1] = t; }
2539             if (a[e5] < a[e4]) { float t = a[e5]; a[e5] = a[e4]; a[e4] = t; }
2540             if (a[e2] < a[e1]) { float t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
2541             if (a[e4] < a[e2]) { float t = a[e4]; a[e4] = a[e2]; a[e2] = t; }
2542 
2543             if (a3 < a[e2]) {
2544                 if (a3 < a[e1]) {
2545                     a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3;
2546                 } else {
2547                     a[e3] = a[e2]; a[e2] = a3;
2548                 }
2549             } else if (a3 > a[e4]) {
2550                 if (a3 > a[e5]) {
2551                     a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3;
2552                 } else {
2553                     a[e3] = a[e4]; a[e4] = a3;
2554                 }
2555             }
2556 
2557             // Pointers
2558             int lower = low; // The index of the last element of the left part
2559             int upper = end; // The index of the first element of the right part
2560 
2561             /*
2562              * Partitioning with 2 pivots in case of different elements.
2563              */
2564             if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) {
2565 
2566                 /*
2567                  * Use the first and fifth of the five sorted elements as
2568                  * the pivots. These values are inexpensive approximation
2569                  * of tertiles. Note, that pivot1 < pivot2.
2570                  */
2571                 float pivot1 = a[e1];
2572                 float pivot2 = a[e5];
2573 
2574                 /*
2575                  * The first and the last elements to be sorted are moved
2576                  * to the locations formerly occupied by the pivots. When
2577                  * partitioning is completed, the pivots are swapped back
2578                  * into their final positions, and excluded from the next
2579                  * subsequent sorting.
2580                  */
2581                 a[e1] = a[lower];
2582                 a[e5] = a[upper];
2583 
2584                 /*
2585                  * Skip elements, which are less or greater than the pivots.
2586                  */
2587                 while (a[++lower] < pivot1);
2588                 while (a[--upper] > pivot2);
2589 
2590                 /*
2591                  * Backward 3-interval partitioning
2592                  *
2593                  *   left part                 central part          right part
2594                  * +------------------------------------------------------------+
2595                  * |  < pivot1  |   ?   |  pivot1 <= && <= pivot2  |  > pivot2  |
2596                  * +------------------------------------------------------------+
2597                  *             ^       ^                            ^
2598                  *             |       |                            |
2599                  *           lower     k                          upper
2600                  *
2601                  * Invariants:
2602                  *
2603                  *              all in (low, lower] < pivot1
2604                  *    pivot1 <= all in (k, upper)  <= pivot2
2605                  *              all in [upper, end) > pivot2
2606                  *
2607                  * Pointer k is the last index of ?-part
2608                  */
2609                 for (int unused = --lower, k = ++upper; --k > lower; ) {
2610                     float ak = a[k];
2611 
2612                     if (ak < pivot1) { // Move a[k] to the left side
2613                         while (lower < k) {
2614                             if (a[++lower] >= pivot1) {
2615                                 if (a[lower] > pivot2) {
2616                                     a[k] = a[--upper];
2617                                     a[upper] = a[lower];
2618                                 } else {
2619                                     a[k] = a[lower];
2620                                 }
2621                                 a[lower] = ak;
2622                                 break;
2623                             }
2624                         }
2625                     } else if (ak > pivot2) { // Move a[k] to the right side
2626                         a[k] = a[--upper];
2627                         a[upper] = ak;
2628                     }
2629                 }
2630 
2631                 /*
2632                  * Swap the pivots into their final positions.
2633                  */
2634                 a[low] = a[lower]; a[lower] = pivot1;
2635                 a[end] = a[upper]; a[upper] = pivot2;
2636 
2637                 /*
2638                  * Sort non-left parts recursively (possibly in parallel),
2639                  * excluding known pivots.
2640                  */
2641                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
2642                     sorter.forkSorter(bits | 1, lower + 1, upper);
2643                     sorter.forkSorter(bits | 1, upper + 1, high);
2644                 } else {
2645                     sort(sorter, a, bits | 1, lower + 1, upper);
2646                     sort(sorter, a, bits | 1, upper + 1, high);
2647                 }
2648 
2649             } else { // Use single pivot in case of many equal elements
2650 
2651                 /*
2652                  * Use the third of the five sorted elements as the pivot.
2653                  * This value is inexpensive approximation of the median.
2654                  */
2655                 float pivot = a[e3];
2656 
2657                 /*
2658                  * The first element to be sorted is moved to the
2659                  * location formerly occupied by the pivot. After
2660                  * completion of partitioning the pivot is swapped
2661                  * back into its final position, and excluded from
2662                  * the next subsequent sorting.
2663                  */
2664                 a[e3] = a[lower];
2665 
2666                 /*
2667                  * Traditional 3-way (Dutch National Flag) partitioning
2668                  *
2669                  *   left part                 central part    right part
2670                  * +------------------------------------------------------+
2671                  * |   < pivot   |     ?     |   == pivot   |   > pivot   |
2672                  * +------------------------------------------------------+
2673                  *              ^           ^                ^
2674                  *              |           |                |
2675                  *            lower         k              upper
2676                  *
2677                  * Invariants:
2678                  *
2679                  *   all in (low, lower] < pivot
2680                  *   all in (k, upper)  == pivot
2681                  *   all in [upper, end] > pivot
2682                  *
2683                  * Pointer k is the last index of ?-part
2684                  */
2685                 for (int k = ++upper; --k > lower; ) {
2686                     float ak = a[k];
2687 
2688                     if (ak != pivot) {
2689                         a[k] = pivot;
2690 
2691                         if (ak < pivot) { // Move a[k] to the left side
2692                             while (a[++lower] < pivot);
2693 
2694                             if (a[lower] > pivot) {
2695                                 a[--upper] = a[lower];
2696                             }
2697                             a[lower] = ak;
2698                         } else { // ak > pivot - Move a[k] to the right side
2699                             a[--upper] = ak;
2700                         }
2701                     }
2702                 }
2703 
2704                 /*
2705                  * Swap the pivot into its final position.
2706                  */
2707                 a[low] = a[lower]; a[lower] = pivot;
2708 
2709                 /*
2710                  * Sort the right part (possibly in parallel), excluding
2711                  * known pivot. All elements from the central part are
2712                  * equal and therefore already sorted.
2713                  */
2714                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
2715                     sorter.forkSorter(bits | 1, upper, high);
2716                 } else {
2717                     sort(sorter, a, bits | 1, upper, high);
2718                 }
2719             }
2720             high = lower; // Iterate along the left part
2721         }
2722     }
2723 
2724     /**
2725      * Sorts the specified range of the array using mixed insertion sort.
2726      *
2727      * Mixed insertion sort is combination of simple insertion sort,
2728      * pin insertion sort and pair insertion sort.
2729      *
2730      * In the context of Dual-Pivot Quicksort, the pivot element
2731      * from the left part plays the role of sentinel, because it
2732      * is less than any elements from the given part. Therefore,
2733      * expensive check of the left range can be skipped on each
2734      * iteration unless it is the leftmost call.
2735      *
2736      * @param a the array to be sorted
2737      * @param low the index of the first element, inclusive, to be sorted
2738      * @param end the index of the last element for simple insertion sort
2739      * @param high the index of the last element, exclusive, to be sorted
2740      */
2741     private static void mixedInsertionSort(float[] a, int low, int end, int high) {
2742         if (end == high) {
2743 
2744             /*
2745              * Invoke simple insertion sort on tiny array.
2746              */
2747             for (int i; ++low < end; ) {
2748                 float ai = a[i = low];
2749 
2750                 while (ai < a[--i]) {
2751                     a[i + 1] = a[i];
2752                 }
2753                 a[i + 1] = ai;
2754             }
2755         } else {
2756 
2757             /*
2758              * Start with pin insertion sort on small part.
2759              *
2760              * Pin insertion sort is extended simple insertion sort.
2761              * The main idea of this sort is to put elements larger
2762              * than an element called pin to the end of array (the
2763              * proper area for such elements). It avoids expensive
2764              * movements of these elements through the whole array.
2765              */
2766             float pin = a[end];
2767 
2768             for (int i, p = high; ++low < end; ) {
2769                 float ai = a[i = low];
2770 
2771                 if (ai < a[i - 1]) { // Small element
2772 
2773                     /*
2774                      * Insert small element into sorted part.
2775                      */
2776                     a[i] = a[--i];
2777 
2778                     while (ai < a[--i]) {
2779                         a[i + 1] = a[i];
2780                     }
2781                     a[i + 1] = ai;
2782 
2783                 } else if (p > i && ai > pin) { // Large element
2784 
2785                     /*
2786                      * Find element smaller than pin.
2787                      */
2788                     while (a[--p] > pin);
2789 
2790                     /*
2791                      * Swap it with large element.
2792                      */
2793                     if (p > i) {
2794                         ai = a[p];
2795                         a[p] = a[i];
2796                     }
2797 
2798                     /*
2799                      * Insert small element into sorted part.
2800                      */
2801                     while (ai < a[--i]) {
2802                         a[i + 1] = a[i];
2803                     }
2804                     a[i + 1] = ai;
2805                 }
2806             }
2807 
2808             /*
2809              * Continue with pair insertion sort on remain part.
2810              */
2811             for (int i; low < high; ++low) {
2812                 float a1 = a[i = low], a2 = a[++low];
2813 
2814                 /*
2815                  * Insert two elements per iteration: at first, insert the
2816                  * larger element and then insert the smaller element, but
2817                  * from the position where the larger element was inserted.
2818                  */
2819                 if (a1 > a2) {
2820 
2821                     while (a1 < a[--i]) {
2822                         a[i + 2] = a[i];
2823                     }
2824                     a[++i + 1] = a1;
2825 
2826                     while (a2 < a[--i]) {
2827                         a[i + 1] = a[i];
2828                     }
2829                     a[i + 1] = a2;
2830 
2831                 } else if (a1 < a[i - 1]) {
2832 
2833                     while (a2 < a[--i]) {
2834                         a[i + 2] = a[i];
2835                     }
2836                     a[++i + 1] = a2;
2837 
2838                     while (a1 < a[--i]) {
2839                         a[i + 1] = a[i];
2840                     }
2841                     a[i + 1] = a1;
2842                 }
2843             }
2844         }
2845     }
2846 
2847     /**
2848      * Sorts the specified range of the array using insertion sort.
2849      *
2850      * @param a the array to be sorted
2851      * @param low the index of the first element, inclusive, to be sorted
2852      * @param high the index of the last element, exclusive, to be sorted
2853      */
2854     private static void insertionSort(float[] a, int low, int high) {
2855         for (int i, k = low; ++k < high; ) {
2856             float ai = a[i = k];
2857 
2858             if (ai < a[i - 1]) {
2859                 while (--i >= low && ai < a[i]) {
2860                     a[i + 1] = a[i];
2861                 }
2862                 a[i + 1] = ai;
2863             }
2864         }
2865     }
2866 
2867     /**
2868      * Sorts the specified range of the array using heap sort.
2869      *
2870      * @param a the array to be sorted
2871      * @param low the index of the first element, inclusive, to be sorted
2872      * @param high the index of the last element, exclusive, to be sorted
2873      */
2874     private static void heapSort(float[] a, int low, int high) {
2875         for (int k = (low + high) >>> 1; k > low; ) {
2876             pushDown(a, --k, a[k], low, high);
2877         }
2878         while (--high > low) {
2879             float max = a[low];
2880             pushDown(a, low, a[high], low, high);
2881             a[high] = max;
2882         }
2883     }
2884 
2885     /**
2886      * Pushes specified element down during heap sort.
2887      *
2888      * @param a the given array
2889      * @param p the start index
2890      * @param value the given element
2891      * @param low the index of the first element, inclusive, to be sorted
2892      * @param high the index of the last element, exclusive, to be sorted
2893      */
2894     private static void pushDown(float[] a, int p, float value, int low, int high) {
2895         for (int k ;; a[p] = a[p = k]) {
2896             k = (p << 1) - low + 2; // Index of the right child
2897 
2898             if (k > high) {
2899                 break;
2900             }
2901             if (k == high || a[k] < a[k - 1]) {
2902                 --k;
2903             }
2904             if (a[k] <= value) {
2905                 break;
2906             }
2907         }
2908         a[p] = value;
2909     }
2910 
2911     /**
2912      * Tries to sort the specified range of the array.
2913      *
2914      * @param sorter parallel context
2915      * @param a the array to be sorted
2916      * @param low the index of the first element to be sorted
2917      * @param size the array size
2918      * @return true if finally sorted, false otherwise
2919      */
2920     private static boolean tryMergeRuns(Sorter sorter, float[] a, int low, int size) {
2921 
2922         /*
2923          * The run array is constructed only if initial runs are
2924          * long enough to continue, run[i] then holds start index
2925          * of the i-th sequence of elements in non-descending order.
2926          */
2927         int[] run = null;
2928         int high = low + size;
2929         int count = 1, last = low;
2930 
2931         /*
2932          * Identify all possible runs.
2933          */
2934         for (int k = low + 1; k < high; ) {
2935 
2936             /*
2937              * Find the end index of the current run.
2938              */
2939             if (a[k - 1] < a[k]) {
2940 
2941                 // Identify ascending sequence
2942                 while (++k < high && a[k - 1] <= a[k]);
2943 
2944             } else if (a[k - 1] > a[k]) {
2945 
2946                 // Identify descending sequence
2947                 while (++k < high && a[k - 1] >= a[k]);
2948 
2949                 // Reverse into ascending order
2950                 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) {
2951                     float ai = a[i]; a[i] = a[j]; a[j] = ai;
2952                 }
2953             } else { // Identify constant sequence
2954                 for (float ak = a[k]; ++k < high && ak == a[k]; );
2955 
2956                 if (k < high) {
2957                     continue;
2958                 }
2959             }
2960 
2961             /*
2962              * Check special cases.
2963              */
2964             if (run == null) {
2965                 if (k == high) {
2966 
2967                     /*
2968                      * The array is monotonous sequence,
2969                      * and therefore already sorted.
2970                      */
2971                     return true;
2972                 }
2973 
2974                 if (k - low < MIN_FIRST_RUN_SIZE) {
2975 
2976                     /*
2977                      * The first run is too small
2978                      * to proceed with scanning.
2979                      */
2980                     return false;
2981                 }
2982 
2983                 run = new int[((size >> 10) | 0x7F) & 0x3FF];
2984                 run[0] = low;
2985 
2986             } else if (a[last - 1] > a[last]) {
2987 
2988                 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) {
2989 
2990                     /*
2991                      * The first runs are not long
2992                      * enough to continue scanning.
2993                      */
2994                     return false;
2995                 }
2996 
2997                 if (++count == MAX_RUN_CAPACITY) {
2998 
2999                     /*
3000                      * Array is not highly structured.
3001                      */
3002                     return false;
3003                 }
3004 
3005                 if (count == run.length) {
3006 
3007                     /*
3008                      * Increase capacity of index array.
3009                      */
3010                     run = Arrays.copyOf(run, count << 1);
3011                 }
3012             }
3013             run[count] = (last = k);
3014         }
3015 
3016         /*
3017          * Merge runs of highly structured array.
3018          */
3019         if (count > 1) {
3020             float[] b; int offset = low;
3021 
3022             if (sorter == null || (b = (float[]) sorter.b) == null) {
3023                 b = new float[size];
3024             } else {
3025                 offset = sorter.offset;
3026             }
3027             mergeRuns(a, b, offset, 1, sorter != null, run, 0, count);
3028         }
3029         return true;
3030     }
3031 
3032     /**
3033      * Merges the specified runs.
3034      *
3035      * @param a the source array
3036      * @param b the temporary buffer used in merging
3037      * @param offset the start index in the source, inclusive
3038      * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0)
3039      * @param parallel indicates whether merging is performed in parallel
3040      * @param run the start indexes of the runs, inclusive
3041      * @param lo the start index of the first run, inclusive
3042      * @param hi the start index of the last run, inclusive
3043      * @return the destination where runs are merged
3044      */
3045     private static float[] mergeRuns(float[] a, float[] b, int offset,
3046             int aim, boolean parallel, int[] run, int lo, int hi) {
3047 
3048         if (hi - lo == 1) {
3049             if (aim >= 0) {
3050                 return a;
3051             }
3052             for (int i = run[hi], j = i - offset, low = run[lo]; i > low;
3053                 b[--j] = a[--i]
3054             );
3055             return b;
3056         }
3057 
3058         /*
3059          * Split into approximately equal parts.
3060          */
3061         int mi = lo, rmi = (run[lo] + run[hi]) >>> 1;
3062         while (run[++mi + 1] <= rmi);
3063 
3064         /*
3065          * Merge the left and right parts.
3066          */
3067         float[] a1, a2;
3068 
3069         if (parallel && hi - lo > MIN_RUN_COUNT) {
3070             RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe();
3071             a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi);
3072             a2 = (float[]) merger.getDestination();
3073         } else {
3074             a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi);
3075             a2 = mergeRuns(a, b, offset,    0, false, run, mi, hi);
3076         }
3077 
3078         float[] dst = a1 == a ? b : a;
3079 
3080         int k   = a1 == a ? run[lo] - offset : run[lo];
3081         int lo1 = a1 == b ? run[lo] - offset : run[lo];
3082         int hi1 = a1 == b ? run[mi] - offset : run[mi];
3083         int lo2 = a2 == b ? run[mi] - offset : run[mi];
3084         int hi2 = a2 == b ? run[hi] - offset : run[hi];
3085 
3086         if (parallel) {
3087             new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke();
3088         } else {
3089             mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2);
3090         }
3091         return dst;
3092     }
3093 
3094     /**
3095      * Merges the sorted parts.
3096      *
3097      * @param merger parallel context
3098      * @param dst the destination where parts are merged
3099      * @param k the start index of the destination, inclusive
3100      * @param a1 the first part
3101      * @param lo1 the start index of the first part, inclusive
3102      * @param hi1 the end index of the first part, exclusive
3103      * @param a2 the second part
3104      * @param lo2 the start index of the second part, inclusive
3105      * @param hi2 the end index of the second part, exclusive
3106      */
3107     private static void mergeParts(Merger merger, float[] dst, int k,
3108             float[] a1, int lo1, int hi1, float[] a2, int lo2, int hi2) {
3109 
3110         if (merger != null && a1 == a2) {
3111 
3112             while (true) {
3113 
3114                 /*
3115                  * The first part must be larger.
3116                  */
3117                 if (hi1 - lo1 < hi2 - lo2) {
3118                     int lo = lo1; lo1 = lo2; lo2 = lo;
3119                     int hi = hi1; hi1 = hi2; hi2 = hi;
3120                 }
3121 
3122                 /*
3123                  * Small parts will be merged sequentially.
3124                  */
3125                 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) {
3126                     break;
3127                 }
3128 
3129                 /*
3130                  * Find the median of the larger part.
3131                  */
3132                 int mi1 = (lo1 + hi1) >>> 1;
3133                 float key = a1[mi1];
3134                 int mi2 = hi2;
3135 
3136                 /*
3137                  * Partition the smaller part.
3138                  */
3139                 for (int loo = lo2; loo < mi2; ) {
3140                     int t = (loo + mi2) >>> 1;
3141 
3142                     if (key > a2[t]) {
3143                         loo = t + 1;
3144                     } else {
3145                         mi2 = t;
3146                     }
3147                 }
3148 
3149                 int d = mi2 - lo2 + mi1 - lo1;
3150 
3151                 /*
3152                  * Merge the right sub-parts in parallel.
3153                  */
3154                 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2);
3155 
3156                 /*
3157                  * Process the sub-left parts.
3158                  */
3159                 hi1 = mi1;
3160                 hi2 = mi2;
3161             }
3162         }
3163 
3164         /*
3165          * Merge small parts sequentially.
3166          */
3167         while (lo1 < hi1 && lo2 < hi2) {
3168             dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++];
3169         }
3170         if (dst != a1 || k < lo1) {
3171             while (lo1 < hi1) {
3172                 dst[k++] = a1[lo1++];
3173             }
3174         }
3175         if (dst != a2 || k < lo2) {
3176             while (lo2 < hi2) {
3177                 dst[k++] = a2[lo2++];
3178             }
3179         }
3180     }
3181 
3182 // [double]
3183 
3184     /**
3185      * Sorts the specified range of the array using parallel merge
3186      * sort and/or Dual-Pivot Quicksort.
3187      *
3188      * To balance the faster splitting and parallelism of merge sort
3189      * with the faster element partitioning of Quicksort, ranges are
3190      * subdivided in tiers such that, if there is enough parallelism,
3191      * the four-way parallel merge is started, still ensuring enough
3192      * parallelism to process the partitions.
3193      *
3194      * @param a the array to be sorted
3195      * @param parallelism the parallelism level
3196      * @param low the index of the first element, inclusive, to be sorted
3197      * @param high the index of the last element, exclusive, to be sorted
3198      */
3199     static void sort(double[] a, int parallelism, int low, int high) {
3200         /*
3201          * Phase 1. Count the number of negative zero -0.0d,
3202          * turn them into positive zero, and move all NaNs
3203          * to the end of the array.
3204          */
3205         int numNegativeZero = 0;
3206 
3207         for (int k = high; k > low; ) {
3208             double ak = a[--k];
3209 
3210             if (ak == 0.0d && Double.doubleToRawLongBits(ak) < 0) { // ak is -0.0d
3211                 numNegativeZero += 1;
3212                 a[k] = 0.0d;
3213             } else if (ak != ak) { // ak is NaN
3214                 a[k] = a[--high];
3215                 a[high] = ak;
3216             }
3217         }
3218 
3219         /*
3220          * Phase 2. Sort everything except NaNs,
3221          * which are already in place.
3222          */
3223         int size = high - low;
3224 
3225         if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) {
3226             int depth = getDepth(parallelism, size >> 12);
3227             double[] b = depth == 0 ? null : new double[size];
3228             new Sorter(null, a, b, low, size, low, depth).invoke();
3229         } else {
3230             sort(null, a, 0, low, high);
3231         }
3232 
3233         /*
3234          * Phase 3. Turn positive zero 0.0d
3235          * back into negative zero -0.0d.
3236          */
3237         if (++numNegativeZero == 1) {
3238             return;
3239         }
3240 
3241         /*
3242          * Find the position one less than
3243          * the index of the first zero.
3244          */
3245         while (low <= high) {
3246             int middle = (low + high) >>> 1;
3247 
3248             if (a[middle] < 0) {
3249                 low = middle + 1;
3250             } else {
3251                 high = middle - 1;
3252             }
3253         }
3254 
3255         /*
3256          * Replace the required number of 0.0d by -0.0d.
3257          */
3258         while (--numNegativeZero > 0) {
3259             a[++high] = -0.0d;
3260         }
3261     }
3262 
3263     /**
3264      * Sorts the specified array using the Dual-Pivot Quicksort and/or
3265      * other sorts in special-cases, possibly with parallel partitions.
3266      *
3267      * @param sorter parallel context
3268      * @param a the array to be sorted
3269      * @param bits the combination of recursion depth and bit flag, where
3270      *        the right bit "0" indicates that array is the leftmost part
3271      * @param low the index of the first element, inclusive, to be sorted
3272      * @param high the index of the last element, exclusive, to be sorted
3273      */
3274     static void sort(Sorter sorter, double[] a, int bits, int low, int high) {
3275         while (true) {
3276             int end = high - 1, size = high - low;
3277 
3278             /*
3279              * Run mixed insertion sort on small non-leftmost parts.
3280              */
3281             if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) {
3282                 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high);
3283                 return;
3284             }
3285 
3286             /*
3287              * Invoke insertion sort on small leftmost part.
3288              */
3289             if (size < MAX_INSERTION_SORT_SIZE) {
3290                 insertionSort(a, low, high);
3291                 return;
3292             }
3293 
3294             /*
3295              * Check if the whole array or large non-leftmost
3296              * parts are nearly sorted and then merge runs.
3297              */
3298             if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0)
3299                     && tryMergeRuns(sorter, a, low, size)) {
3300                 return;
3301             }
3302 
3303             /*
3304              * Switch to heap sort if execution
3305              * time is becoming quadratic.
3306              */
3307             if ((bits += 2) > MAX_RECURSION_DEPTH) {
3308                 heapSort(a, low, high);
3309                 return;
3310             }
3311 
3312             /*
3313              * Use an inexpensive approximation of the golden ratio
3314              * to select five sample elements and determine pivots.
3315              */
3316             int step = (size >> 3) * 3 + 3;
3317 
3318             /*
3319              * Five elements around (and including) the central element
3320              * will be used for pivot selection as described below. The
3321              * unequal choice of spacing these elements was empirically
3322              * determined to work well on a wide variety of inputs.
3323              */
3324             int e1 = low + step;
3325             int e5 = end - step;
3326             int e3 = (e1 + e5) >>> 1;
3327             int e2 = (e1 + e3) >>> 1;
3328             int e4 = (e3 + e5) >>> 1;
3329             double a3 = a[e3];
3330 
3331             /*
3332              * Sort these elements in place by the combination
3333              * of 4-element sorting network and insertion sort.
3334              *
3335              *    5 ------o-----------o------------
3336              *            |           |
3337              *    4 ------|-----o-----o-----o------
3338              *            |     |           |
3339              *    2 ------o-----|-----o-----o------
3340              *                  |     |
3341              *    1 ------------o-----o------------
3342              */
3343             if (a[e5] < a[e2]) { double t = a[e5]; a[e5] = a[e2]; a[e2] = t; }
3344             if (a[e4] < a[e1]) { double t = a[e4]; a[e4] = a[e1]; a[e1] = t; }
3345             if (a[e5] < a[e4]) { double t = a[e5]; a[e5] = a[e4]; a[e4] = t; }
3346             if (a[e2] < a[e1]) { double t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
3347             if (a[e4] < a[e2]) { double t = a[e4]; a[e4] = a[e2]; a[e2] = t; }
3348 
3349             if (a3 < a[e2]) {
3350                 if (a3 < a[e1]) {
3351                     a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3;
3352                 } else {
3353                     a[e3] = a[e2]; a[e2] = a3;
3354                 }
3355             } else if (a3 > a[e4]) {
3356                 if (a3 > a[e5]) {
3357                     a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3;
3358                 } else {
3359                     a[e3] = a[e4]; a[e4] = a3;
3360                 }
3361             }
3362 
3363             // Pointers
3364             int lower = low; // The index of the last element of the left part
3365             int upper = end; // The index of the first element of the right part
3366 
3367             /*
3368              * Partitioning with 2 pivots in case of different elements.
3369              */
3370             if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) {
3371 
3372                 /*
3373                  * Use the first and fifth of the five sorted elements as
3374                  * the pivots. These values are inexpensive approximation
3375                  * of tertiles. Note, that pivot1 < pivot2.
3376                  */
3377                 double pivot1 = a[e1];
3378                 double pivot2 = a[e5];
3379 
3380                 /*
3381                  * The first and the last elements to be sorted are moved
3382                  * to the locations formerly occupied by the pivots. When
3383                  * partitioning is completed, the pivots are swapped back
3384                  * into their final positions, and excluded from the next
3385                  * subsequent sorting.
3386                  */
3387                 a[e1] = a[lower];
3388                 a[e5] = a[upper];
3389 
3390                 /*
3391                  * Skip elements, which are less or greater than the pivots.
3392                  */
3393                 while (a[++lower] < pivot1);
3394                 while (a[--upper] > pivot2);
3395 
3396                 /*
3397                  * Backward 3-interval partitioning
3398                  *
3399                  *   left part                 central part          right part
3400                  * +------------------------------------------------------------+
3401                  * |  < pivot1  |   ?   |  pivot1 <= && <= pivot2  |  > pivot2  |
3402                  * +------------------------------------------------------------+
3403                  *             ^       ^                            ^
3404                  *             |       |                            |
3405                  *           lower     k                          upper
3406                  *
3407                  * Invariants:
3408                  *
3409                  *              all in (low, lower] < pivot1
3410                  *    pivot1 <= all in (k, upper)  <= pivot2
3411                  *              all in [upper, end) > pivot2
3412                  *
3413                  * Pointer k is the last index of ?-part
3414                  */
3415                 for (int unused = --lower, k = ++upper; --k > lower; ) {
3416                     double ak = a[k];
3417 
3418                     if (ak < pivot1) { // Move a[k] to the left side
3419                         while (lower < k) {
3420                             if (a[++lower] >= pivot1) {
3421                                 if (a[lower] > pivot2) {
3422                                     a[k] = a[--upper];
3423                                     a[upper] = a[lower];
3424                                 } else {
3425                                     a[k] = a[lower];
3426                                 }
3427                                 a[lower] = ak;
3428                                 break;
3429                             }
3430                         }
3431                     } else if (ak > pivot2) { // Move a[k] to the right side
3432                         a[k] = a[--upper];
3433                         a[upper] = ak;
3434                     }
3435                 }
3436 
3437                 /*
3438                  * Swap the pivots into their final positions.
3439                  */
3440                 a[low] = a[lower]; a[lower] = pivot1;
3441                 a[end] = a[upper]; a[upper] = pivot2;
3442 
3443                 /*
3444                  * Sort non-left parts recursively (possibly in parallel),
3445                  * excluding known pivots.
3446                  */
3447                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
3448                     sorter.forkSorter(bits | 1, lower + 1, upper);
3449                     sorter.forkSorter(bits | 1, upper + 1, high);
3450                 } else {
3451                     sort(sorter, a, bits | 1, lower + 1, upper);
3452                     sort(sorter, a, bits | 1, upper + 1, high);
3453                 }
3454 
3455             } else { // Use single pivot in case of many equal elements
3456 
3457                 /*
3458                  * Use the third of the five sorted elements as the pivot.
3459                  * This value is inexpensive approximation of the median.
3460                  */
3461                 double pivot = a[e3];
3462 
3463                 /*
3464                  * The first element to be sorted is moved to the
3465                  * location formerly occupied by the pivot. After
3466                  * completion of partitioning the pivot is swapped
3467                  * back into its final position, and excluded from
3468                  * the next subsequent sorting.
3469                  */
3470                 a[e3] = a[lower];
3471 
3472                 /*
3473                  * Traditional 3-way (Dutch National Flag) partitioning
3474                  *
3475                  *   left part                 central part    right part
3476                  * +------------------------------------------------------+
3477                  * |   < pivot   |     ?     |   == pivot   |   > pivot   |
3478                  * +------------------------------------------------------+
3479                  *              ^           ^                ^
3480                  *              |           |                |
3481                  *            lower         k              upper
3482                  *
3483                  * Invariants:
3484                  *
3485                  *   all in (low, lower] < pivot
3486                  *   all in (k, upper)  == pivot
3487                  *   all in [upper, end] > pivot
3488                  *
3489                  * Pointer k is the last index of ?-part
3490                  */
3491                 for (int k = ++upper; --k > lower; ) {
3492                     double ak = a[k];
3493 
3494                     if (ak != pivot) {
3495                         a[k] = pivot;
3496 
3497                         if (ak < pivot) { // Move a[k] to the left side
3498                             while (a[++lower] < pivot);
3499 
3500                             if (a[lower] > pivot) {
3501                                 a[--upper] = a[lower];
3502                             }
3503                             a[lower] = ak;
3504                         } else { // ak > pivot - Move a[k] to the right side
3505                             a[--upper] = ak;
3506                         }
3507                     }
3508                 }
3509 
3510                 /*
3511                  * Swap the pivot into its final position.
3512                  */
3513                 a[low] = a[lower]; a[lower] = pivot;
3514 
3515                 /*
3516                  * Sort the right part (possibly in parallel), excluding
3517                  * known pivot. All elements from the central part are
3518                  * equal and therefore already sorted.
3519                  */
3520                 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) {
3521                     sorter.forkSorter(bits | 1, upper, high);
3522                 } else {
3523                     sort(sorter, a, bits | 1, upper, high);
3524                 }
3525             }
3526             high = lower; // Iterate along the left part
3527         }
3528     }
3529 
3530     /**
3531      * Sorts the specified range of the array using mixed insertion sort.
3532      *
3533      * Mixed insertion sort is combination of simple insertion sort,
3534      * pin insertion sort and pair insertion sort.
3535      *
3536      * In the context of Dual-Pivot Quicksort, the pivot element
3537      * from the left part plays the role of sentinel, because it
3538      * is less than any elements from the given part. Therefore,
3539      * expensive check of the left range can be skipped on each
3540      * iteration unless it is the leftmost call.
3541      *
3542      * @param a the array to be sorted
3543      * @param low the index of the first element, inclusive, to be sorted
3544      * @param end the index of the last element for simple insertion sort
3545      * @param high the index of the last element, exclusive, to be sorted
3546      */
3547     private static void mixedInsertionSort(double[] a, int low, int end, int high) {
3548         if (end == high) {
3549 
3550             /*
3551              * Invoke simple insertion sort on tiny array.
3552              */
3553             for (int i; ++low < end; ) {
3554                 double ai = a[i = low];
3555 
3556                 while (ai < a[--i]) {
3557                     a[i + 1] = a[i];
3558                 }
3559                 a[i + 1] = ai;
3560             }
3561         } else {
3562 
3563             /*
3564              * Start with pin insertion sort on small part.
3565              *
3566              * Pin insertion sort is extended simple insertion sort.
3567              * The main idea of this sort is to put elements larger
3568              * than an element called pin to the end of array (the
3569              * proper area for such elements). It avoids expensive
3570              * movements of these elements through the whole array.
3571              */
3572             double pin = a[end];
3573 
3574             for (int i, p = high; ++low < end; ) {
3575                 double ai = a[i = low];
3576 
3577                 if (ai < a[i - 1]) { // Small element
3578 
3579                     /*
3580                      * Insert small element into sorted part.
3581                      */
3582                     a[i] = a[--i];
3583 
3584                     while (ai < a[--i]) {
3585                         a[i + 1] = a[i];
3586                     }
3587                     a[i + 1] = ai;
3588 
3589                 } else if (p > i && ai > pin) { // Large element
3590 
3591                     /*
3592                      * Find element smaller than pin.
3593                      */
3594                     while (a[--p] > pin);
3595 
3596                     /*
3597                      * Swap it with large element.
3598                      */
3599                     if (p > i) {
3600                         ai = a[p];
3601                         a[p] = a[i];
3602                     }
3603 
3604                     /*
3605                      * Insert small element into sorted part.
3606                      */
3607                     while (ai < a[--i]) {
3608                         a[i + 1] = a[i];
3609                     }
3610                     a[i + 1] = ai;
3611                 }
3612             }
3613 
3614             /*
3615              * Continue with pair insertion sort on remain part.
3616              */
3617             for (int i; low < high; ++low) {
3618                 double a1 = a[i = low], a2 = a[++low];
3619 
3620                 /*
3621                  * Insert two elements per iteration: at first, insert the
3622                  * larger element and then insert the smaller element, but
3623                  * from the position where the larger element was inserted.
3624                  */
3625                 if (a1 > a2) {
3626 
3627                     while (a1 < a[--i]) {
3628                         a[i + 2] = a[i];
3629                     }
3630                     a[++i + 1] = a1;
3631 
3632                     while (a2 < a[--i]) {
3633                         a[i + 1] = a[i];
3634                     }
3635                     a[i + 1] = a2;
3636 
3637                 } else if (a1 < a[i - 1]) {
3638 
3639                     while (a2 < a[--i]) {
3640                         a[i + 2] = a[i];
3641                     }
3642                     a[++i + 1] = a2;
3643 
3644                     while (a1 < a[--i]) {
3645                         a[i + 1] = a[i];
3646                     }
3647                     a[i + 1] = a1;
3648                 }
3649             }
3650         }
3651     }
3652 
3653     /**
3654      * Sorts the specified range of the array using insertion sort.
3655      *
3656      * @param a the array to be sorted
3657      * @param low the index of the first element, inclusive, to be sorted
3658      * @param high the index of the last element, exclusive, to be sorted
3659      */
3660     private static void insertionSort(double[] a, int low, int high) {
3661         for (int i, k = low; ++k < high; ) {
3662             double ai = a[i = k];
3663 
3664             if (ai < a[i - 1]) {
3665                 while (--i >= low && ai < a[i]) {
3666                     a[i + 1] = a[i];
3667                 }
3668                 a[i + 1] = ai;
3669             }
3670         }
3671     }
3672 
3673     /**
3674      * Sorts the specified range of the array using heap sort.
3675      *
3676      * @param a the array to be sorted
3677      * @param low the index of the first element, inclusive, to be sorted
3678      * @param high the index of the last element, exclusive, to be sorted
3679      */
3680     private static void heapSort(double[] a, int low, int high) {
3681         for (int k = (low + high) >>> 1; k > low; ) {
3682             pushDown(a, --k, a[k], low, high);
3683         }
3684         while (--high > low) {
3685             double max = a[low];
3686             pushDown(a, low, a[high], low, high);
3687             a[high] = max;
3688         }
3689     }
3690 
3691     /**
3692      * Pushes specified element down during heap sort.
3693      *
3694      * @param a the given array
3695      * @param p the start index
3696      * @param value the given element
3697      * @param low the index of the first element, inclusive, to be sorted
3698      * @param high the index of the last element, exclusive, to be sorted
3699      */
3700     private static void pushDown(double[] a, int p, double value, int low, int high) {
3701         for (int k ;; a[p] = a[p = k]) {
3702             k = (p << 1) - low + 2; // Index of the right child
3703 
3704             if (k > high) {
3705                 break;
3706             }
3707             if (k == high || a[k] < a[k - 1]) {
3708                 --k;
3709             }
3710             if (a[k] <= value) {
3711                 break;
3712             }
3713         }
3714         a[p] = value;
3715     }
3716 
3717     /**
3718      * Tries to sort the specified range of the array.
3719      *
3720      * @param sorter parallel context
3721      * @param a the array to be sorted
3722      * @param low the index of the first element to be sorted
3723      * @param size the array size
3724      * @return true if finally sorted, false otherwise
3725      */
3726     private static boolean tryMergeRuns(Sorter sorter, double[] a, int low, int size) {
3727 
3728         /*
3729          * The run array is constructed only if initial runs are
3730          * long enough to continue, run[i] then holds start index
3731          * of the i-th sequence of elements in non-descending order.
3732          */
3733         int[] run = null;
3734         int high = low + size;
3735         int count = 1, last = low;
3736 
3737         /*
3738          * Identify all possible runs.
3739          */
3740         for (int k = low + 1; k < high; ) {
3741 
3742             /*
3743              * Find the end index of the current run.
3744              */
3745             if (a[k - 1] < a[k]) {
3746 
3747                 // Identify ascending sequence
3748                 while (++k < high && a[k - 1] <= a[k]);
3749 
3750             } else if (a[k - 1] > a[k]) {
3751 
3752                 // Identify descending sequence
3753                 while (++k < high && a[k - 1] >= a[k]);
3754 
3755                 // Reverse into ascending order
3756                 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) {
3757                     double ai = a[i]; a[i] = a[j]; a[j] = ai;
3758                 }
3759             } else { // Identify constant sequence
3760                 for (double ak = a[k]; ++k < high && ak == a[k]; );
3761 
3762                 if (k < high) {
3763                     continue;
3764                 }
3765             }
3766 
3767             /*
3768              * Check special cases.
3769              */
3770             if (run == null) {
3771                 if (k == high) {
3772 
3773                     /*
3774                      * The array is monotonous sequence,
3775                      * and therefore already sorted.
3776                      */
3777                     return true;
3778                 }
3779 
3780                 if (k - low < MIN_FIRST_RUN_SIZE) {
3781 
3782                     /*
3783                      * The first run is too small
3784                      * to proceed with scanning.
3785                      */
3786                     return false;
3787                 }
3788 
3789                 run = new int[((size >> 10) | 0x7F) & 0x3FF];
3790                 run[0] = low;
3791 
3792             } else if (a[last - 1] > a[last]) {
3793 
3794                 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) {
3795 
3796                     /*
3797                      * The first runs are not long
3798                      * enough to continue scanning.
3799                      */
3800                     return false;
3801                 }
3802 
3803                 if (++count == MAX_RUN_CAPACITY) {
3804 
3805                     /*
3806                      * Array is not highly structured.
3807                      */
3808                     return false;
3809                 }
3810 
3811                 if (count == run.length) {
3812 
3813                     /*
3814                      * Increase capacity of index array.
3815                      */
3816                     run = Arrays.copyOf(run, count << 1);
3817                 }
3818             }
3819             run[count] = (last = k);
3820         }
3821 
3822         /*
3823          * Merge runs of highly structured array.
3824          */
3825         if (count > 1) {
3826             double[] b; int offset = low;
3827 
3828             if (sorter == null || (b = (double[]) sorter.b) == null) {
3829                 b = new double[size];
3830             } else {
3831                 offset = sorter.offset;
3832             }
3833             mergeRuns(a, b, offset, 1, sorter != null, run, 0, count);
3834         }
3835         return true;
3836     }
3837 
3838     /**
3839      * Merges the specified runs.
3840      *
3841      * @param a the source array
3842      * @param b the temporary buffer used in merging
3843      * @param offset the start index in the source, inclusive
3844      * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0)
3845      * @param parallel indicates whether merging is performed in parallel
3846      * @param run the start indexes of the runs, inclusive
3847      * @param lo the start index of the first run, inclusive
3848      * @param hi the start index of the last run, inclusive
3849      * @return the destination where runs are merged
3850      */
3851     private static double[] mergeRuns(double[] a, double[] b, int offset,
3852             int aim, boolean parallel, int[] run, int lo, int hi) {
3853 
3854         if (hi - lo == 1) {
3855             if (aim >= 0) {
3856                 return a;
3857             }
3858             for (int i = run[hi], j = i - offset, low = run[lo]; i > low;
3859                 b[--j] = a[--i]
3860             );
3861             return b;
3862         }
3863 
3864         /*
3865          * Split into approximately equal parts.
3866          */
3867         int mi = lo, rmi = (run[lo] + run[hi]) >>> 1;
3868         while (run[++mi + 1] <= rmi);
3869 
3870         /*
3871          * Merge the left and right parts.
3872          */
3873         double[] a1, a2;
3874 
3875         if (parallel && hi - lo > MIN_RUN_COUNT) {
3876             RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe();
3877             a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi);
3878             a2 = (double[]) merger.getDestination();
3879         } else {
3880             a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi);
3881             a2 = mergeRuns(a, b, offset,    0, false, run, mi, hi);
3882         }
3883 
3884         double[] dst = a1 == a ? b : a;
3885 
3886         int k   = a1 == a ? run[lo] - offset : run[lo];
3887         int lo1 = a1 == b ? run[lo] - offset : run[lo];
3888         int hi1 = a1 == b ? run[mi] - offset : run[mi];
3889         int lo2 = a2 == b ? run[mi] - offset : run[mi];
3890         int hi2 = a2 == b ? run[hi] - offset : run[hi];
3891 
3892         if (parallel) {
3893             new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke();
3894         } else {
3895             mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2);
3896         }
3897         return dst;
3898     }
3899 
3900     /**
3901      * Merges the sorted parts.
3902      *
3903      * @param merger parallel context
3904      * @param dst the destination where parts are merged
3905      * @param k the start index of the destination, inclusive
3906      * @param a1 the first part
3907      * @param lo1 the start index of the first part, inclusive
3908      * @param hi1 the end index of the first part, exclusive
3909      * @param a2 the second part
3910      * @param lo2 the start index of the second part, inclusive
3911      * @param hi2 the end index of the second part, exclusive
3912      */
3913     private static void mergeParts(Merger merger, double[] dst, int k,
3914             double[] a1, int lo1, int hi1, double[] a2, int lo2, int hi2) {
3915 
3916         if (merger != null && a1 == a2) {
3917 
3918             while (true) {
3919 
3920                 /*
3921                  * The first part must be larger.
3922                  */
3923                 if (hi1 - lo1 < hi2 - lo2) {
3924                     int lo = lo1; lo1 = lo2; lo2 = lo;
3925                     int hi = hi1; hi1 = hi2; hi2 = hi;
3926                 }
3927 
3928                 /*
3929                  * Small parts will be merged sequentially.
3930                  */
3931                 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) {
3932                     break;
3933                 }
3934 
3935                 /*
3936                  * Find the median of the larger part.
3937                  */
3938                 int mi1 = (lo1 + hi1) >>> 1;
3939                 double key = a1[mi1];
3940                 int mi2 = hi2;
3941 
3942                 /*
3943                  * Partition the smaller part.
3944                  */
3945                 for (int loo = lo2; loo < mi2; ) {
3946                     int t = (loo + mi2) >>> 1;
3947 
3948                     if (key > a2[t]) {
3949                         loo = t + 1;
3950                     } else {
3951                         mi2 = t;
3952                     }
3953                 }
3954 
3955                 int d = mi2 - lo2 + mi1 - lo1;
3956 
3957                 /*
3958                  * Merge the right sub-parts in parallel.
3959                  */
3960                 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2);
3961 
3962                 /*
3963                  * Process the sub-left parts.
3964                  */
3965                 hi1 = mi1;
3966                 hi2 = mi2;
3967             }
3968         }
3969 
3970         /*
3971          * Merge small parts sequentially.
3972          */
3973         while (lo1 < hi1 && lo2 < hi2) {
3974             dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++];
3975         }
3976         if (dst != a1 || k < lo1) {
3977             while (lo1 < hi1) {
3978                 dst[k++] = a1[lo1++];
3979             }
3980         }
3981         if (dst != a2 || k < lo2) {
3982             while (lo2 < hi2) {
3983                 dst[k++] = a2[lo2++];
3984             }
3985         }
3986     }
3987 
3988 // [class]
3989 
3990     /**
3991      * This class implements parallel sorting.
3992      */
3993     private static final class Sorter extends CountedCompleter<Void> {
3994         private static final long serialVersionUID = 20180818L;
3995         private final Object a, b;
3996         private final int low, size, offset, depth;
3997 
3998         private Sorter(CountedCompleter<?> parent,
3999                 Object a, Object b, int low, int size, int offset, int depth) {
4000             super(parent);
4001             this.a = a;
4002             this.b = b;
4003             this.low = low;
4004             this.size = size;
4005             this.offset = offset;
4006             this.depth = depth;
4007         }
4008 
4009         @Override
4010         public final void compute() {
4011             if (depth < 0) {
4012                 setPendingCount(2);
4013                 int half = size >> 1;
4014                 new Sorter(this, b, a, low, half, offset, depth + 1).fork();
4015                 new Sorter(this, b, a, low + half, size - half, offset, depth + 1).compute();
4016             } else {
4017                 if (a instanceof int[]) {
4018                     sort(this, (int[]) a, depth, low, low + size);
4019                 } else if (a instanceof long[]) {
4020                     sort(this, (long[]) a, depth, low, low + size);
4021                 } else if (a instanceof float[]) {
4022                     sort(this, (float[]) a, depth, low, low + size);
4023                 } else if (a instanceof double[]) {
4024                     sort(this, (double[]) a, depth, low, low + size);
4025                 } else {
4026                     throw new IllegalArgumentException(
4027                         "Unknown type of array: " + a.getClass().getName());
4028                 }
4029             }
4030             tryComplete();
4031         }
4032 
4033         @Override
4034         public final void onCompletion(CountedCompleter<?> caller) {
4035             if (depth < 0) {
4036                 int mi = low + (size >> 1);
4037                 boolean src = (depth & 1) == 0;
4038 
4039                 new Merger(null,
4040                     a,
4041                     src ? low : low - offset,
4042                     b,
4043                     src ? low - offset : low,
4044                     src ? mi - offset : mi,
4045                     b,
4046                     src ? mi - offset : mi,
4047                     src ? low + size - offset : low + size
4048                 ).invoke();
4049             }
4050         }
4051 
4052         private void forkSorter(int depth, int low, int high) {
4053             addToPendingCount(1);
4054             Object a = this.a; // Use local variable for performance
4055             new Sorter(this, a, b, low, high - low, offset, depth).fork();
4056         }
4057     }
4058 
4059     /**
4060      * This class implements parallel merging.
4061      */
4062     private static final class Merger extends CountedCompleter<Void> {
4063         private static final long serialVersionUID = 20180818L;
4064         private final Object dst, a1, a2;
4065         private final int k, lo1, hi1, lo2, hi2;
4066 
4067         private Merger(CountedCompleter<?> parent, Object dst, int k,
4068                 Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) {
4069             super(parent);
4070             this.dst = dst;
4071             this.k = k;
4072             this.a1 = a1;
4073             this.lo1 = lo1;
4074             this.hi1 = hi1;
4075             this.a2 = a2;
4076             this.lo2 = lo2;
4077             this.hi2 = hi2;
4078         }
4079 
4080         @Override
4081         public final void compute() {
4082             if (dst instanceof int[]) {
4083                 mergeParts(this, (int[]) dst, k,
4084                     (int[]) a1, lo1, hi1, (int[]) a2, lo2, hi2);
4085             } else if (dst instanceof long[]) {
4086                 mergeParts(this, (long[]) dst, k,
4087                     (long[]) a1, lo1, hi1, (long[]) a2, lo2, hi2);
4088             } else if (dst instanceof float[]) {
4089                 mergeParts(this, (float[]) dst, k,
4090                     (float[]) a1, lo1, hi1, (float[]) a2, lo2, hi2);
4091             } else if (dst instanceof double[]) {
4092                 mergeParts(this, (double[]) dst, k,
4093                     (double[]) a1, lo1, hi1, (double[]) a2, lo2, hi2);
4094             } else {
4095                 throw new IllegalArgumentException(
4096                     "Unknown type of array: " + dst.getClass().getName());
4097             }
4098             propagateCompletion();
4099         }
4100 
4101         private void forkMerger(Object dst, int k,
4102                 Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) {
4103             addToPendingCount(1);
4104             new Merger(this, dst, k, a1, lo1, hi1, a2, lo2, hi2).fork();
4105         }
4106     }
4107 
4108     /**
4109      * This class implements parallel merging of runs.
4110      */
4111     private static final class RunMerger extends RecursiveTask<Object> {
4112         private static final long serialVersionUID = 20180818L;
4113         private final Object a, b;
4114         private final int[] run;
4115         private final int offset, aim, lo, hi;
4116 
4117         private RunMerger(Object a, Object b, int offset,
4118                 int aim, int[] run, int lo, int hi) {
4119             this.a = a;
4120             this.b = b;
4121             this.offset = offset;
4122             this.aim = aim;
4123             this.run = run;
4124             this.lo = lo;
4125             this.hi = hi;
4126         }
4127 
4128         @Override
4129         protected final Object compute() {
4130             if (a instanceof int[]) {
4131                 return mergeRuns((int[]) a, (int[]) b, offset, aim, true, run, lo, hi);
4132             }
4133             if (a instanceof long[]) {
4134                 return mergeRuns((long[]) a, (long[]) b, offset, aim, true, run, lo, hi);
4135             }
4136             if (a instanceof float[]) {
4137                 return mergeRuns((float[]) a, (float[]) b, offset, aim, true, run, lo, hi);
4138             }
4139             if (a instanceof double[]) {
4140                 return mergeRuns((double[]) a, (double[]) b, offset, aim, true, run, lo, hi);
4141             }
4142             throw new IllegalArgumentException(
4143                 "Unknown type of array: " + a.getClass().getName());
4144         }
4145 
4146         private RunMerger forkMe() {
4147             fork();
4148             return this;
4149         }
4150 
4151         private Object getDestination() {
4152             join();
4153             return getRawResult();
4154         }
4155     }
4156 }