1 /* 2 * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util; 27 28 import java.util.concurrent.CountedCompleter; 29 import java.util.concurrent.RecursiveTask; 30 31 /** 32 * This class implements powerful and fully optimized versions, both 33 * sequential and parallel, of the Dual-Pivot Quicksort algorithm by 34 * Vladimir Yaroslavskiy, Jon Bentley and Josh Bloch. This algorithm 35 * offers O(n log(n)) performance on all data sets, and is typically 36 * faster than traditional (one-pivot) Quicksort implementations. 37 * 38 * There are also additional algorithms, invoked from the Dual-Pivot 39 * Quicksort, such as mixed insertion sort, merging of runs and heap 40 * sort, counting sort and parallel merge sort. 41 * 42 * @author Vladimir Yaroslavskiy 43 * @author Jon Bentley 44 * @author Josh Bloch 45 * @author Doug Lea 46 * 47 * @version 2018.08.18 48 * 49 * @since 14 50 */ 51 final class DualPivotQuicksort { 52 53 /** 54 * Prevents instantiation. 55 */ 56 private DualPivotQuicksort() {} 57 58 /** 59 * Max array size to use mixed insertion sort. 60 */ 61 private static final int MAX_MIXED_INSERTION_SORT_SIZE = 114; 62 63 /** 64 * Max array size to use insertion sort. 65 */ 66 private static final int MAX_INSERTION_SORT_SIZE = 41; 67 68 /** 69 * Min array size to perform sorting in parallel. 70 */ 71 private static final int MIN_PARALLEL_SORT_SIZE = 4 << 10; 72 73 /** 74 * Min array size to try merging of runs. 75 */ 76 private static final int MIN_TRY_MERGE_SIZE = 4 << 10; 77 78 /** 79 * Min size of the first run to continue with scanning. 80 */ 81 private static final int MIN_FIRST_RUN_SIZE = 16; 82 83 /** 84 * Min factor for the first runs to continue scanning. 85 */ 86 private static final int MIN_FIRST_RUNS_FACTOR = 7; 87 88 /** 89 * Max capacity of the index array for tracking runs. 90 */ 91 private static final int MAX_RUN_CAPACITY = 5 << 10; 92 93 /** 94 * Min number of runs, required by parallel merging. 95 */ 96 private static final int MIN_RUN_COUNT = 4; 97 98 /** 99 * Min array size to use parallel merging of parts. 100 */ 101 private static final int MIN_PARALLEL_MERGE_PARTS_SIZE = 4 << 10; 102 103 /** 104 * Min size of a byte array to use counting sort. 105 */ 106 private static final int MIN_BYTE_COUNTING_SORT_SIZE = 64; 107 108 /** 109 * Min size of a short or char array to use counting sort. 110 */ 111 private static final int MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE = 1750; 112 113 /** 114 * Max double recursive partitioning depth before using heap sort. 115 */ 116 private static final int MAX_RECURSION_DEPTH = 64 << 1; 117 118 /** 119 * Calculates the double depth of parallel merging. 120 * Depth is negative, if tasks split before sorting. 121 * 122 * @param parallelism the parallelism level 123 * @param size the target size 124 * @return the depth of parallel merging 125 */ 126 private static int getDepth(int parallelism, int size) { 127 int depth = 0; 128 129 while ((parallelism >>= 3) > 0 && (size >>= 2) > 0) { 130 depth -= 2; 131 } 132 return depth; 133 } 134 135 /** 136 * Sorts the specified range of the array using parallel merge 137 * sort and/or Dual-Pivot Quicksort. 138 * 139 * To balance the faster splitting and parallelism of merge sort 140 * with the faster element partitioning of Quicksort, ranges are 141 * subdivided in tiers such that, if there is enough parallelism, 142 * the four-way parallel merge is started, still ensuring enough 143 * parallelism to process the partitions. 144 * 145 * @param a the array to be sorted 146 * @param parallelism the parallelism level 147 * @param low the index of the first element, inclusive, to be sorted 148 * @param high the index of the last element, exclusive, to be sorted 149 */ 150 static void sort(int[] a, int parallelism, int low, int high) { 151 int size = high - low; 152 153 if (parallelism > 0 && size > MIN_PARALLEL_SORT_SIZE) { 154 int depth = getDepth(parallelism, size >> 12); 155 int[] b = depth == 0 ? null : new int[size]; 156 new Sorter(null, a, b, low, size, low, depth).invoke(); 157 } else { 158 sort(null, a, 0, low, high); 159 } 160 } 161 162 /** 163 * Sorts the specified array using the Dual-Pivot Quicksort and/or 164 * other sorts in special-cases, possibly with parallel partitions. 165 * 166 * @param sorter parallel context 167 * @param a the array to be sorted 168 * @param bits the combination of recursion depth and bit flag, where 169 * the right bit "0" indicates that array is the leftmost part 170 * @param low the index of the first element, inclusive, to be sorted 171 * @param high the index of the last element, exclusive, to be sorted 172 */ 173 static void sort(Sorter sorter, int[] a, int bits, int low, int high) { 174 while (true) { 175 int end = high - 1, size = high - low; 176 177 /* 178 * Run mixed insertion sort on small non-leftmost parts. 179 */ 180 if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { 181 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 182 return; 183 } 184 185 /* 186 * Invoke insertion sort on small leftmost part. 187 */ 188 if (size < MAX_INSERTION_SORT_SIZE) { 189 insertionSort(a, low, high); 190 return; 191 } 192 193 /* 194 * Check if the whole array or large non-leftmost 195 * parts are nearly sorted and then merge runs. 196 */ 197 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) 198 && tryMergeRuns(sorter, a, low, size)) { 199 return; 200 } 201 202 /* 203 * Switch to heap sort if execution 204 * time is becoming quadratic. 205 */ 206 if ((bits += 2) > MAX_RECURSION_DEPTH) { 207 heapSort(a, low, high); 208 return; 209 } 210 211 /* 212 * Use an inexpensive approximation of the golden ratio 213 * to select five sample elements and determine pivots. 214 */ 215 int step = (size >> 3) * 3 + 3; 216 217 /* 218 * Five elements around (and including) the central element 219 * will be used for pivot selection as described below. The 220 * unequal choice of spacing these elements was empirically 221 * determined to work well on a wide variety of inputs. 222 */ 223 int e1 = low + step; 224 int e5 = end - step; 225 int e3 = (e1 + e5) >>> 1; 226 int e2 = (e1 + e3) >>> 1; 227 int e4 = (e3 + e5) >>> 1; 228 int a3 = a[e3]; 229 230 /* 231 * Sort these elements in place by the combination 232 * of 4-element sorting network and insertion sort. 233 * 234 * 5 ------o-----------o----------- 235 * | | 236 * 4 ------|-----o-----o-----o----- 237 * | | | 238 * 2 ------o-----|-----o-----o----- 239 * | | 240 * 1 ------------o-----o----------- 241 */ 242 if (a[e5] < a[e2]) { int t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 243 if (a[e4] < a[e1]) { int t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 244 if (a[e5] < a[e4]) { int t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 245 if (a[e2] < a[e1]) { int t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 246 if (a[e4] < a[e2]) { int t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 247 248 if (a3 < a[e2]) { 249 if (a3 < a[e1]) { 250 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 251 } else { 252 a[e3] = a[e2]; a[e2] = a3; 253 } 254 } else if (a3 > a[e4]) { 255 if (a3 > a[e5]) { 256 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 257 } else { 258 a[e3] = a[e4]; a[e4] = a3; 259 } 260 } 261 262 // Pointers 263 int lower = low; // The index of the last element of the left part 264 int upper = end; // The index of the first element of the right part 265 266 /* 267 * Partitioning with 2 pivots in case of different elements. 268 */ 269 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 270 271 /* 272 * Use the first and fifth of the five sorted elements as 273 * the pivots. These values are inexpensive approximation 274 * of tertiles. Note, that pivot1 < pivot2. 275 */ 276 int pivot1 = a[e1]; 277 int pivot2 = a[e5]; 278 279 /* 280 * The first and the last elements to be sorted are moved 281 * to the locations formerly occupied by the pivots. When 282 * partitioning is completed, the pivots are swapped back 283 * into their final positions, and excluded from the next 284 * subsequent sorting. 285 */ 286 a[e1] = a[lower]; 287 a[e5] = a[upper]; 288 289 /* 290 * Skip elements, which are less or greater than the pivots. 291 */ 292 while (a[++lower] < pivot1); 293 while (a[--upper] > pivot2); 294 295 /* 296 * Backward 3-interval partitioning 297 * 298 * left part central part right part 299 * +------------------------------------------------------------+ 300 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 301 * +------------------------------------------------------------+ 302 * ^ ^ ^ 303 * | | | 304 * lower k upper 305 * 306 * Invariants: 307 * 308 * all in (low, lower] < pivot1 309 * pivot1 <= all in (k, upper) <= pivot2 310 * all in [upper, end) > pivot2 311 * 312 * Pointer k is the last index of ?-part 313 */ 314 for (int unused = --lower, k = ++upper; --k > lower; ) { 315 int ak = a[k]; 316 317 if (ak < pivot1) { // Move a[k] to the left side 318 while (lower < k) { 319 if (a[++lower] >= pivot1) { 320 if (a[lower] > pivot2) { 321 a[k] = a[--upper]; 322 a[upper] = a[lower]; 323 } else { 324 a[k] = a[lower]; 325 } 326 a[lower] = ak; 327 break; 328 } 329 } 330 } else if (ak > pivot2) { // Move a[k] to the right side 331 a[k] = a[--upper]; 332 a[upper] = ak; 333 } 334 } 335 336 /* 337 * Swap the pivots into their final positions. 338 */ 339 a[low] = a[lower]; a[lower] = pivot1; 340 a[end] = a[upper]; a[upper] = pivot2; 341 342 /* 343 * Sort non-left parts recursively (possibly in parallel), 344 * excluding known pivots. 345 */ 346 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 347 sorter.forkSorter(bits | 1, lower + 1, upper); 348 sorter.forkSorter(bits | 1, upper + 1, high); 349 } else { 350 sort(sorter, a, bits | 1, lower + 1, upper); 351 sort(sorter, a, bits | 1, upper + 1, high); 352 } 353 354 } else { // Use single pivot in case of many equal elements 355 356 /* 357 * Use the third of the five sorted elements as the pivot. 358 * This value is inexpensive approximation of the median. 359 */ 360 int pivot = a[e3]; 361 362 /* 363 * The first element to be sorted is moved to the 364 * location formerly occupied by the pivot. After 365 * completion of partitioning the pivot is swapped 366 * back into its final position, and excluded from 367 * the next subsequent sorting. 368 */ 369 a[e3] = a[lower]; 370 371 /* 372 * Traditional 3-way (Dutch National Flag) partitioning 373 * 374 * left part central part right part 375 * +------------------------------------------------------+ 376 * | < pivot | ? | == pivot | > pivot | 377 * +------------------------------------------------------+ 378 * ^ ^ ^ 379 * | | | 380 * lower k upper 381 * 382 * Invariants: 383 * 384 * all in (low, lower] < pivot 385 * all in (k, upper) == pivot 386 * all in [upper, end] > pivot 387 * 388 * Pointer k is the last index of ?-part 389 */ 390 for (int k = ++upper; --k > lower; ) { 391 int ak = a[k]; 392 393 if (ak != pivot) { 394 a[k] = pivot; 395 396 if (ak < pivot) { // Move a[k] to the left side 397 while (a[++lower] < pivot); 398 399 if (a[lower] > pivot) { 400 a[--upper] = a[lower]; 401 } 402 a[lower] = ak; 403 } else { // ak > pivot - Move a[k] to the right side 404 a[--upper] = ak; 405 } 406 } 407 } 408 409 /* 410 * Swap the pivot into its final position. 411 */ 412 a[low] = a[lower]; a[lower] = pivot; 413 414 /* 415 * Sort the right part (possibly in parallel), excluding 416 * known pivot. All elements from the central part are 417 * equal and therefore already sorted. 418 */ 419 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 420 sorter.forkSorter(bits | 1, upper, high); 421 } else { 422 sort(sorter, a, bits | 1, upper, high); 423 } 424 } 425 high = lower; // Iterate along the left part 426 } 427 } 428 429 /** 430 * Sorts the specified range of the array using mixed insertion sort. 431 * 432 * Mixed insertion sort is combination of simple insertion sort, 433 * pin insertion sort and pair insertion sort. 434 * 435 * In the context of Dual-Pivot Quicksort, the pivot element 436 * from the left part plays the role of sentinel, because it 437 * is less than any elements from the given part. Therefore, 438 * expensive check of the left range can be skipped on each 439 * iteration unless it is the leftmost call. 440 * 441 * @param a the array to be sorted 442 * @param low the index of the first element, inclusive, to be sorted 443 * @param end the index of the last element for simple insertion sort 444 * @param high the index of the last element, exclusive, to be sorted 445 */ 446 private static void mixedInsertionSort(int[] a, int low, int end, int high) { 447 if (end == high) { 448 449 /* 450 * Invoke simple insertion sort on tiny array. 451 */ 452 for (int i; ++low < end; ) { 453 int ai = a[i = low]; 454 455 while (ai < a[--i]) { 456 a[i + 1] = a[i]; 457 } 458 a[i + 1] = ai; 459 } 460 } else { 461 462 /* 463 * Start with pin insertion sort on small part. 464 * 465 * Pin insertion sort is extended simple insertion sort. 466 * The main idea of this sort is to put elements larger 467 * than an element called pin to the end of array (the 468 * proper area for such elements). It avoids expensive 469 * movements of these elements through the whole array. 470 */ 471 int pin = a[end]; 472 473 for (int i, p = high; ++low < end; ) { 474 int ai = a[i = low]; 475 476 if (ai < a[i - 1]) { // Small element 477 478 /* 479 * Insert small element into sorted part. 480 */ 481 a[i] = a[--i]; 482 483 while (ai < a[--i]) { 484 a[i + 1] = a[i]; 485 } 486 a[i + 1] = ai; 487 488 } else if (p > i && ai > pin) { // Large element 489 490 /* 491 * Find element smaller than pin. 492 */ 493 while (a[--p] > pin); 494 495 /* 496 * Swap it with large element. 497 */ 498 if (p > i) { 499 ai = a[p]; 500 a[p] = a[i]; 501 } 502 503 /* 504 * Insert small element into sorted part. 505 */ 506 while (ai < a[--i]) { 507 a[i + 1] = a[i]; 508 } 509 a[i + 1] = ai; 510 } 511 } 512 513 /* 514 * Continue with pair insertion sort on remain part. 515 */ 516 for (int i; low < high; ++low) { 517 int a1 = a[i = low], a2 = a[++low]; 518 519 /* 520 * Insert two elements per iteration: at first, insert the 521 * larger element and then insert the smaller element, but 522 * from the position where the larger element was inserted. 523 */ 524 if (a1 > a2) { 525 526 while (a1 < a[--i]) { 527 a[i + 2] = a[i]; 528 } 529 a[++i + 1] = a1; 530 531 while (a2 < a[--i]) { 532 a[i + 1] = a[i]; 533 } 534 a[i + 1] = a2; 535 536 } else if (a1 < a[i - 1]) { 537 538 while (a2 < a[--i]) { 539 a[i + 2] = a[i]; 540 } 541 a[++i + 1] = a2; 542 543 while (a1 < a[--i]) { 544 a[i + 1] = a[i]; 545 } 546 a[i + 1] = a1; 547 } 548 } 549 } 550 } 551 552 /** 553 * Sorts the specified range of the array using insertion sort. 554 * 555 * @param a the array to be sorted 556 * @param low the index of the first element, inclusive, to be sorted 557 * @param high the index of the last element, exclusive, to be sorted 558 */ 559 private static void insertionSort(int[] a, int low, int high) { 560 for (int i, k = low; ++k < high; ) { 561 int ai = a[i = k]; 562 563 if (ai < a[i - 1]) { 564 while (--i >= low && ai < a[i]) { 565 a[i + 1] = a[i]; 566 } 567 a[i + 1] = ai; 568 } 569 } 570 } 571 572 /** 573 * Sorts the specified range of the array using heap sort. 574 * 575 * @param a the array to be sorted 576 * @param low the index of the first element, inclusive, to be sorted 577 * @param high the index of the last element, exclusive, to be sorted 578 */ 579 private static void heapSort(int[] a, int low, int high) { 580 for (int k = (low + high) >>> 1; k > low; ) { 581 pushDown(a, --k, a[k], low, high); 582 } 583 while (--high > low) { 584 int max = a[low]; 585 pushDown(a, low, a[high], low, high); 586 a[high] = max; 587 } 588 } 589 590 /** 591 * Pushes specified element down during heap sort. 592 * 593 * @param a the given array 594 * @param p the start index 595 * @param value the given element 596 * @param low the index of the first element, inclusive, to be sorted 597 * @param high the index of the last element, exclusive, to be sorted 598 */ 599 private static void pushDown(int[] a, int p, int value, int low, int high) { 600 for (int k ;; a[p] = a[p = k]) { 601 k = (p << 1) - low + 2; // Index of the right child 602 603 if (k > high) { 604 break; 605 } 606 if (k == high || a[k] < a[k - 1]) { 607 --k; 608 } 609 if (a[k] <= value) { 610 break; 611 } 612 } 613 a[p] = value; 614 } 615 616 /** 617 * Tries to sort the specified range of the array. 618 * 619 * @param sorter parallel context 620 * @param a the array to be sorted 621 * @param low the index of the first element to be sorted 622 * @param size the array size 623 * @return true if finally sorted, false otherwise 624 */ 625 private static boolean tryMergeRuns(Sorter sorter, int[] a, int low, int size) { 626 627 /* 628 * The run array is constructed only if initial runs are 629 * long enough to continue, run[i] then holds start index 630 * of the i-th sequence of elements in non-descending order. 631 */ 632 int[] run = null; 633 int high = low + size; 634 int count = 1, last = low; 635 636 /* 637 * Identify all possible runs. 638 */ 639 for (int k = low + 1; k < high; ) { 640 641 /* 642 * Find the end index of the current run. 643 */ 644 if (a[k - 1] < a[k]) { 645 646 // Identify ascending sequence 647 while (++k < high && a[k - 1] <= a[k]); 648 649 } else if (a[k - 1] > a[k]) { 650 651 // Identify descending sequence 652 while (++k < high && a[k - 1] >= a[k]); 653 654 // Reverse into ascending order 655 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { 656 int ai = a[i]; a[i] = a[j]; a[j] = ai; 657 } 658 } else { // Identify constant sequence 659 for (int ak = a[k]; ++k < high && ak == a[k]; ); 660 661 if (k < high) { 662 continue; 663 } 664 } 665 666 /* 667 * Check special cases. 668 */ 669 if (run == null) { 670 if (k == high) { 671 672 /* 673 * The array is monotonous sequence, 674 * and therefore already sorted. 675 */ 676 return true; 677 } 678 679 if (k - low < MIN_FIRST_RUN_SIZE) { 680 681 /* 682 * The first run is too small 683 * to proceed with scanning. 684 */ 685 return false; 686 } 687 688 run = new int[((size >> 10) | 0x7F) & 0x3FF]; 689 run[0] = low; 690 691 } else if (a[last - 1] > a[last]) { 692 693 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { 694 695 /* 696 * The first runs are not long 697 * enough to continue scanning. 698 */ 699 return false; 700 } 701 702 if (++count == MAX_RUN_CAPACITY) { 703 704 /* 705 * Array is not highly structured. 706 */ 707 return false; 708 } 709 710 if (count == run.length) { 711 712 /* 713 * Increase capacity of index array. 714 */ 715 run = Arrays.copyOf(run, count << 1); 716 } 717 } 718 run[count] = (last = k); 719 } 720 721 /* 722 * Merge runs of highly structured array. 723 */ 724 if (count > 1) { 725 int[] b; int offset = low; 726 727 if (sorter == null || (b = (int[]) sorter.b) == null) { 728 b = new int[size]; 729 } else { 730 offset = sorter.offset; 731 } 732 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); 733 } 734 return true; 735 } 736 737 /** 738 * Merges the specified runs. 739 * 740 * @param a the source array 741 * @param b the temporary buffer used in merging 742 * @param offset the start index in the source, inclusive 743 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) 744 * @param parallel indicates whether merging is performed in parallel 745 * @param run the start indexes of the runs, inclusive 746 * @param lo the start index of the first run, inclusive 747 * @param hi the start index of the last run, inclusive 748 * @return the destination where runs are merged 749 */ 750 private static int[] mergeRuns(int[] a, int[] b, int offset, 751 int aim, boolean parallel, int[] run, int lo, int hi) { 752 753 if (hi - lo == 1) { 754 if (aim >= 0) { 755 return a; 756 } 757 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; 758 b[--j] = a[--i] 759 ); 760 return b; 761 } 762 763 /* 764 * Split into approximately equal parts. 765 */ 766 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; 767 while (run[++mi + 1] <= rmi); 768 769 /* 770 * Merge the left and right parts. 771 */ 772 int[] a1, a2; 773 774 if (parallel && hi - lo > MIN_RUN_COUNT) { 775 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); 776 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); 777 a2 = (int[]) merger.getDestination(); 778 } else { 779 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); 780 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); 781 } 782 783 int[] dst = a1 == a ? b : a; 784 785 int k = a1 == a ? run[lo] - offset : run[lo]; 786 int lo1 = a1 == b ? run[lo] - offset : run[lo]; 787 int hi1 = a1 == b ? run[mi] - offset : run[mi]; 788 int lo2 = a2 == b ? run[mi] - offset : run[mi]; 789 int hi2 = a2 == b ? run[hi] - offset : run[hi]; 790 791 if (parallel) { 792 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); 793 } else { 794 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); 795 } 796 return dst; 797 } 798 799 /** 800 * Merges the sorted parts. 801 * 802 * @param merger parallel context 803 * @param dst the destination where parts are merged 804 * @param k the start index of the destination, inclusive 805 * @param a1 the first part 806 * @param lo1 the start index of the first part, inclusive 807 * @param hi1 the end index of the first part, exclusive 808 * @param a2 the second part 809 * @param lo2 the start index of the second part, inclusive 810 * @param hi2 the end index of the second part, exclusive 811 */ 812 private static void mergeParts(Merger merger, int[] dst, int k, 813 int[] a1, int lo1, int hi1, int[] a2, int lo2, int hi2) { 814 815 if (merger != null && a1 == a2) { 816 817 while (true) { 818 819 /* 820 * The first part must be larger. 821 */ 822 if (hi1 - lo1 < hi2 - lo2) { 823 int lo = lo1; lo1 = lo2; lo2 = lo; 824 int hi = hi1; hi1 = hi2; hi2 = hi; 825 } 826 827 /* 828 * Small parts will be merged sequentially. 829 */ 830 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { 831 break; 832 } 833 834 /* 835 * Find the median of the larger part. 836 */ 837 int mi1 = (lo1 + hi1) >>> 1; 838 int key = a1[mi1]; 839 int mi2 = hi2; 840 841 /* 842 * Partition the smaller part. 843 */ 844 for (int loo = lo2; loo < mi2; ) { 845 int t = (loo + mi2) >>> 1; 846 847 if (key > a2[t]) { 848 loo = t + 1; 849 } else { 850 mi2 = t; 851 } 852 } 853 854 int d = mi2 - lo2 + mi1 - lo1; 855 856 /* 857 * Merge the right sub-parts in parallel. 858 */ 859 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); 860 861 /* 862 * Process the sub-left parts. 863 */ 864 hi1 = mi1; 865 hi2 = mi2; 866 } 867 } 868 869 /* 870 * Merge small parts sequentially. 871 */ 872 while (lo1 < hi1 && lo2 < hi2) { 873 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; 874 } 875 if (dst != a1 || k < lo1) { 876 while (lo1 < hi1) { 877 dst[k++] = a1[lo1++]; 878 } 879 } 880 if (dst != a2 || k < lo2) { 881 while (lo2 < hi2) { 882 dst[k++] = a2[lo2++]; 883 } 884 } 885 } 886 887 // [long] 888 889 /** 890 * Sorts the specified range of the array using parallel merge 891 * sort and/or Dual-Pivot Quicksort. 892 * 893 * To balance the faster splitting and parallelism of merge sort 894 * with the faster element partitioning of Quicksort, ranges are 895 * subdivided in tiers such that, if there is enough parallelism, 896 * the four-way parallel merge is started, still ensuring enough 897 * parallelism to process the partitions. 898 * 899 * @param a the array to be sorted 900 * @param parallelism the parallelism level 901 * @param low the index of the first element, inclusive, to be sorted 902 * @param high the index of the last element, exclusive, to be sorted 903 */ 904 static void sort(long[] a, int parallelism, int low, int high) { 905 int size = high - low; 906 907 if (parallelism > 0 && size > MIN_PARALLEL_SORT_SIZE) { 908 int depth = getDepth(parallelism, size >> 12); 909 long[] b = depth == 0 ? null : new long[size]; 910 new Sorter(null, a, b, low, size, low, depth).invoke(); 911 } else { 912 sort(null, a, 0, low, high); 913 } 914 } 915 916 /** 917 * Sorts the specified array using the Dual-Pivot Quicksort and/or 918 * other sorts in special-cases, possibly with parallel partitions. 919 * 920 * @param sorter parallel context 921 * @param a the array to be sorted 922 * @param bits the combination of recursion depth and bit flag, where 923 * the right bit "0" indicates that array is the leftmost part 924 * @param low the index of the first element, inclusive, to be sorted 925 * @param high the index of the last element, exclusive, to be sorted 926 */ 927 static void sort(Sorter sorter, long[] a, int bits, int low, int high) { 928 while (true) { 929 int end = high - 1, size = high - low; 930 931 /* 932 * Run mixed insertion sort on small non-leftmost parts. 933 */ 934 if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { 935 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 936 return; 937 } 938 939 /* 940 * Invoke insertion sort on small leftmost part. 941 */ 942 if (size < MAX_INSERTION_SORT_SIZE) { 943 insertionSort(a, low, high); 944 return; 945 } 946 947 /* 948 * Check if the whole array or large non-leftmost 949 * parts are nearly sorted and then merge runs. 950 */ 951 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) 952 && tryMergeRuns(sorter, a, low, size)) { 953 return; 954 } 955 956 /* 957 * Switch to heap sort if execution 958 * time is becoming quadratic. 959 */ 960 if ((bits += 2) > MAX_RECURSION_DEPTH) { 961 heapSort(a, low, high); 962 return; 963 } 964 965 /* 966 * Use an inexpensive approximation of the golden ratio 967 * to select five sample elements and determine pivots. 968 */ 969 int step = (size >> 3) * 3 + 3; 970 971 /* 972 * Five elements around (and including) the central element 973 * will be used for pivot selection as described below. The 974 * unequal choice of spacing these elements was empirically 975 * determined to work well on a wide variety of inputs. 976 */ 977 int e1 = low + step; 978 int e5 = end - step; 979 int e3 = (e1 + e5) >>> 1; 980 int e2 = (e1 + e3) >>> 1; 981 int e4 = (e3 + e5) >>> 1; 982 long a3 = a[e3]; 983 984 /* 985 * Sort these elements in place by the combination 986 * of 4-element sorting network and insertion sort. 987 * 988 * 5 ------o-----------o----------- 989 * | | 990 * 4 ------|-----o-----o-----o----- 991 * | | | 992 * 2 ------o-----|-----o-----o----- 993 * | | 994 * 1 ------------o-----o----------- 995 */ 996 if (a[e5] < a[e2]) { long t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 997 if (a[e4] < a[e1]) { long t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 998 if (a[e5] < a[e4]) { long t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 999 if (a[e2] < a[e1]) { long t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 1000 if (a[e4] < a[e2]) { long t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 1001 1002 if (a3 < a[e2]) { 1003 if (a3 < a[e1]) { 1004 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 1005 } else { 1006 a[e3] = a[e2]; a[e2] = a3; 1007 } 1008 } else if (a3 > a[e4]) { 1009 if (a3 > a[e5]) { 1010 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 1011 } else { 1012 a[e3] = a[e4]; a[e4] = a3; 1013 } 1014 } 1015 1016 // Pointers 1017 int lower = low; // The index of the last element of the left part 1018 int upper = end; // The index of the first element of the right part 1019 1020 /* 1021 * Partitioning with 2 pivots in case of different elements. 1022 */ 1023 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 1024 1025 /* 1026 * Use the first and fifth of the five sorted elements as 1027 * the pivots. These values are inexpensive approximation 1028 * of tertiles. Note, that pivot1 < pivot2. 1029 */ 1030 long pivot1 = a[e1]; 1031 long pivot2 = a[e5]; 1032 1033 /* 1034 * The first and the last elements to be sorted are moved 1035 * to the locations formerly occupied by the pivots. When 1036 * partitioning is completed, the pivots are swapped back 1037 * into their final positions, and excluded from the next 1038 * subsequent sorting. 1039 */ 1040 a[e1] = a[lower]; 1041 a[e5] = a[upper]; 1042 1043 /* 1044 * Skip elements, which are less or greater than the pivots. 1045 */ 1046 while (a[++lower] < pivot1); 1047 while (a[--upper] > pivot2); 1048 1049 /* 1050 * Backward 3-interval partitioning 1051 * 1052 * left part central part right part 1053 * +------------------------------------------------------------+ 1054 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 1055 * +------------------------------------------------------------+ 1056 * ^ ^ ^ 1057 * | | | 1058 * lower k upper 1059 * 1060 * Invariants: 1061 * 1062 * all in (low, lower] < pivot1 1063 * pivot1 <= all in (k, upper) <= pivot2 1064 * all in [upper, end) > pivot2 1065 * 1066 * Pointer k is the last index of ?-part 1067 */ 1068 for (int unused = --lower, k = ++upper; --k > lower; ) { 1069 long ak = a[k]; 1070 1071 if (ak < pivot1) { // Move a[k] to the left side 1072 while (lower < k) { 1073 if (a[++lower] >= pivot1) { 1074 if (a[lower] > pivot2) { 1075 a[k] = a[--upper]; 1076 a[upper] = a[lower]; 1077 } else { 1078 a[k] = a[lower]; 1079 } 1080 a[lower] = ak; 1081 break; 1082 } 1083 } 1084 } else if (ak > pivot2) { // Move a[k] to the right side 1085 a[k] = a[--upper]; 1086 a[upper] = ak; 1087 } 1088 } 1089 1090 /* 1091 * Swap the pivots into their final positions. 1092 */ 1093 a[low] = a[lower]; a[lower] = pivot1; 1094 a[end] = a[upper]; a[upper] = pivot2; 1095 1096 /* 1097 * Sort non-left parts recursively (possibly in parallel), 1098 * excluding known pivots. 1099 */ 1100 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 1101 sorter.forkSorter(bits | 1, lower + 1, upper); 1102 sorter.forkSorter(bits | 1, upper + 1, high); 1103 } else { 1104 sort(sorter, a, bits | 1, lower + 1, upper); 1105 sort(sorter, a, bits | 1, upper + 1, high); 1106 } 1107 1108 } else { // Use single pivot in case of many equal elements 1109 1110 /* 1111 * Use the third of the five sorted elements as the pivot. 1112 * This value is inexpensive approximation of the median. 1113 */ 1114 long pivot = a[e3]; 1115 1116 /* 1117 * The first element to be sorted is moved to the 1118 * location formerly occupied by the pivot. After 1119 * completion of partitioning the pivot is swapped 1120 * back into its final position, and excluded from 1121 * the next subsequent sorting. 1122 */ 1123 a[e3] = a[lower]; 1124 1125 /* 1126 * Traditional 3-way (Dutch National Flag) partitioning 1127 * 1128 * left part central part right part 1129 * +------------------------------------------------------+ 1130 * | < pivot | ? | == pivot | > pivot | 1131 * +------------------------------------------------------+ 1132 * ^ ^ ^ 1133 * | | | 1134 * lower k upper 1135 * 1136 * Invariants: 1137 * 1138 * all in (low, lower] < pivot 1139 * all in (k, upper) == pivot 1140 * all in [upper, end] > pivot 1141 * 1142 * Pointer k is the last index of ?-part 1143 */ 1144 for (int k = ++upper; --k > lower; ) { 1145 long ak = a[k]; 1146 1147 if (ak != pivot) { 1148 a[k] = pivot; 1149 1150 if (ak < pivot) { // Move a[k] to the left side 1151 while (a[++lower] < pivot); 1152 1153 if (a[lower] > pivot) { 1154 a[--upper] = a[lower]; 1155 } 1156 a[lower] = ak; 1157 } else { // ak > pivot - Move a[k] to the right side 1158 a[--upper] = ak; 1159 } 1160 } 1161 } 1162 1163 /* 1164 * Swap the pivot into its final position. 1165 */ 1166 a[low] = a[lower]; a[lower] = pivot; 1167 1168 /* 1169 * Sort the right part (possibly in parallel), excluding 1170 * known pivot. All elements from the central part are 1171 * equal and therefore already sorted. 1172 */ 1173 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 1174 sorter.forkSorter(bits | 1, upper, high); 1175 } else { 1176 sort(sorter, a, bits | 1, upper, high); 1177 } 1178 } 1179 high = lower; // Iterate along the left part 1180 } 1181 } 1182 1183 /** 1184 * Sorts the specified range of the array using mixed insertion sort. 1185 * 1186 * Mixed insertion sort is combination of simple insertion sort, 1187 * pin insertion sort and pair insertion sort. 1188 * 1189 * In the context of Dual-Pivot Quicksort, the pivot element 1190 * from the left part plays the role of sentinel, because it 1191 * is less than any elements from the given part. Therefore, 1192 * expensive check of the left range can be skipped on each 1193 * iteration unless it is the leftmost call. 1194 * 1195 * @param a the array to be sorted 1196 * @param low the index of the first element, inclusive, to be sorted 1197 * @param end the index of the last element for simple insertion sort 1198 * @param high the index of the last element, exclusive, to be sorted 1199 */ 1200 private static void mixedInsertionSort(long[] a, int low, int end, int high) { 1201 if (end == high) { 1202 1203 /* 1204 * Invoke simple insertion sort on tiny array. 1205 */ 1206 for (int i; ++low < end; ) { 1207 long ai = a[i = low]; 1208 1209 while (ai < a[--i]) { 1210 a[i + 1] = a[i]; 1211 } 1212 a[i + 1] = ai; 1213 } 1214 } else { 1215 1216 /* 1217 * Start with pin insertion sort on small part. 1218 * 1219 * Pin insertion sort is extended simple insertion sort. 1220 * The main idea of this sort is to put elements larger 1221 * than an element called pin to the end of array (the 1222 * proper area for such elements). It avoids expensive 1223 * movements of these elements through the whole array. 1224 */ 1225 long pin = a[end]; 1226 1227 for (int i, p = high; ++low < end; ) { 1228 long ai = a[i = low]; 1229 1230 if (ai < a[i - 1]) { // Small element 1231 1232 /* 1233 * Insert small element into sorted part. 1234 */ 1235 a[i] = a[--i]; 1236 1237 while (ai < a[--i]) { 1238 a[i + 1] = a[i]; 1239 } 1240 a[i + 1] = ai; 1241 1242 } else if (p > i && ai > pin) { // Large element 1243 1244 /* 1245 * Find element smaller than pin. 1246 */ 1247 while (a[--p] > pin); 1248 1249 /* 1250 * Swap it with large element. 1251 */ 1252 if (p > i) { 1253 ai = a[p]; 1254 a[p] = a[i]; 1255 } 1256 1257 /* 1258 * Insert small element into sorted part. 1259 */ 1260 while (ai < a[--i]) { 1261 a[i + 1] = a[i]; 1262 } 1263 a[i + 1] = ai; 1264 } 1265 } 1266 1267 /* 1268 * Continue with pair insertion sort on remain part. 1269 */ 1270 for (int i; low < high; ++low) { 1271 long a1 = a[i = low], a2 = a[++low]; 1272 1273 /* 1274 * Insert two elements per iteration: at first, insert the 1275 * larger element and then insert the smaller element, but 1276 * from the position where the larger element was inserted. 1277 */ 1278 if (a1 > a2) { 1279 1280 while (a1 < a[--i]) { 1281 a[i + 2] = a[i]; 1282 } 1283 a[++i + 1] = a1; 1284 1285 while (a2 < a[--i]) { 1286 a[i + 1] = a[i]; 1287 } 1288 a[i + 1] = a2; 1289 1290 } else if (a1 < a[i - 1]) { 1291 1292 while (a2 < a[--i]) { 1293 a[i + 2] = a[i]; 1294 } 1295 a[++i + 1] = a2; 1296 1297 while (a1 < a[--i]) { 1298 a[i + 1] = a[i]; 1299 } 1300 a[i + 1] = a1; 1301 } 1302 } 1303 } 1304 } 1305 1306 /** 1307 * Sorts the specified range of the array using insertion sort. 1308 * 1309 * @param a the array to be sorted 1310 * @param low the index of the first element, inclusive, to be sorted 1311 * @param high the index of the last element, exclusive, to be sorted 1312 */ 1313 private static void insertionSort(long[] a, int low, int high) { 1314 for (int i, k = low; ++k < high; ) { 1315 long ai = a[i = k]; 1316 1317 if (ai < a[i - 1]) { 1318 while (--i >= low && ai < a[i]) { 1319 a[i + 1] = a[i]; 1320 } 1321 a[i + 1] = ai; 1322 } 1323 } 1324 } 1325 1326 /** 1327 * Sorts the specified range of the array using heap sort. 1328 * 1329 * @param a the array to be sorted 1330 * @param low the index of the first element, inclusive, to be sorted 1331 * @param high the index of the last element, exclusive, to be sorted 1332 */ 1333 private static void heapSort(long[] a, int low, int high) { 1334 for (int k = (low + high) >>> 1; k > low; ) { 1335 pushDown(a, --k, a[k], low, high); 1336 } 1337 while (--high > low) { 1338 long max = a[low]; 1339 pushDown(a, low, a[high], low, high); 1340 a[high] = max; 1341 } 1342 } 1343 1344 /** 1345 * Pushes specified element down during heap sort. 1346 * 1347 * @param a the given array 1348 * @param p the start index 1349 * @param value the given element 1350 * @param low the index of the first element, inclusive, to be sorted 1351 * @param high the index of the last element, exclusive, to be sorted 1352 */ 1353 private static void pushDown(long[] a, int p, long value, int low, int high) { 1354 for (int k ;; a[p] = a[p = k]) { 1355 k = (p << 1) - low + 2; // Index of the right child 1356 1357 if (k > high) { 1358 break; 1359 } 1360 if (k == high || a[k] < a[k - 1]) { 1361 --k; 1362 } 1363 if (a[k] <= value) { 1364 break; 1365 } 1366 } 1367 a[p] = value; 1368 } 1369 1370 /** 1371 * Tries to sort the specified range of the array. 1372 * 1373 * @param sorter parallel context 1374 * @param a the array to be sorted 1375 * @param low the index of the first element to be sorted 1376 * @param size the array size 1377 * @return true if finally sorted, false otherwise 1378 */ 1379 private static boolean tryMergeRuns(Sorter sorter, long[] a, int low, int size) { 1380 1381 /* 1382 * The run array is constructed only if initial runs are 1383 * long enough to continue, run[i] then holds start index 1384 * of the i-th sequence of elements in non-descending order. 1385 */ 1386 int[] run = null; 1387 int high = low + size; 1388 int count = 1, last = low; 1389 1390 /* 1391 * Identify all possible runs. 1392 */ 1393 for (int k = low + 1; k < high; ) { 1394 1395 /* 1396 * Find the end index of the current run. 1397 */ 1398 if (a[k - 1] < a[k]) { 1399 1400 // Identify ascending sequence 1401 while (++k < high && a[k - 1] <= a[k]); 1402 1403 } else if (a[k - 1] > a[k]) { 1404 1405 // Identify descending sequence 1406 while (++k < high && a[k - 1] >= a[k]); 1407 1408 // Reverse into ascending order 1409 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { 1410 long ai = a[i]; a[i] = a[j]; a[j] = ai; 1411 } 1412 } else { // Identify constant sequence 1413 for (long ak = a[k]; ++k < high && ak == a[k]; ); 1414 1415 if (k < high) { 1416 continue; 1417 } 1418 } 1419 1420 /* 1421 * Check special cases. 1422 */ 1423 if (run == null) { 1424 if (k == high) { 1425 1426 /* 1427 * The array is monotonous sequence, 1428 * and therefore already sorted. 1429 */ 1430 return true; 1431 } 1432 1433 if (k - low < MIN_FIRST_RUN_SIZE) { 1434 1435 /* 1436 * The first run is too small 1437 * to proceed with scanning. 1438 */ 1439 return false; 1440 } 1441 1442 run = new int[((size >> 10) | 0x7F) & 0x3FF]; 1443 run[0] = low; 1444 1445 } else if (a[last - 1] > a[last]) { 1446 1447 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { 1448 1449 /* 1450 * The first runs are not long 1451 * enough to continue scanning. 1452 */ 1453 return false; 1454 } 1455 1456 if (++count == MAX_RUN_CAPACITY) { 1457 1458 /* 1459 * Array is not highly structured. 1460 */ 1461 return false; 1462 } 1463 1464 if (count == run.length) { 1465 1466 /* 1467 * Increase capacity of index array. 1468 */ 1469 run = Arrays.copyOf(run, count << 1); 1470 } 1471 } 1472 run[count] = (last = k); 1473 } 1474 1475 /* 1476 * Merge runs of highly structured array. 1477 */ 1478 if (count > 1) { 1479 long[] b; int offset = low; 1480 1481 if (sorter == null || (b = (long[]) sorter.b) == null) { 1482 b = new long[size]; 1483 } else { 1484 offset = sorter.offset; 1485 } 1486 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); 1487 } 1488 return true; 1489 } 1490 1491 /** 1492 * Merges the specified runs. 1493 * 1494 * @param a the source array 1495 * @param b the temporary buffer used in merging 1496 * @param offset the start index in the source, inclusive 1497 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) 1498 * @param parallel indicates whether merging is performed in parallel 1499 * @param run the start indexes of the runs, inclusive 1500 * @param lo the start index of the first run, inclusive 1501 * @param hi the start index of the last run, inclusive 1502 * @return the destination where runs are merged 1503 */ 1504 private static long[] mergeRuns(long[] a, long[] b, int offset, 1505 int aim, boolean parallel, int[] run, int lo, int hi) { 1506 1507 if (hi - lo == 1) { 1508 if (aim >= 0) { 1509 return a; 1510 } 1511 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; 1512 b[--j] = a[--i] 1513 ); 1514 return b; 1515 } 1516 1517 /* 1518 * Split into approximately equal parts. 1519 */ 1520 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; 1521 while (run[++mi + 1] <= rmi); 1522 1523 /* 1524 * Merge the left and right parts. 1525 */ 1526 long[] a1, a2; 1527 1528 if (parallel && hi - lo > MIN_RUN_COUNT) { 1529 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); 1530 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); 1531 a2 = (long[]) merger.getDestination(); 1532 } else { 1533 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); 1534 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); 1535 } 1536 1537 long[] dst = a1 == a ? b : a; 1538 1539 int k = a1 == a ? run[lo] - offset : run[lo]; 1540 int lo1 = a1 == b ? run[lo] - offset : run[lo]; 1541 int hi1 = a1 == b ? run[mi] - offset : run[mi]; 1542 int lo2 = a2 == b ? run[mi] - offset : run[mi]; 1543 int hi2 = a2 == b ? run[hi] - offset : run[hi]; 1544 1545 if (parallel) { 1546 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); 1547 } else { 1548 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); 1549 } 1550 return dst; 1551 } 1552 1553 /** 1554 * Merges the sorted parts. 1555 * 1556 * @param merger parallel context 1557 * @param dst the destination where parts are merged 1558 * @param k the start index of the destination, inclusive 1559 * @param a1 the first part 1560 * @param lo1 the start index of the first part, inclusive 1561 * @param hi1 the end index of the first part, exclusive 1562 * @param a2 the second part 1563 * @param lo2 the start index of the second part, inclusive 1564 * @param hi2 the end index of the second part, exclusive 1565 */ 1566 private static void mergeParts(Merger merger, long[] dst, int k, 1567 long[] a1, int lo1, int hi1, long[] a2, int lo2, int hi2) { 1568 1569 if (merger != null && a1 == a2) { 1570 1571 while (true) { 1572 1573 /* 1574 * The first part must be larger. 1575 */ 1576 if (hi1 - lo1 < hi2 - lo2) { 1577 int lo = lo1; lo1 = lo2; lo2 = lo; 1578 int hi = hi1; hi1 = hi2; hi2 = hi; 1579 } 1580 1581 /* 1582 * Small parts will be merged sequentially. 1583 */ 1584 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { 1585 break; 1586 } 1587 1588 /* 1589 * Find the median of the larger part. 1590 */ 1591 int mi1 = (lo1 + hi1) >>> 1; 1592 long key = a1[mi1]; 1593 int mi2 = hi2; 1594 1595 /* 1596 * Partition the smaller part. 1597 */ 1598 for (int loo = lo2; loo < mi2; ) { 1599 int t = (loo + mi2) >>> 1; 1600 1601 if (key > a2[t]) { 1602 loo = t + 1; 1603 } else { 1604 mi2 = t; 1605 } 1606 } 1607 1608 int d = mi2 - lo2 + mi1 - lo1; 1609 1610 /* 1611 * Merge the right sub-parts in parallel. 1612 */ 1613 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); 1614 1615 /* 1616 * Process the sub-left parts. 1617 */ 1618 hi1 = mi1; 1619 hi2 = mi2; 1620 } 1621 } 1622 1623 /* 1624 * Merge small parts sequentially. 1625 */ 1626 while (lo1 < hi1 && lo2 < hi2) { 1627 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; 1628 } 1629 if (dst != a1 || k < lo1) { 1630 while (lo1 < hi1) { 1631 dst[k++] = a1[lo1++]; 1632 } 1633 } 1634 if (dst != a2 || k < lo2) { 1635 while (lo2 < hi2) { 1636 dst[k++] = a2[lo2++]; 1637 } 1638 } 1639 } 1640 1641 // [byte] 1642 1643 /** 1644 * Sorts the specified range of the array using 1645 * counting sort or insertion sort. 1646 * 1647 * @param a the array to be sorted 1648 * @param low the index of the first element, inclusive, to be sorted 1649 * @param high the index of the last element, exclusive, to be sorted 1650 */ 1651 static void sort(byte[] a, int low, int high) { 1652 if (high - low > MIN_BYTE_COUNTING_SORT_SIZE) { 1653 countingSort(a, low, high); 1654 } else { 1655 insertionSort(a, low, high); 1656 } 1657 } 1658 1659 /** 1660 * Sorts the specified range of the array using insertion sort. 1661 * 1662 * @param a the array to be sorted 1663 * @param low the index of the first element, inclusive, to be sorted 1664 * @param high the index of the last element, exclusive, to be sorted 1665 */ 1666 private static void insertionSort(byte[] a, int low, int high) { 1667 for (int i, k = low; ++k < high; ) { 1668 byte ai = a[i = k]; 1669 1670 if (ai < a[i - 1]) { 1671 while (--i >= low && ai < a[i]) { 1672 a[i + 1] = a[i]; 1673 } 1674 a[i + 1] = ai; 1675 } 1676 } 1677 } 1678 1679 /** 1680 * The number of distinct byte values. 1681 */ 1682 private static final int NUM_BYTE_VALUES = 1 << 8; 1683 1684 /** 1685 * Max index of byte counter. 1686 */ 1687 private static final int MAX_BYTE_INDEX = Byte.MAX_VALUE + NUM_BYTE_VALUES + 1; 1688 1689 /** 1690 * Sorts the specified range of the array using counting sort. 1691 * 1692 * @param a the array to be sorted 1693 * @param low the index of the first element, inclusive, to be sorted 1694 * @param high the index of the last element, exclusive, to be sorted 1695 */ 1696 private static void countingSort(byte[] a, int low, int high) { 1697 int[] count = new int[NUM_BYTE_VALUES]; 1698 1699 /* 1700 * Compute a histogram with the number of each values. 1701 */ 1702 for (int i = high; i > low; ++count[a[--i] & 0xFF]); 1703 1704 /* 1705 * Place values on their final positions. 1706 */ 1707 for (int k = high, i = MAX_BYTE_INDEX; --i > Byte.MAX_VALUE; ) { 1708 int value = i & 0xFF; 1709 for (low = k - count[value]; k > low; a[--k] = (byte) value); 1710 } 1711 } 1712 1713 // [char] 1714 1715 /** 1716 * Sorts the specified range of the array using 1717 * counting sort or Dual-Pivot Quicksort. 1718 * 1719 * @param a the array to be sorted 1720 * @param low the index of the first element, inclusive, to be sorted 1721 * @param high the index of the last element, exclusive, to be sorted 1722 */ 1723 static void sort(char[] a, int low, int high) { 1724 if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) { 1725 countingSort(a, low, high); 1726 } else { 1727 sort(a, 0, low, high); 1728 } 1729 } 1730 1731 /** 1732 * Sorts the specified array using the Dual-Pivot Quicksort and/or 1733 * other sorts in special-cases, possibly with parallel partitions. 1734 * 1735 * @param a the array to be sorted 1736 * @param bits the combination of recursion depth and bit flag, where 1737 * the right bit "0" indicates that array is the leftmost part 1738 * @param low the index of the first element, inclusive, to be sorted 1739 * @param high the index of the last element, exclusive, to be sorted 1740 */ 1741 static void sort(char[] a, int bits, int low, int high) { 1742 while (true) { 1743 int end = high - 1, size = high - low; 1744 1745 /* 1746 * Run mixed insertion sort on small non-leftmost parts. 1747 */ 1748 if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { 1749 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 1750 return; 1751 } 1752 1753 /* 1754 * Invoke insertion sort on small leftmost part. 1755 */ 1756 if (size < MAX_INSERTION_SORT_SIZE) { 1757 insertionSort(a, low, high); 1758 return; 1759 } 1760 1761 /* 1762 * Switch to counting sort if execution 1763 * time is becoming quadratic. 1764 */ 1765 if ((bits += 2) > MAX_RECURSION_DEPTH) { 1766 countingSort(a, low, high); 1767 return; 1768 } 1769 1770 /* 1771 * Use an inexpensive approximation of the golden ratio 1772 * to select five sample elements and determine pivots. 1773 */ 1774 int step = (size >> 3) * 3 + 3; 1775 1776 /* 1777 * Five elements around (and including) the central element 1778 * will be used for pivot selection as described below. The 1779 * unequal choice of spacing these elements was empirically 1780 * determined to work well on a wide variety of inputs. 1781 */ 1782 int e1 = low + step; 1783 int e5 = end - step; 1784 int e3 = (e1 + e5) >>> 1; 1785 int e2 = (e1 + e3) >>> 1; 1786 int e4 = (e3 + e5) >>> 1; 1787 char a3 = a[e3]; 1788 1789 /* 1790 * Sort these elements in place by the combination 1791 * of 4-element sorting network and insertion sort. 1792 * 1793 * 5 ------o-----------o----------- 1794 * | | 1795 * 4 ------|-----o-----o-----o----- 1796 * | | | 1797 * 2 ------o-----|-----o-----o----- 1798 * | | 1799 * 1 ------------o-----o----------- 1800 */ 1801 if (a[e5] < a[e2]) { char t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 1802 if (a[e4] < a[e1]) { char t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 1803 if (a[e5] < a[e4]) { char t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 1804 if (a[e2] < a[e1]) { char t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 1805 if (a[e4] < a[e2]) { char t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 1806 1807 if (a3 < a[e2]) { 1808 if (a3 < a[e1]) { 1809 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 1810 } else { 1811 a[e3] = a[e2]; a[e2] = a3; 1812 } 1813 } else if (a3 > a[e4]) { 1814 if (a3 > a[e5]) { 1815 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 1816 } else { 1817 a[e3] = a[e4]; a[e4] = a3; 1818 } 1819 } 1820 1821 // Pointers 1822 int lower = low; // The index of the last element of the left part 1823 int upper = end; // The index of the first element of the right part 1824 1825 /* 1826 * Partitioning with 2 pivots in case of different elements. 1827 */ 1828 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 1829 1830 /* 1831 * Use the first and fifth of the five sorted elements as 1832 * the pivots. These values are inexpensive approximation 1833 * of tertiles. Note, that pivot1 < pivot2. 1834 */ 1835 char pivot1 = a[e1]; 1836 char pivot2 = a[e5]; 1837 1838 /* 1839 * The first and the last elements to be sorted are moved 1840 * to the locations formerly occupied by the pivots. When 1841 * partitioning is completed, the pivots are swapped back 1842 * into their final positions, and excluded from the next 1843 * subsequent sorting. 1844 */ 1845 a[e1] = a[lower]; 1846 a[e5] = a[upper]; 1847 1848 /* 1849 * Skip elements, which are less or greater than the pivots. 1850 */ 1851 while (a[++lower] < pivot1); 1852 while (a[--upper] > pivot2); 1853 1854 /* 1855 * Backward 3-interval partitioning 1856 * 1857 * left part central part right part 1858 * +------------------------------------------------------------+ 1859 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 1860 * +------------------------------------------------------------+ 1861 * ^ ^ ^ 1862 * | | | 1863 * lower k upper 1864 * 1865 * Invariants: 1866 * 1867 * all in (low, lower] < pivot1 1868 * pivot1 <= all in (k, upper) <= pivot2 1869 * all in [upper, end) > pivot2 1870 * 1871 * Pointer k is the last index of ?-part 1872 */ 1873 for (int unused = --lower, k = ++upper; --k > lower; ) { 1874 char ak = a[k]; 1875 1876 if (ak < pivot1) { // Move a[k] to the left side 1877 while (lower < k) { 1878 if (a[++lower] >= pivot1) { 1879 if (a[lower] > pivot2) { 1880 a[k] = a[--upper]; 1881 a[upper] = a[lower]; 1882 } else { 1883 a[k] = a[lower]; 1884 } 1885 a[lower] = ak; 1886 break; 1887 } 1888 } 1889 } else if (ak > pivot2) { // Move a[k] to the right side 1890 a[k] = a[--upper]; 1891 a[upper] = ak; 1892 } 1893 } 1894 1895 /* 1896 * Swap the pivots into their final positions. 1897 */ 1898 a[low] = a[lower]; a[lower] = pivot1; 1899 a[end] = a[upper]; a[upper] = pivot2; 1900 1901 /* 1902 * Sort non-left parts recursively, 1903 * excluding known pivots. 1904 */ 1905 sort(a, bits | 1, lower + 1, upper); 1906 sort(a, bits | 1, upper + 1, high); 1907 1908 } else { // Use single pivot in case of many equal elements 1909 1910 /* 1911 * Use the third of the five sorted elements as the pivot. 1912 * This value is inexpensive approximation of the median. 1913 */ 1914 char pivot = a[e3]; 1915 1916 /* 1917 * The first element to be sorted is moved to the 1918 * location formerly occupied by the pivot. After 1919 * completion of partitioning the pivot is swapped 1920 * back into its final position, and excluded from 1921 * the next subsequent sorting. 1922 */ 1923 a[e3] = a[lower]; 1924 1925 /* 1926 * Traditional 3-way (Dutch National Flag) partitioning 1927 * 1928 * left part central part right part 1929 * +------------------------------------------------------+ 1930 * | < pivot | ? | == pivot | > pivot | 1931 * +------------------------------------------------------+ 1932 * ^ ^ ^ 1933 * | | | 1934 * lower k upper 1935 * 1936 * Invariants: 1937 * 1938 * all in (low, lower] < pivot 1939 * all in (k, upper) == pivot 1940 * all in [upper, end] > pivot 1941 * 1942 * Pointer k is the last index of ?-part 1943 */ 1944 for (int k = ++upper; --k > lower; ) { 1945 char ak = a[k]; 1946 1947 if (ak != pivot) { 1948 a[k] = pivot; 1949 1950 if (ak < pivot) { // Move a[k] to the left side 1951 while (a[++lower] < pivot); 1952 1953 if (a[lower] > pivot) { 1954 a[--upper] = a[lower]; 1955 } 1956 a[lower] = ak; 1957 } else { // ak > pivot - Move a[k] to the right side 1958 a[--upper] = ak; 1959 } 1960 } 1961 } 1962 1963 /* 1964 * Swap the pivot into its final position. 1965 */ 1966 a[low] = a[lower]; a[lower] = pivot; 1967 1968 /* 1969 * Sort the right part, excluding known pivot. 1970 * All elements from the central part are 1971 * equal and therefore already sorted. 1972 */ 1973 sort(a, bits | 1, upper, high); 1974 } 1975 high = lower; // Iterate along the left part 1976 } 1977 } 1978 1979 /** 1980 * Sorts the specified range of the array using mixed insertion sort. 1981 * 1982 * Mixed insertion sort is combination of simple insertion sort, 1983 * pin insertion sort and pair insertion sort. 1984 * 1985 * In the context of Dual-Pivot Quicksort, the pivot element 1986 * from the left part plays the role of sentinel, because it 1987 * is less than any elements from the given part. Therefore, 1988 * expensive check of the left range can be skipped on each 1989 * iteration unless it is the leftmost call. 1990 * 1991 * @param a the array to be sorted 1992 * @param low the index of the first element, inclusive, to be sorted 1993 * @param end the index of the last element for simple insertion sort 1994 * @param high the index of the last element, exclusive, to be sorted 1995 */ 1996 private static void mixedInsertionSort(char[] a, int low, int end, int high) { 1997 if (end == high) { 1998 1999 /* 2000 * Invoke simple insertion sort on tiny array. 2001 */ 2002 for (int i; ++low < end; ) { 2003 char ai = a[i = low]; 2004 2005 while (ai < a[--i]) { 2006 a[i + 1] = a[i]; 2007 } 2008 a[i + 1] = ai; 2009 } 2010 } else { 2011 2012 /* 2013 * Start with pin insertion sort on small part. 2014 * 2015 * Pin insertion sort is extended simple insertion sort. 2016 * The main idea of this sort is to put elements larger 2017 * than an element called pin to the end of array (the 2018 * proper area for such elements). It avoids expensive 2019 * movements of these elements through the whole array. 2020 */ 2021 char pin = a[end]; 2022 2023 for (int i, p = high; ++low < end; ) { 2024 char ai = a[i = low]; 2025 2026 if (ai < a[i - 1]) { // Small element 2027 2028 /* 2029 * Insert small element into sorted part. 2030 */ 2031 a[i] = a[--i]; 2032 2033 while (ai < a[--i]) { 2034 a[i + 1] = a[i]; 2035 } 2036 a[i + 1] = ai; 2037 2038 } else if (p > i && ai > pin) { // Large element 2039 2040 /* 2041 * Find element smaller than pin. 2042 */ 2043 while (a[--p] > pin); 2044 2045 /* 2046 * Swap it with large element. 2047 */ 2048 if (p > i) { 2049 ai = a[p]; 2050 a[p] = a[i]; 2051 } 2052 2053 /* 2054 * Insert small element into sorted part. 2055 */ 2056 while (ai < a[--i]) { 2057 a[i + 1] = a[i]; 2058 } 2059 a[i + 1] = ai; 2060 } 2061 } 2062 2063 /* 2064 * Continue with pair insertion sort on remain part. 2065 */ 2066 for (int i; low < high; ++low) { 2067 char a1 = a[i = low], a2 = a[++low]; 2068 2069 /* 2070 * Insert two elements per iteration: at first, insert the 2071 * larger element and then insert the smaller element, but 2072 * from the position where the larger element was inserted. 2073 */ 2074 if (a1 > a2) { 2075 2076 while (a1 < a[--i]) { 2077 a[i + 2] = a[i]; 2078 } 2079 a[++i + 1] = a1; 2080 2081 while (a2 < a[--i]) { 2082 a[i + 1] = a[i]; 2083 } 2084 a[i + 1] = a2; 2085 2086 } else if (a1 < a[i - 1]) { 2087 2088 while (a2 < a[--i]) { 2089 a[i + 2] = a[i]; 2090 } 2091 a[++i + 1] = a2; 2092 2093 while (a1 < a[--i]) { 2094 a[i + 1] = a[i]; 2095 } 2096 a[i + 1] = a1; 2097 } 2098 } 2099 } 2100 } 2101 2102 /** 2103 * Sorts the specified range of the array using insertion sort. 2104 * 2105 * @param a the array to be sorted 2106 * @param low the index of the first element, inclusive, to be sorted 2107 * @param high the index of the last element, exclusive, to be sorted 2108 */ 2109 private static void insertionSort(char[] a, int low, int high) { 2110 for (int i, k = low; ++k < high; ) { 2111 char ai = a[i = k]; 2112 2113 if (ai < a[i - 1]) { 2114 while (--i >= low && ai < a[i]) { 2115 a[i + 1] = a[i]; 2116 } 2117 a[i + 1] = ai; 2118 } 2119 } 2120 } 2121 2122 /** 2123 * The number of distinct char values. 2124 */ 2125 private static final int NUM_CHAR_VALUES = 1 << 16; 2126 2127 /** 2128 * Sorts the specified range of the array using counting sort. 2129 * 2130 * @param a the array to be sorted 2131 * @param low the index of the first element, inclusive, to be sorted 2132 * @param high the index of the last element, exclusive, to be sorted 2133 */ 2134 private static void countingSort(char[] a, int low, int high) { 2135 int[] count = new int[NUM_CHAR_VALUES]; 2136 2137 /* 2138 * Compute a histogram with the number of each values. 2139 */ 2140 for (int i = high; i > low; ++count[a[--i]]); 2141 2142 /* 2143 * Place values on their final positions. 2144 */ 2145 for (int k = high, i = NUM_CHAR_VALUES; --i > -1; ) { 2146 for (low = k - count[i]; k > low; a[--k] = (char) i); 2147 } 2148 } 2149 2150 // [short] 2151 2152 /** 2153 * Sorts the specified range of the array using 2154 * counting sort or Dual-Pivot Quicksort. 2155 * 2156 * @param a the array to be sorted 2157 * @param low the index of the first element, inclusive, to be sorted 2158 * @param high the index of the last element, exclusive, to be sorted 2159 */ 2160 static void sort(short[] a, int low, int high) { 2161 if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) { 2162 countingSort(a, low, high); 2163 } else { 2164 sort(a, 0, low, high); 2165 } 2166 } 2167 2168 /** 2169 * Sorts the specified array using the Dual-Pivot Quicksort and/or 2170 * other sorts in special-cases, possibly with parallel partitions. 2171 * 2172 * @param a the array to be sorted 2173 * @param bits the combination of recursion depth and bit flag, where 2174 * the right bit "0" indicates that array is the leftmost part 2175 * @param low the index of the first element, inclusive, to be sorted 2176 * @param high the index of the last element, exclusive, to be sorted 2177 */ 2178 static void sort(short[] a, int bits, int low, int high) { 2179 while (true) { 2180 int end = high - 1, size = high - low; 2181 2182 /* 2183 * Run mixed insertion sort on small non-leftmost parts. 2184 */ 2185 if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { 2186 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 2187 return; 2188 } 2189 2190 /* 2191 * Invoke insertion sort on small leftmost part. 2192 */ 2193 if (size < MAX_INSERTION_SORT_SIZE) { 2194 insertionSort(a, low, high); 2195 return; 2196 } 2197 2198 /* 2199 * Switch to counting sort if execution 2200 * time is becoming quadratic. 2201 */ 2202 if ((bits += 2) > MAX_RECURSION_DEPTH) { 2203 countingSort(a, low, high); 2204 return; 2205 } 2206 2207 /* 2208 * Use an inexpensive approximation of the golden ratio 2209 * to select five sample elements and determine pivots. 2210 */ 2211 int step = (size >> 3) * 3 + 3; 2212 2213 /* 2214 * Five elements around (and including) the central element 2215 * will be used for pivot selection as described below. The 2216 * unequal choice of spacing these elements was empirically 2217 * determined to work well on a wide variety of inputs. 2218 */ 2219 int e1 = low + step; 2220 int e5 = end - step; 2221 int e3 = (e1 + e5) >>> 1; 2222 int e2 = (e1 + e3) >>> 1; 2223 int e4 = (e3 + e5) >>> 1; 2224 short a3 = a[e3]; 2225 2226 /* 2227 * Sort these elements in place by the combination 2228 * of 4-element sorting network and insertion sort. 2229 * 2230 * 5 ------o-----------o----------- 2231 * | | 2232 * 4 ------|-----o-----o-----o----- 2233 * | | | 2234 * 2 ------o-----|-----o-----o----- 2235 * | | 2236 * 1 ------------o-----o----------- 2237 */ 2238 if (a[e5] < a[e2]) { short t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 2239 if (a[e4] < a[e1]) { short t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 2240 if (a[e5] < a[e4]) { short t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 2241 if (a[e2] < a[e1]) { short t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 2242 if (a[e4] < a[e2]) { short t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 2243 2244 if (a3 < a[e2]) { 2245 if (a3 < a[e1]) { 2246 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 2247 } else { 2248 a[e3] = a[e2]; a[e2] = a3; 2249 } 2250 } else if (a3 > a[e4]) { 2251 if (a3 > a[e5]) { 2252 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 2253 } else { 2254 a[e3] = a[e4]; a[e4] = a3; 2255 } 2256 } 2257 2258 // Pointers 2259 int lower = low; // The index of the last element of the left part 2260 int upper = end; // The index of the first element of the right part 2261 2262 /* 2263 * Partitioning with 2 pivots in case of different elements. 2264 */ 2265 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 2266 2267 /* 2268 * Use the first and fifth of the five sorted elements as 2269 * the pivots. These values are inexpensive approximation 2270 * of tertiles. Note, that pivot1 < pivot2. 2271 */ 2272 short pivot1 = a[e1]; 2273 short pivot2 = a[e5]; 2274 2275 /* 2276 * The first and the last elements to be sorted are moved 2277 * to the locations formerly occupied by the pivots. When 2278 * partitioning is completed, the pivots are swapped back 2279 * into their final positions, and excluded from the next 2280 * subsequent sorting. 2281 */ 2282 a[e1] = a[lower]; 2283 a[e5] = a[upper]; 2284 2285 /* 2286 * Skip elements, which are less or greater than the pivots. 2287 */ 2288 while (a[++lower] < pivot1); 2289 while (a[--upper] > pivot2); 2290 2291 /* 2292 * Backward 3-interval partitioning 2293 * 2294 * left part central part right part 2295 * +------------------------------------------------------------+ 2296 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 2297 * +------------------------------------------------------------+ 2298 * ^ ^ ^ 2299 * | | | 2300 * lower k upper 2301 * 2302 * Invariants: 2303 * 2304 * all in (low, lower] < pivot1 2305 * pivot1 <= all in (k, upper) <= pivot2 2306 * all in [upper, end) > pivot2 2307 * 2308 * Pointer k is the last index of ?-part 2309 */ 2310 for (int unused = --lower, k = ++upper; --k > lower; ) { 2311 short ak = a[k]; 2312 2313 if (ak < pivot1) { // Move a[k] to the left side 2314 while (lower < k) { 2315 if (a[++lower] >= pivot1) { 2316 if (a[lower] > pivot2) { 2317 a[k] = a[--upper]; 2318 a[upper] = a[lower]; 2319 } else { 2320 a[k] = a[lower]; 2321 } 2322 a[lower] = ak; 2323 break; 2324 } 2325 } 2326 } else if (ak > pivot2) { // Move a[k] to the right side 2327 a[k] = a[--upper]; 2328 a[upper] = ak; 2329 } 2330 } 2331 2332 /* 2333 * Swap the pivots into their final positions. 2334 */ 2335 a[low] = a[lower]; a[lower] = pivot1; 2336 a[end] = a[upper]; a[upper] = pivot2; 2337 2338 /* 2339 * Sort non-left parts recursively, 2340 * excluding known pivots. 2341 */ 2342 sort(a, bits | 1, lower + 1, upper); 2343 sort(a, bits | 1, upper + 1, high); 2344 2345 } else { // Use single pivot in case of many equal elements 2346 2347 /* 2348 * Use the third of the five sorted elements as the pivot. 2349 * This value is inexpensive approximation of the median. 2350 */ 2351 short pivot = a[e3]; 2352 2353 /* 2354 * The first element to be sorted is moved to the 2355 * location formerly occupied by the pivot. After 2356 * completion of partitioning the pivot is swapped 2357 * back into its final position, and excluded from 2358 * the next subsequent sorting. 2359 */ 2360 a[e3] = a[lower]; 2361 2362 /* 2363 * Traditional 3-way (Dutch National Flag) partitioning 2364 * 2365 * left part central part right part 2366 * +------------------------------------------------------+ 2367 * | < pivot | ? | == pivot | > pivot | 2368 * +------------------------------------------------------+ 2369 * ^ ^ ^ 2370 * | | | 2371 * lower k upper 2372 * 2373 * Invariants: 2374 * 2375 * all in (low, lower] < pivot 2376 * all in (k, upper) == pivot 2377 * all in [upper, end] > pivot 2378 * 2379 * Pointer k is the last index of ?-part 2380 */ 2381 for (int k = ++upper; --k > lower; ) { 2382 short ak = a[k]; 2383 2384 if (ak != pivot) { 2385 a[k] = pivot; 2386 2387 if (ak < pivot) { // Move a[k] to the left side 2388 while (a[++lower] < pivot); 2389 2390 if (a[lower] > pivot) { 2391 a[--upper] = a[lower]; 2392 } 2393 a[lower] = ak; 2394 } else { // ak > pivot - Move a[k] to the right side 2395 a[--upper] = ak; 2396 } 2397 } 2398 } 2399 2400 /* 2401 * Swap the pivot into its final position. 2402 */ 2403 a[low] = a[lower]; a[lower] = pivot; 2404 2405 /* 2406 * Sort the right part, excluding known pivot. 2407 * All elements from the central part are 2408 * equal and therefore already sorted. 2409 */ 2410 sort(a, bits | 1, upper, high); 2411 } 2412 high = lower; // Iterate along the left part 2413 } 2414 } 2415 2416 /** 2417 * Sorts the specified range of the array using mixed insertion sort. 2418 * 2419 * Mixed insertion sort is combination of simple insertion sort, 2420 * pin insertion sort and pair insertion sort. 2421 * 2422 * In the context of Dual-Pivot Quicksort, the pivot element 2423 * from the left part plays the role of sentinel, because it 2424 * is less than any elements from the given part. Therefore, 2425 * expensive check of the left range can be skipped on each 2426 * iteration unless it is the leftmost call. 2427 * 2428 * @param a the array to be sorted 2429 * @param low the index of the first element, inclusive, to be sorted 2430 * @param end the index of the last element for simple insertion sort 2431 * @param high the index of the last element, exclusive, to be sorted 2432 */ 2433 private static void mixedInsertionSort(short[] a, int low, int end, int high) { 2434 if (end == high) { 2435 2436 /* 2437 * Invoke simple insertion sort on tiny array. 2438 */ 2439 for (int i; ++low < end; ) { 2440 short ai = a[i = low]; 2441 2442 while (ai < a[--i]) { 2443 a[i + 1] = a[i]; 2444 } 2445 a[i + 1] = ai; 2446 } 2447 } else { 2448 2449 /* 2450 * Start with pin insertion sort on small part. 2451 * 2452 * Pin insertion sort is extended simple insertion sort. 2453 * The main idea of this sort is to put elements larger 2454 * than an element called pin to the end of array (the 2455 * proper area for such elements). It avoids expensive 2456 * movements of these elements through the whole array. 2457 */ 2458 short pin = a[end]; 2459 2460 for (int i, p = high; ++low < end; ) { 2461 short ai = a[i = low]; 2462 2463 if (ai < a[i - 1]) { // Small element 2464 2465 /* 2466 * Insert small element into sorted part. 2467 */ 2468 a[i] = a[--i]; 2469 2470 while (ai < a[--i]) { 2471 a[i + 1] = a[i]; 2472 } 2473 a[i + 1] = ai; 2474 2475 } else if (p > i && ai > pin) { // Large element 2476 2477 /* 2478 * Find element smaller than pin. 2479 */ 2480 while (a[--p] > pin); 2481 2482 /* 2483 * Swap it with large element. 2484 */ 2485 if (p > i) { 2486 ai = a[p]; 2487 a[p] = a[i]; 2488 } 2489 2490 /* 2491 * Insert small element into sorted part. 2492 */ 2493 while (ai < a[--i]) { 2494 a[i + 1] = a[i]; 2495 } 2496 a[i + 1] = ai; 2497 } 2498 } 2499 2500 /* 2501 * Continue with pair insertion sort on remain part. 2502 */ 2503 for (int i; low < high; ++low) { 2504 short a1 = a[i = low], a2 = a[++low]; 2505 2506 /* 2507 * Insert two elements per iteration: at first, insert the 2508 * larger element and then insert the smaller element, but 2509 * from the position where the larger element was inserted. 2510 */ 2511 if (a1 > a2) { 2512 2513 while (a1 < a[--i]) { 2514 a[i + 2] = a[i]; 2515 } 2516 a[++i + 1] = a1; 2517 2518 while (a2 < a[--i]) { 2519 a[i + 1] = a[i]; 2520 } 2521 a[i + 1] = a2; 2522 2523 } else if (a1 < a[i - 1]) { 2524 2525 while (a2 < a[--i]) { 2526 a[i + 2] = a[i]; 2527 } 2528 a[++i + 1] = a2; 2529 2530 while (a1 < a[--i]) { 2531 a[i + 1] = a[i]; 2532 } 2533 a[i + 1] = a1; 2534 } 2535 } 2536 } 2537 } 2538 2539 /** 2540 * Sorts the specified range of the array using insertion sort. 2541 * 2542 * @param a the array to be sorted 2543 * @param low the index of the first element, inclusive, to be sorted 2544 * @param high the index of the last element, exclusive, to be sorted 2545 */ 2546 private static void insertionSort(short[] a, int low, int high) { 2547 for (int i, k = low; ++k < high; ) { 2548 short ai = a[i = k]; 2549 2550 if (ai < a[i - 1]) { 2551 while (--i >= low && ai < a[i]) { 2552 a[i + 1] = a[i]; 2553 } 2554 a[i + 1] = ai; 2555 } 2556 } 2557 } 2558 2559 /** 2560 * The number of distinct short values. 2561 */ 2562 private static final int NUM_SHORT_VALUES = 1 << 16; 2563 2564 /** 2565 * Max index of short counter. 2566 */ 2567 private static final int MAX_SHORT_INDEX = Short.MAX_VALUE + NUM_SHORT_VALUES + 1; 2568 2569 /** 2570 * Sorts the specified range of the array using counting sort. 2571 * 2572 * @param a the array to be sorted 2573 * @param low the index of the first element, inclusive, to be sorted 2574 * @param high the index of the last element, exclusive, to be sorted 2575 */ 2576 private static void countingSort(short[] a, int low, int high) { 2577 int[] count = new int[NUM_SHORT_VALUES]; 2578 2579 /* 2580 * Compute a histogram with the number of each values. 2581 */ 2582 for (int i = high; i > low; ++count[a[--i] & 0xFFFF]); 2583 2584 /* 2585 * Place values on their final positions. 2586 */ 2587 for (int k = high, i = MAX_SHORT_INDEX; --i > Short.MAX_VALUE; ) { 2588 int value = i & 0xFFFF; 2589 for (low = k - count[value]; k > low; a[--k] = (short) value); 2590 } 2591 } 2592 2593 // [float] 2594 2595 /** 2596 * Sorts the specified range of the array using parallel merge 2597 * sort and/or Dual-Pivot Quicksort. 2598 * 2599 * To balance the faster splitting and parallelism of merge sort 2600 * with the faster element partitioning of Quicksort, ranges are 2601 * subdivided in tiers such that, if there is enough parallelism, 2602 * the four-way parallel merge is started, still ensuring enough 2603 * parallelism to process the partitions. 2604 * 2605 * @param a the array to be sorted 2606 * @param parallelism the parallelism level 2607 * @param low the index of the first element, inclusive, to be sorted 2608 * @param high the index of the last element, exclusive, to be sorted 2609 */ 2610 static void sort(float[] a, int parallelism, int low, int high) { 2611 /* 2612 * Phase 1. Count the number of negative zero -0.0f, 2613 * turn them into positive zero, and move all NaNs 2614 * to the end of the array. 2615 */ 2616 int numNegativeZero = 0; 2617 2618 for (int k = high; k > low; ) { 2619 float ak = a[--k]; 2620 2621 if (ak == 0.0f && Float.floatToRawIntBits(ak) < 0) { // ak is -0.0f 2622 numNegativeZero += 1; 2623 a[k] = 0.0f; 2624 } else if (ak != ak) { // ak is NaN 2625 a[k] = a[--high]; 2626 a[high] = ak; 2627 } 2628 } 2629 2630 /* 2631 * Phase 2. Sort everything except NaNs, 2632 * which are already in place. 2633 */ 2634 int size = high - low; 2635 2636 if (parallelism > 0 && size > MIN_PARALLEL_SORT_SIZE) { 2637 int depth = getDepth(parallelism, size >> 12); 2638 float[] b = depth == 0 ? null : new float[size]; 2639 new Sorter(null, a, b, low, size, low, depth).invoke(); 2640 } else { 2641 sort(null, a, 0, low, high); 2642 } 2643 2644 /* 2645 * Phase 3. Turn positive zero 0.0f 2646 * back into negative zero -0.0f. 2647 */ 2648 if (++numNegativeZero == 1) { 2649 return; 2650 } 2651 2652 /* 2653 * Find the position one less than 2654 * the index of the first zero. 2655 */ 2656 while (low <= high) { 2657 int middle = (low + high) >>> 1; 2658 2659 if (a[middle] < 0) { 2660 low = middle + 1; 2661 } else { 2662 high = middle - 1; 2663 } 2664 } 2665 2666 /* 2667 * Replace the required number of 0.0f by -0.0f. 2668 */ 2669 while (--numNegativeZero > 0) { 2670 a[++high] = -0.0f; 2671 } 2672 } 2673 2674 /** 2675 * Sorts the specified array using the Dual-Pivot Quicksort and/or 2676 * other sorts in special-cases, possibly with parallel partitions. 2677 * 2678 * @param sorter parallel context 2679 * @param a the array to be sorted 2680 * @param bits the combination of recursion depth and bit flag, where 2681 * the right bit "0" indicates that array is the leftmost part 2682 * @param low the index of the first element, inclusive, to be sorted 2683 * @param high the index of the last element, exclusive, to be sorted 2684 */ 2685 static void sort(Sorter sorter, float[] a, int bits, int low, int high) { 2686 while (true) { 2687 int end = high - 1, size = high - low; 2688 2689 /* 2690 * Run mixed insertion sort on small non-leftmost parts. 2691 */ 2692 if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { 2693 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 2694 return; 2695 } 2696 2697 /* 2698 * Invoke insertion sort on small leftmost part. 2699 */ 2700 if (size < MAX_INSERTION_SORT_SIZE) { 2701 insertionSort(a, low, high); 2702 return; 2703 } 2704 2705 /* 2706 * Check if the whole array or large non-leftmost 2707 * parts are nearly sorted and then merge runs. 2708 */ 2709 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) 2710 && tryMergeRuns(sorter, a, low, size)) { 2711 return; 2712 } 2713 2714 /* 2715 * Switch to heap sort if execution 2716 * time is becoming quadratic. 2717 */ 2718 if ((bits += 2) > MAX_RECURSION_DEPTH) { 2719 heapSort(a, low, high); 2720 return; 2721 } 2722 2723 /* 2724 * Use an inexpensive approximation of the golden ratio 2725 * to select five sample elements and determine pivots. 2726 */ 2727 int step = (size >> 3) * 3 + 3; 2728 2729 /* 2730 * Five elements around (and including) the central element 2731 * will be used for pivot selection as described below. The 2732 * unequal choice of spacing these elements was empirically 2733 * determined to work well on a wide variety of inputs. 2734 */ 2735 int e1 = low + step; 2736 int e5 = end - step; 2737 int e3 = (e1 + e5) >>> 1; 2738 int e2 = (e1 + e3) >>> 1; 2739 int e4 = (e3 + e5) >>> 1; 2740 float a3 = a[e3]; 2741 2742 /* 2743 * Sort these elements in place by the combination 2744 * of 4-element sorting network and insertion sort. 2745 * 2746 * 5 ------o-----------o----------- 2747 * | | 2748 * 4 ------|-----o-----o-----o----- 2749 * | | | 2750 * 2 ------o-----|-----o-----o----- 2751 * | | 2752 * 1 ------------o-----o----------- 2753 */ 2754 if (a[e5] < a[e2]) { float t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 2755 if (a[e4] < a[e1]) { float t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 2756 if (a[e5] < a[e4]) { float t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 2757 if (a[e2] < a[e1]) { float t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 2758 if (a[e4] < a[e2]) { float t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 2759 2760 if (a3 < a[e2]) { 2761 if (a3 < a[e1]) { 2762 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 2763 } else { 2764 a[e3] = a[e2]; a[e2] = a3; 2765 } 2766 } else if (a3 > a[e4]) { 2767 if (a3 > a[e5]) { 2768 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 2769 } else { 2770 a[e3] = a[e4]; a[e4] = a3; 2771 } 2772 } 2773 2774 // Pointers 2775 int lower = low; // The index of the last element of the left part 2776 int upper = end; // The index of the first element of the right part 2777 2778 /* 2779 * Partitioning with 2 pivots in case of different elements. 2780 */ 2781 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 2782 2783 /* 2784 * Use the first and fifth of the five sorted elements as 2785 * the pivots. These values are inexpensive approximation 2786 * of tertiles. Note, that pivot1 < pivot2. 2787 */ 2788 float pivot1 = a[e1]; 2789 float pivot2 = a[e5]; 2790 2791 /* 2792 * The first and the last elements to be sorted are moved 2793 * to the locations formerly occupied by the pivots. When 2794 * partitioning is completed, the pivots are swapped back 2795 * into their final positions, and excluded from the next 2796 * subsequent sorting. 2797 */ 2798 a[e1] = a[lower]; 2799 a[e5] = a[upper]; 2800 2801 /* 2802 * Skip elements, which are less or greater than the pivots. 2803 */ 2804 while (a[++lower] < pivot1); 2805 while (a[--upper] > pivot2); 2806 2807 /* 2808 * Backward 3-interval partitioning 2809 * 2810 * left part central part right part 2811 * +------------------------------------------------------------+ 2812 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 2813 * +------------------------------------------------------------+ 2814 * ^ ^ ^ 2815 * | | | 2816 * lower k upper 2817 * 2818 * Invariants: 2819 * 2820 * all in (low, lower] < pivot1 2821 * pivot1 <= all in (k, upper) <= pivot2 2822 * all in [upper, end) > pivot2 2823 * 2824 * Pointer k is the last index of ?-part 2825 */ 2826 for (int unused = --lower, k = ++upper; --k > lower; ) { 2827 float ak = a[k]; 2828 2829 if (ak < pivot1) { // Move a[k] to the left side 2830 while (lower < k) { 2831 if (a[++lower] >= pivot1) { 2832 if (a[lower] > pivot2) { 2833 a[k] = a[--upper]; 2834 a[upper] = a[lower]; 2835 } else { 2836 a[k] = a[lower]; 2837 } 2838 a[lower] = ak; 2839 break; 2840 } 2841 } 2842 } else if (ak > pivot2) { // Move a[k] to the right side 2843 a[k] = a[--upper]; 2844 a[upper] = ak; 2845 } 2846 } 2847 2848 /* 2849 * Swap the pivots into their final positions. 2850 */ 2851 a[low] = a[lower]; a[lower] = pivot1; 2852 a[end] = a[upper]; a[upper] = pivot2; 2853 2854 /* 2855 * Sort non-left parts recursively (possibly in parallel), 2856 * excluding known pivots. 2857 */ 2858 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 2859 sorter.forkSorter(bits | 1, lower + 1, upper); 2860 sorter.forkSorter(bits | 1, upper + 1, high); 2861 } else { 2862 sort(sorter, a, bits | 1, lower + 1, upper); 2863 sort(sorter, a, bits | 1, upper + 1, high); 2864 } 2865 2866 } else { // Use single pivot in case of many equal elements 2867 2868 /* 2869 * Use the third of the five sorted elements as the pivot. 2870 * This value is inexpensive approximation of the median. 2871 */ 2872 float pivot = a[e3]; 2873 2874 /* 2875 * The first element to be sorted is moved to the 2876 * location formerly occupied by the pivot. After 2877 * completion of partitioning the pivot is swapped 2878 * back into its final position, and excluded from 2879 * the next subsequent sorting. 2880 */ 2881 a[e3] = a[lower]; 2882 2883 /* 2884 * Traditional 3-way (Dutch National Flag) partitioning 2885 * 2886 * left part central part right part 2887 * +------------------------------------------------------+ 2888 * | < pivot | ? | == pivot | > pivot | 2889 * +------------------------------------------------------+ 2890 * ^ ^ ^ 2891 * | | | 2892 * lower k upper 2893 * 2894 * Invariants: 2895 * 2896 * all in (low, lower] < pivot 2897 * all in (k, upper) == pivot 2898 * all in [upper, end] > pivot 2899 * 2900 * Pointer k is the last index of ?-part 2901 */ 2902 for (int k = ++upper; --k > lower; ) { 2903 float ak = a[k]; 2904 2905 if (ak != pivot) { 2906 a[k] = pivot; 2907 2908 if (ak < pivot) { // Move a[k] to the left side 2909 while (a[++lower] < pivot); 2910 2911 if (a[lower] > pivot) { 2912 a[--upper] = a[lower]; 2913 } 2914 a[lower] = ak; 2915 } else { // ak > pivot - Move a[k] to the right side 2916 a[--upper] = ak; 2917 } 2918 } 2919 } 2920 2921 /* 2922 * Swap the pivot into its final position. 2923 */ 2924 a[low] = a[lower]; a[lower] = pivot; 2925 2926 /* 2927 * Sort the right part (possibly in parallel), excluding 2928 * known pivot. All elements from the central part are 2929 * equal and therefore already sorted. 2930 */ 2931 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 2932 sorter.forkSorter(bits | 1, upper, high); 2933 } else { 2934 sort(sorter, a, bits | 1, upper, high); 2935 } 2936 } 2937 high = lower; // Iterate along the left part 2938 } 2939 } 2940 2941 /** 2942 * Sorts the specified range of the array using mixed insertion sort. 2943 * 2944 * Mixed insertion sort is combination of simple insertion sort, 2945 * pin insertion sort and pair insertion sort. 2946 * 2947 * In the context of Dual-Pivot Quicksort, the pivot element 2948 * from the left part plays the role of sentinel, because it 2949 * is less than any elements from the given part. Therefore, 2950 * expensive check of the left range can be skipped on each 2951 * iteration unless it is the leftmost call. 2952 * 2953 * @param a the array to be sorted 2954 * @param low the index of the first element, inclusive, to be sorted 2955 * @param end the index of the last element for simple insertion sort 2956 * @param high the index of the last element, exclusive, to be sorted 2957 */ 2958 private static void mixedInsertionSort(float[] a, int low, int end, int high) { 2959 if (end == high) { 2960 2961 /* 2962 * Invoke simple insertion sort on tiny array. 2963 */ 2964 for (int i; ++low < end; ) { 2965 float ai = a[i = low]; 2966 2967 while (ai < a[--i]) { 2968 a[i + 1] = a[i]; 2969 } 2970 a[i + 1] = ai; 2971 } 2972 } else { 2973 2974 /* 2975 * Start with pin insertion sort on small part. 2976 * 2977 * Pin insertion sort is extended simple insertion sort. 2978 * The main idea of this sort is to put elements larger 2979 * than an element called pin to the end of array (the 2980 * proper area for such elements). It avoids expensive 2981 * movements of these elements through the whole array. 2982 */ 2983 float pin = a[end]; 2984 2985 for (int i, p = high; ++low < end; ) { 2986 float ai = a[i = low]; 2987 2988 if (ai < a[i - 1]) { // Small element 2989 2990 /* 2991 * Insert small element into sorted part. 2992 */ 2993 a[i] = a[--i]; 2994 2995 while (ai < a[--i]) { 2996 a[i + 1] = a[i]; 2997 } 2998 a[i + 1] = ai; 2999 3000 } else if (p > i && ai > pin) { // Large element 3001 3002 /* 3003 * Find element smaller than pin. 3004 */ 3005 while (a[--p] > pin); 3006 3007 /* 3008 * Swap it with large element. 3009 */ 3010 if (p > i) { 3011 ai = a[p]; 3012 a[p] = a[i]; 3013 } 3014 3015 /* 3016 * Insert small element into sorted part. 3017 */ 3018 while (ai < a[--i]) { 3019 a[i + 1] = a[i]; 3020 } 3021 a[i + 1] = ai; 3022 } 3023 } 3024 3025 /* 3026 * Continue with pair insertion sort on remain part. 3027 */ 3028 for (int i; low < high; ++low) { 3029 float a1 = a[i = low], a2 = a[++low]; 3030 3031 /* 3032 * Insert two elements per iteration: at first, insert the 3033 * larger element and then insert the smaller element, but 3034 * from the position where the larger element was inserted. 3035 */ 3036 if (a1 > a2) { 3037 3038 while (a1 < a[--i]) { 3039 a[i + 2] = a[i]; 3040 } 3041 a[++i + 1] = a1; 3042 3043 while (a2 < a[--i]) { 3044 a[i + 1] = a[i]; 3045 } 3046 a[i + 1] = a2; 3047 3048 } else if (a1 < a[i - 1]) { 3049 3050 while (a2 < a[--i]) { 3051 a[i + 2] = a[i]; 3052 } 3053 a[++i + 1] = a2; 3054 3055 while (a1 < a[--i]) { 3056 a[i + 1] = a[i]; 3057 } 3058 a[i + 1] = a1; 3059 } 3060 } 3061 } 3062 } 3063 3064 /** 3065 * Sorts the specified range of the array using insertion sort. 3066 * 3067 * @param a the array to be sorted 3068 * @param low the index of the first element, inclusive, to be sorted 3069 * @param high the index of the last element, exclusive, to be sorted 3070 */ 3071 private static void insertionSort(float[] a, int low, int high) { 3072 for (int i, k = low; ++k < high; ) { 3073 float ai = a[i = k]; 3074 3075 if (ai < a[i - 1]) { 3076 while (--i >= low && ai < a[i]) { 3077 a[i + 1] = a[i]; 3078 } 3079 a[i + 1] = ai; 3080 } 3081 } 3082 } 3083 3084 /** 3085 * Sorts the specified range of the array using heap sort. 3086 * 3087 * @param a the array to be sorted 3088 * @param low the index of the first element, inclusive, to be sorted 3089 * @param high the index of the last element, exclusive, to be sorted 3090 */ 3091 private static void heapSort(float[] a, int low, int high) { 3092 for (int k = (low + high) >>> 1; k > low; ) { 3093 pushDown(a, --k, a[k], low, high); 3094 } 3095 while (--high > low) { 3096 float max = a[low]; 3097 pushDown(a, low, a[high], low, high); 3098 a[high] = max; 3099 } 3100 } 3101 3102 /** 3103 * Pushes specified element down during heap sort. 3104 * 3105 * @param a the given array 3106 * @param p the start index 3107 * @param value the given element 3108 * @param low the index of the first element, inclusive, to be sorted 3109 * @param high the index of the last element, exclusive, to be sorted 3110 */ 3111 private static void pushDown(float[] a, int p, float value, int low, int high) { 3112 for (int k ;; a[p] = a[p = k]) { 3113 k = (p << 1) - low + 2; // Index of the right child 3114 3115 if (k > high) { 3116 break; 3117 } 3118 if (k == high || a[k] < a[k - 1]) { 3119 --k; 3120 } 3121 if (a[k] <= value) { 3122 break; 3123 } 3124 } 3125 a[p] = value; 3126 } 3127 3128 /** 3129 * Tries to sort the specified range of the array. 3130 * 3131 * @param sorter parallel context 3132 * @param a the array to be sorted 3133 * @param low the index of the first element to be sorted 3134 * @param size the array size 3135 * @return true if finally sorted, false otherwise 3136 */ 3137 private static boolean tryMergeRuns(Sorter sorter, float[] a, int low, int size) { 3138 3139 /* 3140 * The run array is constructed only if initial runs are 3141 * long enough to continue, run[i] then holds start index 3142 * of the i-th sequence of elements in non-descending order. 3143 */ 3144 int[] run = null; 3145 int high = low + size; 3146 int count = 1, last = low; 3147 3148 /* 3149 * Identify all possible runs. 3150 */ 3151 for (int k = low + 1; k < high; ) { 3152 3153 /* 3154 * Find the end index of the current run. 3155 */ 3156 if (a[k - 1] < a[k]) { 3157 3158 // Identify ascending sequence 3159 while (++k < high && a[k - 1] <= a[k]); 3160 3161 } else if (a[k - 1] > a[k]) { 3162 3163 // Identify descending sequence 3164 while (++k < high && a[k - 1] >= a[k]); 3165 3166 // Reverse into ascending order 3167 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { 3168 float ai = a[i]; a[i] = a[j]; a[j] = ai; 3169 } 3170 } else { // Identify constant sequence 3171 for (float ak = a[k]; ++k < high && ak == a[k]; ); 3172 3173 if (k < high) { 3174 continue; 3175 } 3176 } 3177 3178 /* 3179 * Check special cases. 3180 */ 3181 if (run == null) { 3182 if (k == high) { 3183 3184 /* 3185 * The array is monotonous sequence, 3186 * and therefore already sorted. 3187 */ 3188 return true; 3189 } 3190 3191 if (k - low < MIN_FIRST_RUN_SIZE) { 3192 3193 /* 3194 * The first run is too small 3195 * to proceed with scanning. 3196 */ 3197 return false; 3198 } 3199 3200 run = new int[((size >> 10) | 0x7F) & 0x3FF]; 3201 run[0] = low; 3202 3203 } else if (a[last - 1] > a[last]) { 3204 3205 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { 3206 3207 /* 3208 * The first runs are not long 3209 * enough to continue scanning. 3210 */ 3211 return false; 3212 } 3213 3214 if (++count == MAX_RUN_CAPACITY) { 3215 3216 /* 3217 * Array is not highly structured. 3218 */ 3219 return false; 3220 } 3221 3222 if (count == run.length) { 3223 3224 /* 3225 * Increase capacity of index array. 3226 */ 3227 run = Arrays.copyOf(run, count << 1); 3228 } 3229 } 3230 run[count] = (last = k); 3231 } 3232 3233 /* 3234 * Merge runs of highly structured array. 3235 */ 3236 if (count > 1) { 3237 float[] b; int offset = low; 3238 3239 if (sorter == null || (b = (float[]) sorter.b) == null) { 3240 b = new float[size]; 3241 } else { 3242 offset = sorter.offset; 3243 } 3244 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); 3245 } 3246 return true; 3247 } 3248 3249 /** 3250 * Merges the specified runs. 3251 * 3252 * @param a the source array 3253 * @param b the temporary buffer used in merging 3254 * @param offset the start index in the source, inclusive 3255 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) 3256 * @param parallel indicates whether merging is performed in parallel 3257 * @param run the start indexes of the runs, inclusive 3258 * @param lo the start index of the first run, inclusive 3259 * @param hi the start index of the last run, inclusive 3260 * @return the destination where runs are merged 3261 */ 3262 private static float[] mergeRuns(float[] a, float[] b, int offset, 3263 int aim, boolean parallel, int[] run, int lo, int hi) { 3264 3265 if (hi - lo == 1) { 3266 if (aim >= 0) { 3267 return a; 3268 } 3269 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; 3270 b[--j] = a[--i] 3271 ); 3272 return b; 3273 } 3274 3275 /* 3276 * Split into approximately equal parts. 3277 */ 3278 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; 3279 while (run[++mi + 1] <= rmi); 3280 3281 /* 3282 * Merge the left and right parts. 3283 */ 3284 float[] a1, a2; 3285 3286 if (parallel && hi - lo > MIN_RUN_COUNT) { 3287 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); 3288 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); 3289 a2 = (float[]) merger.getDestination(); 3290 } else { 3291 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); 3292 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); 3293 } 3294 3295 float[] dst = a1 == a ? b : a; 3296 3297 int k = a1 == a ? run[lo] - offset : run[lo]; 3298 int lo1 = a1 == b ? run[lo] - offset : run[lo]; 3299 int hi1 = a1 == b ? run[mi] - offset : run[mi]; 3300 int lo2 = a2 == b ? run[mi] - offset : run[mi]; 3301 int hi2 = a2 == b ? run[hi] - offset : run[hi]; 3302 3303 if (parallel) { 3304 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); 3305 } else { 3306 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); 3307 } 3308 return dst; 3309 } 3310 3311 /** 3312 * Merges the sorted parts. 3313 * 3314 * @param merger parallel context 3315 * @param dst the destination where parts are merged 3316 * @param k the start index of the destination, inclusive 3317 * @param a1 the first part 3318 * @param lo1 the start index of the first part, inclusive 3319 * @param hi1 the end index of the first part, exclusive 3320 * @param a2 the second part 3321 * @param lo2 the start index of the second part, inclusive 3322 * @param hi2 the end index of the second part, exclusive 3323 */ 3324 private static void mergeParts(Merger merger, float[] dst, int k, 3325 float[] a1, int lo1, int hi1, float[] a2, int lo2, int hi2) { 3326 3327 if (merger != null && a1 == a2) { 3328 3329 while (true) { 3330 3331 /* 3332 * The first part must be larger. 3333 */ 3334 if (hi1 - lo1 < hi2 - lo2) { 3335 int lo = lo1; lo1 = lo2; lo2 = lo; 3336 int hi = hi1; hi1 = hi2; hi2 = hi; 3337 } 3338 3339 /* 3340 * Small parts will be merged sequentially. 3341 */ 3342 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { 3343 break; 3344 } 3345 3346 /* 3347 * Find the median of the larger part. 3348 */ 3349 int mi1 = (lo1 + hi1) >>> 1; 3350 float key = a1[mi1]; 3351 int mi2 = hi2; 3352 3353 /* 3354 * Partition the smaller part. 3355 */ 3356 for (int loo = lo2; loo < mi2; ) { 3357 int t = (loo + mi2) >>> 1; 3358 3359 if (key > a2[t]) { 3360 loo = t + 1; 3361 } else { 3362 mi2 = t; 3363 } 3364 } 3365 3366 int d = mi2 - lo2 + mi1 - lo1; 3367 3368 /* 3369 * Merge the right sub-parts in parallel. 3370 */ 3371 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); 3372 3373 /* 3374 * Process the sub-left parts. 3375 */ 3376 hi1 = mi1; 3377 hi2 = mi2; 3378 } 3379 } 3380 3381 /* 3382 * Merge small parts sequentially. 3383 */ 3384 while (lo1 < hi1 && lo2 < hi2) { 3385 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; 3386 } 3387 if (dst != a1 || k < lo1) { 3388 while (lo1 < hi1) { 3389 dst[k++] = a1[lo1++]; 3390 } 3391 } 3392 if (dst != a2 || k < lo2) { 3393 while (lo2 < hi2) { 3394 dst[k++] = a2[lo2++]; 3395 } 3396 } 3397 } 3398 3399 // [double] 3400 3401 /** 3402 * Sorts the specified range of the array using parallel merge 3403 * sort and/or Dual-Pivot Quicksort. 3404 * 3405 * To balance the faster splitting and parallelism of merge sort 3406 * with the faster element partitioning of Quicksort, ranges are 3407 * subdivided in tiers such that, if there is enough parallelism, 3408 * the four-way parallel merge is started, still ensuring enough 3409 * parallelism to process the partitions. 3410 * 3411 * @param a the array to be sorted 3412 * @param parallelism the parallelism level 3413 * @param low the index of the first element, inclusive, to be sorted 3414 * @param high the index of the last element, exclusive, to be sorted 3415 */ 3416 static void sort(double[] a, int parallelism, int low, int high) { 3417 /* 3418 * Phase 1. Count the number of negative zero -0.0d, 3419 * turn them into positive zero, and move all NaNs 3420 * to the end of the array. 3421 */ 3422 int numNegativeZero = 0; 3423 3424 for (int k = high; k > low; ) { 3425 double ak = a[--k]; 3426 3427 if (ak == 0.0d && Double.doubleToRawLongBits(ak) < 0) { // ak is -0.0d 3428 numNegativeZero += 1; 3429 a[k] = 0.0d; 3430 } else if (ak != ak) { // ak is NaN 3431 a[k] = a[--high]; 3432 a[high] = ak; 3433 } 3434 } 3435 3436 /* 3437 * Phase 2. Sort everything except NaNs, 3438 * which are already in place. 3439 */ 3440 int size = high - low; 3441 3442 if (parallelism > 0 && size > MIN_PARALLEL_SORT_SIZE) { 3443 int depth = getDepth(parallelism, size >> 12); 3444 double[] b = depth == 0 ? null : new double[size]; 3445 new Sorter(null, a, b, low, size, low, depth).invoke(); 3446 } else { 3447 sort(null, a, 0, low, high); 3448 } 3449 3450 /* 3451 * Phase 3. Turn positive zero 0.0d 3452 * back into negative zero -0.0d. 3453 */ 3454 if (++numNegativeZero == 1) { 3455 return; 3456 } 3457 3458 /* 3459 * Find the position one less than 3460 * the index of the first zero. 3461 */ 3462 while (low <= high) { 3463 int middle = (low + high) >>> 1; 3464 3465 if (a[middle] < 0) { 3466 low = middle + 1; 3467 } else { 3468 high = middle - 1; 3469 } 3470 } 3471 3472 /* 3473 * Replace the required number of 0.0d by -0.0d. 3474 */ 3475 while (--numNegativeZero > 0) { 3476 a[++high] = -0.0d; 3477 } 3478 } 3479 3480 /** 3481 * Sorts the specified array using the Dual-Pivot Quicksort and/or 3482 * other sorts in special-cases, possibly with parallel partitions. 3483 * 3484 * @param sorter parallel context 3485 * @param a the array to be sorted 3486 * @param bits the combination of recursion depth and bit flag, where 3487 * the right bit "0" indicates that array is the leftmost part 3488 * @param low the index of the first element, inclusive, to be sorted 3489 * @param high the index of the last element, exclusive, to be sorted 3490 */ 3491 static void sort(Sorter sorter, double[] a, int bits, int low, int high) { 3492 while (true) { 3493 int end = high - 1, size = high - low; 3494 3495 /* 3496 * Run mixed insertion sort on small non-leftmost parts. 3497 */ 3498 if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { 3499 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 3500 return; 3501 } 3502 3503 /* 3504 * Invoke insertion sort on small leftmost part. 3505 */ 3506 if (size < MAX_INSERTION_SORT_SIZE) { 3507 insertionSort(a, low, high); 3508 return; 3509 } 3510 3511 /* 3512 * Check if the whole array or large non-leftmost 3513 * parts are nearly sorted and then merge runs. 3514 */ 3515 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) 3516 && tryMergeRuns(sorter, a, low, size)) { 3517 return; 3518 } 3519 3520 /* 3521 * Switch to heap sort if execution 3522 * time is becoming quadratic. 3523 */ 3524 if ((bits += 2) > MAX_RECURSION_DEPTH) { 3525 heapSort(a, low, high); 3526 return; 3527 } 3528 3529 /* 3530 * Use an inexpensive approximation of the golden ratio 3531 * to select five sample elements and determine pivots. 3532 */ 3533 int step = (size >> 3) * 3 + 3; 3534 3535 /* 3536 * Five elements around (and including) the central element 3537 * will be used for pivot selection as described below. The 3538 * unequal choice of spacing these elements was empirically 3539 * determined to work well on a wide variety of inputs. 3540 */ 3541 int e1 = low + step; 3542 int e5 = end - step; 3543 int e3 = (e1 + e5) >>> 1; 3544 int e2 = (e1 + e3) >>> 1; 3545 int e4 = (e3 + e5) >>> 1; 3546 double a3 = a[e3]; 3547 3548 /* 3549 * Sort these elements in place by the combination 3550 * of 4-element sorting network and insertion sort. 3551 * 3552 * 5 ------o-----------o----------- 3553 * | | 3554 * 4 ------|-----o-----o-----o----- 3555 * | | | 3556 * 2 ------o-----|-----o-----o----- 3557 * | | 3558 * 1 ------------o-----o----------- 3559 */ 3560 if (a[e5] < a[e2]) { double t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 3561 if (a[e4] < a[e1]) { double t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 3562 if (a[e5] < a[e4]) { double t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 3563 if (a[e2] < a[e1]) { double t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 3564 if (a[e4] < a[e2]) { double t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 3565 3566 if (a3 < a[e2]) { 3567 if (a3 < a[e1]) { 3568 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 3569 } else { 3570 a[e3] = a[e2]; a[e2] = a3; 3571 } 3572 } else if (a3 > a[e4]) { 3573 if (a3 > a[e5]) { 3574 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 3575 } else { 3576 a[e3] = a[e4]; a[e4] = a3; 3577 } 3578 } 3579 3580 // Pointers 3581 int lower = low; // The index of the last element of the left part 3582 int upper = end; // The index of the first element of the right part 3583 3584 /* 3585 * Partitioning with 2 pivots in case of different elements. 3586 */ 3587 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 3588 3589 /* 3590 * Use the first and fifth of the five sorted elements as 3591 * the pivots. These values are inexpensive approximation 3592 * of tertiles. Note, that pivot1 < pivot2. 3593 */ 3594 double pivot1 = a[e1]; 3595 double pivot2 = a[e5]; 3596 3597 /* 3598 * The first and the last elements to be sorted are moved 3599 * to the locations formerly occupied by the pivots. When 3600 * partitioning is completed, the pivots are swapped back 3601 * into their final positions, and excluded from the next 3602 * subsequent sorting. 3603 */ 3604 a[e1] = a[lower]; 3605 a[e5] = a[upper]; 3606 3607 /* 3608 * Skip elements, which are less or greater than the pivots. 3609 */ 3610 while (a[++lower] < pivot1); 3611 while (a[--upper] > pivot2); 3612 3613 /* 3614 * Backward 3-interval partitioning 3615 * 3616 * left part central part right part 3617 * +------------------------------------------------------------+ 3618 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 3619 * +------------------------------------------------------------+ 3620 * ^ ^ ^ 3621 * | | | 3622 * lower k upper 3623 * 3624 * Invariants: 3625 * 3626 * all in (low, lower] < pivot1 3627 * pivot1 <= all in (k, upper) <= pivot2 3628 * all in [upper, end) > pivot2 3629 * 3630 * Pointer k is the last index of ?-part 3631 */ 3632 for (int unused = --lower, k = ++upper; --k > lower; ) { 3633 double ak = a[k]; 3634 3635 if (ak < pivot1) { // Move a[k] to the left side 3636 while (lower < k) { 3637 if (a[++lower] >= pivot1) { 3638 if (a[lower] > pivot2) { 3639 a[k] = a[--upper]; 3640 a[upper] = a[lower]; 3641 } else { 3642 a[k] = a[lower]; 3643 } 3644 a[lower] = ak; 3645 break; 3646 } 3647 } 3648 } else if (ak > pivot2) { // Move a[k] to the right side 3649 a[k] = a[--upper]; 3650 a[upper] = ak; 3651 } 3652 } 3653 3654 /* 3655 * Swap the pivots into their final positions. 3656 */ 3657 a[low] = a[lower]; a[lower] = pivot1; 3658 a[end] = a[upper]; a[upper] = pivot2; 3659 3660 /* 3661 * Sort non-left parts recursively (possibly in parallel), 3662 * excluding known pivots. 3663 */ 3664 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 3665 sorter.forkSorter(bits | 1, lower + 1, upper); 3666 sorter.forkSorter(bits | 1, upper + 1, high); 3667 } else { 3668 sort(sorter, a, bits | 1, lower + 1, upper); 3669 sort(sorter, a, bits | 1, upper + 1, high); 3670 } 3671 3672 } else { // Use single pivot in case of many equal elements 3673 3674 /* 3675 * Use the third of the five sorted elements as the pivot. 3676 * This value is inexpensive approximation of the median. 3677 */ 3678 double pivot = a[e3]; 3679 3680 /* 3681 * The first element to be sorted is moved to the 3682 * location formerly occupied by the pivot. After 3683 * completion of partitioning the pivot is swapped 3684 * back into its final position, and excluded from 3685 * the next subsequent sorting. 3686 */ 3687 a[e3] = a[lower]; 3688 3689 /* 3690 * Traditional 3-way (Dutch National Flag) partitioning 3691 * 3692 * left part central part right part 3693 * +------------------------------------------------------+ 3694 * | < pivot | ? | == pivot | > pivot | 3695 * +------------------------------------------------------+ 3696 * ^ ^ ^ 3697 * | | | 3698 * lower k upper 3699 * 3700 * Invariants: 3701 * 3702 * all in (low, lower] < pivot 3703 * all in (k, upper) == pivot 3704 * all in [upper, end] > pivot 3705 * 3706 * Pointer k is the last index of ?-part 3707 */ 3708 for (int k = ++upper; --k > lower; ) { 3709 double ak = a[k]; 3710 3711 if (ak != pivot) { 3712 a[k] = pivot; 3713 3714 if (ak < pivot) { // Move a[k] to the left side 3715 while (a[++lower] < pivot); 3716 3717 if (a[lower] > pivot) { 3718 a[--upper] = a[lower]; 3719 } 3720 a[lower] = ak; 3721 } else { // ak > pivot - Move a[k] to the right side 3722 a[--upper] = ak; 3723 } 3724 } 3725 } 3726 3727 /* 3728 * Swap the pivot into its final position. 3729 */ 3730 a[low] = a[lower]; a[lower] = pivot; 3731 3732 /* 3733 * Sort the right part (possibly in parallel), excluding 3734 * known pivot. All elements from the central part are 3735 * equal and therefore already sorted. 3736 */ 3737 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 3738 sorter.forkSorter(bits | 1, upper, high); 3739 } else { 3740 sort(sorter, a, bits | 1, upper, high); 3741 } 3742 } 3743 high = lower; // Iterate along the left part 3744 } 3745 } 3746 3747 /** 3748 * Sorts the specified range of the array using mixed insertion sort. 3749 * 3750 * Mixed insertion sort is combination of simple insertion sort, 3751 * pin insertion sort and pair insertion sort. 3752 * 3753 * In the context of Dual-Pivot Quicksort, the pivot element 3754 * from the left part plays the role of sentinel, because it 3755 * is less than any elements from the given part. Therefore, 3756 * expensive check of the left range can be skipped on each 3757 * iteration unless it is the leftmost call. 3758 * 3759 * @param a the array to be sorted 3760 * @param low the index of the first element, inclusive, to be sorted 3761 * @param end the index of the last element for simple insertion sort 3762 * @param high the index of the last element, exclusive, to be sorted 3763 */ 3764 private static void mixedInsertionSort(double[] a, int low, int end, int high) { 3765 if (end == high) { 3766 3767 /* 3768 * Invoke simple insertion sort on tiny array. 3769 */ 3770 for (int i; ++low < end; ) { 3771 double ai = a[i = low]; 3772 3773 while (ai < a[--i]) { 3774 a[i + 1] = a[i]; 3775 } 3776 a[i + 1] = ai; 3777 } 3778 } else { 3779 3780 /* 3781 * Start with pin insertion sort on small part. 3782 * 3783 * Pin insertion sort is extended simple insertion sort. 3784 * The main idea of this sort is to put elements larger 3785 * than an element called pin to the end of array (the 3786 * proper area for such elements). It avoids expensive 3787 * movements of these elements through the whole array. 3788 */ 3789 double pin = a[end]; 3790 3791 for (int i, p = high; ++low < end; ) { 3792 double ai = a[i = low]; 3793 3794 if (ai < a[i - 1]) { // Small element 3795 3796 /* 3797 * Insert small element into sorted part. 3798 */ 3799 a[i] = a[--i]; 3800 3801 while (ai < a[--i]) { 3802 a[i + 1] = a[i]; 3803 } 3804 a[i + 1] = ai; 3805 3806 } else if (p > i && ai > pin) { // Large element 3807 3808 /* 3809 * Find element smaller than pin. 3810 */ 3811 while (a[--p] > pin); 3812 3813 /* 3814 * Swap it with large element. 3815 */ 3816 if (p > i) { 3817 ai = a[p]; 3818 a[p] = a[i]; 3819 } 3820 3821 /* 3822 * Insert small element into sorted part. 3823 */ 3824 while (ai < a[--i]) { 3825 a[i + 1] = a[i]; 3826 } 3827 a[i + 1] = ai; 3828 } 3829 } 3830 3831 /* 3832 * Continue with pair insertion sort on remain part. 3833 */ 3834 for (int i; low < high; ++low) { 3835 double a1 = a[i = low], a2 = a[++low]; 3836 3837 /* 3838 * Insert two elements per iteration: at first, insert the 3839 * larger element and then insert the smaller element, but 3840 * from the position where the larger element was inserted. 3841 */ 3842 if (a1 > a2) { 3843 3844 while (a1 < a[--i]) { 3845 a[i + 2] = a[i]; 3846 } 3847 a[++i + 1] = a1; 3848 3849 while (a2 < a[--i]) { 3850 a[i + 1] = a[i]; 3851 } 3852 a[i + 1] = a2; 3853 3854 } else if (a1 < a[i - 1]) { 3855 3856 while (a2 < a[--i]) { 3857 a[i + 2] = a[i]; 3858 } 3859 a[++i + 1] = a2; 3860 3861 while (a1 < a[--i]) { 3862 a[i + 1] = a[i]; 3863 } 3864 a[i + 1] = a1; 3865 } 3866 } 3867 } 3868 } 3869 3870 /** 3871 * Sorts the specified range of the array using insertion sort. 3872 * 3873 * @param a the array to be sorted 3874 * @param low the index of the first element, inclusive, to be sorted 3875 * @param high the index of the last element, exclusive, to be sorted 3876 */ 3877 private static void insertionSort(double[] a, int low, int high) { 3878 for (int i, k = low; ++k < high; ) { 3879 double ai = a[i = k]; 3880 3881 if (ai < a[i - 1]) { 3882 while (--i >= low && ai < a[i]) { 3883 a[i + 1] = a[i]; 3884 } 3885 a[i + 1] = ai; 3886 } 3887 } 3888 } 3889 3890 /** 3891 * Sorts the specified range of the array using heap sort. 3892 * 3893 * @param a the array to be sorted 3894 * @param low the index of the first element, inclusive, to be sorted 3895 * @param high the index of the last element, exclusive, to be sorted 3896 */ 3897 private static void heapSort(double[] a, int low, int high) { 3898 for (int k = (low + high) >>> 1; k > low; ) { 3899 pushDown(a, --k, a[k], low, high); 3900 } 3901 while (--high > low) { 3902 double max = a[low]; 3903 pushDown(a, low, a[high], low, high); 3904 a[high] = max; 3905 } 3906 } 3907 3908 /** 3909 * Pushes specified element down during heap sort. 3910 * 3911 * @param a the given array 3912 * @param p the start index 3913 * @param value the given element 3914 * @param low the index of the first element, inclusive, to be sorted 3915 * @param high the index of the last element, exclusive, to be sorted 3916 */ 3917 private static void pushDown(double[] a, int p, double value, int low, int high) { 3918 for (int k ;; a[p] = a[p = k]) { 3919 k = (p << 1) - low + 2; // Index of the right child 3920 3921 if (k > high) { 3922 break; 3923 } 3924 if (k == high || a[k] < a[k - 1]) { 3925 --k; 3926 } 3927 if (a[k] <= value) { 3928 break; 3929 } 3930 } 3931 a[p] = value; 3932 } 3933 3934 /** 3935 * Tries to sort the specified range of the array. 3936 * 3937 * @param sorter parallel context 3938 * @param a the array to be sorted 3939 * @param low the index of the first element to be sorted 3940 * @param size the array size 3941 * @return true if finally sorted, false otherwise 3942 */ 3943 private static boolean tryMergeRuns(Sorter sorter, double[] a, int low, int size) { 3944 3945 /* 3946 * The run array is constructed only if initial runs are 3947 * long enough to continue, run[i] then holds start index 3948 * of the i-th sequence of elements in non-descending order. 3949 */ 3950 int[] run = null; 3951 int high = low + size; 3952 int count = 1, last = low; 3953 3954 /* 3955 * Identify all possible runs. 3956 */ 3957 for (int k = low + 1; k < high; ) { 3958 3959 /* 3960 * Find the end index of the current run. 3961 */ 3962 if (a[k - 1] < a[k]) { 3963 3964 // Identify ascending sequence 3965 while (++k < high && a[k - 1] <= a[k]); 3966 3967 } else if (a[k - 1] > a[k]) { 3968 3969 // Identify descending sequence 3970 while (++k < high && a[k - 1] >= a[k]); 3971 3972 // Reverse into ascending order 3973 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { 3974 double ai = a[i]; a[i] = a[j]; a[j] = ai; 3975 } 3976 } else { // Identify constant sequence 3977 for (double ak = a[k]; ++k < high && ak == a[k]; ); 3978 3979 if (k < high) { 3980 continue; 3981 } 3982 } 3983 3984 /* 3985 * Check special cases. 3986 */ 3987 if (run == null) { 3988 if (k == high) { 3989 3990 /* 3991 * The array is monotonous sequence, 3992 * and therefore already sorted. 3993 */ 3994 return true; 3995 } 3996 3997 if (k - low < MIN_FIRST_RUN_SIZE) { 3998 3999 /* 4000 * The first run is too small 4001 * to proceed with scanning. 4002 */ 4003 return false; 4004 } 4005 4006 run = new int[((size >> 10) | 0x7F) & 0x3FF]; 4007 run[0] = low; 4008 4009 } else if (a[last - 1] > a[last]) { 4010 4011 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { 4012 4013 /* 4014 * The first runs are not long 4015 * enough to continue scanning. 4016 */ 4017 return false; 4018 } 4019 4020 if (++count == MAX_RUN_CAPACITY) { 4021 4022 /* 4023 * Array is not highly structured. 4024 */ 4025 return false; 4026 } 4027 4028 if (count == run.length) { 4029 4030 /* 4031 * Increase capacity of index array. 4032 */ 4033 run = Arrays.copyOf(run, count << 1); 4034 } 4035 } 4036 run[count] = (last = k); 4037 } 4038 4039 /* 4040 * Merge runs of highly structured array. 4041 */ 4042 if (count > 1) { 4043 double[] b; int offset = low; 4044 4045 if (sorter == null || (b = (double[]) sorter.b) == null) { 4046 b = new double[size]; 4047 } else { 4048 offset = sorter.offset; 4049 } 4050 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); 4051 } 4052 return true; 4053 } 4054 4055 /** 4056 * Merges the specified runs. 4057 * 4058 * @param a the source array 4059 * @param b the temporary buffer used in merging 4060 * @param offset the start index in the source, inclusive 4061 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) 4062 * @param parallel indicates whether merging is performed in parallel 4063 * @param run the start indexes of the runs, inclusive 4064 * @param lo the start index of the first run, inclusive 4065 * @param hi the start index of the last run, inclusive 4066 * @return the destination where runs are merged 4067 */ 4068 private static double[] mergeRuns(double[] a, double[] b, int offset, 4069 int aim, boolean parallel, int[] run, int lo, int hi) { 4070 4071 if (hi - lo == 1) { 4072 if (aim >= 0) { 4073 return a; 4074 } 4075 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; 4076 b[--j] = a[--i] 4077 ); 4078 return b; 4079 } 4080 4081 /* 4082 * Split into approximately equal parts. 4083 */ 4084 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; 4085 while (run[++mi + 1] <= rmi); 4086 4087 /* 4088 * Merge the left and right parts. 4089 */ 4090 double[] a1, a2; 4091 4092 if (parallel && hi - lo > MIN_RUN_COUNT) { 4093 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); 4094 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); 4095 a2 = (double[]) merger.getDestination(); 4096 } else { 4097 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); 4098 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); 4099 } 4100 4101 double[] dst = a1 == a ? b : a; 4102 4103 int k = a1 == a ? run[lo] - offset : run[lo]; 4104 int lo1 = a1 == b ? run[lo] - offset : run[lo]; 4105 int hi1 = a1 == b ? run[mi] - offset : run[mi]; 4106 int lo2 = a2 == b ? run[mi] - offset : run[mi]; 4107 int hi2 = a2 == b ? run[hi] - offset : run[hi]; 4108 4109 if (parallel) { 4110 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); 4111 } else { 4112 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); 4113 } 4114 return dst; 4115 } 4116 4117 /** 4118 * Merges the sorted parts. 4119 * 4120 * @param merger parallel context 4121 * @param dst the destination where parts are merged 4122 * @param k the start index of the destination, inclusive 4123 * @param a1 the first part 4124 * @param lo1 the start index of the first part, inclusive 4125 * @param hi1 the end index of the first part, exclusive 4126 * @param a2 the second part 4127 * @param lo2 the start index of the second part, inclusive 4128 * @param hi2 the end index of the second part, exclusive 4129 */ 4130 private static void mergeParts(Merger merger, double[] dst, int k, 4131 double[] a1, int lo1, int hi1, double[] a2, int lo2, int hi2) { 4132 4133 if (merger != null && a1 == a2) { 4134 4135 while (true) { 4136 4137 /* 4138 * The first part must be larger. 4139 */ 4140 if (hi1 - lo1 < hi2 - lo2) { 4141 int lo = lo1; lo1 = lo2; lo2 = lo; 4142 int hi = hi1; hi1 = hi2; hi2 = hi; 4143 } 4144 4145 /* 4146 * Small parts will be merged sequentially. 4147 */ 4148 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { 4149 break; 4150 } 4151 4152 /* 4153 * Find the median of the larger part. 4154 */ 4155 int mi1 = (lo1 + hi1) >>> 1; 4156 double key = a1[mi1]; 4157 int mi2 = hi2; 4158 4159 /* 4160 * Partition the smaller part. 4161 */ 4162 for (int loo = lo2; loo < mi2; ) { 4163 int t = (loo + mi2) >>> 1; 4164 4165 if (key > a2[t]) { 4166 loo = t + 1; 4167 } else { 4168 mi2 = t; 4169 } 4170 } 4171 4172 int d = mi2 - lo2 + mi1 - lo1; 4173 4174 /* 4175 * Merge the right sub-parts in parallel. 4176 */ 4177 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); 4178 4179 /* 4180 * Process the sub-left parts. 4181 */ 4182 hi1 = mi1; 4183 hi2 = mi2; 4184 } 4185 } 4186 4187 /* 4188 * Merge small parts sequentially. 4189 */ 4190 while (lo1 < hi1 && lo2 < hi2) { 4191 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; 4192 } 4193 if (dst != a1 || k < lo1) { 4194 while (lo1 < hi1) { 4195 dst[k++] = a1[lo1++]; 4196 } 4197 } 4198 if (dst != a2 || k < lo2) { 4199 while (lo2 < hi2) { 4200 dst[k++] = a2[lo2++]; 4201 } 4202 } 4203 } 4204 4205 // [class] 4206 4207 /** 4208 * This class implements parallel sorting. 4209 */ 4210 private static final class Sorter extends CountedCompleter<Void> { 4211 private static final long serialVersionUID = 20180818L; 4212 private final Object a, b; 4213 private final int low, size, offset, depth; 4214 4215 private Sorter(CountedCompleter<?> parent, 4216 Object a, Object b, int low, int size, int offset, int depth) { 4217 super(parent); 4218 this.a = a; 4219 this.b = b; 4220 this.low = low; 4221 this.size = size; 4222 this.offset = offset; 4223 this.depth = depth; 4224 } 4225 4226 @Override 4227 public final void compute() { 4228 if (depth < 0) { 4229 setPendingCount(2); 4230 int half = size >> 1; 4231 new Sorter(this, b, a, low, half, offset, depth + 1).fork(); 4232 new Sorter(this, b, a, low + half, size - half, offset, depth + 1).compute(); 4233 } else { 4234 if (a instanceof int[]) { 4235 sort(this, (int[]) a, depth, low, low + size); 4236 } else if (a instanceof long[]) { 4237 sort(this, (long[]) a, depth, low, low + size); 4238 } else if (a instanceof float[]) { 4239 sort(this, (float[]) a, depth, low, low + size); 4240 } else if (a instanceof double[]) { 4241 sort(this, (double[]) a, depth, low, low + size); 4242 } else { 4243 throw new IllegalArgumentException( 4244 "Unknown type of array: " + a.getClass().getName()); 4245 } 4246 } 4247 tryComplete(); 4248 } 4249 4250 @Override 4251 public final void onCompletion(CountedCompleter<?> caller) { 4252 if (depth < 0) { 4253 int mi = low + (size >> 1); 4254 boolean src = (depth & 1) == 0; 4255 4256 new Merger(null, 4257 a, 4258 src ? low : low - offset, 4259 b, 4260 src ? low - offset : low, 4261 src ? mi - offset : mi, 4262 b, 4263 src ? mi - offset : mi, 4264 src ? low + size - offset : low + size 4265 ).invoke(); 4266 } 4267 } 4268 4269 private void forkSorter(int depth, int low, int high) { 4270 addToPendingCount(1); 4271 Object a = this.a; // Use local variable for performance 4272 new Sorter(this, a, b, low, high - low, offset, depth).fork(); 4273 } 4274 } 4275 4276 /** 4277 * This class implements parallel merging. 4278 */ 4279 private static final class Merger extends CountedCompleter<Void> { 4280 private static final long serialVersionUID = 20180818L; 4281 private final Object dst, a1, a2; 4282 private final int k, lo1, hi1, lo2, hi2; 4283 4284 private Merger(CountedCompleter<?> parent, Object dst, int k, 4285 Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) { 4286 super(parent); 4287 this.dst = dst; 4288 this.k = k; 4289 this.a1 = a1; 4290 this.lo1 = lo1; 4291 this.hi1 = hi1; 4292 this.a2 = a2; 4293 this.lo2 = lo2; 4294 this.hi2 = hi2; 4295 } 4296 4297 @Override 4298 public final void compute() { 4299 if (dst instanceof int[]) { 4300 mergeParts(this, (int[]) dst, k, 4301 (int[]) a1, lo1, hi1, (int[]) a2, lo2, hi2); 4302 } else if (dst instanceof long[]) { 4303 mergeParts(this, (long[]) dst, k, 4304 (long[]) a1, lo1, hi1, (long[]) a2, lo2, hi2); 4305 } else if (dst instanceof float[]) { 4306 mergeParts(this, (float[]) dst, k, 4307 (float[]) a1, lo1, hi1, (float[]) a2, lo2, hi2); 4308 } else if (dst instanceof double[]) { 4309 mergeParts(this, (double[]) dst, k, 4310 (double[]) a1, lo1, hi1, (double[]) a2, lo2, hi2); 4311 } else { 4312 throw new IllegalArgumentException( 4313 "Unknown type of array: " + dst.getClass().getName()); 4314 } 4315 propagateCompletion(); 4316 } 4317 4318 private void forkMerger(Object dst, int k, 4319 Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) { 4320 addToPendingCount(1); 4321 new Merger(this, dst, k, a1, lo1, hi1, a2, lo2, hi2).fork(); 4322 } 4323 } 4324 4325 /** 4326 * This class implements parallel merging of runs. 4327 */ 4328 private static final class RunMerger extends RecursiveTask<Object> { 4329 private static final long serialVersionUID = 20180818L; 4330 private final Object a, b; 4331 private final int[] run; 4332 private final int offset, aim, lo, hi; 4333 4334 private RunMerger(Object a, Object b, int offset, 4335 int aim, int[] run, int lo, int hi) { 4336 this.a = a; 4337 this.b = b; 4338 this.offset = offset; 4339 this.aim = aim; 4340 this.run = run; 4341 this.lo = lo; 4342 this.hi = hi; 4343 } 4344 4345 @Override 4346 protected final Object compute() { 4347 if (a instanceof int[]) { 4348 return mergeRuns((int[]) a, (int[]) b, offset, aim, true, run, lo, hi); 4349 } 4350 if (a instanceof long[]) { 4351 return mergeRuns((long[]) a, (long[]) b, offset, aim, true, run, lo, hi); 4352 } 4353 if (a instanceof float[]) { 4354 return mergeRuns((float[]) a, (float[]) b, offset, aim, true, run, lo, hi); 4355 } 4356 if (a instanceof double[]) { 4357 return mergeRuns((double[]) a, (double[]) b, offset, aim, true, run, lo, hi); 4358 } 4359 throw new IllegalArgumentException( 4360 "Unknown type of array: " + a.getClass().getName()); 4361 } 4362 4363 private RunMerger forkMe() { 4364 fork(); 4365 return this; 4366 } 4367 4368 private Object getDestination() { 4369 join(); 4370 return getRawResult(); 4371 } 4372 } 4373 }