1 /* 2 * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util; 27 28 import java.util.concurrent.CountedCompleter; 29 import java.util.concurrent.RecursiveTask; 30 31 /** 32 * This class implements powerful and fully optimized versions, both 33 * sequential and parallel, of the Dual-Pivot Quicksort algorithm by 34 * Vladimir Yaroslavskiy, Jon Bentley and Josh Bloch. This algorithm 35 * offers O(n log(n)) performance on all data sets, and is typically 36 * faster than traditional (one-pivot) Quicksort implementations. 37 * 38 * There are also additional algorithms, invoked from the Dual-Pivot 39 * Quicksort, such as mixed insertion sort, merging of runs and heap 40 * sort, counting sort and parallel merge sort. 41 * 42 * @author Vladimir Yaroslavskiy 43 * @author Jon Bentley 44 * @author Josh Bloch 45 * @author Doug Lea 46 * 47 * @version 2018.08.18 48 * 49 * @since 1.7 * 14 50 */ 51 final class DualPivotQuicksort { 52 53 /** 54 * Prevents instantiation. 55 */ 56 private DualPivotQuicksort() {} 57 58 /** 59 * Max array size to use mixed insertion sort. 60 */ 61 private static final int MAX_MIXED_INSERTION_SORT_SIZE = 114; 62 63 /** 64 * Max array size to use insertion sort. 65 */ 66 private static final int MAX_INSERTION_SORT_SIZE = 41; 67 68 /** 69 * Min array size to perform sorting in parallel. 70 */ 71 private static final int MIN_PARALLEL_SORT_SIZE = 4 << 10; 72 73 /** 74 * Min array size to try merging of runs. 75 */ 76 private static final int MIN_TRY_MERGE_SIZE = 4 << 10; 77 78 /** 79 * Min size of the first run to continue with scanning. 80 */ 81 private static final int MIN_FIRST_RUN_SIZE = 16; 82 83 /** 84 * Min factor for the first runs to continue scanning. 85 */ 86 private static final int MIN_FIRST_RUNS_FACTOR = 7; 87 88 /** 89 * Max capacity of the index array for tracking runs. 90 */ 91 private static final int MAX_RUN_CAPACITY = 5 << 10; 92 93 /** 94 * Min number of runs, required by parallel merging. 95 */ 96 private static final int MIN_RUN_COUNT = 4; 97 98 /** 99 * Min array size to use parallel merging of parts. 100 */ 101 private static final int MIN_PARALLEL_MERGE_PARTS_SIZE = 4 << 10; 102 103 /** 104 * Min size of a byte array to use counting sort. 105 */ 106 private static final int MIN_BYTE_COUNTING_SORT_SIZE = 64; 107 108 /** 109 * Min size of a short or char array to use counting sort. 110 */ 111 private static final int MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE = 1750; 112 113 /** 114 * Max double recursive partitioning depth before using heap sort. 115 */ 116 private static final int MAX_RECURSION_DEPTH = 64 << 1; 117 118 /** 119 * Calculates the double depth of parallel merging. 120 * Depth is negative, if tasks split before sorting. 121 * 122 * @param parallelism the parallelism level 123 * @param size the target size 124 * @return the depth of parallel merging 125 */ 126 private static int getDepth(int parallelism, int size) { 127 int depth = 0; 128 129 while ((parallelism >>= 3) > 0 && (size >>= 2) > 0) { 130 depth -= 2; 131 } 132 return depth; 133 } 134 135 /** 136 * Sorts the specified range of the array using parallel merge 137 * sort and/or Dual-Pivot Quicksort. 138 * 139 * To balance the faster splitting and parallelism of merge sort 140 * with the faster element partitioning of Quicksort, ranges are 141 * subdivided in tiers such that, if there is enough parallelism, 142 * the four-way parallel merge is started, still ensuring enough 143 * parallelism to process the partitions. 144 * 145 * @param a the array to be sorted 146 * @param parallelism the parallelism level 147 * @param low the index of the first element, inclusive, to be sorted 148 * @param high the index of the last element, exclusive, to be sorted 149 */ 150 static void sort(int[] a, int parallelism, int low, int high) { 151 int size = high - low; 152 153 if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { 154 int depth = getDepth(parallelism, size >> 12); 155 int[] b = depth == 0 ? null : new int[size]; 156 new Sorter(null, a, b, low, size, low, depth).invoke(); 157 } else { 158 sort(null, a, 0, low, high); 159 } 160 } 161 162 /** 163 * Sorts the specified array using the Dual-Pivot Quicksort and/or 164 * other sorts in special-cases, possibly with parallel partitions. 165 * 166 * @param sorter parallel context 167 * @param a the array to be sorted 168 * @param bits the combination of recursion depth and bit flag, where 169 * the right bit "0" indicates that array is the leftmost part 170 * @param low the index of the first element, inclusive, to be sorted 171 * @param high the index of the last element, exclusive, to be sorted 172 */ 173 static void sort(Sorter sorter, int[] a, int bits, int low, int high) { 174 while (true) { 175 int end = high - 1, size = high - low; 176 177 /* 178 * Run mixed insertion sort on small non-leftmost parts. 179 */ 180 if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { 181 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 182 return; 183 } 184 185 /* 186 * Invoke insertion sort on small leftmost part. 187 */ 188 if (size < MAX_INSERTION_SORT_SIZE) { 189 insertionSort(a, low, high); 190 return; 191 } 192 193 /* 194 * Check if the whole array or large non-leftmost 195 * parts are nearly sorted and then merge runs. 196 */ 197 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) 198 && tryMergeRuns(sorter, a, low, size)) { 199 return; 200 } 201 202 /* 203 * Switch to heap sort if execution 204 * time is becoming quadratic. 205 */ 206 if ((bits += 2) > MAX_RECURSION_DEPTH) { 207 heapSort(a, low, high); 208 return; 209 } 210 211 /* 212 * Use an inexpensive approximation of the golden ratio 213 * to select five sample elements and determine pivots. 214 */ 215 int step = (size >> 3) * 3 + 3; 216 217 /* 218 * Five elements around (and including) the central element 219 * will be used for pivot selection as described below. The 220 * unequal choice of spacing these elements was empirically 221 * determined to work well on a wide variety of inputs. 222 */ 223 int e1 = low + step; 224 int e5 = end - step; 225 int e3 = (e1 + e5) >>> 1; 226 int e2 = (e1 + e3) >>> 1; 227 int e4 = (e3 + e5) >>> 1; 228 int a3 = a[e3]; 229 230 /* 231 * Sort these elements in place by the combination 232 * of 4-element sorting network and insertion sort. 233 * 234 * 5 ------o-----------o------------ 235 * | | 236 * 4 ------|-----o-----o-----o------ 237 * | | | 238 * 2 ------o-----|-----o-----o------ 239 * | | 240 * 1 ------------o-----o------------ 241 */ 242 if (a[e5] < a[e2]) { int t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 243 if (a[e4] < a[e1]) { int t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 244 if (a[e5] < a[e4]) { int t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 245 if (a[e2] < a[e1]) { int t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 246 if (a[e4] < a[e2]) { int t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 247 248 if (a3 < a[e2]) { 249 if (a3 < a[e1]) { 250 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 251 } else { 252 a[e3] = a[e2]; a[e2] = a3; 253 } 254 } else if (a3 > a[e4]) { 255 if (a3 > a[e5]) { 256 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 257 } else { 258 a[e3] = a[e4]; a[e4] = a3; 259 } 260 } 261 262 // Pointers 263 int lower = low; // The index of the last element of the left part 264 int upper = end; // The index of the first element of the right part 265 266 /* 267 * Partitioning with 2 pivots in case of different elements. 268 */ 269 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 270 271 /* 272 * Use the first and fifth of the five sorted elements as 273 * the pivots. These values are inexpensive approximation 274 * of tertiles. Note, that pivot1 < pivot2. 275 */ 276 int pivot1 = a[e1]; 277 int pivot2 = a[e5]; 278 279 /* 280 * The first and the last elements to be sorted are moved 281 * to the locations formerly occupied by the pivots. When 282 * partitioning is completed, the pivots are swapped back 283 * into their final positions, and excluded from the next 284 * subsequent sorting. 285 */ 286 a[e1] = a[lower]; 287 a[e5] = a[upper]; 288 289 /* 290 * Skip elements, which are less or greater than the pivots. 291 */ 292 while (a[++lower] < pivot1); 293 while (a[--upper] > pivot2); 294 295 /* 296 * Backward 3-interval partitioning 297 * 298 * left part central part right part 299 * +------------------------------------------------------------+ 300 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 301 * +------------------------------------------------------------+ 302 * ^ ^ ^ 303 * | | | 304 * lower k upper 305 * 306 * Invariants: 307 * 308 * all in (low, lower] < pivot1 309 * pivot1 <= all in (k, upper) <= pivot2 310 * all in [upper, end) > pivot2 311 * 312 * Pointer k is the last index of ?-part 313 */ 314 for (int unused = --lower, k = ++upper; --k > lower; ) { 315 int ak = a[k]; 316 317 if (ak < pivot1) { // Move a[k] to the left side 318 while (lower < k) { 319 if (a[++lower] >= pivot1) { 320 if (a[lower] > pivot2) { 321 a[k] = a[--upper]; 322 a[upper] = a[lower]; 323 } else { 324 a[k] = a[lower]; 325 } 326 a[lower] = ak; 327 break; 328 } 329 } 330 } else if (ak > pivot2) { // Move a[k] to the right side 331 a[k] = a[--upper]; 332 a[upper] = ak; 333 } 334 } 335 336 /* 337 * Swap the pivots into their final positions. 338 */ 339 a[low] = a[lower]; a[lower] = pivot1; 340 a[end] = a[upper]; a[upper] = pivot2; 341 342 /* 343 * Sort non-left parts recursively (possibly in parallel), 344 * excluding known pivots. 345 */ 346 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 347 sorter.forkSorter(bits | 1, lower + 1, upper); 348 sorter.forkSorter(bits | 1, upper + 1, high); 349 } else { 350 sort(sorter, a, bits | 1, lower + 1, upper); 351 sort(sorter, a, bits | 1, upper + 1, high); 352 } 353 354 } else { // Use single pivot in case of many equal elements 355 356 /* 357 * Use the third of the five sorted elements as the pivot. 358 * This value is inexpensive approximation of the median. 359 */ 360 int pivot = a[e3]; 361 362 /* 363 * The first element to be sorted is moved to the 364 * location formerly occupied by the pivot. After 365 * completion of partitioning the pivot is swapped 366 * back into its final position, and excluded from 367 * the next subsequent sorting. 368 */ 369 a[e3] = a[lower]; 370 371 /* 372 * Traditional 3-way (Dutch National Flag) partitioning 373 * 374 * left part central part right part 375 * +------------------------------------------------------+ 376 * | < pivot | ? | == pivot | > pivot | 377 * +------------------------------------------------------+ 378 * ^ ^ ^ 379 * | | | 380 * lower k upper 381 * 382 * Invariants: 383 * 384 * all in (low, lower] < pivot 385 * all in (k, upper) == pivot 386 * all in [upper, end] > pivot 387 * 388 * Pointer k is the last index of ?-part 389 */ 390 for (int k = ++upper; --k > lower; ) { 391 int ak = a[k]; 392 393 if (ak != pivot) { 394 a[k] = pivot; 395 396 if (ak < pivot) { // Move a[k] to the left side 397 while (a[++lower] < pivot); 398 399 if (a[lower] > pivot) { 400 a[--upper] = a[lower]; 401 } 402 a[lower] = ak; 403 } else { // ak > pivot - Move a[k] to the right side 404 a[--upper] = ak; 405 } 406 } 407 } 408 409 /* 410 * Swap the pivot into its final position. 411 */ 412 a[low] = a[lower]; a[lower] = pivot; 413 414 /* 415 * Sort the right part (possibly in parallel), excluding 416 * known pivot. All elements from the central part are 417 * equal and therefore already sorted. 418 */ 419 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 420 sorter.forkSorter(bits | 1, upper, high); 421 } else { 422 sort(sorter, a, bits | 1, upper, high); 423 } 424 } 425 high = lower; // Iterate along the left part 426 } 427 } 428 429 /** 430 * Sorts the specified range of the array using mixed insertion sort. 431 * 432 * Mixed insertion sort is combination of simple insertion sort, 433 * pin insertion sort and pair insertion sort. 434 * 435 * In the context of Dual-Pivot Quicksort, the pivot element 436 * from the left part plays the role of sentinel, because it 437 * is less than any elements from the given part. Therefore, 438 * expensive check of the left range can be skipped on each 439 * iteration unless it is the leftmost call. 440 * 441 * @param a the array to be sorted 442 * @param low the index of the first element, inclusive, to be sorted 443 * @param end the index of the last element for simple insertion sort 444 * @param high the index of the last element, exclusive, to be sorted 445 */ 446 private static void mixedInsertionSort(int[] a, int low, int end, int high) { 447 if (end == high) { 448 449 /* 450 * Invoke simple insertion sort on tiny array. 451 */ 452 for (int i; ++low < end; ) { 453 int ai = a[i = low]; 454 455 while (ai < a[--i]) { 456 a[i + 1] = a[i]; 457 } 458 a[i + 1] = ai; 459 } 460 } else { 461 462 /* 463 * Start with pin insertion sort on small part. 464 * 465 * Pin insertion sort is extended simple insertion sort. 466 * The main idea of this sort is to put elements larger 467 * than an element called pin to the end of array (the 468 * proper area for such elements). It avoids expensive 469 * movements of these elements through the whole array. 470 */ 471 int pin = a[end]; 472 473 for (int i, p = high; ++low < end; ) { 474 int ai = a[i = low]; 475 476 if (ai < a[i - 1]) { // Small element 477 478 /* 479 * Insert small element into sorted part. 480 */ 481 a[i] = a[--i]; 482 483 while (ai < a[--i]) { 484 a[i + 1] = a[i]; 485 } 486 a[i + 1] = ai; 487 488 } else if (p > i && ai > pin) { // Large element 489 490 /* 491 * Find element smaller than pin. 492 */ 493 while (a[--p] > pin); 494 495 /* 496 * Swap it with large element. 497 */ 498 if (p > i) { 499 ai = a[p]; 500 a[p] = a[i]; 501 } 502 503 /* 504 * Insert small element into sorted part. 505 */ 506 while (ai < a[--i]) { 507 a[i + 1] = a[i]; 508 } 509 a[i + 1] = ai; 510 } 511 } 512 513 /* 514 * Continue with pair insertion sort on remain part. 515 */ 516 for (int i; low < high; ++low) { 517 int a1 = a[i = low], a2 = a[++low]; 518 519 /* 520 * Insert two elements per iteration: at first, insert the 521 * larger element and then insert the smaller element, but 522 * from the position where the larger element was inserted. 523 */ 524 if (a1 > a2) { 525 526 while (a1 < a[--i]) { 527 a[i + 2] = a[i]; 528 } 529 a[++i + 1] = a1; 530 531 while (a2 < a[--i]) { 532 a[i + 1] = a[i]; 533 } 534 a[i + 1] = a2; 535 536 } else if (a1 < a[i - 1]) { 537 538 while (a2 < a[--i]) { 539 a[i + 2] = a[i]; 540 } 541 a[++i + 1] = a2; 542 543 while (a1 < a[--i]) { 544 a[i + 1] = a[i]; 545 } 546 a[i + 1] = a1; 547 } 548 } 549 } 550 } 551 552 /** 553 * Sorts the specified range of the array using insertion sort. 554 * 555 * @param a the array to be sorted 556 * @param low the index of the first element, inclusive, to be sorted 557 * @param high the index of the last element, exclusive, to be sorted 558 */ 559 private static void insertionSort(int[] a, int low, int high) { 560 for (int i, k = low; ++k < high; ) { 561 int ai = a[i = k]; 562 563 if (ai < a[i - 1]) { 564 while (--i >= low && ai < a[i]) { 565 a[i + 1] = a[i]; 566 } 567 a[i + 1] = ai; 568 } 569 } 570 } 571 572 /** 573 * Sorts the specified range of the array using heap sort. 574 * 575 * @param a the array to be sorted 576 * @param low the index of the first element, inclusive, to be sorted 577 * @param high the index of the last element, exclusive, to be sorted 578 */ 579 private static void heapSort(int[] a, int low, int high) { 580 for (int k = (low + high) >>> 1; k > low; ) { 581 pushDown(a, --k, a[k], low, high); 582 } 583 while (--high > low) { 584 int max = a[low]; 585 pushDown(a, low, a[high], low, high); 586 a[high] = max; 587 } 588 } 589 590 /** 591 * Pushes specified element down during heap sort. 592 * 593 * @param a the given array 594 * @param p the start index 595 * @param value the given element 596 * @param low the index of the first element, inclusive, to be sorted 597 * @param high the index of the last element, exclusive, to be sorted 598 */ 599 private static void pushDown(int[] a, int p, int value, int low, int high) { 600 for (int k ;; a[p] = a[p = k]) { 601 k = (p << 1) - low + 2; // Index of the right child 602 603 if (k > high) { 604 break; 605 } 606 if (k == high || a[k] < a[k - 1]) { 607 --k; 608 } 609 if (a[k] <= value) { 610 break; 611 } 612 } 613 a[p] = value; 614 } 615 616 /** 617 * Tries to sort the specified range of the array. 618 * 619 * @param sorter parallel context 620 * @param a the array to be sorted 621 * @param low the index of the first element to be sorted 622 * @param size the array size 623 * @return true if finally sorted, false otherwise 624 */ 625 private static boolean tryMergeRuns(Sorter sorter, int[] a, int low, int size) { 626 627 /* 628 * The run array is constructed only if initial runs are 629 * long enough to continue, run[i] then holds start index 630 * of the i-th sequence of elements in non-descending order. 631 */ 632 int[] run = null; 633 int high = low + size; 634 int count = 1, last = low; 635 636 /* 637 * Identify all possible runs. 638 */ 639 for (int k = low + 1; k < high; ) { 640 641 /* 642 * Find the end index of the current run. 643 */ 644 if (a[k - 1] < a[k]) { 645 646 // Identify ascending sequence 647 while (++k < high && a[k - 1] <= a[k]); 648 649 } else if (a[k - 1] > a[k]) { 650 651 // Identify descending sequence 652 while (++k < high && a[k - 1] >= a[k]); 653 654 // Reverse into ascending order 655 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { 656 int ai = a[i]; a[i] = a[j]; a[j] = ai; 657 } 658 } else { // Identify constant sequence 659 for (int ak = a[k]; ++k < high && ak == a[k]; ); 660 661 if (k < high) { 662 continue; 663 } 664 } 665 666 /* 667 * Check special cases. 668 */ 669 if (run == null) { 670 if (k == high) { 671 672 /* 673 * The array is monotonous sequence, 674 * and therefore already sorted. 675 */ 676 return true; 677 } 678 679 if (k - low < MIN_FIRST_RUN_SIZE) { 680 681 /* 682 * The first run is too small 683 * to proceed with scanning. 684 */ 685 return false; 686 } 687 688 run = new int[((size >> 10) | 0x7F) & 0x3FF]; 689 run[0] = low; 690 691 } else if (a[last - 1] > a[last]) { 692 693 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { 694 695 /* 696 * The first runs are not long 697 * enough to continue scanning. 698 */ 699 return false; 700 } 701 702 if (++count == MAX_RUN_CAPACITY) { 703 704 /* 705 * Array is not highly structured. 706 */ 707 return false; 708 } 709 710 if (count == run.length) { 711 712 /* 713 * Increase capacity of index array. 714 */ 715 run = Arrays.copyOf(run, count << 1); 716 } 717 } 718 run[count] = (last = k); 719 } 720 721 /* 722 * Merge runs of highly structured array. 723 */ 724 if (count > 1) { 725 int[] b; int offset = low; 726 727 if (sorter == null || (b = (int[]) sorter.b) == null) { 728 b = new int[size]; 729 } else { 730 offset = sorter.offset; 731 } 732 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); 733 } 734 return true; 735 } 736 737 /** 738 * Merges the specified runs. 739 * 740 * @param a the source array 741 * @param b the temporary buffer used in merging 742 * @param offset the start index in the source, inclusive 743 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) 744 * @param parallel indicates whether merging is performed in parallel 745 * @param run the start indexes of the runs, inclusive 746 * @param lo the start index of the first run, inclusive 747 * @param hi the start index of the last run, inclusive 748 * @return the destination where runs are merged 749 */ 750 private static int[] mergeRuns(int[] a, int[] b, int offset, 751 int aim, boolean parallel, int[] run, int lo, int hi) { 752 753 if (hi - lo == 1) { 754 if (aim >= 0) { 755 return a; 756 } 757 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; 758 b[--j] = a[--i] 759 ); 760 return b; 761 } 762 763 /* 764 * Split into approximately equal parts. 765 */ 766 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; 767 while (run[++mi + 1] <= rmi); 768 769 /* 770 * Merge the left and right parts. 771 */ 772 int[] a1, a2; 773 774 if (parallel && hi - lo > MIN_RUN_COUNT) { 775 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); 776 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); 777 a2 = (int[]) merger.getDestination(); 778 } else { 779 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); 780 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); 781 } 782 783 int[] dst = a1 == a ? b : a; 784 785 int k = a1 == a ? run[lo] - offset : run[lo]; 786 int lo1 = a1 == b ? run[lo] - offset : run[lo]; 787 int hi1 = a1 == b ? run[mi] - offset : run[mi]; 788 int lo2 = a2 == b ? run[mi] - offset : run[mi]; 789 int hi2 = a2 == b ? run[hi] - offset : run[hi]; 790 791 if (parallel) { 792 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); 793 } else { 794 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); 795 } 796 return dst; 797 } 798 799 /** 800 * Merges the sorted parts. 801 * 802 * @param merger parallel context 803 * @param dst the destination where parts are merged 804 * @param k the start index of the destination, inclusive 805 * @param a1 the first part 806 * @param lo1 the start index of the first part, inclusive 807 * @param hi1 the end index of the first part, exclusive 808 * @param a2 the second part 809 * @param lo2 the start index of the second part, inclusive 810 * @param hi2 the end index of the second part, exclusive 811 */ 812 private static void mergeParts(Merger merger, int[] dst, int k, 813 int[] a1, int lo1, int hi1, int[] a2, int lo2, int hi2) { 814 815 if (merger != null && a1 == a2) { 816 817 while (true) { 818 819 /* 820 * The first part must be larger. 821 */ 822 if (hi1 - lo1 < hi2 - lo2) { 823 int lo = lo1; lo1 = lo2; lo2 = lo; 824 int hi = hi1; hi1 = hi2; hi2 = hi; 825 } 826 827 /* 828 * Small parts will be merged sequentially. 829 */ 830 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { 831 break; 832 } 833 834 /* 835 * Find the median of the larger part. 836 */ 837 int mi1 = (lo1 + hi1) >>> 1; 838 int key = a1[mi1]; 839 int mi2 = hi2; 840 841 /* 842 * Partition the smaller part. 843 */ 844 for (int loo = lo2; loo < mi2; ) { 845 int t = (loo + mi2) >>> 1; 846 847 if (key > a2[t]) { 848 loo = t + 1; 849 } else { 850 mi2 = t; 851 } 852 } 853 854 int d = mi2 - lo2 + mi1 - lo1; 855 856 /* 857 * Merge the right sub-parts in parallel. 858 */ 859 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); 860 861 /* 862 * Process the sub-left parts. 863 */ 864 hi1 = mi1; 865 hi2 = mi2; 866 } 867 } 868 869 /* 870 * Merge small parts sequentially. 871 */ 872 while (lo1 < hi1 && lo2 < hi2) { 873 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; 874 } 875 if (dst != a1 || k < lo1) { 876 while (lo1 < hi1) { 877 dst[k++] = a1[lo1++]; 878 } 879 } 880 if (dst != a2 || k < lo2) { 881 while (lo2 < hi2) { 882 dst[k++] = a2[lo2++]; 883 } 884 } 885 } 886 887 // [long] 888 889 /** 890 * Sorts the specified range of the array using parallel merge 891 * sort and/or Dual-Pivot Quicksort. 892 * 893 * To balance the faster splitting and parallelism of merge sort 894 * with the faster element partitioning of Quicksort, ranges are 895 * subdivided in tiers such that, if there is enough parallelism, 896 * the four-way parallel merge is started, still ensuring enough 897 * parallelism to process the partitions. 898 * 899 * @param a the array to be sorted 900 * @param parallelism the parallelism level 901 * @param low the index of the first element, inclusive, to be sorted 902 * @param high the index of the last element, exclusive, to be sorted 903 */ 904 static void sort(long[] a, int parallelism, int low, int high) { 905 int size = high - low; 906 907 if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { 908 int depth = getDepth(parallelism, size >> 12); 909 long[] b = depth == 0 ? null : new long[size]; 910 new Sorter(null, a, b, low, size, low, depth).invoke(); 911 } else { 912 sort(null, a, 0, low, high); 913 } 914 } 915 916 /** 917 * Sorts the specified array using the Dual-Pivot Quicksort and/or 918 * other sorts in special-cases, possibly with parallel partitions. 919 * 920 * @param sorter parallel context 921 * @param a the array to be sorted 922 * @param bits the combination of recursion depth and bit flag, where 923 * the right bit "0" indicates that array is the leftmost part 924 * @param low the index of the first element, inclusive, to be sorted 925 * @param high the index of the last element, exclusive, to be sorted 926 */ 927 static void sort(Sorter sorter, long[] a, int bits, int low, int high) { 928 while (true) { 929 int end = high - 1, size = high - low; 930 931 /* 932 * Run mixed insertion sort on small non-leftmost parts. 933 */ 934 if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { 935 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 936 return; 937 } 938 939 /* 940 * Invoke insertion sort on small leftmost part. 941 */ 942 if (size < MAX_INSERTION_SORT_SIZE) { 943 insertionSort(a, low, high); 944 return; 945 } 946 947 /* 948 * Check if the whole array or large non-leftmost 949 * parts are nearly sorted and then merge runs. 950 */ 951 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) 952 && tryMergeRuns(sorter, a, low, size)) { 953 return; 954 } 955 956 /* 957 * Switch to heap sort if execution 958 * time is becoming quadratic. 959 */ 960 if ((bits += 2) > MAX_RECURSION_DEPTH) { 961 heapSort(a, low, high); 962 return; 963 } 964 965 /* 966 * Use an inexpensive approximation of the golden ratio 967 * to select five sample elements and determine pivots. 968 */ 969 int step = (size >> 3) * 3 + 3; 970 971 /* 972 * Five elements around (and including) the central element 973 * will be used for pivot selection as described below. The 974 * unequal choice of spacing these elements was empirically 975 * determined to work well on a wide variety of inputs. 976 */ 977 int e1 = low + step; 978 int e5 = end - step; 979 int e3 = (e1 + e5) >>> 1; 980 int e2 = (e1 + e3) >>> 1; 981 int e4 = (e3 + e5) >>> 1; 982 long a3 = a[e3]; 983 984 /* 985 * Sort these elements in place by the combination 986 * of 4-element sorting network and insertion sort. 987 * 988 * 5 ------o-----------o------------ 989 * | | 990 * 4 ------|-----o-----o-----o------ 991 * | | | 992 * 2 ------o-----|-----o-----o------ 993 * | | 994 * 1 ------------o-----o------------ 995 */ 996 if (a[e5] < a[e2]) { long t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 997 if (a[e4] < a[e1]) { long t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 998 if (a[e5] < a[e4]) { long t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 999 if (a[e2] < a[e1]) { long t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 1000 if (a[e4] < a[e2]) { long t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 1001 1002 if (a3 < a[e2]) { 1003 if (a3 < a[e1]) { 1004 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 1005 } else { 1006 a[e3] = a[e2]; a[e2] = a3; 1007 } 1008 } else if (a3 > a[e4]) { 1009 if (a3 > a[e5]) { 1010 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 1011 } else { 1012 a[e3] = a[e4]; a[e4] = a3; 1013 } 1014 } 1015 1016 // Pointers 1017 int lower = low; // The index of the last element of the left part 1018 int upper = end; // The index of the first element of the right part 1019 1020 /* 1021 * Partitioning with 2 pivots in case of different elements. 1022 */ 1023 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 1024 1025 /* 1026 * Use the first and fifth of the five sorted elements as 1027 * the pivots. These values are inexpensive approximation 1028 * of tertiles. Note, that pivot1 < pivot2. 1029 */ 1030 long pivot1 = a[e1]; 1031 long pivot2 = a[e5]; 1032 1033 /* 1034 * The first and the last elements to be sorted are moved 1035 * to the locations formerly occupied by the pivots. When 1036 * partitioning is completed, the pivots are swapped back 1037 * into their final positions, and excluded from the next 1038 * subsequent sorting. 1039 */ 1040 a[e1] = a[lower]; 1041 a[e5] = a[upper]; 1042 1043 /* 1044 * Skip elements, which are less or greater than the pivots. 1045 */ 1046 while (a[++lower] < pivot1); 1047 while (a[--upper] > pivot2); 1048 1049 /* 1050 * Backward 3-interval partitioning 1051 * 1052 * left part central part right part 1053 * +------------------------------------------------------------+ 1054 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 1055 * +------------------------------------------------------------+ 1056 * ^ ^ ^ 1057 * | | | 1058 * lower k upper 1059 * 1060 * Invariants: 1061 * 1062 * all in (low, lower] < pivot1 1063 * pivot1 <= all in (k, upper) <= pivot2 1064 * all in [upper, end) > pivot2 1065 * 1066 * Pointer k is the last index of ?-part 1067 */ 1068 for (int unused = --lower, k = ++upper; --k > lower; ) { 1069 long ak = a[k]; 1070 1071 if (ak < pivot1) { // Move a[k] to the left side 1072 while (lower < k) { 1073 if (a[++lower] >= pivot1) { 1074 if (a[lower] > pivot2) { 1075 a[k] = a[--upper]; 1076 a[upper] = a[lower]; 1077 } else { 1078 a[k] = a[lower]; 1079 } 1080 a[lower] = ak; 1081 break; 1082 } 1083 } 1084 } else if (ak > pivot2) { // Move a[k] to the right side 1085 a[k] = a[--upper]; 1086 a[upper] = ak; 1087 } 1088 } 1089 1090 /* 1091 * Swap the pivots into their final positions. 1092 */ 1093 a[low] = a[lower]; a[lower] = pivot1; 1094 a[end] = a[upper]; a[upper] = pivot2; 1095 1096 /* 1097 * Sort non-left parts recursively (possibly in parallel), 1098 * excluding known pivots. 1099 */ 1100 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 1101 sorter.forkSorter(bits | 1, lower + 1, upper); 1102 sorter.forkSorter(bits | 1, upper + 1, high); 1103 } else { 1104 sort(sorter, a, bits | 1, lower + 1, upper); 1105 sort(sorter, a, bits | 1, upper + 1, high); 1106 } 1107 1108 } else { // Use single pivot in case of many equal elements 1109 1110 /* 1111 * Use the third of the five sorted elements as the pivot. 1112 * This value is inexpensive approximation of the median. 1113 */ 1114 long pivot = a[e3]; 1115 1116 /* 1117 * The first element to be sorted is moved to the 1118 * location formerly occupied by the pivot. After 1119 * completion of partitioning the pivot is swapped 1120 * back into its final position, and excluded from 1121 * the next subsequent sorting. 1122 */ 1123 a[e3] = a[lower]; 1124 1125 /* 1126 * Traditional 3-way (Dutch National Flag) partitioning 1127 * 1128 * left part central part right part 1129 * +------------------------------------------------------+ 1130 * | < pivot | ? | == pivot | > pivot | 1131 * +------------------------------------------------------+ 1132 * ^ ^ ^ 1133 * | | | 1134 * lower k upper 1135 * 1136 * Invariants: 1137 * 1138 * all in (low, lower] < pivot 1139 * all in (k, upper) == pivot 1140 * all in [upper, end] > pivot 1141 * 1142 * Pointer k is the last index of ?-part 1143 */ 1144 for (int k = ++upper; --k > lower; ) { 1145 long ak = a[k]; 1146 1147 if (ak != pivot) { 1148 a[k] = pivot; 1149 1150 if (ak < pivot) { // Move a[k] to the left side 1151 while (a[++lower] < pivot); 1152 1153 if (a[lower] > pivot) { 1154 a[--upper] = a[lower]; 1155 } 1156 a[lower] = ak; 1157 } else { // ak > pivot - Move a[k] to the right side 1158 a[--upper] = ak; 1159 } 1160 } 1161 } 1162 1163 /* 1164 * Swap the pivot into its final position. 1165 */ 1166 a[low] = a[lower]; a[lower] = pivot; 1167 1168 /* 1169 * Sort the right part (possibly in parallel), excluding 1170 * known pivot. All elements from the central part are 1171 * equal and therefore already sorted. 1172 */ 1173 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 1174 sorter.forkSorter(bits | 1, upper, high); 1175 } else { 1176 sort(sorter, a, bits | 1, upper, high); 1177 } 1178 } 1179 high = lower; // Iterate along the left part 1180 } 1181 } 1182 1183 /** 1184 * Sorts the specified range of the array using mixed insertion sort. 1185 * 1186 * Mixed insertion sort is combination of simple insertion sort, 1187 * pin insertion sort and pair insertion sort. 1188 * 1189 * In the context of Dual-Pivot Quicksort, the pivot element 1190 * from the left part plays the role of sentinel, because it 1191 * is less than any elements from the given part. Therefore, 1192 * expensive check of the left range can be skipped on each 1193 * iteration unless it is the leftmost call. 1194 * 1195 * @param a the array to be sorted 1196 * @param low the index of the first element, inclusive, to be sorted 1197 * @param end the index of the last element for simple insertion sort 1198 * @param high the index of the last element, exclusive, to be sorted 1199 */ 1200 private static void mixedInsertionSort(long[] a, int low, int end, int high) { 1201 if (end == high) { 1202 1203 /* 1204 * Invoke simple insertion sort on tiny array. 1205 */ 1206 for (int i; ++low < end; ) { 1207 long ai = a[i = low]; 1208 1209 while (ai < a[--i]) { 1210 a[i + 1] = a[i]; 1211 } 1212 a[i + 1] = ai; 1213 } 1214 } else { 1215 1216 /* 1217 * Start with pin insertion sort on small part. 1218 * 1219 * Pin insertion sort is extended simple insertion sort. 1220 * The main idea of this sort is to put elements larger 1221 * than an element called pin to the end of array (the 1222 * proper area for such elements). It avoids expensive 1223 * movements of these elements through the whole array. 1224 */ 1225 long pin = a[end]; 1226 1227 for (int i, p = high; ++low < end; ) { 1228 long ai = a[i = low]; 1229 1230 if (ai < a[i - 1]) { // Small element 1231 1232 /* 1233 * Insert small element into sorted part. 1234 */ 1235 a[i] = a[--i]; 1236 1237 while (ai < a[--i]) { 1238 a[i + 1] = a[i]; 1239 } 1240 a[i + 1] = ai; 1241 1242 } else if (p > i && ai > pin) { // Large element 1243 1244 /* 1245 * Find element smaller than pin. 1246 */ 1247 while (a[--p] > pin); 1248 1249 /* 1250 * Swap it with large element. 1251 */ 1252 if (p > i) { 1253 ai = a[p]; 1254 a[p] = a[i]; 1255 } 1256 1257 /* 1258 * Insert small element into sorted part. 1259 */ 1260 while (ai < a[--i]) { 1261 a[i + 1] = a[i]; 1262 } 1263 a[i + 1] = ai; 1264 } 1265 } 1266 1267 /* 1268 * Continue with pair insertion sort on remain part. 1269 */ 1270 for (int i; low < high; ++low) { 1271 long a1 = a[i = low], a2 = a[++low]; 1272 1273 /* 1274 * Insert two elements per iteration: at first, insert the 1275 * larger element and then insert the smaller element, but 1276 * from the position where the larger element was inserted. 1277 */ 1278 if (a1 > a2) { 1279 1280 while (a1 < a[--i]) { 1281 a[i + 2] = a[i]; 1282 } 1283 a[++i + 1] = a1; 1284 1285 while (a2 < a[--i]) { 1286 a[i + 1] = a[i]; 1287 } 1288 a[i + 1] = a2; 1289 1290 } else if (a1 < a[i - 1]) { 1291 1292 while (a2 < a[--i]) { 1293 a[i + 2] = a[i]; 1294 } 1295 a[++i + 1] = a2; 1296 1297 while (a1 < a[--i]) { 1298 a[i + 1] = a[i]; 1299 } 1300 a[i + 1] = a1; 1301 } 1302 } 1303 } 1304 } 1305 1306 /** 1307 * Sorts the specified range of the array using insertion sort. 1308 * 1309 * @param a the array to be sorted 1310 * @param low the index of the first element, inclusive, to be sorted 1311 * @param high the index of the last element, exclusive, to be sorted 1312 */ 1313 private static void insertionSort(long[] a, int low, int high) { 1314 for (int i, k = low; ++k < high; ) { 1315 long ai = a[i = k]; 1316 1317 if (ai < a[i - 1]) { 1318 while (--i >= low && ai < a[i]) { 1319 a[i + 1] = a[i]; 1320 } 1321 a[i + 1] = ai; 1322 } 1323 } 1324 } 1325 1326 /** 1327 * Sorts the specified range of the array using heap sort. 1328 * 1329 * @param a the array to be sorted 1330 * @param low the index of the first element, inclusive, to be sorted 1331 * @param high the index of the last element, exclusive, to be sorted 1332 */ 1333 private static void heapSort(long[] a, int low, int high) { 1334 for (int k = (low + high) >>> 1; k > low; ) { 1335 pushDown(a, --k, a[k], low, high); 1336 } 1337 while (--high > low) { 1338 long max = a[low]; 1339 pushDown(a, low, a[high], low, high); 1340 a[high] = max; 1341 } 1342 } 1343 1344 /** 1345 * Pushes specified element down during heap sort. 1346 * 1347 * @param a the given array 1348 * @param p the start index 1349 * @param value the given element 1350 * @param low the index of the first element, inclusive, to be sorted 1351 * @param high the index of the last element, exclusive, to be sorted 1352 */ 1353 private static void pushDown(long[] a, int p, long value, int low, int high) { 1354 for (int k ;; a[p] = a[p = k]) { 1355 k = (p << 1) - low + 2; // Index of the right child 1356 1357 if (k > high) { 1358 break; 1359 } 1360 if (k == high || a[k] < a[k - 1]) { 1361 --k; 1362 } 1363 if (a[k] <= value) { 1364 break; 1365 } 1366 } 1367 a[p] = value; 1368 } 1369 1370 /** 1371 * Tries to sort the specified range of the array. 1372 * 1373 * @param sorter parallel context 1374 * @param a the array to be sorted 1375 * @param low the index of the first element to be sorted 1376 * @param size the array size 1377 * @return true if finally sorted, false otherwise 1378 */ 1379 private static boolean tryMergeRuns(Sorter sorter, long[] a, int low, int size) { 1380 1381 /* 1382 * The run array is constructed only if initial runs are 1383 * long enough to continue, run[i] then holds start index 1384 * of the i-th sequence of elements in non-descending order. 1385 */ 1386 int[] run = null; 1387 int high = low + size; 1388 int count = 1, last = low; 1389 1390 /* 1391 * Identify all possible runs. 1392 */ 1393 for (int k = low + 1; k < high; ) { 1394 1395 /* 1396 * Find the end index of the current run. 1397 */ 1398 if (a[k - 1] < a[k]) { 1399 1400 // Identify ascending sequence 1401 while (++k < high && a[k - 1] <= a[k]); 1402 1403 } else if (a[k - 1] > a[k]) { 1404 1405 // Identify descending sequence 1406 while (++k < high && a[k - 1] >= a[k]); 1407 1408 // Reverse into ascending order 1409 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { 1410 long ai = a[i]; a[i] = a[j]; a[j] = ai; 1411 } 1412 } else { // Identify constant sequence 1413 for (long ak = a[k]; ++k < high && ak == a[k]; ); 1414 1415 if (k < high) { 1416 continue; 1417 } 1418 } 1419 1420 /* 1421 * Check special cases. 1422 */ 1423 if (run == null) { 1424 if (k == high) { 1425 1426 /* 1427 * The array is monotonous sequence, 1428 * and therefore already sorted. 1429 */ 1430 return true; 1431 } 1432 1433 if (k - low < MIN_FIRST_RUN_SIZE) { 1434 1435 /* 1436 * The first run is too small 1437 * to proceed with scanning. 1438 */ 1439 return false; 1440 } 1441 1442 run = new int[((size >> 10) | 0x7F) & 0x3FF]; 1443 run[0] = low; 1444 1445 } else if (a[last - 1] > a[last]) { 1446 1447 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { 1448 1449 /* 1450 * The first runs are not long 1451 * enough to continue scanning. 1452 */ 1453 return false; 1454 } 1455 1456 if (++count == MAX_RUN_CAPACITY) { 1457 1458 /* 1459 * Array is not highly structured. 1460 */ 1461 return false; 1462 } 1463 1464 if (count == run.length) { 1465 1466 /* 1467 * Increase capacity of index array. 1468 */ 1469 run = Arrays.copyOf(run, count << 1); 1470 } 1471 } 1472 run[count] = (last = k); 1473 } 1474 1475 /* 1476 * Merge runs of highly structured array. 1477 */ 1478 if (count > 1) { 1479 long[] b; int offset = low; 1480 1481 if (sorter == null || (b = (long[]) sorter.b) == null) { 1482 b = new long[size]; 1483 } else { 1484 offset = sorter.offset; 1485 } 1486 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); 1487 } 1488 return true; 1489 } 1490 1491 /** 1492 * Merges the specified runs. 1493 * 1494 * @param a the source array 1495 * @param b the temporary buffer used in merging 1496 * @param offset the start index in the source, inclusive 1497 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) 1498 * @param parallel indicates whether merging is performed in parallel 1499 * @param run the start indexes of the runs, inclusive 1500 * @param lo the start index of the first run, inclusive 1501 * @param hi the start index of the last run, inclusive 1502 * @return the destination where runs are merged 1503 */ 1504 private static long[] mergeRuns(long[] a, long[] b, int offset, 1505 int aim, boolean parallel, int[] run, int lo, int hi) { 1506 1507 if (hi - lo == 1) { 1508 if (aim >= 0) { 1509 return a; 1510 } 1511 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; 1512 b[--j] = a[--i] 1513 ); 1514 return b; 1515 } 1516 1517 /* 1518 * Split into approximately equal parts. 1519 */ 1520 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; 1521 while (run[++mi + 1] <= rmi); 1522 1523 /* 1524 * Merge the left and right parts. 1525 */ 1526 long[] a1, a2; 1527 1528 if (parallel && hi - lo > MIN_RUN_COUNT) { 1529 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); 1530 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); 1531 a2 = (long[]) merger.getDestination(); 1532 } else { 1533 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); 1534 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); 1535 } 1536 1537 long[] dst = a1 == a ? b : a; 1538 1539 int k = a1 == a ? run[lo] - offset : run[lo]; 1540 int lo1 = a1 == b ? run[lo] - offset : run[lo]; 1541 int hi1 = a1 == b ? run[mi] - offset : run[mi]; 1542 int lo2 = a2 == b ? run[mi] - offset : run[mi]; 1543 int hi2 = a2 == b ? run[hi] - offset : run[hi]; 1544 1545 if (parallel) { 1546 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); 1547 } else { 1548 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); 1549 } 1550 return dst; 1551 } 1552 1553 /** 1554 * Merges the sorted parts. 1555 * 1556 * @param merger parallel context 1557 * @param dst the destination where parts are merged 1558 * @param k the start index of the destination, inclusive 1559 * @param a1 the first part 1560 * @param lo1 the start index of the first part, inclusive 1561 * @param hi1 the end index of the first part, exclusive 1562 * @param a2 the second part 1563 * @param lo2 the start index of the second part, inclusive 1564 * @param hi2 the end index of the second part, exclusive 1565 */ 1566 private static void mergeParts(Merger merger, long[] dst, int k, 1567 long[] a1, int lo1, int hi1, long[] a2, int lo2, int hi2) { 1568 1569 if (merger != null && a1 == a2) { 1570 1571 while (true) { 1572 1573 /* 1574 * The first part must be larger. 1575 */ 1576 if (hi1 - lo1 < hi2 - lo2) { 1577 int lo = lo1; lo1 = lo2; lo2 = lo; 1578 int hi = hi1; hi1 = hi2; hi2 = hi; 1579 } 1580 1581 /* 1582 * Small parts will be merged sequentially. 1583 */ 1584 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { 1585 break; 1586 } 1587 1588 /* 1589 * Find the median of the larger part. 1590 */ 1591 int mi1 = (lo1 + hi1) >>> 1; 1592 long key = a1[mi1]; 1593 int mi2 = hi2; 1594 1595 /* 1596 * Partition the smaller part. 1597 */ 1598 for (int loo = lo2; loo < mi2; ) { 1599 int t = (loo + mi2) >>> 1; 1600 1601 if (key > a2[t]) { 1602 loo = t + 1; 1603 } else { 1604 mi2 = t; 1605 } 1606 } 1607 1608 int d = mi2 - lo2 + mi1 - lo1; 1609 1610 /* 1611 * Merge the right sub-parts in parallel. 1612 */ 1613 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); 1614 1615 /* 1616 * Process the sub-left parts. 1617 */ 1618 hi1 = mi1; 1619 hi2 = mi2; 1620 } 1621 } 1622 1623 /* 1624 * Merge small parts sequentially. 1625 */ 1626 while (lo1 < hi1 && lo2 < hi2) { 1627 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; 1628 } 1629 if (dst != a1 || k < lo1) { 1630 while (lo1 < hi1) { 1631 dst[k++] = a1[lo1++]; 1632 } 1633 } 1634 if (dst != a2 || k < lo2) { 1635 while (lo2 < hi2) { 1636 dst[k++] = a2[lo2++]; 1637 } 1638 } 1639 } 1640 1641 // [byte] 1642 1643 /** 1644 * Sorts the specified range of the array using 1645 * counting sort or insertion sort. 1646 * 1647 * @param a the array to be sorted 1648 * @param low the index of the first element, inclusive, to be sorted 1649 * @param high the index of the last element, exclusive, to be sorted 1650 */ 1651 static void sort(byte[] a, int low, int high) { 1652 if (high - low > MIN_BYTE_COUNTING_SORT_SIZE) { 1653 countingSort(a, low, high); 1654 } else { 1655 insertionSort(a, low, high); 1656 } 1657 } 1658 1659 /** 1660 * Sorts the specified range of the array using insertion sort. 1661 * 1662 * @param a the array to be sorted 1663 * @param low the index of the first element, inclusive, to be sorted 1664 * @param high the index of the last element, exclusive, to be sorted 1665 */ 1666 private static void insertionSort(byte[] a, int low, int high) { 1667 for (int i, k = low; ++k < high; ) { 1668 byte ai = a[i = k]; 1669 1670 if (ai < a[i - 1]) { 1671 while (--i >= low && ai < a[i]) { 1672 a[i + 1] = a[i]; 1673 } 1674 a[i + 1] = ai; 1675 } 1676 } 1677 } 1678 1679 /** 1680 * The number of distinct byte values. 1681 */ 1682 private static final int NUM_BYTE_VALUES = 1 << 8; 1683 1684 /** 1685 * Max index of byte counter. 1686 */ 1687 private static final int MAX_BYTE_INDEX = Byte.MAX_VALUE + NUM_BYTE_VALUES + 1; 1688 1689 /** 1690 * Sorts the specified range of the array using counting sort. 1691 * 1692 * @param a the array to be sorted 1693 * @param low the index of the first element, inclusive, to be sorted 1694 * @param high the index of the last element, exclusive, to be sorted 1695 */ 1696 private static void countingSort(byte[] a, int low, int high) { 1697 int[] count = new int[NUM_BYTE_VALUES]; 1698 1699 /* 1700 * Compute a histogram with the number of each values. 1701 */ 1702 for (int i = high; i > low; ++count[a[--i] & 0xFF]); 1703 1704 /* 1705 * Place values on their final positions. 1706 */ 1707 if (high - low > NUM_BYTE_VALUES) { 1708 for (int i = MAX_BYTE_INDEX; --i > Byte.MAX_VALUE; ) { 1709 int value = i & 0xFF; 1710 1711 for (low = high - count[value]; high > low; 1712 a[--high] = (byte) value 1713 ); 1714 } 1715 } else { 1716 for (int i = MAX_BYTE_INDEX; high > low; ) { 1717 while (count[--i & 0xFF] == 0); 1718 1719 int value = i & 0xFF; 1720 int c = count[value]; 1721 1722 do { 1723 a[--high] = (byte) value; 1724 } while (--c > 0); 1725 } 1726 } 1727 } 1728 1729 // [char] 1730 1731 /** 1732 * Sorts the specified range of the array using 1733 * counting sort or Dual-Pivot Quicksort. 1734 * 1735 * @param a the array to be sorted 1736 * @param low the index of the first element, inclusive, to be sorted 1737 * @param high the index of the last element, exclusive, to be sorted 1738 */ 1739 static void sort(char[] a, int low, int high) { 1740 if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) { 1741 countingSort(a, low, high); 1742 } else { 1743 sort(a, 0, low, high); 1744 } 1745 } 1746 1747 /** 1748 * Sorts the specified array using the Dual-Pivot Quicksort and/or 1749 * other sorts in special-cases, possibly with parallel partitions. 1750 * 1751 * @param a the array to be sorted 1752 * @param bits the combination of recursion depth and bit flag, where 1753 * the right bit "0" indicates that array is the leftmost part 1754 * @param low the index of the first element, inclusive, to be sorted 1755 * @param high the index of the last element, exclusive, to be sorted 1756 */ 1757 static void sort(char[] a, int bits, int low, int high) { 1758 while (true) { 1759 int end = high - 1, size = high - low; 1760 1761 /* 1762 * Invoke insertion sort on small leftmost part. 1763 */ 1764 if (size < MAX_INSERTION_SORT_SIZE) { 1765 insertionSort(a, low, high); 1766 return; 1767 } 1768 1769 /* 1770 * Switch to counting sort if execution 1771 * time is becoming quadratic. 1772 */ 1773 if ((bits += 2) > MAX_RECURSION_DEPTH) { 1774 countingSort(a, low, high); 1775 return; 1776 } 1777 1778 /* 1779 * Use an inexpensive approximation of the golden ratio 1780 * to select five sample elements and determine pivots. 1781 */ 1782 int step = (size >> 3) * 3 + 3; 1783 1784 /* 1785 * Five elements around (and including) the central element 1786 * will be used for pivot selection as described below. The 1787 * unequal choice of spacing these elements was empirically 1788 * determined to work well on a wide variety of inputs. 1789 */ 1790 int e1 = low + step; 1791 int e5 = end - step; 1792 int e3 = (e1 + e5) >>> 1; 1793 int e2 = (e1 + e3) >>> 1; 1794 int e4 = (e3 + e5) >>> 1; 1795 char a3 = a[e3]; 1796 1797 /* 1798 * Sort these elements in place by the combination 1799 * of 4-element sorting network and insertion sort. 1800 * 1801 * 5 ------o-----------o------------ 1802 * | | 1803 * 4 ------|-----o-----o-----o------ 1804 * | | | 1805 * 2 ------o-----|-----o-----o------ 1806 * | | 1807 * 1 ------------o-----o------------ 1808 */ 1809 if (a[e5] < a[e2]) { char t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 1810 if (a[e4] < a[e1]) { char t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 1811 if (a[e5] < a[e4]) { char t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 1812 if (a[e2] < a[e1]) { char t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 1813 if (a[e4] < a[e2]) { char t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 1814 1815 if (a3 < a[e2]) { 1816 if (a3 < a[e1]) { 1817 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 1818 } else { 1819 a[e3] = a[e2]; a[e2] = a3; 1820 } 1821 } else if (a3 > a[e4]) { 1822 if (a3 > a[e5]) { 1823 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 1824 } else { 1825 a[e3] = a[e4]; a[e4] = a3; 1826 } 1827 } 1828 1829 // Pointers 1830 int lower = low; // The index of the last element of the left part 1831 int upper = end; // The index of the first element of the right part 1832 1833 /* 1834 * Partitioning with 2 pivots in case of different elements. 1835 */ 1836 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 1837 1838 /* 1839 * Use the first and fifth of the five sorted elements as 1840 * the pivots. These values are inexpensive approximation 1841 * of tertiles. Note, that pivot1 < pivot2. 1842 */ 1843 char pivot1 = a[e1]; 1844 char pivot2 = a[e5]; 1845 1846 /* 1847 * The first and the last elements to be sorted are moved 1848 * to the locations formerly occupied by the pivots. When 1849 * partitioning is completed, the pivots are swapped back 1850 * into their final positions, and excluded from the next 1851 * subsequent sorting. 1852 */ 1853 a[e1] = a[lower]; 1854 a[e5] = a[upper]; 1855 1856 /* 1857 * Skip elements, which are less or greater than the pivots. 1858 */ 1859 while (a[++lower] < pivot1); 1860 while (a[--upper] > pivot2); 1861 1862 /* 1863 * Backward 3-interval partitioning 1864 * 1865 * left part central part right part 1866 * +------------------------------------------------------------+ 1867 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 1868 * +------------------------------------------------------------+ 1869 * ^ ^ ^ 1870 * | | | 1871 * lower k upper 1872 * 1873 * Invariants: 1874 * 1875 * all in (low, lower] < pivot1 1876 * pivot1 <= all in (k, upper) <= pivot2 1877 * all in [upper, end) > pivot2 1878 * 1879 * Pointer k is the last index of ?-part 1880 */ 1881 for (int unused = --lower, k = ++upper; --k > lower; ) { 1882 char ak = a[k]; 1883 1884 if (ak < pivot1) { // Move a[k] to the left side 1885 while (lower < k) { 1886 if (a[++lower] >= pivot1) { 1887 if (a[lower] > pivot2) { 1888 a[k] = a[--upper]; 1889 a[upper] = a[lower]; 1890 } else { 1891 a[k] = a[lower]; 1892 } 1893 a[lower] = ak; 1894 break; 1895 } 1896 } 1897 } else if (ak > pivot2) { // Move a[k] to the right side 1898 a[k] = a[--upper]; 1899 a[upper] = ak; 1900 } 1901 } 1902 1903 /* 1904 * Swap the pivots into their final positions. 1905 */ 1906 a[low] = a[lower]; a[lower] = pivot1; 1907 a[end] = a[upper]; a[upper] = pivot2; 1908 1909 /* 1910 * Sort non-left parts recursively, 1911 * excluding known pivots. 1912 */ 1913 sort(a, bits | 1, lower + 1, upper); 1914 sort(a, bits | 1, upper + 1, high); 1915 1916 } else { // Use single pivot in case of many equal elements 1917 1918 /* 1919 * Use the third of the five sorted elements as the pivot. 1920 * This value is inexpensive approximation of the median. 1921 */ 1922 char pivot = a[e3]; 1923 1924 /* 1925 * The first element to be sorted is moved to the 1926 * location formerly occupied by the pivot. After 1927 * completion of partitioning the pivot is swapped 1928 * back into its final position, and excluded from 1929 * the next subsequent sorting. 1930 */ 1931 a[e3] = a[lower]; 1932 1933 /* 1934 * Traditional 3-way (Dutch National Flag) partitioning 1935 * 1936 * left part central part right part 1937 * +------------------------------------------------------+ 1938 * | < pivot | ? | == pivot | > pivot | 1939 * +------------------------------------------------------+ 1940 * ^ ^ ^ 1941 * | | | 1942 * lower k upper 1943 * 1944 * Invariants: 1945 * 1946 * all in (low, lower] < pivot 1947 * all in (k, upper) == pivot 1948 * all in [upper, end] > pivot 1949 * 1950 * Pointer k is the last index of ?-part 1951 */ 1952 for (int k = ++upper; --k > lower; ) { 1953 char ak = a[k]; 1954 1955 if (ak != pivot) { 1956 a[k] = pivot; 1957 1958 if (ak < pivot) { // Move a[k] to the left side 1959 while (a[++lower] < pivot); 1960 1961 if (a[lower] > pivot) { 1962 a[--upper] = a[lower]; 1963 } 1964 a[lower] = ak; 1965 } else { // ak > pivot - Move a[k] to the right side 1966 a[--upper] = ak; 1967 } 1968 } 1969 } 1970 1971 /* 1972 * Swap the pivot into its final position. 1973 */ 1974 a[low] = a[lower]; a[lower] = pivot; 1975 1976 /* 1977 * Sort the right part, excluding known pivot. 1978 * All elements from the central part are 1979 * equal and therefore already sorted. 1980 */ 1981 sort(a, bits | 1, upper, high); 1982 } 1983 high = lower; // Iterate along the left part 1984 } 1985 } 1986 1987 /** 1988 * Sorts the specified range of the array using insertion sort. 1989 * 1990 * @param a the array to be sorted 1991 * @param low the index of the first element, inclusive, to be sorted 1992 * @param high the index of the last element, exclusive, to be sorted 1993 */ 1994 private static void insertionSort(char[] a, int low, int high) { 1995 for (int i, k = low; ++k < high; ) { 1996 char ai = a[i = k]; 1997 1998 if (ai < a[i - 1]) { 1999 while (--i >= low && ai < a[i]) { 2000 a[i + 1] = a[i]; 2001 } 2002 a[i + 1] = ai; 2003 } 2004 } 2005 } 2006 2007 /** 2008 * The number of distinct char values. 2009 */ 2010 private static final int NUM_CHAR_VALUES = 1 << 16; 2011 2012 /** 2013 * Sorts the specified range of the array using counting sort. 2014 * 2015 * @param a the array to be sorted 2016 * @param low the index of the first element, inclusive, to be sorted 2017 * @param high the index of the last element, exclusive, to be sorted 2018 */ 2019 private static void countingSort(char[] a, int low, int high) { 2020 int[] count = new int[NUM_CHAR_VALUES]; 2021 2022 /* 2023 * Compute a histogram with the number of each values. 2024 */ 2025 for (int i = high; i > low; ++count[a[--i]]); 2026 2027 /* 2028 * Place values on their final positions. 2029 */ 2030 if (high - low > NUM_CHAR_VALUES) { 2031 for (int i = NUM_CHAR_VALUES; i > 0; ) { 2032 for (low = high - count[--i]; high > low; 2033 a[--high] = (char) i 2034 ); 2035 } 2036 } else { 2037 for (int i = NUM_CHAR_VALUES; high > low; ) { 2038 while (count[--i] == 0); 2039 int c = count[i]; 2040 2041 do { 2042 a[--high] = (char) i; 2043 } while (--c > 0); 2044 } 2045 } 2046 } 2047 2048 // [short] 2049 2050 /** 2051 * Sorts the specified range of the array using 2052 * counting sort or Dual-Pivot Quicksort. 2053 * 2054 * @param a the array to be sorted 2055 * @param low the index of the first element, inclusive, to be sorted 2056 * @param high the index of the last element, exclusive, to be sorted 2057 */ 2058 static void sort(short[] a, int low, int high) { 2059 if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) { 2060 countingSort(a, low, high); 2061 } else { 2062 sort(a, 0, low, high); 2063 } 2064 } 2065 2066 /** 2067 * Sorts the specified array using the Dual-Pivot Quicksort and/or 2068 * other sorts in special-cases, possibly with parallel partitions. 2069 * 2070 * @param a the array to be sorted 2071 * @param bits the combination of recursion depth and bit flag, where 2072 * the right bit "0" indicates that array is the leftmost part 2073 * @param low the index of the first element, inclusive, to be sorted 2074 * @param high the index of the last element, exclusive, to be sorted 2075 */ 2076 static void sort(short[] a, int bits, int low, int high) { 2077 while (true) { 2078 int end = high - 1, size = high - low; 2079 2080 /* 2081 * Invoke insertion sort on small leftmost part. 2082 */ 2083 if (size < MAX_INSERTION_SORT_SIZE) { 2084 insertionSort(a, low, high); 2085 return; 2086 } 2087 2088 /* 2089 * Switch to counting sort if execution 2090 * time is becoming quadratic. 2091 */ 2092 if ((bits += 2) > MAX_RECURSION_DEPTH) { 2093 countingSort(a, low, high); 2094 return; 2095 } 2096 2097 /* 2098 * Use an inexpensive approximation of the golden ratio 2099 * to select five sample elements and determine pivots. 2100 */ 2101 int step = (size >> 3) * 3 + 3; 2102 2103 /* 2104 * Five elements around (and including) the central element 2105 * will be used for pivot selection as described below. The 2106 * unequal choice of spacing these elements was empirically 2107 * determined to work well on a wide variety of inputs. 2108 */ 2109 int e1 = low + step; 2110 int e5 = end - step; 2111 int e3 = (e1 + e5) >>> 1; 2112 int e2 = (e1 + e3) >>> 1; 2113 int e4 = (e3 + e5) >>> 1; 2114 short a3 = a[e3]; 2115 2116 /* 2117 * Sort these elements in place by the combination 2118 * of 4-element sorting network and insertion sort. 2119 * 2120 * 5 ------o-----------o------------ 2121 * | | 2122 * 4 ------|-----o-----o-----o------ 2123 * | | | 2124 * 2 ------o-----|-----o-----o------ 2125 * | | 2126 * 1 ------------o-----o------------ 2127 */ 2128 if (a[e5] < a[e2]) { short t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 2129 if (a[e4] < a[e1]) { short t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 2130 if (a[e5] < a[e4]) { short t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 2131 if (a[e2] < a[e1]) { short t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 2132 if (a[e4] < a[e2]) { short t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 2133 2134 if (a3 < a[e2]) { 2135 if (a3 < a[e1]) { 2136 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 2137 } else { 2138 a[e3] = a[e2]; a[e2] = a3; 2139 } 2140 } else if (a3 > a[e4]) { 2141 if (a3 > a[e5]) { 2142 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 2143 } else { 2144 a[e3] = a[e4]; a[e4] = a3; 2145 } 2146 } 2147 2148 // Pointers 2149 int lower = low; // The index of the last element of the left part 2150 int upper = end; // The index of the first element of the right part 2151 2152 /* 2153 * Partitioning with 2 pivots in case of different elements. 2154 */ 2155 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 2156 2157 /* 2158 * Use the first and fifth of the five sorted elements as 2159 * the pivots. These values are inexpensive approximation 2160 * of tertiles. Note, that pivot1 < pivot2. 2161 */ 2162 short pivot1 = a[e1]; 2163 short pivot2 = a[e5]; 2164 2165 /* 2166 * The first and the last elements to be sorted are moved 2167 * to the locations formerly occupied by the pivots. When 2168 * partitioning is completed, the pivots are swapped back 2169 * into their final positions, and excluded from the next 2170 * subsequent sorting. 2171 */ 2172 a[e1] = a[lower]; 2173 a[e5] = a[upper]; 2174 2175 /* 2176 * Skip elements, which are less or greater than the pivots. 2177 */ 2178 while (a[++lower] < pivot1); 2179 while (a[--upper] > pivot2); 2180 2181 /* 2182 * Backward 3-interval partitioning 2183 * 2184 * left part central part right part 2185 * +------------------------------------------------------------+ 2186 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 2187 * +------------------------------------------------------------+ 2188 * ^ ^ ^ 2189 * | | | 2190 * lower k upper 2191 * 2192 * Invariants: 2193 * 2194 * all in (low, lower] < pivot1 2195 * pivot1 <= all in (k, upper) <= pivot2 2196 * all in [upper, end) > pivot2 2197 * 2198 * Pointer k is the last index of ?-part 2199 */ 2200 for (int unused = --lower, k = ++upper; --k > lower; ) { 2201 short ak = a[k]; 2202 2203 if (ak < pivot1) { // Move a[k] to the left side 2204 while (lower < k) { 2205 if (a[++lower] >= pivot1) { 2206 if (a[lower] > pivot2) { 2207 a[k] = a[--upper]; 2208 a[upper] = a[lower]; 2209 } else { 2210 a[k] = a[lower]; 2211 } 2212 a[lower] = ak; 2213 break; 2214 } 2215 } 2216 } else if (ak > pivot2) { // Move a[k] to the right side 2217 a[k] = a[--upper]; 2218 a[upper] = ak; 2219 } 2220 } 2221 2222 /* 2223 * Swap the pivots into their final positions. 2224 */ 2225 a[low] = a[lower]; a[lower] = pivot1; 2226 a[end] = a[upper]; a[upper] = pivot2; 2227 2228 /* 2229 * Sort non-left parts recursively, 2230 * excluding known pivots. 2231 */ 2232 sort(a, bits | 1, lower + 1, upper); 2233 sort(a, bits | 1, upper + 1, high); 2234 2235 } else { // Use single pivot in case of many equal elements 2236 2237 /* 2238 * Use the third of the five sorted elements as the pivot. 2239 * This value is inexpensive approximation of the median. 2240 */ 2241 short pivot = a[e3]; 2242 2243 /* 2244 * The first element to be sorted is moved to the 2245 * location formerly occupied by the pivot. After 2246 * completion of partitioning the pivot is swapped 2247 * back into its final position, and excluded from 2248 * the next subsequent sorting. 2249 */ 2250 a[e3] = a[lower]; 2251 2252 /* 2253 * Traditional 3-way (Dutch National Flag) partitioning 2254 * 2255 * left part central part right part 2256 * +------------------------------------------------------+ 2257 * | < pivot | ? | == pivot | > pivot | 2258 * +------------------------------------------------------+ 2259 * ^ ^ ^ 2260 * | | | 2261 * lower k upper 2262 * 2263 * Invariants: 2264 * 2265 * all in (low, lower] < pivot 2266 * all in (k, upper) == pivot 2267 * all in [upper, end] > pivot 2268 * 2269 * Pointer k is the last index of ?-part 2270 */ 2271 for (int k = ++upper; --k > lower; ) { 2272 short ak = a[k]; 2273 2274 if (ak != pivot) { 2275 a[k] = pivot; 2276 2277 if (ak < pivot) { // Move a[k] to the left side 2278 while (a[++lower] < pivot); 2279 2280 if (a[lower] > pivot) { 2281 a[--upper] = a[lower]; 2282 } 2283 a[lower] = ak; 2284 } else { // ak > pivot - Move a[k] to the right side 2285 a[--upper] = ak; 2286 } 2287 } 2288 } 2289 2290 /* 2291 * Swap the pivot into its final position. 2292 */ 2293 a[low] = a[lower]; a[lower] = pivot; 2294 2295 /* 2296 * Sort the right part, excluding known pivot. 2297 * All elements from the central part are 2298 * equal and therefore already sorted. 2299 */ 2300 sort(a, bits | 1, upper, high); 2301 } 2302 high = lower; // Iterate along the left part 2303 } 2304 } 2305 2306 /** 2307 * Sorts the specified range of the array using insertion sort. 2308 * 2309 * @param a the array to be sorted 2310 * @param low the index of the first element, inclusive, to be sorted 2311 * @param high the index of the last element, exclusive, to be sorted 2312 */ 2313 private static void insertionSort(short[] a, int low, int high) { 2314 for (int i, k = low; ++k < high; ) { 2315 short ai = a[i = k]; 2316 2317 if (ai < a[i - 1]) { 2318 while (--i >= low && ai < a[i]) { 2319 a[i + 1] = a[i]; 2320 } 2321 a[i + 1] = ai; 2322 } 2323 } 2324 } 2325 2326 /** 2327 * The number of distinct short values. 2328 */ 2329 private static final int NUM_SHORT_VALUES = 1 << 16; 2330 2331 /** 2332 * Max index of short counter. 2333 */ 2334 private static final int MAX_SHORT_INDEX = Short.MAX_VALUE + NUM_SHORT_VALUES + 1; 2335 2336 /** 2337 * Sorts the specified range of the array using counting sort. 2338 * 2339 * @param a the array to be sorted 2340 * @param low the index of the first element, inclusive, to be sorted 2341 * @param high the index of the last element, exclusive, to be sorted 2342 */ 2343 private static void countingSort(short[] a, int low, int high) { 2344 int[] count = new int[NUM_SHORT_VALUES]; 2345 2346 /* 2347 * Compute a histogram with the number of each values. 2348 */ 2349 for (int i = high; i > low; ++count[a[--i] & 0xFFFF]); 2350 2351 /* 2352 * Place values on their final positions. 2353 */ 2354 if (high - low > NUM_SHORT_VALUES) { 2355 for (int i = MAX_SHORT_INDEX; --i > Short.MAX_VALUE; ) { 2356 int value = i & 0xFFFF; 2357 2358 for (low = high - count[value]; high > low; 2359 a[--high] = (short) value 2360 ); 2361 } 2362 } else { 2363 for (int i = MAX_SHORT_INDEX; high > low; ) { 2364 while (count[--i & 0xFFFF] == 0); 2365 2366 int value = i & 0xFFFF; 2367 int c = count[value]; 2368 2369 do { 2370 a[--high] = (short) value; 2371 } while (--c > 0); 2372 } 2373 } 2374 } 2375 2376 // [float] 2377 2378 /** 2379 * Sorts the specified range of the array using parallel merge 2380 * sort and/or Dual-Pivot Quicksort. 2381 * 2382 * To balance the faster splitting and parallelism of merge sort 2383 * with the faster element partitioning of Quicksort, ranges are 2384 * subdivided in tiers such that, if there is enough parallelism, 2385 * the four-way parallel merge is started, still ensuring enough 2386 * parallelism to process the partitions. 2387 * 2388 * @param a the array to be sorted 2389 * @param parallelism the parallelism level 2390 * @param low the index of the first element, inclusive, to be sorted 2391 * @param high the index of the last element, exclusive, to be sorted 2392 */ 2393 static void sort(float[] a, int parallelism, int low, int high) { 2394 /* 2395 * Phase 1. Count the number of negative zero -0.0f, 2396 * turn them into positive zero, and move all NaNs 2397 * to the end of the array. 2398 */ 2399 int numNegativeZero = 0; 2400 2401 for (int k = high; k > low; ) { 2402 float ak = a[--k]; 2403 2404 if (ak == 0.0f && Float.floatToRawIntBits(ak) < 0) { // ak is -0.0f 2405 numNegativeZero += 1; 2406 a[k] = 0.0f; 2407 } else if (ak != ak) { // ak is NaN 2408 a[k] = a[--high]; 2409 a[high] = ak; 2410 } 2411 } 2412 2413 /* 2414 * Phase 2. Sort everything except NaNs, 2415 * which are already in place. 2416 */ 2417 int size = high - low; 2418 2419 if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { 2420 int depth = getDepth(parallelism, size >> 12); 2421 float[] b = depth == 0 ? null : new float[size]; 2422 new Sorter(null, a, b, low, size, low, depth).invoke(); 2423 } else { 2424 sort(null, a, 0, low, high); 2425 } 2426 2427 /* 2428 * Phase 3. Turn positive zero 0.0f 2429 * back into negative zero -0.0f. 2430 */ 2431 if (++numNegativeZero == 1) { 2432 return; 2433 } 2434 2435 /* 2436 * Find the position one less than 2437 * the index of the first zero. 2438 */ 2439 while (low <= high) { 2440 int middle = (low + high) >>> 1; 2441 2442 if (a[middle] < 0) { 2443 low = middle + 1; 2444 } else { 2445 high = middle - 1; 2446 } 2447 } 2448 2449 /* 2450 * Replace the required number of 0.0f by -0.0f. 2451 */ 2452 while (--numNegativeZero > 0) { 2453 a[++high] = -0.0f; 2454 } 2455 } 2456 2457 /** 2458 * Sorts the specified array using the Dual-Pivot Quicksort and/or 2459 * other sorts in special-cases, possibly with parallel partitions. 2460 * 2461 * @param sorter parallel context 2462 * @param a the array to be sorted 2463 * @param bits the combination of recursion depth and bit flag, where 2464 * the right bit "0" indicates that array is the leftmost part 2465 * @param low the index of the first element, inclusive, to be sorted 2466 * @param high the index of the last element, exclusive, to be sorted 2467 */ 2468 static void sort(Sorter sorter, float[] a, int bits, int low, int high) { 2469 while (true) { 2470 int end = high - 1, size = high - low; 2471 2472 /* 2473 * Run mixed insertion sort on small non-leftmost parts. 2474 */ 2475 if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { 2476 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 2477 return; 2478 } 2479 2480 /* 2481 * Invoke insertion sort on small leftmost part. 2482 */ 2483 if (size < MAX_INSERTION_SORT_SIZE) { 2484 insertionSort(a, low, high); 2485 return; 2486 } 2487 2488 /* 2489 * Check if the whole array or large non-leftmost 2490 * parts are nearly sorted and then merge runs. 2491 */ 2492 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) 2493 && tryMergeRuns(sorter, a, low, size)) { 2494 return; 2495 } 2496 2497 /* 2498 * Switch to heap sort if execution 2499 * time is becoming quadratic. 2500 */ 2501 if ((bits += 2) > MAX_RECURSION_DEPTH) { 2502 heapSort(a, low, high); 2503 return; 2504 } 2505 2506 /* 2507 * Use an inexpensive approximation of the golden ratio 2508 * to select five sample elements and determine pivots. 2509 */ 2510 int step = (size >> 3) * 3 + 3; 2511 2512 /* 2513 * Five elements around (and including) the central element 2514 * will be used for pivot selection as described below. The 2515 * unequal choice of spacing these elements was empirically 2516 * determined to work well on a wide variety of inputs. 2517 */ 2518 int e1 = low + step; 2519 int e5 = end - step; 2520 int e3 = (e1 + e5) >>> 1; 2521 int e2 = (e1 + e3) >>> 1; 2522 int e4 = (e3 + e5) >>> 1; 2523 float a3 = a[e3]; 2524 2525 /* 2526 * Sort these elements in place by the combination 2527 * of 4-element sorting network and insertion sort. 2528 * 2529 * 5 ------o-----------o------------ 2530 * | | 2531 * 4 ------|-----o-----o-----o------ 2532 * | | | 2533 * 2 ------o-----|-----o-----o------ 2534 * | | 2535 * 1 ------------o-----o------------ 2536 */ 2537 if (a[e5] < a[e2]) { float t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 2538 if (a[e4] < a[e1]) { float t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 2539 if (a[e5] < a[e4]) { float t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 2540 if (a[e2] < a[e1]) { float t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 2541 if (a[e4] < a[e2]) { float t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 2542 2543 if (a3 < a[e2]) { 2544 if (a3 < a[e1]) { 2545 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 2546 } else { 2547 a[e3] = a[e2]; a[e2] = a3; 2548 } 2549 } else if (a3 > a[e4]) { 2550 if (a3 > a[e5]) { 2551 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 2552 } else { 2553 a[e3] = a[e4]; a[e4] = a3; 2554 } 2555 } 2556 2557 // Pointers 2558 int lower = low; // The index of the last element of the left part 2559 int upper = end; // The index of the first element of the right part 2560 2561 /* 2562 * Partitioning with 2 pivots in case of different elements. 2563 */ 2564 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 2565 2566 /* 2567 * Use the first and fifth of the five sorted elements as 2568 * the pivots. These values are inexpensive approximation 2569 * of tertiles. Note, that pivot1 < pivot2. 2570 */ 2571 float pivot1 = a[e1]; 2572 float pivot2 = a[e5]; 2573 2574 /* 2575 * The first and the last elements to be sorted are moved 2576 * to the locations formerly occupied by the pivots. When 2577 * partitioning is completed, the pivots are swapped back 2578 * into their final positions, and excluded from the next 2579 * subsequent sorting. 2580 */ 2581 a[e1] = a[lower]; 2582 a[e5] = a[upper]; 2583 2584 /* 2585 * Skip elements, which are less or greater than the pivots. 2586 */ 2587 while (a[++lower] < pivot1); 2588 while (a[--upper] > pivot2); 2589 2590 /* 2591 * Backward 3-interval partitioning 2592 * 2593 * left part central part right part 2594 * +------------------------------------------------------------+ 2595 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 2596 * +------------------------------------------------------------+ 2597 * ^ ^ ^ 2598 * | | | 2599 * lower k upper 2600 * 2601 * Invariants: 2602 * 2603 * all in (low, lower] < pivot1 2604 * pivot1 <= all in (k, upper) <= pivot2 2605 * all in [upper, end) > pivot2 2606 * 2607 * Pointer k is the last index of ?-part 2608 */ 2609 for (int unused = --lower, k = ++upper; --k > lower; ) { 2610 float ak = a[k]; 2611 2612 if (ak < pivot1) { // Move a[k] to the left side 2613 while (lower < k) { 2614 if (a[++lower] >= pivot1) { 2615 if (a[lower] > pivot2) { 2616 a[k] = a[--upper]; 2617 a[upper] = a[lower]; 2618 } else { 2619 a[k] = a[lower]; 2620 } 2621 a[lower] = ak; 2622 break; 2623 } 2624 } 2625 } else if (ak > pivot2) { // Move a[k] to the right side 2626 a[k] = a[--upper]; 2627 a[upper] = ak; 2628 } 2629 } 2630 2631 /* 2632 * Swap the pivots into their final positions. 2633 */ 2634 a[low] = a[lower]; a[lower] = pivot1; 2635 a[end] = a[upper]; a[upper] = pivot2; 2636 2637 /* 2638 * Sort non-left parts recursively (possibly in parallel), 2639 * excluding known pivots. 2640 */ 2641 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 2642 sorter.forkSorter(bits | 1, lower + 1, upper); 2643 sorter.forkSorter(bits | 1, upper + 1, high); 2644 } else { 2645 sort(sorter, a, bits | 1, lower + 1, upper); 2646 sort(sorter, a, bits | 1, upper + 1, high); 2647 } 2648 2649 } else { // Use single pivot in case of many equal elements 2650 2651 /* 2652 * Use the third of the five sorted elements as the pivot. 2653 * This value is inexpensive approximation of the median. 2654 */ 2655 float pivot = a[e3]; 2656 2657 /* 2658 * The first element to be sorted is moved to the 2659 * location formerly occupied by the pivot. After 2660 * completion of partitioning the pivot is swapped 2661 * back into its final position, and excluded from 2662 * the next subsequent sorting. 2663 */ 2664 a[e3] = a[lower]; 2665 2666 /* 2667 * Traditional 3-way (Dutch National Flag) partitioning 2668 * 2669 * left part central part right part 2670 * +------------------------------------------------------+ 2671 * | < pivot | ? | == pivot | > pivot | 2672 * +------------------------------------------------------+ 2673 * ^ ^ ^ 2674 * | | | 2675 * lower k upper 2676 * 2677 * Invariants: 2678 * 2679 * all in (low, lower] < pivot 2680 * all in (k, upper) == pivot 2681 * all in [upper, end] > pivot 2682 * 2683 * Pointer k is the last index of ?-part 2684 */ 2685 for (int k = ++upper; --k > lower; ) { 2686 float ak = a[k]; 2687 2688 if (ak != pivot) { 2689 a[k] = pivot; 2690 2691 if (ak < pivot) { // Move a[k] to the left side 2692 while (a[++lower] < pivot); 2693 2694 if (a[lower] > pivot) { 2695 a[--upper] = a[lower]; 2696 } 2697 a[lower] = ak; 2698 } else { // ak > pivot - Move a[k] to the right side 2699 a[--upper] = ak; 2700 } 2701 } 2702 } 2703 2704 /* 2705 * Swap the pivot into its final position. 2706 */ 2707 a[low] = a[lower]; a[lower] = pivot; 2708 2709 /* 2710 * Sort the right part (possibly in parallel), excluding 2711 * known pivot. All elements from the central part are 2712 * equal and therefore already sorted. 2713 */ 2714 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 2715 sorter.forkSorter(bits | 1, upper, high); 2716 } else { 2717 sort(sorter, a, bits | 1, upper, high); 2718 } 2719 } 2720 high = lower; // Iterate along the left part 2721 } 2722 } 2723 2724 /** 2725 * Sorts the specified range of the array using mixed insertion sort. 2726 * 2727 * Mixed insertion sort is combination of simple insertion sort, 2728 * pin insertion sort and pair insertion sort. 2729 * 2730 * In the context of Dual-Pivot Quicksort, the pivot element 2731 * from the left part plays the role of sentinel, because it 2732 * is less than any elements from the given part. Therefore, 2733 * expensive check of the left range can be skipped on each 2734 * iteration unless it is the leftmost call. 2735 * 2736 * @param a the array to be sorted 2737 * @param low the index of the first element, inclusive, to be sorted 2738 * @param end the index of the last element for simple insertion sort 2739 * @param high the index of the last element, exclusive, to be sorted 2740 */ 2741 private static void mixedInsertionSort(float[] a, int low, int end, int high) { 2742 if (end == high) { 2743 2744 /* 2745 * Invoke simple insertion sort on tiny array. 2746 */ 2747 for (int i; ++low < end; ) { 2748 float ai = a[i = low]; 2749 2750 while (ai < a[--i]) { 2751 a[i + 1] = a[i]; 2752 } 2753 a[i + 1] = ai; 2754 } 2755 } else { 2756 2757 /* 2758 * Start with pin insertion sort on small part. 2759 * 2760 * Pin insertion sort is extended simple insertion sort. 2761 * The main idea of this sort is to put elements larger 2762 * than an element called pin to the end of array (the 2763 * proper area for such elements). It avoids expensive 2764 * movements of these elements through the whole array. 2765 */ 2766 float pin = a[end]; 2767 2768 for (int i, p = high; ++low < end; ) { 2769 float ai = a[i = low]; 2770 2771 if (ai < a[i - 1]) { // Small element 2772 2773 /* 2774 * Insert small element into sorted part. 2775 */ 2776 a[i] = a[--i]; 2777 2778 while (ai < a[--i]) { 2779 a[i + 1] = a[i]; 2780 } 2781 a[i + 1] = ai; 2782 2783 } else if (p > i && ai > pin) { // Large element 2784 2785 /* 2786 * Find element smaller than pin. 2787 */ 2788 while (a[--p] > pin); 2789 2790 /* 2791 * Swap it with large element. 2792 */ 2793 if (p > i) { 2794 ai = a[p]; 2795 a[p] = a[i]; 2796 } 2797 2798 /* 2799 * Insert small element into sorted part. 2800 */ 2801 while (ai < a[--i]) { 2802 a[i + 1] = a[i]; 2803 } 2804 a[i + 1] = ai; 2805 } 2806 } 2807 2808 /* 2809 * Continue with pair insertion sort on remain part. 2810 */ 2811 for (int i; low < high; ++low) { 2812 float a1 = a[i = low], a2 = a[++low]; 2813 2814 /* 2815 * Insert two elements per iteration: at first, insert the 2816 * larger element and then insert the smaller element, but 2817 * from the position where the larger element was inserted. 2818 */ 2819 if (a1 > a2) { 2820 2821 while (a1 < a[--i]) { 2822 a[i + 2] = a[i]; 2823 } 2824 a[++i + 1] = a1; 2825 2826 while (a2 < a[--i]) { 2827 a[i + 1] = a[i]; 2828 } 2829 a[i + 1] = a2; 2830 2831 } else if (a1 < a[i - 1]) { 2832 2833 while (a2 < a[--i]) { 2834 a[i + 2] = a[i]; 2835 } 2836 a[++i + 1] = a2; 2837 2838 while (a1 < a[--i]) { 2839 a[i + 1] = a[i]; 2840 } 2841 a[i + 1] = a1; 2842 } 2843 } 2844 } 2845 } 2846 2847 /** 2848 * Sorts the specified range of the array using insertion sort. 2849 * 2850 * @param a the array to be sorted 2851 * @param low the index of the first element, inclusive, to be sorted 2852 * @param high the index of the last element, exclusive, to be sorted 2853 */ 2854 private static void insertionSort(float[] a, int low, int high) { 2855 for (int i, k = low; ++k < high; ) { 2856 float ai = a[i = k]; 2857 2858 if (ai < a[i - 1]) { 2859 while (--i >= low && ai < a[i]) { 2860 a[i + 1] = a[i]; 2861 } 2862 a[i + 1] = ai; 2863 } 2864 } 2865 } 2866 2867 /** 2868 * Sorts the specified range of the array using heap sort. 2869 * 2870 * @param a the array to be sorted 2871 * @param low the index of the first element, inclusive, to be sorted 2872 * @param high the index of the last element, exclusive, to be sorted 2873 */ 2874 private static void heapSort(float[] a, int low, int high) { 2875 for (int k = (low + high) >>> 1; k > low; ) { 2876 pushDown(a, --k, a[k], low, high); 2877 } 2878 while (--high > low) { 2879 float max = a[low]; 2880 pushDown(a, low, a[high], low, high); 2881 a[high] = max; 2882 } 2883 } 2884 2885 /** 2886 * Pushes specified element down during heap sort. 2887 * 2888 * @param a the given array 2889 * @param p the start index 2890 * @param value the given element 2891 * @param low the index of the first element, inclusive, to be sorted 2892 * @param high the index of the last element, exclusive, to be sorted 2893 */ 2894 private static void pushDown(float[] a, int p, float value, int low, int high) { 2895 for (int k ;; a[p] = a[p = k]) { 2896 k = (p << 1) - low + 2; // Index of the right child 2897 2898 if (k > high) { 2899 break; 2900 } 2901 if (k == high || a[k] < a[k - 1]) { 2902 --k; 2903 } 2904 if (a[k] <= value) { 2905 break; 2906 } 2907 } 2908 a[p] = value; 2909 } 2910 2911 /** 2912 * Tries to sort the specified range of the array. 2913 * 2914 * @param sorter parallel context 2915 * @param a the array to be sorted 2916 * @param low the index of the first element to be sorted 2917 * @param size the array size 2918 * @return true if finally sorted, false otherwise 2919 */ 2920 private static boolean tryMergeRuns(Sorter sorter, float[] a, int low, int size) { 2921 2922 /* 2923 * The run array is constructed only if initial runs are 2924 * long enough to continue, run[i] then holds start index 2925 * of the i-th sequence of elements in non-descending order. 2926 */ 2927 int[] run = null; 2928 int high = low + size; 2929 int count = 1, last = low; 2930 2931 /* 2932 * Identify all possible runs. 2933 */ 2934 for (int k = low + 1; k < high; ) { 2935 2936 /* 2937 * Find the end index of the current run. 2938 */ 2939 if (a[k - 1] < a[k]) { 2940 2941 // Identify ascending sequence 2942 while (++k < high && a[k - 1] <= a[k]); 2943 2944 } else if (a[k - 1] > a[k]) { 2945 2946 // Identify descending sequence 2947 while (++k < high && a[k - 1] >= a[k]); 2948 2949 // Reverse into ascending order 2950 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { 2951 float ai = a[i]; a[i] = a[j]; a[j] = ai; 2952 } 2953 } else { // Identify constant sequence 2954 for (float ak = a[k]; ++k < high && ak == a[k]; ); 2955 2956 if (k < high) { 2957 continue; 2958 } 2959 } 2960 2961 /* 2962 * Check special cases. 2963 */ 2964 if (run == null) { 2965 if (k == high) { 2966 2967 /* 2968 * The array is monotonous sequence, 2969 * and therefore already sorted. 2970 */ 2971 return true; 2972 } 2973 2974 if (k - low < MIN_FIRST_RUN_SIZE) { 2975 2976 /* 2977 * The first run is too small 2978 * to proceed with scanning. 2979 */ 2980 return false; 2981 } 2982 2983 run = new int[((size >> 10) | 0x7F) & 0x3FF]; 2984 run[0] = low; 2985 2986 } else if (a[last - 1] > a[last]) { 2987 2988 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { 2989 2990 /* 2991 * The first runs are not long 2992 * enough to continue scanning. 2993 */ 2994 return false; 2995 } 2996 2997 if (++count == MAX_RUN_CAPACITY) { 2998 2999 /* 3000 * Array is not highly structured. 3001 */ 3002 return false; 3003 } 3004 3005 if (count == run.length) { 3006 3007 /* 3008 * Increase capacity of index array. 3009 */ 3010 run = Arrays.copyOf(run, count << 1); 3011 } 3012 } 3013 run[count] = (last = k); 3014 } 3015 3016 /* 3017 * Merge runs of highly structured array. 3018 */ 3019 if (count > 1) { 3020 float[] b; int offset = low; 3021 3022 if (sorter == null || (b = (float[]) sorter.b) == null) { 3023 b = new float[size]; 3024 } else { 3025 offset = sorter.offset; 3026 } 3027 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); 3028 } 3029 return true; 3030 } 3031 3032 /** 3033 * Merges the specified runs. 3034 * 3035 * @param a the source array 3036 * @param b the temporary buffer used in merging 3037 * @param offset the start index in the source, inclusive 3038 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) 3039 * @param parallel indicates whether merging is performed in parallel 3040 * @param run the start indexes of the runs, inclusive 3041 * @param lo the start index of the first run, inclusive 3042 * @param hi the start index of the last run, inclusive 3043 * @return the destination where runs are merged 3044 */ 3045 private static float[] mergeRuns(float[] a, float[] b, int offset, 3046 int aim, boolean parallel, int[] run, int lo, int hi) { 3047 3048 if (hi - lo == 1) { 3049 if (aim >= 0) { 3050 return a; 3051 } 3052 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; 3053 b[--j] = a[--i] 3054 ); 3055 return b; 3056 } 3057 3058 /* 3059 * Split into approximately equal parts. 3060 */ 3061 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; 3062 while (run[++mi + 1] <= rmi); 3063 3064 /* 3065 * Merge the left and right parts. 3066 */ 3067 float[] a1, a2; 3068 3069 if (parallel && hi - lo > MIN_RUN_COUNT) { 3070 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); 3071 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); 3072 a2 = (float[]) merger.getDestination(); 3073 } else { 3074 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); 3075 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); 3076 } 3077 3078 float[] dst = a1 == a ? b : a; 3079 3080 int k = a1 == a ? run[lo] - offset : run[lo]; 3081 int lo1 = a1 == b ? run[lo] - offset : run[lo]; 3082 int hi1 = a1 == b ? run[mi] - offset : run[mi]; 3083 int lo2 = a2 == b ? run[mi] - offset : run[mi]; 3084 int hi2 = a2 == b ? run[hi] - offset : run[hi]; 3085 3086 if (parallel) { 3087 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); 3088 } else { 3089 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); 3090 } 3091 return dst; 3092 } 3093 3094 /** 3095 * Merges the sorted parts. 3096 * 3097 * @param merger parallel context 3098 * @param dst the destination where parts are merged 3099 * @param k the start index of the destination, inclusive 3100 * @param a1 the first part 3101 * @param lo1 the start index of the first part, inclusive 3102 * @param hi1 the end index of the first part, exclusive 3103 * @param a2 the second part 3104 * @param lo2 the start index of the second part, inclusive 3105 * @param hi2 the end index of the second part, exclusive 3106 */ 3107 private static void mergeParts(Merger merger, float[] dst, int k, 3108 float[] a1, int lo1, int hi1, float[] a2, int lo2, int hi2) { 3109 3110 if (merger != null && a1 == a2) { 3111 3112 while (true) { 3113 3114 /* 3115 * The first part must be larger. 3116 */ 3117 if (hi1 - lo1 < hi2 - lo2) { 3118 int lo = lo1; lo1 = lo2; lo2 = lo; 3119 int hi = hi1; hi1 = hi2; hi2 = hi; 3120 } 3121 3122 /* 3123 * Small parts will be merged sequentially. 3124 */ 3125 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { 3126 break; 3127 } 3128 3129 /* 3130 * Find the median of the larger part. 3131 */ 3132 int mi1 = (lo1 + hi1) >>> 1; 3133 float key = a1[mi1]; 3134 int mi2 = hi2; 3135 3136 /* 3137 * Partition the smaller part. 3138 */ 3139 for (int loo = lo2; loo < mi2; ) { 3140 int t = (loo + mi2) >>> 1; 3141 3142 if (key > a2[t]) { 3143 loo = t + 1; 3144 } else { 3145 mi2 = t; 3146 } 3147 } 3148 3149 int d = mi2 - lo2 + mi1 - lo1; 3150 3151 /* 3152 * Merge the right sub-parts in parallel. 3153 */ 3154 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); 3155 3156 /* 3157 * Process the sub-left parts. 3158 */ 3159 hi1 = mi1; 3160 hi2 = mi2; 3161 } 3162 } 3163 3164 /* 3165 * Merge small parts sequentially. 3166 */ 3167 while (lo1 < hi1 && lo2 < hi2) { 3168 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; 3169 } 3170 if (dst != a1 || k < lo1) { 3171 while (lo1 < hi1) { 3172 dst[k++] = a1[lo1++]; 3173 } 3174 } 3175 if (dst != a2 || k < lo2) { 3176 while (lo2 < hi2) { 3177 dst[k++] = a2[lo2++]; 3178 } 3179 } 3180 } 3181 3182 // [double] 3183 3184 /** 3185 * Sorts the specified range of the array using parallel merge 3186 * sort and/or Dual-Pivot Quicksort. 3187 * 3188 * To balance the faster splitting and parallelism of merge sort 3189 * with the faster element partitioning of Quicksort, ranges are 3190 * subdivided in tiers such that, if there is enough parallelism, 3191 * the four-way parallel merge is started, still ensuring enough 3192 * parallelism to process the partitions. 3193 * 3194 * @param a the array to be sorted 3195 * @param parallelism the parallelism level 3196 * @param low the index of the first element, inclusive, to be sorted 3197 * @param high the index of the last element, exclusive, to be sorted 3198 */ 3199 static void sort(double[] a, int parallelism, int low, int high) { 3200 /* 3201 * Phase 1. Count the number of negative zero -0.0d, 3202 * turn them into positive zero, and move all NaNs 3203 * to the end of the array. 3204 */ 3205 int numNegativeZero = 0; 3206 3207 for (int k = high; k > low; ) { 3208 double ak = a[--k]; 3209 3210 if (ak == 0.0d && Double.doubleToRawLongBits(ak) < 0) { // ak is -0.0d 3211 numNegativeZero += 1; 3212 a[k] = 0.0d; 3213 } else if (ak != ak) { // ak is NaN 3214 a[k] = a[--high]; 3215 a[high] = ak; 3216 } 3217 } 3218 3219 /* 3220 * Phase 2. Sort everything except NaNs, 3221 * which are already in place. 3222 */ 3223 int size = high - low; 3224 3225 if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { 3226 int depth = getDepth(parallelism, size >> 12); 3227 double[] b = depth == 0 ? null : new double[size]; 3228 new Sorter(null, a, b, low, size, low, depth).invoke(); 3229 } else { 3230 sort(null, a, 0, low, high); 3231 } 3232 3233 /* 3234 * Phase 3. Turn positive zero 0.0d 3235 * back into negative zero -0.0d. 3236 */ 3237 if (++numNegativeZero == 1) { 3238 return; 3239 } 3240 3241 /* 3242 * Find the position one less than 3243 * the index of the first zero. 3244 */ 3245 while (low <= high) { 3246 int middle = (low + high) >>> 1; 3247 3248 if (a[middle] < 0) { 3249 low = middle + 1; 3250 } else { 3251 high = middle - 1; 3252 } 3253 } 3254 3255 /* 3256 * Replace the required number of 0.0d by -0.0d. 3257 */ 3258 while (--numNegativeZero > 0) { 3259 a[++high] = -0.0d; 3260 } 3261 } 3262 3263 /** 3264 * Sorts the specified array using the Dual-Pivot Quicksort and/or 3265 * other sorts in special-cases, possibly with parallel partitions. 3266 * 3267 * @param sorter parallel context 3268 * @param a the array to be sorted 3269 * @param bits the combination of recursion depth and bit flag, where 3270 * the right bit "0" indicates that array is the leftmost part 3271 * @param low the index of the first element, inclusive, to be sorted 3272 * @param high the index of the last element, exclusive, to be sorted 3273 */ 3274 static void sort(Sorter sorter, double[] a, int bits, int low, int high) { 3275 while (true) { 3276 int end = high - 1, size = high - low; 3277 3278 /* 3279 * Run mixed insertion sort on small non-leftmost parts. 3280 */ 3281 if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { 3282 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); 3283 return; 3284 } 3285 3286 /* 3287 * Invoke insertion sort on small leftmost part. 3288 */ 3289 if (size < MAX_INSERTION_SORT_SIZE) { 3290 insertionSort(a, low, high); 3291 return; 3292 } 3293 3294 /* 3295 * Check if the whole array or large non-leftmost 3296 * parts are nearly sorted and then merge runs. 3297 */ 3298 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) 3299 && tryMergeRuns(sorter, a, low, size)) { 3300 return; 3301 } 3302 3303 /* 3304 * Switch to heap sort if execution 3305 * time is becoming quadratic. 3306 */ 3307 if ((bits += 2) > MAX_RECURSION_DEPTH) { 3308 heapSort(a, low, high); 3309 return; 3310 } 3311 3312 /* 3313 * Use an inexpensive approximation of the golden ratio 3314 * to select five sample elements and determine pivots. 3315 */ 3316 int step = (size >> 3) * 3 + 3; 3317 3318 /* 3319 * Five elements around (and including) the central element 3320 * will be used for pivot selection as described below. The 3321 * unequal choice of spacing these elements was empirically 3322 * determined to work well on a wide variety of inputs. 3323 */ 3324 int e1 = low + step; 3325 int e5 = end - step; 3326 int e3 = (e1 + e5) >>> 1; 3327 int e2 = (e1 + e3) >>> 1; 3328 int e4 = (e3 + e5) >>> 1; 3329 double a3 = a[e3]; 3330 3331 /* 3332 * Sort these elements in place by the combination 3333 * of 4-element sorting network and insertion sort. 3334 * 3335 * 5 ------o-----------o------------ 3336 * | | 3337 * 4 ------|-----o-----o-----o------ 3338 * | | | 3339 * 2 ------o-----|-----o-----o------ 3340 * | | 3341 * 1 ------------o-----o------------ 3342 */ 3343 if (a[e5] < a[e2]) { double t = a[e5]; a[e5] = a[e2]; a[e2] = t; } 3344 if (a[e4] < a[e1]) { double t = a[e4]; a[e4] = a[e1]; a[e1] = t; } 3345 if (a[e5] < a[e4]) { double t = a[e5]; a[e5] = a[e4]; a[e4] = t; } 3346 if (a[e2] < a[e1]) { double t = a[e2]; a[e2] = a[e1]; a[e1] = t; } 3347 if (a[e4] < a[e2]) { double t = a[e4]; a[e4] = a[e2]; a[e2] = t; } 3348 3349 if (a3 < a[e2]) { 3350 if (a3 < a[e1]) { 3351 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; 3352 } else { 3353 a[e3] = a[e2]; a[e2] = a3; 3354 } 3355 } else if (a3 > a[e4]) { 3356 if (a3 > a[e5]) { 3357 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; 3358 } else { 3359 a[e3] = a[e4]; a[e4] = a3; 3360 } 3361 } 3362 3363 // Pointers 3364 int lower = low; // The index of the last element of the left part 3365 int upper = end; // The index of the first element of the right part 3366 3367 /* 3368 * Partitioning with 2 pivots in case of different elements. 3369 */ 3370 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { 3371 3372 /* 3373 * Use the first and fifth of the five sorted elements as 3374 * the pivots. These values are inexpensive approximation 3375 * of tertiles. Note, that pivot1 < pivot2. 3376 */ 3377 double pivot1 = a[e1]; 3378 double pivot2 = a[e5]; 3379 3380 /* 3381 * The first and the last elements to be sorted are moved 3382 * to the locations formerly occupied by the pivots. When 3383 * partitioning is completed, the pivots are swapped back 3384 * into their final positions, and excluded from the next 3385 * subsequent sorting. 3386 */ 3387 a[e1] = a[lower]; 3388 a[e5] = a[upper]; 3389 3390 /* 3391 * Skip elements, which are less or greater than the pivots. 3392 */ 3393 while (a[++lower] < pivot1); 3394 while (a[--upper] > pivot2); 3395 3396 /* 3397 * Backward 3-interval partitioning 3398 * 3399 * left part central part right part 3400 * +------------------------------------------------------------+ 3401 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | 3402 * +------------------------------------------------------------+ 3403 * ^ ^ ^ 3404 * | | | 3405 * lower k upper 3406 * 3407 * Invariants: 3408 * 3409 * all in (low, lower] < pivot1 3410 * pivot1 <= all in (k, upper) <= pivot2 3411 * all in [upper, end) > pivot2 3412 * 3413 * Pointer k is the last index of ?-part 3414 */ 3415 for (int unused = --lower, k = ++upper; --k > lower; ) { 3416 double ak = a[k]; 3417 3418 if (ak < pivot1) { // Move a[k] to the left side 3419 while (lower < k) { 3420 if (a[++lower] >= pivot1) { 3421 if (a[lower] > pivot2) { 3422 a[k] = a[--upper]; 3423 a[upper] = a[lower]; 3424 } else { 3425 a[k] = a[lower]; 3426 } 3427 a[lower] = ak; 3428 break; 3429 } 3430 } 3431 } else if (ak > pivot2) { // Move a[k] to the right side 3432 a[k] = a[--upper]; 3433 a[upper] = ak; 3434 } 3435 } 3436 3437 /* 3438 * Swap the pivots into their final positions. 3439 */ 3440 a[low] = a[lower]; a[lower] = pivot1; 3441 a[end] = a[upper]; a[upper] = pivot2; 3442 3443 /* 3444 * Sort non-left parts recursively (possibly in parallel), 3445 * excluding known pivots. 3446 */ 3447 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 3448 sorter.forkSorter(bits | 1, lower + 1, upper); 3449 sorter.forkSorter(bits | 1, upper + 1, high); 3450 } else { 3451 sort(sorter, a, bits | 1, lower + 1, upper); 3452 sort(sorter, a, bits | 1, upper + 1, high); 3453 } 3454 3455 } else { // Use single pivot in case of many equal elements 3456 3457 /* 3458 * Use the third of the five sorted elements as the pivot. 3459 * This value is inexpensive approximation of the median. 3460 */ 3461 double pivot = a[e3]; 3462 3463 /* 3464 * The first element to be sorted is moved to the 3465 * location formerly occupied by the pivot. After 3466 * completion of partitioning the pivot is swapped 3467 * back into its final position, and excluded from 3468 * the next subsequent sorting. 3469 */ 3470 a[e3] = a[lower]; 3471 3472 /* 3473 * Traditional 3-way (Dutch National Flag) partitioning 3474 * 3475 * left part central part right part 3476 * +------------------------------------------------------+ 3477 * | < pivot | ? | == pivot | > pivot | 3478 * +------------------------------------------------------+ 3479 * ^ ^ ^ 3480 * | | | 3481 * lower k upper 3482 * 3483 * Invariants: 3484 * 3485 * all in (low, lower] < pivot 3486 * all in (k, upper) == pivot 3487 * all in [upper, end] > pivot 3488 * 3489 * Pointer k is the last index of ?-part 3490 */ 3491 for (int k = ++upper; --k > lower; ) { 3492 double ak = a[k]; 3493 3494 if (ak != pivot) { 3495 a[k] = pivot; 3496 3497 if (ak < pivot) { // Move a[k] to the left side 3498 while (a[++lower] < pivot); 3499 3500 if (a[lower] > pivot) { 3501 a[--upper] = a[lower]; 3502 } 3503 a[lower] = ak; 3504 } else { // ak > pivot - Move a[k] to the right side 3505 a[--upper] = ak; 3506 } 3507 } 3508 } 3509 3510 /* 3511 * Swap the pivot into its final position. 3512 */ 3513 a[low] = a[lower]; a[lower] = pivot; 3514 3515 /* 3516 * Sort the right part (possibly in parallel), excluding 3517 * known pivot. All elements from the central part are 3518 * equal and therefore already sorted. 3519 */ 3520 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { 3521 sorter.forkSorter(bits | 1, upper, high); 3522 } else { 3523 sort(sorter, a, bits | 1, upper, high); 3524 } 3525 } 3526 high = lower; // Iterate along the left part 3527 } 3528 } 3529 3530 /** 3531 * Sorts the specified range of the array using mixed insertion sort. 3532 * 3533 * Mixed insertion sort is combination of simple insertion sort, 3534 * pin insertion sort and pair insertion sort. 3535 * 3536 * In the context of Dual-Pivot Quicksort, the pivot element 3537 * from the left part plays the role of sentinel, because it 3538 * is less than any elements from the given part. Therefore, 3539 * expensive check of the left range can be skipped on each 3540 * iteration unless it is the leftmost call. 3541 * 3542 * @param a the array to be sorted 3543 * @param low the index of the first element, inclusive, to be sorted 3544 * @param end the index of the last element for simple insertion sort 3545 * @param high the index of the last element, exclusive, to be sorted 3546 */ 3547 private static void mixedInsertionSort(double[] a, int low, int end, int high) { 3548 if (end == high) { 3549 3550 /* 3551 * Invoke simple insertion sort on tiny array. 3552 */ 3553 for (int i; ++low < end; ) { 3554 double ai = a[i = low]; 3555 3556 while (ai < a[--i]) { 3557 a[i + 1] = a[i]; 3558 } 3559 a[i + 1] = ai; 3560 } 3561 } else { 3562 3563 /* 3564 * Start with pin insertion sort on small part. 3565 * 3566 * Pin insertion sort is extended simple insertion sort. 3567 * The main idea of this sort is to put elements larger 3568 * than an element called pin to the end of array (the 3569 * proper area for such elements). It avoids expensive 3570 * movements of these elements through the whole array. 3571 */ 3572 double pin = a[end]; 3573 3574 for (int i, p = high; ++low < end; ) { 3575 double ai = a[i = low]; 3576 3577 if (ai < a[i - 1]) { // Small element 3578 3579 /* 3580 * Insert small element into sorted part. 3581 */ 3582 a[i] = a[--i]; 3583 3584 while (ai < a[--i]) { 3585 a[i + 1] = a[i]; 3586 } 3587 a[i + 1] = ai; 3588 3589 } else if (p > i && ai > pin) { // Large element 3590 3591 /* 3592 * Find element smaller than pin. 3593 */ 3594 while (a[--p] > pin); 3595 3596 /* 3597 * Swap it with large element. 3598 */ 3599 if (p > i) { 3600 ai = a[p]; 3601 a[p] = a[i]; 3602 } 3603 3604 /* 3605 * Insert small element into sorted part. 3606 */ 3607 while (ai < a[--i]) { 3608 a[i + 1] = a[i]; 3609 } 3610 a[i + 1] = ai; 3611 } 3612 } 3613 3614 /* 3615 * Continue with pair insertion sort on remain part. 3616 */ 3617 for (int i; low < high; ++low) { 3618 double a1 = a[i = low], a2 = a[++low]; 3619 3620 /* 3621 * Insert two elements per iteration: at first, insert the 3622 * larger element and then insert the smaller element, but 3623 * from the position where the larger element was inserted. 3624 */ 3625 if (a1 > a2) { 3626 3627 while (a1 < a[--i]) { 3628 a[i + 2] = a[i]; 3629 } 3630 a[++i + 1] = a1; 3631 3632 while (a2 < a[--i]) { 3633 a[i + 1] = a[i]; 3634 } 3635 a[i + 1] = a2; 3636 3637 } else if (a1 < a[i - 1]) { 3638 3639 while (a2 < a[--i]) { 3640 a[i + 2] = a[i]; 3641 } 3642 a[++i + 1] = a2; 3643 3644 while (a1 < a[--i]) { 3645 a[i + 1] = a[i]; 3646 } 3647 a[i + 1] = a1; 3648 } 3649 } 3650 } 3651 } 3652 3653 /** 3654 * Sorts the specified range of the array using insertion sort. 3655 * 3656 * @param a the array to be sorted 3657 * @param low the index of the first element, inclusive, to be sorted 3658 * @param high the index of the last element, exclusive, to be sorted 3659 */ 3660 private static void insertionSort(double[] a, int low, int high) { 3661 for (int i, k = low; ++k < high; ) { 3662 double ai = a[i = k]; 3663 3664 if (ai < a[i - 1]) { 3665 while (--i >= low && ai < a[i]) { 3666 a[i + 1] = a[i]; 3667 } 3668 a[i + 1] = ai; 3669 } 3670 } 3671 } 3672 3673 /** 3674 * Sorts the specified range of the array using heap sort. 3675 * 3676 * @param a the array to be sorted 3677 * @param low the index of the first element, inclusive, to be sorted 3678 * @param high the index of the last element, exclusive, to be sorted 3679 */ 3680 private static void heapSort(double[] a, int low, int high) { 3681 for (int k = (low + high) >>> 1; k > low; ) { 3682 pushDown(a, --k, a[k], low, high); 3683 } 3684 while (--high > low) { 3685 double max = a[low]; 3686 pushDown(a, low, a[high], low, high); 3687 a[high] = max; 3688 } 3689 } 3690 3691 /** 3692 * Pushes specified element down during heap sort. 3693 * 3694 * @param a the given array 3695 * @param p the start index 3696 * @param value the given element 3697 * @param low the index of the first element, inclusive, to be sorted 3698 * @param high the index of the last element, exclusive, to be sorted 3699 */ 3700 private static void pushDown(double[] a, int p, double value, int low, int high) { 3701 for (int k ;; a[p] = a[p = k]) { 3702 k = (p << 1) - low + 2; // Index of the right child 3703 3704 if (k > high) { 3705 break; 3706 } 3707 if (k == high || a[k] < a[k - 1]) { 3708 --k; 3709 } 3710 if (a[k] <= value) { 3711 break; 3712 } 3713 } 3714 a[p] = value; 3715 } 3716 3717 /** 3718 * Tries to sort the specified range of the array. 3719 * 3720 * @param sorter parallel context 3721 * @param a the array to be sorted 3722 * @param low the index of the first element to be sorted 3723 * @param size the array size 3724 * @return true if finally sorted, false otherwise 3725 */ 3726 private static boolean tryMergeRuns(Sorter sorter, double[] a, int low, int size) { 3727 3728 /* 3729 * The run array is constructed only if initial runs are 3730 * long enough to continue, run[i] then holds start index 3731 * of the i-th sequence of elements in non-descending order. 3732 */ 3733 int[] run = null; 3734 int high = low + size; 3735 int count = 1, last = low; 3736 3737 /* 3738 * Identify all possible runs. 3739 */ 3740 for (int k = low + 1; k < high; ) { 3741 3742 /* 3743 * Find the end index of the current run. 3744 */ 3745 if (a[k - 1] < a[k]) { 3746 3747 // Identify ascending sequence 3748 while (++k < high && a[k - 1] <= a[k]); 3749 3750 } else if (a[k - 1] > a[k]) { 3751 3752 // Identify descending sequence 3753 while (++k < high && a[k - 1] >= a[k]); 3754 3755 // Reverse into ascending order 3756 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { 3757 double ai = a[i]; a[i] = a[j]; a[j] = ai; 3758 } 3759 } else { // Identify constant sequence 3760 for (double ak = a[k]; ++k < high && ak == a[k]; ); 3761 3762 if (k < high) { 3763 continue; 3764 } 3765 } 3766 3767 /* 3768 * Check special cases. 3769 */ 3770 if (run == null) { 3771 if (k == high) { 3772 3773 /* 3774 * The array is monotonous sequence, 3775 * and therefore already sorted. 3776 */ 3777 return true; 3778 } 3779 3780 if (k - low < MIN_FIRST_RUN_SIZE) { 3781 3782 /* 3783 * The first run is too small 3784 * to proceed with scanning. 3785 */ 3786 return false; 3787 } 3788 3789 run = new int[((size >> 10) | 0x7F) & 0x3FF]; 3790 run[0] = low; 3791 3792 } else if (a[last - 1] > a[last]) { 3793 3794 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { 3795 3796 /* 3797 * The first runs are not long 3798 * enough to continue scanning. 3799 */ 3800 return false; 3801 } 3802 3803 if (++count == MAX_RUN_CAPACITY) { 3804 3805 /* 3806 * Array is not highly structured. 3807 */ 3808 return false; 3809 } 3810 3811 if (count == run.length) { 3812 3813 /* 3814 * Increase capacity of index array. 3815 */ 3816 run = Arrays.copyOf(run, count << 1); 3817 } 3818 } 3819 run[count] = (last = k); 3820 } 3821 3822 /* 3823 * Merge runs of highly structured array. 3824 */ 3825 if (count > 1) { 3826 double[] b; int offset = low; 3827 3828 if (sorter == null || (b = (double[]) sorter.b) == null) { 3829 b = new double[size]; 3830 } else { 3831 offset = sorter.offset; 3832 } 3833 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); 3834 } 3835 return true; 3836 } 3837 3838 /** 3839 * Merges the specified runs. 3840 * 3841 * @param a the source array 3842 * @param b the temporary buffer used in merging 3843 * @param offset the start index in the source, inclusive 3844 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) 3845 * @param parallel indicates whether merging is performed in parallel 3846 * @param run the start indexes of the runs, inclusive 3847 * @param lo the start index of the first run, inclusive 3848 * @param hi the start index of the last run, inclusive 3849 * @return the destination where runs are merged 3850 */ 3851 private static double[] mergeRuns(double[] a, double[] b, int offset, 3852 int aim, boolean parallel, int[] run, int lo, int hi) { 3853 3854 if (hi - lo == 1) { 3855 if (aim >= 0) { 3856 return a; 3857 } 3858 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; 3859 b[--j] = a[--i] 3860 ); 3861 return b; 3862 } 3863 3864 /* 3865 * Split into approximately equal parts. 3866 */ 3867 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; 3868 while (run[++mi + 1] <= rmi); 3869 3870 /* 3871 * Merge the left and right parts. 3872 */ 3873 double[] a1, a2; 3874 3875 if (parallel && hi - lo > MIN_RUN_COUNT) { 3876 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); 3877 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); 3878 a2 = (double[]) merger.getDestination(); 3879 } else { 3880 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); 3881 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); 3882 } 3883 3884 double[] dst = a1 == a ? b : a; 3885 3886 int k = a1 == a ? run[lo] - offset : run[lo]; 3887 int lo1 = a1 == b ? run[lo] - offset : run[lo]; 3888 int hi1 = a1 == b ? run[mi] - offset : run[mi]; 3889 int lo2 = a2 == b ? run[mi] - offset : run[mi]; 3890 int hi2 = a2 == b ? run[hi] - offset : run[hi]; 3891 3892 if (parallel) { 3893 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); 3894 } else { 3895 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); 3896 } 3897 return dst; 3898 } 3899 3900 /** 3901 * Merges the sorted parts. 3902 * 3903 * @param merger parallel context 3904 * @param dst the destination where parts are merged 3905 * @param k the start index of the destination, inclusive 3906 * @param a1 the first part 3907 * @param lo1 the start index of the first part, inclusive 3908 * @param hi1 the end index of the first part, exclusive 3909 * @param a2 the second part 3910 * @param lo2 the start index of the second part, inclusive 3911 * @param hi2 the end index of the second part, exclusive 3912 */ 3913 private static void mergeParts(Merger merger, double[] dst, int k, 3914 double[] a1, int lo1, int hi1, double[] a2, int lo2, int hi2) { 3915 3916 if (merger != null && a1 == a2) { 3917 3918 while (true) { 3919 3920 /* 3921 * The first part must be larger. 3922 */ 3923 if (hi1 - lo1 < hi2 - lo2) { 3924 int lo = lo1; lo1 = lo2; lo2 = lo; 3925 int hi = hi1; hi1 = hi2; hi2 = hi; 3926 } 3927 3928 /* 3929 * Small parts will be merged sequentially. 3930 */ 3931 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { 3932 break; 3933 } 3934 3935 /* 3936 * Find the median of the larger part. 3937 */ 3938 int mi1 = (lo1 + hi1) >>> 1; 3939 double key = a1[mi1]; 3940 int mi2 = hi2; 3941 3942 /* 3943 * Partition the smaller part. 3944 */ 3945 for (int loo = lo2; loo < mi2; ) { 3946 int t = (loo + mi2) >>> 1; 3947 3948 if (key > a2[t]) { 3949 loo = t + 1; 3950 } else { 3951 mi2 = t; 3952 } 3953 } 3954 3955 int d = mi2 - lo2 + mi1 - lo1; 3956 3957 /* 3958 * Merge the right sub-parts in parallel. 3959 */ 3960 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); 3961 3962 /* 3963 * Process the sub-left parts. 3964 */ 3965 hi1 = mi1; 3966 hi2 = mi2; 3967 } 3968 } 3969 3970 /* 3971 * Merge small parts sequentially. 3972 */ 3973 while (lo1 < hi1 && lo2 < hi2) { 3974 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; 3975 } 3976 if (dst != a1 || k < lo1) { 3977 while (lo1 < hi1) { 3978 dst[k++] = a1[lo1++]; 3979 } 3980 } 3981 if (dst != a2 || k < lo2) { 3982 while (lo2 < hi2) { 3983 dst[k++] = a2[lo2++]; 3984 } 3985 } 3986 } 3987 3988 // [class] 3989 3990 /** 3991 * This class implements parallel sorting. 3992 */ 3993 private static final class Sorter extends CountedCompleter<Void> { 3994 private static final long serialVersionUID = 20180818L; 3995 private final Object a, b; 3996 private final int low, size, offset, depth; 3997 3998 private Sorter(CountedCompleter<?> parent, 3999 Object a, Object b, int low, int size, int offset, int depth) { 4000 super(parent); 4001 this.a = a; 4002 this.b = b; 4003 this.low = low; 4004 this.size = size; 4005 this.offset = offset; 4006 this.depth = depth; 4007 } 4008 4009 @Override 4010 public final void compute() { 4011 if (depth < 0) { 4012 setPendingCount(2); 4013 int half = size >> 1; 4014 new Sorter(this, b, a, low, half, offset, depth + 1).fork(); 4015 new Sorter(this, b, a, low + half, size - half, offset, depth + 1).compute(); 4016 } else { 4017 if (a instanceof int[]) { 4018 sort(this, (int[]) a, depth, low, low + size); 4019 } else if (a instanceof long[]) { 4020 sort(this, (long[]) a, depth, low, low + size); 4021 } else if (a instanceof float[]) { 4022 sort(this, (float[]) a, depth, low, low + size); 4023 } else if (a instanceof double[]) { 4024 sort(this, (double[]) a, depth, low, low + size); 4025 } else { 4026 throw new IllegalArgumentException( 4027 "Unknown type of array: " + a.getClass().getName()); 4028 } 4029 } 4030 tryComplete(); 4031 } 4032 4033 @Override 4034 public final void onCompletion(CountedCompleter<?> caller) { 4035 if (depth < 0) { 4036 int mi = low + (size >> 1); 4037 boolean src = (depth & 1) == 0; 4038 4039 new Merger(null, 4040 a, 4041 src ? low : low - offset, 4042 b, 4043 src ? low - offset : low, 4044 src ? mi - offset : mi, 4045 b, 4046 src ? mi - offset : mi, 4047 src ? low + size - offset : low + size 4048 ).invoke(); 4049 } 4050 } 4051 4052 private void forkSorter(int depth, int low, int high) { 4053 addToPendingCount(1); 4054 Object a = this.a; // Use local variable for performance 4055 new Sorter(this, a, b, low, high - low, offset, depth).fork(); 4056 } 4057 } 4058 4059 /** 4060 * This class implements parallel merging. 4061 */ 4062 private static final class Merger extends CountedCompleter<Void> { 4063 private static final long serialVersionUID = 20180818L; 4064 private final Object dst, a1, a2; 4065 private final int k, lo1, hi1, lo2, hi2; 4066 4067 private Merger(CountedCompleter<?> parent, Object dst, int k, 4068 Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) { 4069 super(parent); 4070 this.dst = dst; 4071 this.k = k; 4072 this.a1 = a1; 4073 this.lo1 = lo1; 4074 this.hi1 = hi1; 4075 this.a2 = a2; 4076 this.lo2 = lo2; 4077 this.hi2 = hi2; 4078 } 4079 4080 @Override 4081 public final void compute() { 4082 if (dst instanceof int[]) { 4083 mergeParts(this, (int[]) dst, k, 4084 (int[]) a1, lo1, hi1, (int[]) a2, lo2, hi2); 4085 } else if (dst instanceof long[]) { 4086 mergeParts(this, (long[]) dst, k, 4087 (long[]) a1, lo1, hi1, (long[]) a2, lo2, hi2); 4088 } else if (dst instanceof float[]) { 4089 mergeParts(this, (float[]) dst, k, 4090 (float[]) a1, lo1, hi1, (float[]) a2, lo2, hi2); 4091 } else if (dst instanceof double[]) { 4092 mergeParts(this, (double[]) dst, k, 4093 (double[]) a1, lo1, hi1, (double[]) a2, lo2, hi2); 4094 } else { 4095 throw new IllegalArgumentException( 4096 "Unknown type of array: " + dst.getClass().getName()); 4097 } 4098 propagateCompletion(); 4099 } 4100 4101 private void forkMerger(Object dst, int k, 4102 Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) { 4103 addToPendingCount(1); 4104 new Merger(this, dst, k, a1, lo1, hi1, a2, lo2, hi2).fork(); 4105 } 4106 } 4107 4108 /** 4109 * This class implements parallel merging of runs. 4110 */ 4111 private static final class RunMerger extends RecursiveTask<Object> { 4112 private static final long serialVersionUID = 20180818L; 4113 private final Object a, b; 4114 private final int[] run; 4115 private final int offset, aim, lo, hi; 4116 4117 private RunMerger(Object a, Object b, int offset, 4118 int aim, int[] run, int lo, int hi) { 4119 this.a = a; 4120 this.b = b; 4121 this.offset = offset; 4122 this.aim = aim; 4123 this.run = run; 4124 this.lo = lo; 4125 this.hi = hi; 4126 } 4127 4128 @Override 4129 protected final Object compute() { 4130 if (a instanceof int[]) { 4131 return mergeRuns((int[]) a, (int[]) b, offset, aim, true, run, lo, hi); 4132 } 4133 if (a instanceof long[]) { 4134 return mergeRuns((long[]) a, (long[]) b, offset, aim, true, run, lo, hi); 4135 } 4136 if (a instanceof float[]) { 4137 return mergeRuns((float[]) a, (float[]) b, offset, aim, true, run, lo, hi); 4138 } 4139 if (a instanceof double[]) { 4140 return mergeRuns((double[]) a, (double[]) b, offset, aim, true, run, lo, hi); 4141 } 4142 throw new IllegalArgumentException( 4143 "Unknown type of array: " + a.getClass().getName()); 4144 } 4145 4146 private RunMerger forkMe() { 4147 fork(); 4148 return this; 4149 } 4150 4151 private Object getDestination() { 4152 join(); 4153 return getRawResult(); 4154 } 4155 } 4156 }