/* * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package java.util; import java.util.concurrent.CountedCompleter; import java.util.concurrent.RecursiveTask; /** * This class implements powerful and fully optimized versions, both * sequential and parallel, of the Dual-Pivot Quicksort algorithm by * Vladimir Yaroslavskiy, Jon Bentley and Josh Bloch. This algorithm * offers O(n log(n)) performance on all data sets, and is typically * faster than traditional (one-pivot) Quicksort implementations. * * There are also additional algorithms, invoked from the Dual-Pivot * Quicksort, such as mixed insertion sort, merging of runs and heap * sort, counting sort and parallel merge sort. * * @author Vladimir Yaroslavskiy * @author Jon Bentley * @author Josh Bloch * @author Doug Lea * * @version 2018.08.18 * * @since 1.7 * 14 */ final class DualPivotQuicksort { /** * Prevents instantiation. */ private DualPivotQuicksort() {} /** * Max array size to use mixed insertion sort. */ private static final int MAX_MIXED_INSERTION_SORT_SIZE = 114; /** * Max array size to use insertion sort. */ private static final int MAX_INSERTION_SORT_SIZE = 41; /** * Min array size to perform sorting in parallel. */ private static final int MIN_PARALLEL_SORT_SIZE = 4 << 10; /** * Min array size to try merging of runs. */ private static final int MIN_TRY_MERGE_SIZE = 4 << 10; /** * Min size of the first run to continue with scanning. */ private static final int MIN_FIRST_RUN_SIZE = 16; /** * Min factor for the first runs to continue scanning. */ private static final int MIN_FIRST_RUNS_FACTOR = 7; /** * Max capacity of the index array for tracking runs. */ private static final int MAX_RUN_CAPACITY = 5 << 10; /** * Min number of runs, required by parallel merging. */ private static final int MIN_RUN_COUNT = 4; /** * Min array size to use parallel merging of parts. */ private static final int MIN_PARALLEL_MERGE_PARTS_SIZE = 4 << 10; /** * Min size of a byte array to use counting sort. */ private static final int MIN_BYTE_COUNTING_SORT_SIZE = 64; /** * Min size of a short or char array to use counting sort. */ private static final int MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE = 1750; /** * Max double recursive partitioning depth before using heap sort. */ private static final int MAX_RECURSION_DEPTH = 64 << 1; /** * Calculates the double depth of parallel merging. * Depth is negative, if tasks split before sorting. * * @param parallelism the parallelism level * @param size the target size * @return the depth of parallel merging */ private static int getDepth(int parallelism, int size) { int depth = 0; while ((parallelism >>= 3) > 0 && (size >>= 2) > 0) { depth -= 2; } return depth; } /** * Sorts the specified range of the array using parallel merge * sort and/or Dual-Pivot Quicksort. * * To balance the faster splitting and parallelism of merge sort * with the faster element partitioning of Quicksort, ranges are * subdivided in tiers such that, if there is enough parallelism, * the four-way parallel merge is started, still ensuring enough * parallelism to process the partitions. * * @param a the array to be sorted * @param parallelism the parallelism level * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(int[] a, int parallelism, int low, int high) { int size = high - low; if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { int depth = getDepth(parallelism, size >> 12); int[] b = depth == 0 ? null : new int[size]; new Sorter(null, a, b, low, size, low, depth).invoke(); } else { sort(null, a, 0, low, high); } } /** * Sorts the specified array using the Dual-Pivot Quicksort and/or * other sorts in special-cases, possibly with parallel partitions. * * @param sorter parallel context * @param a the array to be sorted * @param bits the combination of recursion depth and bit flag, where * the right bit "0" indicates that array is the leftmost part * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(Sorter sorter, int[] a, int bits, int low, int high) { while (true) { int end = high - 1, size = high - low; /* * Run mixed insertion sort on small non-leftmost parts. */ if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); return; } /* * Invoke insertion sort on small leftmost part. */ if (size < MAX_INSERTION_SORT_SIZE) { insertionSort(a, low, high); return; } /* * Check if the whole array or large non-leftmost * parts are nearly sorted and then merge runs. */ if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) && tryMergeRuns(sorter, a, low, size)) { return; } /* * Switch to heap sort if execution * time is becoming quadratic. */ if ((bits += 2) > MAX_RECURSION_DEPTH) { heapSort(a, low, high); return; } /* * Use an inexpensive approximation of the golden ratio * to select five sample elements and determine pivots. */ int step = (size >> 3) * 3 + 3; /* * Five elements around (and including) the central element * will be used for pivot selection as described below. The * unequal choice of spacing these elements was empirically * determined to work well on a wide variety of inputs. */ int e1 = low + step; int e5 = end - step; int e3 = (e1 + e5) >>> 1; int e2 = (e1 + e3) >>> 1; int e4 = (e3 + e5) >>> 1; int a3 = a[e3]; /* * Sort these elements in place by the combination * of 4-element sorting network and insertion sort. * * 5 ------o-----------o------------ * | | * 4 ------|-----o-----o-----o------ * | | | * 2 ------o-----|-----o-----o------ * | | * 1 ------------o-----o------------ */ if (a[e5] < a[e2]) { int t = a[e5]; a[e5] = a[e2]; a[e2] = t; } if (a[e4] < a[e1]) { int t = a[e4]; a[e4] = a[e1]; a[e1] = t; } if (a[e5] < a[e4]) { int t = a[e5]; a[e5] = a[e4]; a[e4] = t; } if (a[e2] < a[e1]) { int t = a[e2]; a[e2] = a[e1]; a[e1] = t; } if (a[e4] < a[e2]) { int t = a[e4]; a[e4] = a[e2]; a[e2] = t; } if (a3 < a[e2]) { if (a3 < a[e1]) { a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; } else { a[e3] = a[e2]; a[e2] = a3; } } else if (a3 > a[e4]) { if (a3 > a[e5]) { a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; } else { a[e3] = a[e4]; a[e4] = a3; } } // Pointers int lower = low; // The index of the last element of the left part int upper = end; // The index of the first element of the right part /* * Partitioning with 2 pivots in case of different elements. */ if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { /* * Use the first and fifth of the five sorted elements as * the pivots. These values are inexpensive approximation * of tertiles. Note, that pivot1 < pivot2. */ int pivot1 = a[e1]; int pivot2 = a[e5]; /* * The first and the last elements to be sorted are moved * to the locations formerly occupied by the pivots. When * partitioning is completed, the pivots are swapped back * into their final positions, and excluded from the next * subsequent sorting. */ a[e1] = a[lower]; a[e5] = a[upper]; /* * Skip elements, which are less or greater than the pivots. */ while (a[++lower] < pivot1); while (a[--upper] > pivot2); /* * Backward 3-interval partitioning * * left part central part right part * +------------------------------------------------------------+ * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | * +------------------------------------------------------------+ * ^ ^ ^ * | | | * lower k upper * * Invariants: * * all in (low, lower] < pivot1 * pivot1 <= all in (k, upper) <= pivot2 * all in [upper, end) > pivot2 * * Pointer k is the last index of ?-part */ for (int unused = --lower, k = ++upper; --k > lower; ) { int ak = a[k]; if (ak < pivot1) { // Move a[k] to the left side while (lower < k) { if (a[++lower] >= pivot1) { if (a[lower] > pivot2) { a[k] = a[--upper]; a[upper] = a[lower]; } else { a[k] = a[lower]; } a[lower] = ak; break; } } } else if (ak > pivot2) { // Move a[k] to the right side a[k] = a[--upper]; a[upper] = ak; } } /* * Swap the pivots into their final positions. */ a[low] = a[lower]; a[lower] = pivot1; a[end] = a[upper]; a[upper] = pivot2; /* * Sort non-left parts recursively (possibly in parallel), * excluding known pivots. */ if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { sorter.forkSorter(bits | 1, lower + 1, upper); sorter.forkSorter(bits | 1, upper + 1, high); } else { sort(sorter, a, bits | 1, lower + 1, upper); sort(sorter, a, bits | 1, upper + 1, high); } } else { // Use single pivot in case of many equal elements /* * Use the third of the five sorted elements as the pivot. * This value is inexpensive approximation of the median. */ int pivot = a[e3]; /* * The first element to be sorted is moved to the * location formerly occupied by the pivot. After * completion of partitioning the pivot is swapped * back into its final position, and excluded from * the next subsequent sorting. */ a[e3] = a[lower]; /* * Traditional 3-way (Dutch National Flag) partitioning * * left part central part right part * +------------------------------------------------------+ * | < pivot | ? | == pivot | > pivot | * +------------------------------------------------------+ * ^ ^ ^ * | | | * lower k upper * * Invariants: * * all in (low, lower] < pivot * all in (k, upper) == pivot * all in [upper, end] > pivot * * Pointer k is the last index of ?-part */ for (int k = ++upper; --k > lower; ) { int ak = a[k]; if (ak != pivot) { a[k] = pivot; if (ak < pivot) { // Move a[k] to the left side while (a[++lower] < pivot); if (a[lower] > pivot) { a[--upper] = a[lower]; } a[lower] = ak; } else { // ak > pivot - Move a[k] to the right side a[--upper] = ak; } } } /* * Swap the pivot into its final position. */ a[low] = a[lower]; a[lower] = pivot; /* * Sort the right part (possibly in parallel), excluding * known pivot. All elements from the central part are * equal and therefore already sorted. */ if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { sorter.forkSorter(bits | 1, upper, high); } else { sort(sorter, a, bits | 1, upper, high); } } high = lower; // Iterate along the left part } } /** * Sorts the specified range of the array using mixed insertion sort. * * Mixed insertion sort is combination of simple insertion sort, * pin insertion sort and pair insertion sort. * * In the context of Dual-Pivot Quicksort, the pivot element * from the left part plays the role of sentinel, because it * is less than any elements from the given part. Therefore, * expensive check of the left range can be skipped on each * iteration unless it is the leftmost call. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param end the index of the last element for simple insertion sort * @param high the index of the last element, exclusive, to be sorted */ private static void mixedInsertionSort(int[] a, int low, int end, int high) { if (end == high) { /* * Invoke simple insertion sort on tiny array. */ for (int i; ++low < end; ) { int ai = a[i = low]; while (ai < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } else { /* * Start with pin insertion sort on small part. * * Pin insertion sort is extended simple insertion sort. * The main idea of this sort is to put elements larger * than an element called pin to the end of array (the * proper area for such elements). It avoids expensive * movements of these elements through the whole array. */ int pin = a[end]; for (int i, p = high; ++low < end; ) { int ai = a[i = low]; if (ai < a[i - 1]) { // Small element /* * Insert small element into sorted part. */ a[i] = a[--i]; while (ai < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } else if (p > i && ai > pin) { // Large element /* * Find element smaller than pin. */ while (a[--p] > pin); /* * Swap it with large element. */ if (p > i) { ai = a[p]; a[p] = a[i]; } /* * Insert small element into sorted part. */ while (ai < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } /* * Continue with pair insertion sort on remain part. */ for (int i; low < high; ++low) { int a1 = a[i = low], a2 = a[++low]; /* * Insert two elements per iteration: at first, insert the * larger element and then insert the smaller element, but * from the position where the larger element was inserted. */ if (a1 > a2) { while (a1 < a[--i]) { a[i + 2] = a[i]; } a[++i + 1] = a1; while (a2 < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = a2; } else if (a1 < a[i - 1]) { while (a2 < a[--i]) { a[i + 2] = a[i]; } a[++i + 1] = a2; while (a1 < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = a1; } } } } /** * Sorts the specified range of the array using insertion sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void insertionSort(int[] a, int low, int high) { for (int i, k = low; ++k < high; ) { int ai = a[i = k]; if (ai < a[i - 1]) { while (--i >= low && ai < a[i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } } /** * Sorts the specified range of the array using heap sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void heapSort(int[] a, int low, int high) { for (int k = (low + high) >>> 1; k > low; ) { pushDown(a, --k, a[k], low, high); } while (--high > low) { int max = a[low]; pushDown(a, low, a[high], low, high); a[high] = max; } } /** * Pushes specified element down during heap sort. * * @param a the given array * @param p the start index * @param value the given element * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void pushDown(int[] a, int p, int value, int low, int high) { for (int k ;; a[p] = a[p = k]) { k = (p << 1) - low + 2; // Index of the right child if (k > high) { break; } if (k == high || a[k] < a[k - 1]) { --k; } if (a[k] <= value) { break; } } a[p] = value; } /** * Tries to sort the specified range of the array. * * @param sorter parallel context * @param a the array to be sorted * @param low the index of the first element to be sorted * @param size the array size * @return true if finally sorted, false otherwise */ private static boolean tryMergeRuns(Sorter sorter, int[] a, int low, int size) { /* * The run array is constructed only if initial runs are * long enough to continue, run[i] then holds start index * of the i-th sequence of elements in non-descending order. */ int[] run = null; int high = low + size; int count = 1, last = low; /* * Identify all possible runs. */ for (int k = low + 1; k < high; ) { /* * Find the end index of the current run. */ if (a[k - 1] < a[k]) { // Identify ascending sequence while (++k < high && a[k - 1] <= a[k]); } else if (a[k - 1] > a[k]) { // Identify descending sequence while (++k < high && a[k - 1] >= a[k]); // Reverse into ascending order for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { int ai = a[i]; a[i] = a[j]; a[j] = ai; } } else { // Identify constant sequence for (int ak = a[k]; ++k < high && ak == a[k]; ); if (k < high) { continue; } } /* * Check special cases. */ if (run == null) { if (k == high) { /* * The array is monotonous sequence, * and therefore already sorted. */ return true; } if (k - low < MIN_FIRST_RUN_SIZE) { /* * The first run is too small * to proceed with scanning. */ return false; } run = new int[((size >> 10) | 0x7F) & 0x3FF]; run[0] = low; } else if (a[last - 1] > a[last]) { if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { /* * The first runs are not long * enough to continue scanning. */ return false; } if (++count == MAX_RUN_CAPACITY) { /* * Array is not highly structured. */ return false; } if (count == run.length) { /* * Increase capacity of index array. */ run = Arrays.copyOf(run, count << 1); } } run[count] = (last = k); } /* * Merge runs of highly structured array. */ if (count > 1) { int[] b; int offset = low; if (sorter == null || (b = (int[]) sorter.b) == null) { b = new int[size]; } else { offset = sorter.offset; } mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); } return true; } /** * Merges the specified runs. * * @param a the source array * @param b the temporary buffer used in merging * @param offset the start index in the source, inclusive * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) * @param parallel indicates whether merging is performed in parallel * @param run the start indexes of the runs, inclusive * @param lo the start index of the first run, inclusive * @param hi the start index of the last run, inclusive * @return the destination where runs are merged */ private static int[] mergeRuns(int[] a, int[] b, int offset, int aim, boolean parallel, int[] run, int lo, int hi) { if (hi - lo == 1) { if (aim >= 0) { return a; } for (int i = run[hi], j = i - offset, low = run[lo]; i > low; b[--j] = a[--i] ); return b; } /* * Split into approximately equal parts. */ int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; while (run[++mi + 1] <= rmi); /* * Merge the left and right parts. */ int[] a1, a2; if (parallel && hi - lo > MIN_RUN_COUNT) { RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); a2 = (int[]) merger.getDestination(); } else { a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); } int[] dst = a1 == a ? b : a; int k = a1 == a ? run[lo] - offset : run[lo]; int lo1 = a1 == b ? run[lo] - offset : run[lo]; int hi1 = a1 == b ? run[mi] - offset : run[mi]; int lo2 = a2 == b ? run[mi] - offset : run[mi]; int hi2 = a2 == b ? run[hi] - offset : run[hi]; if (parallel) { new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); } else { mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); } return dst; } /** * Merges the sorted parts. * * @param merger parallel context * @param dst the destination where parts are merged * @param k the start index of the destination, inclusive * @param a1 the first part * @param lo1 the start index of the first part, inclusive * @param hi1 the end index of the first part, exclusive * @param a2 the second part * @param lo2 the start index of the second part, inclusive * @param hi2 the end index of the second part, exclusive */ private static void mergeParts(Merger merger, int[] dst, int k, int[] a1, int lo1, int hi1, int[] a2, int lo2, int hi2) { if (merger != null && a1 == a2) { while (true) { /* * The first part must be larger. */ if (hi1 - lo1 < hi2 - lo2) { int lo = lo1; lo1 = lo2; lo2 = lo; int hi = hi1; hi1 = hi2; hi2 = hi; } /* * Small parts will be merged sequentially. */ if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { break; } /* * Find the median of the larger part. */ int mi1 = (lo1 + hi1) >>> 1; int key = a1[mi1]; int mi2 = hi2; /* * Partition the smaller part. */ for (int loo = lo2; loo < mi2; ) { int t = (loo + mi2) >>> 1; if (key > a2[t]) { loo = t + 1; } else { mi2 = t; } } int d = mi2 - lo2 + mi1 - lo1; /* * Merge the right sub-parts in parallel. */ merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); /* * Process the sub-left parts. */ hi1 = mi1; hi2 = mi2; } } /* * Merge small parts sequentially. */ while (lo1 < hi1 && lo2 < hi2) { dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; } if (dst != a1 || k < lo1) { while (lo1 < hi1) { dst[k++] = a1[lo1++]; } } if (dst != a2 || k < lo2) { while (lo2 < hi2) { dst[k++] = a2[lo2++]; } } } // [long] /** * Sorts the specified range of the array using parallel merge * sort and/or Dual-Pivot Quicksort. * * To balance the faster splitting and parallelism of merge sort * with the faster element partitioning of Quicksort, ranges are * subdivided in tiers such that, if there is enough parallelism, * the four-way parallel merge is started, still ensuring enough * parallelism to process the partitions. * * @param a the array to be sorted * @param parallelism the parallelism level * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(long[] a, int parallelism, int low, int high) { int size = high - low; if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { int depth = getDepth(parallelism, size >> 12); long[] b = depth == 0 ? null : new long[size]; new Sorter(null, a, b, low, size, low, depth).invoke(); } else { sort(null, a, 0, low, high); } } /** * Sorts the specified array using the Dual-Pivot Quicksort and/or * other sorts in special-cases, possibly with parallel partitions. * * @param sorter parallel context * @param a the array to be sorted * @param bits the combination of recursion depth and bit flag, where * the right bit "0" indicates that array is the leftmost part * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(Sorter sorter, long[] a, int bits, int low, int high) { while (true) { int end = high - 1, size = high - low; /* * Run mixed insertion sort on small non-leftmost parts. */ if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); return; } /* * Invoke insertion sort on small leftmost part. */ if (size < MAX_INSERTION_SORT_SIZE) { insertionSort(a, low, high); return; } /* * Check if the whole array or large non-leftmost * parts are nearly sorted and then merge runs. */ if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) && tryMergeRuns(sorter, a, low, size)) { return; } /* * Switch to heap sort if execution * time is becoming quadratic. */ if ((bits += 2) > MAX_RECURSION_DEPTH) { heapSort(a, low, high); return; } /* * Use an inexpensive approximation of the golden ratio * to select five sample elements and determine pivots. */ int step = (size >> 3) * 3 + 3; /* * Five elements around (and including) the central element * will be used for pivot selection as described below. The * unequal choice of spacing these elements was empirically * determined to work well on a wide variety of inputs. */ int e1 = low + step; int e5 = end - step; int e3 = (e1 + e5) >>> 1; int e2 = (e1 + e3) >>> 1; int e4 = (e3 + e5) >>> 1; long a3 = a[e3]; /* * Sort these elements in place by the combination * of 4-element sorting network and insertion sort. * * 5 ------o-----------o------------ * | | * 4 ------|-----o-----o-----o------ * | | | * 2 ------o-----|-----o-----o------ * | | * 1 ------------o-----o------------ */ if (a[e5] < a[e2]) { long t = a[e5]; a[e5] = a[e2]; a[e2] = t; } if (a[e4] < a[e1]) { long t = a[e4]; a[e4] = a[e1]; a[e1] = t; } if (a[e5] < a[e4]) { long t = a[e5]; a[e5] = a[e4]; a[e4] = t; } if (a[e2] < a[e1]) { long t = a[e2]; a[e2] = a[e1]; a[e1] = t; } if (a[e4] < a[e2]) { long t = a[e4]; a[e4] = a[e2]; a[e2] = t; } if (a3 < a[e2]) { if (a3 < a[e1]) { a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; } else { a[e3] = a[e2]; a[e2] = a3; } } else if (a3 > a[e4]) { if (a3 > a[e5]) { a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; } else { a[e3] = a[e4]; a[e4] = a3; } } // Pointers int lower = low; // The index of the last element of the left part int upper = end; // The index of the first element of the right part /* * Partitioning with 2 pivots in case of different elements. */ if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { /* * Use the first and fifth of the five sorted elements as * the pivots. These values are inexpensive approximation * of tertiles. Note, that pivot1 < pivot2. */ long pivot1 = a[e1]; long pivot2 = a[e5]; /* * The first and the last elements to be sorted are moved * to the locations formerly occupied by the pivots. When * partitioning is completed, the pivots are swapped back * into their final positions, and excluded from the next * subsequent sorting. */ a[e1] = a[lower]; a[e5] = a[upper]; /* * Skip elements, which are less or greater than the pivots. */ while (a[++lower] < pivot1); while (a[--upper] > pivot2); /* * Backward 3-interval partitioning * * left part central part right part * +------------------------------------------------------------+ * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | * +------------------------------------------------------------+ * ^ ^ ^ * | | | * lower k upper * * Invariants: * * all in (low, lower] < pivot1 * pivot1 <= all in (k, upper) <= pivot2 * all in [upper, end) > pivot2 * * Pointer k is the last index of ?-part */ for (int unused = --lower, k = ++upper; --k > lower; ) { long ak = a[k]; if (ak < pivot1) { // Move a[k] to the left side while (lower < k) { if (a[++lower] >= pivot1) { if (a[lower] > pivot2) { a[k] = a[--upper]; a[upper] = a[lower]; } else { a[k] = a[lower]; } a[lower] = ak; break; } } } else if (ak > pivot2) { // Move a[k] to the right side a[k] = a[--upper]; a[upper] = ak; } } /* * Swap the pivots into their final positions. */ a[low] = a[lower]; a[lower] = pivot1; a[end] = a[upper]; a[upper] = pivot2; /* * Sort non-left parts recursively (possibly in parallel), * excluding known pivots. */ if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { sorter.forkSorter(bits | 1, lower + 1, upper); sorter.forkSorter(bits | 1, upper + 1, high); } else { sort(sorter, a, bits | 1, lower + 1, upper); sort(sorter, a, bits | 1, upper + 1, high); } } else { // Use single pivot in case of many equal elements /* * Use the third of the five sorted elements as the pivot. * This value is inexpensive approximation of the median. */ long pivot = a[e3]; /* * The first element to be sorted is moved to the * location formerly occupied by the pivot. After * completion of partitioning the pivot is swapped * back into its final position, and excluded from * the next subsequent sorting. */ a[e3] = a[lower]; /* * Traditional 3-way (Dutch National Flag) partitioning * * left part central part right part * +------------------------------------------------------+ * | < pivot | ? | == pivot | > pivot | * +------------------------------------------------------+ * ^ ^ ^ * | | | * lower k upper * * Invariants: * * all in (low, lower] < pivot * all in (k, upper) == pivot * all in [upper, end] > pivot * * Pointer k is the last index of ?-part */ for (int k = ++upper; --k > lower; ) { long ak = a[k]; if (ak != pivot) { a[k] = pivot; if (ak < pivot) { // Move a[k] to the left side while (a[++lower] < pivot); if (a[lower] > pivot) { a[--upper] = a[lower]; } a[lower] = ak; } else { // ak > pivot - Move a[k] to the right side a[--upper] = ak; } } } /* * Swap the pivot into its final position. */ a[low] = a[lower]; a[lower] = pivot; /* * Sort the right part (possibly in parallel), excluding * known pivot. All elements from the central part are * equal and therefore already sorted. */ if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { sorter.forkSorter(bits | 1, upper, high); } else { sort(sorter, a, bits | 1, upper, high); } } high = lower; // Iterate along the left part } } /** * Sorts the specified range of the array using mixed insertion sort. * * Mixed insertion sort is combination of simple insertion sort, * pin insertion sort and pair insertion sort. * * In the context of Dual-Pivot Quicksort, the pivot element * from the left part plays the role of sentinel, because it * is less than any elements from the given part. Therefore, * expensive check of the left range can be skipped on each * iteration unless it is the leftmost call. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param end the index of the last element for simple insertion sort * @param high the index of the last element, exclusive, to be sorted */ private static void mixedInsertionSort(long[] a, int low, int end, int high) { if (end == high) { /* * Invoke simple insertion sort on tiny array. */ for (int i; ++low < end; ) { long ai = a[i = low]; while (ai < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } else { /* * Start with pin insertion sort on small part. * * Pin insertion sort is extended simple insertion sort. * The main idea of this sort is to put elements larger * than an element called pin to the end of array (the * proper area for such elements). It avoids expensive * movements of these elements through the whole array. */ long pin = a[end]; for (int i, p = high; ++low < end; ) { long ai = a[i = low]; if (ai < a[i - 1]) { // Small element /* * Insert small element into sorted part. */ a[i] = a[--i]; while (ai < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } else if (p > i && ai > pin) { // Large element /* * Find element smaller than pin. */ while (a[--p] > pin); /* * Swap it with large element. */ if (p > i) { ai = a[p]; a[p] = a[i]; } /* * Insert small element into sorted part. */ while (ai < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } /* * Continue with pair insertion sort on remain part. */ for (int i; low < high; ++low) { long a1 = a[i = low], a2 = a[++low]; /* * Insert two elements per iteration: at first, insert the * larger element and then insert the smaller element, but * from the position where the larger element was inserted. */ if (a1 > a2) { while (a1 < a[--i]) { a[i + 2] = a[i]; } a[++i + 1] = a1; while (a2 < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = a2; } else if (a1 < a[i - 1]) { while (a2 < a[--i]) { a[i + 2] = a[i]; } a[++i + 1] = a2; while (a1 < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = a1; } } } } /** * Sorts the specified range of the array using insertion sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void insertionSort(long[] a, int low, int high) { for (int i, k = low; ++k < high; ) { long ai = a[i = k]; if (ai < a[i - 1]) { while (--i >= low && ai < a[i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } } /** * Sorts the specified range of the array using heap sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void heapSort(long[] a, int low, int high) { for (int k = (low + high) >>> 1; k > low; ) { pushDown(a, --k, a[k], low, high); } while (--high > low) { long max = a[low]; pushDown(a, low, a[high], low, high); a[high] = max; } } /** * Pushes specified element down during heap sort. * * @param a the given array * @param p the start index * @param value the given element * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void pushDown(long[] a, int p, long value, int low, int high) { for (int k ;; a[p] = a[p = k]) { k = (p << 1) - low + 2; // Index of the right child if (k > high) { break; } if (k == high || a[k] < a[k - 1]) { --k; } if (a[k] <= value) { break; } } a[p] = value; } /** * Tries to sort the specified range of the array. * * @param sorter parallel context * @param a the array to be sorted * @param low the index of the first element to be sorted * @param size the array size * @return true if finally sorted, false otherwise */ private static boolean tryMergeRuns(Sorter sorter, long[] a, int low, int size) { /* * The run array is constructed only if initial runs are * long enough to continue, run[i] then holds start index * of the i-th sequence of elements in non-descending order. */ int[] run = null; int high = low + size; int count = 1, last = low; /* * Identify all possible runs. */ for (int k = low + 1; k < high; ) { /* * Find the end index of the current run. */ if (a[k - 1] < a[k]) { // Identify ascending sequence while (++k < high && a[k - 1] <= a[k]); } else if (a[k - 1] > a[k]) { // Identify descending sequence while (++k < high && a[k - 1] >= a[k]); // Reverse into ascending order for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { long ai = a[i]; a[i] = a[j]; a[j] = ai; } } else { // Identify constant sequence for (long ak = a[k]; ++k < high && ak == a[k]; ); if (k < high) { continue; } } /* * Check special cases. */ if (run == null) { if (k == high) { /* * The array is monotonous sequence, * and therefore already sorted. */ return true; } if (k - low < MIN_FIRST_RUN_SIZE) { /* * The first run is too small * to proceed with scanning. */ return false; } run = new int[((size >> 10) | 0x7F) & 0x3FF]; run[0] = low; } else if (a[last - 1] > a[last]) { if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { /* * The first runs are not long * enough to continue scanning. */ return false; } if (++count == MAX_RUN_CAPACITY) { /* * Array is not highly structured. */ return false; } if (count == run.length) { /* * Increase capacity of index array. */ run = Arrays.copyOf(run, count << 1); } } run[count] = (last = k); } /* * Merge runs of highly structured array. */ if (count > 1) { long[] b; int offset = low; if (sorter == null || (b = (long[]) sorter.b) == null) { b = new long[size]; } else { offset = sorter.offset; } mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); } return true; } /** * Merges the specified runs. * * @param a the source array * @param b the temporary buffer used in merging * @param offset the start index in the source, inclusive * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) * @param parallel indicates whether merging is performed in parallel * @param run the start indexes of the runs, inclusive * @param lo the start index of the first run, inclusive * @param hi the start index of the last run, inclusive * @return the destination where runs are merged */ private static long[] mergeRuns(long[] a, long[] b, int offset, int aim, boolean parallel, int[] run, int lo, int hi) { if (hi - lo == 1) { if (aim >= 0) { return a; } for (int i = run[hi], j = i - offset, low = run[lo]; i > low; b[--j] = a[--i] ); return b; } /* * Split into approximately equal parts. */ int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; while (run[++mi + 1] <= rmi); /* * Merge the left and right parts. */ long[] a1, a2; if (parallel && hi - lo > MIN_RUN_COUNT) { RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); a2 = (long[]) merger.getDestination(); } else { a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); } long[] dst = a1 == a ? b : a; int k = a1 == a ? run[lo] - offset : run[lo]; int lo1 = a1 == b ? run[lo] - offset : run[lo]; int hi1 = a1 == b ? run[mi] - offset : run[mi]; int lo2 = a2 == b ? run[mi] - offset : run[mi]; int hi2 = a2 == b ? run[hi] - offset : run[hi]; if (parallel) { new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); } else { mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); } return dst; } /** * Merges the sorted parts. * * @param merger parallel context * @param dst the destination where parts are merged * @param k the start index of the destination, inclusive * @param a1 the first part * @param lo1 the start index of the first part, inclusive * @param hi1 the end index of the first part, exclusive * @param a2 the second part * @param lo2 the start index of the second part, inclusive * @param hi2 the end index of the second part, exclusive */ private static void mergeParts(Merger merger, long[] dst, int k, long[] a1, int lo1, int hi1, long[] a2, int lo2, int hi2) { if (merger != null && a1 == a2) { while (true) { /* * The first part must be larger. */ if (hi1 - lo1 < hi2 - lo2) { int lo = lo1; lo1 = lo2; lo2 = lo; int hi = hi1; hi1 = hi2; hi2 = hi; } /* * Small parts will be merged sequentially. */ if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { break; } /* * Find the median of the larger part. */ int mi1 = (lo1 + hi1) >>> 1; long key = a1[mi1]; int mi2 = hi2; /* * Partition the smaller part. */ for (int loo = lo2; loo < mi2; ) { int t = (loo + mi2) >>> 1; if (key > a2[t]) { loo = t + 1; } else { mi2 = t; } } int d = mi2 - lo2 + mi1 - lo1; /* * Merge the right sub-parts in parallel. */ merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); /* * Process the sub-left parts. */ hi1 = mi1; hi2 = mi2; } } /* * Merge small parts sequentially. */ while (lo1 < hi1 && lo2 < hi2) { dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; } if (dst != a1 || k < lo1) { while (lo1 < hi1) { dst[k++] = a1[lo1++]; } } if (dst != a2 || k < lo2) { while (lo2 < hi2) { dst[k++] = a2[lo2++]; } } } // [byte] /** * Sorts the specified range of the array using * counting sort or insertion sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(byte[] a, int low, int high) { if (high - low > MIN_BYTE_COUNTING_SORT_SIZE) { countingSort(a, low, high); } else { insertionSort(a, low, high); } } /** * Sorts the specified range of the array using insertion sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void insertionSort(byte[] a, int low, int high) { for (int i, k = low; ++k < high; ) { byte ai = a[i = k]; if (ai < a[i - 1]) { while (--i >= low && ai < a[i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } } /** * The number of distinct byte values. */ private static final int NUM_BYTE_VALUES = 1 << 8; /** * Max index of byte counter. */ private static final int MAX_BYTE_INDEX = Byte.MAX_VALUE + NUM_BYTE_VALUES + 1; /** * Sorts the specified range of the array using counting sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void countingSort(byte[] a, int low, int high) { int[] count = new int[NUM_BYTE_VALUES]; /* * Compute a histogram with the number of each values. */ for (int i = high; i > low; ++count[a[--i] & 0xFF]); /* * Place values on their final positions. */ if (high - low > NUM_BYTE_VALUES) { for (int i = MAX_BYTE_INDEX; --i > Byte.MAX_VALUE; ) { int value = i & 0xFF; for (low = high - count[value]; high > low; a[--high] = (byte) value ); } } else { for (int i = MAX_BYTE_INDEX; high > low; ) { while (count[--i & 0xFF] == 0); int value = i & 0xFF; int c = count[value]; do { a[--high] = (byte) value; } while (--c > 0); } } } // [char] /** * Sorts the specified range of the array using * counting sort or Dual-Pivot Quicksort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(char[] a, int low, int high) { if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) { countingSort(a, low, high); } else { sort(a, 0, low, high); } } /** * Sorts the specified array using the Dual-Pivot Quicksort and/or * other sorts in special-cases, possibly with parallel partitions. * * @param a the array to be sorted * @param bits the combination of recursion depth and bit flag, where * the right bit "0" indicates that array is the leftmost part * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(char[] a, int bits, int low, int high) { while (true) { int end = high - 1, size = high - low; /* * Invoke insertion sort on small leftmost part. */ if (size < MAX_INSERTION_SORT_SIZE) { insertionSort(a, low, high); return; } /* * Switch to counting sort if execution * time is becoming quadratic. */ if ((bits += 2) > MAX_RECURSION_DEPTH) { countingSort(a, low, high); return; } /* * Use an inexpensive approximation of the golden ratio * to select five sample elements and determine pivots. */ int step = (size >> 3) * 3 + 3; /* * Five elements around (and including) the central element * will be used for pivot selection as described below. The * unequal choice of spacing these elements was empirically * determined to work well on a wide variety of inputs. */ int e1 = low + step; int e5 = end - step; int e3 = (e1 + e5) >>> 1; int e2 = (e1 + e3) >>> 1; int e4 = (e3 + e5) >>> 1; char a3 = a[e3]; /* * Sort these elements in place by the combination * of 4-element sorting network and insertion sort. * * 5 ------o-----------o------------ * | | * 4 ------|-----o-----o-----o------ * | | | * 2 ------o-----|-----o-----o------ * | | * 1 ------------o-----o------------ */ if (a[e5] < a[e2]) { char t = a[e5]; a[e5] = a[e2]; a[e2] = t; } if (a[e4] < a[e1]) { char t = a[e4]; a[e4] = a[e1]; a[e1] = t; } if (a[e5] < a[e4]) { char t = a[e5]; a[e5] = a[e4]; a[e4] = t; } if (a[e2] < a[e1]) { char t = a[e2]; a[e2] = a[e1]; a[e1] = t; } if (a[e4] < a[e2]) { char t = a[e4]; a[e4] = a[e2]; a[e2] = t; } if (a3 < a[e2]) { if (a3 < a[e1]) { a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; } else { a[e3] = a[e2]; a[e2] = a3; } } else if (a3 > a[e4]) { if (a3 > a[e5]) { a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; } else { a[e3] = a[e4]; a[e4] = a3; } } // Pointers int lower = low; // The index of the last element of the left part int upper = end; // The index of the first element of the right part /* * Partitioning with 2 pivots in case of different elements. */ if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { /* * Use the first and fifth of the five sorted elements as * the pivots. These values are inexpensive approximation * of tertiles. Note, that pivot1 < pivot2. */ char pivot1 = a[e1]; char pivot2 = a[e5]; /* * The first and the last elements to be sorted are moved * to the locations formerly occupied by the pivots. When * partitioning is completed, the pivots are swapped back * into their final positions, and excluded from the next * subsequent sorting. */ a[e1] = a[lower]; a[e5] = a[upper]; /* * Skip elements, which are less or greater than the pivots. */ while (a[++lower] < pivot1); while (a[--upper] > pivot2); /* * Backward 3-interval partitioning * * left part central part right part * +------------------------------------------------------------+ * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | * +------------------------------------------------------------+ * ^ ^ ^ * | | | * lower k upper * * Invariants: * * all in (low, lower] < pivot1 * pivot1 <= all in (k, upper) <= pivot2 * all in [upper, end) > pivot2 * * Pointer k is the last index of ?-part */ for (int unused = --lower, k = ++upper; --k > lower; ) { char ak = a[k]; if (ak < pivot1) { // Move a[k] to the left side while (lower < k) { if (a[++lower] >= pivot1) { if (a[lower] > pivot2) { a[k] = a[--upper]; a[upper] = a[lower]; } else { a[k] = a[lower]; } a[lower] = ak; break; } } } else if (ak > pivot2) { // Move a[k] to the right side a[k] = a[--upper]; a[upper] = ak; } } /* * Swap the pivots into their final positions. */ a[low] = a[lower]; a[lower] = pivot1; a[end] = a[upper]; a[upper] = pivot2; /* * Sort non-left parts recursively, * excluding known pivots. */ sort(a, bits | 1, lower + 1, upper); sort(a, bits | 1, upper + 1, high); } else { // Use single pivot in case of many equal elements /* * Use the third of the five sorted elements as the pivot. * This value is inexpensive approximation of the median. */ char pivot = a[e3]; /* * The first element to be sorted is moved to the * location formerly occupied by the pivot. After * completion of partitioning the pivot is swapped * back into its final position, and excluded from * the next subsequent sorting. */ a[e3] = a[lower]; /* * Traditional 3-way (Dutch National Flag) partitioning * * left part central part right part * +------------------------------------------------------+ * | < pivot | ? | == pivot | > pivot | * +------------------------------------------------------+ * ^ ^ ^ * | | | * lower k upper * * Invariants: * * all in (low, lower] < pivot * all in (k, upper) == pivot * all in [upper, end] > pivot * * Pointer k is the last index of ?-part */ for (int k = ++upper; --k > lower; ) { char ak = a[k]; if (ak != pivot) { a[k] = pivot; if (ak < pivot) { // Move a[k] to the left side while (a[++lower] < pivot); if (a[lower] > pivot) { a[--upper] = a[lower]; } a[lower] = ak; } else { // ak > pivot - Move a[k] to the right side a[--upper] = ak; } } } /* * Swap the pivot into its final position. */ a[low] = a[lower]; a[lower] = pivot; /* * Sort the right part, excluding known pivot. * All elements from the central part are * equal and therefore already sorted. */ sort(a, bits | 1, upper, high); } high = lower; // Iterate along the left part } } /** * Sorts the specified range of the array using insertion sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void insertionSort(char[] a, int low, int high) { for (int i, k = low; ++k < high; ) { char ai = a[i = k]; if (ai < a[i - 1]) { while (--i >= low && ai < a[i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } } /** * The number of distinct char values. */ private static final int NUM_CHAR_VALUES = 1 << 16; /** * Sorts the specified range of the array using counting sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void countingSort(char[] a, int low, int high) { int[] count = new int[NUM_CHAR_VALUES]; /* * Compute a histogram with the number of each values. */ for (int i = high; i > low; ++count[a[--i]]); /* * Place values on their final positions. */ if (high - low > NUM_CHAR_VALUES) { for (int i = NUM_CHAR_VALUES; i > 0; ) { for (low = high - count[--i]; high > low; a[--high] = (char) i ); } } else { for (int i = NUM_CHAR_VALUES; high > low; ) { while (count[--i] == 0); int c = count[i]; do { a[--high] = (char) i; } while (--c > 0); } } } // [short] /** * Sorts the specified range of the array using * counting sort or Dual-Pivot Quicksort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(short[] a, int low, int high) { if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) { countingSort(a, low, high); } else { sort(a, 0, low, high); } } /** * Sorts the specified array using the Dual-Pivot Quicksort and/or * other sorts in special-cases, possibly with parallel partitions. * * @param a the array to be sorted * @param bits the combination of recursion depth and bit flag, where * the right bit "0" indicates that array is the leftmost part * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(short[] a, int bits, int low, int high) { while (true) { int end = high - 1, size = high - low; /* * Invoke insertion sort on small leftmost part. */ if (size < MAX_INSERTION_SORT_SIZE) { insertionSort(a, low, high); return; } /* * Switch to counting sort if execution * time is becoming quadratic. */ if ((bits += 2) > MAX_RECURSION_DEPTH) { countingSort(a, low, high); return; } /* * Use an inexpensive approximation of the golden ratio * to select five sample elements and determine pivots. */ int step = (size >> 3) * 3 + 3; /* * Five elements around (and including) the central element * will be used for pivot selection as described below. The * unequal choice of spacing these elements was empirically * determined to work well on a wide variety of inputs. */ int e1 = low + step; int e5 = end - step; int e3 = (e1 + e5) >>> 1; int e2 = (e1 + e3) >>> 1; int e4 = (e3 + e5) >>> 1; short a3 = a[e3]; /* * Sort these elements in place by the combination * of 4-element sorting network and insertion sort. * * 5 ------o-----------o------------ * | | * 4 ------|-----o-----o-----o------ * | | | * 2 ------o-----|-----o-----o------ * | | * 1 ------------o-----o------------ */ if (a[e5] < a[e2]) { short t = a[e5]; a[e5] = a[e2]; a[e2] = t; } if (a[e4] < a[e1]) { short t = a[e4]; a[e4] = a[e1]; a[e1] = t; } if (a[e5] < a[e4]) { short t = a[e5]; a[e5] = a[e4]; a[e4] = t; } if (a[e2] < a[e1]) { short t = a[e2]; a[e2] = a[e1]; a[e1] = t; } if (a[e4] < a[e2]) { short t = a[e4]; a[e4] = a[e2]; a[e2] = t; } if (a3 < a[e2]) { if (a3 < a[e1]) { a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; } else { a[e3] = a[e2]; a[e2] = a3; } } else if (a3 > a[e4]) { if (a3 > a[e5]) { a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; } else { a[e3] = a[e4]; a[e4] = a3; } } // Pointers int lower = low; // The index of the last element of the left part int upper = end; // The index of the first element of the right part /* * Partitioning with 2 pivots in case of different elements. */ if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { /* * Use the first and fifth of the five sorted elements as * the pivots. These values are inexpensive approximation * of tertiles. Note, that pivot1 < pivot2. */ short pivot1 = a[e1]; short pivot2 = a[e5]; /* * The first and the last elements to be sorted are moved * to the locations formerly occupied by the pivots. When * partitioning is completed, the pivots are swapped back * into their final positions, and excluded from the next * subsequent sorting. */ a[e1] = a[lower]; a[e5] = a[upper]; /* * Skip elements, which are less or greater than the pivots. */ while (a[++lower] < pivot1); while (a[--upper] > pivot2); /* * Backward 3-interval partitioning * * left part central part right part * +------------------------------------------------------------+ * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | * +------------------------------------------------------------+ * ^ ^ ^ * | | | * lower k upper * * Invariants: * * all in (low, lower] < pivot1 * pivot1 <= all in (k, upper) <= pivot2 * all in [upper, end) > pivot2 * * Pointer k is the last index of ?-part */ for (int unused = --lower, k = ++upper; --k > lower; ) { short ak = a[k]; if (ak < pivot1) { // Move a[k] to the left side while (lower < k) { if (a[++lower] >= pivot1) { if (a[lower] > pivot2) { a[k] = a[--upper]; a[upper] = a[lower]; } else { a[k] = a[lower]; } a[lower] = ak; break; } } } else if (ak > pivot2) { // Move a[k] to the right side a[k] = a[--upper]; a[upper] = ak; } } /* * Swap the pivots into their final positions. */ a[low] = a[lower]; a[lower] = pivot1; a[end] = a[upper]; a[upper] = pivot2; /* * Sort non-left parts recursively, * excluding known pivots. */ sort(a, bits | 1, lower + 1, upper); sort(a, bits | 1, upper + 1, high); } else { // Use single pivot in case of many equal elements /* * Use the third of the five sorted elements as the pivot. * This value is inexpensive approximation of the median. */ short pivot = a[e3]; /* * The first element to be sorted is moved to the * location formerly occupied by the pivot. After * completion of partitioning the pivot is swapped * back into its final position, and excluded from * the next subsequent sorting. */ a[e3] = a[lower]; /* * Traditional 3-way (Dutch National Flag) partitioning * * left part central part right part * +------------------------------------------------------+ * | < pivot | ? | == pivot | > pivot | * +------------------------------------------------------+ * ^ ^ ^ * | | | * lower k upper * * Invariants: * * all in (low, lower] < pivot * all in (k, upper) == pivot * all in [upper, end] > pivot * * Pointer k is the last index of ?-part */ for (int k = ++upper; --k > lower; ) { short ak = a[k]; if (ak != pivot) { a[k] = pivot; if (ak < pivot) { // Move a[k] to the left side while (a[++lower] < pivot); if (a[lower] > pivot) { a[--upper] = a[lower]; } a[lower] = ak; } else { // ak > pivot - Move a[k] to the right side a[--upper] = ak; } } } /* * Swap the pivot into its final position. */ a[low] = a[lower]; a[lower] = pivot; /* * Sort the right part, excluding known pivot. * All elements from the central part are * equal and therefore already sorted. */ sort(a, bits | 1, upper, high); } high = lower; // Iterate along the left part } } /** * Sorts the specified range of the array using insertion sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void insertionSort(short[] a, int low, int high) { for (int i, k = low; ++k < high; ) { short ai = a[i = k]; if (ai < a[i - 1]) { while (--i >= low && ai < a[i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } } /** * The number of distinct short values. */ private static final int NUM_SHORT_VALUES = 1 << 16; /** * Max index of short counter. */ private static final int MAX_SHORT_INDEX = Short.MAX_VALUE + NUM_SHORT_VALUES + 1; /** * Sorts the specified range of the array using counting sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void countingSort(short[] a, int low, int high) { int[] count = new int[NUM_SHORT_VALUES]; /* * Compute a histogram with the number of each values. */ for (int i = high; i > low; ++count[a[--i] & 0xFFFF]); /* * Place values on their final positions. */ if (high - low > NUM_SHORT_VALUES) { for (int i = MAX_SHORT_INDEX; --i > Short.MAX_VALUE; ) { int value = i & 0xFFFF; for (low = high - count[value]; high > low; a[--high] = (short) value ); } } else { for (int i = MAX_SHORT_INDEX; high > low; ) { while (count[--i & 0xFFFF] == 0); int value = i & 0xFFFF; int c = count[value]; do { a[--high] = (short) value; } while (--c > 0); } } } // [float] /** * Sorts the specified range of the array using parallel merge * sort and/or Dual-Pivot Quicksort. * * To balance the faster splitting and parallelism of merge sort * with the faster element partitioning of Quicksort, ranges are * subdivided in tiers such that, if there is enough parallelism, * the four-way parallel merge is started, still ensuring enough * parallelism to process the partitions. * * @param a the array to be sorted * @param parallelism the parallelism level * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(float[] a, int parallelism, int low, int high) { /* * Phase 1. Count the number of negative zero -0.0f, * turn them into positive zero, and move all NaNs * to the end of the array. */ int numNegativeZero = 0; for (int k = high; k > low; ) { float ak = a[--k]; if (ak == 0.0f && Float.floatToRawIntBits(ak) < 0) { // ak is -0.0f numNegativeZero += 1; a[k] = 0.0f; } else if (ak != ak) { // ak is NaN a[k] = a[--high]; a[high] = ak; } } /* * Phase 2. Sort everything except NaNs, * which are already in place. */ int size = high - low; if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { int depth = getDepth(parallelism, size >> 12); float[] b = depth == 0 ? null : new float[size]; new Sorter(null, a, b, low, size, low, depth).invoke(); } else { sort(null, a, 0, low, high); } /* * Phase 3. Turn positive zero 0.0f * back into negative zero -0.0f. */ if (++numNegativeZero == 1) { return; } /* * Find the position one less than * the index of the first zero. */ while (low <= high) { int middle = (low + high) >>> 1; if (a[middle] < 0) { low = middle + 1; } else { high = middle - 1; } } /* * Replace the required number of 0.0f by -0.0f. */ while (--numNegativeZero > 0) { a[++high] = -0.0f; } } /** * Sorts the specified array using the Dual-Pivot Quicksort and/or * other sorts in special-cases, possibly with parallel partitions. * * @param sorter parallel context * @param a the array to be sorted * @param bits the combination of recursion depth and bit flag, where * the right bit "0" indicates that array is the leftmost part * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(Sorter sorter, float[] a, int bits, int low, int high) { while (true) { int end = high - 1, size = high - low; /* * Run mixed insertion sort on small non-leftmost parts. */ if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); return; } /* * Invoke insertion sort on small leftmost part. */ if (size < MAX_INSERTION_SORT_SIZE) { insertionSort(a, low, high); return; } /* * Check if the whole array or large non-leftmost * parts are nearly sorted and then merge runs. */ if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) && tryMergeRuns(sorter, a, low, size)) { return; } /* * Switch to heap sort if execution * time is becoming quadratic. */ if ((bits += 2) > MAX_RECURSION_DEPTH) { heapSort(a, low, high); return; } /* * Use an inexpensive approximation of the golden ratio * to select five sample elements and determine pivots. */ int step = (size >> 3) * 3 + 3; /* * Five elements around (and including) the central element * will be used for pivot selection as described below. The * unequal choice of spacing these elements was empirically * determined to work well on a wide variety of inputs. */ int e1 = low + step; int e5 = end - step; int e3 = (e1 + e5) >>> 1; int e2 = (e1 + e3) >>> 1; int e4 = (e3 + e5) >>> 1; float a3 = a[e3]; /* * Sort these elements in place by the combination * of 4-element sorting network and insertion sort. * * 5 ------o-----------o------------ * | | * 4 ------|-----o-----o-----o------ * | | | * 2 ------o-----|-----o-----o------ * | | * 1 ------------o-----o------------ */ if (a[e5] < a[e2]) { float t = a[e5]; a[e5] = a[e2]; a[e2] = t; } if (a[e4] < a[e1]) { float t = a[e4]; a[e4] = a[e1]; a[e1] = t; } if (a[e5] < a[e4]) { float t = a[e5]; a[e5] = a[e4]; a[e4] = t; } if (a[e2] < a[e1]) { float t = a[e2]; a[e2] = a[e1]; a[e1] = t; } if (a[e4] < a[e2]) { float t = a[e4]; a[e4] = a[e2]; a[e2] = t; } if (a3 < a[e2]) { if (a3 < a[e1]) { a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; } else { a[e3] = a[e2]; a[e2] = a3; } } else if (a3 > a[e4]) { if (a3 > a[e5]) { a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; } else { a[e3] = a[e4]; a[e4] = a3; } } // Pointers int lower = low; // The index of the last element of the left part int upper = end; // The index of the first element of the right part /* * Partitioning with 2 pivots in case of different elements. */ if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { /* * Use the first and fifth of the five sorted elements as * the pivots. These values are inexpensive approximation * of tertiles. Note, that pivot1 < pivot2. */ float pivot1 = a[e1]; float pivot2 = a[e5]; /* * The first and the last elements to be sorted are moved * to the locations formerly occupied by the pivots. When * partitioning is completed, the pivots are swapped back * into their final positions, and excluded from the next * subsequent sorting. */ a[e1] = a[lower]; a[e5] = a[upper]; /* * Skip elements, which are less or greater than the pivots. */ while (a[++lower] < pivot1); while (a[--upper] > pivot2); /* * Backward 3-interval partitioning * * left part central part right part * +------------------------------------------------------------+ * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | * +------------------------------------------------------------+ * ^ ^ ^ * | | | * lower k upper * * Invariants: * * all in (low, lower] < pivot1 * pivot1 <= all in (k, upper) <= pivot2 * all in [upper, end) > pivot2 * * Pointer k is the last index of ?-part */ for (int unused = --lower, k = ++upper; --k > lower; ) { float ak = a[k]; if (ak < pivot1) { // Move a[k] to the left side while (lower < k) { if (a[++lower] >= pivot1) { if (a[lower] > pivot2) { a[k] = a[--upper]; a[upper] = a[lower]; } else { a[k] = a[lower]; } a[lower] = ak; break; } } } else if (ak > pivot2) { // Move a[k] to the right side a[k] = a[--upper]; a[upper] = ak; } } /* * Swap the pivots into their final positions. */ a[low] = a[lower]; a[lower] = pivot1; a[end] = a[upper]; a[upper] = pivot2; /* * Sort non-left parts recursively (possibly in parallel), * excluding known pivots. */ if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { sorter.forkSorter(bits | 1, lower + 1, upper); sorter.forkSorter(bits | 1, upper + 1, high); } else { sort(sorter, a, bits | 1, lower + 1, upper); sort(sorter, a, bits | 1, upper + 1, high); } } else { // Use single pivot in case of many equal elements /* * Use the third of the five sorted elements as the pivot. * This value is inexpensive approximation of the median. */ float pivot = a[e3]; /* * The first element to be sorted is moved to the * location formerly occupied by the pivot. After * completion of partitioning the pivot is swapped * back into its final position, and excluded from * the next subsequent sorting. */ a[e3] = a[lower]; /* * Traditional 3-way (Dutch National Flag) partitioning * * left part central part right part * +------------------------------------------------------+ * | < pivot | ? | == pivot | > pivot | * +------------------------------------------------------+ * ^ ^ ^ * | | | * lower k upper * * Invariants: * * all in (low, lower] < pivot * all in (k, upper) == pivot * all in [upper, end] > pivot * * Pointer k is the last index of ?-part */ for (int k = ++upper; --k > lower; ) { float ak = a[k]; if (ak != pivot) { a[k] = pivot; if (ak < pivot) { // Move a[k] to the left side while (a[++lower] < pivot); if (a[lower] > pivot) { a[--upper] = a[lower]; } a[lower] = ak; } else { // ak > pivot - Move a[k] to the right side a[--upper] = ak; } } } /* * Swap the pivot into its final position. */ a[low] = a[lower]; a[lower] = pivot; /* * Sort the right part (possibly in parallel), excluding * known pivot. All elements from the central part are * equal and therefore already sorted. */ if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { sorter.forkSorter(bits | 1, upper, high); } else { sort(sorter, a, bits | 1, upper, high); } } high = lower; // Iterate along the left part } } /** * Sorts the specified range of the array using mixed insertion sort. * * Mixed insertion sort is combination of simple insertion sort, * pin insertion sort and pair insertion sort. * * In the context of Dual-Pivot Quicksort, the pivot element * from the left part plays the role of sentinel, because it * is less than any elements from the given part. Therefore, * expensive check of the left range can be skipped on each * iteration unless it is the leftmost call. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param end the index of the last element for simple insertion sort * @param high the index of the last element, exclusive, to be sorted */ private static void mixedInsertionSort(float[] a, int low, int end, int high) { if (end == high) { /* * Invoke simple insertion sort on tiny array. */ for (int i; ++low < end; ) { float ai = a[i = low]; while (ai < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } else { /* * Start with pin insertion sort on small part. * * Pin insertion sort is extended simple insertion sort. * The main idea of this sort is to put elements larger * than an element called pin to the end of array (the * proper area for such elements). It avoids expensive * movements of these elements through the whole array. */ float pin = a[end]; for (int i, p = high; ++low < end; ) { float ai = a[i = low]; if (ai < a[i - 1]) { // Small element /* * Insert small element into sorted part. */ a[i] = a[--i]; while (ai < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } else if (p > i && ai > pin) { // Large element /* * Find element smaller than pin. */ while (a[--p] > pin); /* * Swap it with large element. */ if (p > i) { ai = a[p]; a[p] = a[i]; } /* * Insert small element into sorted part. */ while (ai < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } /* * Continue with pair insertion sort on remain part. */ for (int i; low < high; ++low) { float a1 = a[i = low], a2 = a[++low]; /* * Insert two elements per iteration: at first, insert the * larger element and then insert the smaller element, but * from the position where the larger element was inserted. */ if (a1 > a2) { while (a1 < a[--i]) { a[i + 2] = a[i]; } a[++i + 1] = a1; while (a2 < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = a2; } else if (a1 < a[i - 1]) { while (a2 < a[--i]) { a[i + 2] = a[i]; } a[++i + 1] = a2; while (a1 < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = a1; } } } } /** * Sorts the specified range of the array using insertion sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void insertionSort(float[] a, int low, int high) { for (int i, k = low; ++k < high; ) { float ai = a[i = k]; if (ai < a[i - 1]) { while (--i >= low && ai < a[i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } } /** * Sorts the specified range of the array using heap sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void heapSort(float[] a, int low, int high) { for (int k = (low + high) >>> 1; k > low; ) { pushDown(a, --k, a[k], low, high); } while (--high > low) { float max = a[low]; pushDown(a, low, a[high], low, high); a[high] = max; } } /** * Pushes specified element down during heap sort. * * @param a the given array * @param p the start index * @param value the given element * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void pushDown(float[] a, int p, float value, int low, int high) { for (int k ;; a[p] = a[p = k]) { k = (p << 1) - low + 2; // Index of the right child if (k > high) { break; } if (k == high || a[k] < a[k - 1]) { --k; } if (a[k] <= value) { break; } } a[p] = value; } /** * Tries to sort the specified range of the array. * * @param sorter parallel context * @param a the array to be sorted * @param low the index of the first element to be sorted * @param size the array size * @return true if finally sorted, false otherwise */ private static boolean tryMergeRuns(Sorter sorter, float[] a, int low, int size) { /* * The run array is constructed only if initial runs are * long enough to continue, run[i] then holds start index * of the i-th sequence of elements in non-descending order. */ int[] run = null; int high = low + size; int count = 1, last = low; /* * Identify all possible runs. */ for (int k = low + 1; k < high; ) { /* * Find the end index of the current run. */ if (a[k - 1] < a[k]) { // Identify ascending sequence while (++k < high && a[k - 1] <= a[k]); } else if (a[k - 1] > a[k]) { // Identify descending sequence while (++k < high && a[k - 1] >= a[k]); // Reverse into ascending order for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { float ai = a[i]; a[i] = a[j]; a[j] = ai; } } else { // Identify constant sequence for (float ak = a[k]; ++k < high && ak == a[k]; ); if (k < high) { continue; } } /* * Check special cases. */ if (run == null) { if (k == high) { /* * The array is monotonous sequence, * and therefore already sorted. */ return true; } if (k - low < MIN_FIRST_RUN_SIZE) { /* * The first run is too small * to proceed with scanning. */ return false; } run = new int[((size >> 10) | 0x7F) & 0x3FF]; run[0] = low; } else if (a[last - 1] > a[last]) { if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { /* * The first runs are not long * enough to continue scanning. */ return false; } if (++count == MAX_RUN_CAPACITY) { /* * Array is not highly structured. */ return false; } if (count == run.length) { /* * Increase capacity of index array. */ run = Arrays.copyOf(run, count << 1); } } run[count] = (last = k); } /* * Merge runs of highly structured array. */ if (count > 1) { float[] b; int offset = low; if (sorter == null || (b = (float[]) sorter.b) == null) { b = new float[size]; } else { offset = sorter.offset; } mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); } return true; } /** * Merges the specified runs. * * @param a the source array * @param b the temporary buffer used in merging * @param offset the start index in the source, inclusive * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) * @param parallel indicates whether merging is performed in parallel * @param run the start indexes of the runs, inclusive * @param lo the start index of the first run, inclusive * @param hi the start index of the last run, inclusive * @return the destination where runs are merged */ private static float[] mergeRuns(float[] a, float[] b, int offset, int aim, boolean parallel, int[] run, int lo, int hi) { if (hi - lo == 1) { if (aim >= 0) { return a; } for (int i = run[hi], j = i - offset, low = run[lo]; i > low; b[--j] = a[--i] ); return b; } /* * Split into approximately equal parts. */ int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; while (run[++mi + 1] <= rmi); /* * Merge the left and right parts. */ float[] a1, a2; if (parallel && hi - lo > MIN_RUN_COUNT) { RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); a2 = (float[]) merger.getDestination(); } else { a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); } float[] dst = a1 == a ? b : a; int k = a1 == a ? run[lo] - offset : run[lo]; int lo1 = a1 == b ? run[lo] - offset : run[lo]; int hi1 = a1 == b ? run[mi] - offset : run[mi]; int lo2 = a2 == b ? run[mi] - offset : run[mi]; int hi2 = a2 == b ? run[hi] - offset : run[hi]; if (parallel) { new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); } else { mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); } return dst; } /** * Merges the sorted parts. * * @param merger parallel context * @param dst the destination where parts are merged * @param k the start index of the destination, inclusive * @param a1 the first part * @param lo1 the start index of the first part, inclusive * @param hi1 the end index of the first part, exclusive * @param a2 the second part * @param lo2 the start index of the second part, inclusive * @param hi2 the end index of the second part, exclusive */ private static void mergeParts(Merger merger, float[] dst, int k, float[] a1, int lo1, int hi1, float[] a2, int lo2, int hi2) { if (merger != null && a1 == a2) { while (true) { /* * The first part must be larger. */ if (hi1 - lo1 < hi2 - lo2) { int lo = lo1; lo1 = lo2; lo2 = lo; int hi = hi1; hi1 = hi2; hi2 = hi; } /* * Small parts will be merged sequentially. */ if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { break; } /* * Find the median of the larger part. */ int mi1 = (lo1 + hi1) >>> 1; float key = a1[mi1]; int mi2 = hi2; /* * Partition the smaller part. */ for (int loo = lo2; loo < mi2; ) { int t = (loo + mi2) >>> 1; if (key > a2[t]) { loo = t + 1; } else { mi2 = t; } } int d = mi2 - lo2 + mi1 - lo1; /* * Merge the right sub-parts in parallel. */ merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); /* * Process the sub-left parts. */ hi1 = mi1; hi2 = mi2; } } /* * Merge small parts sequentially. */ while (lo1 < hi1 && lo2 < hi2) { dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; } if (dst != a1 || k < lo1) { while (lo1 < hi1) { dst[k++] = a1[lo1++]; } } if (dst != a2 || k < lo2) { while (lo2 < hi2) { dst[k++] = a2[lo2++]; } } } // [double] /** * Sorts the specified range of the array using parallel merge * sort and/or Dual-Pivot Quicksort. * * To balance the faster splitting and parallelism of merge sort * with the faster element partitioning of Quicksort, ranges are * subdivided in tiers such that, if there is enough parallelism, * the four-way parallel merge is started, still ensuring enough * parallelism to process the partitions. * * @param a the array to be sorted * @param parallelism the parallelism level * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(double[] a, int parallelism, int low, int high) { /* * Phase 1. Count the number of negative zero -0.0d, * turn them into positive zero, and move all NaNs * to the end of the array. */ int numNegativeZero = 0; for (int k = high; k > low; ) { double ak = a[--k]; if (ak == 0.0d && Double.doubleToRawLongBits(ak) < 0) { // ak is -0.0d numNegativeZero += 1; a[k] = 0.0d; } else if (ak != ak) { // ak is NaN a[k] = a[--high]; a[high] = ak; } } /* * Phase 2. Sort everything except NaNs, * which are already in place. */ int size = high - low; if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { int depth = getDepth(parallelism, size >> 12); double[] b = depth == 0 ? null : new double[size]; new Sorter(null, a, b, low, size, low, depth).invoke(); } else { sort(null, a, 0, low, high); } /* * Phase 3. Turn positive zero 0.0d * back into negative zero -0.0d. */ if (++numNegativeZero == 1) { return; } /* * Find the position one less than * the index of the first zero. */ while (low <= high) { int middle = (low + high) >>> 1; if (a[middle] < 0) { low = middle + 1; } else { high = middle - 1; } } /* * Replace the required number of 0.0d by -0.0d. */ while (--numNegativeZero > 0) { a[++high] = -0.0d; } } /** * Sorts the specified array using the Dual-Pivot Quicksort and/or * other sorts in special-cases, possibly with parallel partitions. * * @param sorter parallel context * @param a the array to be sorted * @param bits the combination of recursion depth and bit flag, where * the right bit "0" indicates that array is the leftmost part * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ static void sort(Sorter sorter, double[] a, int bits, int low, int high) { while (true) { int end = high - 1, size = high - low; /* * Run mixed insertion sort on small non-leftmost parts. */ if (size < MAX_MIXED_INSERTION_SORT_SIZE && (bits & 1) > 0) { mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); return; } /* * Invoke insertion sort on small leftmost part. */ if (size < MAX_INSERTION_SORT_SIZE) { insertionSort(a, low, high); return; } /* * Check if the whole array or large non-leftmost * parts are nearly sorted and then merge runs. */ if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) && tryMergeRuns(sorter, a, low, size)) { return; } /* * Switch to heap sort if execution * time is becoming quadratic. */ if ((bits += 2) > MAX_RECURSION_DEPTH) { heapSort(a, low, high); return; } /* * Use an inexpensive approximation of the golden ratio * to select five sample elements and determine pivots. */ int step = (size >> 3) * 3 + 3; /* * Five elements around (and including) the central element * will be used for pivot selection as described below. The * unequal choice of spacing these elements was empirically * determined to work well on a wide variety of inputs. */ int e1 = low + step; int e5 = end - step; int e3 = (e1 + e5) >>> 1; int e2 = (e1 + e3) >>> 1; int e4 = (e3 + e5) >>> 1; double a3 = a[e3]; /* * Sort these elements in place by the combination * of 4-element sorting network and insertion sort. * * 5 ------o-----------o------------ * | | * 4 ------|-----o-----o-----o------ * | | | * 2 ------o-----|-----o-----o------ * | | * 1 ------------o-----o------------ */ if (a[e5] < a[e2]) { double t = a[e5]; a[e5] = a[e2]; a[e2] = t; } if (a[e4] < a[e1]) { double t = a[e4]; a[e4] = a[e1]; a[e1] = t; } if (a[e5] < a[e4]) { double t = a[e5]; a[e5] = a[e4]; a[e4] = t; } if (a[e2] < a[e1]) { double t = a[e2]; a[e2] = a[e1]; a[e1] = t; } if (a[e4] < a[e2]) { double t = a[e4]; a[e4] = a[e2]; a[e2] = t; } if (a3 < a[e2]) { if (a3 < a[e1]) { a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; } else { a[e3] = a[e2]; a[e2] = a3; } } else if (a3 > a[e4]) { if (a3 > a[e5]) { a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; } else { a[e3] = a[e4]; a[e4] = a3; } } // Pointers int lower = low; // The index of the last element of the left part int upper = end; // The index of the first element of the right part /* * Partitioning with 2 pivots in case of different elements. */ if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { /* * Use the first and fifth of the five sorted elements as * the pivots. These values are inexpensive approximation * of tertiles. Note, that pivot1 < pivot2. */ double pivot1 = a[e1]; double pivot2 = a[e5]; /* * The first and the last elements to be sorted are moved * to the locations formerly occupied by the pivots. When * partitioning is completed, the pivots are swapped back * into their final positions, and excluded from the next * subsequent sorting. */ a[e1] = a[lower]; a[e5] = a[upper]; /* * Skip elements, which are less or greater than the pivots. */ while (a[++lower] < pivot1); while (a[--upper] > pivot2); /* * Backward 3-interval partitioning * * left part central part right part * +------------------------------------------------------------+ * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | * +------------------------------------------------------------+ * ^ ^ ^ * | | | * lower k upper * * Invariants: * * all in (low, lower] < pivot1 * pivot1 <= all in (k, upper) <= pivot2 * all in [upper, end) > pivot2 * * Pointer k is the last index of ?-part */ for (int unused = --lower, k = ++upper; --k > lower; ) { double ak = a[k]; if (ak < pivot1) { // Move a[k] to the left side while (lower < k) { if (a[++lower] >= pivot1) { if (a[lower] > pivot2) { a[k] = a[--upper]; a[upper] = a[lower]; } else { a[k] = a[lower]; } a[lower] = ak; break; } } } else if (ak > pivot2) { // Move a[k] to the right side a[k] = a[--upper]; a[upper] = ak; } } /* * Swap the pivots into their final positions. */ a[low] = a[lower]; a[lower] = pivot1; a[end] = a[upper]; a[upper] = pivot2; /* * Sort non-left parts recursively (possibly in parallel), * excluding known pivots. */ if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { sorter.forkSorter(bits | 1, lower + 1, upper); sorter.forkSorter(bits | 1, upper + 1, high); } else { sort(sorter, a, bits | 1, lower + 1, upper); sort(sorter, a, bits | 1, upper + 1, high); } } else { // Use single pivot in case of many equal elements /* * Use the third of the five sorted elements as the pivot. * This value is inexpensive approximation of the median. */ double pivot = a[e3]; /* * The first element to be sorted is moved to the * location formerly occupied by the pivot. After * completion of partitioning the pivot is swapped * back into its final position, and excluded from * the next subsequent sorting. */ a[e3] = a[lower]; /* * Traditional 3-way (Dutch National Flag) partitioning * * left part central part right part * +------------------------------------------------------+ * | < pivot | ? | == pivot | > pivot | * +------------------------------------------------------+ * ^ ^ ^ * | | | * lower k upper * * Invariants: * * all in (low, lower] < pivot * all in (k, upper) == pivot * all in [upper, end] > pivot * * Pointer k is the last index of ?-part */ for (int k = ++upper; --k > lower; ) { double ak = a[k]; if (ak != pivot) { a[k] = pivot; if (ak < pivot) { // Move a[k] to the left side while (a[++lower] < pivot); if (a[lower] > pivot) { a[--upper] = a[lower]; } a[lower] = ak; } else { // ak > pivot - Move a[k] to the right side a[--upper] = ak; } } } /* * Swap the pivot into its final position. */ a[low] = a[lower]; a[lower] = pivot; /* * Sort the right part (possibly in parallel), excluding * known pivot. All elements from the central part are * equal and therefore already sorted. */ if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { sorter.forkSorter(bits | 1, upper, high); } else { sort(sorter, a, bits | 1, upper, high); } } high = lower; // Iterate along the left part } } /** * Sorts the specified range of the array using mixed insertion sort. * * Mixed insertion sort is combination of simple insertion sort, * pin insertion sort and pair insertion sort. * * In the context of Dual-Pivot Quicksort, the pivot element * from the left part plays the role of sentinel, because it * is less than any elements from the given part. Therefore, * expensive check of the left range can be skipped on each * iteration unless it is the leftmost call. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param end the index of the last element for simple insertion sort * @param high the index of the last element, exclusive, to be sorted */ private static void mixedInsertionSort(double[] a, int low, int end, int high) { if (end == high) { /* * Invoke simple insertion sort on tiny array. */ for (int i; ++low < end; ) { double ai = a[i = low]; while (ai < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } else { /* * Start with pin insertion sort on small part. * * Pin insertion sort is extended simple insertion sort. * The main idea of this sort is to put elements larger * than an element called pin to the end of array (the * proper area for such elements). It avoids expensive * movements of these elements through the whole array. */ double pin = a[end]; for (int i, p = high; ++low < end; ) { double ai = a[i = low]; if (ai < a[i - 1]) { // Small element /* * Insert small element into sorted part. */ a[i] = a[--i]; while (ai < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } else if (p > i && ai > pin) { // Large element /* * Find element smaller than pin. */ while (a[--p] > pin); /* * Swap it with large element. */ if (p > i) { ai = a[p]; a[p] = a[i]; } /* * Insert small element into sorted part. */ while (ai < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } /* * Continue with pair insertion sort on remain part. */ for (int i; low < high; ++low) { double a1 = a[i = low], a2 = a[++low]; /* * Insert two elements per iteration: at first, insert the * larger element and then insert the smaller element, but * from the position where the larger element was inserted. */ if (a1 > a2) { while (a1 < a[--i]) { a[i + 2] = a[i]; } a[++i + 1] = a1; while (a2 < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = a2; } else if (a1 < a[i - 1]) { while (a2 < a[--i]) { a[i + 2] = a[i]; } a[++i + 1] = a2; while (a1 < a[--i]) { a[i + 1] = a[i]; } a[i + 1] = a1; } } } } /** * Sorts the specified range of the array using insertion sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void insertionSort(double[] a, int low, int high) { for (int i, k = low; ++k < high; ) { double ai = a[i = k]; if (ai < a[i - 1]) { while (--i >= low && ai < a[i]) { a[i + 1] = a[i]; } a[i + 1] = ai; } } } /** * Sorts the specified range of the array using heap sort. * * @param a the array to be sorted * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void heapSort(double[] a, int low, int high) { for (int k = (low + high) >>> 1; k > low; ) { pushDown(a, --k, a[k], low, high); } while (--high > low) { double max = a[low]; pushDown(a, low, a[high], low, high); a[high] = max; } } /** * Pushes specified element down during heap sort. * * @param a the given array * @param p the start index * @param value the given element * @param low the index of the first element, inclusive, to be sorted * @param high the index of the last element, exclusive, to be sorted */ private static void pushDown(double[] a, int p, double value, int low, int high) { for (int k ;; a[p] = a[p = k]) { k = (p << 1) - low + 2; // Index of the right child if (k > high) { break; } if (k == high || a[k] < a[k - 1]) { --k; } if (a[k] <= value) { break; } } a[p] = value; } /** * Tries to sort the specified range of the array. * * @param sorter parallel context * @param a the array to be sorted * @param low the index of the first element to be sorted * @param size the array size * @return true if finally sorted, false otherwise */ private static boolean tryMergeRuns(Sorter sorter, double[] a, int low, int size) { /* * The run array is constructed only if initial runs are * long enough to continue, run[i] then holds start index * of the i-th sequence of elements in non-descending order. */ int[] run = null; int high = low + size; int count = 1, last = low; /* * Identify all possible runs. */ for (int k = low + 1; k < high; ) { /* * Find the end index of the current run. */ if (a[k - 1] < a[k]) { // Identify ascending sequence while (++k < high && a[k - 1] <= a[k]); } else if (a[k - 1] > a[k]) { // Identify descending sequence while (++k < high && a[k - 1] >= a[k]); // Reverse into ascending order for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { double ai = a[i]; a[i] = a[j]; a[j] = ai; } } else { // Identify constant sequence for (double ak = a[k]; ++k < high && ak == a[k]; ); if (k < high) { continue; } } /* * Check special cases. */ if (run == null) { if (k == high) { /* * The array is monotonous sequence, * and therefore already sorted. */ return true; } if (k - low < MIN_FIRST_RUN_SIZE) { /* * The first run is too small * to proceed with scanning. */ return false; } run = new int[((size >> 10) | 0x7F) & 0x3FF]; run[0] = low; } else if (a[last - 1] > a[last]) { if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { /* * The first runs are not long * enough to continue scanning. */ return false; } if (++count == MAX_RUN_CAPACITY) { /* * Array is not highly structured. */ return false; } if (count == run.length) { /* * Increase capacity of index array. */ run = Arrays.copyOf(run, count << 1); } } run[count] = (last = k); } /* * Merge runs of highly structured array. */ if (count > 1) { double[] b; int offset = low; if (sorter == null || (b = (double[]) sorter.b) == null) { b = new double[size]; } else { offset = sorter.offset; } mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); } return true; } /** * Merges the specified runs. * * @param a the source array * @param b the temporary buffer used in merging * @param offset the start index in the source, inclusive * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) * @param parallel indicates whether merging is performed in parallel * @param run the start indexes of the runs, inclusive * @param lo the start index of the first run, inclusive * @param hi the start index of the last run, inclusive * @return the destination where runs are merged */ private static double[] mergeRuns(double[] a, double[] b, int offset, int aim, boolean parallel, int[] run, int lo, int hi) { if (hi - lo == 1) { if (aim >= 0) { return a; } for (int i = run[hi], j = i - offset, low = run[lo]; i > low; b[--j] = a[--i] ); return b; } /* * Split into approximately equal parts. */ int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; while (run[++mi + 1] <= rmi); /* * Merge the left and right parts. */ double[] a1, a2; if (parallel && hi - lo > MIN_RUN_COUNT) { RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); a2 = (double[]) merger.getDestination(); } else { a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); } double[] dst = a1 == a ? b : a; int k = a1 == a ? run[lo] - offset : run[lo]; int lo1 = a1 == b ? run[lo] - offset : run[lo]; int hi1 = a1 == b ? run[mi] - offset : run[mi]; int lo2 = a2 == b ? run[mi] - offset : run[mi]; int hi2 = a2 == b ? run[hi] - offset : run[hi]; if (parallel) { new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); } else { mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); } return dst; } /** * Merges the sorted parts. * * @param merger parallel context * @param dst the destination where parts are merged * @param k the start index of the destination, inclusive * @param a1 the first part * @param lo1 the start index of the first part, inclusive * @param hi1 the end index of the first part, exclusive * @param a2 the second part * @param lo2 the start index of the second part, inclusive * @param hi2 the end index of the second part, exclusive */ private static void mergeParts(Merger merger, double[] dst, int k, double[] a1, int lo1, int hi1, double[] a2, int lo2, int hi2) { if (merger != null && a1 == a2) { while (true) { /* * The first part must be larger. */ if (hi1 - lo1 < hi2 - lo2) { int lo = lo1; lo1 = lo2; lo2 = lo; int hi = hi1; hi1 = hi2; hi2 = hi; } /* * Small parts will be merged sequentially. */ if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { break; } /* * Find the median of the larger part. */ int mi1 = (lo1 + hi1) >>> 1; double key = a1[mi1]; int mi2 = hi2; /* * Partition the smaller part. */ for (int loo = lo2; loo < mi2; ) { int t = (loo + mi2) >>> 1; if (key > a2[t]) { loo = t + 1; } else { mi2 = t; } } int d = mi2 - lo2 + mi1 - lo1; /* * Merge the right sub-parts in parallel. */ merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); /* * Process the sub-left parts. */ hi1 = mi1; hi2 = mi2; } } /* * Merge small parts sequentially. */ while (lo1 < hi1 && lo2 < hi2) { dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; } if (dst != a1 || k < lo1) { while (lo1 < hi1) { dst[k++] = a1[lo1++]; } } if (dst != a2 || k < lo2) { while (lo2 < hi2) { dst[k++] = a2[lo2++]; } } } // [class] /** * This class implements parallel sorting. */ private static final class Sorter extends CountedCompleter { private static final long serialVersionUID = 20180818L; private final Object a, b; private final int low, size, offset, depth; private Sorter(CountedCompleter parent, Object a, Object b, int low, int size, int offset, int depth) { super(parent); this.a = a; this.b = b; this.low = low; this.size = size; this.offset = offset; this.depth = depth; } @Override public final void compute() { if (depth < 0) { setPendingCount(2); int half = size >> 1; new Sorter(this, b, a, low, half, offset, depth + 1).fork(); new Sorter(this, b, a, low + half, size - half, offset, depth + 1).compute(); } else { if (a instanceof int[]) { sort(this, (int[]) a, depth, low, low + size); } else if (a instanceof long[]) { sort(this, (long[]) a, depth, low, low + size); } else if (a instanceof float[]) { sort(this, (float[]) a, depth, low, low + size); } else if (a instanceof double[]) { sort(this, (double[]) a, depth, low, low + size); } else { throw new IllegalArgumentException( "Unknown type of array: " + a.getClass().getName()); } } tryComplete(); } @Override public final void onCompletion(CountedCompleter caller) { if (depth < 0) { int mi = low + (size >> 1); boolean src = (depth & 1) == 0; new Merger(null, a, src ? low : low - offset, b, src ? low - offset : low, src ? mi - offset : mi, b, src ? mi - offset : mi, src ? low + size - offset : low + size ).invoke(); } } private void forkSorter(int depth, int low, int high) { addToPendingCount(1); Object a = this.a; // Use local variable for performance new Sorter(this, a, b, low, high - low, offset, depth).fork(); } } /** * This class implements parallel merging. */ private static final class Merger extends CountedCompleter { private static final long serialVersionUID = 20180818L; private final Object dst, a1, a2; private final int k, lo1, hi1, lo2, hi2; private Merger(CountedCompleter parent, Object dst, int k, Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) { super(parent); this.dst = dst; this.k = k; this.a1 = a1; this.lo1 = lo1; this.hi1 = hi1; this.a2 = a2; this.lo2 = lo2; this.hi2 = hi2; } @Override public final void compute() { if (dst instanceof int[]) { mergeParts(this, (int[]) dst, k, (int[]) a1, lo1, hi1, (int[]) a2, lo2, hi2); } else if (dst instanceof long[]) { mergeParts(this, (long[]) dst, k, (long[]) a1, lo1, hi1, (long[]) a2, lo2, hi2); } else if (dst instanceof float[]) { mergeParts(this, (float[]) dst, k, (float[]) a1, lo1, hi1, (float[]) a2, lo2, hi2); } else if (dst instanceof double[]) { mergeParts(this, (double[]) dst, k, (double[]) a1, lo1, hi1, (double[]) a2, lo2, hi2); } else { throw new IllegalArgumentException( "Unknown type of array: " + dst.getClass().getName()); } propagateCompletion(); } private void forkMerger(Object dst, int k, Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) { addToPendingCount(1); new Merger(this, dst, k, a1, lo1, hi1, a2, lo2, hi2).fork(); } } /** * This class implements parallel merging of runs. */ private static final class RunMerger extends RecursiveTask { private static final long serialVersionUID = 20180818L; private final Object a, b; private final int[] run; private final int offset, aim, lo, hi; private RunMerger(Object a, Object b, int offset, int aim, int[] run, int lo, int hi) { this.a = a; this.b = b; this.offset = offset; this.aim = aim; this.run = run; this.lo = lo; this.hi = hi; } @Override protected final Object compute() { if (a instanceof int[]) { return mergeRuns((int[]) a, (int[]) b, offset, aim, true, run, lo, hi); } if (a instanceof long[]) { return mergeRuns((long[]) a, (long[]) b, offset, aim, true, run, lo, hi); } if (a instanceof float[]) { return mergeRuns((float[]) a, (float[]) b, offset, aim, true, run, lo, hi); } if (a instanceof double[]) { return mergeRuns((double[]) a, (double[]) b, offset, aim, true, run, lo, hi); } throw new IllegalArgumentException( "Unknown type of array: " + a.getClass().getName()); } private RunMerger forkMe() { fork(); return this; } private Object getDestination() { join(); return getRawResult(); } } }