# HG changeset patch # User igerasim # Date 1500449970 25200 # Wed Jul 19 00:39:30 2017 -0700 # Node ID 079bae1fadd1a8782c084a4d90842602282c6ca6 # Parent b2fada7695e77ae2080e58fd0cdbbdaecad7eea6 [mq]: 8134512-provide-Alpha-Numeric-logical-Comparator diff --git a/src/java.base/share/classes/java/util/Comparator.java b/src/java.base/share/classes/java/util/Comparator.java --- a/src/java.base/share/classes/java/util/Comparator.java +++ b/src/java.base/share/classes/java/util/Comparator.java @@ -526,9 +526,81 @@ * @throws NullPointerException if the argument is null * @since 1.8 */ - public static Comparator comparingDouble(ToDoubleFunction keyExtractor) { + public static Comparator comparingDouble(ToDoubleFunction keyExtractor) { Objects.requireNonNull(keyExtractor); return (Comparator & Serializable) (c1, c2) -> Double.compare(keyExtractor.applyAsDouble(c1), keyExtractor.applyAsDouble(c2)); } + + /** + * Returns a comparator that compares {@link CharSequence} objects, + * paying special attention to the decimal digit characters. + * + *

The returned comparator follows the procedure to compare two char + * sequences: first, the common prefix of the sequences is skipped; + * then, if the sequences are different at the point lying inside or + * at the boundary of a subsequence, consisting of decimal digits, the + * corresponding numerical values are compared; otherwise, the char + * sequences are compared literally. + * + *

In the case the numeric values of two compared char sequences + * are equal, but their string representations have different number + * of leading zeroes, the comparator treats the number with more + * leading zeros as greater. + * E.g. {@code "abc 1" < "abc 01" < "abc 001"}. + * + * @implSpec The returned comparator uses + * {@link java.lang.Character#compare(char,char)} to compare pairs of + * characters of its arguments. The comparator uses + * {@link java.lang.Character#isDigit(char)} to test if the character + * represets a decimal digit. The comparator uses + * {@link java.lang.Character#digit(char, int)} with the second + * argument set to {@code 10} to determine the decimal numeric value + * of the character. + * + * @param the type of elements to be compared + * @return a comparator that is to compare character sequences + * + * @since 10 + */ + @SuppressWarnings("unchecked") + public static Comparator comparingNumerically() { + return (Comparator)Comparators.NumericComparator.INSTANCE_LEADING_ZEROS_BEHIND_NATURAL; + } + + /** + * Returns a comparator that compares {@link CharSequence} objects, + * paying special attention to the decimal digit characters. + * + *

The returned comparator follows the procedure to compare two char + * sequences: first, the common prefix of the sequences is skipped; + * then, if the sequences are different at the point lying inside or + * at the boundary of a subsequence, consisting of decimal digits, the + * corresponding numerical values are compared; otherwise, the char + * sequences are compared literally. + * + *

In the case the numeric values of two compared char sequences + * are equal, but their string representations have different number + * of leading zeroes, the comparator treats the number with more + * leading zeros as smaller. + * E.g. {@code "abc 001" < "abc 01" < "abc 1"}. + * + * @implSpec The returned comparator uses + * {@link java.lang.Character#compare(char,char)} to compare pairs of + * characters of its arguments. The comparator uses + * {@link java.lang.Character#isDigit(char)} to test if the character + * represets a decimal digit. The comparator uses + * {@link java.lang.Character#digit(char, int)} with the second + * argument set to {@code 10} to determine the decimal numeric value + * of the character. + * + * @param the type of elements to be compared + * @return a comparator that is to compare character sequences + * + * @since 10 + */ + @SuppressWarnings("unchecked") + public static Comparator comparingNumericallyLeadingZerosAhead() { + return (Comparator)Comparators.NumericComparator.INSTANCE_LEADING_ZEROS_AHEAD_NATURAL; + } } diff --git a/src/java.base/share/classes/java/util/Comparators.java b/src/java.base/share/classes/java/util/Comparators.java --- a/src/java.base/share/classes/java/util/Comparators.java +++ b/src/java.base/share/classes/java/util/Comparators.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -95,4 +95,190 @@ return new NullComparator<>(!nullFirst, real == null ? null : real.reversed()); } } + + /** + * Compares char sequences, taking into account their numeric part if one exists. + * + * @see Comparator.comparingNumerically() + * @see Comparator.comparingNumericallyLeadingZerosAhead() + */ + static class NumericComparator implements Comparator, Serializable { + + private static final long serialVersionUID = 0x77164074580FAC97L; + + static final Comparator INSTANCE_LEADING_ZEROS_AHEAD_NATURAL = + new NumericComparator(true) + { + @Override + @SuppressWarnings("unchecked") + public Comparator reversed() { + return (Comparator)INSTANCE_LEADING_ZEROS_AHEAD_REVERSED; + } + }; + + static final Comparator INSTANCE_LEADING_ZEROS_AHEAD_REVERSED = + new NumericComparator(true) + { + @Override + public int compare(CharSequence o1, CharSequence o2) { + return super.compare(o2, o1); + } + + @Override + @SuppressWarnings("unchecked") + public Comparator reversed() { + return (Comparator)INSTANCE_LEADING_ZEROS_AHEAD_NATURAL; + } + }; + + static final Comparator INSTANCE_LEADING_ZEROS_BEHIND_NATURAL = + new NumericComparator(false) + { + @Override + @SuppressWarnings("unchecked") + public Comparator reversed() { + return (Comparator)INSTANCE_LEADING_ZEROS_BEHIND_REVERSED; + } + }; + + static final Comparator INSTANCE_LEADING_ZEROS_BEHIND_REVERSED = + new NumericComparator(false) + { + @Override + public int compare(CharSequence o1, CharSequence o2) { + return super.compare(o2, o1); + } + + @Override + @SuppressWarnings("unchecked") + public Comparator reversed() { + return (Comparator)INSTANCE_LEADING_ZEROS_BEHIND_NATURAL; + } + }; + + /** + * If true, the string representation of a number with + * more leading zeros will be treated as smaller. + * E.g.: "0001" < "001" < "1". + */ + private final boolean leadingZerosAhead; + + /** + * Constructs a new comparator with possibility to specify + * how it should deal with leading zeros in numbers. + * + * @param leadingZerosAhead if {@code true}, numbers with + * more leading zeros are treated as smaller. + */ + private NumericComparator(boolean leadingZerosAhead) { + this.leadingZerosAhead = leadingZerosAhead; + } + + /** + * Counts how many consequtive characters in the string {@code o} + * are digits, starting from the position {@code from}. + */ + private int countDigits(CharSequence o, int from) { + for (int i = from, len = o.length(); ; ++i) { + if (i == len || !Character.isDigit(o.charAt(i))) { + return i - from; + } + } + } + + /** + * Checks if all {@code cnt} chars of the sequence {@code o}, + * starting with {@code pos} are zeros. + * + * @param pos position of the first digit of numeric part. + * @param cnt number of digits to check; must be > 0. + * + * @return {@code true} if the numeric part of the string {@code o} + * can be reduced in length by {@code cnt} without changing its + * numerical value. + */ + private boolean skipLeadingZeros(CharSequence o, int pos, int cnt) { + do { + if (Character.digit(o.charAt(pos++), 10) != 0) { + // Leading digit of numeric part of o isn't zero + return false; + } + } while (--cnt > 0); + return true; + } + + @Override + public int compare(T o1, T o2) { + int minLen = Math.min(o1.length(), o2.length()); + int numStart1 = 0; + int index = 0; + int res0 = 0; + + // Skip common prefix of the arguments + char ch; + while (index < minLen && + (res0 = Character.compare(ch = o1.charAt(index), + o2.charAt(index))) == 0) { + ++index; + if (!Character.isDigit(ch)) { + numStart1 = index; + } + } + + int numLen1 = index - numStart1; + if (numLen1 > 0 || + (index < minLen && + Character.isDigit(o1.charAt(index)) && + Character.isDigit(o2.charAt(index)))) + { + // The difference is inside or at the boundary of + // numerical part + int numLen2 = numLen1; + int numStart2 = numStart1; + + numLen1 += countDigits(o1, index); + numLen2 += countDigits(o2, index); + + int diffNumLen = numLen2 - numLen1; + + if (diffNumLen > 0) { + if (!skipLeadingZeros(o2, numStart2, diffNumLen)) { + // Length of numeric part of o2 is greater + return -1; + } + numStart2 += diffNumLen; + numLen2 = numLen1; + } else if (diffNumLen < 0) { + if (!skipLeadingZeros(o1, numStart1, -diffNumLen)) { + // Length of numeric part of o1 is greater + return 1; + } + numStart1 -= diffNumLen; + numLen1 = numLen2; + } + + // Numeric parts are of the same length, so they can + // be compared literally + while (numLen1-- > 0) { + int res1 = Character.compare(o1.charAt(numStart1++), + o2.charAt(numStart2++)); + if (res1 != 0) { + return res1; + } + } + + // Numeric parts are numerically equal + if (diffNumLen != 0) { + // If leadingZerosAhead == true, treat the number + // with more leading zeros as smaller + return (leadingZerosAhead ^ (diffNumLen > 0)) ? -1 : 1; + } + } + + // Otherwise, compare literally + return (index < o1.length()) + ? ((index < o2.length()) ? res0 : 1) + : ((index < o2.length()) ? -1 : 0); + } + } } diff --git a/test/java/util/Comparator/BasicTest.java b/test/java/util/Comparator/BasicTest.java --- a/test/java/util/Comparator/BasicTest.java +++ b/test/java/util/Comparator/BasicTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,14 +23,19 @@ /** * @test + * @bug 8134512 * @summary Comparator default method tests * @run testng BasicTest + * @key randomness */ import java.util.TreeMap; import java.util.Comparator; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.Arrays; +import java.util.Collections; import java.util.function.Function; import java.util.function.ToIntFunction; import java.util.function.ToLongFunction; @@ -366,4 +371,107 @@ fail("comparing(null) should throw NPE"); } catch (NullPointerException npe) {} } + + @Test(dataProvider = "presorted") + public void testNumerical(String[] sorted, Comparator cmp) { + String[] shuffled = sorted.clone(); + Collections.shuffle(Arrays.asList(shuffled)); + Arrays.sort(shuffled, cmp); + assertEquals(shuffled, sorted); + } + + // Simple case: sorting 1 .. 11 + private static String[] arr0 = new String[] { + "java 1", "java 2", "java 3", + "java 4", "java 5", "java 6", + "java 7", "java 8", "java 9", + "java 10", "java 11" }; + + // Leading zeros and multiple numeric parts + private static String[] arr1 = new String[] { + "string", "string0", "string00", + "string1", "string01", "string001", + "string2", "string02", "string002", + "string002.a", "string002.a0", "string002.a1", + "string0002", "string0002" }; + + // Leading zeros ahead + private static String[] arr2 = new String[] { + "string", "string00", "string0", + "string001", "string01", "string1", + "string0002", "string0002", "string002", + "string002.a", "string002.a0", "string002.a1", + "string02", "string2" }; + + // Sample from MSDN + private static String[] arr3 = new String[] { + "2string", "3string", "20string", + "st2ring", "st3ring", "st20ring", + "string2", "string3", "string20" }; + + // Fullwidth characters + private static String[] arr4 = new String[] { + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff10", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff10\uff10", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff10\uff10\uff10", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff11", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff10\uff11", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff12", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff13", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff14", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff15", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff16", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff17", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff18", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff19", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff11\uff10", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff11\uff11", + "\uff53\uff54\uff52\uff49\uff4e\uff47 \uff11\uff12" }; + + // Very long numbers + private static String[] arr5 = new String[] { + "q_0", + "q_1", + "q_1_", + "q_10000000000000000000000000000000000000000", + "q_20000000000000000000000000000000000000000", + "q_100000000000000000000000000000000000000000", + "q_500000000000000000000000000000000000000000", + "q_10000000000000000000000000000000000000000000000000000000000000000000000000001", + "q_20000000000000000000000000000000000000000000000000000000000000000000000000000", + "q_20000000000000000000000000000000000000000000000000000000000000000000000000001", + "q_200000000000000000000000000000000000000000000000000000000000000000000000000000", + "y_1", + "y_10000000000000000000000000000000000000000000000000000000000000000000000000000" }; + + @DataProvider(name = "presorted") + public Object[][] createPresortedArrays() { + String[][] rev = { + arr0.clone(), arr1.clone(), arr2.clone(), + arr3.clone(), arr4.clone(), arr5.clone(), + }; + for (String[] r : rev) { + Collections.reverse(Arrays.asList(r)); + } + return new Object[][] { + {arr0, Comparator.comparingNumerically()}, + {arr0, Comparator.comparingNumericallyLeadingZerosAhead()}, + {arr1, Comparator.comparingNumerically()}, + {arr2, Comparator.comparingNumericallyLeadingZerosAhead()}, + {arr3, Comparator.comparingNumerically()}, + {arr3, Comparator.comparingNumericallyLeadingZerosAhead()}, + {arr4, Comparator.comparingNumerically()}, + {arr5, Comparator.comparingNumerically()}, + {arr5, Comparator.comparingNumericallyLeadingZerosAhead()}, + {rev[0], Comparator.comparingNumerically().reversed()}, + {rev[0], Comparator.comparingNumericallyLeadingZerosAhead().reversed()}, + {rev[1], Comparator.comparingNumerically().reversed()}, + {rev[2], Comparator.comparingNumericallyLeadingZerosAhead().reversed()}, + {rev[3], Comparator.comparingNumerically().reversed()}, + {rev[3], Comparator.comparingNumericallyLeadingZerosAhead().reversed()}, + {rev[4], Comparator.comparingNumerically().reversed()}, + {rev[5], Comparator.comparingNumerically().reversed()}, + {rev[5], Comparator.comparingNumericallyLeadingZerosAhead().reversed()}, + }; + } }