# HG changeset patch
# User igerasim
# Date 1567120994 25200
#      Thu Aug 29 16:23:14 2019 -0700
# Node ID 7f2ac88b979bdec12c799f8fefe61d7774ef6395
# Parent  5021d91ba9bd3cbf75715412f352a9e6cf19844c
[mq]: 8230365-Pattern-for-a-control-char-matches-non-control-characters

diff --git a/src/java.base/share/classes/java/util/regex/Pattern.java b/src/java.base/share/classes/java/util/regex/Pattern.java
--- a/src/java.base/share/classes/java/util/regex/Pattern.java
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java
@@ -130,7 +130,9 @@
  * {@code \e}
  * The escape character ('\u001B')
  * {@code \c}x
- * The control character corresponding to x
+ * The control character corresponding to x
+ * (x is {@code A} through {@code Z}, or {@code a} through {@code z}, or one of
+ * {@code @}, {@code [}, {@code \\}, {@code ]}, {@code ^}, {@code _}, {@code ?})
  *
  * Character classes
  *
@@ -151,7 +153,7 @@
  * except for {@code b} and {@code c}: {@code [ad-z]} (subtraction)
  * {@code [a-z&&[^m-p]]}
  * {@code a} through {@code z},
- * and not {@code m} through {@code p}: {@code [a-lq-z]}(subtraction)
+ * and not {@code m} through {@code p}: {@code [a-lq-z]} (subtraction)
  *
  * Predefined character classes
  *
@@ -3323,7 +3325,21 @@
      */
     private int c() {
         if (cursor < patternLength) {
-            return read() ^ 64;
+            int ch = read();
+            if (ASCII.isLower(ch))
+                ch = ASCII.toUpper(ch);
+            if (ASCII.isUpper(ch))
+                return ch ^ 64;
+            switch (ch) {
+                case '@':
+                case '[':
+                case '\\':
+                case ']':
+                case '^':
+                case '_':
+                case '?':
+                    return ch ^ 64;
+            }
         }
         throw error("Illegal control escape sequence");
     }
diff --git a/test/jdk/java/util/regex/RegExTest.java b/test/jdk/java/util/regex/RegExTest.java
--- a/test/jdk/java/util/regex/RegExTest.java
+++ b/test/jdk/java/util/regex/RegExTest.java
@@ -35,7 +35,7 @@
  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
  * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
  * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
- * 8194667 8197462 8184692 8221431 8224789 8228352
+ * 8194667 8197462 8184692 8221431 8224789 8228352 8230365
  *
  * @library /test/lib
  * @library /lib/testlibrary/java/lang
@@ -57,6 +57,7 @@
 import java.nio.file.Files;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Random;
 import java.util.Scanner;
@@ -186,6 +187,7 @@
         invalidGroupName();
         illegalRepetitionRange();
         surrogatePairWithCanonEq();
+        controlCharacters();
 
         if (failure) {
             throw new
@@ -4984,4 +4986,64 @@
         }
         report("surrogatePairWithCanonEq");
     }
+
+    private static void controlCharacters() {
+        char[] contolCharsPairs = { '@', 0x00,
+                'A', 0x01, 'a', 0x01, 'B', 0x02, 'b', 0x02, 'C', 0x03, 'c', 0x03,
+                'D', 0x04, 'd', 0x04, 'E', 0x05, 'e', 0x05, 'F', 0x06, 'f', 0x06,
+                'G', 0x07, 'g', 0x07, 'H', 0x08, 'h', 0x08, 'I', 0x09, 'i', 0x09,
+                'J', 0x0a, 'j', 0x0a, 'K', 0x0b, 'k', 0x0b, 'L', 0x0c, 'l', 0x0c,
+                'M', 0x0d, 'm', 0x0d, 'N', 0x0e, 'n', 0x0e, 'O', 0x0f, 'o', 0x0f,
+                'P', 0x10, 'p', 0x10, 'Q', 0x11, 'q', 0x11, 'R', 0x12, 'r', 0x12,
+                'S', 0x13, 's', 0x13, 'T', 0x14, 't', 0x14, 'U', 0x15, 'u', 0x15,
+                'V', 0x16, 'v', 0x16, 'W', 0x17, 'w', 0x17, 'X', 0x18, 'x', 0x18,
+                'Y', 0x19, 'y', 0x19, 'Z', 0x1a, 'z', 0x1a,
+                '[', 0x1b, '\\', 0x1c, ']', 0x1d, '^', 0x1e, '_', 0x1f, '?', 0x7f };
+        var contolChars = new HashMap();
+        for (int i = 0; i < contolCharsPairs.length; i += 2)
+            contolChars.put(Character.valueOf(contolCharsPairs[i]),
+                    Integer.valueOf(contolCharsPairs[i + 1]));
+
+        for (char chP = 0; chP <= 0xff; ++chP) {
+            String pat = "\\c" + (chP == 0xff ? "" : Character.toString(chP));
+            if (contolChars.containsKey(chP)) {
+                try {
+                    Pattern p = Pattern.compile(pat);
+                    for (int chS = 0; chS < 0xff; ++chS) {
+                        Matcher m = p.matcher(Character.toString(chS));
+                        if (m.matches() && contolChars.get(chP) != chS) {
+                            failCount++;
+                            System.out.println("Control character 0x" + Integer.toHexString(chS) +
+                                    " unexpectedly matched pattern " + pat);
+                        } else if (!m.matches() && contolChars.get(chP) == chS) {
+                            failCount++;
+                            System.out.println("Control character 0x" + Integer.toHexString(chS) +
+                                    " failed to match pattern " + pat);
+                        }
+                        if (m.matches() && Character.getType(chS) != Character.CONTROL) {
+                            failCount++;
+                            System.out.println("Non-control character 0x" + Integer.toHexString(chS) +
+                                    " unexpectedly matched pattern " + pat);
+                        }
+                    }
+                } catch (Throwable t) {
+                    failCount++;
+                    System.out.println("Failed to compile pattern " + pat +
+                            " due to exception: " + t);
+                }
+            } else {
+                try {
+                    Pattern p = Pattern.compile(pat);
+                    failCount++;
+                    System.out.println("Expected to throw an exception when compiling " + pat);
+                } catch (PatternSyntaxException expected) {
+                } catch (Throwable t) {
+                    failCount++;
+                    System.out.println("Unexpected exception when compiling " + pat +
+                            " : " + t);
+                }
+            }
+        }
+        report("controlCharacters");
+    }
 }