# HG changeset patch
# User igerasim
# Date 1567649348 25200
# Wed Sep 04 19:09:08 2019 -0700
# Node ID 021d2408e69f06c0588903b8011eadfb7ed36000
# Parent f016cc0874f03dbeec5e275f45247ee5724acad3
[mq]: 8230365-Pattern-for-a-control-char-matches-non-control-characters
diff --git a/src/java.base/share/classes/java/util/regex/ASCII.java b/src/java.base/share/classes/java/util/regex/ASCII.java
--- a/src/java.base/share/classes/java/util/regex/ASCII.java
+++ b/src/java.base/share/classes/java/util/regex/ASCII.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2000, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -259,6 +259,10 @@
return isType(ch, WORD);
}
+ static boolean isCntrlId(int ch) {
+ return ch >= '?' && ch <= '_'; // valid \cX IDs: '?', '@', 'A'-'Z', '[', '\', ']', '^', '_'
+ }
+
static int toDigit(int ch) {
return (ctype[ch & 0x7F] & 0x3F);
}
diff --git a/src/java.base/share/classes/java/util/regex/Pattern.java b/src/java.base/share/classes/java/util/regex/Pattern.java
--- a/src/java.base/share/classes/java/util/regex/Pattern.java
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java
@@ -45,6 +45,8 @@
import jdk.internal.util.ArraysSupport;
+import sun.security.action.GetPropertyAction;
+
/**
* A compiled representation of a regular expression.
*
@@ -130,7 +132,9 @@
*
{@code \e} |
* The escape character ('\u001B' ) |
* {@code \c}x |
- * The control character corresponding to x |
+ * The control character corresponding to x
+ * (x is either {@code A} through {@code Z} or one of
+ * {@code ?}, {@code @}, {@code [}, {@code \\}, {@code ]}, {@code ^}, {@code _}) |
*
* Character classes |
*
@@ -151,7 +155,7 @@
* except for {@code b} and {@code c}: {@code [ad-z]} (subtraction)
* {@code [a-z&&[^m-p]]} |
* {@code a} through {@code z},
- * and not {@code m} through {@code p}: {@code [a-lq-z]}(subtraction) |
+ * and not {@code m} through {@code p}: {@code [a-lq-z]} (subtraction)
*
* Predefined character classes |
*
@@ -1058,6 +1062,25 @@
private transient boolean hasSupplementary;
/**
+ * If {@code true}, only a limited set of characters is accepted as the
+ * control-character ID in regular expressions of the form "\\cX":
+ * 'A' through 'Z', '?', '@', '[', '\\', ']', '^', '_'.
+ * Otherwise, no restrictions are imposed on the IDs.
+ */
+ private static final boolean RESTRICTED_CONTROL_CHAR_IDS = Boolean.parseBoolean(
+ GetPropertyAction.privilegedGetProperty(
+ "jdk.util.regex.restrictedControlCharIds", "true"));
+
+ /**
+ * If {@code true}, lower-case control-character IDs are mapped to
+ * their upper-case counterparts.
+ * For example, "\\ca" will be the same as "\\cA".
+ */
+ private static final boolean ALLOW_LOWERCASE_CONTROL_CHAR_IDS = Boolean.parseBoolean(
+ GetPropertyAction.privilegedGetProperty(
+ "jdk.util.regex.allowLowerCaseControlCharIds", "false"));
+
+ /**
* Compiles the given regular expression into a pattern.
*
* @param regex
@@ -3325,7 +3348,11 @@
*/
private int c() {
if (cursor < patternLength) {
- return read() ^ 64;
+ int ch = read(); // the ID character that follows "\c"
+ if (ALLOW_LOWERCASE_CONTROL_CHAR_IDS && ASCII.isLower(ch))
+ return ch ^ 0x60; // opt-in mapping, e.g. 'a' (0x61) -> 0x01, the same value "\cA" yields
+ if (!RESTRICTED_CONTROL_CHAR_IDS || ASCII.isCntrlId(ch)) // unrestricted mode accepts any ID
+ return ch ^ 0x40; // flip bit 6: '@'..'_' -> 0x00..0x1f and '?' -> 0x7f
}
throw error("Illegal control escape sequence");
}
diff --git a/test/jdk/java/util/regex/RegExTest.java b/test/jdk/java/util/regex/RegExTest.java
--- a/test/jdk/java/util/regex/RegExTest.java
+++ b/test/jdk/java/util/regex/RegExTest.java
@@ -35,7 +35,7 @@
* 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
- * 8194667 8197462 8184692 8221431 8224789 8228352
+ * 8194667 8197462 8184692 8221431 8224789 8228352 8230365
*
* @library /test/lib
* @library /lib/testlibrary/java/lang
@@ -57,6 +57,7 @@
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
import java.util.Random;
import java.util.Scanner;
@@ -186,6 +187,7 @@
invalidGroupName();
illegalRepetitionRange();
surrogatePairWithCanonEq();
+ controlCharacters();
if (failure) {
throw new
@@ -4984,4 +4986,75 @@
}
report("surrogatePairWithCanonEq");
}
+
+ // Verifies that "\cX" compiles only when X is one of '@', 'A'..'Z', '[', '\', ']', '^',
+ // '_' or '?', that the compiled pattern matches exactly the one mapped control character,
+ // and that any other X (or a missing X) is rejected with a PatternSyntaxException.
+ private static void controlCharacters() {
+ char[] controlCharsPairs = { '@', 0x00,
+ 'A', 0x01, 'B', 0x02, 'C', 0x03, 'D', 0x04, 'E', 0x05, 'F', 0x06,
+ 'G', 0x07, 'H', 0x08, 'I', 0x09, 'J', 0x0a, 'K', 0x0b, 'L', 0x0c,
+ 'M', 0x0d, 'N', 0x0e, 'O', 0x0f, 'P', 0x10, 'Q', 0x11, 'R', 0x12,
+ 'S', 0x13, 'T', 0x14, 'U', 0x15, 'V', 0x16, 'W', 0x17, 'X', 0x18,
+ 'Y', 0x19, 'Z', 0x1a,
+ '[', 0x1b, '\\', 0x1c, ']', 0x1d, '^', 0x1e, '_', 0x1f, '?', 0x7f };
+ var controlChars = new HashMap<Character, Integer>();
+ for (int i = 0; i < controlCharsPairs.length; i += 2)
+ controlChars.put(controlCharsPairs[i], (int) controlCharsPairs[i + 1]);
+
+ for (char chP = 0; chP <= 0xff + 16; ++chP) {
+ String pat = "\\c";
+ if (chP < 0xff) {
+ // \cx with x in 0x00..0xfe (ASCII and Latin-1)
+ pat = "\\c" + Character.toString(chP);
+ } else if (chP == 0xff) {
+ // incomplete \c at the end of pattern
+ pat = "\\c";
+ } else if (chP <= 0xff + 8) {
+ // \cx with a random char x in 0xff..0xffff
+ int x = 0xff + generator.nextInt(0xff00 + 1);
+ pat = "\\c" + Character.toString(x);
+ } else {
+ // \cx with a random code point x in 0xff..Character.MAX_CODE_POINT
+ int x = 0xff + generator.nextInt(Character.MAX_CODE_POINT + 1 - 0xff);
+ pat = "\\c" + Character.toString(x);
+ }
+ if (controlChars.containsKey(chP)) {
+ try {
+ Pattern p = Pattern.compile(pat);
+ for (int chS = 0; chS < 0xff; ++chS) {
+ boolean matched = p.matcher(Character.toString(chS)).matches();
+ if (matched && controlChars.get(chP) != chS) {
+ failCount++;
+ System.out.println("Control character 0x" + Integer.toHexString(chS) +
+ " unexpectedly matched pattern " + pat);
+ } else if (!matched && controlChars.get(chP) == chS) {
+ failCount++;
+ System.out.println("Control character 0x" + Integer.toHexString(chS) +
+ " failed to match pattern " + pat);
+ }
+ if (matched && Character.getType(chS) != Character.CONTROL) {
+ failCount++;
+ System.out.println("Non-control character 0x" + Integer.toHexString(chS) +
+ " unexpectedly matched pattern " + pat);
+ }
+ }
+ } catch (Throwable t) {
+ failCount++;
+ System.out.println("Failed to compile pattern " + pat + " due to exception: " + t);
+ }
+ } else {
+ try {
+ Pattern.compile(pat);
+ failCount++;
+ System.out.println("Expected to throw an exception when compiling " + pat);
+ } catch (PatternSyntaxException expected) {
+ } catch (Throwable t) {
+ failCount++;
+ System.out.println("Unexpected exception when compiling " + pat + " : " + t);
+ }
+ }
+ }
+ report("controlCharacters");
+ }
}