< prev index next >

src/java.base/share/classes/java/util/regex/Pattern.java

Print this page
rev 56177 : [mq]: 8230365-Pattern-for-a-control-char-matches-non-control-characters

@@ -43,10 +43,12 @@
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 
 import jdk.internal.util.ArraysSupport;
 
+import sun.security.action.GetPropertyAction;
+
 /**
  * A compiled representation of a regular expression.
  *
  * <p> A regular expression, specified as a string, must first be compiled into
  * an instance of this class.  The resulting pattern can then be used to create

@@ -128,11 +130,13 @@
  * <tr><th style="vertical-align:top; font-weight:normal" id="bell">{@code \a}</th>
  *     <td headers="matches characters bell">The alert (bell) character (<code>'\u0007'</code>)</td></tr>
  * <tr><th style="vertical-align:top; font-weight:normal" id="escape">{@code \e}</th>
  *     <td headers="matches characters escape">The escape character (<code>'\u001B'</code>)</td></tr>
  * <tr><th style="vertical-align:top; font-weight:normal" id="ctrl_x">{@code \c}<i>x</i></th>
- *     <td headers="matches characters ctrl_x">The control character corresponding to <i>x</i></td></tr>
+ *     <td headers="matches characters ctrl_x">The control character corresponding to <i>x</i>
+ *         (<i>x</i> is either {@code A} through {@code Z} or one of
+ *          {@code ?}, {@code @}, {@code [}, {@code \}, {@code ]}, {@code ^}, {@code _})</td></tr>
  *
  *  <tr><th colspan="2" style="padding-top:20px" id="classes">Character classes</th></tr>
  *
  * <tr><th style="vertical-align:top; font-weight:normal" id="simple">{@code [abc]}</th>
  *     <td headers="matches classes simple">{@code a}, {@code b}, or {@code c} (simple class)</td></tr>

@@ -149,11 +153,11 @@
  * <tr><th style="vertical-align:top; font-weight:normal" id="subtraction1">{@code [a-z&&[^bc]]}</th>
  *     <td headers="matches classes subtraction1">{@code a} through {@code z},
  *         except for {@code b} and {@code c}: {@code [ad-z]} (subtraction)</td></tr>
  * <tr><th style="vertical-align:top; font-weight:normal" id="subtraction2">{@code [a-z&&[^m-p]]}</th>
  *     <td headers="matches classes subtraction2">{@code a} through {@code z},
- *          and not {@code m} through {@code p}: {@code [a-lq-z]}(subtraction)</td></tr>
+ *          and not {@code m} through {@code p}: {@code [a-lq-z]} (subtraction)</td></tr>
  *
  * <tr><th colspan="2" style="padding-top:20px" id="predef">Predefined character classes</th></tr>
  *
  * <tr><th style="vertical-align:top; font-weight:normal" id="any">{@code .}</th>
  *     <td headers="matches predef any">Any character (may or may not match <a href="#lt">line terminators</a>)</td></tr>

@@ -1056,10 +1060,29 @@
      * (2) There is complement node of a "family" CharProperty
      */
     private transient boolean hasSupplementary;
 
     /**
+     * If {@code true} (the default), only a limited list of chars is accepted
+     * as control-character IDs in regular expressions of the form "\\cX":
+     * 'A' through 'Z', '?', '@', '[', '\\', ']', '^', '_'.
+     * Otherwise, no restrictions are imposed on the IDs.
+     */
+    private static final boolean RESTRICTED_CONTROL_CHAR_IDS = Boolean.valueOf(
+            GetPropertyAction.privilegedGetProperty(
+                    "jdk.util.regex.restrictedControlCharIds", "true"));
+
+    /**
+     * If {@code true} then lower-case control-character IDs are mapped to
+     * their upper-case counterparts ({@code false} by default).
+     * For example, "\\ca" will be the same as "\\cA".
+     */
+    private static final boolean ALLOW_LOWERCASE_CONTROL_CHAR_IDS = Boolean.valueOf(
+            GetPropertyAction.privilegedGetProperty(
+                    "jdk.util.regex.allowLowerCaseControlCharIds", "false"));
+
+    /**
      * Compiles the given regular expression into a pattern.
      *
      * @param  regex
      *         The expression to be compiled
      * @return the given regular expression compiled into a pattern

@@ -3323,11 +3346,15 @@
     /**
      *  Utility method for parsing control escape sequences.
      */
     private int c() {
         if (cursor < patternLength) {
-            return read() ^ 64;
+            int ch = read();  // the id character X of a "\cX" escape
+            if (ALLOW_LOWERCASE_CONTROL_CHAR_IDS && ASCII.isLower(ch))  // opt-in: lower-case ids
+                return ch ^ 0x60;  // flips bits 5 and 6, e.g. 'a' (0x61) -> 0x01, same as 'A' ^ 0x40
+            if (!RESTRICTED_CONTROL_CHAR_IDS || ASCII.isCntrlId(ch))  // unrestricted mode takes any id
+                return ch ^ 0x40;  // flips bit 6, e.g. 'A' (0x41) -> 0x01, '?' (0x3F) -> 0x7F
         }
         throw error("Illegal control escape sequence");
     }
 
     /**
< prev index next >