< prev index next >

src/java.base/share/classes/java/util/regex/CharPredicates.java

Print this page
rev 57965 : [mq]: 8214245-Case-insensitive-matching-doesnt-work-correctly-for-POSIX-character-classes

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this

@@ -167,15 +167,19 @@
                          JOIN_CONTROL());
     }
 
     /////////////////////////////////////////////////////////////////////////////
 
-    private static CharPredicate getPosixPredicate(String name) {
+    private static CharPredicate getPosixPredicate(String name, boolean caseIns) {
         switch (name) {
             case "ALPHA": return ALPHABETIC();
-            case "LOWER": return LOWERCASE();
-            case "UPPER": return UPPERCASE();
+            case "LOWER": return caseIns
+                                ? LOWERCASE().union(UPPERCASE(), TITLECASE())
+                                : LOWERCASE();
+            case "UPPER": return caseIns
+                                ? UPPERCASE().union(LOWERCASE(), TITLECASE())
+                                : UPPERCASE();
             case "SPACE": return WHITE_SPACE();
             case "PUNCT": return PUNCTUATION();
             case "XDIGIT": return HEX_DIGIT();
             case "ALNUM": return ALNUM();
             case "CNTRL": return CONTROL();

@@ -185,44 +189,50 @@
             case "PRINT": return PRINT();
             default: return null;
         }
     }
 
-    private static CharPredicate getUnicodePredicate(String name) {
+    private static CharPredicate getUnicodePredicate(String name, boolean caseIns) {
         switch (name) {
             case "ALPHABETIC": return ALPHABETIC();
             case "ASSIGNED": return ASSIGNED();
             case "CONTROL": return CONTROL();
-            case "HEXDIGIT": return HEX_DIGIT();
+            case "HEXDIGIT":
+            case "HEX_DIGIT": return HEX_DIGIT();
             case "IDEOGRAPHIC": return IDEOGRAPHIC();
-            case "JOINCONTROL": return JOIN_CONTROL();
+            case "JOINCONTROL":
+            case "JOIN_CONTROL": return JOIN_CONTROL();
             case "LETTER": return LETTER();
-            case "LOWERCASE": return LOWERCASE();
-            case "NONCHARACTERCODEPOINT": return NONCHARACTER_CODE_POINT();
-            case "TITLECASE": return TITLECASE();
+            case "LOWERCASE": return caseIns
+                                    ? LOWERCASE().union(UPPERCASE(), TITLECASE())
+                                    : LOWERCASE();
+            case "NONCHARACTERCODEPOINT":
+            case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT();
+            case "TITLECASE": return caseIns
+                                    ? TITLECASE().union(LOWERCASE(), UPPERCASE())
+                                    : TITLECASE();
             case "PUNCTUATION": return PUNCTUATION();
-            case "UPPERCASE": return UPPERCASE();
-            case "WHITESPACE": return WHITE_SPACE();
-            case "WORD": return WORD();
+            case "UPPERCASE": return caseIns
+                                    ? UPPERCASE().union(LOWERCASE(), TITLECASE())
+                                    : UPPERCASE();
+            case "WHITESPACE":
             case "WHITE_SPACE": return WHITE_SPACE();
-            case "HEX_DIGIT": return HEX_DIGIT();
-            case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT();
-            case "JOIN_CONTROL": return JOIN_CONTROL();
+            case "WORD": return WORD();
             default: return null;
         }
     }
 
-    public static CharPredicate forUnicodeProperty(String propName) {
+    public static CharPredicate forUnicodeProperty(String propName, boolean caseIns) {
         propName = propName.toUpperCase(Locale.ROOT);
-        CharPredicate p = getUnicodePredicate(propName);
+        CharPredicate p = getUnicodePredicate(propName, caseIns);
         if (p != null)
             return p;
-        return getPosixPredicate(propName);
+        return getPosixPredicate(propName, caseIns);
     }
 
-    public static CharPredicate forPOSIXName(String propName) {
-        return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH));
+    public static CharPredicate forPOSIXName(String propName, boolean caseIns) {
+        return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH), caseIns);
     }
 
     /////////////////////////////////////////////////////////////////////////////
 
     /**

@@ -252,18 +262,27 @@
 
     /////////////////////////////////////////////////////////////////////////////
 
     // unicode categories, aliases, properties, java methods ...
 
-    static CharPredicate forProperty(String name) {
+    static CharPredicate forProperty(String name, boolean caseIns) {
         // Unicode character property aliases, defined in
         // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
         switch (name) {
             case "Cn": return category(1<<Character.UNASSIGNED);
-            case "Lu": return category(1<<Character.UPPERCASE_LETTER);
-            case "Ll": return category(1<<Character.LOWERCASE_LETTER);
-            case "Lt": return category(1<<Character.TITLECASE_LETTER);
+            case "Lu": return category(caseIns ? (1<<Character.LOWERCASE_LETTER) |
+                                                 (1<<Character.UPPERCASE_LETTER) |
+                                                 (1<<Character.TITLECASE_LETTER)
+                                               : (1<<Character.UPPERCASE_LETTER));
+            case "Ll": return category(caseIns ? (1<<Character.LOWERCASE_LETTER) |
+                                                 (1<<Character.UPPERCASE_LETTER) |
+                                                 (1<<Character.TITLECASE_LETTER)
+                                               : (1<<Character.LOWERCASE_LETTER));
+            case "Lt": return category(caseIns ? (1<<Character.LOWERCASE_LETTER) |
+                                                 (1<<Character.UPPERCASE_LETTER) |
+                                                 (1<<Character.TITLECASE_LETTER)
+                                               : (1<<Character.TITLECASE_LETTER));
             case "Lm": return category(1<<Character.MODIFIER_LETTER);
             case "Lo": return category(1<<Character.OTHER_LETTER);
             case "Mn": return category(1<<Character.NON_SPACING_MARK);
             case "Me": return category(1<<Character.ENCLOSING_MARK);
             case "Mc": return category(1<<Character.COMBINING_SPACING_MARK);

@@ -336,36 +355,47 @@
             case "Alpha": return ctype(ASCII.ALPHA);  // Alphabetic characters
             case "Blank": return ctype(ASCII.BLANK);  // Space and tab characters
             case "Cntrl": return ctype(ASCII.CNTRL);  // Control characters
             case "Digit": return range('0', '9');     // Numeric characters
             case "Graph": return ctype(ASCII.GRAPH);  // printable and visible
-            case "Lower": return range('a', 'z');     // Lower-case alphabetic
+            case "Lower": return caseIns ? ctype(ASCII.ALPHA)
+                                    : range('a', 'z'); // Lower-case alphabetic
             case "Print": return range(0x20, 0x7E);   // Printable characters
             case "Punct": return ctype(ASCII.PUNCT);  // Punctuation characters
             case "Space": return ctype(ASCII.SPACE);  // Space characters
-            case "Upper": return range('A', 'Z');     // Upper-case alphabetic
+            case "Upper": return caseIns ? ctype(ASCII.ALPHA)
+                                    : range('A', 'Z'); // Upper-case alphabetic
             case "XDigit": return ctype(ASCII.XDIGIT); // hexadecimal digits
 
             // Java character properties, defined by methods in Character.java
-            case "javaLowerCase": return java.lang.Character::isLowerCase;
-            case "javaUpperCase": return  Character::isUpperCase;
-            case "javaAlphabetic": return java.lang.Character::isAlphabetic;
-            case "javaIdeographic": return java.lang.Character::isIdeographic;
-            case "javaTitleCase": return java.lang.Character::isTitleCase;
-            case "javaDigit": return java.lang.Character::isDigit;
-            case "javaDefined": return java.lang.Character::isDefined;
-            case "javaLetter": return java.lang.Character::isLetter;
-            case "javaLetterOrDigit": return java.lang.Character::isLetterOrDigit;
-            case "javaJavaIdentifierStart": return java.lang.Character::isJavaIdentifierStart;
-            case "javaJavaIdentifierPart": return java.lang.Character::isJavaIdentifierPart;
-            case "javaUnicodeIdentifierStart": return java.lang.Character::isUnicodeIdentifierStart;
-            case "javaUnicodeIdentifierPart": return java.lang.Character::isUnicodeIdentifierPart;
-            case "javaIdentifierIgnorable": return java.lang.Character::isIdentifierIgnorable;
-            case "javaSpaceChar": return java.lang.Character::isSpaceChar;
-            case "javaWhitespace": return java.lang.Character::isWhitespace;
-            case "javaISOControl": return java.lang.Character::isISOControl;
-            case "javaMirrored": return java.lang.Character::isMirrored;
+            case "javaLowerCase": return caseIns ? c -> Character.isLowerCase(c) ||
+                                                        Character.isUpperCase(c) ||
+                                                        Character.isTitleCase(c)
+                                                 : Character::isLowerCase;
+            case "javaUpperCase": return caseIns ? c -> Character.isUpperCase(c) ||
+                                                        Character.isLowerCase(c) ||
+                                                        Character.isTitleCase(c)
+                                                 : Character::isUpperCase;
+            case "javaAlphabetic": return Character::isAlphabetic;
+            case "javaIdeographic": return Character::isIdeographic;
+            case "javaTitleCase": return caseIns ? c -> Character.isTitleCase(c) ||
+                                                        Character.isLowerCase(c) ||
+                                                        Character.isUpperCase(c)
+                                                 : Character::isTitleCase;
+            case "javaDigit": return Character::isDigit;
+            case "javaDefined": return Character::isDefined;
+            case "javaLetter": return Character::isLetter;
+            case "javaLetterOrDigit": return Character::isLetterOrDigit;
+            case "javaJavaIdentifierStart": return Character::isJavaIdentifierStart;
+            case "javaJavaIdentifierPart": return Character::isJavaIdentifierPart;
+            case "javaUnicodeIdentifierStart": return Character::isUnicodeIdentifierStart;
+            case "javaUnicodeIdentifierPart": return Character::isUnicodeIdentifierPart;
+            case "javaIdentifierIgnorable": return Character::isIdentifierIgnorable;
+            case "javaSpaceChar": return Character::isSpaceChar;
+            case "javaWhitespace": return Character::isWhitespace;
+            case "javaISOControl": return Character::isISOControl;
+            case "javaMirrored": return Character::isMirrored;
             default: return null;
         }
     }
 
     private static CharPredicate category(final int typeMask) {
< prev index next >