< prev index next >

src/java.base/share/classes/java/util/regex/CharPredicates.java

Print this page
rev 54580 : [mq]: 8214245-Case-insensitive-matching-doesnt-work-correctly-for-POSIX-character-classes
   1 /*
   2  * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 152         return ch -> ch == 0x200C || ch == 0x200D;
 153     }
 154 
 155     //  \p{alpha}
 156     //  \p{gc=Mark}
 157     //  \p{digit}
 158     //  \p{gc=Connector_Punctuation}
 159     //  \p{Join_Control}    200C..200D
         //
         //  The list above is implemented as ALPHABETIC() combined, through
         //  union(), with a general-category test and JOIN_CONTROL().
 160     static final CharPredicate WORD() {
             // The mask below has one bit set for each accepted
             // Character.getType() value (the three mark categories, decimal
             // digits and connector punctuation). Shifting it right by the
             // character's type and testing bit 0 answers "is this type in the
             // set?" in a single expression.
 161         return ALPHABETIC().union(ch -> ((((1 << Character.NON_SPACING_MARK) |
 162                                   (1 << Character.ENCLOSING_MARK) |
 163                                   (1 << Character.COMBINING_SPACING_MARK) |
 164                                   (1 << Character.DECIMAL_DIGIT_NUMBER) |
 165                                   (1 << Character.CONNECTOR_PUNCTUATION))
 166                                  >> Character.getType(ch)) & 1) != 0,
 167                          JOIN_CONTROL());
 168     }
 169 
 170     /////////////////////////////////////////////////////////////////////////////
 171 
 172     private static CharPredicate getPosixPredicate(String name) {
             // Looks up a POSIX character class by exact, upper-case name and
             // returns the predicate built by the matching factory method.
             // Both callers in this file (forUnicodeProperty, forPOSIXName)
             // upper-case the name before calling. Names that are not listed
             // yield null.
 173         switch (name) {
 174             case "ALPHA": return ALPHABETIC();
 175             case "LOWER": return LOWERCASE();
 176             case "UPPER": return UPPERCASE();




 177             case "SPACE": return WHITE_SPACE();
 178             case "PUNCT": return PUNCTUATION();
 179             case "XDIGIT": return HEX_DIGIT();
 180             case "ALNUM": return ALNUM();
 181             case "CNTRL": return CONTROL();
 182             case "DIGIT": return DIGIT();
 183             case "BLANK": return BLANK();
 184             case "GRAPH": return GRAPH();
 185             case "PRINT": return PRINT();
 186             default: return null;
 187         }
 188     }
 189 
 190     private forUnicodePredicatePlaceholder
 213 
 214     public static CharPredicate forUnicodeProperty(String propName) {
             // Resolves a property name without regard to case: the name is
             // upper-cased with Locale.ROOT, looked up among the Unicode binary
             // property names first, and only then among the POSIX class names.
             // Returns null when neither table knows the name.
 215         propName = propName.toUpperCase(Locale.ROOT);
 216         CharPredicate p = getUnicodePredicate(propName);
 217         if (p != null)
 218             return p;
 219         return getPosixPredicate(propName);
 220     }
 221 
 222     public static CharPredicate forPOSIXName(String propName) {
             // Resolves a POSIX class name without regard to case; returns null
             // for names getPosixPredicate does not list.
             // NOTE(review): this upper-cases with Locale.ENGLISH while
             // forUnicodeProperty uses Locale.ROOT -- consider aligning the two.
 223         return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH));
 224     }
 225 
 226     /////////////////////////////////////////////////////////////////////////////
 227 
 228     /**
 229      * Returns a predicate matching all characters belonging to a named
 230      * UnicodeScript.
          *
          * @param name script name, resolved with Character.UnicodeScript.forName
          * @return a predicate that is true for code points whose
          *         Character.UnicodeScript.of value is that script, or null
          *         when forName throws IllegalArgumentException for the name
 231      */
 232     static CharPredicate forUnicodeScript(String name) {
 233         final Character.UnicodeScript script;
 234         try {
 235             script = Character.UnicodeScript.forName(name);
 236             return ch -> script == Character.UnicodeScript.of(ch);
                 // The catch body is empty on purpose: control falls through to
                 // the "return null" below, which is how a rejected name is
                 // reported to the caller.
 237         } catch (IllegalArgumentException iae) {}
 238         return null;
 239     }
 240 
 241     /**
 242      * Returns a predicate matching all characters in a UnicodeBlock.
          *
          * @param name block name, resolved with Character.UnicodeBlock.forName
          * @return a predicate that is true for code points whose
          *         Character.UnicodeBlock.of value is that block, or null
          *         when forName throws IllegalArgumentException for the name
 243      */
 244     static CharPredicate forUnicodeBlock(String name) {
 245         final Character.UnicodeBlock block;
 246         try {
 247             block = Character.UnicodeBlock.forName(name);
 248             return ch -> block == Character.UnicodeBlock.of(ch);
                 // The catch body is empty on purpose: control falls through to
                 // the "return null" below, which is how a rejected name is
                 // reported to the caller.
 249         } catch (IllegalArgumentException iae) {}
 250          return null;
 251     }
 252 
 253     /////////////////////////////////////////////////////////////////////////////
 254 
 255     // unicode categories, aliases, properties, java methods ...
 256 
 257     static CharPredicate forProperty(String name) {
             // Resolves a property name, matched exactly as written (no case
             // folding happens here), to a predicate:
             //   - two-letter general-category aliases -> category(...) bit masks
             //   - "L1", "ASCII"                       -> code point ranges
             //   - POSIX class names ("Alnum", ...)    -> ASCII ctype tests or ranges
             //   - "java*" names                       -> the java.lang.Character
             //                                            method of the same name
             // Names that match no case yield null.
 258         // Unicode character property aliases, defined in
 259         // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
 260         switch (name) {
 261             case "Cn": return category(1<<Character.UNASSIGNED);
 262             case "Lu": return category(1<<Character.UPPERCASE_LETTER);
 263             case "Ll": return category(1<<Character.LOWERCASE_LETTER);
 264             case "Lt": return category(1<<Character.TITLECASE_LETTER);









 265             case "Lm": return category(1<<Character.MODIFIER_LETTER);
 266             case "Lo": return category(1<<Character.OTHER_LETTER);
 267             case "Mn": return category(1<<Character.NON_SPACING_MARK);
 268             case "Me": return category(1<<Character.ENCLOSING_MARK);
 269             case "Mc": return category(1<<Character.COMBINING_SPACING_MARK);
 270             case "Nd": return category(1<<Character.DECIMAL_DIGIT_NUMBER);
 271             case "Nl": return category(1<<Character.LETTER_NUMBER);
 272             case "No": return category(1<<Character.OTHER_NUMBER);
 273             case "Zs": return category(1<<Character.SPACE_SEPARATOR);
 274             case "Zl": return category(1<<Character.LINE_SEPARATOR);
 275             case "Zp": return category(1<<Character.PARAGRAPH_SEPARATOR);
 276             case "Cc": return category(1<<Character.CONTROL);
 277             case "Cf": return category(1<<Character.FORMAT);
 278             case "Co": return category(1<<Character.PRIVATE_USE);
 279             case "Cs": return category(1<<Character.SURROGATE);
 280             case "Pd": return category(1<<Character.DASH_PUNCTUATION);
 281             case "Ps": return category(1<<Character.START_PUNCTUATION);
 282             case "Pe": return category(1<<Character.END_PUNCTUATION);
 283             case "Pc": return category(1<<Character.CONNECTOR_PUNCTUATION);
 284             case "Po": return category(1<<Character.OTHER_PUNCTUATION);


 321             case "LC": return category(((1<<Character.UPPERCASE_LETTER) |
 322                                (1<<Character.LOWERCASE_LETTER) |
 323                                (1<<Character.TITLECASE_LETTER)));
 324             case "LD": return category(((1<<Character.UPPERCASE_LETTER) |
 325                                (1<<Character.LOWERCASE_LETTER) |
 326                                (1<<Character.TITLECASE_LETTER) |
 327                                (1<<Character.MODIFIER_LETTER)  |
 328                                (1<<Character.OTHER_LETTER)     |
 329                                (1<<Character.DECIMAL_DIGIT_NUMBER)));
 330             case "L1": return range(0x00, 0xFF); // Latin-1
 331             case "all": return Pattern.ALL();
 332             // Posix regular expression character classes, defined in
 333             // http://www.unix.org/onlinepubs/009695399/basedefs/xbd_chap09.html
 334             case "ASCII": return range(0x00, 0x7F);   // ASCII
 335             case "Alnum": return ctype(ASCII.ALNUM);  // Alphanumeric characters
 336             case "Alpha": return ctype(ASCII.ALPHA);  // Alphabetic characters
 337             case "Blank": return ctype(ASCII.BLANK);  // Space and tab characters
 338             case "Cntrl": return ctype(ASCII.CNTRL);  // Control characters
 339             case "Digit": return range('0', '9');     // Numeric characters
 340             case "Graph": return ctype(ASCII.GRAPH);  // printable and visible
 341             case "Lower": return range('a', 'z');     // Lower-case alphabetic

 342             case "Print": return range(0x20, 0x7E);   // Printable characters
 343             case "Punct": return ctype(ASCII.PUNCT);  // Punctuation characters
 344             case "Space": return ctype(ASCII.SPACE);  // Space characters
 345             case "Upper": return range('A', 'Z');     // Upper-case alphabetic

 346             case "XDigit": return ctype(ASCII.XDIGIT); // hexadecimal digits
 347 
 348             // Java character properties, defined by methods in Character.java
 349             case "javaLowerCase": return java.lang.Character::isLowerCase;
 350             case "javaUpperCase": return  Character::isUpperCase;
 351             case "javaAlphabetic": return java.lang.Character::isAlphabetic;
 352             case "javaIdeographic": return java.lang.Character::isIdeographic;
 353             case "javaTitleCase": return java.lang.Character::isTitleCase;
 354             case "javaDigit": return java.lang.Character::isDigit;
 355             case "javaDefined": return java.lang.Character::isDefined;
 356             case "javaLetter": return java.lang.Character::isLetter;
 357             case "javaLetterOrDigit": return java.lang.Character::isLetterOrDigit;
 358             case "javaJavaIdentifierStart": return java.lang.Character::isJavaIdentifierStart;
 359             case "javaJavaIdentifierPart": return java.lang.Character::isJavaIdentifierPart;
 360             case "javaUnicodeIdentifierStart": return java.lang.Character::isUnicodeIdentifierStart;
 361             case "javaUnicodeIdentifierPart": return java.lang.Character::isUnicodeIdentifierPart;
 362             case "javaIdentifierIgnorable": return java.lang.Character::isIdentifierIgnorable;
 363             case "javaSpaceChar": return java.lang.Character::isSpaceChar;
 364             case "javaWhitespace": return java.lang.Character::isWhitespace;
 365             case "javaISOControl": return java.lang.Character::isISOControl;
 366             case "javaMirrored": return java.lang.Character::isMirrored;









 367             default: return null;
 368         }
 369     }
 370 
 371     private static CharPredicate category(final int typeMask) {
             // typeMask holds one bit per accepted Character.getType() value,
             // i.e. bit (1 << type). The predicate is true when the bit for the
             // tested character's type is set in the mask.
 372         return ch -> (typeMask & (1 << Character.getType(ch))) != 0;
 373     }
 374 
 375     private static CharPredicate range(final int lower, final int upper) {
             // True for lower <= ch <= upper; both bounds are inclusive.
             // NOTE(review): the lambda is typed as BmpCharPredicate, presumably
             // because every range built in this file lies in the BMP -- confirm
             // against BmpCharPredicate before passing larger bounds.
 376         return (BmpCharPredicate)ch -> lower <= ch && ch <= upper;
 377     }
 378 
 379     private static CharPredicate ctype(final int ctype) {
             // True only for characters below 128 that ASCII.isType accepts for
             // the given ctype constant. The "ch < 128" guard runs first, so
             // ASCII.isType is never called with a non-ASCII value.
 380         return (BmpCharPredicate)ch -> ch < 128 && ASCII.isType(ch, ctype);
 381     }
 382 
 383     /////////////////////////////////////////////////////////////////////////////
 384 
 385     /**
 386      * Posix ASCII variants, not in the lookup map
   1 /*
   2  * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 152         return ch -> ch == 0x200C || ch == 0x200D;
 153     }
 154 
 155     //  \p{alpha}
 156     //  \p{gc=Mark}
 157     //  \p{digit}
 158     //  \p{gc=Connector_Punctuation}
 159     //  \p{Join_Control}    200C..200D
         //
         //  The list above is implemented as ALPHABETIC() combined, through
         //  union(), with a general-category test and JOIN_CONTROL().
 160     static final CharPredicate WORD() {
             // The mask below has one bit set for each accepted
             // Character.getType() value (the three mark categories, decimal
             // digits and connector punctuation). Shifting it right by the
             // character's type and testing bit 0 answers "is this type in the
             // set?" in a single expression.
 161         return ALPHABETIC().union(ch -> ((((1 << Character.NON_SPACING_MARK) |
 162                                   (1 << Character.ENCLOSING_MARK) |
 163                                   (1 << Character.COMBINING_SPACING_MARK) |
 164                                   (1 << Character.DECIMAL_DIGIT_NUMBER) |
 165                                   (1 << Character.CONNECTOR_PUNCTUATION))
 166                                  >> Character.getType(ch)) & 1) != 0,
 167                          JOIN_CONTROL());
 168     }
 169 
 170     /////////////////////////////////////////////////////////////////////////////
 171 
 172     private static CharPredicate getPosixPredicate(String name, boolean caseIns) {
             // Looks up a POSIX character class by exact, upper-case name; both
             // callers in this file upper-case the name before calling. Names
             // that are not listed yield null.
             //
             // caseIns only affects the two case-specific classes: when it is
             // true, LOWER and UPPER both return the union of LOWERCASE(),
             // UPPERCASE() and TITLECASE(), so the class no longer
             // distinguishes case.
 173         switch (name) {
 174             case "ALPHA": return ALPHABETIC();
 175             case "LOWER": return caseIns
 176                                 ? LOWERCASE().union(UPPERCASE(), TITLECASE())
 177                                 : LOWERCASE();
 178             case "UPPER": return caseIns
 179                                 ? UPPERCASE().union(LOWERCASE(), TITLECASE())
 180                                 : UPPERCASE();
 181             case "SPACE": return WHITE_SPACE();
 182             case "PUNCT": return PUNCTUATION();
 183             case "XDIGIT": return HEX_DIGIT();
 184             case "ALNUM": return ALNUM();
 185             case "CNTRL": return CONTROL();
 186             case "DIGIT": return DIGIT();
 187             case "BLANK": return BLANK();
 188             case "GRAPH": return GRAPH();
 189             case "PRINT": return PRINT();
 190             default: return null;
 191         }
 192     }
 193 
 194     private static CharPredicate getUnicodePredicate(String name, boolean caseIns) {
             // Looks up a Unicode binary property by exact, upper-case name.
             // Several properties are accepted both with and without
             // underscores; each such pair shares one return through case
             // fall-through. Names that are not listed yield null.
             //
             // caseIns only affects LOWERCASE, UPPERCASE and TITLECASE: when it
             // is true, each of them returns the union of all three, so the
             // property no longer distinguishes case.
 195         switch (name) {
 196             case "ALPHABETIC": return ALPHABETIC();
 197             case "ASSIGNED": return ASSIGNED();
 198             case "CONTROL": return CONTROL();
 199             case "HEXDIGIT":
 200             case "HEX_DIGIT": return HEX_DIGIT();
 201             case "IDEOGRAPHIC": return IDEOGRAPHIC();
 202             case "JOINCONTROL":
 203             case "JOIN_CONTROL": return JOIN_CONTROL();
 204             case "LETTER": return LETTER();
 205             case "LOWERCASE": return caseIns
 206                                     ? LOWERCASE().union(UPPERCASE(), TITLECASE())
 207                                     : LOWERCASE();
 208             case "NONCHARACTERCODEPOINT":
 209             case "NONCHARACTER_CODE_POINT": return NONCHARACTER_CODE_POINT();
 210             case "TITLECASE": return caseIns
 211                                     ? TITLECASE().union(LOWERCASE(), UPPERCASE())
 212                                     : TITLECASE();
 213             case "PUNCTUATION": return PUNCTUATION();
 214             case "UPPERCASE": return caseIns
 215                                     ? UPPERCASE().union(LOWERCASE(), TITLECASE())
 216                                     : UPPERCASE();
 217             case "WHITESPACE":
 218             case "WHITE_SPACE": return WHITE_SPACE();
 219             case "WORD": return WORD();


 220             default: return null;
 221         }
 222     }
 223 
 224     public static CharPredicate forUnicodeProperty(String propName, boolean caseIns) {
             // Resolves a property name without regard to case: the name is
             // upper-cased with Locale.ROOT, looked up among the Unicode binary
             // property names first, and only then among the POSIX class names.
             // caseIns is passed through unchanged to both lookups. Returns null
             // when neither table knows the name.
 225         propName = propName.toUpperCase(Locale.ROOT);
 226         CharPredicate p = getUnicodePredicate(propName, caseIns);
 227         if (p != null)
 228             return p;
 229         return getPosixPredicate(propName, caseIns);
 230     }
 231 
 232     public static CharPredicate forPOSIXName(String propName, boolean caseIns) {
             // Resolves a POSIX class name without regard to case, passing
             // caseIns through to getPosixPredicate; returns null for names that
             // method does not list.
             // NOTE(review): this upper-cases with Locale.ENGLISH while
             // forUnicodeProperty uses Locale.ROOT -- consider aligning the two.
 233         return getPosixPredicate(propName.toUpperCase(Locale.ENGLISH), caseIns);
 234     }
 235 
 236     /////////////////////////////////////////////////////////////////////////////
 237 
 238     /**
 239      * Returns a predicate matching all characters belonging to a named
 240      * UnicodeScript.
          *
          * @param name script name, resolved with Character.UnicodeScript.forName
          * @return a predicate that is true for code points whose
          *         Character.UnicodeScript.of value is that script, or null
          *         when forName throws IllegalArgumentException for the name
 241      */
 242     static CharPredicate forUnicodeScript(String name) {
 243         final Character.UnicodeScript script;
 244         try {
 245             script = Character.UnicodeScript.forName(name);
 246             return ch -> script == Character.UnicodeScript.of(ch);
                 // The catch body is empty on purpose: control falls through to
                 // the "return null" below, which is how a rejected name is
                 // reported to the caller.
 247         } catch (IllegalArgumentException iae) {}
 248         return null;
 249     }
 250 
 251     /**
 252      * Returns a predicate matching all characters in a UnicodeBlock.
          *
          * @param name block name, resolved with Character.UnicodeBlock.forName
          * @return a predicate that is true for code points whose
          *         Character.UnicodeBlock.of value is that block, or null
          *         when forName throws IllegalArgumentException for the name
 253      */
 254     static CharPredicate forUnicodeBlock(String name) {
 255         final Character.UnicodeBlock block;
 256         try {
 257             block = Character.UnicodeBlock.forName(name);
 258             return ch -> block == Character.UnicodeBlock.of(ch);
                 // The catch body is empty on purpose: control falls through to
                 // the "return null" below, which is how a rejected name is
                 // reported to the caller.
 259         } catch (IllegalArgumentException iae) {}
 260          return null;
 261     }
 262 
 263     /////////////////////////////////////////////////////////////////////////////
 264 
 265     // unicode categories, aliases, properties, java methods ...
 266 
 267     static CharPredicate forProperty(String name, boolean caseIns) {
             // Resolves a property name, matched exactly as written (no case
             // folding happens here), to a predicate:
             //   - two-letter general-category aliases -> category(...) bit masks
             //   - "L1", "ASCII"                       -> code point ranges
             //   - POSIX class names ("Alnum", ...)    -> ASCII ctype tests or ranges
             //   - "java*" names                       -> the java.lang.Character
             //                                            method of the same name
             // Names that match no case yield null.
             //
             // caseIns widens only the case-specific entries:
             //   - Lu / Ll / Lt accept all three cased-letter categories
             //   - Lower / Upper accept ctype(ASCII.ALPHA) instead of one range
             //   - javaLowerCase / javaUpperCase / javaTitleCase accept a
             //     character if any of the three Character.isXxxCase tests does
 268         // Unicode character property aliases, defined in
 269         // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
 270         switch (name) {
 271             case "Cn": return category(1<<Character.UNASSIGNED);
 272             case "Lu": return category(caseIns ? (1<<Character.LOWERCASE_LETTER) |
 273                                                  (1<<Character.UPPERCASE_LETTER) |
 274                                                  (1<<Character.TITLECASE_LETTER)
 275                                                : (1<<Character.UPPERCASE_LETTER));
 276             case "Ll": return category(caseIns ? (1<<Character.LOWERCASE_LETTER) |
 277                                                  (1<<Character.UPPERCASE_LETTER) |
 278                                                  (1<<Character.TITLECASE_LETTER)
 279                                                : (1<<Character.LOWERCASE_LETTER));
 280             case "Lt": return category(caseIns ? (1<<Character.LOWERCASE_LETTER) |
 281                                                  (1<<Character.UPPERCASE_LETTER) |
 282                                                  (1<<Character.TITLECASE_LETTER)
 283                                                : (1<<Character.TITLECASE_LETTER));
 284             case "Lm": return category(1<<Character.MODIFIER_LETTER);
 285             case "Lo": return category(1<<Character.OTHER_LETTER);
 286             case "Mn": return category(1<<Character.NON_SPACING_MARK);
 287             case "Me": return category(1<<Character.ENCLOSING_MARK);
 288             case "Mc": return category(1<<Character.COMBINING_SPACING_MARK);
 289             case "Nd": return category(1<<Character.DECIMAL_DIGIT_NUMBER);
 290             case "Nl": return category(1<<Character.LETTER_NUMBER);
 291             case "No": return category(1<<Character.OTHER_NUMBER);
 292             case "Zs": return category(1<<Character.SPACE_SEPARATOR);
 293             case "Zl": return category(1<<Character.LINE_SEPARATOR);
 294             case "Zp": return category(1<<Character.PARAGRAPH_SEPARATOR);
 295             case "Cc": return category(1<<Character.CONTROL);
 296             case "Cf": return category(1<<Character.FORMAT);
 297             case "Co": return category(1<<Character.PRIVATE_USE);
 298             case "Cs": return category(1<<Character.SURROGATE);
 299             case "Pd": return category(1<<Character.DASH_PUNCTUATION);
 300             case "Ps": return category(1<<Character.START_PUNCTUATION);
 301             case "Pe": return category(1<<Character.END_PUNCTUATION);
 302             case "Pc": return category(1<<Character.CONNECTOR_PUNCTUATION);
 303             case "Po": return category(1<<Character.OTHER_PUNCTUATION);


 340             case "LC": return category(((1<<Character.UPPERCASE_LETTER) |
 341                                (1<<Character.LOWERCASE_LETTER) |
 342                                (1<<Character.TITLECASE_LETTER)));
 343             case "LD": return category(((1<<Character.UPPERCASE_LETTER) |
 344                                (1<<Character.LOWERCASE_LETTER) |
 345                                (1<<Character.TITLECASE_LETTER) |
 346                                (1<<Character.MODIFIER_LETTER)  |
 347                                (1<<Character.OTHER_LETTER)     |
 348                                (1<<Character.DECIMAL_DIGIT_NUMBER)));
 349             case "L1": return range(0x00, 0xFF); // Latin-1
 350             case "all": return Pattern.ALL();
 351             // Posix regular expression character classes, defined in
 352             // http://www.unix.org/onlinepubs/009695399/basedefs/xbd_chap09.html
 353             case "ASCII": return range(0x00, 0x7F);    // ASCII
 354             case "Alnum": return ctype(ASCII.ALNUM);   // Alphanumeric characters
 355             case "Alpha": return ctype(ASCII.ALPHA);   // Alphabetic characters
 356             case "Blank": return ctype(ASCII.BLANK);   // Space and tab characters
 357             case "Cntrl": return ctype(ASCII.CNTRL);   // Control characters
 358             case "Digit": return range('0', '9');      // Numeric characters
 359             case "Graph": return ctype(ASCII.GRAPH);   // printable and visible
 360             case "Lower": return caseIns ? ctype(ASCII.ALPHA)
 361                                     : range('a', 'z'); // Lower-case alphabetic
 362             case "Print": return range(0x20, 0x7E);    // Printable characters
 363             case "Punct": return ctype(ASCII.PUNCT);   // Punctuation characters
 364             case "Space": return ctype(ASCII.SPACE);   // Space characters
 365             case "Upper": return caseIns ? ctype(ASCII.ALPHA)
 366                                     : range('A', 'Z'); // Upper-case alphabetic
 367             case "XDigit": return ctype(ASCII.XDIGIT); // hexadecimal digits
 368 
 369             // Java character properties, defined by methods in Character.java
 370             case "javaLowerCase": return caseIns ? c -> Character.isLowerCase(c) ||
 371                                                         Character.isUpperCase(c) ||
 372                                                         Character.isTitleCase(c)
 373                                                  : Character::isLowerCase;
 374             case "javaUpperCase": return caseIns ? c -> Character.isUpperCase(c) ||
 375                                                         Character.isLowerCase(c) ||
 376                                                         Character.isTitleCase(c)
 377                                                  : Character::isUpperCase;
 378             case "javaAlphabetic": return Character::isAlphabetic;
 379             case "javaIdeographic": return Character::isIdeographic;
 380             case "javaTitleCase": return caseIns ? c -> Character.isTitleCase(c) ||
 381                                                         Character.isLowerCase(c) ||
 382                                                         Character.isUpperCase(c)
 383                                                  : Character::isTitleCase;
 384             case "javaDigit": return Character::isDigit;
 385             case "javaDefined": return Character::isDefined;
 386             case "javaLetter": return Character::isLetter;
 387             case "javaLetterOrDigit": return Character::isLetterOrDigit;
 388             case "javaJavaIdentifierStart": return Character::isJavaIdentifierStart;
 389             case "javaJavaIdentifierPart": return Character::isJavaIdentifierPart;
 390             case "javaUnicodeIdentifierStart": return Character::isUnicodeIdentifierStart;
 391             case "javaUnicodeIdentifierPart": return Character::isUnicodeIdentifierPart;
 392             case "javaIdentifierIgnorable": return Character::isIdentifierIgnorable;
 393             case "javaSpaceChar": return Character::isSpaceChar;
 394             case "javaWhitespace": return Character::isWhitespace;
 395             case "javaISOControl": return Character::isISOControl;
 396             case "javaMirrored": return Character::isMirrored;
 397             default: return null;
 398         }
 399     }
 400 
 401     private static CharPredicate category(final int typeMask) {
             // typeMask holds one bit per accepted Character.getType() value,
             // i.e. bit (1 << type). The predicate is true when the bit for the
             // tested character's type is set in the mask.
 402         return ch -> (typeMask & (1 << Character.getType(ch))) != 0;
 403     }
 404 
 405     private static CharPredicate range(final int lower, final int upper) {
             // True for lower <= ch <= upper; both bounds are inclusive.
             // NOTE(review): the lambda is typed as BmpCharPredicate, presumably
             // because every range built in this file lies in the BMP -- confirm
             // against BmpCharPredicate before passing larger bounds.
 406         return (BmpCharPredicate)ch -> lower <= ch && ch <= upper;
 407     }
 408 
 409     private static CharPredicate ctype(final int ctype) {
             // True only for characters below 128 that ASCII.isType accepts for
             // the given ctype constant. The "ch < 128" guard runs first, so
             // ASCII.isType is never called with a non-ASCII value.
 410         return (BmpCharPredicate)ch -> ch < 128 && ASCII.isType(ch, ctype);
 411     }
 412 
 413     /////////////////////////////////////////////////////////////////////////////
 414 
 415     /**
 416      * Posix ASCII variants, not in the lookup map
< prev index next >