src/java.base/share/classes/java/util/regex/CharPredicates.java

Print this page


   1 /*
   2  * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.util.regex;
  27 
  28 import java.util.HashMap;
  29 import java.util.Locale;


  30 
  31 enum UnicodeProp {
  32 
  33     ALPHABETIC {
  34         public boolean is(int ch) {
  35             return Character.isAlphabetic(ch);
  36         }
  37     },
  38 
  39     LETTER {
  40         public boolean is(int ch) {
  41             return Character.isLetter(ch);
  42         }
  43     },
  44 
  45     IDEOGRAPHIC {
  46         public boolean is(int ch) {
  47             return Character.isIdeographic(ch);
  48         }
  49     },
  50 
  51     LOWERCASE {
  52         public boolean is(int ch) {
  53             return Character.isLowerCase(ch);
  54         }
  55     },
  56 
  57     UPPERCASE {
  58         public boolean is(int ch) {
  59             return Character.isUpperCase(ch);
  60         }
  61     },
  62 
  63     TITLECASE {
  64         public boolean is(int ch) {
  65             return Character.isTitleCase(ch);
  66         }
  67     },
  68 
  69     WHITE_SPACE {
  70         // \p{Whitespace}
  71         public boolean is(int ch) {
  72             return ((((1 << Character.SPACE_SEPARATOR) |
  73                       (1 << Character.LINE_SEPARATOR) |
  74                       (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1)
  75                    != 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85);
  76         }
  77     },
  78 
  79     CONTROL {
  80         // \p{gc=Control}
  81         public boolean is(int ch) {
  82             return Character.getType(ch) == Character.CONTROL;
  83         }
  84     },
  85 
  86     PUNCTUATION {
  87         // \p{gc=Punctuation}
  88         public boolean is(int ch) {
  89             return ((((1 << Character.CONNECTOR_PUNCTUATION) |
  90                       (1 << Character.DASH_PUNCTUATION) |
  91                       (1 << Character.START_PUNCTUATION) |
  92                       (1 << Character.END_PUNCTUATION) |
  93                       (1 << Character.OTHER_PUNCTUATION) |
  94                       (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
  95                       (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1)
  96                    != 0;
  97         }
  98     },
  99 
 100     HEX_DIGIT {
 101         // \p{gc=Decimal_Number}
 102         // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
 103         public boolean is(int ch) {
 104             return DIGIT.is(ch) ||
 105                    (ch >= 0x0030 && ch <= 0x0039) ||
 106                    (ch >= 0x0041 && ch <= 0x0046) ||
 107                    (ch >= 0x0061 && ch <= 0x0066) ||
 108                    (ch >= 0xFF10 && ch <= 0xFF19) ||
 109                    (ch >= 0xFF21 && ch <= 0xFF26) ||
 110                    (ch >= 0xFF41 && ch <= 0xFF46);
 111         }
 112     },
 113 
 114     ASSIGNED {
 115         public boolean is(int ch) {
 116             return Character.getType(ch) != Character.UNASSIGNED;
 117         }
 118     },
 119 
 120     NONCHARACTER_CODE_POINT {
 121         // PropList.txt:Noncharacter_Code_Point
 122         public boolean is(int ch) {
 123             return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
 124         }
 125     },
 126 
 127     DIGIT {
 128         // \p{gc=Decimal_Number}
 129         public boolean is(int ch) {
 130             return Character.isDigit(ch);
 131         }
 132     },
 133 
 134     ALNUM {
 135         // \p{alpha}
 136         // \p{digit}
 137         public boolean is(int ch) {
 138             return ALPHABETIC.is(ch) || DIGIT.is(ch);
 139         }
 140     },
 141 
 142     BLANK {
 143         // \p{Whitespace} --
 144         // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
 145         //  \p{gc=Line_Separator}
 146         //  \p{gc=Paragraph_Separator}]
 147         public boolean is(int ch) {
 148             return Character.getType(ch) == Character.SPACE_SEPARATOR ||
 149                    ch == 0x9; // \N{HT}
 150         }
 151     },
 152 
 153     GRAPH {
 154         // [^
 155         //  \p{space}
 156         //  \p{gc=Control}
 157         //  \p{gc=Surrogate}
 158         //  \p{gc=Unassigned}]
 159         public boolean is(int ch) {
 160             return ((((1 << Character.SPACE_SEPARATOR) |
 161                       (1 << Character.LINE_SEPARATOR) |
 162                       (1 << Character.PARAGRAPH_SEPARATOR) |
 163                       (1 << Character.CONTROL) |
 164                       (1 << Character.SURROGATE) |
 165                       (1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1)
 166                    == 0;
 167         }
 168     },
 169 
 170     PRINT {
 171         // \p{graph}
 172         // \p{blank}
 173         // -- \p{cntrl}
 174         public boolean is(int ch) {
 175             return (GRAPH.is(ch) || BLANK.is(ch)) && !CONTROL.is(ch);
 176         }
 177     },
 178 
 179     WORD {
 180         //  \p{alpha}
 181         //  \p{gc=Mark}
 182         //  \p{digit}
 183         //  \p{gc=Connector_Punctuation}
 184         //  \p{Join_Control}    200C..200D
 185 
 186         public boolean is(int ch) {
 187             return ALPHABETIC.is(ch) ||
 188                    ((((1 << Character.NON_SPACING_MARK) |
 189                       (1 << Character.ENCLOSING_MARK) |
 190                       (1 << Character.COMBINING_SPACING_MARK) |
 191                       (1 << Character.DECIMAL_DIGIT_NUMBER) |
 192                       (1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1)
 193                    != 0 ||
 194                    JOIN_CONTROL.is(ch);
 195         }
 196     },
 197 
 198     JOIN_CONTROL {
 199         //  200C..200D    PropList.txt:Join_Control
 200         public boolean is(int ch) {
 201            return (ch == 0x200C || ch == 0x200D);






 202         }
 203     };
 204 
 205     private static final HashMap<String, String> posix = new HashMap<>();
 206     private static final HashMap<String, String> aliases = new HashMap<>();
 207     static {
 208         posix.put("ALPHA", "ALPHABETIC");
 209         posix.put("LOWER", "LOWERCASE");
 210         posix.put("UPPER", "UPPERCASE");
 211         posix.put("SPACE", "WHITE_SPACE");
 212         posix.put("PUNCT", "PUNCTUATION");
 213         posix.put("XDIGIT","HEX_DIGIT");
 214         posix.put("ALNUM", "ALNUM");
 215         posix.put("CNTRL", "CONTROL");
 216         posix.put("DIGIT", "DIGIT");
 217         posix.put("BLANK", "BLANK");
 218         posix.put("GRAPH", "GRAPH");
 219         posix.put("PRINT", "PRINT");
 220 
 221         aliases.put("WHITESPACE", "WHITE_SPACE");
 222         aliases.put("HEXDIGIT","HEX_DIGIT");
 223         aliases.put("NONCHARACTERCODEPOINT", "NONCHARACTER_CODE_POINT");
 224         aliases.put("JOINCONTROL", "JOIN_CONTROL");
 225     }
 226 
 227     public static UnicodeProp forName(String propName) {
 228         propName = propName.toUpperCase(Locale.ENGLISH);
 229         String alias = aliases.get(propName);
 230         if (alias != null)
 231             propName = alias;





























 232         try {
 233             return valueOf (propName);
 234         } catch (IllegalArgumentException x) {}

 235         return null;
 236     }
 237 
 238     public static UnicodeProp forPOSIXName(String propName) {
 239         propName = posix.get(propName.toUpperCase(Locale.ENGLISH));
 240         if (propName == null)






 241             return null;
 242         return valueOf (propName);
 243     }
 244 
 245     public abstract boolean is(int ch);
























































































































































 246 }
   1 /*
   2  * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.util.regex;
  27 
  28 import java.util.HashMap;
  29 import java.util.Locale;
  30 import java.util.regex.Pattern.CharPredicate;
  31 import java.util.regex.Pattern.BmpCharPredicate;
  32 
  33 class CharPredicates {
  34 
  35     static final CharPredicate ALPHABETIC  = Character::isAlphabetic;




  36 
  37     // \p{gc=Decimal_Number}
  38     static final CharPredicate DIGIT       = Character::isDigit;



  39 
  40     static final CharPredicate LETTER      = Character::isLetter;




  41 
  42     static final CharPredicate IDEOGRAPHIC = Character::isIdeographic;




  43 
  44     static final CharPredicate LOWERCASE   = Character::isLowerCase;




  45 
  46     static final CharPredicate UPPERCASE   = Character::isUpperCase;
  47 
  48     static final CharPredicate TITLECASE   = Character::isTitleCase;


  49 

  50     // \p{Whitespace}
  51     static final CharPredicate WHITE_SPACE = ch ->
  52         ((((1 << Character.SPACE_SEPARATOR) |
  53            (1 << Character.LINE_SEPARATOR) |
  54            (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1)
  55         != 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85);


  56 

  57     // \p{gc=Control}
  58     static final CharPredicate CONTROL     = ch ->
  59         Character.getType(ch) == Character.CONTROL;


  60 

  61     // \p{gc=Punctuation}
  62     static final CharPredicate PUNCTUATION = ch ->
  63         ((((1 << Character.CONNECTOR_PUNCTUATION) |
  64            (1 << Character.DASH_PUNCTUATION) |
  65            (1 << Character.START_PUNCTUATION) |
  66            (1 << Character.END_PUNCTUATION) |
  67            (1 << Character.OTHER_PUNCTUATION) |
  68            (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
  69            (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1)
  70         != 0;


  71 

  72     // \p{gc=Decimal_Number}
  73     // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
  74     static final CharPredicate HEX_DIGIT = DIGIT.union(
  75         ch -> (ch >= 0x0030 && ch <= 0x0039) ||

  76               (ch >= 0x0041 && ch <= 0x0046) ||
  77               (ch >= 0x0061 && ch <= 0x0066) ||
  78               (ch >= 0xFF10 && ch <= 0xFF19) ||
  79               (ch >= 0xFF21 && ch <= 0xFF26) ||
  80               (ch >= 0xFF41 && ch <= 0xFF46));


  81 
  82     static final CharPredicate ASSIGNED = ch ->
  83         Character.getType(ch) != Character.UNASSIGNED;



  84 

  85     // PropList.txt:Noncharacter_Code_Point
  86     static final CharPredicate NONCHARACTER_CODE_POINT = ch ->
  87         (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);









  88 

  89     // \p{alpha}
  90     // \p{digit}
  91     static final CharPredicate ALNUM = ALPHABETIC.union(DIGIT);



  92 

  93     // \p{Whitespace} --
  94     // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
  95     //  \p{gc=Line_Separator}
  96     //  \p{gc=Paragraph_Separator}]
  97     static final CharPredicate BLANK = ch ->
  98         Character.getType(ch) == Character.SPACE_SEPARATOR ||
  99         ch == 0x9; // \N{HT}


 100 

 101     // [^
 102     //  \p{space}
 103     //  \p{gc=Control}
 104     //  \p{gc=Surrogate}
 105     //  \p{gc=Unassigned}]
 106     static final CharPredicate GRAPH = ch ->
 107         ((((1 << Character.SPACE_SEPARATOR) |
 108            (1 << Character.LINE_SEPARATOR) |
 109            (1 << Character.PARAGRAPH_SEPARATOR) |
 110            (1 << Character.CONTROL) |
 111            (1 << Character.SURROGATE) |
 112            (1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1)
 113         == 0;


 114 

 115     // \p{graph}
 116     // \p{blank}
 117     // -- \p{cntrl}
 118     static final CharPredicate PRINT = GRAPH.union(BLANK).and(CONTROL.negate());
 119 
 120     //  200C..200D    PropList.txt:Join_Control
 121     static final CharPredicate JOIN_CONTROL = ch -> ch == 0x200C || ch == 0x200D;
 122 

 123     //  \p{alpha}
 124     //  \p{gc=Mark}
 125     //  \p{digit}
 126     //  \p{gc=Connector_Punctuation}
 127     //  \p{Join_Control}    200C..200D
 128     static final CharPredicate WORD =
 129         ALPHABETIC.union(ch -> ((((1 << Character.NON_SPACING_MARK) |


 130                                   (1 << Character.ENCLOSING_MARK) |
 131                                   (1 << Character.COMBINING_SPACING_MARK) |
 132                                   (1 << Character.DECIMAL_DIGIT_NUMBER) |
 133                                   (1 << Character.CONNECTOR_PUNCTUATION))
 134                                  >> Character.getType(ch)) & 1) != 0,
 135                          JOIN_CONTROL);


 136 
 137     /////////////////////////////////////////////////////////////////////////////
 138 
 139     private static final HashMap<String, CharPredicate> posix = new HashMap<>(12);
 140     private static final HashMap<String, CharPredicate> uprops = new HashMap<>(18);
 141 
 142     private static void defPosix(String name, CharPredicate p) {
 143         posix.put(name, p);
 144     }
 145     private static void defUProp(String name, CharPredicate p) {
 146         uprops.put(name, p);
 147     }

 148 


 149     static {
 150         defPosix("ALPHA", ALPHABETIC);
 151         defPosix("LOWER", LOWERCASE);
 152         defPosix("UPPER", UPPERCASE);
 153         defPosix("SPACE", WHITE_SPACE);
 154         defPosix("PUNCT", PUNCTUATION);
 155         defPosix("XDIGIT",HEX_DIGIT);
 156         defPosix("ALNUM", ALNUM);
 157         defPosix("CNTRL", CONTROL);
 158         defPosix("DIGIT", DIGIT);
 159         defPosix("BLANK", BLANK);
 160         defPosix("GRAPH", GRAPH);
 161         defPosix("PRINT", PRINT);
 162 
 163         defUProp("ALPHABETIC", ALPHABETIC);
 164         defUProp("ASSIGNED", ASSIGNED);
 165         defUProp("CONTROL", CONTROL);
 166         defUProp("HEXDIGIT", HEX_DIGIT);
 167         defUProp("IDEOGRAPHIC", IDEOGRAPHIC);
 168         defUProp("JOINCONTROL", JOIN_CONTROL);
 169         defUProp("LETTER", LETTER);
 170         defUProp("LOWERCASE", LOWERCASE);
 171         defUProp("NONCHARACTERCODEPOINT", NONCHARACTER_CODE_POINT);
 172         defUProp("TITLECASE", TITLECASE);
 173         defUProp("PUNCTUATION", PUNCTUATION);
 174         defUProp("UPPERCASE", UPPERCASE);
 175         defUProp("WHITESPACE", WHITE_SPACE);
 176         defUProp("WORD", WORD);
 177         defUProp("WHITE_SPACE", WHITE_SPACE);
 178         defUProp("HEX_DIGIT", HEX_DIGIT);
 179         defUProp("NONCHARACTER_CODE_POINT", NONCHARACTER_CODE_POINT);
 180         defUProp("JOIN_CONTROL", JOIN_CONTROL);
 181     }
 182 
 183     public static CharPredicate forUnicodeProperty(String propName) {
 184         propName = propName.toUpperCase(Locale.ROOT);
 185         CharPredicate p = uprops.get(propName);
 186         if (p != null)
 187             return p;
 188         return posix.get(propName);
 189     }
 190 
 191     public static CharPredicate forPOSIXName(String propName) {
 192         return posix.get(propName.toUpperCase(Locale.ENGLISH));
 193     }
 194 
 195     /////////////////////////////////////////////////////////////////////////////
 196 
 197     /**
 198      * Returns a predicate matching all characters belong to a named
 199      * UnicodeScript.
 200      */
 201     static CharPredicate forUnicodeScript(String name) {
 202         final Character.UnicodeScript script;
 203         try {
 204             script = Character.UnicodeScript.forName(name);
 205             return ch -> script == Character.UnicodeScript.of(ch);
 206         } catch (IllegalArgumentException iae) {}
 207         return null;
 208     }
 209 
 210     /**
 211      * Returns a predicate matching all characters in a UnicodeBlock.
 212      */
 213     static CharPredicate forUnicodeBlock(String name) {
 214         final Character.UnicodeBlock block;
 215         try {
 216             block = Character.UnicodeBlock.forName(name);
 217             return ch -> block == Character.UnicodeBlock.of(ch);
 218         } catch (IllegalArgumentException iae) {}
 219          return null;

 220     }
 221 
 222     /////////////////////////////////////////////////////////////////////////////
 223 
 224     // unicode categories, aliases, properties, java methods ...
 225 
 226     private static final HashMap<String, CharPredicate> props = new HashMap<>(128);
 227 
 228     /**
 229      * Returns a predicate matching all characters in a named property.
 230      */
 231     static CharPredicate forProperty(String name) {
 232         return props.get(name);
 233     }
 234 
 235     private static void defProp(String name, CharPredicate p) {
 236         props.put(name, p);
 237     }
 238 
 239     private static void defCategory(String name, final int typeMask) {
 240         CharPredicate p = ch -> (typeMask & (1 << Character.getType(ch))) != 0;
 241         props.put(name, p);
 242     }
 243 
 244     private static void defRange(String name, final int lower, final int upper) {
 245         BmpCharPredicate p = ch -> lower <= ch && ch <= upper;
 246         props.put(name, p);
 247     }
 248 
 249     private static void defCtype(String name, final int ctype) {
 250         BmpCharPredicate p = ch -> ch < 128 && ASCII.isType(ch, ctype);
 251         // PrintPattern.pmap.put(p, name);
 252         props.put(name, p);
 253     }
 254 
 255     static {
 256         // Unicode character property aliases, defined in
 257         // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
 258         defCategory("Cn", 1<<Character.UNASSIGNED);
 259         defCategory("Lu", 1<<Character.UPPERCASE_LETTER);
 260         defCategory("Ll", 1<<Character.LOWERCASE_LETTER);
 261         defCategory("Lt", 1<<Character.TITLECASE_LETTER);
 262         defCategory("Lm", 1<<Character.MODIFIER_LETTER);
 263         defCategory("Lo", 1<<Character.OTHER_LETTER);
 264         defCategory("Mn", 1<<Character.NON_SPACING_MARK);
 265         defCategory("Me", 1<<Character.ENCLOSING_MARK);
 266         defCategory("Mc", 1<<Character.COMBINING_SPACING_MARK);
 267         defCategory("Nd", 1<<Character.DECIMAL_DIGIT_NUMBER);
 268         defCategory("Nl", 1<<Character.LETTER_NUMBER);
 269         defCategory("No", 1<<Character.OTHER_NUMBER);
 270         defCategory("Zs", 1<<Character.SPACE_SEPARATOR);
 271         defCategory("Zl", 1<<Character.LINE_SEPARATOR);
 272         defCategory("Zp", 1<<Character.PARAGRAPH_SEPARATOR);
 273         defCategory("Cc", 1<<Character.CONTROL);
 274         defCategory("Cf", 1<<Character.FORMAT);
 275         defCategory("Co", 1<<Character.PRIVATE_USE);
 276         defCategory("Cs", 1<<Character.SURROGATE);
 277         defCategory("Pd", 1<<Character.DASH_PUNCTUATION);
 278         defCategory("Ps", 1<<Character.START_PUNCTUATION);
 279         defCategory("Pe", 1<<Character.END_PUNCTUATION);
 280         defCategory("Pc", 1<<Character.CONNECTOR_PUNCTUATION);
 281         defCategory("Po", 1<<Character.OTHER_PUNCTUATION);
 282         defCategory("Sm", 1<<Character.MATH_SYMBOL);
 283         defCategory("Sc", 1<<Character.CURRENCY_SYMBOL);
 284         defCategory("Sk", 1<<Character.MODIFIER_SYMBOL);
 285         defCategory("So", 1<<Character.OTHER_SYMBOL);
 286         defCategory("Pi", 1<<Character.INITIAL_QUOTE_PUNCTUATION);
 287         defCategory("Pf", 1<<Character.FINAL_QUOTE_PUNCTUATION);
 288         defCategory("L", ((1<<Character.UPPERCASE_LETTER) |
 289                           (1<<Character.LOWERCASE_LETTER) |
 290                           (1<<Character.TITLECASE_LETTER) |
 291                           (1<<Character.MODIFIER_LETTER)  |
 292                           (1<<Character.OTHER_LETTER)));
 293         defCategory("M", ((1<<Character.NON_SPACING_MARK) |
 294                           (1<<Character.ENCLOSING_MARK)   |
 295                           (1<<Character.COMBINING_SPACING_MARK)));
 296         defCategory("N", ((1<<Character.DECIMAL_DIGIT_NUMBER) |
 297                           (1<<Character.LETTER_NUMBER)        |
 298                           (1<<Character.OTHER_NUMBER)));
 299         defCategory("Z", ((1<<Character.SPACE_SEPARATOR) |
 300                           (1<<Character.LINE_SEPARATOR)  |
 301                           (1<<Character.PARAGRAPH_SEPARATOR)));
 302         defCategory("C", ((1<<Character.CONTROL)     |
 303                           (1<<Character.FORMAT)      |
 304                           (1<<Character.PRIVATE_USE) |
 305                           (1<<Character.SURROGATE))); // Other
 306         defCategory("P", ((1<<Character.DASH_PUNCTUATION)      |
 307                           (1<<Character.START_PUNCTUATION)     |
 308                           (1<<Character.END_PUNCTUATION)       |
 309                           (1<<Character.CONNECTOR_PUNCTUATION) |
 310                           (1<<Character.OTHER_PUNCTUATION)     |
 311                           (1<<Character.INITIAL_QUOTE_PUNCTUATION) |
 312                           (1<<Character.FINAL_QUOTE_PUNCTUATION)));
 313         defCategory("S", ((1<<Character.MATH_SYMBOL)     |
 314                           (1<<Character.CURRENCY_SYMBOL) |
 315                           (1<<Character.MODIFIER_SYMBOL) |
 316                           (1<<Character.OTHER_SYMBOL)));
 317         defCategory("LC", ((1<<Character.UPPERCASE_LETTER) |
 318                            (1<<Character.LOWERCASE_LETTER) |
 319                            (1<<Character.TITLECASE_LETTER)));
 320         defCategory("LD", ((1<<Character.UPPERCASE_LETTER) |
 321                            (1<<Character.LOWERCASE_LETTER) |
 322                            (1<<Character.TITLECASE_LETTER) |
 323                            (1<<Character.MODIFIER_LETTER)  |
 324                            (1<<Character.OTHER_LETTER)     |
 325                            (1<<Character.DECIMAL_DIGIT_NUMBER)));
 326         defRange("L1", 0x00, 0xFF); // Latin-1
 327         props.put("all", ch -> true);
 328 
 329         // Posix regular expression character classes, defined in
 330         // http://www.unix.org/onlinepubs/009695399/basedefs/xbd_chap09.html
 331         defRange("ASCII", 0x00, 0x7F);   // ASCII
 332         defCtype("Alnum", ASCII.ALNUM);  // Alphanumeric characters
 333         defCtype("Alpha", ASCII.ALPHA);  // Alphabetic characters
 334         defCtype("Blank", ASCII.BLANK);  // Space and tab characters
 335         defCtype("Cntrl", ASCII.CNTRL);  // Control characters
 336         defRange("Digit", '0', '9');     // Numeric characters
 337         defCtype("Graph", ASCII.GRAPH);  // printable and visible
 338         defRange("Lower", 'a', 'z');     // Lower-case alphabetic
 339         defRange("Print", 0x20, 0x7E);   // Printable characters
 340         defCtype("Punct", ASCII.PUNCT);  // Punctuation characters
 341         defCtype("Space", ASCII.SPACE);  // Space characters
 342         defRange("Upper", 'A', 'Z');     // Upper-case alphabetic
 343         defCtype("XDigit",ASCII.XDIGIT); // hexadecimal digits
 344 
 345         // Java character properties, defined by methods in Character.java
 346         defProp("javaLowerCase", java.lang.Character::isLowerCase);
 347         defProp("javaUpperCase",  Character::isUpperCase);
 348         defProp("javaAlphabetic", java.lang.Character::isAlphabetic);
 349         defProp("javaIdeographic", java.lang.Character::isIdeographic);
 350         defProp("javaTitleCase", java.lang.Character::isTitleCase);
 351         defProp("javaDigit", java.lang.Character::isDigit);
 352         defProp("javaDefined", java.lang.Character::isDefined);
 353         defProp("javaLetter", java.lang.Character::isLetter);
 354         defProp("javaLetterOrDigit", java.lang.Character::isLetterOrDigit);
 355         defProp("javaJavaIdentifierStart", java.lang.Character::isJavaIdentifierStart);
 356         defProp("javaJavaIdentifierPart", java.lang.Character::isJavaIdentifierPart);
 357         defProp("javaUnicodeIdentifierStart", java.lang.Character::isUnicodeIdentifierStart);
 358         defProp("javaUnicodeIdentifierPart", java.lang.Character::isUnicodeIdentifierPart);
 359         defProp("javaIdentifierIgnorable", java.lang.Character::isIdentifierIgnorable);
 360         defProp("javaSpaceChar", java.lang.Character::isSpaceChar);
 361         defProp("javaWhitespace", java.lang.Character::isWhitespace);
 362         defProp("javaISOControl", java.lang.Character::isISOControl);
 363         defProp("javaMirrored", java.lang.Character::isMirrored);
 364     }
 365 
 366     /////////////////////////////////////////////////////////////////////////////
 367 
 368     /**
 369      * Posix ASCII variants, not in the lookup map
 370      */
 371     static final BmpCharPredicate ASCII_DIGIT = ch -> ch < 128 && ASCII.isDigit(ch);
 372     static final BmpCharPredicate ASCII_WORD  = ch -> ch < 128 && ASCII.isWord(ch);
 373     static final BmpCharPredicate ASCII_SPACE = ch -> ch < 128 && ASCII.isSpace(ch);
 374 
 375 }