1 /*
   2  * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.util.regex;
  27 
  28 import java.util.HashMap;
  29 import java.util.Locale;
  30 
  31 enum UnicodeProp {
  32 
  33     ALPHABETIC {
  34         public boolean is(int ch) {
  35             return Character.isAlphabetic(ch);
  36         }
  37     },
  38 
  39     LETTER {
  40         public boolean is(int ch) {
  41             return Character.isLetter(ch);
  42         }
  43     },
  44 
  45     IDEOGRAPHIC {
  46         public boolean is(int ch) {
  47             return Character.isIdeographic(ch);
  48         }
  49     },
  50 
  51     LOWERCASE {
  52         public boolean is(int ch) {
  53             return Character.isLowerCase(ch);
  54         }
  55     },
  56 
  57     UPPERCASE {
  58         public boolean is(int ch) {
  59             return Character.isUpperCase(ch);
  60         }
  61     },
  62 
  63     TITLECASE {
  64         public boolean is(int ch) {
  65             return Character.isTitleCase(ch);
  66         }
  67     },
  68 
  69     WHITE_SPACE {
  70         // \p{Whitespace}
  71         public boolean is(int ch) {
  72             return ((((1 << Character.SPACE_SEPARATOR) |
  73                       (1 << Character.LINE_SEPARATOR) |
  74                       (1 << Character.PARAGRAPH_SEPARATOR)) >> Character.getType(ch)) & 1)
  75                    != 0 || (ch >= 0x9 && ch <= 0xd) || (ch == 0x85);
  76         }
  77     },
  78 
  79     CONTROL {
  80         // \p{gc=Control}
  81         public boolean is(int ch) {
  82             return Character.getType(ch) == Character.CONTROL;
  83         }
  84     },
  85 
  86     PUNCTUATION {
  87         // \p{gc=Punctuation}
  88         public boolean is(int ch) {
  89             return ((((1 << Character.CONNECTOR_PUNCTUATION) |
  90                       (1 << Character.DASH_PUNCTUATION) |
  91                       (1 << Character.START_PUNCTUATION) |
  92                       (1 << Character.END_PUNCTUATION) |
  93                       (1 << Character.OTHER_PUNCTUATION) |
  94                       (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
  95                       (1 << Character.FINAL_QUOTE_PUNCTUATION)) >> Character.getType(ch)) & 1)
  96                    != 0;
  97         }
  98     },
  99 
 100     HEX_DIGIT {
 101         // \p{gc=Decimal_Number}
 102         // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
 103         public boolean is(int ch) {
 104             return DIGIT.is(ch) ||
 105                    (ch >= 0x0030 && ch <= 0x0039) ||
 106                    (ch >= 0x0041 && ch <= 0x0046) ||
 107                    (ch >= 0x0061 && ch <= 0x0066) ||
 108                    (ch >= 0xFF10 && ch <= 0xFF19) ||
 109                    (ch >= 0xFF21 && ch <= 0xFF26) ||
 110                    (ch >= 0xFF41 && ch <= 0xFF46);
 111         }
 112     },
 113 
 114     ASSIGNED {
 115         public boolean is(int ch) {
 116             return Character.getType(ch) != Character.UNASSIGNED;
 117         }
 118     },
 119 
 120     NONCHARACTER_CODE_POINT {
 121         // PropList.txt:Noncharacter_Code_Point
 122         public boolean is(int ch) {
 123             return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
 124         }
 125     },
 126 
 127     DIGIT {
 128         // \p{gc=Decimal_Number}
 129         public boolean is(int ch) {
 130             return Character.isDigit(ch);
 131         }
 132     },
 133 
 134     ALNUM {
 135         // \p{alpha}
 136         // \p{digit}
 137         public boolean is(int ch) {
 138             return ALPHABETIC.is(ch) || DIGIT.is(ch);
 139         }
 140     },
 141 
 142     BLANK {
 143         // \p{Whitespace} --
 144         // [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
 145         //  \p{gc=Line_Separator}
 146         //  \p{gc=Paragraph_Separator}]
 147         public boolean is(int ch) {
 148             return Character.getType(ch) == Character.SPACE_SEPARATOR ||
 149                    ch == 0x9; // \N{HT}
 150         }
 151     },
 152 
 153     GRAPH {
 154         // [^
 155         //  \p{space}
 156         //  \p{gc=Control}
 157         //  \p{gc=Surrogate}
 158         //  \p{gc=Unassigned}]
 159         public boolean is(int ch) {
 160             return ((((1 << Character.SPACE_SEPARATOR) |
 161                       (1 << Character.LINE_SEPARATOR) |
 162                       (1 << Character.PARAGRAPH_SEPARATOR) |
 163                       (1 << Character.CONTROL) |
 164                       (1 << Character.SURROGATE) |
 165                       (1 << Character.UNASSIGNED)) >> Character.getType(ch)) & 1)
 166                    == 0;
 167         }
 168     },
 169 
 170     PRINT {
 171         // \p{graph}
 172         // \p{blank}
 173         // -- \p{cntrl}
 174         public boolean is(int ch) {
 175             return (GRAPH.is(ch) || BLANK.is(ch)) && !CONTROL.is(ch);
 176         }
 177     },
 178 
 179     WORD {
 180         //  \p{alpha}
 181         //  \p{gc=Mark}
 182         //  \p{digit}
 183         //  \p{gc=Connector_Punctuation}
 184         //  \p{Join_Control}    200C..200D
 185 
 186         public boolean is(int ch) {
 187             return ALPHABETIC.is(ch) ||
 188                    ((((1 << Character.NON_SPACING_MARK) |
 189                       (1 << Character.ENCLOSING_MARK) |
 190                       (1 << Character.COMBINING_SPACING_MARK) |
 191                       (1 << Character.DECIMAL_DIGIT_NUMBER) |
 192                       (1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1)
 193                    != 0 ||
 194                    JOIN_CONTROL.is(ch);
 195         }
 196     },
 197 
 198     JOIN_CONTROL {
 199         //  200C..200D    PropList.txt:Join_Control
 200         public boolean is(int ch) {
 201            return (ch == 0x200C || ch == 0x200D);
 202         }
 203     };
 204 
 205     private static final HashMap<String, String> posix = new HashMap<>();
 206     private static final HashMap<String, String> aliases = new HashMap<>();
 207     static {
 208         posix.put("ALPHA", "ALPHABETIC");
 209         posix.put("LOWER", "LOWERCASE");
 210         posix.put("UPPER", "UPPERCASE");
 211         posix.put("SPACE", "WHITE_SPACE");
 212         posix.put("PUNCT", "PUNCTUATION");
 213         posix.put("XDIGIT","HEX_DIGIT");
 214         posix.put("ALNUM", "ALNUM");
 215         posix.put("CNTRL", "CONTROL");
 216         posix.put("DIGIT", "DIGIT");
 217         posix.put("BLANK", "BLANK");
 218         posix.put("GRAPH", "GRAPH");
 219         posix.put("PRINT", "PRINT");
 220 
 221         aliases.put("WHITESPACE", "WHITE_SPACE");
 222         aliases.put("HEXDIGIT","HEX_DIGIT");
 223         aliases.put("NONCHARACTERCODEPOINT", "NONCHARACTER_CODE_POINT");
 224         aliases.put("JOINCONTROL", "JOIN_CONTROL");
 225     }
 226 
 227     public static UnicodeProp forName(String propName) {
 228         propName = propName.toUpperCase(Locale.ENGLISH);
 229         String alias = aliases.get(propName);
 230         if (alias != null)
 231             propName = alias;
 232         try {
 233             return valueOf (propName);
 234         } catch (IllegalArgumentException x) {}
 235         return null;
 236     }
 237 
 238     public static UnicodeProp forPOSIXName(String propName) {
 239         propName = posix.get(propName.toUpperCase(Locale.ENGLISH));
 240         if (propName == null)
 241             return null;
 242         return valueOf (propName);
 243     }
 244 
 245     public abstract boolean is(int ch);
 246 }