/*
 * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package java.util.regex;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * Code point predicates for a fixed set of Unicode binary properties and
 * POSIX-style character classes.
 *
 * <p>Each constant implements {@link #is(int)} for one property. Constants
 * are looked up case-insensitively by property name with
 * {@link #forName(String)} or by POSIX class name with
 * {@link #forPOSIXName(String)}; both return {@code null} when the name is
 * not known.
 *
 * <p>The line comments on the constants give the property expression that
 * the implementation follows.
 */
enum UnicodeProp {

    ALPHABETIC {
        @Override
        public boolean is(int ch) {
            return Character.isAlphabetic(ch);
        }
    },

    LETTER {
        @Override
        public boolean is(int ch) {
            return Character.isLetter(ch);
        }
    },

    IDEOGRAPHIC {
        @Override
        public boolean is(int ch) {
            return Character.isIdeographic(ch);
        }
    },

    LOWERCASE {
        @Override
        public boolean is(int ch) {
            return Character.isLowerCase(ch);
        }
    },

    UPPERCASE {
        @Override
        public boolean is(int ch) {
            return Character.isUpperCase(ch);
        }
    },

    TITLECASE {
        @Override
        public boolean is(int ch) {
            return Character.isTitleCase(ch);
        }
    },

    WHITE_SPACE {
        // \p{Whitespace}
        @Override
        public boolean is(int ch) {
            // Any separator category, plus U+0009..U+000D and U+0085.
            return inCategories(ch,
                       (1 << Character.SPACE_SEPARATOR) |
                       (1 << Character.LINE_SEPARATOR) |
                       (1 << Character.PARAGRAPH_SEPARATOR))
                   || (ch >= 0x9 && ch <= 0xd)
                   || (ch == 0x85);
        }
    },

    CONTROL {
        // \p{gc=Control}
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) == Character.CONTROL;
        }
    },

    PUNCTUATION {
        // \p{gc=Punctuation}
        @Override
        public boolean is(int ch) {
            return inCategories(ch,
                       (1 << Character.CONNECTOR_PUNCTUATION) |
                       (1 << Character.DASH_PUNCTUATION) |
                       (1 << Character.START_PUNCTUATION) |
                       (1 << Character.END_PUNCTUATION) |
                       (1 << Character.OTHER_PUNCTUATION) |
                       (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
                       (1 << Character.FINAL_QUOTE_PUNCTUATION));
        }
    },

    HEX_DIGIT {
        // \p{gc=Decimal_Number}
        // \p{Hex_Digit}    -> PropList.txt: Hex_Digit
        @Override
        public boolean is(int ch) {
            return DIGIT.is(ch) ||
                   (ch >= 0x0030 && ch <= 0x0039) ||   // 0-9
                   (ch >= 0x0041 && ch <= 0x0046) ||   // A-F
                   (ch >= 0x0061 && ch <= 0x0066) ||   // a-f
                   (ch >= 0xFF10 && ch <= 0xFF19) ||   // U+FF10..U+FF19
                   (ch >= 0xFF21 && ch <= 0xFF26) ||   // U+FF21..U+FF26
                   (ch >= 0xFF41 && ch <= 0xFF46);     // U+FF41..U+FF46
        }
    },

    ASSIGNED {
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) != Character.UNASSIGNED;
        }
    },

    NONCHARACTER_CODE_POINT {
        // PropList.txt:Noncharacter_Code_Point
        @Override
        public boolean is(int ch) {
            // Low 16 bits equal to FFFE or FFFF, or U+FDD0..U+FDEF.
            return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
        }
    },

    DIGIT {
        // \p{gc=Decimal_Number}
        @Override
        public boolean is(int ch) {
            return Character.isDigit(ch);
        }
    },

    ALNUM {
        // \p{alpha}
        // \p{digit}
        @Override
        public boolean is(int ch) {
            return ALPHABETIC.is(ch) || DIGIT.is(ch);
        }
    },

    BLANK {
        // \p{Whitespace} --
        //    [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
        //     \p{gc=Line_Separator}
        //     \p{gc=Paragraph_Separator}]
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) == Character.SPACE_SEPARATOR ||
                   ch == 0x9; // \N{HT}
        }
    },

    GRAPH {
        // [^
        //  \p{space}
        //  \p{gc=Control}
        //  \p{gc=Surrogate}
        //  \p{gc=Unassigned}]
        @Override
        public boolean is(int ch) {
            return !inCategories(ch,
                       (1 << Character.SPACE_SEPARATOR) |
                       (1 << Character.LINE_SEPARATOR) |
                       (1 << Character.PARAGRAPH_SEPARATOR) |
                       (1 << Character.CONTROL) |
                       (1 << Character.SURROGATE) |
                       (1 << Character.UNASSIGNED));
        }
    },

    PRINT {
        // \p{graph}
        // \p{blank}
        // -- \p{cntrl}
        @Override
        public boolean is(int ch) {
            return (GRAPH.is(ch) || BLANK.is(ch)) && !CONTROL.is(ch);
        }
    },

    WORD {
        //  \p{alpha}
        //  \p{gc=Mark}
        //  \p{digit}
        //  \p{gc=Connector_Punctuation}
        //  \p{Join_Control}    200C..200D
        @Override
        public boolean is(int ch) {
            return ALPHABETIC.is(ch) ||
                   inCategories(ch,
                       (1 << Character.NON_SPACING_MARK) |
                       (1 << Character.ENCLOSING_MARK) |
                       (1 << Character.COMBINING_SPACING_MARK) |
                       (1 << Character.DECIMAL_DIGIT_NUMBER) |
                       (1 << Character.CONNECTOR_PUNCTUATION)) ||
                   JOIN_CONTROL.is(ch);
        }
    },

    JOIN_CONTROL {
        //  200C..200D    PropList.txt:Join_Control
        @Override
        public boolean is(int ch) {
            return (ch == 0x200C || ch == 0x200D);
        }
    };

    /** Upper-cased POSIX class name -> constant. */
    private static final Map<String, UnicodeProp> posix = new HashMap<>();

    /**
     * Upper-cased property name -> constant. Holds every constant under its
     * own {@link #name()} plus the underscore-free aliases.
     */
    private static final Map<String, UnicodeProp> names = new HashMap<>();

    static {
        posix.put("ALPHA", ALPHABETIC);
        posix.put("LOWER", LOWERCASE);
        posix.put("UPPER", UPPERCASE);
        posix.put("SPACE", WHITE_SPACE);
        posix.put("PUNCT", PUNCTUATION);
        posix.put("XDIGIT", HEX_DIGIT);
        posix.put("ALNUM", ALNUM);
        posix.put("CNTRL", CONTROL);
        posix.put("DIGIT", DIGIT);
        posix.put("BLANK", BLANK);
        posix.put("GRAPH", GRAPH);
        posix.put("PRINT", PRINT);

        for (UnicodeProp prop : values()) {
            names.put(prop.name(), prop);
        }
        names.put("WHITESPACE", WHITE_SPACE);
        names.put("HEXDIGIT", HEX_DIGIT);
        names.put("NONCHARACTERCODEPOINT", NONCHARACTER_CODE_POINT);
        names.put("JOINCONTROL", JOIN_CONTROL);
    }

    /**
     * Tests whether the general category of a code point is one of a set.
     *
     * @param ch           the code point to classify
     * @param categoryMask bit mask in which bit {@code n} is set when the
     *                     category whose {@link Character#getType(int)} value
     *                     is {@code n} belongs to the set
     * @return {@code true} if the bit for {@code Character.getType(ch)} is
     *         set in {@code categoryMask}
     */
    private static boolean inCategories(int ch, int categoryMask) {
        return ((categoryMask >> Character.getType(ch)) & 1) != 0;
    }

    /**
     * Looks up a constant by property name, ignoring case.
     *
     * <p>Accepts the name of any constant (for example {@code "White_Space"})
     * and the underscore-free aliases {@code WhiteSpace}, {@code HexDigit},
     * {@code NoncharacterCodePoint} and {@code JoinControl}.
     *
     * @param propName the property name; must not be {@code null}
     * @return the matching constant, or {@code null} if the name is unknown
     * @throws NullPointerException if {@code propName} is {@code null}
     */
    public static UnicodeProp forName(String propName) {
        // A plain table lookup: an unknown name is an expected outcome here,
        // so it is reported as null without raising and discarding an
        // IllegalArgumentException from valueOf.
        return names.get(propName.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Looks up a constant by POSIX character class name, ignoring case.
     *
     * <p>Recognized names are {@code Alpha}, {@code Lower}, {@code Upper},
     * {@code Space}, {@code Punct}, {@code XDigit}, {@code Alnum},
     * {@code Cntrl}, {@code Digit}, {@code Blank}, {@code Graph} and
     * {@code Print}.
     *
     * @param propName the POSIX class name; must not be {@code null}
     * @return the matching constant, or {@code null} if the name is unknown
     * @throws NullPointerException if {@code propName} is {@code null}
     */
    public static UnicodeProp forPOSIXName(String propName) {
        return posix.get(propName.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Tests whether a code point has this property.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} has this property
     */
    public abstract boolean is(int ch);
}