/*
 * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package java.util.regex;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
/**
 * Unicode binary properties and POSIX-style character classes, each exposed as
 * a predicate over a single code point through {@link #is(int)}.
 *
 * <p>Constants are resolved from text with {@link #forName(String)} (constant
 * names plus a few underscore-free aliases) or {@link #forPOSIXName(String)}
 * (POSIX class names such as {@code alpha} or {@code xdigit}). Both lookups
 * ignore case and return {@code null} when the name is not recognized.
 */
enum UnicodeProp {

    ALPHABETIC {
        @Override
        public boolean is(int ch) {
            return Character.isAlphabetic(ch);
        }
    },

    LETTER {
        @Override
        public boolean is(int ch) {
            return Character.isLetter(ch);
        }
    },

    IDEOGRAPHIC {
        @Override
        public boolean is(int ch) {
            return Character.isIdeographic(ch);
        }
    },

    LOWERCASE {
        @Override
        public boolean is(int ch) {
            return Character.isLowerCase(ch);
        }
    },

    UPPERCASE {
        @Override
        public boolean is(int ch) {
            return Character.isUpperCase(ch);
        }
    },

    TITLECASE {
        @Override
        public boolean is(int ch) {
            return Character.isTitleCase(ch);
        }
    },

    /** {@code \p{Whitespace}}: any separator category, U+0009..U+000D, or U+0085. */
    WHITE_SPACE {
        @Override
        public boolean is(int ch) {
            return hasCategory(ch, SEPARATOR_MASK)
                || (ch >= 0x9 && ch <= 0xd)
                || ch == 0x85;
        }
    },

    /** {@code \p{gc=Control}}. */
    CONTROL {
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) == Character.CONTROL;
        }
    },

    /** {@code \p{gc=Punctuation}}: any of the seven punctuation categories. */
    PUNCTUATION {
        @Override
        public boolean is(int ch) {
            return hasCategory(ch, PUNCTUATION_MASK);
        }
    },

    /**
     * {@code \p{gc=Decimal_Number}} or {@code \p{Hex_Digit}} (PropList.txt:
     * Hex_Digit).
     */
    HEX_DIGIT {
        @Override
        public boolean is(int ch) {
            return DIGIT.is(ch)
                || (ch >= 0x0030 && ch <= 0x0039)   // 0-9
                || (ch >= 0x0041 && ch <= 0x0046)   // A-F
                || (ch >= 0x0061 && ch <= 0x0066)   // a-f
                || (ch >= 0xFF10 && ch <= 0xFF19)   // fullwidth 0-9
                || (ch >= 0xFF21 && ch <= 0xFF26)   // fullwidth A-F
                || (ch >= 0xFF41 && ch <= 0xFF46);  // fullwidth a-f
        }
    },

    /** Every code point whose general category is not {@code UNASSIGNED}. */
    ASSIGNED {
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) != Character.UNASSIGNED;
        }
    },

    /** PropList.txt: Noncharacter_Code_Point. */
    NONCHARACTER_CODE_POINT {
        @Override
        public boolean is(int ch) {
            // The low 16 bits being FFFE or FFFF covers the last two code
            // points of every plane; U+FDD0..U+FDEF is the remaining range.
            return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
        }
    },

    /** {@code \p{gc=Decimal_Number}}. */
    DIGIT {
        @Override
        public boolean is(int ch) {
            return Character.isDigit(ch);
        }
    },

    /** {@code \p{alpha}} or {@code \p{digit}}. */
    ALNUM {
        @Override
        public boolean is(int ch) {
            return ALPHABETIC.is(ch) || DIGIT.is(ch);
        }
    },

    /**
     * {@code \p{Whitespace}} minus LF, VT, FF, CR, NEL, Line_Separator and
     * Paragraph_Separator; that is, Space_Separator plus horizontal tab.
     */
    BLANK {
        @Override
        public boolean is(int ch) {
            return Character.getType(ch) == Character.SPACE_SEPARATOR
                || ch == 0x9; // \N{HT}
        }
    },

    /**
     * Everything except {@code \p{space}}, {@code \p{gc=Control}},
     * {@code \p{gc=Surrogate}} and {@code \p{gc=Unassigned}}, decided purely
     * on the general category of the code point.
     */
    GRAPH {
        @Override
        public boolean is(int ch) {
            return !hasCategory(ch, NON_GRAPH_MASK);
        }
    },

    /** {@code \p{graph}} or {@code \p{blank}}, excluding {@code \p{cntrl}}. */
    PRINT {
        @Override
        public boolean is(int ch) {
            return (GRAPH.is(ch) || BLANK.is(ch)) && !CONTROL.is(ch);
        }
    },

    /**
     * {@code \p{alpha}}, {@code \p{gc=Mark}}, {@code \p{digit}} or
     * {@code \p{gc=Connector_Punctuation}}.
     */
    WORD {
        @Override
        public boolean is(int ch) {
            return ALPHABETIC.is(ch) || hasCategory(ch, WORD_EXTRA_MASK);
        }
    };

    /** Category bits for Space_Separator, Line_Separator, Paragraph_Separator. */
    private static final int SEPARATOR_MASK =
          (1 << Character.SPACE_SEPARATOR)
        | (1 << Character.LINE_SEPARATOR)
        | (1 << Character.PARAGRAPH_SEPARATOR);

    /** Category bits for all punctuation categories. */
    private static final int PUNCTUATION_MASK =
          (1 << Character.CONNECTOR_PUNCTUATION)
        | (1 << Character.DASH_PUNCTUATION)
        | (1 << Character.START_PUNCTUATION)
        | (1 << Character.END_PUNCTUATION)
        | (1 << Character.OTHER_PUNCTUATION)
        | (1 << Character.INITIAL_QUOTE_PUNCTUATION)
        | (1 << Character.FINAL_QUOTE_PUNCTUATION);

    /** Category bits excluded from {@link #GRAPH}. */
    private static final int NON_GRAPH_MASK =
          SEPARATOR_MASK
        | (1 << Character.CONTROL)
        | (1 << Character.SURROGATE)
        | (1 << Character.UNASSIGNED);

    /** Category bits that {@link #WORD} accepts in addition to alphabetic. */
    private static final int WORD_EXTRA_MASK =
          (1 << Character.NON_SPACING_MARK)
        | (1 << Character.ENCLOSING_MARK)
        | (1 << Character.COMBINING_SPACING_MARK)
        | (1 << Character.DECIMAL_DIGIT_NUMBER)
        | (1 << Character.CONNECTOR_PUNCTUATION);

    /** Upper-cased constant names and aliases, mapped to their constant. */
    private static final Map<String, UnicodeProp> BY_NAME = new HashMap<>();

    /** Upper-cased POSIX class names, mapped to their constant. */
    private static final Map<String, UnicodeProp> BY_POSIX_NAME = new HashMap<>();

    static {
        // Every constant is reachable under its own name ...
        for (UnicodeProp prop : values()) {
            BY_NAME.put(prop.name(), prop);
        }
        // ... and these three also under an underscore-free alias.
        BY_NAME.put("WHITESPACE", WHITE_SPACE);
        BY_NAME.put("HEXDIGIT", HEX_DIGIT);
        BY_NAME.put("NONCHARACTERCODEPOINT", NONCHARACTER_CODE_POINT);

        BY_POSIX_NAME.put("ALPHA", ALPHABETIC);
        BY_POSIX_NAME.put("LOWER", LOWERCASE);
        BY_POSIX_NAME.put("UPPER", UPPERCASE);
        BY_POSIX_NAME.put("SPACE", WHITE_SPACE);
        BY_POSIX_NAME.put("PUNCT", PUNCTUATION);
        BY_POSIX_NAME.put("XDIGIT", HEX_DIGIT);
        BY_POSIX_NAME.put("ALNUM", ALNUM);
        BY_POSIX_NAME.put("CNTRL", CONTROL);
        BY_POSIX_NAME.put("DIGIT", DIGIT);
        BY_POSIX_NAME.put("BLANK", BLANK);
        BY_POSIX_NAME.put("GRAPH", GRAPH);
        BY_POSIX_NAME.put("PRINT", PRINT);
    }

    /**
     * Tests whether the general category of {@code ch}, as reported by
     * {@link Character#getType(int)}, is one of the categories whose bit is
     * set in {@code categoryMask}.
     */
    private static boolean hasCategory(int ch, int categoryMask) {
        return ((categoryMask >> Character.getType(ch)) & 1) != 0;
    }

    /**
     * Resolves a property by constant name or alias, ignoring case.
     *
     * <p>The lookup uses a precomputed table rather than {@code valueOf}, so
     * an unknown name is an ordinary miss instead of a caught exception.
     *
     * @param propName the property name; must not be {@code null}
     * @return the matching constant, or {@code null} if the name is unknown
     * @throws NullPointerException if {@code propName} is {@code null}
     */
    public static UnicodeProp forName(String propName) {
        return BY_NAME.get(propName.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Resolves a property by POSIX class name, ignoring case.
     *
     * @param propName the POSIX class name; must not be {@code null}
     * @return the matching constant, or {@code null} if the name is not a
     *         known POSIX class name
     * @throws NullPointerException if {@code propName} is {@code null}
     */
    public static UnicodeProp forPOSIXName(String propName) {
        return BY_POSIX_NAME.get(propName.toUpperCase(Locale.ENGLISH));
    }

    /**
     * Tests whether the given code point has this property.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} has this property
     */
    public abstract boolean is(int ch);
}