1 /* 2 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package build.tools.generatecharacter; 27 28 import java.io.*; 29 import java.util.*; 30 import java.lang.*; 31 32 /** 33 * SpecialCaseMap has the responsibility of storing the 34 * 1:M, locale-sensitive, and context sensitive case mappings 35 * that occur when uppercasing Unicode 4.0 characters. This class can 36 * read and parse the SpecialCasing.txt file that contains those mappings. 37 * <p> 38 * A single SpecialCaseMap contains the mapping for one character. 39 * <p> 40 * @author John O'Conner 41 */ 42 public class SpecialCaseMap implements Comparable<SpecialCaseMap> { 43 44 SpecialCaseMap() { 45 chSource = 0xFFFF; 46 } 47 48 49 /** 50 * Read and parse a Unicode special case map file. 51 * 52 * @param file a file specifying the Unicode special case mappings 53 * @return an array of SpecialCaseMap objects, one for each line of the 54 * special case map data file that could be successfully parsed 55 */ 56 57 public static SpecialCaseMap[] readSpecFile(File file, int plane) throws FileNotFoundException { 58 ArrayList<SpecialCaseMap> caseMaps = new ArrayList<>(150); 59 int count = 0; 60 BufferedReader f = new BufferedReader(new FileReader(file)); 61 String line = null; 62 loop: 63 while(true) { 64 try { 65 line = f.readLine(); 66 } 67 catch (IOException e) { break loop; } 68 if (line == null) break loop; 69 SpecialCaseMap item = parse(line.trim()); 70 if (item != null) { 71 if(item.getCharSource() >> 16 < plane) continue; 72 if(item.getCharSource() >> 16 > plane) break; 73 caseMaps.add(item); 74 ++count; 75 } 76 77 } 78 caseMaps.trimToSize(); 79 SpecialCaseMap[] result = new SpecialCaseMap[caseMaps.size()]; 80 caseMaps.toArray(result); 81 Arrays.sort(result); 82 return result; 83 84 } 85 86 /** 87 * Given one line of a Unicode special casing data file as a String, parse the line 88 * and return a SpecialCaseMap object that contains the case mapping. 89 * 90 * @param s a line of the Unicode special case map data file to be parsed 91 * @return a SpecialCaseMap object, or null if the parsing process failed for some reason 92 */ 93 public static SpecialCaseMap parse(String s) { 94 SpecialCaseMap spec = null; 95 String[] tokens = new String[REQUIRED_FIELDS]; 96 if ( s != null && s.length() != 0 && s.charAt(0) != '#') { 97 try { 98 int x = 0, tokenStart = 0, tokenEnd = 0; 99 for (x=0; x<REQUIRED_FIELDS-1; x++) { 100 tokenEnd = s.indexOf(';', tokenStart); 101 tokens[x] = s.substring(tokenStart, tokenEnd); 102 tokenStart = tokenEnd+1; 103 } 104 tokens[x] = s.substring(tokenStart); 105 106 if(tokens[FIELD_CONDITIONS].indexOf(';') == -1) { 107 spec = new SpecialCaseMap(); 108 spec.setCharSource(parseChar(tokens[FIELD_SOURCE])); 109 spec.setUpperCaseMap(parseCaseMap(tokens[FIELD_UPPERCASE])); 110 spec.setLowerCaseMap(parseCaseMap(tokens[FIELD_LOWERCASE])); 111 spec.setTitleCaseMap(parseCaseMap(tokens[FIELD_TITLECASE])); 112 spec.setLocale(parseLocale(tokens[FIELD_CONDITIONS])); 113 spec.setContext(parseContext(tokens[FIELD_CONDITIONS])); 114 } 115 } 116 catch(Exception e) { 117 spec = null; 118 System.out.println("Error parsing spec line."); 119 } 120 } 121 return spec; 122 } 123 124 static int parseChar(String token) throws NumberFormatException { 125 return Integer.parseInt(token, 16); 126 } 127 128 static char[] parseCaseMap(String token ) throws NumberFormatException { 129 int pos = 0; 130 StringBuffer buff = new StringBuffer(); 131 int start = 0, end = 0; 132 while(pos < token.length() ){ 133 while(Character.isSpaceChar(token.charAt(pos++))); 134 --pos; 135 start = pos; 136 while(pos < token.length() && !Character.isSpaceChar(token.charAt(pos))) pos++; 137 end = pos; 138 int ch = parseChar(token.substring(start,end)); 139 if (ch > 0xFFFF) { 140 buff.append(getHighSurrogate(ch)); 141 buff.append(getLowSurrogate(ch)); 142 } else { 143 buff.append((char)ch); 144 } 145 } 146 char[] map = new char[buff.length()]; 147 buff.getChars(0, buff.length(), map, 0); 148 return map; 149 } 150 151 static Locale parseLocale(String token) { 152 return null; 153 } 154 155 static String[] parseContext(String token) { 156 return null; 157 } 158 159 static int find(int ch, SpecialCaseMap[] map) { 160 if ((map == null) || (map.length == 0)) { 161 return -1; 162 } 163 int top, bottom, current; 164 bottom = 0; 165 top = map.length; 166 current = top/2; 167 // invariant: top > current >= bottom && ch >= map.chSource 168 while (top - bottom > 1) { 169 if (ch >= map[current].getCharSource()) { 170 bottom = current; 171 } else { 172 top = current; 173 } 174 current = (top + bottom) / 2; 175 } 176 if (ch == map[current].getCharSource()) return current; 177 else return -1; 178 } 179 180 /* 181 * Extracts and returns the high surrogate value from a UTF-32 code point. 182 * If argument is a BMP character, then it is converted to a char and returned; 183 * otherwise the high surrogate value is extracted. 184 * @param codePoint a UTF-32 codePoint with value greater than 0xFFFF. 185 * @return the high surrogate value that helps create <code>codePoint</code>; else 186 * the char representation of <code>codePoint</code> if it is a BMP character. 187 * @since 1.5 188 */ 189 static char getHighSurrogate(int codePoint) { 190 char high = (char)codePoint; 191 if (codePoint > 0xFFFF) { 192 high = (char)((codePoint - 0x10000)/0x0400 + 0xD800); 193 } 194 return high; 195 } 196 197 198 /* 199 * Extracts and returns the low surrogate value from a UTF-32 code point. 200 * If argument is a BMP character, then it is converted to a char and returned; 201 * otherwise the high surrogate value is extracted. 202 * @param codePoint a UTF-32 codePoint with value greater than 0xFFFF. 203 * @return the low surrogate value that helps create <code>codePoint</code>; else 204 * the char representation of <code>codePoint</code> if it is a BMP character. 205 * @since 1.5 206 */ 207 static char getLowSurrogate(int codePoint) { 208 char low = (char)codePoint; 209 if(codePoint > 0xFFFF) { 210 low = (char)((codePoint - 0x10000)%0x0400 + 0xDC00); 211 } 212 return low; 213 } 214 215 static String hex6(int n) { 216 String str = Integer.toHexString(n & 0xFFFFFF).toUpperCase(); 217 return "000000".substring(Math.min(6, str.length())) + str; 218 } 219 220 static String hex6(char[] map){ 221 StringBuffer buff = new StringBuffer(); 222 int x=0; 223 buff.append(hex6(map[x++])); 224 while(x<map.length) { 225 buff.append(" " + hex6(map[x++])); 226 } 227 return buff.toString(); 228 } 229 230 void setCharSource(int ch) { 231 chSource = ch; 232 } 233 234 void setLowerCaseMap(char[] map) { 235 lowerCaseMap = map; 236 } 237 238 void setUpperCaseMap(char[] map) { 239 upperCaseMap = map; 240 } 241 242 void setTitleCaseMap(char[] map) { 243 titleCaseMap = map; 244 } 245 246 void setLocale(Locale locale) { 247 this.locale = locale; 248 } 249 250 void setContext(String[] context) { 251 this.context = context; 252 } 253 254 public int getCharSource() { 255 return chSource; 256 } 257 258 public char[] getLowerCaseMap() { 259 return lowerCaseMap; 260 } 261 262 public char[] getUpperCaseMap() { 263 return upperCaseMap; 264 } 265 266 public char[] getTitleCaseMap() { 267 return titleCaseMap; 268 } 269 270 public Locale getLocale() { 271 return locale; 272 } 273 274 public String[] getContext() { 275 return context; 276 } 277 278 279 int chSource; 280 Locale locale; 281 char[] lowerCaseMap; 282 char[] upperCaseMap; 283 char[] titleCaseMap; 284 String[] context; 285 286 /** 287 * Fields that can be found in the SpecialCasing.txt file. 288 */ 289 static int REQUIRED_FIELDS = 5; 290 static int FIELD_SOURCE = 0; 291 static int FIELD_LOWERCASE = 1; 292 static int FIELD_TITLECASE = 2; 293 static int FIELD_UPPERCASE = 3; 294 static int FIELD_CONDITIONS = 4; 295 296 /** 297 * Context values 298 */ 299 static String CONTEXT_FINAL = "FINAL"; 300 static String CONTEXT_NONFINAL = "NON_FINAL"; 301 static String CONTEXT_MODERN = "MODERN"; 302 static String CONTEXT_NONMODERN = "NON_MODERN"; 303 304 public int compareTo(SpecialCaseMap otherObject) { 305 if (chSource < otherObject.chSource) { 306 return -1; 307 } 308 else if (chSource > otherObject.chSource) { 309 return 1; 310 } 311 else return 0; 312 } 313 314 public boolean equals(Object o1) { 315 if (this == o1) { 316 return true; 317 } 318 if (o1 == null || !(o1 instanceof SpecialCaseMap)) { 319 return false; 320 } 321 SpecialCaseMap other = (SpecialCaseMap)o1; 322 boolean bEqual = false; 323 if (0 == compareTo(other)) { 324 bEqual = true; 325 } 326 return bEqual; 327 } 328 329 public String toString() { 330 StringBuffer buff = new StringBuffer(); 331 buff.append(hex6(getCharSource())); 332 buff.append("|" + hex6(lowerCaseMap)); 333 buff.append("|" + hex6(upperCaseMap)); 334 buff.append("|" + hex6(titleCaseMap)); 335 buff.append("|" + context); 336 return buff.toString(); 337 } 338 339 public int hashCode() { 340 return chSource; 341 } 342 343 public static void main(String[] args) { 344 SpecialCaseMap[] spec = null; 345 if (args.length == 2 ) { 346 try { 347 File file = new File(args[0]); 348 int plane = Integer.parseInt(args[1]); 349 spec = SpecialCaseMap.readSpecFile(file, plane); 350 System.out.println("SpecialCaseMap[" + spec.length + "]:"); 351 for (int x=0; x<spec.length; x++) { 352 System.out.println(spec[x].toString()); 353 } 354 } 355 catch(Exception e) { 356 e.printStackTrace(); 357 } 358 } 359 360 } 361 362 }