1 /*
   2  * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.generatecharacter;
  27 
  28 import java.io.*;
  29 import java.util.*;
  30 import java.lang.*;
  31 
  32 /**
  33  * SpecialCaseMap has the responsibility of storing the
  34  * 1:M, locale-sensitive, and context sensitive case mappings
  35  * that occur when uppercasing Unicode 4.0 characters. This class can
  36  * read and parse the SpecialCasing.txt file that contains those mappings.
  37  * <p>
  38  * A single SpecialCaseMap contains the mapping for one character.
  39  * <p>
  40  * @author John O'Conner
  41  */
  42 public class SpecialCaseMap implements Comparable<SpecialCaseMap> {
  43 
  44     SpecialCaseMap() {
  45         chSource = 0xFFFF;
  46     }
  47 
  48 
  49     /**
  50      * Read and parse a Unicode special case map file.
  51      *
  52      * @param file   a file specifying the Unicode special case mappings
  53      * @return  an array of SpecialCaseMap objects, one for each line of the
  54      *          special case map data file that could be successfully parsed
  55      */
  56 
  57     public static SpecialCaseMap[] readSpecFile(File file, int plane) throws FileNotFoundException {
  58         ArrayList<SpecialCaseMap> caseMaps = new ArrayList<>(150);
  59         int count = 0;
  60         BufferedReader f = new BufferedReader(new FileReader(file));
  61                 String line = null;
  62         loop:
  63         while(true) {
  64             try {
  65                 line = f.readLine();
  66             }
  67             catch (IOException e) { break loop; }
  68                 if (line == null) break loop;
  69                 SpecialCaseMap item = parse(line.trim());
  70                 if (item != null) {
  71                                 if(item.getCharSource() >> 16 < plane) continue;
  72                                 if(item.getCharSource() >> 16 > plane) break;
  73                                 caseMaps.add(item);
  74                 ++count;
  75             }
  76 
  77         }
  78         caseMaps.trimToSize();
  79         SpecialCaseMap[] result = new SpecialCaseMap[caseMaps.size()];
  80         caseMaps.toArray(result);
  81         Arrays.sort(result);
  82         return result;
  83 
  84     }
  85 
  86    /**
  87     * Given one line of a Unicode special casing data file as a String, parse the line
  88     * and return a SpecialCaseMap object that contains the case mapping.
  89     *
  90     * @param s a line of the Unicode special case map data file to be parsed
  91     * @return a SpecialCaseMap object, or null if the parsing process failed for some reason
  92     */
  93     public static SpecialCaseMap parse(String s) {
  94         SpecialCaseMap spec = null;
  95         String[] tokens = new String[REQUIRED_FIELDS];
  96         if ( s != null && s.length() != 0 && s.charAt(0) != '#') {
  97             try {
  98                 int x = 0, tokenStart = 0, tokenEnd = 0;
  99                 for (x=0; x<REQUIRED_FIELDS-1; x++) {
 100                     tokenEnd = s.indexOf(';', tokenStart);
 101                     tokens[x] = s.substring(tokenStart, tokenEnd);
 102                     tokenStart = tokenEnd+1;
 103                 }
 104                 tokens[x] = s.substring(tokenStart);
 105 
 106                 if(tokens[FIELD_CONDITIONS].indexOf(';') == -1) {
 107                     spec = new SpecialCaseMap();
 108                     spec.setCharSource(parseChar(tokens[FIELD_SOURCE]));
 109                     spec.setUpperCaseMap(parseCaseMap(tokens[FIELD_UPPERCASE]));
 110                     spec.setLowerCaseMap(parseCaseMap(tokens[FIELD_LOWERCASE]));
 111                     spec.setTitleCaseMap(parseCaseMap(tokens[FIELD_TITLECASE]));
 112                     spec.setLocale(parseLocale(tokens[FIELD_CONDITIONS]));
 113                     spec.setContext(parseContext(tokens[FIELD_CONDITIONS]));
 114                 }
 115             }
 116             catch(Exception e) {
 117                 spec = null;
 118                 System.out.println("Error parsing spec line.");
 119             }
 120         }
 121         return spec;
 122     }
 123 
 124     static int parseChar(String token) throws NumberFormatException {
 125         return Integer.parseInt(token, 16);
 126     }
 127 
 128     static char[] parseCaseMap(String token ) throws NumberFormatException {
 129         int pos = 0;
 130         StringBuffer buff = new StringBuffer();
 131         int start = 0, end = 0;
 132         while(pos < token.length() ){
 133             while(Character.isSpaceChar(token.charAt(pos++)));
 134             --pos;
 135             start = pos;
 136             while(pos < token.length() && !Character.isSpaceChar(token.charAt(pos))) pos++;
 137             end = pos;
 138             int ch = parseChar(token.substring(start,end));
 139                         if (ch > 0xFFFF) {
 140                                 buff.append(getHighSurrogate(ch));
 141                                 buff.append(getLowSurrogate(ch));
 142                         } else {
 143                                 buff.append((char)ch);
 144                         }
 145         }
 146         char[] map = new char[buff.length()];
 147         buff.getChars(0, buff.length(), map, 0);
 148         return map;
 149     }
 150 
 151     static Locale parseLocale(String token) {
 152         return null;
 153     }
 154 
 155     static String[] parseContext(String token) {
 156         return null;
 157     }
 158 
 159     static  int find(int ch, SpecialCaseMap[] map) {
 160         if ((map == null) || (map.length == 0)) {
 161             return -1;
 162         }
 163         int top, bottom, current;
 164         bottom = 0;
 165         top = map.length;
 166         current = top/2;
 167         // invariant: top > current >= bottom && ch >= map.chSource
 168         while (top - bottom > 1) {
 169             if (ch >= map[current].getCharSource()) {
 170                 bottom = current;
 171             } else {
 172                 top = current;
 173             }
 174             current = (top + bottom) / 2;
 175         }
 176         if (ch == map[current].getCharSource()) return current;
 177         else return -1;
 178     }
 179 
 180     /*
 181      * Extracts and returns the high surrogate value from a UTF-32 code point.
 182      * If argument is a BMP character, then it is converted to a char and returned;
 183      * otherwise the high surrogate value is extracted.
 184      * @param codePoint a UTF-32 codePoint with value greater than 0xFFFF.
 185      * @return the high surrogate value that helps create <code>codePoint</code>; else
 186      *         the char representation of <code>codePoint</code> if it is a BMP character.
 187      * @since 1.5
 188      */
 189     static char getHighSurrogate(int codePoint) {
 190         char high = (char)codePoint;
 191         if (codePoint > 0xFFFF) {
 192             high = (char)((codePoint - 0x10000)/0x0400 + 0xD800);
 193         }
 194         return high;
 195     }
 196 
 197 
 198     /*
 199      * Extracts and returns the low surrogate value from a UTF-32 code point.
 200      * If argument is a BMP character, then it is converted to a char and returned;
 201      * otherwise the high surrogate value is extracted.
 202      * @param codePoint a UTF-32 codePoint with value greater than 0xFFFF.
 203      * @return the low surrogate value that helps create <code>codePoint</code>; else
 204      *         the char representation of <code>codePoint</code> if it is a BMP character.
 205      * @since 1.5
 206      */
 207     static char getLowSurrogate(int codePoint) {
 208         char low = (char)codePoint;
 209         if(codePoint > 0xFFFF) {
 210                 low = (char)((codePoint - 0x10000)%0x0400 + 0xDC00);
 211         }
 212         return low;
 213     }
 214 
 215     static String hex6(int n) {
 216         String str = Integer.toHexString(n & 0xFFFFFF).toUpperCase();
 217         return "000000".substring(Math.min(6, str.length())) + str;
 218     }
 219 
 220     static String hex6(char[] map){
 221         StringBuffer buff = new StringBuffer();
 222         int x=0;
 223         buff.append(hex6(map[x++]));
 224         while(x<map.length) {
 225             buff.append(" " + hex6(map[x++]));
 226         }
 227         return buff.toString();
 228     }
 229 
 230     void setCharSource(int ch) {
 231         chSource = ch;
 232     }
 233 
 234     void setLowerCaseMap(char[] map) {
 235         lowerCaseMap = map;
 236     }
 237 
 238     void setUpperCaseMap(char[] map) {
 239         upperCaseMap = map;
 240     }
 241 
 242     void setTitleCaseMap(char[] map) {
 243         titleCaseMap = map;
 244     }
 245 
 246     void setLocale(Locale locale) {
 247         this.locale = locale;
 248     }
 249 
 250     void setContext(String[] context) {
 251         this.context = context;
 252     }
 253 
 254     public int getCharSource() {
 255         return chSource;
 256     }
 257 
 258     public char[] getLowerCaseMap() {
 259         return lowerCaseMap;
 260     }
 261 
 262     public char[] getUpperCaseMap() {
 263         return upperCaseMap;
 264     }
 265 
 266     public char[] getTitleCaseMap() {
 267         return titleCaseMap;
 268     }
 269 
 270     public Locale getLocale() {
 271         return locale;
 272     }
 273 
 274     public String[] getContext() {
 275         return context;
 276     }
 277 
 278 
 279     int chSource;
 280     Locale locale;
 281     char[] lowerCaseMap;
 282     char[] upperCaseMap;
 283     char[] titleCaseMap;
 284     String[] context;
 285 
 286     /**
 287      * Fields that can be found in the SpecialCasing.txt file.
 288      */
 289     static int REQUIRED_FIELDS = 5;
 290     static int FIELD_SOURCE = 0;
 291     static int FIELD_LOWERCASE = 1;
 292     static int FIELD_TITLECASE = 2;
 293     static int FIELD_UPPERCASE = 3;
 294     static int FIELD_CONDITIONS = 4;
 295 
 296     /**
 297      * Context values
 298      */
 299     static String CONTEXT_FINAL = "FINAL";
 300     static String CONTEXT_NONFINAL = "NON_FINAL";
 301     static String CONTEXT_MODERN = "MODERN";
 302     static String CONTEXT_NONMODERN = "NON_MODERN";
 303 
 304     public int compareTo(SpecialCaseMap otherObject) {
 305         if (chSource < otherObject.chSource) {
 306             return -1;
 307         }
 308         else if (chSource > otherObject.chSource) {
 309             return 1;
 310         }
 311         else return 0;
 312     }
 313 
 314     public boolean equals(Object o1) {
 315         if (this == o1) {
 316             return true;
 317         }
 318         if (o1 == null || !(o1 instanceof SpecialCaseMap)) {
 319             return false;
 320         }
 321         SpecialCaseMap other = (SpecialCaseMap)o1;
 322         boolean bEqual = false;
 323         if (0 == compareTo(other)) {
 324             bEqual = true;
 325         }
 326         return bEqual;
 327     }
 328 
 329     public String toString() {
 330         StringBuffer buff = new StringBuffer();
 331         buff.append(hex6(getCharSource()));
 332         buff.append("|" + hex6(lowerCaseMap));
 333         buff.append("|" + hex6(upperCaseMap));
 334         buff.append("|" + hex6(titleCaseMap));
 335         buff.append("|" + context);
 336         return buff.toString();
 337     }
 338 
 339     public int hashCode() {
 340         return chSource;
 341     }
 342 
 343     public static void main(String[] args) {
 344         SpecialCaseMap[] spec = null;
 345         if (args.length == 2 ) {
 346             try {
 347                 File file = new File(args[0]);
 348                 int plane = Integer.parseInt(args[1]);
 349                 spec = SpecialCaseMap.readSpecFile(file, plane);
 350                 System.out.println("SpecialCaseMap[" + spec.length + "]:");
 351                 for (int x=0; x<spec.length; x++) {
 352                     System.out.println(spec[x].toString());
 353                 }
 354             }
 355             catch(Exception e) {
 356                 e.printStackTrace();
 357             }
 358         }
 359 
 360     }
 361 
 362 }