1 /*
   2  * Copyright (c) 2010, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.generatecharacter;
  27 
  28 import java.util.regex.*;
  29 import java.util.*;
  30 import java.io.*;
  31 
  32 public class CharacterScript {
  33 
  34     // generate the code needed for j.l.C.UnicodeScript
  35     static void fortest(String fmt, Object... o) {
  36         //System.out.printf(fmt, o);
  37     }
  38 
  39     static void print(String fmt, Object... o) {
  40         System.out.printf(fmt, o);
  41     }
  42 
  43     static void debug(String fmt, Object... o) {
  44         //System.out.printf(fmt, o);
  45     }
  46 
  47     public static void main(String args[]){
  48         try {
  49             if (args.length != 1) {
  50                 System.out.println("java CharacterScript script.txt out");
  51                 System.exit(1);
  52             }
  53 
  54             int i, j;
  55             BufferedReader sbfr = new BufferedReader(new FileReader(args[0]));
  56             HashMap<String,Integer> scriptMap = new HashMap<String,Integer>();
  57             String line = null;
  58 
  59             Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
  60 
  61             int prevS = -1;
  62             int prevE = -1;
  63             String prevN = null;
  64             int[][] scripts = new int[1024][3];
  65             int scriptSize = 0;
  66 
  67             while ((line = sbfr.readLine()) != null) {
  68                 if (line.length() <= 1 || line.charAt(0) == '#') {
  69                     continue;
  70                 }
  71                 m.reset(line);
  72                 if (m.matches()) {
  73                     int start = Integer.parseInt(m.group(1), 16);
  74                     int end = (m.group(2)==null)?start
  75                               :Integer.parseInt(m.group(2), 16);
  76                     String name = m.group(3);
  77                     if (name.equals(prevN) && start == prevE + 1) {
  78                         prevE = end;
  79                     } else {
  80                         if (prevS != -1) {
  81                             if (scriptMap.get(prevN) == null) {
  82                                 scriptMap.put(prevN, scriptMap.size());
  83                             }
  84                             scripts[scriptSize][0] = prevS;
  85                             scripts[scriptSize][1] = prevE;
  86                             scripts[scriptSize][2] = scriptMap.get(prevN);
  87                             scriptSize++;
  88                         }
  89                         debug("%x-%x\t%s%n", prevS, prevE, prevN);
  90                         prevS = start; prevE = end; prevN = name;
  91                     }
  92                 } else {
  93                     debug("Warning: Unrecognized line <%s>%n", line);
  94                 }
  95             }
  96 
  97             //last one.
  98             if (scriptMap.get(prevN) == null) {
  99                 scriptMap.put(prevN, scriptMap.size());
 100             }
 101             scripts[scriptSize][0] = prevS;
 102             scripts[scriptSize][1] = prevE;
 103             scripts[scriptSize][2] = scriptMap.get(prevN);
 104             scriptSize++;
 105 
 106             debug("%x-%x\t%s%n", prevS, prevE, prevN);
 107             debug("-----------------%n");
 108             debug("Total scripts=%s%n", scriptMap.size());
 109             debug("-----------------%n%n");
 110 
 111             String[] names = new String[scriptMap.size()];
 112             for (String name: scriptMap.keySet()) {
 113                 names[scriptMap.get(name).intValue()] = name;
 114             }
 115 
 116             for (j = 0; j < scriptSize; j++) {
 117                 for (int cp = scripts[j][0]; cp <= scripts[j][1]; cp++) {
 118                     String name = names[scripts[j][2]].toUpperCase(Locale.ENGLISH);;
 119                     if (cp > 0xffff)
 120                         System.out.printf("%05X    %s%n", cp, name);
 121                     else
 122                         System.out.printf("%05X    %s%n", cp, name);
 123                 }
 124             }
 125 
 126             Arrays.sort(scripts, 0, scriptSize,
 127                         new Comparator<int[]>() {
 128                             public int compare(int[] a1, int[] a2) {
 129                                 return a1[0] - a2[0];
 130                             }
 131                             public boolean compare(Object obj) {
 132                                 return obj == this;
 133                             }
 134                          });
 135 
 136 
 137 
 138             // Consolidation: there are lots of "reserved" code points
 139             // embedded in those otherwise "sequential" blocks.
 140             // To make the lookup table smaller, we combine those
 141             // separated segments with the assumption that the lookup
 142             // implementation checks
 143             //    Character.getType() !=  Character.UNASSIGNED
 144             // first (return UNKNOWN for unassigned)
 145 
 146             ArrayList<int[]> list = new ArrayList<>();
 147             list.add(scripts[0]);
 148 
 149             int[] last = scripts[0];
 150             for (i = 1; i < scriptSize; i++) {
 151                 if (scripts[i][0] != (last[1] + 1)) {
 152 
 153                     boolean isNotUnassigned = false;
 154                     for (int cp = last[1] + 1; cp < scripts[i][0]; cp++) {
 155                         if (Character.getType(cp) != Character.UNASSIGNED) {
 156                             isNotUnassigned = true;
 157                             debug("Warning: [%x] is ASSIGNED but in NON script%n", cp);
 158                             break;
 159                         }
 160                     }
 161                     if (isNotUnassigned) {
 162                         // surrogates only?
 163                         int[] a = new int[3];
 164                         a[0] = last[1] + 1;
 165                         a[1] = scripts[i][0] - 1;
 166                         a[2] = -1;  // unknown
 167                         list.add(a);
 168                     } else {
 169                         if (last[2] == scripts[i][2]) {
 170                             //combine
 171                             last[1] = scripts[i][1];
 172                             continue;
 173                         } else {
 174                             // expand last
 175                             last[1] = scripts[i][0] - 1;
 176                         }
 177                     }
 178                 }
 179                 list.add(scripts[i]);
 180                 last = scripts[i];
 181             }
 182 
 183             for (i = 0; i < list.size(); i++) {
 184                 int[] a = list.get(i);
 185                 String name = "UNKNOWN";
 186                 if (a[2] != -1)
 187                     name = names[a[2]].toUpperCase(Locale.US);
 188                 debug("0x%05x, 0x%05x  %s%n", a[0], a[1], name);
 189             }
 190             debug("--->total=%d%n", list.size());
 191 
 192 
 193             //////////////////OUTPUT//////////////////////////////////
 194             print("public class Scripts {%n%n");
 195             print("    public static enum UnicodeScript {%n");
 196             for (i = 0; i < names.length; i++) {
 197                 print("        /**%n         * Unicode script \"%s\".%n         */%n", names[i]);
 198                 print("        %s,%n%n",  names[i].toUpperCase(Locale.US));
 199             }
 200             print("        /**%n         * Unicode script \"Unknown\".%n         */%n        UNKNOWN;%n%n");
 201 
 202 
 203             // lookup table
 204             print("        private static final int[] scriptStarts = {%n");
 205             for (int[] a : list) {
 206                 String name = "UNKNOWN";
 207                 if (a[2] != -1)
 208                     name = names[a[2]].toUpperCase(Locale.US);
 209                 if (a[0] < 0x10000)
 210                     print("            0x%04X,   // %04X..%04X; %s%n",
 211                           a[0], a[0], a[1], name);
 212                 else
 213                     print("            0x%05X,  // %05X..%05X; %s%n",
 214                           a[0], a[0], a[1], name);
 215             }
 216             last = list.get(list.size() -1);
 217             if (last[1] != Character.MAX_CODE_POINT)
 218                 print("            0x%05X   // %05X..%06X; %s%n",
 219                       last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT,
 220                       "UNKNOWN");
 221             print("%n        };%n%n");
 222 
 223             print("        private static final UnicodeScript[] scripts = {%n");
 224             for (int[] a : list) {
 225                 String name = "UNKNOWN";
 226                 if (a[2] != -1)
 227                     name = names[a[2]].toUpperCase(Locale.US);
 228                 print("            %s,%n", name);
 229             }
 230 
 231             if (last[1] != Character.MAX_CODE_POINT)
 232                 print("            UNKNOWN%n");
 233             print("        };%n");
 234             print("    }%n");
 235             print("}%n");
 236 
 237         } catch (Exception e) {
 238             e.printStackTrace();
 239         }
 240     }
 241 }