/*
 * Copyright (c) 2010, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package org.openjdk.buildtools.generatecharacter;

import java.util.regex.*;
import java.util.*;
import java.io.*;

/**
 * Generates the code needed for j.l.C.UnicodeScript.
 *
 * <p>Reads a script data file whose data lines look like
 * {@code HEX[..HEX] ; Name # comment}, merges adjacent ranges, and writes to
 * standard output (a) one line per code point with its upper-cased script
 * name, followed by (b) the source of a {@code Scripts} class containing the
 * {@code UnicodeScript} enum and its two lookup tables.
 */
public class CharacterScript {

    /** Matches one data line: start code point, optional end code point, script name. */
    private static final Pattern SCRIPT_LINE = Pattern.compile(
        "(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*");

    /** Script index stored in a range that belongs to no script. */
    private static final int UNKNOWN_SCRIPT = -1;

    /** Orders ranges ({@code {start, end, scriptIndex}}) by their start code point. */
    private static final Comparator<int[]> BY_START = new Comparator<int[]>() {
        @Override
        public int compare(int[] a1, int[] a2) {
            return Integer.compare(a1[0], a2[0]);
        }
    };

    // generate the code needed for j.l.C.UnicodeScript
    static void fortest(String fmt, Object... o) {
        //System.out.printf(fmt, o);
    }

    /** Writes formatted text to standard output (the generated result). */
    static void print(String fmt, Object... o) {
        System.out.printf(fmt, o);
    }

    /** Diagnostic hook; prints nothing unless the body is uncommented. */
    static void debug(String fmt, Object... o) {
        //System.out.printf(fmt, o);
    }

    /**
     * Entry point.
     *
     * @param args exactly one element: the path of the script data file.
     *             Any other argument count prints the usage line and exits
     *             with status 1. Failures while processing are reported with
     *             a stack trace.
     */
    public static void main(String args[]){
        try {
            if (args.length != 1) {
                // Only the input file is taken; the result goes to stdout, so
                // the usage text is kept off stdout.
                System.err.println("java CharacterScript script.txt");
                System.exit(1);
            }

            Map<String, Integer> scriptMap = new HashMap<>();
            List<int[]> ranges;
            try (BufferedReader sbfr = new BufferedReader(new FileReader(args[0]))) {
                ranges = readRanges(sbfr, scriptMap);
            }
            if (ranges.isEmpty()) {
                throw new IllegalArgumentException(
                    "No script ranges found in " + args[0]);
            }

            debug("-----------------%n");
            debug("Total scripts=%s%n", scriptMap.size());
            debug("-----------------%n%n");

            // Invert the name -> index map into an index -> name table.
            String[] names = new String[scriptMap.size()];
            for (Map.Entry<String, Integer> e : scriptMap.entrySet()) {
                names[e.getValue().intValue()] = e.getKey();
            }

            // One line per code point, in file order (before sorting).
            for (int[] r : ranges) {
                String name = names[r[2]].toUpperCase(Locale.ENGLISH);
                for (int cp = r[0]; cp <= r[1]; cp++) {
                    print("%05X %s%n", cp, name);
                }
            }

            Collections.sort(ranges, BY_START);

            List<int[]> list = consolidate(ranges);

            for (int[] a : list) {
                debug("0x%05x, 0x%05x %s%n", a[0], a[1], scriptName(names, a[2]));
            }
            debug("--->total=%d%n", list.size());

            emitScriptsClass(names, list);

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses the data lines and merges consecutive lines that carry the same
     * script name and are contiguous (next start == previous end + 1).
     * Lines of length <= 1, lines starting with '#', and lines that do not
     * match the expected format are skipped.
     *
     * @param in        source of lines
     * @param scriptMap receives script name -> index; an index is assigned
     *                  the first time a range of that script is recorded
     * @return the merged ranges, each {@code {start, end, scriptIndex}}, in
     *         file order; empty when no line matched
     */
    private static List<int[]> readRanges(BufferedReader in,
                                          Map<String, Integer> scriptMap)
        throws IOException
    {
        List<int[]> ranges = new ArrayList<>();
        Matcher m = SCRIPT_LINE.matcher("");

        int prevS = -1;
        int prevE = -1;
        String prevN = null;   // null until the first data line is seen

        String line;
        while ((line = in.readLine()) != null) {
            if (line.length() <= 1 || line.charAt(0) == '#') {
                continue;
            }
            m.reset(line);
            if (!m.matches()) {
                debug("Warning: Unrecognized line <%s>%n", line);
                continue;
            }
            int start = Integer.parseInt(m.group(1), 16);
            int end = (m.group(2) == null) ? start
                                           : Integer.parseInt(m.group(2), 16);
            String name = m.group(3);
            if (name.equals(prevN) && start == prevE + 1) {
                prevE = end;                       // extend the pending range
            } else {
                if (prevN != null) {
                    addRange(ranges, scriptMap, prevS, prevE, prevN);
                }
                debug("%x-%x\t%s%n", prevS, prevE, prevN);
                prevS = start; prevE = end; prevN = name;
            }
        }

        //last one.
        if (prevN != null) {
            addRange(ranges, scriptMap, prevS, prevE, prevN);
            debug("%x-%x\t%s%n", prevS, prevE, prevN);
        }
        return ranges;
    }

    /** Appends {@code {start, end, index-of-name}}, registering the name if it is new. */
    private static void addRange(List<int[]> ranges, Map<String, Integer> scriptMap,
                                 int start, int end, String name) {
        Integer index = scriptMap.get(name);
        if (index == null) {
            index = scriptMap.size();
            scriptMap.put(name, index);
        }
        ranges.add(new int[] { start, end, index.intValue() });
    }

    /**
     * Consolidation: there are lots of "reserved" code points
     * embedded in those otherwise "sequential" blocks.
     * To make the lookup table smaller, we combine those
     * separated segments with the assumption that the lookup
     * implementation checks
     *    Character.getType() != Character.UNASSIGNED
     * first (return UNKNOWN for unassigned).
     *
     * <p>Range arrays from {@code sorted} are modified in place when they are
     * extended.
     *
     * @param sorted non-empty list of ranges ordered by start code point
     * @return the consolidated ranges, with explicit unknown-script ranges
     *         inserted for gaps that contain an assigned code point
     */
    private static List<int[]> consolidate(List<int[]> sorted) {
        List<int[]> list = new ArrayList<>();
        int[] last = sorted.get(0);
        list.add(last);

        for (int i = 1; i < sorted.size(); i++) {
            int[] cur = sorted.get(i);
            if (cur[0] != (last[1] + 1)) {
                if (hasAssignedCodePoint(last[1] + 1, cur[0])) {
                    // surrogates only?
                    list.add(new int[] { last[1] + 1, cur[0] - 1, UNKNOWN_SCRIPT });
                } else if (last[2] == cur[2]) {
                    //combine
                    last[1] = cur[1];
                    continue;
                } else {
                    // expand last
                    last[1] = cur[0] - 1;
                }
            }
            list.add(cur);
            last = cur;
        }
        return list;
    }

    /**
     * Tells whether any code point in {@code [from, to)} is reported by the
     * running JDK as something other than {@code Character.UNASSIGNED}.
     */
    private static boolean hasAssignedCodePoint(int from, int to) {
        for (int cp = from; cp < to; cp++) {
            if (Character.getType(cp) != Character.UNASSIGNED) {
                debug("Warning: [%x] is ASSIGNED but in NON script%n", cp);
                return true;
            }
        }
        return false;
    }

    /** Upper-cased enum constant name for a script index; "UNKNOWN" for -1. */
    private static String scriptName(String[] names, int index) {
        return (index == UNKNOWN_SCRIPT) ? "UNKNOWN"
                                         : names[index].toUpperCase(Locale.US);
    }

    /**
     * Prints the generated {@code Scripts} class: the enum constants in index
     * order, then the {@code scriptStarts} and {@code scripts} tables, one
     * entry per range, plus a trailing UNKNOWN entry when the last range
     * stops short of {@code Character.MAX_CODE_POINT}.
     */
    private static void emitScriptsClass(String[] names, List<int[]> list) {
        //////////////////OUTPUT//////////////////////////////////
        print("public class Scripts {%n%n");
        print(" public static enum UnicodeScript {%n");
        for (String name : names) {
            print(" /**%n * Unicode script \"%s\".%n */%n", name);
            print(" %s,%n%n", name.toUpperCase(Locale.US));
        }
        print(" /**%n * Unicode script \"Unknown\".%n */%n UNKNOWN;%n%n");

        int[] last = list.get(list.size() - 1);
        boolean needsTail = last[1] != Character.MAX_CODE_POINT;

        // lookup table
        print(" private static final int[] scriptStarts = {%n");
        for (int[] a : list) {
            String name = scriptName(names, a[2]);
            if (a[0] < 0x10000) {
                print(" 0x%04X, // %04X..%04X; %s%n",
                      a[0], a[0], a[1], name);
            } else {
                print(" 0x%05X, // %05X..%05X; %s%n",
                      a[0], a[0], a[1], name);
            }
        }
        if (needsTail) {
            print(" 0x%05X // %05X..%06X; %s%n",
                  last[1] + 1, last[1] + 1, Character.MAX_CODE_POINT,
                  "UNKNOWN");
        }
        print("%n };%n%n");

        print(" private static final UnicodeScript[] scripts = {%n");
        for (int[] a : list) {
            print(" %s,%n", scriptName(names, a[2]));
        }
        if (needsTail) {
            print(" UNKNOWN%n");
        }
        print(" };%n");
        print(" }%n");
        print("}%n");
    }
}