1 package build.tools.generatecharacter; 2 3 import java.util.regex.*; 4 import java.util.*; 5 import java.io.*; 6 7 public class CharacterScript { 8 9 // generate the code needed for j.l.C.UnicodeScript 10 static void fortest(String fmt, Object... o) { 11 //System.out.printf(fmt, o); 12 } 13 14 static void print(String fmt, Object... o) { 15 System.out.printf(fmt, o); 16 } 17 18 static void debug(String fmt, Object... o) { 19 //System.out.printf(fmt, o); 20 } 101 Arrays.sort(scripts, 0, scriptSize, 102 new Comparator<int[]>() { 103 public int compare(int[] a1, int[] a2) { 104 return a1[0] - a2[0]; 105 } 106 public boolean compare(Object obj) { 107 return obj == this; 108 } 109 }); 110 111 112 113 // Consolidation: there are lots of "reserved" code points 114 // embedded in those otherwise "sequential" blocks. 115 // To make the lookup table smaller, we combine those 116 // separated segments with the assumption that the lookup 117 // implementation checks 118 // Character.getType() != Character.UNASSIGNED 119 // first (return UNKNOWN for unassigned) 120 121 ArrayList<int[]> list = new ArrayList(); 122 list.add(scripts[0]); 123 124 int[] last = scripts[0]; 125 for (i = 1; i < scriptSize; i++) { 126 if (scripts[i][0] != (last[1] + 1)) { 127 128 boolean isNotUnassigned = false; 129 for (int cp = last[1] + 1; cp < scripts[i][0]; cp++) { 130 if (Character.getType(cp) != Character.UNASSIGNED) { 131 isNotUnassigned = true; 132 debug("Warning: [%x] is ASSIGNED but in NON script%n", cp); 133 break; 134 } 135 } 136 if (isNotUnassigned) { 137 // surrogates only? 138 int[] a = new int[3]; 139 a[0] = last[1] + 1; 140 a[1] = scripts[i][0] - 1; 141 a[2] = -1; // unknown 142 list.add(a); 143 } else { 144 if (last[2] == scripts[i][2]) { 145 //combine 146 last[1] = scripts[i][1]; 147 continue; 148 } else { 149 // expand last 150 last[1] = scripts[i][0] - 1; 151 } 152 } 153 } 154 list.add(scripts[i]); 155 last = scripts[i]; 156 } 157 158 for (i = 0; i < list.size(); i++) { 159 int[] a = (int[])list.get(i); 160 String name = "UNKNOWN"; 161 if (a[2] != -1) 162 name = names[a[2]].toUpperCase(Locale.US); 163 debug("0x%05x, 0x%05x %s%n", a[0], a[1], name); 164 } 165 debug("--->total=%d%n", list.size()); 166 167 168 //////////////////OUTPUT////////////////////////////////// 169 print("public class Scripts {%n%n"); 170 print(" public static enum UnicodeScript {%n"); 171 for (i = 0; i < names.length; i++) { 172 print(" /**%n * Unicode script \"%s\".%n */%n", names[i]); 173 print(" %s,%n%n", names[i].toUpperCase(Locale.US)); 174 } 175 print(" /**%n * Unicode script \"Unknown\".%n */%n UNKNOWN;%n%n"); 176 177 178 // lookup table 179 print(" private static final int[] scriptStarts = {%n"); | 1 /* 2 * Copyright (c) 2010, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package build.tools.generatecharacter; 27 28 import java.util.regex.*; 29 import java.util.*; 30 import java.io.*; 31 32 public class CharacterScript { 33 34 // generate the code needed for j.l.C.UnicodeScript 35 static void fortest(String fmt, Object... o) { 36 //System.out.printf(fmt, o); 37 } 38 39 static void print(String fmt, Object... o) { 40 System.out.printf(fmt, o); 41 } 42 43 static void debug(String fmt, Object... o) { 44 //System.out.printf(fmt, o); 45 } 126 Arrays.sort(scripts, 0, scriptSize, 127 new Comparator<int[]>() { 128 public int compare(int[] a1, int[] a2) { 129 return a1[0] - a2[0]; 130 } 131 public boolean compare(Object obj) { 132 return obj == this; 133 } 134 }); 135 136 137 138 // Consolidation: there are lots of "reserved" code points 139 // embedded in those otherwise "sequential" blocks. 140 // To make the lookup table smaller, we combine those 141 // separated segments with the assumption that the lookup 142 // implementation checks 143 // Character.getType() != Character.UNASSIGNED 144 // first (return UNKNOWN for unassigned) 145 146 ArrayList<int[]> list = new ArrayList<>(); 147 list.add(scripts[0]); 148 149 int[] last = scripts[0]; 150 for (i = 1; i < scriptSize; i++) { 151 if (scripts[i][0] != (last[1] + 1)) { 152 153 boolean isNotUnassigned = false; 154 for (int cp = last[1] + 1; cp < scripts[i][0]; cp++) { 155 if (Character.getType(cp) != Character.UNASSIGNED) { 156 isNotUnassigned = true; 157 debug("Warning: [%x] is ASSIGNED but in NON script%n", cp); 158 break; 159 } 160 } 161 if (isNotUnassigned) { 162 // surrogates only? 163 int[] a = new int[3]; 164 a[0] = last[1] + 1; 165 a[1] = scripts[i][0] - 1; 166 a[2] = -1; // unknown 167 list.add(a); 168 } else { 169 if (last[2] == scripts[i][2]) { 170 //combine 171 last[1] = scripts[i][1]; 172 continue; 173 } else { 174 // expand last 175 last[1] = scripts[i][0] - 1; 176 } 177 } 178 } 179 list.add(scripts[i]); 180 last = scripts[i]; 181 } 182 183 for (i = 0; i < list.size(); i++) { 184 int[] a = list.get(i); 185 String name = "UNKNOWN"; 186 if (a[2] != -1) 187 name = names[a[2]].toUpperCase(Locale.US); 188 debug("0x%05x, 0x%05x %s%n", a[0], a[1], name); 189 } 190 debug("--->total=%d%n", list.size()); 191 192 193 //////////////////OUTPUT////////////////////////////////// 194 print("public class Scripts {%n%n"); 195 print(" public static enum UnicodeScript {%n"); 196 for (i = 0; i < names.length; i++) { 197 print(" /**%n * Unicode script \"%s\".%n */%n", names[i]); 198 print(" %s,%n%n", names[i].toUpperCase(Locale.US)); 199 } 200 print(" /**%n * Unicode script \"Unknown\".%n */%n UNKNOWN;%n%n"); 201 202 203 // lookup table 204 print(" private static final int[] scriptStarts = {%n"); |