/*
 * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package build.tools.charsetmapping;

import java.io.*;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.Formatter;
import java.util.regex.Pattern;
import static build.tools.charsetmapping.Utils.*;

/**
 * Build-time generator that produces the Java source of a single-byte
 * charset class from its mapping files and a source template.
 */
public class SBCS {

    /**
     * Pattern handed to the mapping-file parser: a {@code 0x}-prefixed hex
     * byte value, whitespace, a hex code point optionally prefixed with
     * {@code U+} or {@code 0x}, and an optional trailing {@code #} comment.
     */
    static Pattern sbmap = Pattern.compile("0x(\\p{XDigit}++)\\s++(?:U\\+|0x)?(\\p{XDigit}++)(?:\\s++#.*)?");

    /**
     * Generates {@code <dstDir>/<clzName>.java} for the given charset.
     *
     * <p>Inputs read from {@code srcDir}:
     * <ul>
     *   <li>{@code <clzName>.map} - the byte-to-char entries (required);</li>
     *   <li>{@code <clzName>.nr} - byte-to-char non-roundtrip entries
     *       (optional);</li>
     *   <li>{@code <clzName>.c2b} - char-to-byte non-roundtrip entries
     *       (optional);</li>
     *   <li>{@code template} - the source template whose {@code $...$}
     *       placeholders are substituted line by line.</li>
     * </ul>
     *
     * @param cs       charset description; its {@code clzName}, {@code csName},
     *                 {@code hisName}, {@code pkgName} and {@code isASCII}
     *                 fields are used
     * @param srcDir   directory containing the mapping files and the template
     * @param dstDir   directory the generated source file is written to
     * @param template file name of the template, relative to {@code srcDir}
     * @throws Exception if an input cannot be read or parsed, or if writing
     *                   the generated file fails
     */
    public static void genClass(Charset cs,
                                String srcDir, String dstDir, String template)
        throws Exception
    {
        String clzName = cs.clzName;
        String csName  = cs.csName;
        String hisName = cs.hisName;
        String pkgName = cs.pkgName;
        boolean isASCII = cs.isASCII;
        boolean isAlwaysCompactable = true;

        StringBuilder b2cSB = new StringBuilder();
        StringBuilder b2cNRSB = new StringBuilder();
        StringBuilder c2bNRSB = new StringBuilder();

        char[] sb = new char[0x100];
        char[] c2bIndex = new char[0x100];
        int c2bOff = 0;
        Arrays.fill(sb, UNMAPPABLE_DECODING);
        Arrays.fill(c2bIndex, UNMAPPABLE_DECODING);

        // (1)read in .map to parse all b->c entries
        // The stream is closed here even if parsing fails part-way; closing
        // it again is harmless should the parser also close it.
        try (FileInputStream in = new FileInputStream(
                 new File(srcDir, clzName + ".map"))) {
            Parser p = new Parser(in, sbmap);
            Entry e;
            while ((e = p.next()) != null) {
                sb[e.bs] = (char)e.cp;
                c2bOff += claimC2bSegment(c2bIndex, e.cp);
                if (e.cp > 0xFF) {
                    isAlwaysCompactable = false;
                }
            }
        }

        try (Formatter fm = new Formatter(b2cSB)) {
            fm.format("%n");

            // vm -server shows cc[byte + 128] access is much faster than
            // cc[byte&0xff] so we output the upper segment first
            toString(sb, 0x80, 0x100, fm, "+", true);
            toString(sb, 0x00, 0x80,  fm, ";", true);
        }

        // (2)now the .nr file which includes "b->c" non-roundtrip entries
        File f = new File(srcDir, clzName + ".nr");
        if (f.exists()) {
            try (FileInputStream in = new FileInputStream(f);
                 Formatter fm = new Formatter(b2cNRSB)) {
                Parser p = new Parser(in, sbmap);
                Entry e;

                fm.format("// remove non-roundtrip entries%n");
                fm.format(" b2cMap = b2cTable.toCharArray();%n");
                while ((e = p.next()) != null) {
                    // the table is emitted upper segment first, so the two
                    // halves are swapped when computing the index
                    fm.format(" b2cMap[%d] = UNMAPPABLE_DECODING;%n",
                              (e.bs>=0x80)?(e.bs-0x80):(e.bs+0x80));
                }
            }
        }

        // (3)finally the .c2b file which includes c->b non-roundtrip entries
        f = new File(srcDir, clzName + ".c2b");
        if (f.exists()) {
            try (FileInputStream in = new FileInputStream(f);
                 Formatter fm = new Formatter(c2bNRSB)) {
                Parser p = new Parser(in, sbmap);
                Entry e;
                ArrayList<Entry> es = new ArrayList<>();
                while ((e = p.next()) != null) {
                    c2bOff += claimC2bSegment(c2bIndex, e.cp);
                    es.add(e);
                }
                fm.format("// non-roundtrip c2b only entries%n");
                if (es.size() < 100) {
                    // few entries: emit one pair of assignments per entry
                    fm.format(" c2bNR = new char[%d];%n", es.size() * 2);
                    int i = 0;
                    for (Entry entry: es) {
                        fm.format(" c2bNR[%d] = 0x%x; c2bNR[%d] = 0x%x;%n",
                                  i, entry.bs, i + 1, entry.cp);
                        i += 2;
                    }
                } else {
                    // many entries: emit (byte, char) pairs as a string literal
                    char[] cc = new char[es.size() * 2];
                    int i = 0;
                    for (Entry entry: es) {
                        cc[i++] = (char)entry.bs;
                        cc[i++] = (char)entry.cp;
                    }
                    fm.format(" c2bNR = (%n");
                    toString(cc, 0, i, fm, ").toCharArray();", false);
                }
            }
        }

        // (4)it's time to generate the source file
        String b2c = b2cSB.toString();
        String b2cNR = b2cNRSB.toString();
        String c2bNR = c2bNRSB.toString();

        File outFile = new File(dstDir, clzName + ".java");
        try (Scanner s = new Scanner(new File(srcDir, template));
             PrintStream out = new PrintStream(new FileOutputStream(outFile))) {

            while (s.hasNextLine()) {
                String line = s.nextLine();
                int i = line.indexOf("$");
                if (i == -1) {
                    out.println(line);
                    continue;
                }
                if (line.indexOf("$PACKAGE$", i) != -1) {
                    line = line.replace("$PACKAGE$", pkgName);
                }
                if (line.indexOf("$NAME_CLZ$", i) != -1) {
                    line = line.replace("$NAME_CLZ$", clzName);
                }
                if (line.indexOf("$NAME_CS$", i) != -1) {
                    line = line.replace("$NAME_CS$", csName);
                }
                if (line.indexOf("$NAME_ALIASES$", i) != -1) {
                    if ("sun.nio.cs".equals(pkgName)) {
                        line = line.replace("$NAME_ALIASES$",
                                            "StandardCharsets.aliases_" + clzName + "()");
                    } else {
                        line = line.replace("$NAME_ALIASES$",
                                            "ExtendedCharsets.aliasesFor(\"" + csName + "\")");
                    }
                }
                if (line.indexOf("$NAME_HIS$", i) != -1) {
                    line = line.replace("$NAME_HIS$", hisName);
                }
                if (line.indexOf("$CONTAINS$", i) != -1) {
                    // the whole template line is replaced, not just the token
                    if (isASCII) {
                        line = " return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof " + clzName + "));";
                    } else {
                        line = " return (cs instanceof " + clzName + ");";
                    }
                }
                if (line.indexOf("$ASCIICOMPATIBLE$") != -1) {
                    line = line.replace("$ASCIICOMPATIBLE$", isASCII ? "true" : "false");
                }
                if (line.indexOf("$ALWAYSCOMPACTABLE$") != -1) {
                    line = line.replace("$ALWAYSCOMPACTABLE$", isAlwaysCompactable ? "true" : "false");
                }
                if (line.indexOf("$B2CTABLE$") != -1) {
                    line = line.replace("$B2CTABLE$", b2c);
                }
                if (line.indexOf("$C2BLENGTH$") != -1) {
                    line = line.replace("$C2BLENGTH$", "0x" + Integer.toString(c2bOff, 16));
                }
                if (line.indexOf("$NONROUNDTRIP_B2C$") != -1) {
                    // drop the template line entirely when there is nothing to emit
                    if (b2cNR.length() == 0) {
                        continue;
                    }
                    line = line.replace("$NONROUNDTRIP_B2C$", b2cNR);
                }

                if (line.indexOf("$NONROUNDTRIP_C2B$") != -1) {
                    if (c2bNR.length() == 0) {
                        continue;
                    }
                    line = line.replace("$NONROUNDTRIP_C2B$", c2bNR);
                }
                out.println(line);
            }

            // PrintStream never throws on write; surface failures explicitly
            // so a truncated source file does not go unnoticed.
            if (out.checkError()) {
                throw new IOException("Failed to write " + outFile);
            }
        }
    }

    /**
     * Marks the 256-entry segment selected by the high byte of {@code cp} as
     * in use.
     *
     * @param c2bIndex per-high-byte marker table; an entry still equal to
     *                 {@code UNMAPPABLE_DECODING} has not been claimed yet
     * @param cp       code point whose segment is to be claimed
     * @return {@code 0x100} if the segment was newly claimed by this call,
     *         {@code 0} if it had already been claimed
     */
    private static int claimC2bSegment(char[] c2bIndex, int cp) {
        int segment = cp >> 8;
        if (c2bIndex[segment] != UNMAPPABLE_DECODING) {
            return 0;
        }
        c2bIndex[segment] = 1;
        return 0x100;
    }

    /**
     * Writes {@code sb[off..end)} to {@code out} as a sequence of Java string
     * literals, eight chars per row. Characters that have a short escape
     * ({@code \b \t \n \f \r \" \' \\}) use it; all others are written as
     * {@code \\uXXXX}. Rows are joined with {@code +}; the last row is followed
     * by {@code closure} instead.
     *
     * @param sb      chars to emit
     * @param off     index of the first char to emit (inclusive)
     * @param end     index after the last char to emit (exclusive)
     * @param out     formatter receiving the generated text
     * @param closure text written after the final row
     * @param comment if {@code true}, each row gets a trailing comment with
     *                the index range it covers
     */
    private static void toString(char[] sb, int off, int end,
                                 Formatter out, String closure, boolean comment)
    {
        while (off < end) {
            // remember where the row starts so the range comment stays
            // correct even when the final row holds fewer than 8 chars
            int rowStart = off;
            out.format(" \"");
            for (int j = 0; j < 8; j++) {
                if (off == end) {
                    break;
                }
                char c = sb[off++];
                switch (c) {
                case '\b':
                    out.format("\\b"); break;
                case '\t':
                    out.format("\\t"); break;
                case '\n':
                    out.format("\\n"); break;
                case '\f':
                    out.format("\\f"); break;
                case '\r':
                    out.format("\\r"); break;
                case '\"':
                    out.format("\\\""); break;
                case '\'':
                    out.format("\\'"); break;
                case '\\':
                    out.format("\\\\"); break;
                default:
                    out.format("\\u%04X", c & 0xffff);
                }
            }
            if (comment) {
                if (off == end) {
                    out.format("\" %s // 0x%02x - 0x%02x%n",
                               closure, rowStart, off-1);
                } else {
                    out.format("\" + // 0x%02x - 0x%02x%n",
                               rowStart, off-1);
                }
            } else {
                if (off == end) {
                    out.format("\"%s%n", closure);
                } else {
                    out.format("\" +%n");
                }
            }
        }
    }
}