1 /* 2 * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package build.tools.charsetmapping; 27 28 import java.io.*; 29 import java.util.Arrays; 30 import java.util.ArrayList; 31 import java.util.Scanner; 32 import java.util.Formatter; 33 import java.util.regex.Pattern; 34 import static build.tools.charsetmapping.Utils.*; 35 36 public class SBCS { 37 38 static Pattern sbmap = Pattern.compile("0x(\\p{XDigit}++)\\s++(?:U\\+|0x)?(\\p{XDigit}++)(?:\\s++#.*)?"); 39 40 public static void genClass(Charset cs, 41 String srcDir, String dstDir, String template) 42 throws Exception 43 { 44 String clzName = cs.clzName; 45 String csName = cs.csName; 46 String hisName = cs.hisName; 47 String pkgName = cs.pkgName; 48 boolean isASCII = cs.isASCII; 49 50 StringBuilder b2cSB = new StringBuilder(); 51 StringBuilder b2cNRSB = new StringBuilder(); 52 StringBuilder c2bNRSB = new StringBuilder(); 53 54 char[] sb = new char[0x100]; 55 char[] c2bIndex = new char[0x100]; 56 int c2bOff = 0; 57 Arrays.fill(sb, UNMAPPABLE_DECODING); 58 Arrays.fill(c2bIndex, UNMAPPABLE_DECODING); 59 60 // (1)read in .map to parse all b->c entries 61 FileInputStream in = new FileInputStream( 62 new File(srcDir, clzName + ".map")); 63 Parser p = new Parser(in, sbmap); 64 Entry e = null; 65 66 while ((e = p.next()) != null) { 67 sb[e.bs] = (char)e.cp; 68 if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) { 69 c2bOff += 0x100; 70 c2bIndex[e.cp>>8] = 1; 71 } 72 } 73 74 Formatter fm = new Formatter(b2cSB); 75 fm.format("%n"); 76 77 // vm -server shows cc[byte + 128] access is much faster than 78 // cc[byte&0xff] so we output the upper segment first 79 toString(sb, 0x80, 0x100, fm, "+", true); 80 toString(sb, 0x00, 0x80, fm, ";", true); 81 fm.close(); 82 83 // (2)now the .nr file which includes "b->c" non-roundtrip entries 84 File f = new File(srcDir, clzName + ".nr"); 85 if (f.exists()) { 86 in = new FileInputStream(f); 87 fm = new Formatter(b2cNRSB); 88 p = new Parser(in, sbmap); 89 e = null; 90 91 fm.format("// remove non-roundtrip entries%n"); 92 fm.format(" b2cMap = b2cTable.toCharArray();%n"); 93 while ((e = p.next()) != null) { 94 fm.format(" b2cMap[%d] = UNMAPPABLE_DECODING;%n", 95 (e.bs>=0x80)?(e.bs-0x80):(e.bs+0x80)); 96 } 97 fm.close(); 98 } 99 100 // (3)finally the .c2b file which includes c->b non-roundtrip entries 101 f = new File(srcDir, clzName + ".c2b"); 102 if (f.exists()) { 103 in = new FileInputStream(f); 104 fm = new Formatter(c2bNRSB); 105 p = new Parser(in, sbmap); 106 e = null; 107 ArrayList<Entry> es = new ArrayList<Entry>(); 108 while ((e = p.next()) != null) { 109 if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) { 110 c2bOff += 0x100; 111 c2bIndex[e.cp>>8] = 1; 112 } 113 es.add(e); 114 } 115 fm.format("// non-roundtrip c2b only entries%n"); 116 if (es.size() < 100) { 117 fm.format(" c2bNR = new char[%d];%n", es.size() * 2); 118 int i = 0; 119 for (Entry entry: es) { 120 fm.format(" c2bNR[%d] = 0x%x; c2bNR[%d] = 0x%x;%n", 121 i++, entry.bs, i++, entry.cp); 122 } 123 } else { 124 char[] cc = new char[es.size() * 2]; 125 int i = 0; 126 for (Entry entry: es) { 127 cc[i++] = (char)entry.bs; 128 cc[i++] = (char)entry.cp; 129 } 130 fm.format(" c2bNR = (%n"); 131 toString(cc, 0, i, fm, ").toCharArray();", false); 132 } 133 fm.close(); 134 } 135 136 // (4)it's time to generate the source file 137 String b2c = b2cSB.toString(); 138 String b2cNR = b2cNRSB.toString(); 139 String c2bNR = c2bNRSB.toString(); 140 141 Scanner s = new Scanner(new File(srcDir, template)); 142 PrintStream out = new PrintStream(new FileOutputStream( 143 new File(dstDir, clzName + ".java"))); 144 145 while (s.hasNextLine()) { 146 String line = s.nextLine(); 147 int i = line.indexOf("$"); 148 if (i == -1) { 149 out.println(line); 150 continue; 151 } 152 if (line.indexOf("$PACKAGE$", i) != -1) { 153 line = line.replace("$PACKAGE$", pkgName); 154 } 155 if (line.indexOf("$NAME_CLZ$", i) != -1) { 156 line = line.replace("$NAME_CLZ$", clzName); 157 } 158 if (line.indexOf("$NAME_CS$", i) != -1) { 159 line = line.replace("$NAME_CS$", csName); 160 } 161 if (line.indexOf("$NAME_ALIASES$", i) != -1) { 162 if ("sun.nio.cs".equals(pkgName)) 163 line = line.replace("$NAME_ALIASES$", 164 "StandardCharsets.aliases_" + clzName); 165 else 166 line = line.replace("$NAME_ALIASES$", 167 "ExtendedCharsets.aliasesFor(\"" + csName + "\")"); 168 } 169 if (line.indexOf("$NAME_HIS$", i) != -1) { 170 line = line.replace("$NAME_HIS$", hisName); 171 } 172 if (line.indexOf("$CONTAINS$", i) != -1) { 173 if (isASCII) 174 line = " return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof " + clzName + "));"; 175 else 176 line = " return (cs instanceof " + clzName + ");"; 177 } 178 if (line.indexOf("$ASCIICOMPATIBLE$") != -1) { 179 line = line.replace("$ASCIICOMPATIBLE$", isASCII ? "true" : "false"); 180 } 181 if (line.indexOf("$B2CTABLE$") != -1) { 182 line = line.replace("$B2CTABLE$", b2c); 183 } 184 if (line.indexOf("$C2BLENGTH$") != -1) { 185 line = line.replace("$C2BLENGTH$", "0x" + Integer.toString(c2bOff, 16)); 186 } 187 if (line.indexOf("$NONROUNDTRIP_B2C$") != -1) { 188 if (b2cNR.length() == 0) 189 continue; 190 line = line.replace("$NONROUNDTRIP_B2C$", b2cNR); 191 } 192 193 if (line.indexOf("$NONROUNDTRIP_C2B$") != -1) { 194 if (c2bNR.length() == 0) 195 continue; 196 line = line.replace("$NONROUNDTRIP_C2B$", c2bNR); 197 } 198 out.println(line); 199 } 200 out.close(); 201 } 202 203 private static void toString(char[] sb, int off, int end, 204 Formatter out, String closure, boolean comment) 205 { 206 while (off < end) { 207 out.format(" \""); 208 for (int j = 0; j < 8; j++) { 209 if (off == end) 210 break; 211 char c = sb[off++]; 212 switch (c) { 213 case '\b': 214 out.format("\\b"); break; 215 case '\t': 216 out.format("\\t"); break; 217 case '\n': 218 out.format("\\n"); break; 219 case '\f': 220 out.format("\\f"); break; 221 case '\r': 222 out.format("\\r"); break; 223 case '\"': 224 out.format("\\\""); break; 225 case '\'': 226 out.format("\\'"); break; 227 case '\\': 228 out.format("\\\\"); break; 229 default: 230 out.format("\\u%04X", c & 0xffff); 231 } 232 } 233 if (comment) { 234 if (off == end) 235 out.format("\" %s // 0x%02x - 0x%02x%n", 236 closure, off-8, off-1); 237 else 238 out.format("\" + // 0x%02x - 0x%02x%n", 239 off-8, off-1); 240 } else { 241 if (off == end) 242 out.format("\"%s%n", closure); 243 else 244 out.format("\" +%n"); 245 } 246 } 247 } 248 }