1 /*
   2  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.charsetmapping;
  27 
  28 import java.io.*;
  29 import java.util.Arrays;
  30 import java.util.ArrayList;
  31 import java.util.Scanner;
  32 import java.util.Formatter;
  33 import java.util.regex.*;
  34 import java.nio.charset.*;
  35 import static build.tools.charsetmapping.Utils.*;
  36 
  37 public class SBCS {
  38 
  39     public static void genClass(String args[]) throws Exception {
  40 
  41         Scanner s = new Scanner(new File(args[0], args[2]));
  42         while (s.hasNextLine()) {
  43             String line = s.nextLine();
  44             if (line.startsWith("#") || line.length() == 0)
  45                 continue;
  46             String[] fields = line.split("\\s+");
  47             if (fields.length < 5) {
  48                 System.err.println("Misconfiged sbcs line <" + line + ">?");
  49                 continue;
  50             }
  51             String clzName = fields[0];
  52             String csName  = fields[1];
  53             String hisName = fields[2];
  54             boolean isASCII = Boolean.valueOf(fields[3]);
  55             String pkgName  = fields[4];
  56             System.out.printf("%s,%s,%s,%b,%s%n", clzName, csName, hisName, isASCII, pkgName);
  57 
  58             genClass0(args[0], args[1], "SingleByte-X.java.template",
  59                       clzName, csName, hisName, pkgName, isASCII);
  60         }
  61     }
  62 
  63     private static void toString(char[] sb, int off, int end,
  64                                  Formatter out, String closure,
  65                                  boolean comment) {
  66         while (off < end) {
  67             out.format("        \"");
  68             for (int j = 0; j < 8; j++) {
  69                 if (off == end)
  70                     break;
  71                 char c = sb[off++];
  72                 switch (c) {
  73                 case '\b':
  74                     out.format("\\b"); break;
  75                 case '\t':
  76                     out.format("\\t"); break;
  77                 case '\n':
  78                     out.format("\\n"); break;
  79                 case '\f':
  80                     out.format("\\f"); break;
  81                 case '\r':
  82                     out.format("\\r"); break;
  83                 case '\"':
  84                     out.format("\\\""); break;
  85                 case '\'':
  86                     out.format("\\'"); break;
  87                 case '\\':
  88                     out.format("\\\\"); break;
  89                 default:
  90                     out.format("\\u%04X", c & 0xffff);
  91                 }
  92             }
  93             if (comment) {
  94                 if (off == end)
  95                     out.format("\" %s      // 0x%02x - 0x%02x%n",
  96                                closure, off-8, off-1);
  97                 else
  98                     out.format("\" +      // 0x%02x - 0x%02x%n",
  99                                off-8, off-1);
 100             } else {
 101                 if (off == end)
 102                     out.format("\"%s%n", closure);
 103                 else
 104                     out.format("\" +%n");
 105             }
 106         }
 107     }
 108 
 109     static Pattern sbmap = Pattern.compile("0x(\\p{XDigit}++)\\s++(?:U\\+|0x)?(\\p{XDigit}++)(?:\\s++#.*)?");
 110 
 111     private static void genClass0(String srcDir, String dstDir,
 112                                   String template,
 113                                   String clzName,
 114                                   String csName,
 115                                   String hisName,
 116                                   String pkgName,
 117                                   boolean isASCII)
 118         throws Exception
 119     {
 120         StringBuilder b2cSB = new StringBuilder();
 121         StringBuilder b2cNRSB = new StringBuilder();
 122         StringBuilder c2bNRSB = new StringBuilder();
 123 
 124         char[] sb = new char[0x100];
 125         char[] c2bIndex = new char[0x100];
 126         int    c2bOff = 0;
 127         Arrays.fill(sb, UNMAPPABLE_DECODING);
 128         Arrays.fill(c2bIndex, UNMAPPABLE_DECODING);
 129 
 130         // (1)read in .map to parse all b->c entries
 131         FileInputStream in = new FileInputStream(
 132                                  new File(srcDir, clzName + ".map"));
 133         Parser p = new Parser(in, sbmap);
 134         Entry  e = null;
 135 
 136         while ((e = p.next()) != null) {
 137             sb[e.bs] = (char)e.cp;
 138             if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
 139                 c2bOff += 0x100;
 140                 c2bIndex[e.cp>>8] = 1;
 141             }
 142         }
 143 
 144         Formatter fm = new Formatter(b2cSB);
 145         fm.format("%n");
 146 
 147         // vm -server shows cc[byte + 128] access is much faster than
 148         // cc[byte&0xff] so we output the upper segment first
 149         toString(sb, 0x80, 0x100, fm, "+", true);
 150         toString(sb, 0x00, 0x80,  fm, ";", true);
 151         fm.close();
 152 
 153         // (2)now the .nr file which includes "b->c" non-roundtrip entries
 154         File f = new File(srcDir, clzName + ".nr");
 155         if (f.exists()) {
 156             in = new FileInputStream(f);
 157             fm = new Formatter(b2cNRSB);
 158             p = new Parser(in, sbmap);
 159             e = null;
 160 
 161             fm.format("// remove non-roundtrip entries%n");
 162             fm.format("        b2cMap = b2cTable.toCharArray();%n");
 163             while ((e = p.next()) != null) {
 164                 fm.format("        b2cMap[%d] = UNMAPPABLE_DECODING;%n",
 165                           (e.bs>=0x80)?(e.bs-0x80):(e.bs+0x80));
 166             }
 167             fm.close();
 168         }
 169 
 170         // (3)finally the .c2b file which includes c->b non-roundtrip entries
 171         f = new File(srcDir, clzName + ".c2b");
 172         if (f.exists()) {
 173             in = new FileInputStream(f);
 174             fm = new Formatter(c2bNRSB);
 175             p = new Parser(in, sbmap);
 176             e = null;
 177             ArrayList<Entry> es = new ArrayList<Entry>();
 178             while ((e = p.next()) != null) {
 179                 if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
 180                     c2bOff += 0x100;
 181                     c2bIndex[e.cp>>8] = 1;
 182                 }
 183                 es.add(e);
 184             }
 185             fm.format("// non-roundtrip c2b only entries%n");
 186             if (es.size() < 100) {
 187                 fm.format("        c2bNR = new char[%d];%n", es.size() * 2);
 188                 int i = 0;
 189                 for (Entry entry: es) {
 190                     fm.format("        c2bNR[%d] = 0x%x; c2bNR[%d] = 0x%x;%n",
 191                               i++, entry.bs, i++, entry.cp);
 192                 }
 193             } else {
 194                 char[] cc = new char[es.size() * 2];
 195                 int i = 0;
 196                 for (Entry entry: es) {
 197                     cc[i++] = (char)entry.bs;
 198                     cc[i++] = (char)entry.cp;
 199                 }
 200                 fm.format("        c2bNR = (%n");
 201                 toString(cc, 0, i,  fm, ").toCharArray();", false);
 202             }
 203             fm.close();
 204         }
 205 
 206         // (4)it's time to generate the source file
 207         String b2c = b2cSB.toString();
 208         String b2cNR = b2cNRSB.toString();
 209         String c2bNR = c2bNRSB.toString();
 210 
 211         Scanner s = new Scanner(new File(srcDir, template));
 212         PrintStream out = new PrintStream(new FileOutputStream(
 213                               new File(dstDir, clzName + ".java")));
 214 
 215         while (s.hasNextLine()) {
 216             String line = s.nextLine();
 217             int i = line.indexOf("$");
 218             if (i == -1) {
 219                 out.println(line);
 220                 continue;
 221             }
 222             if (line.indexOf("$PACKAGE$", i) != -1) {
 223                 line = line.replace("$PACKAGE$", pkgName);
 224             }
 225             if (line.indexOf("$NAME_CLZ$", i) != -1) {
 226                 line = line.replace("$NAME_CLZ$", clzName);
 227             }
 228             if (line.indexOf("$NAME_CS$", i) != -1) {
 229                 line = line.replace("$NAME_CS$", csName);
 230             }
 231             if (line.indexOf("$NAME_ALIASES$", i) != -1) {
 232                 if ("sun.nio.cs".equals(pkgName))
 233                     line = line.replace("$NAME_ALIASES$",
 234                                         "StandardCharsets.aliases_" + clzName);
 235                 else
 236                     line = line.replace("$NAME_ALIASES$",
 237                                         "ExtendedCharsets.aliasesFor(\"" + csName + "\")");
 238             }
 239             if (line.indexOf("$NAME_HIS$", i) != -1) {
 240                 line = line.replace("$NAME_HIS$", hisName);
 241             }
 242             if (line.indexOf("$CONTAINS$", i) != -1) {
 243                 if (isASCII)
 244                     line = "        return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof " + clzName + "));";
 245                 else
 246                     line = "        return (cs instanceof " + clzName + ");";
 247             }
 248             if (line.indexOf("$B2CTABLE$") != -1) {
 249                 line = line.replace("$B2CTABLE$", b2c);
 250             }
 251             if (line.indexOf("$C2BLENGTH$") != -1) {
 252                 line = line.replace("$C2BLENGTH$", "0x" + Integer.toString(c2bOff, 16));
 253             }
 254             if (line.indexOf("$NONROUNDTRIP_B2C$") != -1) {
 255                 if (b2cNR.length() == 0)
 256                     continue;
 257                 line = line.replace("$NONROUNDTRIP_B2C$", b2cNR);
 258             }
 259 
 260             if (line.indexOf("$NONROUNDTRIP_C2B$") != -1) {
 261                 if (c2bNR.length() == 0)
 262                     continue;
 263                 line = line.replace("$NONROUNDTRIP_C2B$", c2bNR);
 264             }
 265             out.println(line);
 266         }
 267         out.close();
 268     }
 269 }