1 /*
   2  * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Sun designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 package build.tools.charsetmapping;
  27 import java.io.*;
  28 import java.util.Arrays;
  29 import java.util.ArrayList;
  30 import java.util.Scanner;
  31 import java.util.Formatter;
  32 import java.util.regex.*;
  33 import java.nio.charset.*;
  34 import static build.tools.charsetmapping.CharsetMapping.*;
  35 
  36 public class GenerateDBCS {
  37     // pattern used by this class to read in mapping table
  38     static Pattern mPattern = Pattern.compile("(?:0x)?(\\p{XDigit}++)\\s++(?:0x)?(\\p{XDigit}++)(?:\\s++#.*)?");
  39     public static void genDBCS(String args[]) throws Exception {
  40 
  41         Scanner s = new Scanner(new File(args[0], args[2]));
  42         while (s.hasNextLine()) {
  43             String line = s.nextLine();
  44             if (line.startsWith("#") || line.length() == 0)
  45                 continue;
  46             String[] fields = line.split("\\s+");
  47             if (fields.length < 10) {
  48                 System.err.println("Misconfiged sbcs line <" + line + ">?");
  49                 continue;
  50             }
  51             String clzName = fields[0];
  52             String csName  = fields[1];
  53             String hisName = ("null".equals(fields[2]))?null:fields[2];
  54             String type = fields[3].toUpperCase();
  55             if ("BASIC".equals(type))
  56                 type = "";
  57             else
  58                 type = "_" + type;
  59             String pkgName  = fields[4];
  60             boolean isASCII = Boolean.valueOf(fields[5]);
  61             int    b1Min = toInteger(fields[6]);
  62             int    b1Max = toInteger(fields[7]);
  63             int    b2Min    = toInteger(fields[8]);
  64             int    b2Max    = toInteger(fields[9]);
  65             System.out.printf("%s,%s,%s,%b,%s%n", clzName, csName, hisName, isASCII, pkgName);
  66             genClass(args[0], args[1], "DoubleByte-X.java.template",
  67                     clzName, csName, hisName, pkgName,
  68                     isASCII, type,
  69                     b1Min, b1Max, b2Min, b2Max);
  70         }
  71     }
  72 
  73     private static int toInteger(String s) {
  74         if (s.startsWith("0x") || s.startsWith("0X"))
  75             return Integer.valueOf(s.substring(2), 16);
  76         else
  77             return Integer.valueOf(s);
  78     }
  79 
  80     private static void outString(Formatter out,
  81                                   char[] cc, int off, int end,
  82                                   String closure)
  83     {
  84         while (off < end) {
  85             out.format("        \"");
  86             for (int j = 0; j < 8; j++) {
  87                 if (off == end)
  88                     break;
  89                 char c = cc[off++];
  90                 switch (c) {
  91                 case '\b':
  92                     out.format("\\b"); break;
  93                 case '\t':
  94                     out.format("\\t"); break;
  95                 case '\n':
  96                     out.format("\\n"); break;
  97                 case '\f':
  98                     out.format("\\f"); break;
  99                 case '\r':
 100                     out.format("\\r"); break;
 101                 case '\"':
 102                     out.format("\\\""); break;
 103                 case '\'':
 104                     out.format("\\'"); break;
 105                 case '\\':
 106                     out.format("\\\\"); break;
 107                 default:
 108                     out.format("\\u%04X", c & 0xffff);
 109                 }
 110             }
 111             if (off == end)
 112                 out.format("\" %s%n", closure);
 113             else
 114                 out.format("\" + %n");
 115         }
 116     }
 117 
 118     private static void outString(Formatter out,
 119                                   char[] db,
 120                                   int b1,
 121                                   int b2Min, int b2Max,
 122                                   String closure)
 123     {
 124         char[] cc = new char[b2Max - b2Min + 1];
 125         int off = 0;
 126         for (int b2 = b2Min; b2 <= b2Max; b2++) {
 127             cc[off++] = db[(b1 << 8) | b2];
 128         }
 129         outString(out, cc, 0, cc.length, closure);
 130     }
 131 
 132     private static void genClass(String srcDir, String dstDir, String template,
 133                                  String clzName,
 134                                  String csName,
 135                                  String hisName,
 136                                  String pkgName,
 137                                  boolean isASCII,
 138                                  String type,
 139                                  int b1Min, int b1Max,
 140                                  int b2Min, int b2Max)
 141         throws Exception
 142     {
 143 
 144         StringBuilder b2cSB = new StringBuilder();
 145         StringBuilder b2cNRSB = new StringBuilder();
 146         StringBuilder c2bNRSB = new StringBuilder();
 147 
 148         char[] db = new char[0x10000];
 149         char[] c2bIndex = new char[0x100];
 150         int c2bOff = 0x100;    // first 0x100 for unmappable segs
 151 
 152         Arrays.fill(db, UNMAPPABLE_DECODING);
 153         Arrays.fill(c2bIndex, UNMAPPABLE_DECODING);
 154 
 155         char[] b2cIndex = new char[0x100];
 156         Arrays.fill(b2cIndex, UNMAPPABLE_DECODING);
 157 
 158         // (1)read in .map to parse all b->c entries
 159         FileInputStream in = new FileInputStream(new File(srcDir, clzName + ".map"));
 160         Parser p = new Parser(in, mPattern);
 161         Entry  e = null;
 162         while ((e = p.next()) != null) {
 163             db[e.bs] = (char)e.cp;
 164 
 165             if (e.bs > 0x100 &&    // db
 166                 b2cIndex[e.bs>>8] == UNMAPPABLE_DECODING) {
 167                 b2cIndex[e.bs>>8] = 1;
 168             }
 169 
 170             if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
 171                 c2bOff += 0x100;
 172                 c2bIndex[e.cp>>8] = 1;
 173             }
 174         }
 175         Formatter fm = new Formatter(b2cSB);
 176         fm.format("%n    static final String b2cSBStr =%n");
 177         outString(fm, db, 0x00, 0x100,  ";");
 178 
 179         fm.format("%n        static final String[] b2cStr = {%n");
 180         for (int i = 0; i < 0x100; i++) {
 181             if (b2cIndex[i] == UNMAPPABLE_DECODING) {
 182                 fm.format("            null,%n");  //unmappable segments
 183             } else {
 184                 outString(fm, db, i, b2Min, b2Max, ",");
 185             }
 186         }
 187 
 188         fm.format("        };%n");
 189         fm.close();
 190 
 191         // (2)now parse the .nr file which includes "b->c" non-roundtrip entries
 192         File f = new File(srcDir, clzName + ".nr");
 193         if (f.exists()) {
 194             StringBuilder sb = new StringBuilder();
 195             in = new FileInputStream(f);
 196             p = new Parser(in, mPattern);
 197             e = null;
 198             while ((e = p.next()) != null) {
 199                 // A <b,c> pair
 200                 sb.append((char)e.bs);
 201                 sb.append((char)e.cp);
 202             }
 203             char[] nr = sb.toString().toCharArray();
 204             fm = new Formatter(b2cNRSB);
 205             fm.format("String b2cNR =%n");
 206             outString(fm, nr, 0, nr.length,  ";");
 207             fm.close();
 208         } else {
 209             b2cNRSB.append("String b2cNR = null;");
 210         }
 211 
 212         // (3)finally the .c2b file which includes c->b non-roundtrip entries
 213         f = new File(srcDir, clzName + ".c2b");
 214         if (f.exists()) {
 215             StringBuilder sb = new StringBuilder();
 216             in = new FileInputStream(f);
 217             p = new Parser(in, mPattern);
 218             e = null;
 219             while ((e = p.next()) != null) {
 220                 // A <b,c> pair
 221                 if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
 222                     c2bOff += 0x100;
 223                     c2bIndex[e.cp>>8] = 1;
 224                 }
 225                 sb.append((char)e.bs);
 226                 sb.append((char)e.cp);
 227             }
 228             char[] nr = sb.toString().toCharArray();
 229             fm = new Formatter(c2bNRSB);
 230             fm.format("String c2bNR =%n");
 231             outString(fm, nr, 0, nr.length,  ";");
 232             fm.close();
 233         } else {
 234             c2bNRSB.append("String c2bNR = null;");
 235         }
 236 
 237         // (4)it's time to generate the source file
 238         String b2c = b2cSB.toString();
 239         String b2cNR = b2cNRSB.toString();
 240         String c2bNR = c2bNRSB.toString();
 241 
 242         Scanner s = new Scanner(new File(srcDir, template));
 243         PrintStream out = new PrintStream(new FileOutputStream(
 244                               new File(dstDir, clzName + ".java")));
 245         if (hisName == null)
 246             hisName = "";
 247 
 248         while (s.hasNextLine()) {
 249             String line = s.nextLine();
 250             if (line.indexOf("$") == -1) {
 251                 out.println(line);
 252                 continue;
 253             }
 254             line = line.replace("$PACKAGE$" , pkgName)
 255                        .replace("$IMPLEMENTS$", (hisName == null)?
 256                                 "" : "implements HistoricallyNamedCharset")
 257                        .replace("$NAME_CLZ$", clzName)
 258                        .replace("$NAME_ALIASES$",
 259                                 "sun.nio.cs".equals(pkgName) ?
 260                                 "StandardCharsets.aliases_" + clzName :
 261                                 "ExtendedCharsets.aliasesFor(\"" + csName + "\")")
 262                        .replace("$NAME_CS$" , csName)
 263                        .replace("$CONTAINS$",
 264                                 "MS932".equals(clzName)?
 265                                 "return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof JIS_X_0201) || (cs instanceof " + clzName + "));":
 266                                 (isASCII ?
 267                                  "return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof " + clzName + "));":
 268                                  "return (cs instanceof " + clzName + ");"))
 269                        .replace("$HISTORICALNAME$",
 270                                 (hisName == null)? "" :
 271                                 "    public String historicalName() { return \"" + hisName + "\"; }")
 272                        .replace("$DECTYPE$", type)
 273                        .replace("$ENCTYPE$", type)
 274                        .replace("$B1MIN$"   , "0x" + Integer.toString(b1Min, 16))
 275                        .replace("$B1MAX$"   , "0x" + Integer.toString(b1Max, 16))
 276                        .replace("$B2MIN$"   , "0x" + Integer.toString(b2Min, 16))
 277                        .replace("$B2MAX$"   , "0x" + Integer.toString(b2Max, 16))
 278                        .replace("$B2C$", b2c)
 279                        .replace("$C2BLENGTH$", "0x" + Integer.toString(c2bOff, 16))
 280                        .replace("$NONROUNDTRIP_B2C$", b2cNR)
 281                        .replace("$NONROUNDTRIP_C2B$", c2bNR);
 282 
 283             out.println(line);
 284         }
 285         out.close();
 286     }
 287 }