1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.charsetmapping;
  27 import java.io.*;
  28 import java.util.Arrays;
  29 import java.util.ArrayList;
  30 import java.util.Scanner;
  31 import java.util.Formatter;
  32 import java.util.regex.*;
  33 import java.nio.charset.*;
  34 import static build.tools.charsetmapping.Utils.*;
  35 
  36 public class DBCS {
  37     // pattern used by this class to read in mapping table
  38     static Pattern mPattern = Pattern.compile("(?:0x)?(\\p{XDigit}++)\\s++(?:0x)?(\\p{XDigit}++)(?:\\s++#.*)?");
  39 
  40     public static void genClass(String args[]) throws Exception {
  41 
  42         Scanner s = new Scanner(new File(args[0], args[2]));
  43         while (s.hasNextLine()) {
  44             String line = s.nextLine();
  45             if (line.startsWith("#") || line.length() == 0)
  46                 continue;
  47             String[] fields = line.split("\\s+");
  48             if (fields.length < 10) {
  49                 System.err.println("Misconfiged sbcs line <" + line + ">?");
  50                 continue;
  51             }
  52             String clzName = fields[0];
  53             String csName  = fields[1];
  54             String hisName = ("null".equals(fields[2]))?null:fields[2];
  55             String type = fields[3].toUpperCase();
  56             if ("BASIC".equals(type))
  57                 type = "";
  58             else
  59                 type = "_" + type;
  60             String pkgName  = fields[4];
  61             boolean isASCII = Boolean.valueOf(fields[5]);
  62             int    b1Min = toInteger(fields[6]);
  63             int    b1Max = toInteger(fields[7]);
  64             int    b2Min    = toInteger(fields[8]);
  65             int    b2Max    = toInteger(fields[9]);
  66             System.out.printf("%s,%s,%s,%b,%s%n", clzName, csName, hisName, isASCII, pkgName);
  67             genClass0(args[0], args[1], "DoubleByte-X.java.template",
  68                     clzName, csName, hisName, pkgName,
  69                     isASCII, type,
  70                     b1Min, b1Max, b2Min, b2Max);
  71         }
  72     }
  73 
  74     static int toInteger(String s) {
  75         if (s.startsWith("0x") || s.startsWith("0X"))
  76             return Integer.valueOf(s.substring(2), 16);
  77         else
  78             return Integer.valueOf(s);
  79     }
  80 
  81     private static void genClass0(String srcDir, String dstDir, String template,
  82                                   String clzName,
  83                                   String csName,
  84                                   String hisName,
  85                                   String pkgName,
  86                                   boolean isASCII,
  87                                   String type,
  88                                   int b1Min, int b1Max,
  89                                   int b2Min, int b2Max)
  90         throws Exception
  91     {
  92 
  93         StringBuilder b2cSB = new StringBuilder();
  94         StringBuilder b2cNRSB = new StringBuilder();
  95         StringBuilder c2bNRSB = new StringBuilder();
  96 
  97         char[] db = new char[0x10000];
  98         char[] c2bIndex = new char[0x100];
  99         int c2bOff = 0x100;    // first 0x100 for unmappable segs
 100 
 101         Arrays.fill(db, UNMAPPABLE_DECODING);
 102         Arrays.fill(c2bIndex, UNMAPPABLE_DECODING);
 103 
 104         char[] b2cIndex = new char[0x100];
 105         Arrays.fill(b2cIndex, UNMAPPABLE_DECODING);
 106 
 107         // (1)read in .map to parse all b->c entries
 108         FileInputStream in = new FileInputStream(new File(srcDir, clzName + ".map"));
 109         Parser p = new Parser(in, mPattern);
 110         Entry  e = null;
 111         while ((e = p.next()) != null) {
 112             db[e.bs] = (char)e.cp;
 113 
 114             if (e.bs > 0x100 &&    // db
 115                 b2cIndex[e.bs>>8] == UNMAPPABLE_DECODING) {
 116                 b2cIndex[e.bs>>8] = 1;
 117             }
 118 
 119             if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
 120                 c2bOff += 0x100;
 121                 c2bIndex[e.cp>>8] = 1;
 122             }
 123         }
 124         Output out = new Output(new Formatter(b2cSB));
 125         out.format("%n    static final String b2cSBStr =%n");
 126         out.format(db, 0x00, 0x100,  ";");
 127 
 128         out.format("%n        static final String[] b2cStr = {%n");
 129         for (int i = 0; i < 0x100; i++) {
 130             if (b2cIndex[i] == UNMAPPABLE_DECODING) {
 131                 out.format("            null,%n");  //unmappable segments
 132             } else {
 133                 out.format(db, i, b2Min, b2Max, ",");
 134             }
 135         }
 136 
 137         out.format("        };%n");
 138         out.close();
 139 
 140         // (2)now parse the .nr file which includes "b->c" non-roundtrip entries
 141         File f = new File(srcDir, clzName + ".nr");
 142         if (f.exists()) {
 143             StringBuilder sb = new StringBuilder();
 144             in = new FileInputStream(f);
 145             p = new Parser(in, mPattern);
 146             e = null;
 147             while ((e = p.next()) != null) {
 148                 // A <b,c> pair
 149                 sb.append((char)e.bs);
 150                 sb.append((char)e.cp);
 151             }
 152             char[] nr = sb.toString().toCharArray();
 153             out = new Output(new Formatter(b2cNRSB));
 154             out.format("String b2cNR =%n");
 155             out.format(nr, 0, nr.length,  ";");
 156             out.close();
 157         } else {
 158             b2cNRSB.append("String b2cNR = null;");
 159         }
 160 
 161         // (3)finally the .c2b file which includes c->b non-roundtrip entries
 162         f = new File(srcDir, clzName + ".c2b");
 163         if (f.exists()) {
 164             StringBuilder sb = new StringBuilder();
 165             in = new FileInputStream(f);
 166             p = new Parser(in, mPattern);
 167             e = null;
 168             while ((e = p.next()) != null) {
 169                 // A <b,c> pair
 170                 if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
 171                     c2bOff += 0x100;
 172                     c2bIndex[e.cp>>8] = 1;
 173                 }
 174                 sb.append((char)e.bs);
 175                 sb.append((char)e.cp);
 176             }
 177             char[] nr = sb.toString().toCharArray();
 178             out = new Output(new Formatter(c2bNRSB));
 179             out.format("String c2bNR =%n");
 180             out.format(nr, 0, nr.length,  ";");
 181             out.close();
 182         } else {
 183             c2bNRSB.append("String c2bNR = null;");
 184         }
 185 
 186         // (4)it's time to generate the source file
 187         String b2c = b2cSB.toString();
 188         String b2cNR = b2cNRSB.toString();
 189         String c2bNR = c2bNRSB.toString();
 190 
 191         Scanner s = new Scanner(new File(srcDir, template));
 192         PrintStream ops = new PrintStream(new FileOutputStream(
 193                              new File(dstDir, clzName + ".java")));
 194         if (hisName == null)
 195             hisName = "";
 196 
 197         // (5) c2b replacement, only used for JIs0208/0212, which
 198         // are two pure db charsets so default '3f' does not work
 199         // TBD: move this into configuration file
 200         String c2bRepl = "";
 201         if (clzName.startsWith("JIS_X_0208")) {
 202             c2bRepl = "new byte[]{ (byte)0x21, (byte)0x29 },";
 203         } else if (clzName.startsWith("JIS_X_0212")) {
 204             c2bRepl = "new byte[]{ (byte)0x22, (byte)0x44 },";
 205         } else if (clzName.startsWith("IBM300")) {
 206             c2bRepl = "new byte[]{ (byte)0x42, (byte)0x6f },";
 207         }
 208 
 209         while (s.hasNextLine()) {
 210             String line = s.nextLine();
 211             if (line.indexOf("$") == -1) {
 212                 ops.println(line);
 213                 continue;
 214             }
 215             line = line.replace("$PACKAGE$" , pkgName)
 216                        .replace("$IMPLEMENTS$", (hisName == null)?
 217                                 "" : "implements HistoricallyNamedCharset")
 218                        .replace("$NAME_CLZ$", clzName)
 219                        .replace("$NAME_ALIASES$",
 220                                 "sun.nio.cs".equals(pkgName) ?
 221                                 "StandardCharsets.aliases_" + clzName :
 222                                 "ExtendedCharsets.aliasesFor(\"" + csName + "\")")
 223                        .replace("$NAME_CS$" , csName)
 224                        .replace("$CONTAINS$",
 225                                 "MS932".equals(clzName)?
 226                                 "return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof JIS_X_0201) || (cs instanceof " + clzName + "));":
 227                                 (isASCII ?
 228                                  "return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof " + clzName + "));":
 229                                  "return (cs instanceof " + clzName + ");"))
 230                        .replace("$HISTORICALNAME$",
 231                                 (hisName == null)? "" :
 232                                 "    public String historicalName() { return \"" + hisName + "\"; }")
 233                        .replace("$DECTYPE$", type)
 234                        .replace("$ENCTYPE$", type)
 235                        .replace("$B1MIN$"   , "0x" + Integer.toString(b1Min, 16))
 236                        .replace("$B1MAX$"   , "0x" + Integer.toString(b1Max, 16))
 237                        .replace("$B2MIN$"   , "0x" + Integer.toString(b2Min, 16))
 238                        .replace("$B2MAX$"   , "0x" + Integer.toString(b2Max, 16))
 239                        .replace("$B2C$", b2c)
 240                        .replace("$C2BLENGTH$", "0x" + Integer.toString(c2bOff, 16))
 241                        .replace("$NONROUNDTRIP_B2C$", b2cNR)
 242                        .replace("$NONROUNDTRIP_C2B$", c2bNR)
 243                        .replace("$ENC_REPLACEMENT$", c2bRepl);
 244 
 245             ops.println(line);
 246         }
 247         ops.close();
 248     }
 249 }