1 /*
   2  * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.charsetmapping;
  27 
  28 import java.io.*;
  29 import java.util.Arrays;
  30 import java.util.ArrayList;
  31 import java.util.Scanner;
  32 import java.util.Formatter;
  33 import java.util.regex.Pattern;
  34 import static build.tools.charsetmapping.Utils.*;
  35 
  36 public class DBCS {
  37     // pattern used by this class to read in mapping table
  38     static Pattern mPattern = Pattern.compile("(?:0x)?(\\p{XDigit}++)\\s++(?:0x)?(\\p{XDigit}++)(?:\\s++#.*)?");
  39 
  40     public static void genClass(String type, Charset cs,
  41                                 String srcDir, String dstDir, String template)
  42         throws Exception
  43     {
  44         String clzName = cs.clzName;
  45         String csName  = cs.csName;
  46         String hisName = cs.hisName;
  47         String pkgName = cs.pkgName;
  48         boolean isASCII = cs.isASCII;
  49         int b1Min = cs.b1Min;
  50         int b1Max = cs.b1Max;
  51         int b2Min = cs.b2Min;
  52         int b2Max = cs.b2Max;
  53 
  54         StringBuilder b2cSB = new StringBuilder();
  55         StringBuilder b2cNRSB = new StringBuilder();
  56         StringBuilder c2bNRSB = new StringBuilder();
  57 
  58         char[] db = new char[0x10000];
  59         char[] c2bIndex = new char[0x100];
  60         int c2bOff = 0x100;    // first 0x100 for unmappable segs
  61 
  62         Arrays.fill(db, UNMAPPABLE_DECODING);
  63         Arrays.fill(c2bIndex, UNMAPPABLE_DECODING);
  64 
  65         char[] b2cIndex = new char[0x100];
  66         Arrays.fill(b2cIndex, UNMAPPABLE_DECODING);
  67 
  68         // (1)read in .map to parse all b->c entries
  69         FileInputStream in = new FileInputStream(new File(srcDir, clzName + ".map"));
  70         Parser p = new Parser(in, mPattern);
  71         Entry  e = null;
  72         while ((e = p.next()) != null) {
  73             db[e.bs] = (char)e.cp;
  74 
  75             if (e.bs > 0x100 &&    // db
  76                 b2cIndex[e.bs>>8] == UNMAPPABLE_DECODING) {
  77                 b2cIndex[e.bs>>8] = 1;
  78             }
  79 
  80             if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
  81                 c2bOff += 0x100;
  82                 c2bIndex[e.cp>>8] = 1;
  83             }
  84         }
  85         Output out = new Output(new Formatter(b2cSB));
  86         out.format("%n    static final String b2cSBStr =%n");
  87         out.format(db, 0x00, 0x100,  ";");
  88 
  89         out.format("%n        static final String[] b2cStr = {%n");
  90         for (int i = 0; i < 0x100; i++) {
  91             if (b2cIndex[i] == UNMAPPABLE_DECODING) {
  92                 out.format("            null,%n");  //unmappable segments
  93             } else {
  94                 out.format(db, i, b2Min, b2Max, ",");
  95             }
  96         }
  97 
  98         out.format("        };%n");
  99         out.close();
 100 
 101         // (2)now parse the .nr file which includes "b->c" non-roundtrip entries
 102         File f = new File(srcDir, clzName + ".nr");
 103         if (f.exists()) {
 104             StringBuilder sb = new StringBuilder();
 105             in = new FileInputStream(f);
 106             p = new Parser(in, mPattern);
 107             e = null;
 108             while ((e = p.next()) != null) {
 109                 // A <b,c> pair
 110                 sb.append((char)e.bs);
 111                 sb.append((char)e.cp);
 112             }
 113             char[] nr = sb.toString().toCharArray();
 114             out = new Output(new Formatter(b2cNRSB));
 115             out.format("String b2cNR =%n");
 116             out.format(nr, 0, nr.length,  ";");
 117             out.close();
 118         } else {
 119             b2cNRSB.append("String b2cNR = null;");
 120         }
 121 
 122         // (3)finally the .c2b file which includes c->b non-roundtrip entries
 123         f = new File(srcDir, clzName + ".c2b");
 124         if (f.exists()) {
 125             StringBuilder sb = new StringBuilder();
 126             in = new FileInputStream(f);
 127             p = new Parser(in, mPattern);
 128             e = null;
 129             while ((e = p.next()) != null) {
 130                 // A <b,c> pair
 131                 if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
 132                     c2bOff += 0x100;
 133                     c2bIndex[e.cp>>8] = 1;
 134                 }
 135                 sb.append((char)e.bs);
 136                 sb.append((char)e.cp);
 137             }
 138             char[] nr = sb.toString().toCharArray();
 139             out = new Output(new Formatter(c2bNRSB));
 140             out.format("String c2bNR =%n");
 141             out.format(nr, 0, nr.length,  ";");
 142             out.close();
 143         } else {
 144             c2bNRSB.append("String c2bNR = null;");
 145         }
 146 
 147         // (4)it's time to generate the source file
 148         String b2c = b2cSB.toString();
 149         String b2cNR = b2cNRSB.toString();
 150         String c2bNR = c2bNRSB.toString();
 151 
 152         Scanner s = new Scanner(new File(srcDir, template));
 153         PrintStream ops = new PrintStream(new FileOutputStream(
 154                              new File(dstDir, clzName + ".java")));
 155         if (hisName == null)
 156             hisName = "";
 157 
 158         // (5) c2b replacement, only used for JIs0208/0212, which
 159         // are two pure db charsets so default '3f' does not work
 160         // TBD: move this into configuration file
 161         String c2bRepl = "";
 162         if (clzName.startsWith("JIS_X_0208")) {
 163             c2bRepl = "new byte[]{ (byte)0x21, (byte)0x29 },";
 164         } else if (clzName.startsWith("JIS_X_0212")) {
 165             c2bRepl = "new byte[]{ (byte)0x22, (byte)0x44 },";
 166         } else if (clzName.startsWith("IBM300")) {
 167             c2bRepl = "new byte[]{ (byte)0x42, (byte)0x6f },";
 168         }
 169 
 170         while (s.hasNextLine()) {
 171             String line = s.nextLine();
 172             if (line.indexOf("$") == -1) {
 173                 ops.println(line);
 174                 continue;
 175             }
 176             line = line.replace("$PACKAGE$" , pkgName)
 177                        .replace("$IMPLEMENTS$", (hisName == null)?
 178                                 "" : "implements HistoricallyNamedCharset")
 179                        .replace("$NAME_CLZ$", clzName)
 180                        .replace("$NAME_ALIASES$",
 181                                 "sun.nio.cs".equals(pkgName) ?
 182                                 "StandardCharsets.aliases_" + clzName :
 183                                 "ExtendedCharsets.aliasesFor(\"" + csName + "\")")
 184                        .replace("$NAME_CS$" , csName)
 185                        .replace("$CONTAINS$",
 186                                 "MS932".equals(clzName)?
 187                                 "return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof JIS_X_0201) || (cs instanceof " + clzName + "));":
 188                                 (isASCII ?
 189                                  "return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof " + clzName + "));":
 190                                  "return (cs instanceof " + clzName + ");"))
 191                        .replace("$HISTORICALNAME$",
 192                                 (hisName == null)? "" :
 193                                 "    public String historicalName() { return \"" + hisName + "\"; }")
 194                        .replace("$DECTYPE$", type)
 195                        .replace("$ENCTYPE$", type)
 196                        .replace("$B1MIN$"   , "0x" + Integer.toString(b1Min, 16))
 197                        .replace("$B1MAX$"   , "0x" + Integer.toString(b1Max, 16))
 198                        .replace("$B2MIN$"   , "0x" + Integer.toString(b2Min, 16))
 199                        .replace("$B2MAX$"   , "0x" + Integer.toString(b2Max, 16))
 200                        .replace("$B2C$", b2c)
 201                        .replace("$C2BLENGTH$", "0x" + Integer.toString(c2bOff, 16))
 202                        .replace("$NONROUNDTRIP_B2C$", b2cNR)
 203                        .replace("$NONROUNDTRIP_C2B$", c2bNR)
 204                        .replace("$ENC_REPLACEMENT$", c2bRepl);
 205 
 206             ops.println(line);
 207         }
 208         ops.close();
 209     }
 210 }