1 /*
   2  * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.charsetmapping;
  27 
  28 import java.io.*;
  29 import java.util.Arrays;
  30 import java.util.ArrayList;
  31 import java.util.Scanner;
  32 import java.util.Formatter;
  33 import java.util.regex.Pattern;
  34 import static build.tools.charsetmapping.Utils.*;
  35 
  36 public class SBCS {
  37 
  38     static Pattern sbmap = Pattern.compile("0x(\\p{XDigit}++)\\s++(?:U\\+|0x)?(\\p{XDigit}++)(?:\\s++#.*)?");
  39 
  40     public static void genClass(Charset cs,
  41                                 String srcDir, String dstDir, String template)
  42         throws Exception
  43     {
  44         String clzName = cs.clzName;
  45         String csName  = cs.csName;
  46         String hisName = cs.hisName;
  47         String pkgName = cs.pkgName;
  48         boolean isASCII = cs.isASCII;
  49         boolean isAlwaysCompactable = true;
  50 
  51         StringBuilder b2cSB = new StringBuilder();
  52         StringBuilder b2cNRSB = new StringBuilder();
  53         StringBuilder c2bNRSB = new StringBuilder();
  54 
  55         char[] sb = new char[0x100];
  56         char[] c2bIndex = new char[0x100];
  57         int    c2bOff = 0;
  58         Arrays.fill(sb, UNMAPPABLE_DECODING);
  59         Arrays.fill(c2bIndex, UNMAPPABLE_DECODING);
  60 
  61         // (1)read in .map to parse all b->c entries
  62         FileInputStream in = new FileInputStream(
  63                                  new File(srcDir, clzName + ".map"));
  64         Parser p = new Parser(in, sbmap);
  65         Entry  e = null;
  66 
  67         while ((e = p.next()) != null) {
  68             sb[e.bs] = (char)e.cp;
  69             if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
  70                 c2bOff += 0x100;
  71                 c2bIndex[e.cp>>8] = 1;
  72             }
  73             if (e.cp > 0xFF) {
  74                 isAlwaysCompactable = false;
  75             }
  76         }
  77 
  78         Formatter fm = new Formatter(b2cSB);
  79         fm.format("%n");
  80 
  81         // vm -server shows cc[byte + 128] access is much faster than
  82         // cc[byte&0xff] so we output the upper segment first
  83         toString(sb, 0x80, 0x100, fm, "+", true);
  84         toString(sb, 0x00, 0x80,  fm, ";", true);
  85         fm.close();
  86 
  87         // (2)now the .nr file which includes "b->c" non-roundtrip entries
  88         File f = new File(srcDir, clzName + ".nr");
  89         if (f.exists()) {
  90             in = new FileInputStream(f);
  91             fm = new Formatter(b2cNRSB);
  92             p = new Parser(in, sbmap);
  93             e = null;
  94 
  95             fm.format("// remove non-roundtrip entries%n");
  96             fm.format("        b2cMap = b2cTable.toCharArray();%n");
  97             while ((e = p.next()) != null) {
  98                 fm.format("        b2cMap[%d] = UNMAPPABLE_DECODING;%n",
  99                           (e.bs>=0x80)?(e.bs-0x80):(e.bs+0x80));
 100             }
 101             fm.close();
 102         }
 103 
 104         // (3)finally the .c2b file which includes c->b non-roundtrip entries
 105         f = new File(srcDir, clzName + ".c2b");
 106         if (f.exists()) {
 107             in = new FileInputStream(f);
 108             fm = new Formatter(c2bNRSB);
 109             p = new Parser(in, sbmap);
 110             e = null;
 111             ArrayList<Entry> es = new ArrayList<Entry>();
 112             while ((e = p.next()) != null) {
 113                 if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
 114                     c2bOff += 0x100;
 115                     c2bIndex[e.cp>>8] = 1;
 116                 }
 117                 es.add(e);
 118             }
 119             fm.format("// non-roundtrip c2b only entries%n");
 120             if (es.size() < 100) {
 121                 fm.format("        c2bNR = new char[%d];%n", es.size() * 2);
 122                 int i = 0;
 123                 for (Entry entry: es) {
 124                     fm.format("        c2bNR[%d] = 0x%x; c2bNR[%d] = 0x%x;%n",
 125                               i++, entry.bs, i++, entry.cp);
 126                 }
 127             } else {
 128                 char[] cc = new char[es.size() * 2];
 129                 int i = 0;
 130                 for (Entry entry: es) {
 131                     cc[i++] = (char)entry.bs;
 132                     cc[i++] = (char)entry.cp;
 133                 }
 134                 fm.format("        c2bNR = (%n");
 135                 toString(cc, 0, i,  fm, ").toCharArray();", false);
 136             }
 137             fm.close();
 138         }
 139 
 140         // (4)it's time to generate the source file
 141         String b2c = b2cSB.toString();
 142         String b2cNR = b2cNRSB.toString();
 143         String c2bNR = c2bNRSB.toString();
 144 
 145         Scanner s = new Scanner(new File(srcDir, template));
 146         PrintStream out = new PrintStream(new FileOutputStream(
 147                               new File(dstDir, clzName + ".java")));
 148 
 149         while (s.hasNextLine()) {
 150             String line = s.nextLine();
 151             int i = line.indexOf("$");
 152             if (i == -1) {
 153                 out.println(line);
 154                 continue;
 155             }
 156             if (line.indexOf("$PACKAGE$", i) != -1) {
 157                 line = line.replace("$PACKAGE$", pkgName);
 158             }
 159             if (line.indexOf("$NAME_CLZ$", i) != -1) {
 160                 line = line.replace("$NAME_CLZ$", clzName);
 161             }
 162             if (line.indexOf("$NAME_CS$", i) != -1) {
 163                 line = line.replace("$NAME_CS$", csName);
 164             }
 165             if (line.indexOf("$NAME_ALIASES$", i) != -1) {
 166                 if ("sun.nio.cs".equals(pkgName))
 167                     line = line.replace("$NAME_ALIASES$",
 168                                         "StandardCharsets.aliases_" + clzName + "()");
 169                 else
 170                     line = line.replace("$NAME_ALIASES$",
 171                                         "ExtendedCharsets.aliasesFor(\"" + csName + "\")");
 172             }
 173             if (line.indexOf("$NAME_HIS$", i) != -1) {
 174                 line = line.replace("$NAME_HIS$", hisName);
 175             }
 176             if (line.indexOf("$CONTAINS$", i) != -1) {
 177                 if (isASCII)
 178                     line = "        return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof " + clzName + "));";
 179                 else
 180                     line = "        return (cs instanceof " + clzName + ");";
 181             }
 182             if (line.indexOf("$ASCIICOMPATIBLE$") != -1) {
 183                 line = line.replace("$ASCIICOMPATIBLE$", isASCII ? "true" : "false");
 184             }
 185             if (line.indexOf("$ALWAYSCOMPACTABLE$") != -1) {
 186                 line = line.replace("$ALWAYSCOMPACTABLE$", isAlwaysCompactable ? "true" : "false");
 187             }
 188             if (line.indexOf("$B2CTABLE$") != -1) {
 189                 line = line.replace("$B2CTABLE$", b2c);
 190             }
 191             if (line.indexOf("$C2BLENGTH$") != -1) {
 192                 line = line.replace("$C2BLENGTH$", "0x" + Integer.toString(c2bOff, 16));
 193             }
 194             if (line.indexOf("$NONROUNDTRIP_B2C$") != -1) {
 195                 if (b2cNR.length() == 0)
 196                     continue;
 197                 line = line.replace("$NONROUNDTRIP_B2C$", b2cNR);
 198             }
 199 
 200             if (line.indexOf("$NONROUNDTRIP_C2B$") != -1) {
 201                 if (c2bNR.length() == 0)
 202                     continue;
 203                 line = line.replace("$NONROUNDTRIP_C2B$", c2bNR);
 204             }
 205             out.println(line);
 206         }
 207         out.close();
 208     }
 209 
 210     private static void toString(char[] sb, int off, int end,
 211                                  Formatter out, String closure, boolean comment)
 212     {
 213         while (off < end) {
 214             out.format("        \"");
 215             for (int j = 0; j < 8; j++) {
 216                 if (off == end)
 217                     break;
 218                 char c = sb[off++];
 219                 switch (c) {
 220                 case '\b':
 221                     out.format("\\b"); break;
 222                 case '\t':
 223                     out.format("\\t"); break;
 224                 case '\n':
 225                     out.format("\\n"); break;
 226                 case '\f':
 227                     out.format("\\f"); break;
 228                 case '\r':
 229                     out.format("\\r"); break;
 230                 case '\"':
 231                     out.format("\\\""); break;
 232                 case '\'':
 233                     out.format("\\'"); break;
 234                 case '\\':
 235                     out.format("\\\\"); break;
 236                 default:
 237                     out.format("\\u%04X", c & 0xffff);
 238                 }
 239             }
 240             if (comment) {
 241                 if (off == end)
 242                     out.format("\" %s      // 0x%02x - 0x%02x%n",
 243                                closure, off-8, off-1);
 244                 else
 245                     out.format("\" +      // 0x%02x - 0x%02x%n",
 246                                off-8, off-1);
 247             } else {
 248                 if (off == end)
 249                     out.format("\"%s%n", closure);
 250                 else
 251                     out.format("\" +%n");
 252             }
 253         }
 254     }
 255 }