1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.charsetmapping;
  27 
  28 import java.io.*;
  29 import java.util.ArrayList;
  30 import java.util.Scanner;
  31 import java.util.Formatter;
  32 import java.util.regex.*;
  33 import java.nio.charset.*;
  34 import static build.tools.charsetmapping.Utils.*;
  35 
  36 public class EUC_TW {
  37 
  38     static char[] toCharArray(int[] db,
  39                               int b1Min, int b1Max,
  40                               int b2Min, int b2Max)
  41     {
  42         char[] ca = new char[(b1Max - b1Min + 1) * (b2Max - b2Min + 1)];
  43         int off = 0;
  44         for (int b1 = b1Min; b1 <= b1Max; b1++) {
  45             for (int b2 = b2Min; b2 <= b2Max; b2++) {
  46                 ca[off++] = (char)(db[b1 * 256 + b2] & 0xffff);
  47             }
  48         }
  49         return ca;
  50     }
  51 
  52     static char[] toCharArray(byte[] ba,
  53                               int b1Min, int b1Max,
  54                               int b2Min, int b2Max)
  55     {
  56         char[] ca = new char[(b1Max - b1Min + 1) * (b2Max - b2Min + 1)];
  57         int off = 0;
  58         for (int b1 = b1Min; b1 <= b1Max; b1++) {
  59             int b2 = b2Min;
  60             while (b2 <= b2Max) {
  61                 ca[off++] = (char)(((ba[b1 * 256 + b2++] & 0xff) << 8) |
  62                                    (ba[b1 * 256 + b2++] & 0xff));
  63             }
  64         }
  65         return ca;
  66     }
  67 
  68     private static int initC2BIndex(char[] index) {
  69         int off = 0;
  70         for (int i = 0; i < index.length; i++) {
  71             if (index[i] != 0) {
  72                 index[i] = (char)off;
  73                 off += 0x100;
  74             } else {
  75                 index[i] = UNMAPPABLE_ENCODING;
  76             }
  77         }
  78         return off;
  79     }
  80 
  81     private static Pattern euctw = Pattern.compile("(?:8ea)?(\\p{XDigit}++)\\s++(\\p{XDigit}++)?\\s*+.*");
  82 
  83     static void genClass(String pkg, String args[]) throws Exception
  84     {
  85         InputStream is = new FileInputStream(new File(args[0], "euc_tw.map"));
  86         PrintStream ps = new PrintStream(new File(args[1], "EUC_TWMapping.java"),
  87                                          "ISO-8859-1");
  88         String copyright = getCopyright(new File(args[7], "EUC_TW.java"));
  89 
  90 
  91         // ranges of byte1 and byte2, something should come from a "config" file
  92         int b1Min = 0xa1;
  93         int b1Max = 0xfe;
  94         int b2Min = 0xa1;
  95         int b2Max = 0xfe;
  96 
  97         try {
  98             int[][] db = new int[8][0x10000];        // doublebyte
  99             byte[]  suppFlag = new byte[0x10000];    // doublebyte
 100             char[]  indexC2B = new char[256];
 101             char[]  indexC2BSupp = new char[256];
 102 
 103             for (int i = 0; i < 8; i++)
 104                 for (int j = 0; j < 0x10000; j++)
 105                     db[i][j] = UNMAPPABLE_DECODING;
 106 
 107             Parser p = new Parser(is, euctw);
 108             Entry  e = null;
 109             while ((e = p.next()) != null) {
 110                 int plane = 0;
 111                 if (e.bs >= 0x10000) {
 112                     plane = ((e.bs >> 16) & 0xff) - 1;
 113                     if (plane >= 14)
 114                         plane = 7;
 115                     e.bs = e.bs & 0xffff;
 116                 }
 117                 db[plane][e.bs] = e.cp;
 118                 if (e.cp < 0x10000) {
 119                     indexC2B[e.cp>>8] = 1;
 120                 } else {
 121                     indexC2BSupp[(e.cp&0xffff)>>8] = 1;
 122                     suppFlag[e.bs] |= (1 << plane);
 123                 }
 124             }
 125 
 126             StringBuilder sb = new StringBuilder();
 127             Output out = new Output(new Formatter(sb));
 128 
 129             out.format(copyright);
 130             out.format("%n// -- This file was mechanically generated: Do not edit! -- //%n");
 131             out.format("package %s;%n%n", pkg);
 132             out.format("class EUC_TWMapping {%n%n");
 133 
 134             // boundaries
 135             out.format("    final static int b1Min = 0x%x;%n", b1Min);
 136             out.format("    final static int b1Max = 0x%x;%n", b1Max);
 137             out.format("    final static int b2Min = 0x%x;%n", b2Min);
 138             out.format("    final static int b2Max = 0x%x;%n", b2Max);
 139 
 140             // b2c tables
 141             out.format("%n    final static String[] b2c = {%n");
 142             for (int plane = 0; plane < 8; plane++) {
 143                 out.format("        // Plane %d%n", plane);
 144                 out.format(toCharArray(db[plane], b1Min, b1Max, b2Min, b2Max),
 145                            ",");
 146                 out.format("%n");
 147             }
 148             out.format("    };%n");
 149 
 150             // c2bIndex
 151             out.format("%n    static final int C2BSIZE = 0x%x;%n",
 152                        initC2BIndex(indexC2B));
 153             out.format("%n    static char[] c2bIndex = new char[] {%n");
 154             out.format(indexC2B);
 155             out.format("    };%n");
 156 
 157             // c2bIndexSupp
 158             out.format("%n    static final int C2BSUPPSIZE = 0x%x;%n",
 159                        initC2BIndex(indexC2BSupp));
 160             out.format("%n    static char[] c2bSuppIndex = new char[] {%n");
 161             out.format(indexC2BSupp);
 162             out.format("    };%n");
 163 
 164             // suppFlags
 165             out.format("%n    static String b2cIsSuppStr =%n");
 166             out.format(toCharArray(suppFlag, b1Min, b1Max, b2Min, b2Max),
 167                        ";");
 168             out.format("}");
 169             out.close();
 170 
 171             ps.println(sb.toString());
 172             ps.close();
 173         } catch (Exception x) {
 174             x.printStackTrace();
 175         }
 176     }
 177 }