1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package build.tools.charsetmapping;
  27 
  28 import java.io.*;
  29 import java.util.Arrays;
  30 import java.util.ArrayList;
  31 import java.util.Scanner;
  32 import java.util.Formatter;
  33 import java.util.regex.*;
  34 import java.nio.charset.*;
  35 import static build.tools.charsetmapping.Utils.*;
  36 
  37 public class HKSCS {
  38 
  39     // HKSCS2001.map has the third column for "UnicodeAlternate", which
  40     // is for c->b non-roundtrip mapping.
  41     // For HKSCS2008, those non-roundtrip mappings are in .nr file
  42     private static Pattern hkscs =
  43         Pattern.compile("(?:0x)?+(\\p{XDigit}++)\\s++(?:0x|U\\+)?+(\\p{XDigit}++)?\\s*+(?:0x|U\\+)?(\\p{XDigit}++)?\\s*+.*");
  44 
  45     static void genClass(String args[]) throws Exception {
  46 
  47         // hkscs2008
  48         genClass0(new FileInputStream(new File(args[0], "HKSCS2008.map")),
  49                   new FileInputStream(new File(args[0], "HKSCS2008.c2b")),
  50                   new PrintStream(new File(args[1], "HKSCSMapping.java"),
  51                                   "ISO-8859-1"),
  52                   "HKSCSMapping",
  53                   getCopyright(new File(args[3])));
  54 
  55 
  56         // xp2001
  57         genClass0(new FileInputStream(new File(args[0], "HKSCS_XP.map")),
  58                   null,
  59                   new PrintStream(new File(args[1], "HKSCS_XPMapping.java"),
  60                                   "ISO-8859-1"),
  61                   "HKSCS_XPMapping",
  62                   getCopyright(new File(args[3])));
  63 
  64         // hkscs2001
  65         genClass0(new FileInputStream(new File(args[0], "HKSCS2001.map")),
  66                   new FileInputStream(new File(args[0], "HKSCS2001.c2b")),
  67                   new PrintStream(new File(args[1], "HKSCS2001Mapping.java"),
  68                                   "ISO-8859-1"),
  69                   "HKSCS2001Mapping",
  70                   getCopyright(new File(args[3])));
  71     }
  72 
  73     static void genClass0(InputStream isB2C,
  74                           InputStream isC2B,
  75                           PrintStream ps,
  76                           String clzName,
  77                           String copyright)
  78         throws Exception
  79     {
  80         // ranges of byte1 and byte2, something should come from a "config" file
  81         int b1Min = 0x87;
  82         int b1Max = 0xfe;
  83         int b2Min = 0x40;
  84         int b2Max = 0xfe;
  85 
  86         try {
  87             char[] bmp = new char[0x10000];
  88             char[] supp = new char[0x10000];
  89 
  90             boolean[] b2cBmp = new boolean[0x100];
  91             boolean[] b2cSupp = new boolean[0x100];
  92             // pua should be in range of e000-f8ff. Expand
  93             // it to 0xf93b becase the hkscs2001.c2b has
  94             // the f920-f93b filled
  95             //char[] pua = new char[0xF8FF - 0xE000 + 1];
  96             char[] pua = new char[0xF93b - 0xE000 + 1];
  97             boolean hasSupp = false;
  98             boolean hasPua = false;
  99 
 100             Arrays.fill(bmp, UNMAPPABLE_DECODING);
 101             Arrays.fill(supp, UNMAPPABLE_DECODING);
 102             Arrays.fill(pua, UNMAPPABLE_DECODING);
 103 
 104             Parser p = new Parser(isB2C, hkscs);
 105             Entry  e = null;
 106             while ((e = p.next()) != null) {
 107                 if (e.cp >= 0x10000) {
 108                     supp[e.bs] = (char)e.cp;
 109                     b2cSupp[e.bs>>8] = true;
 110                     hasSupp = true;
 111                 } else {
 112                     bmp[e.bs] = (char)e.cp;
 113                     b2cBmp[e.bs>>8] = true;
 114                 }
 115                 if (e.cp2 != 0 && e.cp2 >= 0xe000 && e.cp2 <= 0xf8ff) {
 116                     hasPua = true;
 117                     pua[e.cp2 - 0xE000] = (char)e.bs;
 118                 }
 119             }
 120 
 121             if (isC2B != null) {
 122                 p = new Parser(isC2B, hkscs);
 123                 e = null;
 124                 while ((e = p.next()) != null) {
 125                     pua[e.cp - 0xE000] = (char)e.bs;
 126                 }
 127                 hasPua = true;
 128             }
 129 
 130             StringBuilder sb = new StringBuilder();
 131             Output out = new Output(new Formatter(sb));
 132 
 133             out.format(copyright);
 134             out.format("%n// -- This file was mechanically generated: Do not edit! -- //%n");
 135             out.format("package sun.nio.cs.ext;%n%n");
 136             out.format("class %s {%n%n", clzName);
 137 
 138             /* hardcoded in sun.nio.cs.ext.HKSCS.java
 139             out.format("    final static int b1Min = 0x%x;%n", b1Min);
 140             out.format("    final static int b1Max = 0x%x;%n", b1Max);
 141             out.format("    final static int b2Min = 0x%x;%n", b2Min);
 142             out.format("    final static int b2Max = 0x%x;%n", b2Max);
 143             */
 144 
 145             // bmp tables
 146             out.format("%n    static final String[] b2cBmpStr = new String[] {%n");
 147             for (int i = 0; i < 0x100; i++) {
 148                 if (b2cBmp[i])
 149                     out.format(bmp, i, b2Min, b2Max, ",");
 150                 else
 151                     out.format("        null,%n");  //unmappable segments
 152             }
 153             out.format("        };%n");
 154 
 155             // supp tables
 156             out.format("%n    static final String[] b2cSuppStr =");
 157             if (hasSupp) {
 158                 out.format(" new String[] {%n");
 159                 for (int i = 0; i < 0x100; i++) {
 160                     if (b2cSupp[i])
 161                         out.format(supp, i, b2Min, b2Max, ",");
 162                     else
 163                         out.format("        null,%n");  //unmappable segments
 164                 }
 165                 out.format("        };%n");
 166             } else {
 167                 out.format(" null;%n");
 168             }
 169 
 170             // private area tables
 171             out.format("%n    final static String pua =");
 172             if (hasPua) {
 173                 out.format("%n");
 174                 out.format(pua, 0, pua.length, ";");
 175             } else {
 176                 out.format(" null;%n");
 177             }
 178             out.format("%n");
 179             out.format("}");
 180 
 181             out.close();
 182 
 183             ps.println(sb.toString());
 184             ps.close();
 185 
 186         } catch (Exception x) {
 187             x.printStackTrace();
 188         }
 189     }
 190 }