1 /*
   2  * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Sun designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 package build.tools.charsetmapping;
  27 
  28 import java.io.InputStream;
  29 import java.io.InputStreamReader;
  30 import java.io.OutputStream;
  31 import java.io.BufferedReader;
  32 import java.io.IOException;
  33 import java.util.regex.Matcher;
  34 import java.util.regex.Pattern;
  35 import java.util.*;
  36 
  37 public class CharsetMapping {
  38     public final static char UNMAPPABLE_DECODING = '\uFFFD';
  39     public final static int  UNMAPPABLE_ENCODING = 0xFFFD;
  40 
  41     public static class Entry {
  42         public int bs;   //byte sequence reps
  43         public int cp;   //Unicode codepoint
  44         public int cp2;  //CC of composite
  45 
  46         public Entry () {}
  47         public Entry (int bytes, int cp, int cp2) {
  48             this.bs = bytes;
  49             this.cp = cp;
  50             this.cp2 = cp2;
  51         }
  52     }
  53 
  54     static Comparator<Entry> comparatorCP =
  55         new Comparator<Entry>() {
  56             public int compare(Entry m1, Entry m2) {
  57                 return m1.cp - m2.cp;
  58             }
  59             public boolean equals(Object obj) {
  60                 return this == obj;
  61             }
  62     };
  63 
  64     public static class Parser {
  65         static final Pattern basic = Pattern.compile("(?:0x)?(\\p{XDigit}++)\\s++(?:0x)?(\\p{XDigit}++)?\\s*+.*");
  66         static final int gBS = 1;
  67         static final int gCP = 2;
  68         static final int gCP2 = 3;
  69 
  70         BufferedReader reader;
  71         boolean closed;
  72         Matcher matcher;
  73         int gbs, gcp, gcp2;
  74 
  75         public Parser (InputStream in, Pattern p, int gbs, int gcp, int gcp2)
  76             throws IOException
  77         {
  78             this.reader = new BufferedReader(new InputStreamReader(in));
  79             this.closed = false;
  80             this.matcher = p.matcher("");
  81             this.gbs = gbs;
  82             this.gcp = gcp;
  83             this.gcp2 = gcp2;
  84         }
  85 
  86         public Parser (InputStream in, Pattern p) throws IOException {
  87             this(in, p, gBS, gCP, gCP2);
  88         }
  89 
  90         public Parser (InputStream in) throws IOException {
  91             this(in, basic, gBS, gCP, gCP2);
  92         }
  93 
  94         protected boolean isDirective(String line) {
  95             return line.startsWith("#");
  96         }
  97 
  98         protected Entry parse(Matcher matcher, Entry mapping) {
  99             mapping.bs = Integer.parseInt(matcher.group(gbs), 16);
 100             mapping.cp = Integer.parseInt(matcher.group(gcp), 16);
 101             if (gcp2 <= matcher.groupCount() &&
 102                 matcher.group(gcp2) != null)
 103                 mapping.cp2 = Integer.parseInt(matcher.group(gcp2), 16);
 104             else
 105                 mapping.cp2 = 0;
 106             return mapping;
 107         }
 108 
 109         public Entry next() throws Exception {
 110             return next(new Entry());
 111         }
 112 
 113         // returns null and closes the input stream if the eof has beenreached.
 114         public Entry next(Entry mapping) throws Exception {
 115             if (closed)
 116                 return null;
 117             String line;
 118             while ((line = reader.readLine()) != null) {
 119                 if (isDirective(line))
 120                     continue;
 121                 matcher.reset(line);
 122                 if (!matcher.lookingAt()) {
 123                     //System.out.println("Missed: " + line);
 124                     continue;
 125                 }
 126                 return parse(matcher, mapping);
 127             }
 128             reader.close();
 129             closed = true;
 130             return null;
 131         }
 132     }
 133 
 134     // tags of different charset mapping tables
 135     private final static int MAP_SINGLEBYTE      = 0x1; // 0..256  : c
 136     private final static int MAP_DOUBLEBYTE1     = 0x2; // min..max: c
 137     private final static int MAP_DOUBLEBYTE2     = 0x3; // min..max: c [DB2]
 138     private final static int MAP_SUPPLEMENT      = 0x5; //           db,c
 139     private final static int MAP_SUPPLEMENT_C2B  = 0x6; //           c,db
 140     private final static int MAP_COMPOSITE       = 0x7; //           db,base,cc
 141     private final static int MAP_INDEXC2B        = 0x8; // index table of c->bb
 142 
 143     private static final void writeShort(OutputStream out, int data)
 144         throws IOException
 145     {
 146         out.write((data >>> 8) & 0xFF);
 147         out.write((data      ) & 0xFF);
 148     }
 149 
 150     private static final void writeShortArray(OutputStream out,
 151                                               int type,
 152                                               int[] array,
 153                                               int off,
 154                                               int size)   // exclusive
 155         throws IOException
 156     {
 157         writeShort(out, type);
 158         writeShort(out, size);
 159         for (int i = off; i < size; i++) {
 160             writeShort(out, array[off+i]);
 161         }
 162     }
 163 
 164     public static final void writeSIZE(OutputStream out, int data)
 165         throws IOException
 166     {
 167         out.write((data >>> 24) & 0xFF);
 168         out.write((data >>> 16) & 0xFF);
 169         out.write((data >>>  8) & 0xFF);
 170         out.write((data       ) & 0xFF);
 171     }
 172 
 173     public static void writeINDEXC2B(OutputStream out, int[] indexC2B)
 174         throws IOException
 175     {
 176         writeShort(out, MAP_INDEXC2B);
 177         writeShort(out, indexC2B.length);
 178         int off = 0;
 179         for (int i = 0; i < indexC2B.length; i++) {
 180             if (indexC2B[i] != 0) {
 181                 writeShort(out, off);
 182                 off += 256;
 183             } else {
 184                 writeShort(out, -1);
 185             }
 186         }
 187     }
 188 
 189     public static void writeSINGLEBYTE(OutputStream out, int[] sb)
 190         throws IOException
 191     {
 192         writeShortArray(out, MAP_SINGLEBYTE, sb, 0, 256);
 193     }
 194 
 195     private static void writeDOUBLEBYTE(OutputStream out,
 196                                         int type,
 197                                         int[] db,
 198                                         int b1Min, int b1Max,
 199                                         int b2Min, int b2Max)
 200         throws IOException
 201     {
 202         writeShort(out, type);
 203         writeShort(out, b1Min);
 204         writeShort(out, b1Max);
 205         writeShort(out, b2Min);
 206         writeShort(out, b2Max);
 207         writeShort(out, (b1Max - b1Min + 1) * (b2Max - b2Min + 1));
 208 
 209         for (int b1 = b1Min; b1 <= b1Max; b1++) {
 210             for (int b2 = b2Min; b2 <= b2Max; b2++) {
 211                 writeShort(out, db[b1 * 256 + b2]);
 212             }
 213         }
 214     }
 215     public static void writeDOUBLEBYTE1(OutputStream out,
 216                                         int[] db,
 217                                         int b1Min, int b1Max,
 218                                         int b2Min, int b2Max)
 219         throws IOException
 220     {
 221         writeDOUBLEBYTE(out, MAP_DOUBLEBYTE1, db, b1Min, b1Max, b2Min, b2Max);
 222     }
 223 
 224     public static void writeDOUBLEBYTE2(OutputStream out,
 225                                         int[] db,
 226                                         int b1Min, int b1Max,
 227                                         int b2Min, int b2Max)
 228         throws IOException
 229     {
 230         writeDOUBLEBYTE(out, MAP_DOUBLEBYTE2, db, b1Min, b1Max, b2Min, b2Max);
 231     }
 232 
 233     // the c2b table is output as well
 234     public static void writeSUPPLEMENT(OutputStream out, Entry[] supp, int size)
 235         throws IOException
 236     {
 237         writeShort(out, MAP_SUPPLEMENT);
 238         writeShort(out, size * 2);
 239         // db at first half, cc at the low half
 240         for (int i = 0; i < size; i++) {
 241             writeShort(out, supp[i].bs);
 242         }
 243         for (int i = 0; i < size; i++) {
 244             writeShort(out, supp[i].cp);
 245         }
 246 
 247         //c2b
 248         writeShort(out, MAP_SUPPLEMENT_C2B);
 249         writeShort(out, size*2);
 250         Arrays.sort(supp, 0, size, comparatorCP);
 251         for (int i = 0; i < size; i++) {
 252             writeShort(out, supp[i].cp);
 253         }
 254         for (int i = 0; i < size; i++) {
 255             writeShort(out, supp[i].bs);
 256         }
 257     }
 258 
 259     public static void writeCOMPOSITE(OutputStream out, Entry[] comp, int size)
 260         throws IOException
 261     {
 262         writeShort(out, MAP_COMPOSITE);
 263         writeShort(out, size*3);
 264         // comp is sorted already
 265         for (int i = 0; i < size; i++) {
 266             writeShort(out, (char)comp[i].bs);
 267             writeShort(out, (char)comp[i].cp);
 268             writeShort(out, (char)comp[i].cp2);
 269         }
 270     }
 271 }