1 /* 2 * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Sun designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Sun in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 22 * CA 95054 USA or visit www.sun.com if you need additional information or 23 * have any questions. 24 */ 25 26 package build.tools.charsetmapping; 27 28 import java.io.InputStream; 29 import java.io.InputStreamReader; 30 import java.io.OutputStream; 31 import java.io.BufferedReader; 32 import java.io.IOException; 33 import java.util.regex.Matcher; 34 import java.util.regex.Pattern; 35 import java.util.*; 36 37 public class CharsetMapping { 38 public final static char UNMAPPABLE_DECODING = '\uFFFD'; 39 public final static int UNMAPPABLE_ENCODING = -1; 40 41 public static class Entry { 42 public int bs; //byte sequence reps 43 public int cp; //Unicode codepoint 44 public int cp2; //CC of composite 45 46 public Entry () {} 47 public Entry (int bytes, int cp, int cp2) { 48 this.bs = bytes; 49 this.cp = cp; 50 this.cp2 = cp2; 51 } 52 } 53 54 static Comparator<Entry> comparatorCP = 55 new Comparator<Entry>() { 56 public int compare(Entry m1, Entry m2) { 57 return m1.cp - m2.cp; 58 } 59 public boolean equals(Object obj) { 60 return this == obj; 61 } 62 }; 63 64 public static class Parser { 65 static final Pattern basic = Pattern.compile("(?:0x)?(\\p{XDigit}++)\\s++(?:0x)?(\\p{XDigit}++)?\\s*+.*"); 66 static final int gBS = 1; 67 static final int gCP = 2; 68 static final int gCP2 = 3; 69 70 BufferedReader reader; 71 boolean closed; 72 Matcher matcher; 73 int gbs, gcp, gcp2; 74 75 public Parser (InputStream in, Pattern p, int gbs, int gcp, int gcp2) 76 throws IOException 77 { 78 this.reader = new BufferedReader(new InputStreamReader(in)); 79 this.closed = false; 80 this.matcher = p.matcher(""); 81 this.gbs = gbs; 82 this.gcp = gcp; 83 this.gcp2 = gcp2; 84 } 85 86 public Parser (InputStream in, Pattern p) throws IOException { 87 this(in, p, gBS, gCP, gCP2); 88 } 89 90 public Parser (InputStream in) throws IOException { 91 this(in, basic, gBS, gCP, gCP2); 92 } 93 94 protected boolean isDirective(String line) { 95 return line.startsWith("#"); 96 } 97 98 protected Entry parse(Matcher matcher, Entry mapping) { 99 mapping.bs = Integer.parseInt(matcher.group(gbs), 16); 100 mapping.cp = Integer.parseInt(matcher.group(gcp), 16); 101 if (gcp2 <= matcher.groupCount() && 102 matcher.group(gcp2) != null) 103 mapping.cp2 = Integer.parseInt(matcher.group(gcp2), 16); 104 else 105 mapping.cp2 = 0; 106 return mapping; 107 } 108 109 public Entry next() throws Exception { 110 return next(new Entry()); 111 } 112 113 // returns null and closes the input stream if the eof has beenreached. 114 public Entry next(Entry mapping) throws Exception { 115 if (closed) 116 return null; 117 String line; 118 while ((line = reader.readLine()) != null) { 119 if (isDirective(line)) 120 continue; 121 matcher.reset(line); 122 if (!matcher.lookingAt()) { 123 //System.out.println("Missed: " + line); 124 continue; 125 } 126 return parse(matcher, mapping); 127 } 128 reader.close(); 129 closed = true; 130 return null; 131 } 132 } 133 134 // tags of different charset mapping tables 135 private final static int MAP_SINGLEBYTE = 0x1; // 0..256 : c 136 private final static int MAP_DOUBLEBYTE1 = 0x2; // min..max: c 137 private final static int MAP_DOUBLEBYTE2 = 0x3; // min..max: c [DB2] 138 private final static int MAP_SUPPLEMENT = 0x5; // db,c 139 private final static int MAP_SUPPLEMENT_C2B = 0x6; // c,db 140 private final static int MAP_COMPOSITE = 0x7; // db,base,cc 141 private final static int MAP_INDEXC2B = 0x8; // index table of c->bb 142 143 private static final void writeShort(OutputStream out, int data) 144 throws IOException 145 { 146 out.write((data >>> 8) & 0xFF); 147 out.write((data ) & 0xFF); 148 } 149 150 private static final void writeShortArray(OutputStream out, 151 int type, 152 int[] array, 153 int off, 154 int size) // exclusive 155 throws IOException 156 { 157 writeShort(out, type); 158 writeShort(out, size); 159 for (int i = off; i < size; i++) { 160 writeShort(out, array[off+i]); 161 } 162 } 163 164 public static final void writeSIZE(OutputStream out, int data) 165 throws IOException 166 { 167 out.write((data >>> 24) & 0xFF); 168 out.write((data >>> 16) & 0xFF); 169 out.write((data >>> 8) & 0xFF); 170 out.write((data ) & 0xFF); 171 } 172 173 public static void writeINDEXC2B(OutputStream out, int[] indexC2B) 174 throws IOException 175 { 176 writeShort(out, MAP_INDEXC2B); 177 writeShort(out, indexC2B.length); 178 int off = 0; 179 for (int i = 0; i < indexC2B.length; i++) { 180 if (indexC2B[i] != 0) { 181 writeShort(out, off); 182 off += 256; 183 } else { 184 writeShort(out, -1); 185 } 186 } 187 } 188 189 public static void writeSINGLEBYTE(OutputStream out, int[] sb) 190 throws IOException 191 { 192 writeShortArray(out, MAP_SINGLEBYTE, sb, 0, 256); 193 } 194 195 private static void writeDOUBLEBYTE(OutputStream out, 196 int type, 197 int[] db, 198 int b1Min, int b1Max, 199 int b2Min, int b2Max) 200 throws IOException 201 { 202 writeShort(out, type); 203 writeShort(out, b1Min); 204 writeShort(out, b1Max); 205 writeShort(out, b2Min); 206 writeShort(out, b2Max); 207 writeShort(out, (b1Max - b1Min + 1) * (b2Max - b2Min + 1)); 208 209 for (int b1 = b1Min; b1 <= b1Max; b1++) { 210 for (int b2 = b2Min; b2 <= b2Max; b2++) { 211 writeShort(out, db[b1 * 256 + b2]); 212 } 213 } 214 } 215 public static void writeDOUBLEBYTE1(OutputStream out, 216 int[] db, 217 int b1Min, int b1Max, 218 int b2Min, int b2Max) 219 throws IOException 220 { 221 writeDOUBLEBYTE(out, MAP_DOUBLEBYTE1, db, b1Min, b1Max, b2Min, b2Max); 222 } 223 224 public static void writeDOUBLEBYTE2(OutputStream out, 225 int[] db, 226 int b1Min, int b1Max, 227 int b2Min, int b2Max) 228 throws IOException 229 { 230 writeDOUBLEBYTE(out, MAP_DOUBLEBYTE2, db, b1Min, b1Max, b2Min, b2Max); 231 } 232 233 // the c2b table is output as well 234 public static void writeSUPPLEMENT(OutputStream out, Entry[] supp, int size) 235 throws IOException 236 { 237 writeShort(out, MAP_SUPPLEMENT); 238 writeShort(out, size * 2); 239 // db at first half, cc at the low half 240 for (int i = 0; i < size; i++) { 241 writeShort(out, supp[i].bs); 242 } 243 for (int i = 0; i < size; i++) { 244 writeShort(out, supp[i].cp); 245 } 246 247 //c2b 248 writeShort(out, MAP_SUPPLEMENT_C2B); 249 writeShort(out, size*2); 250 Arrays.sort(supp, 0, size, comparatorCP); 251 for (int i = 0; i < size; i++) { 252 writeShort(out, supp[i].cp); 253 } 254 for (int i = 0; i < size; i++) { 255 writeShort(out, supp[i].bs); 256 } 257 } 258 259 public static void writeCOMPOSITE(OutputStream out, Entry[] comp, int size) 260 throws IOException 261 { 262 writeShort(out, MAP_COMPOSITE); 263 writeShort(out, size*3); 264 // comp is sorted already 265 for (int i = 0; i < size; i++) { 266 writeShort(out, (char)comp[i].bs); 267 writeShort(out, (char)comp[i].cp); 268 writeShort(out, (char)comp[i].cp2); 269 } 270 } 271 }