1 /* 2 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package build.tools.charsetmapping; 27 28 import java.io.File; 29 import java.io.InputStream; 30 import java.io.InputStreamReader; 31 import java.io.OutputStream; 32 import java.io.BufferedReader; 33 import java.io.IOException; 34 import java.util.regex.Matcher; 35 import java.util.regex.Pattern; 36 import java.util.Scanner; 37 import java.util.Formatter; 38 39 public class Utils { 40 41 public final static char UNMAPPABLE_DECODING = '\uFFFD'; 42 public final static int UNMAPPABLE_ENCODING = 0xFFFD; 43 44 public static class Entry { 45 public int bs; //byte sequence reps 46 public int cp; //Unicode codepoint 47 public int cp2; //CC of composite 48 49 public Entry () {} 50 public Entry (int bytes, int cp, int cp2) { 51 this.bs = bytes; 52 this.cp = cp; 53 this.cp2 = cp2; 54 } 55 } 56 57 public static class Parser { 58 static final Pattern basic = Pattern.compile("(?:0x)?(\\p{XDigit}++)\\s++(?:0x)?(\\p{XDigit}++)?\\s*+.*"); 59 static final int gBS = 1; 60 static final int gCP = 2; 61 static final int gCP2 = 3; 62 63 BufferedReader reader; 64 boolean closed; 65 Matcher matcher; 66 int gbs, gcp, gcp2; 67 68 public Parser (InputStream in, Pattern p, int gbs, int gcp, int gcp2) 69 throws IOException 70 { 71 this.reader = new BufferedReader(new InputStreamReader(in)); 72 this.closed = false; 73 this.matcher = p.matcher(""); 74 this.gbs = gbs; 75 this.gcp = gcp; 76 this.gcp2 = gcp2; 77 } 78 79 public Parser (InputStream in, Pattern p) throws IOException { 80 this(in, p, gBS, gCP, gCP2); 81 } 82 83 public Parser (InputStream in) throws IOException { 84 this(in, basic, gBS, gCP, gCP2); 85 } 86 87 protected boolean isDirective(String line) { 88 return line.startsWith("#"); 89 } 90 91 protected Entry parse(Matcher matcher, Entry mapping) { 92 mapping.bs = Integer.parseInt(matcher.group(gbs), 16); 93 mapping.cp = Integer.parseInt(matcher.group(gcp), 16); 94 if (gcp2 <= matcher.groupCount() && 95 matcher.group(gcp2) != null) 96 mapping.cp2 = Integer.parseInt(matcher.group(gcp2), 16); 97 else 98 mapping.cp2 = 0; 99 return mapping; 100 } 101 102 public Entry next() throws Exception { 103 return next(new Entry()); 104 } 105 106 // returns null and closes the input stream if the eof has beenreached. 107 public Entry next(Entry mapping) throws Exception { 108 if (closed) 109 return null; 110 String line; 111 while ((line = reader.readLine()) != null) { 112 if (isDirective(line)) 113 continue; 114 matcher.reset(line); 115 if (!matcher.lookingAt()) { 116 //System.out.println("Missed: " + line); 117 continue; 118 } 119 return parse(matcher, mapping); 120 } 121 reader.close(); 122 closed = true; 123 return null; 124 } 125 } 126 127 public static class Output { 128 private Formatter out; 129 130 public Output(Formatter out) { 131 this.out = out; 132 } 133 134 public void close() { 135 out.close(); 136 } 137 138 private void toChar(String fmt, char c) { 139 switch (c) { 140 case '\b': 141 out.format("\\b"); break; 142 case '\t': 143 out.format("\\t"); break; 144 case '\n': 145 out.format("\\n"); break; 146 case '\f': 147 out.format("\\f"); break; 148 case '\r': 149 out.format("\\r"); break; 150 case '\"': 151 out.format("\\\""); break; 152 case '\'': 153 out.format("\\'"); break; 154 case '\\': 155 out.format("\\\\"); break; 156 default: 157 out.format(fmt, c & 0xffff); 158 } 159 } 160 161 public void format(String fmt, Object ... args) { 162 out.format(fmt, args); 163 } 164 165 public void format(char[] cc, int off, int end, String closure) { 166 while (off < end) { 167 out.format(" \""); 168 for (int j = 0; j < 8; j++) { 169 if (off == end) 170 break; 171 toChar("\\u%04X", cc[off++]); 172 } 173 if (off == end) 174 out.format("\" %s%n", closure); 175 else 176 out.format("\" + %n"); 177 } 178 } 179 180 public void format(char[] cc, String closure) { 181 format(cc, 0, cc.length, closure); 182 } 183 184 public void format(char[] db, int b1, int b2Min, int b2Max, 185 String closure) 186 { 187 char[] cc = new char[b2Max - b2Min + 1]; 188 int off = 0; 189 for (int b2 = b2Min; b2 <= b2Max; b2++) { 190 cc[off++] = db[(b1 << 8) | b2]; 191 } 192 format(cc, 0, cc.length, closure); 193 } 194 195 public void format(char[] date) { 196 int off = 0; 197 int end = date.length; 198 while (off < end) { 199 out.format(" "); 200 for (int j = 0; j < 8 && off < end; j++) { 201 toChar("'\\u%04X',", date[off++]); 202 } 203 out.format("%n"); 204 } 205 } 206 } 207 208 public static String getCopyright(File f) throws IOException { 209 Scanner s = new Scanner(f, "ISO-8859-1"); 210 StringBuilder sb = new StringBuilder(); 211 while (s.hasNextLine()) { 212 String ln = s.nextLine(); 213 sb.append(ln + "\n"); 214 // assume we have the copyright as the first comment 215 if (ln.matches("^\\s\\*\\/$")) 216 break; 217 } 218 s.close(); 219 return sb.toString(); 220 } 221 }