/*
 * reserved comment block
 * DO NOT REMOVE OR ALTER!
 */
/*
 * Copyright 1999-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * $Id: Encodings.java,v 1.3 2005/09/28 13:49:04 pvedula Exp $
 */
package com.sun.org.apache.xml.internal.serializer;

import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.io.BufferedWriter;
import java.net.URL;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Properties;
import java.util.StringTokenizer;

import com.sun.org.apache.xalan.internal.utils.SecuritySupport;

/**
 * Provides information about encodings. Depends on the Java runtime
 * to provide writers for the different encodings, but can be used
 * to override encoding names and provide the last printable character
 * for each encoding.
 *
 * @version $Revision: 1.11 $ $Date: 2010-11-01 04:34:44 $
 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
 */

public final class Encodings extends Object
{

    /**
     * The last printable character for unknown encodings.
     */
    private static final int m_defaultLastPrintable = 0x7F;

    /**
     * Resource path of the bundled properties file with encodings data.
     */
    private static final String ENCODINGS_FILE = "com/sun/org/apache/xml/internal/serializer/Encodings.properties";

    /**
     * Name of the system property that may hold the URL of an external
     * encodings properties file, used instead of the bundled one.
     */
    private static final String ENCODINGS_PROP = "com.sun.org.apache.xalan.internal.serialize.encodings";


    /**
     * Returns a writer for the specified encoding based on
     * an output stream.
     * <p>
     * Every table entry whose MIME name matches <code>encoding</code>
     * (ignoring case) is tried with its Java name first; if none of them
     * yields a writer, <code>encoding</code> itself is handed to the runtime.
     *
     * @param output The output stream
     * @param encoding The encoding
     * @return A suitable writer
     * @throws UnsupportedEncodingException There is no convertor
     *  to support this encoding
     */
    static Writer getWriter(OutputStream output, String encoding)
            throws UnsupportedEncodingException
    {

        for (int i = 0; i < _encodings.length; ++i)
        {
            if (_encodings[i].name.equalsIgnoreCase(encoding))
            {
                try
                {
                    return new BufferedWriter(new OutputStreamWriter(
                        output,
                        _encodings[i].javaName));
                }
                catch (java.lang.IllegalArgumentException iae) // java 1.1.8
                {
                    // keep trying
                }
                catch (UnsupportedEncodingException usee)
                {
                    // keep trying
                }
            }
        }

        try
        {
            return new BufferedWriter(new OutputStreamWriter(output, encoding));
        }
        catch (java.lang.IllegalArgumentException iae) // java 1.1.8
        {
            throw new UnsupportedEncodingException(encoding);
        }
    }


    /**
     * Returns the last printable character for an unspecified
     * encoding.
     *
     * @return the default last printable character
     */
    public static int getLastPrintable()
    {
        return m_defaultLastPrintable;
    }



    /**
     * Returns the EncodingInfo object for the specified
     * encoding.
     * <p>
     * The name is looked up first as a Java encoding name and then as a
     * MIME name; the comparison ignores ASCII case.
     * <p>
     * This is not a public API.
     *
     * @param encoding The encoding
     * @return The object that is used to determine if
     * characters are in the given encoding. Never null: an
     * EncodingInfo built from null names is returned for an unknown encoding.
     * @xsl.usage internal
     */
    static EncodingInfo getEncodingInfo(String encoding)
    {
        EncodingInfo ei;

        String normalizedEncoding = toUpperCaseFast(encoding);
        ei = (EncodingInfo) _encodingTableKeyJava.get(normalizedEncoding);
        if (ei == null)
            ei = (EncodingInfo) _encodingTableKeyMime.get(normalizedEncoding);
        if (ei == null) {
            // We shouldn't have to do this, but just in case.
            ei = new EncodingInfo(null,null);
        }

        return ei;
    }

    /**
     * A fast and cheap way to uppercase a String that is
     * only made of printable ASCII characters.
     * <p>
     * Only the characters 'a'..'z' are changed, so the result does not
     * depend on the default locale.
     * <p>
     * This is not a public API.
     * @param s a String of ASCII characters
     * @return an uppercased version of the input String,
     * possibly the same String.
     * @xsl.usage internal
     */
    static private String toUpperCaseFast(final String s) {

        boolean different = false;
        final int mx = s.length();
        char[] chars = new char[mx];
        for (int i=0; i < mx; i++) {
            char ch = s.charAt(i);
            // is the character a lower case ASCII one?
            if ('a' <= ch && ch <= 'z') {
                // a cheap and fast way to uppercase that is good enough
                ch = (char) (ch + ('A' - 'a'));
                different = true; // the uppercased String is different
            }
            chars[i] = ch;
        }

        // A little optimization, don't call String.valueOf() if
        // the uppercased string is the same as the input string.
        final String upper;
        if (different)
            upper = String.valueOf(chars);
        else
            upper = s;

        return upper;
    }

    /** The default encoding, ISO style. */
    static final String DEFAULT_MIME_ENCODING = "UTF-8";

    /**
     * Get the proper mime encoding.  From the XSLT recommendation: "The encoding
     * attribute specifies the preferred encoding to use for outputting the result
     * tree. XSLT processors are required to respect values of UTF-8 and UTF-16.
     * For other values, if the XSLT processor does not support the specified
     * encoding it may signal an error; if it does not signal an error it should
     * use UTF-8 or UTF-16 instead. The XSLT processor must not use an encoding
     * whose name does not match the EncName production of the XML Recommendation
     * [XML]. If no encoding attribute is specified, then the XSLT processor should
     * use either UTF-8 or UTF-16."
     *
     * @param encoding Reference to java-style encoding string, which may be null,
     *                 in which case a default will be found.
     *
     * @return The ISO-style encoding string. When <code>encoding</code> is null
     *         the result is derived from the "file.encoding" system property
     *         and falls back to {@link #DEFAULT_MIME_ENCODING}.
     */
    static String getMimeEncoding(String encoding)
    {

        if (null == encoding)
        {
            try
            {

                // Get the default system character encoding.  This may be
                // incorrect if they passed in a writer, but right now there
                // seems to be no way to get the encoding from a writer.
                encoding = SecuritySupport.getSystemProperty("file.encoding", "UTF8");

                if (null != encoding)
                {

                    /*
                     * See if the mime type is equal to UTF8.  If you don't
                     * do that, then convertJava2MimeEncoding will convert
                     * 8859_1 to "ISO-8859-1", which is not what we want,
                     * I think, and I don't think I want to alter the tables
                     * to convert everything to UTF-8.
                     */
                    String jencoding =
                        (encoding.equalsIgnoreCase("Cp1252")
                            || encoding.equalsIgnoreCase("ISO8859_1")
                            || encoding.equalsIgnoreCase("8859_1")
                            || encoding.equalsIgnoreCase("UTF8"))
                            ? DEFAULT_MIME_ENCODING
                            : convertJava2MimeEncoding(encoding);

                    encoding =
                        (null != jencoding) ? jencoding : DEFAULT_MIME_ENCODING;
                }
                else
                {
                    encoding = DEFAULT_MIME_ENCODING;
                }
            }
            catch (SecurityException se)
            {
                // Not allowed to read the system property: use the default.
                encoding = DEFAULT_MIME_ENCODING;
            }
        }
        else
        {
            encoding = convertJava2MimeEncoding(encoding);
        }

        return encoding;
    }

    /**
     * Try the best we can to convert a Java encoding to a XML-style encoding.
     *
     * @param encoding non-null reference to encoding string, java style.
     *
     * @return ISO-style encoding string, or <code>encoding</code> itself when
     *         the Java name is not in the table.
     */
    private static String convertJava2MimeEncoding(String encoding)
    {
        // The table keys are stored with toUpperCaseFast(), so the lookup must
        // use the same locale-independent normalization.
        EncodingInfo enc =
            (EncodingInfo) _encodingTableKeyJava.get(toUpperCaseFast(encoding));
        if (null != enc)
            return enc.name;
        return encoding;
    }

    /**
     * Try the best we can to convert a XML-style (MIME) encoding name to
     * the corresponding Java encoding name.
     *
     * @param encoding reference to encoding string, ISO (MIME) style.
     *
     * @return Java-style encoding string of the first table entry whose MIME
     *         name matches ignoring case, or <code>encoding</code> itself when
     *         there is no such entry.
     */
    public static String convertMime2JavaEncoding(String encoding)
    {

        for (int i = 0; i < _encodings.length; ++i)
        {
            if (_encodings[i].name.equalsIgnoreCase(encoding))
            {
                return _encodings[i].javaName;
            }
        }

        return encoding;
    }

    /**
     * Load a list of all the supported encodings.
     * <p>
     * The system property named by {@link #ENCODINGS_PROP}, formatted using
     * URL syntax, may define an external encodings list; otherwise the
     * bundled resource {@link #ENCODINGS_FILE} is read. Thanks to Sergey
     * Ushakov for the code contribution!
     * <p>
     * Each property maps a Java encoding name to a comma separated list of
     * MIME names, optionally followed by a space and further data that this
     * method does not interpret. As a side effect both lookup tables are
     * filled: every MIME name is registered, and the first MIME name of each
     * property is the entry registered under the Java name.
     *
     * @return one EncodingInfo per MIME name found; never contains null.
     */
    private static EncodingInfo[] loadEncodingInfo()
    {
        try
        {
            String urlString = null;
            try
            {
                urlString = SecuritySupport.getSystemProperty(ENCODINGS_PROP, "");
            }
            catch (SecurityException ignored)
            {
                // Not allowed to read the property; fall back to the
                // bundled encodings resource below.
            }

            InputStream is = null;
            if (urlString != null && urlString.length() > 0) {
                URL url = new URL(urlString);
                is = url.openStream();
            }

            if (is == null) {
                is = SecuritySupport.getResourceAsStream(ENCODINGS_FILE);
            }

            Properties props = new Properties();
            if (is != null) {
                try {
                    props.load(is);
                } finally {
                    // Release the stream even when loading fails.
                    is.close();
                }
            } else {
                // Seems to be no real need to force failure here, let the
                // system do its best... The issue is not really very critical,
                // and the output will be in any case _correct_ though maybe not
                // always human-friendly... :)
                // But maybe report/log the resource problem?
                // Any standard ways to report/log errors (in static context)?
            }

            // First pass: count the MIME names with exactly the same
            // tokenization as the second pass, so that the array has one slot
            // per EncodingInfo and no trailing null entries.
            int totalMimeNames = 0;
            for (Enumeration keys = props.keys(); keys.hasMoreElements();)
            {
                String javaName = (String) keys.nextElement();
                totalMimeNames +=
                    mimeNameTokens(props.getProperty(javaName)).countTokens();
            }

            // Second pass: create the entries and register them in the tables.
            EncodingInfo[] ret = new EncodingInfo[totalMimeNames];
            int j = 0;
            for (Enumeration keys = props.keys(); keys.hasMoreElements();)
            {
                String javaName = (String) keys.nextElement();
                StringTokenizer st =
                    mimeNameTokens(props.getProperty(javaName));
                for (boolean first = true;
                    st.hasMoreTokens();
                    first = false)
                {
                    String mimeName = st.nextToken();
                    ret[j] =
                        new EncodingInfo(mimeName, javaName);
                    // Keys use the same ASCII-only upper-casing as the
                    // lookups, so they do not depend on the default locale.
                    _encodingTableKeyMime.put(
                        toUpperCaseFast(mimeName),
                        ret[j]);
                    if (first)
                        _encodingTableKeyJava.put(
                            toUpperCaseFast(javaName),
                            ret[j]);
                    j++;
                }
            }
            return ret;
        }
        catch (java.net.MalformedURLException mue)
        {
            throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(mue);
        }
        catch (java.io.IOException ioe)
        {
            throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(ioe);
        }
    }

    /**
     * Returns a tokenizer over the comma separated MIME names of a property
     * value: the part before the first space, or the whole value when it
     * contains no space.
     *
     * @param val the property value of one encodings entry
     * @return a tokenizer that yields each non-empty MIME name
     */
    private static StringTokenizer mimeNameTokens(String val)
    {
        int pos = val.indexOf(' ');
        String mimeNames = (pos < 0) ? val : val.substring(0, pos);
        return new StringTokenizer(mimeNames, ",");
    }

    /**
     * Return true if the character is the high member of a surrogate pair.
     * <p>
     * This is not a public API.
     * @param ch the character to test
     * @xsl.usage internal
     */
    static boolean isHighUTF16Surrogate(char ch) {
        return ('\uD800' <= ch && ch <= '\uDBFF');
    }
    /**
     * Return true if the character is the low member of a surrogate pair.
     * <p>
     * This is not a public API.
     * @param ch the character to test
     * @xsl.usage internal
     */
    static boolean isLowUTF16Surrogate(char ch) {
        return ('\uDC00' <= ch && ch <= '\uDFFF');
    }
    /**
     * Return the unicode code point represented by the high/low surrogate pair.
     * <p>
     * This is not a public API.
     * @param highSurrogate the high char of the high/low pair
     * @param lowSurrogate the low char of the high/low pair
     * @xsl.usage internal
     */
    static int toCodePoint(char highSurrogate, char lowSurrogate) {
        int codePoint =
            ((highSurrogate - 0xd800) << 10)
                + (lowSurrogate - 0xdc00)
                + 0x10000;
        return codePoint;
    }
    /**
     * Return the unicode code point represented by the char.
     * A bit of a dummy method, since all it does is return the char,
     * but as an int value.
     * <p>
     * This is not a public API.
     * @param ch the char.
     * @xsl.usage internal
     */
    static int toCodePoint(char ch) {
        int codePoint = ch;
        return codePoint;
    }

    // Declaration order matters: static initializers run in textual order and
    // loadEncodingInfo() fills both tables, so they must be created before
    // _encodings is initialized.
    private static final HashMap _encodingTableKeyJava = new HashMap();
    private static final HashMap _encodingTableKeyMime = new HashMap();
    private static final EncodingInfo[] _encodings = loadEncodingInfo();
}