jaxp/src/com/sun/org/apache/xml/internal/serializer/Encodings.java

Print this page
rev 565 : 8008738: Issue in com.sun.org.apache.xml.internal.serializer.Encodings causes some JCK tests to fail intermittently
Summary: Encodings.java sometimes creates EncodingInfo objects whose java names are not recognized by the Charset API. This patch fixes that issue.
Reviewed-by: joehw, alanb

*** 31,40 ****
--- 31,48 ----
  import java.net.URL;
  import java.util.Enumeration;
  import java.util.HashMap;
  import java.util.Properties;
  import java.util.StringTokenizer;
+ import java.io.IOException;
+ import java.net.MalformedURLException;
+ import java.nio.charset.Charset;
+ import java.nio.charset.IllegalCharsetNameException;
+ import java.nio.charset.UnsupportedCharsetException;
+ import java.util.Collections;
+ import java.util.Map;
+ import java.util.Map.Entry;

  import com.sun.org.apache.xalan.internal.utils.SecuritySupport;

  /**
   * Provides information about encodings. Depends on the Java runtime
*** 77,117 ****
       */
      static Writer getWriter(OutputStream output, String encoding)
              throws UnsupportedEncodingException
      {

!         for (int i = 0; i < _encodings.length; ++i)
!         {
!             if (_encodings[i].name.equalsIgnoreCase(encoding))
!             {
!                 try
!                 {
                      return new BufferedWriter(new OutputStreamWriter(
!                         output,
!                         _encodings[i].javaName));
!                 }
!                 catch (java.lang.IllegalArgumentException iae) // java 1.1.8
!                 {
                      // keep trying
                  }
-                 catch (UnsupportedEncodingException usee)
-                 {
-
-                     // keep trying
-                 }
-             }
          }

-         try
-         {
              return new BufferedWriter(new OutputStreamWriter(output, encoding));
          }
-         catch (java.lang.IllegalArgumentException iae) // java 1.1.8
-         {
-             throw new UnsupportedEncodingException(encoding);
-         }
-     }

      /**
       * Returns the last printable character for an unspecified
       * encoding.
--- 85,106 ----
       */
      static Writer getWriter(OutputStream output, String encoding)
              throws UnsupportedEncodingException
      {

!         final EncodingInfo ei = _encodingInfos.findEncoding(toUpperCaseFast(encoding));
!         if (ei != null) {
!             try {
                  return new BufferedWriter(new OutputStreamWriter(
!                         output, ei.javaName));
!             } catch (UnsupportedEncodingException usee) {
                  // keep trying
              }
          }

          return new BufferedWriter(new OutputStreamWriter(output, encoding));
      }

      /**
       * Returns the last printable character for an unspecified
       * encoding.
*** 139,155 ****
      static EncodingInfo getEncodingInfo(String encoding)
      {
          EncodingInfo ei;

          String normalizedEncoding = toUpperCaseFast(encoding);
!         ei = (EncodingInfo) _encodingTableKeyJava.get(normalizedEncoding);
!         if (ei == null)
!             ei = (EncodingInfo) _encodingTableKeyMime.get(normalizedEncoding);
          if (ei == null) {
              // We shouldn't have to do this, but just in case.
              ei = new EncodingInfo(null,null);
          }

          return ei;
      }

      /**
--- 128,156 ----
      static EncodingInfo getEncodingInfo(String encoding)
      {
          EncodingInfo ei;

          String normalizedEncoding = toUpperCaseFast(encoding);
!         ei = _encodingInfos.findEncoding(normalizedEncoding);
          if (ei == null) {
              // We shouldn't have to do this, but just in case.
+             try {
+                 // This may happen if the caller tries to use
+                 // an encoding that wasn't registered in the
+                 // (java name)->(preferred mime name) mapping file.
+                 // In that case we attempt to load the charset for the
+                 // given encoding, and if that succeeds - we create a new
+                 // EncodingInfo instance - assuming the canonical name
+                 // of the charset can be used as the mime name.
+                 final Charset c = Charset.forName(encoding);
+                 final String name = c.name();
+                 ei = new EncodingInfo(name, name);
+                 _encodingInfos.putEncoding(normalizedEncoding, ei);
+             } catch (IllegalCharsetNameException | UnsupportedCharsetException x) {
                  ei = new EncodingInfo(null,null);
              }
+         }

          return ei;
      }

      /**
*** 267,278 ****
       *
       * @return ISO-style encoding string.
       */
      private static String convertJava2MimeEncoding(String encoding)
      {
!         EncodingInfo enc =
!             (EncodingInfo) _encodingTableKeyJava.get(encoding.toUpperCase());
          if (null != enc)
              return enc.name;
          return encoding;
      }

--- 268,279 ----
       *
       * @return ISO-style encoding string.
       */
      private static String convertJava2MimeEncoding(String encoding)
      {
!         final EncodingInfo enc =
!              _encodingInfos.getEncodingFromJavaKey(toUpperCaseFast(encoding));
          if (null != enc)
              return enc.name;
          return encoding;
      }

*** 283,415 ****
       *
       * @return ISO-style encoding string.
       */
      public static String convertMime2JavaEncoding(String encoding)
      {
!
!         for (int i = 0; i < _encodings.length; ++i)
!         {
!             if (_encodings[i].name.equalsIgnoreCase(encoding))
!             {
!                 return _encodings[i].javaName;
!             }
          }

!         return encoding;
!     }
!
!     /**
!      * Load a list of all the supported encodings.
!      *
!      * System property "encodings" formatted using URL syntax may define an
!      * external encodings list. Thanks to Sergey Ushakov for the code
!      * contribution!
!      */
!     private static EncodingInfo[] loadEncodingInfo()
!     {
!         try
!         {
              String urlString = null;
              InputStream is = null;

!             try
!             {
                  urlString = SecuritySupport.getSystemProperty(ENCODINGS_PROP, "");
!             }
!             catch (SecurityException e)
!             {
              }

              if (urlString != null && urlString.length() > 0) {
                  URL url = new URL(urlString);
                  is = url.openStream();
              }

              if (is == null) {
                  is = SecuritySupport.getResourceAsStream(ENCODINGS_FILE);
              }

              Properties props = new Properties();
              if (is != null) {
                  props.load(is);
-                 is.close();
              } else {
                  // Seems to be no real need to force failure here, let the
                  // system do its best... The issue is not really very critical,
                  // and the output will be in any case _correct_ though maybe not
                  // always human-friendly... :)
                  // But maybe report/log the resource problem?
                  // Any standard ways to report/log errors (in static context)?
              }

!             int totalEntries = props.size();
!             int totalMimeNames = 0;
!             Enumeration keys = props.keys();
!             for (int i = 0; i < totalEntries; ++i)
!             {
!                 String javaName = (String) keys.nextElement();
!                 String val = props.getProperty(javaName);
!                 totalMimeNames++;
!                 int pos = val.indexOf(' ');
!                 for (int j = 0; j < pos; ++j)
!                     if (val.charAt(j) == ',')
!                         totalMimeNames++;
!             }
!             EncodingInfo[] ret = new EncodingInfo[totalMimeNames];
!             int j = 0;
!             keys = props.keys();
!             for (int i = 0; i < totalEntries; ++i)
!             {
!                 String javaName = (String) keys.nextElement();
!                 String val = props.getProperty(javaName);
                  int pos = val.indexOf(' ');
-                 String mimeName;
                  //int lastPrintable;
!                 if (pos < 0)
!                 {
                      // Maybe report/log this problem?
                      // "Last printable character not defined for encoding " +
                      // mimeName + " (" + val + ")" ...
!                     mimeName = val;
                      //lastPrintable = 0x00FF;
                  }
-                 else
-                 {
                      //lastPrintable =
                      // Integer.decode(val.substring(pos).trim()).intValue();
                      StringTokenizer st = new StringTokenizer(val.substring(0, pos), ",");
!                     for (boolean first = true;
!                         st.hasMoreTokens();
!                         first = false)
!                     {
!                         mimeName = st.nextToken();
!                         ret[j] =
!                             new EncodingInfo(mimeName, javaName);
!                         _encodingTableKeyMime.put(
!                             mimeName.toUpperCase(),
!                             ret[j]);
!                         if (first)
!                             _encodingTableKeyJava.put(
!                                 javaName.toUpperCase(),
!                                 ret[j]);
!                         j++;
                      }
                  }
              }
!             return ret;
          }
!         catch (java.net.MalformedURLException mue)
!         {
!             throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(mue);
          }
!         catch (java.io.IOException ioe)
!         {
              throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(ioe);
          }
      }

      /**
       * Return true if the character is the high member of a surrogate pair.
       * <p>
       * This is not a public API.
       * @param ch the character to test
--- 284,524 ----
       *
       * @return ISO-style encoding string.
       */
      public static String convertMime2JavaEncoding(String encoding)
      {
!         final EncodingInfo info = _encodingInfos.findEncoding(toUpperCaseFast(encoding));
!         return info != null ? info.javaName : encoding;
      }

!     // Using an inner static class here prevent initialization races
!     // where the hash maps could be used before they were populated.
!     //
!     private final static class EncodingInfos {
!         // These maps are final and not modified after initialization.
!         private final Map<String, EncodingInfo> _encodingTableKeyJava = new HashMap<>();
!         private final Map<String, EncodingInfo> _encodingTableKeyMime = new HashMap<>();
!         // This map will be added to after initialization: make sure it's
!         // thread-safe. This map should not be used frequently - only in cases
!         // where the mapping requested was not declared in the Encodings.properties
!         // file.
!         private final Map<String, EncodingInfo> _encodingDynamicTable =
!                 Collections.synchronizedMap(new HashMap<String, EncodingInfo>());
!
!         private EncodingInfos() {
!             loadEncodingInfo();
!         }
!
!         // Opens the file/resource containing java charset name -> preferred mime
!         // name mapping and returns it as an InputStream.
!         private InputStream openEncodingsFileStream() throws MalformedURLException, IOException {
              String urlString = null;
              InputStream is = null;

!             try {
                  urlString = SecuritySupport.getSystemProperty(ENCODINGS_PROP, "");
!             } catch (SecurityException e) {
              }

              if (urlString != null && urlString.length() > 0) {
                  URL url = new URL(urlString);
                  is = url.openStream();
              }

              if (is == null) {
                  is = SecuritySupport.getResourceAsStream(ENCODINGS_FILE);
              }
+             return is;
+         }

+         // Loads the Properties resource containing the mapping:
+         // java charset name -> preferred mime name
+         // and returns it.
+         private Properties loadProperties() throws MalformedURLException, IOException {
              Properties props = new Properties();
+             final InputStream is = openEncodingsFileStream();
+             try {
                  if (is != null) {
                      props.load(is);
                  } else {
                      // Seems to be no real need to force failure here, let the
                      // system do its best... The issue is not really very critical,
                      // and the output will be in any case _correct_ though maybe not
                      // always human-friendly... :)
                      // But maybe report/log the resource problem?
                      // Any standard ways to report/log errors (in static context)?
                  }
+             } finally {
+                 if (is != null) {
+                     is.close();
+                 }
+             }
+             return props;
+         }

!         // Parses the mime list associated to a java charset name.
!         // The first mime name in the list is supposed to be the preferred
!         // mime name.
!         private String[] parseMimeTypes(String val) {
              int pos = val.indexOf(' ');
              //int lastPrintable;
!             if (pos < 0) {
                  // Maybe report/log this problem?
                  // "Last printable character not defined for encoding " +
                  // mimeName + " (" + val + ")" ...
!                 return new String[] { val };
                  //lastPrintable = 0x00FF;
              }
              //lastPrintable =
              // Integer.decode(val.substring(pos).trim()).intValue();
              StringTokenizer st = new StringTokenizer(val.substring(0, pos), ",");
!             String[] values = new String[st.countTokens()];
!             for (int i=0; st.hasMoreTokens(); i++) {
!                 values[i] = st.nextToken();
!             }
!             return values;
!         }
!
!         // This method here attempts to find the canonical charset name for the
!         // the given name - which is supposed to be either a java name or a mime
!         // name.
!         // For that, it attempts to load the charset using the given name, and
!         // then returns the charset's canonical name.
!         // If the charset could not be loaded from the given name,
!         // the method returns null.
!         private String findCharsetNameFor(String name) {
!             try {
!                 return Charset.forName(name).name();
!             } catch (Exception x) {
!                 return null;
              }
          }
+
+         // This method here attempts to find the canonical charset name for the
+         // the set javaName+mimeNames - which are supposed to all refer to the
+         // same charset.
+         // For that it attempts to load the charset using the javaName, and if
+         // not found, attempts again using each of the mime names in turn.
+         // If the charset could be loaded from the javaName, then the javaName
+         // itself is returned as charset name. Otherwise, each of the mime names
+         // is tried in turn, until a charset can be loaded from one of the names,
+         // and the loaded charset's canonical name is returned.
+         // If no charset can be loaded from either the javaName or one of the
+         // mime names, then null is returned.
+         //
+         // Note that the returned name is the 'java' name that will be used in
+         // instances of EncodingInfo.
+         // This is important because EncodingInfo uses that 'java name' later on
+         // in calls to String.getBytes(javaName).
+         // As it happens, sometimes only one element of the set mime names/javaName
+         // is known by Charset: sometimes only one of the mime names is known,
+         // sometime only the javaName is known, sometimes all are known.
+         //
+         // By using this method here, we fix the problem where one of the mime
+         // names is known but the javaName is unknown, by associating the charset
+         // loaded from one of the mime names with the unrecognized javaName.
+         //
+         // When none of the mime names or javaName are known - there's not much we can
+         // do... It can mean that this encoding is not supported for this
+         // OS. If such a charset is ever use it will result in having all characters
+         // escaped.
+         //
+         private String findCharsetNameFor(String javaName, String[] mimes) {
+             String cs = findCharsetNameFor(javaName);
+             if (cs != null) return javaName;
+             for (String m : mimes) {
+                 cs = findCharsetNameFor(m);
+                 if (cs != null) break;
              }
!             return cs;
          }
!
!         /**
!          * Loads a list of all the supported encodings.
!          *
!          * System property "encodings" formatted using URL syntax may define an
!          * external encodings list. Thanks to Sergey Ushakov for the code
!          * contribution!
!          */
!         private void loadEncodingInfo() {
!             try {
!                 // load (java name)->(preferred mime name) mapping.
!                 final Properties props = loadProperties();
!
!                 // create instances of EncodingInfo from the loaded mapping
!                 Enumeration keys = props.keys();
!                 Map<String, EncodingInfo> canonicals = new HashMap<>();
!                 while (keys.hasMoreElements()) {
!                     final String javaName = (String) keys.nextElement();
!                     final String[] mimes = parseMimeTypes(props.getProperty(javaName));
!
!                     final String charsetName = findCharsetNameFor(javaName, mimes);
!                     if (charsetName != null) {
!                         final String kj = toUpperCaseFast(javaName);
!                         final String kc = toUpperCaseFast(charsetName);
!                         for (int i = 0; i < mimes.length; ++i) {
!                             final String mimeName = mimes[i];
!                             final String km = toUpperCaseFast(mimeName);
!                             EncodingInfo info = new EncodingInfo(mimeName, charsetName);
!                             _encodingTableKeyMime.put(km, info);
!                             if (!canonicals.containsKey(kc)) {
!                                 // canonicals will map the charset name to
!                                 // the info containing the prefered mime name
!                                 // (the preferred mime name is the first mime
!                                 // name in the list).
!                                 canonicals.put(kc, info);
!                                 _encodingTableKeyJava.put(kc, info);
                              }
!                             _encodingTableKeyJava.put(kj, info);
!                         }
!                     } else {
!                         // None of the java or mime names on the line were
!                         // recognized => this charset is not supported?
!                     }
!                 }
!
!                 // Fix up the _encodingTableKeyJava so that the info mapped to
!                 // the java name contains the preferred mime name.
!                 // (a given java name can correspond to several mime name,
!                 // but we want the _encodingTableKeyJava to point to the
!                 // preferred mime name).
!                 for (Entry<String, EncodingInfo> e : _encodingTableKeyJava.entrySet()) {
!                     e.setValue(canonicals.get(toUpperCaseFast(e.getValue().javaName)));
!                 }
!
!             } catch (java.net.MalformedURLException mue) {
!                 throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(mue);
!             } catch (java.io.IOException ioe) {
                  throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(ioe);
              }
          }

+         EncodingInfo findEncoding(String normalizedEncoding) {
+             EncodingInfo info = _encodingTableKeyJava.get(normalizedEncoding);
+             if (info == null) {
+                 info = _encodingTableKeyMime.get(normalizedEncoding);
+             }
+             if (info == null) {
+                 info = _encodingDynamicTable.get(normalizedEncoding);
+             }
+             return info;
+         }
+
+         EncodingInfo getEncodingFromMimeKey(String normalizedMimeName) {
+             return _encodingTableKeyMime.get(normalizedMimeName);
+         }
+
+         EncodingInfo getEncodingFromJavaKey(String normalizedJavaName) {
+             return _encodingTableKeyJava.get(normalizedJavaName);
+         }
+
+         void putEncoding(String key, EncodingInfo info) {
+             _encodingDynamicTable.put(key, info);
+         }
+     }
+
      /**
       * Return true if the character is the high member of a surrogate pair.
       * <p>
       * This is not a public API.
       * @param ch the character to test
*** 455,463 ****
      static int toCodePoint(char ch) {
          int codePoint = ch;
          return codePoint;
      }

!     private static final HashMap _encodingTableKeyJava = new HashMap();
!     private static final HashMap _encodingTableKeyMime = new HashMap();
!     private static final EncodingInfo[] _encodings = loadEncodingInfo();
  }
--- 564,571 ----
      static int toCodePoint(char ch) {
          int codePoint = ch;
          return codePoint;
      }

!     private final static EncodingInfos _encodingInfos = new EncodingInfos();
!
  }