jaxp/src/com/sun/org/apache/xml/internal/serializer/Encodings.java

Print this page
rev 565 : 8008738: Issue in com.sun.org.apache.xml.internal.serializer.Encodings causes some JCK tests to fail intermittently
Summary: Encodings.java sometimes creates EncodingInfo objects whose java names are not recognized by the Charset API. This patch fixes that issue.
Reviewed-by: joehw, alanb

@@ -31,10 +31,18 @@
 import java.net.URL;
 import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.Properties;
 import java.util.StringTokenizer;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Map.Entry;
 
 import com.sun.org.apache.xalan.internal.utils.SecuritySupport;
 
 /**
  * Provides information about encodings. Depends on the Java runtime

@@ -77,41 +85,22 @@
      */
     static Writer getWriter(OutputStream output, String encoding)
         throws UnsupportedEncodingException
     {
 
-        for (int i = 0; i < _encodings.length; ++i)
-        {
-            if (_encodings[i].name.equalsIgnoreCase(encoding))
-            {
-                try
-                {
+        final EncodingInfo ei = _encodingInfos.findEncoding(toUpperCaseFast(encoding));
+        if (ei != null) {
+            try {
                     return new BufferedWriter(new OutputStreamWriter(
-                        output,
-                        _encodings[i].javaName));
-                }
-                catch (java.lang.IllegalArgumentException iae) // java 1.1.8
-                {
+                        output, ei.javaName));
+            } catch (UnsupportedEncodingException usee) {
                     // keep trying
                 }
-                catch (UnsupportedEncodingException usee)
-                {
-
-                    // keep trying
-                }
-            }
         }
 
-        try
-        {
             return new BufferedWriter(new OutputStreamWriter(output, encoding));
         }
-        catch (java.lang.IllegalArgumentException iae) // java 1.1.8
-        {
-            throw new UnsupportedEncodingException(encoding);
-        }
-    }
 
 
     /**
      * Returns the last printable character for an unspecified
      * encoding.

@@ -139,17 +128,29 @@
     static EncodingInfo getEncodingInfo(String encoding)
     {
         EncodingInfo ei;
 
         String normalizedEncoding = toUpperCaseFast(encoding);
-        ei = (EncodingInfo) _encodingTableKeyJava.get(normalizedEncoding);
-        if (ei == null)
-            ei = (EncodingInfo) _encodingTableKeyMime.get(normalizedEncoding);
+        ei = _encodingInfos.findEncoding(normalizedEncoding);
         if (ei == null) {
             // We shouldn't have to do this, but just in case.
+            try {
+                // This may happen if the caller tries to use
+                // an encoding that wasn't registered in the
+                // (java name)->(preferred mime name) mapping file.
+                // In that case we attempt to load the charset for the
+                // given encoding, and if that succeeds - we create a new
+                // EncodingInfo instance - assuming the canonical name
+                // of the charset can be used as the mime name.
+                final Charset c = Charset.forName(encoding);
+                final String name = c.name();
+                ei = new EncodingInfo(name, name);
+                _encodingInfos.putEncoding(normalizedEncoding, ei);
+            } catch (IllegalCharsetNameException | UnsupportedCharsetException x) {
             ei = new EncodingInfo(null,null);
         }
+        }
 
         return ei;
     }
 
     /**

@@ -267,12 +268,12 @@
      *
      * @return ISO-style encoding string.
      */
     private static String convertJava2MimeEncoding(String encoding)
     {
-        EncodingInfo enc =
-            (EncodingInfo) _encodingTableKeyJava.get(encoding.toUpperCase());
+        final EncodingInfo enc =
+             _encodingInfos.getEncodingFromJavaKey(toUpperCaseFast(encoding));
         if (null != enc)
             return enc.name;
         return encoding;
     }
 

@@ -283,133 +284,241 @@
      *
      * @return ISO-style encoding string.
      */
     public static String convertMime2JavaEncoding(String encoding)
     {
-
-        for (int i = 0; i < _encodings.length; ++i)
-        {
-            if (_encodings[i].name.equalsIgnoreCase(encoding))
-            {
-                return _encodings[i].javaName;
-            }
+        final EncodingInfo info = _encodingInfos.findEncoding(toUpperCaseFast(encoding));
+        return info != null ? info.javaName : encoding;
         }
 
-        return encoding;
-    }
-
-    /**
-     * Load a list of all the supported encodings.
-     *
-     * System property "encodings" formatted using URL syntax may define an
-     * external encodings list. Thanks to Sergey Ushakov for the code
-     * contribution!
-     */
-    private static EncodingInfo[] loadEncodingInfo()
-    {
-        try
-        {
+    // Using an inner static class here prevents initialization races
+    // where the hash maps could be used before they were populated.
+    //
+    private final static class EncodingInfos {
+        // These maps are final and not modified after initialization.
+        private final Map<String, EncodingInfo> _encodingTableKeyJava = new HashMap<>();
+        private final Map<String, EncodingInfo> _encodingTableKeyMime = new HashMap<>();
+        // This map will be added to after initialization: make sure it's
+        // thread-safe. This map should not be used frequently - only in cases
+        // where the mapping requested was not declared in the Encodings.properties
+        // file.
+        private final Map<String, EncodingInfo> _encodingDynamicTable =
+                Collections.synchronizedMap(new HashMap<String, EncodingInfo>());
+
+        private EncodingInfos() {
+            loadEncodingInfo();
+        }
+
+        // Opens the file/resource containing java charset name -> preferred mime
+        // name mapping and returns it as an InputStream.
+        private InputStream openEncodingsFileStream() throws MalformedURLException, IOException {
             String urlString = null;
             InputStream is = null;
 
-            try
-            {
+            try {
                 urlString = SecuritySupport.getSystemProperty(ENCODINGS_PROP, "");
-            }
-            catch (SecurityException e)
-            {
+            } catch (SecurityException e) {
             }
 
             if (urlString != null && urlString.length() > 0) {
                 URL url = new URL(urlString);
                 is = url.openStream();
             }
 
             if (is == null) {
                 is = SecuritySupport.getResourceAsStream(ENCODINGS_FILE);
             }
+            return is;
+        }
 
+        // Loads the Properties resource containing the mapping:
+        //    java charset name -> preferred mime name
+        // and returns it.
+        private Properties loadProperties() throws MalformedURLException, IOException {
             Properties props = new Properties();
+            final InputStream is = openEncodingsFileStream();
+            try {
             if (is != null) {
                 props.load(is);
-                is.close();
             } else {
                 // Seems to be no real need to force failure here, let the
                 // system do its best... The issue is not really very critical,
                 // and the output will be in any case _correct_ though maybe not
                 // always human-friendly... :)
                 // But maybe report/log the resource problem?
                 // Any standard ways to report/log errors (in static context)?
             }
+            } finally {
+                if (is != null) {
+                    is.close();
+                }
+            }
+            return props;
+        }
 
-            int totalEntries = props.size();
-            int totalMimeNames = 0;
-            Enumeration keys = props.keys();
-            for (int i = 0; i < totalEntries; ++i)
-            {
-                String javaName = (String) keys.nextElement();
-                String val = props.getProperty(javaName);
-                totalMimeNames++;
-                int pos = val.indexOf(' ');
-                for (int j = 0; j < pos; ++j)
-                    if (val.charAt(j) == ',')
-                        totalMimeNames++;
-            }
-            EncodingInfo[] ret = new EncodingInfo[totalMimeNames];
-            int j = 0;
-            keys = props.keys();
-            for (int i = 0; i < totalEntries; ++i)
-            {
-                String javaName = (String) keys.nextElement();
-                String val = props.getProperty(javaName);
+        // Parses the mime list associated to a java charset name.
+        // The first mime name in the list is supposed to be the preferred
+        // mime name.
+        private String[] parseMimeTypes(String val) {
                 int pos = val.indexOf(' ');
-                String mimeName;
                 //int lastPrintable;
-                if (pos < 0)
-                {
+            if (pos < 0) {
                     // Maybe report/log this problem?
                     //  "Last printable character not defined for encoding " +
                     //  mimeName + " (" + val + ")" ...
-                    mimeName = val;
+                return new String[] { val };
                     //lastPrintable = 0x00FF;
                 }
-                else
-                {
                     //lastPrintable =
                     //    Integer.decode(val.substring(pos).trim()).intValue();
                     StringTokenizer st =
                         new StringTokenizer(val.substring(0, pos), ",");
-                    for (boolean first = true;
-                        st.hasMoreTokens();
-                        first = false)
-                    {
-                        mimeName = st.nextToken();
-                        ret[j] =
-                            new EncodingInfo(mimeName, javaName);
-                        _encodingTableKeyMime.put(
-                            mimeName.toUpperCase(),
-                            ret[j]);
-                        if (first)
-                            _encodingTableKeyJava.put(
-                                javaName.toUpperCase(),
-                                ret[j]);
-                        j++;
+            String[] values = new String[st.countTokens()];
+            for (int i=0; st.hasMoreTokens(); i++) {
+                values[i] = st.nextToken();
+            }
+            return values;
+        }
+
+        // This method here attempts to find the canonical charset name for
+        // the given name - which is supposed to be either a java name or a mime
+        // name.
+        // For that, it attempts to load the charset using the given name, and
+        // then returns the charset's canonical name.
+        // If the charset could not be loaded from the given name,
+        // the method returns null.
+        private String findCharsetNameFor(String name) {
+            try {
+                return Charset.forName(name).name();
+            } catch (Exception x) {
+                return null;
                     }
                 }
+
+        // This method here attempts to find the canonical charset name for
+        // the set javaName+mimeNames - which are supposed to all refer to the
+        // same charset.
+        // For that it attempts to load the charset using the javaName, and if
+        // not found, attempts again using each of the mime names in turn.
+        // If the charset could be loaded from the javaName, then the javaName
+        // itself is returned as charset name. Otherwise, each of the mime names
+        // is tried in turn, until a charset can be loaded from one of the names,
+        // and the loaded charset's canonical name is returned.
+        // If no charset can be loaded from either the javaName or one of the
+        // mime names, then null is returned.
+        //
+        // Note that the returned name is the 'java' name that will be used in
+        // instances of EncodingInfo.
+        // This is important because EncodingInfo uses that 'java name' later on
+        // in calls to String.getBytes(javaName).
+        // As it happens, sometimes only one element of the set mime names/javaName
+        // is known by Charset: sometimes only one of the mime names is known,
+        // sometimes only the javaName is known, sometimes all are known.
+        //
+        // By using this method here, we fix the problem where one of the mime
+        // names is known but the javaName is unknown, by associating the charset
+        // loaded from one of the mime names with the unrecognized javaName.
+        //
+        // When none of the mime names or javaName are known - there's not much we can
+        // do... It can mean that this encoding is not supported for this
+        // OS. If such a charset is ever used, it will result in having all characters
+        // escaped.
+        //
+        private String findCharsetNameFor(String javaName, String[] mimes) {
+            String cs = findCharsetNameFor(javaName);
+            if (cs != null) return javaName;
+            for (String m : mimes) {
+                cs = findCharsetNameFor(m);
+                if (cs != null) break;
             }
-            return ret;
+            return cs;
         }
-        catch (java.net.MalformedURLException mue)
-        {
-            throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(mue);
+
+        /**
+         * Loads a list of all the supported encodings.
+         *
+         * System property "encodings" formatted using URL syntax may define an
+         * external encodings list. Thanks to Sergey Ushakov for the code
+         * contribution!
+         */
+        private void loadEncodingInfo() {
+            try {
+                // load (java name)->(preferred mime name) mapping.
+                final Properties props = loadProperties();
+
+                // create instances of EncodingInfo from the loaded mapping
+                Enumeration keys = props.keys();
+                Map<String, EncodingInfo> canonicals = new HashMap<>();
+                while (keys.hasMoreElements()) {
+                    final String javaName = (String) keys.nextElement();
+                    final String[] mimes = parseMimeTypes(props.getProperty(javaName));
+
+                    final String charsetName = findCharsetNameFor(javaName, mimes);
+                    if (charsetName != null) {
+                        final String kj = toUpperCaseFast(javaName);
+                        final String kc = toUpperCaseFast(charsetName);
+                        for (int i = 0; i < mimes.length; ++i) {
+                            final String mimeName = mimes[i];
+                            final String km = toUpperCaseFast(mimeName);
+                            EncodingInfo info = new EncodingInfo(mimeName, charsetName);
+                            _encodingTableKeyMime.put(km, info);
+                            if (!canonicals.containsKey(kc)) {
+                                // canonicals will map the charset name to
+                                //   the info containing the preferred mime name
+                                //   (the preferred mime name is the first mime
+                                //   name in the list).
+                                canonicals.put(kc, info);
+                                _encodingTableKeyJava.put(kc, info);
         }
-        catch (java.io.IOException ioe)
-        {
+                            _encodingTableKeyJava.put(kj, info);
+                        }
+                    } else {
+                        // None of the java or mime names on the line were
+                        // recognized => this charset is not supported?
+                    }
+                }
+
+                // Fix up the _encodingTableKeyJava so that the info mapped to
+                // the java name contains the preferred mime name.
+                // (a given java name can correspond to several mime names,
+                //  but we want the _encodingTableKeyJava to point to the
+                //  preferred mime name).
+                for (Entry<String, EncodingInfo> e : _encodingTableKeyJava.entrySet()) {
+                    e.setValue(canonicals.get(toUpperCaseFast(e.getValue().javaName)));
+                }
+
+            } catch (java.net.MalformedURLException mue) {
+                throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(mue);
+            } catch (java.io.IOException ioe) {
             throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(ioe);
         }
     }
 
+        EncodingInfo findEncoding(String normalizedEncoding) {
+            EncodingInfo info = _encodingTableKeyJava.get(normalizedEncoding);
+            if (info == null) {
+                info = _encodingTableKeyMime.get(normalizedEncoding);
+            }
+            if (info == null) {
+                info = _encodingDynamicTable.get(normalizedEncoding);
+            }
+            return info;
+        }
+
+        EncodingInfo getEncodingFromMimeKey(String normalizedMimeName) {
+            return _encodingTableKeyMime.get(normalizedMimeName);
+        }
+
+        EncodingInfo getEncodingFromJavaKey(String normalizedJavaName) {
+            return _encodingTableKeyJava.get(normalizedJavaName);
+        }
+
+        void putEncoding(String key, EncodingInfo info) {
+            _encodingDynamicTable.put(key, info);
+        }
+    }
+
     /**
      * Return true if the character is the high member of a surrogate pair.
      * <p>
      * This is not a public API.
      * @param ch the character to test

@@ -455,9 +564,8 @@
     static int toCodePoint(char ch) {
         int codePoint = ch;
         return codePoint;
     }
 
-    private static final HashMap _encodingTableKeyJava = new HashMap();
-    private static final HashMap _encodingTableKeyMime = new HashMap();
-    private static final EncodingInfo[] _encodings = loadEncodingInfo();
+    private final static EncodingInfos _encodingInfos = new EncodingInfos();
+
 }