--- old/jaxp/src/com/sun/org/apache/xml/internal/serializer/Encodings.java 2013-05-02 18:02:43.000000000 +0200
+++ new/jaxp/src/com/sun/org/apache/xml/internal/serializer/Encodings.java 2013-05-02 18:02:42.000000000 +0200
@@ -33,6 +33,14 @@
 import java.util.HashMap;
 import java.util.Properties;
 import java.util.StringTokenizer;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Map.Entry;
 
 import com.sun.org.apache.xalan.internal.utils.SecuritySupport;
 
@@ -79,36 +87,17 @@
         throws UnsupportedEncodingException
     {
 
-        for (int i = 0; i < _encodings.length; ++i)
-        {
-            if (_encodings[i].name.equalsIgnoreCase(encoding))
-            {
-                try
-                {
-                    return new BufferedWriter(new OutputStreamWriter(
-                        output,
-                        _encodings[i].javaName));
-                }
-                catch (java.lang.IllegalArgumentException iae) // java 1.1.8
-                {
-                    // keep trying
-                }
-                catch (UnsupportedEncodingException usee)
-                {
-
-                    // keep trying
-                }
+        final EncodingInfo ei = _encodingInfos.findEncoding(toUpperCaseFast(encoding));
+        if (ei != null) {
+            try {
+                return new BufferedWriter(new OutputStreamWriter(
+                        output, ei.javaName));
+            } catch (UnsupportedEncodingException usee) {
+                // keep trying
             }
         }
 
-        try
-        {
-            return new BufferedWriter(new OutputStreamWriter(output, encoding));
-        }
-        catch (java.lang.IllegalArgumentException iae) // java 1.1.8
-        {
-            throw new UnsupportedEncodingException(encoding);
-        }
+        return new BufferedWriter(new OutputStreamWriter(output, encoding));
     }
 
 
@@ -141,12 +130,24 @@
         EncodingInfo ei;
 
         String normalizedEncoding = toUpperCaseFast(encoding);
-        ei = (EncodingInfo) _encodingTableKeyJava.get(normalizedEncoding);
-        if (ei == null)
-            ei = (EncodingInfo) _encodingTableKeyMime.get(normalizedEncoding);
+        ei = _encodingInfos.findEncoding(normalizedEncoding);
         if (ei == null) {
             // We shouldn't have to do this, but just in case.
-            ei = new EncodingInfo(null,null);
+            try {
+                // This may happen if the caller tries to use
+                // an encoding that wasn't registered in the
+                // (java name)->(preferred mime name) mapping file.
+                // In that case we attempt to load the charset for the
+                // given encoding, and if that succeeds - we create a new
+                // EncodingInfo instance - assuming the canonical name
+                // of the charset can be used as the mime name.
+                final Charset c = Charset.forName(encoding);
+                final String name = c.name();
+                ei = new EncodingInfo(name, name);
+                _encodingInfos.putEncoding(normalizedEncoding, ei);
+            } catch (IllegalCharsetNameException | UnsupportedCharsetException x) {
+                ei = new EncodingInfo(null,null);
+            }
         }
 
         return ei;
@@ -269,8 +270,8 @@
      */
     private static String convertJava2MimeEncoding(String encoding)
     {
-        EncodingInfo enc =
-            (EncodingInfo) _encodingTableKeyJava.get(encoding.toUpperCase());
+        final EncodingInfo enc =
+             _encodingInfos.getEncodingFromJavaKey(toUpperCaseFast(encoding));
         if (null != enc)
             return enc.name;
         return encoding;
@@ -285,38 +286,37 @@
      */
     public static String convertMime2JavaEncoding(String encoding)
     {
-
-        for (int i = 0; i < _encodings.length; ++i)
-        {
-            if (_encodings[i].name.equalsIgnoreCase(encoding))
-            {
-                return _encodings[i].javaName;
-            }
-        }
-
-        return encoding;
+        final EncodingInfo info = _encodingInfos.findEncoding(toUpperCaseFast(encoding));
+        return info != null ? info.javaName : encoding;
     }
 
-    /**
-     * Load a list of all the supported encodings.
-     *
-     * System property "encodings" formatted using URL syntax may define an
-     * external encodings list. Thanks to Sergey Ushakov for the code
-     * contribution!
-     */
-    private static EncodingInfo[] loadEncodingInfo()
-    {
-        try
-        {
+    // Using an inner static class here prevents initialization races
+    // where the hash maps could be used before they were populated.
+    //
+    private final static class EncodingInfos {
+        // These maps are final and not modified after initialization.
+        private final Map<String, EncodingInfo> _encodingTableKeyJava = new HashMap<>();
+        private final Map<String, EncodingInfo> _encodingTableKeyMime = new HashMap<>();
+        // This map will be added to after initialization: make sure it's
+        // thread-safe. This map should not be used frequently - only in cases
+        // where the mapping requested was not declared in the Encodings.properties
+        // file.
+        private final Map<String, EncodingInfo> _encodingDynamicTable =
+                Collections.synchronizedMap(new HashMap<String, EncodingInfo>());
+
+        private EncodingInfos() {
+            loadEncodingInfo();
+        }
+
+        // Opens the file/resource containing java charset name -> preferred mime
+        // name mapping and returns it as an InputStream.
+        private InputStream openEncodingsFileStream() throws MalformedURLException, IOException {
             String urlString = null;
             InputStream is = null;
 
-            try
-            {
+            try {
                 urlString = SecuritySupport.getSystemProperty(ENCODINGS_PROP, "");
-            }
-            catch (SecurityException e)
-            {
+            } catch (SecurityException e) {
             }
 
             if (urlString != null && urlString.length() > 0) {
@@ -327,84 +327,188 @@
             if (is == null) {
                 is = SecuritySupport.getResourceAsStream(ENCODINGS_FILE);
             }
+            return is;
+        }
 
+        // Loads the Properties resource containing the mapping:
+        //    java charset name -> preferred mime name
+        // and returns it.
+        private Properties loadProperties() throws MalformedURLException, IOException {
             Properties props = new Properties();
-            if (is != null) {
-                props.load(is);
-                is.close();
-            } else {
-                // Seems to be no real need to force failure here, let the
-                // system do its best... The issue is not really very critical,
-                // and the output will be in any case _correct_ though maybe not
-                // always human-friendly... :)
-                // But maybe report/log the resource problem?
-                // Any standard ways to report/log errors (in static context)?
-            }
-
-            int totalEntries = props.size();
-            int totalMimeNames = 0;
-            Enumeration keys = props.keys();
-            for (int i = 0; i < totalEntries; ++i)
-            {
-                String javaName = (String) keys.nextElement();
-                String val = props.getProperty(javaName);
-                totalMimeNames++;
-                int pos = val.indexOf(' ');
-                for (int j = 0; j < pos; ++j)
-                    if (val.charAt(j) == ',')
-                        totalMimeNames++;
-            }
-            EncodingInfo[] ret = new EncodingInfo[totalMimeNames];
-            int j = 0;
-            keys = props.keys();
-            for (int i = 0; i < totalEntries; ++i)
-            {
-                String javaName = (String) keys.nextElement();
-                String val = props.getProperty(javaName);
-                int pos = val.indexOf(' ');
-                String mimeName;
-                //int lastPrintable;
-                if (pos < 0)
-                {
-                    // Maybe report/log this problem?
-                    // "Last printable character not defined for encoding " +
-                    // mimeName + " (" + val + ")" ...
-                    mimeName = val;
-                    //lastPrintable = 0x00FF;
+            try (InputStream is = openEncodingsFileStream()) {
+                if (is != null) {
+                    props.load(is);
+                } else {
+                    // Seems to be no real need to force failure here, let the
+                    // system do its best... The issue is not really very critical,
+                    // and the output will be in any case _correct_ though maybe not
+                    // always human-friendly... :)
+                    // But maybe report/log the resource problem?
+                    // Any standard ways to report/log errors (in static context)?
                 }
-                else
-                {
-                    //lastPrintable =
-                    // Integer.decode(val.substring(pos).trim()).intValue();
-                    StringTokenizer st =
-                        new StringTokenizer(val.substring(0, pos), ",");
-                    for (boolean first = true;
-                        st.hasMoreTokens();
-                        first = false)
-                    {
-                        mimeName = st.nextToken();
-                        ret[j] =
-                            new EncodingInfo(mimeName, javaName);
-                        _encodingTableKeyMime.put(
-                            mimeName.toUpperCase(),
-                            ret[j]);
-                        if (first)
-                            _encodingTableKeyJava.put(
-                                javaName.toUpperCase(),
-                                ret[j]);
-                        j++;
+            }
+            return props;
+        }
+
+        // Parses the mime list associated to a java charset name.
+        // The first mime name in the list is supposed to be the preferred
+        // mime name.
+        private String[] parseMimeTypes(String val) {
+            int pos = val.indexOf(' ');
+            //int lastPrintable;
+            if (pos < 0) {
+                // Maybe report/log this problem?
+                // "Last printable character not defined for encoding " +
+                // mimeName + " (" + val + ")" ...
+                return new String[] { val };
+                //lastPrintable = 0x00FF;
+            }
+            //lastPrintable =
+            // Integer.decode(val.substring(pos).trim()).intValue();
+            StringTokenizer st =
+                    new StringTokenizer(val.substring(0, pos), ",");
+            String[] values = new String[st.countTokens()];
+            for (int i=0; st.hasMoreTokens(); i++) {
+                values[i] = st.nextToken();
+            }
+            return values;
+        }
+
+        // This method here attempts to find the canonical charset name for the
+        // given name - which is supposed to be either a java name or a mime
+        // name.
+        // For that, it attempts to load the charset using the given name, and
+        // then returns the charset's canonical name.
+        // If the charset could not be loaded from the given name,
+        // the method returns null.
+        private String findCharsetNameFor(String name) {
+            try {
+                return Charset.forName(name).name();
+            } catch (Exception x) {
+                return null;
+            }
+        }
+
+        // This method here attempts to find the canonical charset name for the
+        // set javaName+mimeNames - which are supposed to all refer to the
+        // same charset.
+        // For that it attempts to load the charset using the javaName, and if
+        // not found, attempts again using each of the mime names in turn.
+        // If the charset could be loaded from the javaName, then the javaName
+        // itself is returned as charset name. Otherwise, each of the mime names
+        // is tried in turn, until a charset can be loaded from one of the names,
+        // and the loaded charset's canonical name is returned.
+        // If no charset can be loaded from either the javaName or one of the
+        // mime names, then null is returned.
+        //
+        // Note that the returned name is the 'java' name that will be used in
+        // instances of EncodingInfo.
+        // This is important because EncodingInfo uses that 'java name' later on
+        // in calls to String.getBytes(javaName).
+        // As it happens, sometimes only one element of the set mime names/javaName
+        // is known by Charset: sometimes only one of the mime names is known,
+        // sometimes only the javaName is known, sometimes all are known.
+        //
+        // By using this method here, we fix the problem where one of the mime
+        // names is known but the javaName is unknown, by associating the charset
+        // loaded from one of the mime names with the unrecognized javaName.
+        //
+        // When none of the mime names or javaName are known - there's not much we can
+        // do... It can mean that this encoding is not supported for this
+        // OS. If such a charset is ever used it will result in having all characters
+        // escaped.
+        //
+        private String findCharsetNameFor(String javaName, String[] mimes) {
+            String cs = findCharsetNameFor(javaName);
+            if (cs != null) return javaName;
+            for (String m : mimes) {
+                cs = findCharsetNameFor(m);
+                if (cs != null) break;
+            }
+            return cs;
+        }
+
+        /**
+         * Loads a list of all the supported encodings.
+         *
+         * System property "encodings" formatted using URL syntax may define an
+         * external encodings list. Thanks to Sergey Ushakov for the code
+         * contribution!
+         */
+        private void loadEncodingInfo() {
+            try {
+                // load (java name)->(preferred mime name) mapping.
+                final Properties props = loadProperties();
+
+                // create instances of EncodingInfo from the loaded mapping
+                Enumeration keys = props.keys();
+                Map<String, EncodingInfo> canonicals = new HashMap<>();
+                while (keys.hasMoreElements()) {
+                    final String javaName = (String) keys.nextElement();
+                    final String[] mimes = parseMimeTypes(props.getProperty(javaName));
+
+                    final String charsetName = findCharsetNameFor(javaName, mimes);
+                    if (charsetName != null) {
+                        final String kj = toUpperCaseFast(javaName);
+                        final String kc = toUpperCaseFast(charsetName);
+                        for (int i = 0; i < mimes.length; ++i) {
+                            final String mimeName = mimes[i];
+                            final String km = toUpperCaseFast(mimeName);
+                            EncodingInfo info = new EncodingInfo(mimeName, charsetName);
+                            _encodingTableKeyMime.put(km, info);
+                            if (!canonicals.containsKey(kc)) {
+                                // canonicals will map the charset name to
+                                // the info containing the preferred mime name
+                                // (the preferred mime name is the first mime
+                                // name in the list).
+                                canonicals.put(kc, info);
+                                _encodingTableKeyJava.put(kc, info);
+                            }
+                            _encodingTableKeyJava.put(kj, info);
+                        }
+                    } else {
+                        // None of the java or mime names on the line were
+                        // recognized => this charset is not supported?
                     }
                 }
+
+                // Fix up the _encodingTableKeyJava so that the info mapped to
+                // the java name contains the preferred mime name.
+                // (a given java name can correspond to several mime names,
+                // but we want the _encodingTableKeyJava to point to the
+                // preferred mime name).
+                for (Entry<String, EncodingInfo> e : _encodingTableKeyJava.entrySet()) {
+                    e.setValue(canonicals.get(toUpperCaseFast(e.getValue().javaName)));
+                }
+
+            } catch (java.net.MalformedURLException mue) {
+                throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(mue);
+            } catch (java.io.IOException ioe) {
+                throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(ioe);
             }
-            return ret;
         }
-        catch (java.net.MalformedURLException mue)
-        {
-            throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(mue);
+
+        EncodingInfo findEncoding(String normalizedEncoding) {
+            EncodingInfo info = _encodingTableKeyJava.get(normalizedEncoding);
+            if (info == null) {
+                info = _encodingTableKeyMime.get(normalizedEncoding);
+            }
+            if (info == null) {
+                info = _encodingDynamicTable.get(normalizedEncoding);
+            }
+            return info;
         }
-        catch (java.io.IOException ioe)
-        {
-            throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(ioe);
+
+        EncodingInfo getEncodingFromMimeKey(String normalizedMimeName) {
+            return _encodingTableKeyMime.get(normalizedMimeName);
+        }
+
+        EncodingInfo getEncodingFromJavaKey(String normalizedJavaName) {
+            return _encodingTableKeyJava.get(normalizedJavaName);
+        }
+
+        void putEncoding(String key, EncodingInfo info) {
+            _encodingDynamicTable.put(key, info);
         }
     }
 
@@ -457,7 +561,6 @@
         return codePoint;
     }
 
-    private static final HashMap _encodingTableKeyJava = new HashMap();
-    private static final HashMap _encodingTableKeyMime = new HashMap();
-    private static final EncodingInfo[] _encodings = loadEncodingInfo();
+    private final static EncodingInfos _encodingInfos = new EncodingInfos();
+
 }
--- /dev/null 2013-05-02 18:02:45.000000000 +0200
+++ new/jdk/test/javax/xml/jaxp/Encodings/CheckEncodingPropertiesFile.java 2013-05-02 18:02:44.000000000 +0200
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8008738
+ * @summary checks that the mapping implemented by
+ *      com.sun.org.apache.xml.internal.serializer.Encodings
+ *      correctly identifies valid Charset names and
+ *      correctly maps them to their preferred mime names.
+ *      Also checks that the Encodings.properties resource file
+ *      is consistent.
+ * @compile -XDignore.symbol.file CheckEncodingPropertiesFile.java
+ * @run main CheckEncodingPropertiesFile
+ * @author Daniel Fuchs
+ */
+
+import com.sun.org.apache.xml.internal.serializer.EncodingInfo;
+import com.sun.org.apache.xml.internal.serializer.Encodings;
+import java.io.InputStreamReader;
+import java.lang.reflect.Method;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Set;
+import java.util.StringTokenizer;
+
+public class CheckEncodingPropertiesFile {
+
+    private static final String ENCODINGS_FILE = "com/sun/org/apache/xml/internal/serializer/Encodings.properties";
+
+    public static void main(String[] args) throws Exception {
+        Properties props = new Properties();
+        try (InputStreamReader is = new InputStreamReader(ClassLoader.getSystemResourceAsStream(ENCODINGS_FILE))) {
+            props.load(is);
+        }
+
+        //printAllCharsets();
+
+        test(props);
+    }
+
+
+    private static final class CheckCharsetMapping {
+
+        /**
+         * A map that maps Java or XML name to canonical charset names.
+         * key:    upper cased value of Java or XML name.
+         * value:  case-sensitive canonical name of charset.
+         */
+        private final Map<String, String> charsetMap = new HashMap<>();
+
+        private final Map<String, String> preferredMime = new HashMap<>();
+
+        /**
+         * Unresolved alias names.
+         * For a given set of names pointing to the same unresolved charset,
+         * this map will contain, for each alias in the set, a mapping
+         * with the alias.toUpperCase() as key and the set of known aliases
+         * as value.
+         */
+        private final Map<String, Collection<String>> unresolved = new HashMap<>();
+
+        public final static class ConflictingCharsetError extends Error {
+            ConflictingCharsetError(String a, String cs1, String cs2) {
+                super("Conflicting charset mapping for '"+a+"': '"+cs1+"' and '"+cs2+"'");
+            }
+        }
+
+        public final static class MissingValidCharsetNameError extends Error {
+            MissingValidCharsetNameError(String name, Collection<String> aliases) {
+                super(name+": Line "+aliases+" has no recognized charset alias");
+            }
+        }
+
+        public final static class ConflictingPreferredMimeNameError extends Error {
+            ConflictingPreferredMimeNameError(String a, String cs1, String cs2) {
+                super("Conflicting preferred mime name for '"+a+"': '"+cs1+"' and '"+cs2+"'");
+            }
+        }
+
+        /**
+         * For each alias in aliases, attempt to find the canonical
+         * charset name.
+         * All names in aliases are supposed to point to the same charset.
+         * Names in aliases can be java names or XML names, indifferently.
+         * @param aliases list of names (aliases) for a given charset.
+         * @return The canonical name of the charset, if found, null otherwise.
+         */
+        private String findCharsetNameFor(String[] aliases) {
+            String cs = null;
+            String res = null;
+            for (String a : aliases) {
+                final String k = a.toUpperCase();
+                String cachedCs = charsetMap.get(k);
+                if (cs == null) {
+                    cs = cachedCs;
+                }
+                if (cachedCs != null && cs != null
+                        && !Charset.forName(cachedCs).name().equals(Charset.forName(cs).name())) {
+                    throw new ConflictingCharsetError(a,cs,cachedCs);
+                }
+                try {
+                    final String rcs = Charset.forName(a).name();
+                    if (cs != null && !Charset.forName(cs).name().equals(rcs)) {
+                        throw new ConflictingCharsetError(a,cs,rcs);
+                    }
+                    if (res == null) {
+                        if (a.equals(aliases[0])) {
+                            res = a;
+                        } else {
+                            res = cs;
+                        }
+                    }
+                    cs = rcs;
+                    charsetMap.put(k, res == null ? cs : res);
+                } catch (Exception x) {
+                    continue;
+                }
+            }
+            return res == null ? cs : res;
+        }
+
+        /**
+         * Register a canonical charset name for a given set of aliases.
+         *
+         * @param charsetName the canonical charset name.
+         * @param aliases a list of aliases for the given charset.
+         */
+        private void registerCharsetNameFor(String charsetName, String[] aliases) {
+            if (charsetName == null) throw new NullPointerException();
+
+            for (String a : aliases) {
+                String k = a.toUpperCase();
+                String csv = charsetMap.get(k);
+                if (csv == null) {
+                    charsetMap.put(k, charsetName);
+                    csv = charsetName;
+                } else if (!csv.equals(charsetName)) {
+                    throw new ConflictingCharsetError(a,charsetName,csv);
+                }
+
+                final Collection<String> c = unresolved.get(k);
+                if (c != null) {
+                    for (String aa : c) {
+                        k = aa.toUpperCase();
+                        String csvv = charsetMap.get(k);
+                        if (csvv == null) charsetMap.put(k, csv);
+                        unresolved.remove(k);
+                    }
+                    throw new MissingValidCharsetNameError(charsetName,c);
+                }
+            }
+        }
+
+        /**
+         * Register a set of aliases as being unresolved.
+         * @param names    the list of names - this should be what is returned by
+         *                 nameSet.toArray(new String[nameSet.size()])
+         * @param nameSet  the set of unresolved aliases.
+         */
+        private void registerUnresolvedNamesFor(String[] names, Collection<String> nameSet) {
+            // This is not necessarily an error: it could happen that some
+            //    charsets are simply not supported on some OS/Arch
+            System.err.println("Warning: unresolved charset names: '"+ nameSet
+                    + "' This is not necessarily an error "
+                    + "- this charset may not be supported on this platform.");
+            for (String a : names) {
+                final String k = a.toUpperCase();
+                final Collection<String> c = unresolved.get(k);
+                if (c != null) {
+                    //System.out.println("Found: "+a+" -> "+c);
+                    //System.out.println("\t merging "+ c + " with " + nameSet);
+                    nameSet.addAll(c);
+                    for (String aa : c) {
+                        unresolved.put(aa.toUpperCase(), nameSet);
+                    }
+                }
+                unresolved.put(k, nameSet);
+            }
+        }
+
+
+        /**
+         * Add a new charset name mapping
+         * @param javaName the (supposedly) java name of the charset.
+         * @param xmlNames a list of corresponding XML names for that charset.
+         */
+        void addMapping(String javaName, Collection<String> xmlNames) {
+            final LinkedHashSet<String> aliasNames = new LinkedHashSet<>();
+            aliasNames.add(javaName);
+            aliasNames.addAll(xmlNames);
+            final String[] aliases = aliasNames.toArray(new String[aliasNames.size()]);
+            final String cs = findCharsetNameFor(aliases);
+            if (cs != null) {
+                registerCharsetNameFor(cs, aliases);
+                if (xmlNames.size() > 0) {
+                    String preferred = xmlNames.iterator().next();
+                    String cachedPreferred = preferredMime.get(cs.toUpperCase());
+                    if (cachedPreferred != null && !cachedPreferred.equals(preferred)) {
+                        throw new ConflictingPreferredMimeNameError(cs, cachedPreferred, preferred);
+                    }
+                    preferredMime.put(cs.toUpperCase(), preferred);
+                }
+            } else {
+                registerUnresolvedNamesFor(aliases, aliasNames);
+            }
+        }
+
+        /**
+         * Returns the canonical name of the charset for the given Java or XML
+         * alias name.
+         * @param alias the alias name
+         * @return the canonical charset name - or null if unknown.
+         */
+        public String getCharsetNameFor(String alias) {
+            return charsetMap.get(alias.toUpperCase());
+        }
+
+    }
+
+    public static void test(Properties props) throws Exception {
+
+        // First, build a mapping from the properties read from the resource
+        // file.
+        // We're going to check the consistency of the resource file
+        // while building this mapping, and throw errors if the file
+        // does not meet our assumptions.
+        //
+        Map<String, Collection<String>> lines = new HashMap<>();
+        final CheckCharsetMapping mapping = new CheckCharsetMapping();
+
+        for (String key : props.stringPropertyNames()) {
+            Collection<String> values = getValues(props.getProperty(key));
+            lines.put(key, values);
+            mapping.addMapping(key, values);
+        }
+
+        // Then build maps of EncodingInfos, and print along debugging
+        // information that should help understand the content of the
+        // resource file and the mapping it defines.
+        //
+        Map<String, EncodingInfo> javaInfos = new HashMap<>(); // Map indexed by java names
+        Map<String, EncodingInfo> xmlMap = new HashMap<>();    // Map indexed by XML names
+        Map<String, String> preferred =
+                new HashMap<>(mapping.preferredMime);          // Java Name -> Preferred Mime Name
+        List<EncodingInfo> all = new ArrayList<>();            // unused...
+        for (Entry<String, Collection<String>> e : lines.entrySet()) {
+            final String charsetName = mapping.getCharsetNameFor(e.getKey());
+            if (charsetName == null) {
+                System.out.println("!! No charset for: "+e.getKey()+ " "+ e.getValue());
+                continue;
+            }
+            Charset c = Charset.forName(charsetName);
+            EncodingInfo info;
+            final String k = e.getKey().toUpperCase();
+            final String kc = charsetName.toUpperCase();
+            StringBuilder sb = new StringBuilder();
+            for (String xml : e.getValue()) {
+                final String kx = xml.toUpperCase();
+                info = xmlMap.get(kx);
+                if (info == null) {
+                    info = new EncodingInfo(xml, charsetName);
+                    System.out.println("** XML: "+xml+" -> "+charsetName);
+                    xmlMap.put(kx, info);
+                    all.add(info);
+                }
+                if (!javaInfos.containsKey(k)) {
+                    javaInfos.put(k, info);
+                    if (!preferred.containsKey(k)) {
+                        preferred.put(k, xml);
+                    }
+                    sb.append("** Java: ").append(k).append(" -> ")
+                            .append(xml).append(" (charset: ")
+                            .append(charsetName).append(")\n");
+                }
+                if (!javaInfos.containsKey(kc)) {
+                    if (!preferred.containsKey(kc)) {
+                        preferred.put(kc, xml);
+                    }
+                    javaInfos.put(kc, info);
+                    sb.append("** Java: ").append(kc).append(" -> ")
+                            .append(xml).append(" (charset: ")
+                            .append(charsetName).append(")\n");
+                }
+                if (!javaInfos.containsKey(c.name().toUpperCase())) {
+                    if (!preferred.containsKey(c.name().toUpperCase())) {
+                        preferred.put(c.name().toUpperCase(), xml);
+                    }
+                    javaInfos.put(c.name().toUpperCase(), info);
+                    sb.append("** Java: ").append(c.name().toUpperCase()).append(" -> ")
+                            .append(xml).append(" (charset: ")
+                            .append(charsetName).append(")\n");
+                }
+            }
+            if (sb.length() == 0) {
+                System.out.println("Nothing new for "+charsetName+": "+e.getKey()+" -> "+e.getValue());
+            } else {
+                System.out.print(sb);
+            }
+
+        }
+
+        // Now we're going to verify that Encodings.java has done its job
+        // correctly. We're going to ask Encodings to convert java names to mime
+        // names and mime names to java names - and verify that the returned
+        // java names do map to recognized charsets.
+        //
+        // We're also going to verify that Encodings has recorded the preferred
+        // mime name correctly.
+
+        Method m = Encodings.class.getDeclaredMethod("getMimeEncoding", String.class);
+        m.setAccessible(true);
+
+        Set<String> xNames = new HashSet<>();
+        Set<String> jNames = new HashSet<>();
+        for (String name: xmlMap.keySet()) {
+            final String javaName = checkConvertMime2Java(name);
+            checkPreferredMime(m, javaName, preferred);
+            jNames.add(javaName);
+            xNames.add(name);
+        }
+
+
+        for (String javaName : lines.keySet()) {
+            final String javaCharsetName = mapping.getCharsetNameFor(javaName.toUpperCase());
+            if (javaCharsetName == null) continue;
+            if (!jNames.contains(javaName)) {
+                checkPreferredMime(m, javaName, preferred);
+                jNames.add(javaName);
+            }
+            for (String xml : lines.get(javaName)) {
+                if (xNames.contains(xml)) continue;
+                final String jName = checkConvertMime2Java(xml);
+                xNames.add(xml);
+                if (jNames.contains(jName)) continue;
+                checkPreferredMime(m, jName, preferred);
+            }
+        }
+    }
+
+    private static String checkConvertMime2Java(String xml) {
+        final String jName = Encodings.convertMime2JavaEncoding(xml);
+        final String jCharsetName;
+        try {
+            jCharsetName = Charset.forName(jName).name();
+        } catch (Exception x) {
+            throw new Error("Unrecognized charset returned by Encodings.convertMime2JavaEncoding(\""+xml+"\")", x);
+        }
+        System.out.println("Encodings.convertMime2JavaEncoding(\""+xml+"\") = \""+jName+"\" ("+jCharsetName+")");
+        return jName;
+    }
+
+    private static void checkPreferredMime(Method m, String javaName, Map<String, String> preferred)
+            throws Exception {
+        final String mime = (String) m.invoke(null, javaName);
+        final String expected = preferred.get(javaName.toUpperCase());
+        if (Arrays.deepEquals(new String[] {mime}, new String[] {expected})) {
+            System.out.println("Encodings.getMimeEncoding(\""+javaName+"\") = \""+mime+"\"");
+        } else {
+            throw new Error("Bad preferred mime type for: '"+javaName+"': expected '"+
+                expected+"' but got '"+mime+"'");
+        }
+    }
+
+    private static Collection<String> getValues(String val) {
+        int pos = val.indexOf(' ');
+        if (pos < 0) {
+            return Collections.singletonList(val);
+        }
+        //lastPrintable =
+        //    Integer.decode(val.substring(pos).trim()).intValue();
+        StringTokenizer st =
+            new StringTokenizer(val.substring(0, pos), ",");
+        final List<String> values = new ArrayList<>(st.countTokens());
+        while (st.hasMoreTokens()) {
+            values.add(st.nextToken());
+        }
+        return values;
+    }
+
+    // can be called in main() to help debugging.
+    // Prints out all available charsets and their recognized aliases
+    // as returned by the Charset API.
+    private static void printAllCharsets() {
+        Map<String, Charset> all = Charset.availableCharsets();
+        System.out.println("\n=========================================\n");
+        for (String can : all.keySet()) {
+            System.out.println(can + ": " + all.get(can).aliases());
+        }
+    }
+}