1 /*
   2  * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @bug 8008738
  27  * @summary checks that the mapping implemented by
  28  *      com.sun.org.apache.xml.internal.serializer.Encodings
  29  *      correctly identifies valid Charset names and
  30  *      correctly maps them to their preferred mime names.
  31  *      Also checks that the Encodings.properties resource file
  32  *      is consistent.
  33  * @compile -XDignore.symbol.file CheckEncodingPropertiesFile.java
  34  * @run main CheckEncodingPropertiesFile
  35  * @author Daniel Fuchs
  36  */
  37 
import com.sun.org.apache.xml.internal.serializer.EncodingInfo;
import com.sun.org.apache.xml.internal.serializer.Encodings;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.StringTokenizer;
  56 
  57 public class CheckEncodingPropertiesFile {
  58 
  59     private static final String ENCODINGS_FILE = "com/sun/org/apache/xml/internal/serializer/Encodings.properties";
  60 
  61     public static void main(String[] args) throws Exception {
  62         Properties props = new Properties();
  63         try (InputStreamReader is = new InputStreamReader(ClassLoader.getSystemResourceAsStream(ENCODINGS_FILE))) {
  64             props.load(is);
  65         }
  66 
  67         //printAllCharsets();
  68 
  69         test(props);
  70     }
  71 
  72 
  73     private static final class CheckCharsetMapping {
  74 
  75         /**
  76          * A map that maps Java or XML name to canonical charset names.
  77          * key:    upper cased value of Java or XML name.
  78          * value:  case-sensitive canonical name of charset.
  79          */
  80         private final Map<String, String> charsetMap = new HashMap<>();
  81 
  82         private final Map<String, String> preferredMime = new HashMap<>();
  83 
  84         /**
  85          * Unresolved alias names.
  86          * For a given set of names pointing to the same unresolved charset,
  87          * this map will contain, for each alias in the set, a mapping
  88          * with the alias.toUpperValue() as key and the set of known aliases
  89          * as value.
  90          */
  91         private final Map<String, Collection<String>> unresolved = new HashMap<>();
  92 
  93         public final static class ConflictingCharsetError extends Error {
  94             ConflictingCharsetError(String a, String cs1, String cs2) {
  95                 super("Conflicting charset mapping for '"+a+"': '"+cs1+"' and '"+cs2+"'");
  96             }
  97         }
  98 
  99         public final static class MissingValidCharsetNameError extends Error {
 100             MissingValidCharsetNameError(String name, Collection<String> aliases) {
 101                 super(name+": Line "+aliases+" has no recognized charset alias");
 102             }
 103         }
 104 
 105         public final static class ConflictingPreferredMimeNameError extends Error {
 106             ConflictingPreferredMimeNameError(String a, String cs1, String cs2) {
 107                 super("Conflicting preferred mime name for '"+a+"': '"+cs1+"' and '"+cs2+"'");
 108             }
 109         }
 110 
 111         /**
 112          * For each alias in aliases, attempt to find the canonical
 113          * charset name.
 114          * All names in aliases are supposed to point to the same charset.
 115          * Names in aliases can be java names or XML names, indifferently.
 116          * @param aliases list of names (aliases) for a given charset.
 117          * @return The canonical name of the charset, if found, null otherwise.
 118          */
 119         private String findCharsetNameFor(String[] aliases) {
 120             String cs = null;
 121             String res = null;
 122             for (String a : aliases) {
 123                 final String k = a.toUpperCase();
 124                 String cachedCs = charsetMap.get(k);
 125                 if (cs == null) {
 126                     cs = cachedCs;
 127                 }
 128                 if (cachedCs != null && cs != null
 129                         && !Charset.forName(cachedCs).name().equals(Charset.forName(cs).name())) {
 130                     throw new ConflictingCharsetError(a,cs,cachedCs);
 131                 }
 132                 try {
 133                     final String rcs = Charset.forName(a).name();
 134                     if (cs != null && !Charset.forName(cs).name().equals(rcs)) {
 135                         throw new ConflictingCharsetError(a,cs,rcs);
 136                     }
 137                     if (res == null) {
 138                         if (a.equals(aliases[0])) {
 139                             res = a;
 140                         } else {
 141                             res = cs;
 142                         }
 143                     }
 144                     cs = rcs;
 145                     charsetMap.put(k, res == null ? cs : res);
 146                 } catch (Exception x) {
 147                     continue;
 148                 }
 149             }
 150             return res == null ? cs : res;
 151         }
 152 
 153         /**
 154          * Register a canonical charset name for a given set of aliases.
 155          *
 156          * @param charsetName the canonical charset name.
 157          * @param aliases a list of aliases for the given charset.
 158          */
 159         private void registerCharsetNameFor(String charsetName, String[] aliases) {
 160             if (charsetName == null) throw new NullPointerException();
 161 
 162             for (String a : aliases) {
 163                 String k = a.toUpperCase();
 164                 String csv = charsetMap.get(k);
 165                 if (csv == null) {
 166                     charsetMap.put(k, charsetName);
 167                     csv = charsetName;
 168                 } else if (!csv.equals(charsetName)) {
 169                     throw new ConflictingCharsetError(a,charsetName,csv);
 170                 }
 171 
 172                 final Collection<String> c = unresolved.get(k);
 173                 if (c != null) {
 174                     for (String aa : c) {
 175                         k = aa.toUpperCase();
 176                         String csvv = charsetMap.get(k);
 177                         if (csvv == null) charsetMap.put(k, csv);
 178                         unresolved.remove(k);
 179                     }
 180                     throw new MissingValidCharsetNameError(charsetName,c);
 181                 }
 182             }
 183         }
 184 
 185         /**
 186          * Register a set of aliases as being unresolved.
 187          * @param names    the list of names - this should be what is returned by
 188          *                 nameSet.toArray(new String[nameSet.size()])
 189          * @param nameSet  the set of unresolved aliases.
 190          */
 191         private void registerUnresolvedNamesFor(String[] names, Collection<String> nameSet) {
 192             // This is not necessarily an error: it could happen that some
 193             //    charsets are simply not supported on some OS/Arch
 194             System.err.println("Warning: unresolved charset names: '"+ nameSet
 195                     + "' This is not necessarily an error "
 196                     + "- this charset may not be supported on this platform.");
 197             for (String a : names) {
 198                 final String k = a.toUpperCase();
 199                 final Collection<String> c = unresolved.get(k);
 200                 if (c != null) {
 201                     //System.out.println("Found: "+a+" -> "+c);
 202                     //System.out.println("\t merging "+ c + " with " + nameSet);
 203                     nameSet.addAll(c);
 204                     for (String aa : c) {
 205                         unresolved.put(aa.toUpperCase(), nameSet);
 206                     }
 207                 }
 208                 unresolved.put(k, nameSet);
 209             }
 210         }
 211 
 212 
 213         /**
 214          * Add a new charset name mapping
 215          * @param javaName the (supposedly) java name of the charset.
 216          * @param xmlNames a list of corresponding XML names for that charset.
 217          */
 218         void addMapping(String javaName, Collection<String> xmlNames) {
 219             final LinkedHashSet<String> aliasNames = new LinkedHashSet<>();
 220             aliasNames.add(javaName);
 221             aliasNames.addAll(xmlNames);
 222             final String[] aliases = aliasNames.toArray(new String[aliasNames.size()]);
 223             final String cs = findCharsetNameFor(aliases);
 224             if (cs != null) {
 225                 registerCharsetNameFor(cs, aliases);
 226                 if (xmlNames.size() > 0) {
 227                     String preferred = xmlNames.iterator().next();
 228                     String cachedPreferred = preferredMime.get(cs.toUpperCase());
 229                     if (cachedPreferred != null && !cachedPreferred.equals(preferred)) {
 230                         throw new ConflictingPreferredMimeNameError(cs, cachedPreferred, preferred);
 231                     }
 232                     preferredMime.put(cs.toUpperCase(), preferred);
 233                 }
 234             } else {
 235                 registerUnresolvedNamesFor(aliases, aliasNames);
 236             }
 237         }
 238 
 239         /**
 240          * Returns the canonical name of the charset for the given Java or XML
 241          * alias name.
 242          * @param alias the alias name
 243          * @return the canonical charset name - or null if unknown.
 244          */
 245         public String getCharsetNameFor(String alias) {
 246             return charsetMap.get(alias.toUpperCase());
 247         }
 248 
 249     }
 250 
 251     public static void test(Properties props) throws Exception {
 252 
 253         // First, build a mapping from the properties read from the resource
 254         // file.
 255         // We're going to check the consistency of the resource file
 256         // while building this mapping, and throw errors if the file
 257         // does not meet our assumptions.
 258         //
 259         Map<String, Collection<String>> lines = new HashMap<>();
 260         final CheckCharsetMapping mapping = new CheckCharsetMapping();
 261 
 262         for (String key : props.stringPropertyNames()) {
 263             Collection<String> values = getValues(props.getProperty(key));
 264             lines.put(key, values);
 265             mapping.addMapping(key, values);
 266         }
 267 
 268         // Then build maps of EncodingInfos, and print along debugging
 269         // information that should help understand the content of the
 270         // resource file and the mapping it defines.
 271         //
 272         Map<String, EncodingInfo> javaInfos = new HashMap<>(); // Map indexed by java names
 273         Map<String, EncodingInfo> xmlMap = new HashMap<>();    // Map indexed by XML names
 274         Map<String, String> preferred =
 275                 new HashMap<>(mapping.preferredMime);          // Java Name -> Preferred Mime Name
 276         List<EncodingInfo> all = new ArrayList<>();            // unused...
 277         for (Entry<String, Collection<String>> e : lines.entrySet()) {
 278             final String charsetName = mapping.getCharsetNameFor(e.getKey());
 279             if (charsetName == null) {
 280                 System.out.println("!! No charset for: "+e.getKey()+ " "+ e.getValue());
 281                 continue;
 282             }
 283             Charset c = Charset.forName(charsetName);
 284             EncodingInfo info;
 285             final String k = e.getKey().toUpperCase();
 286             final String kc = charsetName.toUpperCase();
 287             StringBuilder sb = new StringBuilder();
 288             for (String xml : e.getValue()) {
 289                 final String kx = xml.toUpperCase();
 290                 info = xmlMap.get(kx);
 291                 if (info == null) {
 292                     info = new EncodingInfo(xml, charsetName);
 293                     System.out.println("** XML: "+xml+" -> "+charsetName);
 294                     xmlMap.put(kx, info);
 295                     all.add(info);
 296                 }
 297                 if (!javaInfos.containsKey(k)) {
 298                     javaInfos.put(k, info);
 299                     if (!preferred.containsKey(k)) {
 300                         preferred.put(k, xml);
 301                     }
 302                     sb.append("** Java: ").append(k).append(" -> ")
 303                             .append(xml).append(" (charset: ")
 304                             .append(charsetName).append(")\n");
 305                 }
 306                 if (!javaInfos.containsKey(kc)) {
 307                     if (!preferred.containsKey(kc)) {
 308                         preferred.put(kc, xml);
 309                     }
 310                     javaInfos.put(kc, info);
 311                     sb.append("** Java: ").append(kc).append(" -> ")
 312                             .append(xml).append(" (charset: ")
 313                             .append(charsetName).append(")\n");
 314                 }
 315                 if (!javaInfos.containsKey(c.name().toUpperCase())) {
 316                     if (!preferred.containsKey(c.name().toUpperCase())) {
 317                         preferred.put(c.name().toUpperCase(), xml);
 318                     }
 319                     javaInfos.put(c.name().toUpperCase(), info);
 320                     sb.append("** Java: ").append(c.name().toUpperCase()).append(" -> ")
 321                             .append(xml).append(" (charset: ")
 322                             .append(charsetName).append(")\n");
 323                 }
 324             }
 325             if (sb.length() == 0) {
 326                 System.out.println("Nothing new for "+charsetName+": "+e.getKey()+" -> "+e.getValue());
 327             } else {
 328                 System.out.print(sb);
 329             }
 330 
 331         }
 332 
 333         // Now we're going to verify that Encodings.java has done its job
 334         // correctly. We're going to ask Encodings to convert java names to mime
 335         // names and mime names to java names - and verify that the returned
 336         // java names do map to recognized charsets.
 337         //
 338         // We're also going to verify that Encodings has recorded the preferred
 339         // mime name correctly.
 340 
 341         Method m = Encodings.class.getDeclaredMethod("getMimeEncoding", String.class);
 342         m.setAccessible(true);
 343 
 344         Set<String> xNames = new HashSet<>();
 345         Set<String> jNames = new HashSet<>();
 346         for (String name: xmlMap.keySet()) {
 347             final String javaName = checkConvertMime2Java(name);
 348             checkPreferredMime(m, javaName, preferred);
 349             jNames.add(javaName);
 350             xNames.add(name);
 351         }
 352 
 353 
 354         for (String javaName : lines.keySet()) {
 355             final String javaCharsetName = mapping.getCharsetNameFor(javaName.toUpperCase());
 356             if (javaCharsetName == null) continue;
 357             if (!jNames.contains(javaName)) {
 358                 checkPreferredMime(m, javaName, preferred);
 359                 jNames.add(javaName);
 360             }
 361             for (String xml : lines.get(javaName)) {
 362                 if (xNames.contains(xml)) continue;
 363                 final String jName = checkConvertMime2Java(xml);
 364                 xNames.add(xml);
 365                 if (jNames.contains(jName)) continue;
 366                 checkPreferredMime(m, jName, preferred);
 367             }
 368         }
 369     }
 370 
 371     private static String checkConvertMime2Java(String xml) {
 372         final String jName = Encodings.convertMime2JavaEncoding(xml);
 373         final String jCharsetName;
 374         try {
 375             jCharsetName = Charset.forName(jName).name();
 376         } catch (Exception x) {
 377             throw new Error("Unrecognized charset returned by Encodings.convertMime2JavaEncoding(\""+xml+"\")", x);
 378         }
 379         System.out.println("Encodings.convertMime2JavaEncoding(\""+xml+"\") = \""+jName+"\" ("+jCharsetName+")");
 380         return jName;
 381     }
 382 
 383     private static void checkPreferredMime(Method m, String javaName, Map<String,String> preferred)
 384             throws Exception {
 385         final String mime = (String) m.invoke(null, javaName);
 386         final String expected = preferred.get(javaName.toUpperCase());
 387         if (Arrays.deepEquals(new String[] {mime}, new String[] {expected})) {
 388             System.out.println("Encodings.getMimeEncoding(\""+javaName+"\") = \""+mime+"\"");
 389         } else {
 390             throw new Error("Bad preferred mime type for: '"+javaName+"': expected '"+
 391                 expected+"' but got '"+mime+"'");
 392         }
 393     }
 394 
 395     private static Collection<String> getValues(String val) {
 396         int pos = val.indexOf(' ');
 397         if (pos < 0) {
 398             return Collections.singletonList(val);
 399         }
 400         //lastPrintable =
 401         //    Integer.decode(val.substring(pos).trim()).intValue();
 402         StringTokenizer st =
 403             new StringTokenizer(val.substring(0, pos), ",");
 404         final List<String> values = new ArrayList<>(st.countTokens());
 405         while (st.hasMoreTokens()) {
 406             values.add(st.nextToken());
 407         }
 408         return values;
 409     }
 410 
 411     // can be called in main() to help debugging.
 412     // Prints out all available charsets and their recognized aliases
 413     // as returned by the Charset API.
 414     private static void printAllCharsets() {
 415         Map<String, Charset> all = Charset.availableCharsets();
 416         System.out.println("\n=========================================\n");
 417         for (String can : all.keySet()) {
 418             System.out.println(can + ": " + all.get(can).aliases());
 419         }
 420     }
 421 }