1 /* 2 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @bug 8008738 27 * @summary checks that the mapping implemented by 28 * com.sun.org.apache.xml.internal.serializer.Encodings 29 * correctly identifies valid Charset names and 30 * correctly maps them to their preferred mime names. 31 * Also checks that the Encodings.properties resource file 32 * is consistent. 33 * @compile -XDignore.symbol.file CheckEncodingPropertiesFile.java 34 * @run main CheckEncodingPropertiesFile 35 * @author Daniel Fuchs 36 */ 37 38 import com.sun.org.apache.xml.internal.serializer.EncodingInfo; 39 import com.sun.org.apache.xml.internal.serializer.Encodings; 40 import java.io.InputStreamReader; 41 import java.lang.reflect.Method; 42 import java.nio.charset.Charset; 43 import java.util.ArrayList; 44 import java.util.Arrays; 45 import java.util.Collection; 46 import java.util.Collections; 47 import java.util.HashMap; 48 import java.util.HashSet; 49 import java.util.LinkedHashSet; 50 import java.util.List; 51 import java.util.Map; 52 import java.util.Map.Entry; 53 import java.util.Properties; 54 import java.util.Set; 55 import java.util.StringTokenizer; 56 57 public class CheckEncodingPropertiesFile { 58 59 private static final String ENCODINGS_FILE = "com/sun/org/apache/xml/internal/serializer/Encodings.properties"; 60 61 public static void main(String[] args) throws Exception { 62 Properties props = new Properties(); 63 try (InputStreamReader is = new InputStreamReader(ClassLoader.getSystemResourceAsStream(ENCODINGS_FILE))) { 64 props.load(is); 65 } 66 67 //printAllCharsets(); 68 69 test(props); 70 } 71 72 73 private static final class CheckCharsetMapping { 74 75 /** 76 * A map that maps Java or XML name to canonical charset names. 77 * key: upper cased value of Java or XML name. 78 * value: case-sensitive canonical name of charset. 79 */ 80 private final Map<String, String> charsetMap = new HashMap<>(); 81 82 private final Map<String, String> preferredMime = new HashMap<>(); 83 84 /** 85 * Unresolved alias names. 86 * For a given set of names pointing to the same unresolved charset, 87 * this map will contain, for each alias in the set, a mapping 88 * with the alias.toUpperValue() as key and the set of known aliases 89 * as value. 90 */ 91 private final Map<String, Collection<String>> unresolved = new HashMap<>(); 92 93 public final static class ConflictingCharsetError extends Error { 94 ConflictingCharsetError(String a, String cs1, String cs2) { 95 super("Conflicting charset mapping for '"+a+"': '"+cs1+"' and '"+cs2+"'"); 96 } 97 } 98 99 public final static class MissingValidCharsetNameError extends Error { 100 MissingValidCharsetNameError(String name, Collection<String> aliases) { 101 super(name+": Line "+aliases+" has no recognized charset alias"); 102 } 103 } 104 105 public final static class ConflictingPreferredMimeNameError extends Error { 106 ConflictingPreferredMimeNameError(String a, String cs1, String cs2) { 107 super("Conflicting preferred mime name for '"+a+"': '"+cs1+"' and '"+cs2+"'"); 108 } 109 } 110 111 /** 112 * For each alias in aliases, attempt to find the canonical 113 * charset name. 114 * All names in aliases are supposed to point to the same charset. 115 * Names in aliases can be java names or XML names, indifferently. 116 * @param aliases list of names (aliases) for a given charset. 117 * @return The canonical name of the charset, if found, null otherwise. 118 */ 119 private String findCharsetNameFor(String[] aliases) { 120 String cs = null; 121 String res = null; 122 for (String a : aliases) { 123 final String k = a.toUpperCase(); 124 String cachedCs = charsetMap.get(k); 125 if (cs == null) { 126 cs = cachedCs; 127 } 128 if (cachedCs != null && cs != null 129 && !Charset.forName(cachedCs).name().equals(Charset.forName(cs).name())) { 130 throw new ConflictingCharsetError(a,cs,cachedCs); 131 } 132 try { 133 final String rcs = Charset.forName(a).name(); 134 if (cs != null && !Charset.forName(cs).name().equals(rcs)) { 135 throw new ConflictingCharsetError(a,cs,rcs); 136 } 137 if (res == null) { 138 if (a.equals(aliases[0])) { 139 res = a; 140 } else { 141 res = cs; 142 } 143 } 144 cs = rcs; 145 charsetMap.put(k, res == null ? cs : res); 146 } catch (Exception x) { 147 continue; 148 } 149 } 150 return res == null ? cs : res; 151 } 152 153 /** 154 * Register a canonical charset name for a given set of aliases. 155 * 156 * @param charsetName the canonical charset name. 157 * @param aliases a list of aliases for the given charset. 158 */ 159 private void registerCharsetNameFor(String charsetName, String[] aliases) { 160 if (charsetName == null) throw new NullPointerException(); 161 162 for (String a : aliases) { 163 String k = a.toUpperCase(); 164 String csv = charsetMap.get(k); 165 if (csv == null) { 166 charsetMap.put(k, charsetName); 167 csv = charsetName; 168 } else if (!csv.equals(charsetName)) { 169 throw new ConflictingCharsetError(a,charsetName,csv); 170 } 171 172 final Collection<String> c = unresolved.get(k); 173 if (c != null) { 174 for (String aa : c) { 175 k = aa.toUpperCase(); 176 String csvv = charsetMap.get(k); 177 if (csvv == null) charsetMap.put(k, csv); 178 unresolved.remove(k); 179 } 180 throw new MissingValidCharsetNameError(charsetName,c); 181 } 182 } 183 } 184 185 /** 186 * Register a set of aliases as being unresolved. 187 * @param names the list of names - this should be what is returned by 188 * nameSet.toArray(new String[nameSet.size()]) 189 * @param nameSet the set of unresolved aliases. 190 */ 191 private void registerUnresolvedNamesFor(String[] names, Collection<String> nameSet) { 192 // This is not necessarily an error: it could happen that some 193 // charsets are simply not supported on some OS/Arch 194 System.err.println("Warning: unresolved charset names: '"+ nameSet 195 + "' This is not necessarily an error " 196 + "- this charset may not be supported on this platform."); 197 for (String a : names) { 198 final String k = a.toUpperCase(); 199 final Collection<String> c = unresolved.get(k); 200 if (c != null) { 201 //System.out.println("Found: "+a+" -> "+c); 202 //System.out.println("\t merging "+ c + " with " + nameSet); 203 nameSet.addAll(c); 204 for (String aa : c) { 205 unresolved.put(aa.toUpperCase(), nameSet); 206 } 207 } 208 unresolved.put(k, nameSet); 209 } 210 } 211 212 213 /** 214 * Add a new charset name mapping 215 * @param javaName the (supposedly) java name of the charset. 216 * @param xmlNames a list of corresponding XML names for that charset. 217 */ 218 void addMapping(String javaName, Collection<String> xmlNames) { 219 final LinkedHashSet<String> aliasNames = new LinkedHashSet<>(); 220 aliasNames.add(javaName); 221 aliasNames.addAll(xmlNames); 222 final String[] aliases = aliasNames.toArray(new String[aliasNames.size()]); 223 final String cs = findCharsetNameFor(aliases); 224 if (cs != null) { 225 registerCharsetNameFor(cs, aliases); 226 if (xmlNames.size() > 0) { 227 String preferred = xmlNames.iterator().next(); 228 String cachedPreferred = preferredMime.get(cs.toUpperCase()); 229 if (cachedPreferred != null && !cachedPreferred.equals(preferred)) { 230 throw new ConflictingPreferredMimeNameError(cs, cachedPreferred, preferred); 231 } 232 preferredMime.put(cs.toUpperCase(), preferred); 233 } 234 } else { 235 registerUnresolvedNamesFor(aliases, aliasNames); 236 } 237 } 238 239 /** 240 * Returns the canonical name of the charset for the given Java or XML 241 * alias name. 242 * @param alias the alias name 243 * @return the canonical charset name - or null if unknown. 244 */ 245 public String getCharsetNameFor(String alias) { 246 return charsetMap.get(alias.toUpperCase()); 247 } 248 249 } 250 251 public static void test(Properties props) throws Exception { 252 253 // First, build a mapping from the properties read from the resource 254 // file. 255 // We're going to check the consistency of the resource file 256 // while building this mapping, and throw errors if the file 257 // does not meet our assumptions. 258 // 259 Map<String, Collection<String>> lines = new HashMap<>(); 260 final CheckCharsetMapping mapping = new CheckCharsetMapping(); 261 262 for (String key : props.stringPropertyNames()) { 263 Collection<String> values = getValues(props.getProperty(key)); 264 lines.put(key, values); 265 mapping.addMapping(key, values); 266 } 267 268 // Then build maps of EncodingInfos, and print along debugging 269 // information that should help understand the content of the 270 // resource file and the mapping it defines. 271 // 272 Map<String, EncodingInfo> javaInfos = new HashMap<>(); // Map indexed by java names 273 Map<String, EncodingInfo> xmlMap = new HashMap<>(); // Map indexed by XML names 274 Map<String, String> preferred = 275 new HashMap<>(mapping.preferredMime); // Java Name -> Preferred Mime Name 276 List<EncodingInfo> all = new ArrayList<>(); // unused... 277 for (Entry<String, Collection<String>> e : lines.entrySet()) { 278 final String charsetName = mapping.getCharsetNameFor(e.getKey()); 279 if (charsetName == null) { 280 System.out.println("!! No charset for: "+e.getKey()+ " "+ e.getValue()); 281 continue; 282 } 283 Charset c = Charset.forName(charsetName); 284 EncodingInfo info; 285 final String k = e.getKey().toUpperCase(); 286 final String kc = charsetName.toUpperCase(); 287 StringBuilder sb = new StringBuilder(); 288 for (String xml : e.getValue()) { 289 final String kx = xml.toUpperCase(); 290 info = xmlMap.get(kx); 291 if (info == null) { 292 info = new EncodingInfo(xml, charsetName); 293 System.out.println("** XML: "+xml+" -> "+charsetName); 294 xmlMap.put(kx, info); 295 all.add(info); 296 } 297 if (!javaInfos.containsKey(k)) { 298 javaInfos.put(k, info); 299 if (!preferred.containsKey(k)) { 300 preferred.put(k, xml); 301 } 302 sb.append("** Java: ").append(k).append(" -> ") 303 .append(xml).append(" (charset: ") 304 .append(charsetName).append(")\n"); 305 } 306 if (!javaInfos.containsKey(kc)) { 307 if (!preferred.containsKey(kc)) { 308 preferred.put(kc, xml); 309 } 310 javaInfos.put(kc, info); 311 sb.append("** Java: ").append(kc).append(" -> ") 312 .append(xml).append(" (charset: ") 313 .append(charsetName).append(")\n"); 314 } 315 if (!javaInfos.containsKey(c.name().toUpperCase())) { 316 if (!preferred.containsKey(c.name().toUpperCase())) { 317 preferred.put(c.name().toUpperCase(), xml); 318 } 319 javaInfos.put(c.name().toUpperCase(), info); 320 sb.append("** Java: ").append(c.name().toUpperCase()).append(" -> ") 321 .append(xml).append(" (charset: ") 322 .append(charsetName).append(")\n"); 323 } 324 } 325 if (sb.length() == 0) { 326 System.out.println("Nothing new for "+charsetName+": "+e.getKey()+" -> "+e.getValue()); 327 } else { 328 System.out.print(sb); 329 } 330 331 } 332 333 // Now we're going to verify that Encodings.java has done its job 334 // correctly. We're going to ask Encodings to convert java names to mime 335 // names and mime names to java names - and verify that the returned 336 // java names do map to recognized charsets. 337 // 338 // We're also going to verify that Encodings has recorded the preferred 339 // mime name correctly. 340 341 Method m = Encodings.class.getDeclaredMethod("getMimeEncoding", String.class); 342 m.setAccessible(true); 343 344 Set<String> xNames = new HashSet<>(); 345 Set<String> jNames = new HashSet<>(); 346 for (String name: xmlMap.keySet()) { 347 final String javaName = checkConvertMime2Java(name); 348 checkPreferredMime(m, javaName, preferred); 349 jNames.add(javaName); 350 xNames.add(name); 351 } 352 353 354 for (String javaName : lines.keySet()) { 355 final String javaCharsetName = mapping.getCharsetNameFor(javaName.toUpperCase()); 356 if (javaCharsetName == null) continue; 357 if (!jNames.contains(javaName)) { 358 checkPreferredMime(m, javaName, preferred); 359 jNames.add(javaName); 360 } 361 for (String xml : lines.get(javaName)) { 362 if (xNames.contains(xml)) continue; 363 final String jName = checkConvertMime2Java(xml); 364 xNames.add(xml); 365 if (jNames.contains(jName)) continue; 366 checkPreferredMime(m, jName, preferred); 367 } 368 } 369 } 370 371 private static String checkConvertMime2Java(String xml) { 372 final String jName = Encodings.convertMime2JavaEncoding(xml); 373 final String jCharsetName; 374 try { 375 jCharsetName = Charset.forName(jName).name(); 376 } catch (Exception x) { 377 throw new Error("Unrecognized charset returned by Encodings.convertMime2JavaEncoding(\""+xml+"\")", x); 378 } 379 System.out.println("Encodings.convertMime2JavaEncoding(\""+xml+"\") = \""+jName+"\" ("+jCharsetName+")"); 380 return jName; 381 } 382 383 private static void checkPreferredMime(Method m, String javaName, Map<String,String> preferred) 384 throws Exception { 385 final String mime = (String) m.invoke(null, javaName); 386 final String expected = preferred.get(javaName.toUpperCase()); 387 if (Arrays.deepEquals(new String[] {mime}, new String[] {expected})) { 388 System.out.println("Encodings.getMimeEncoding(\""+javaName+"\") = \""+mime+"\""); 389 } else { 390 throw new Error("Bad preferred mime type for: '"+javaName+"': expected '"+ 391 expected+"' but got '"+mime+"'"); 392 } 393 } 394 395 private static Collection<String> getValues(String val) { 396 int pos = val.indexOf(' '); 397 if (pos < 0) { 398 return Collections.singletonList(val); 399 } 400 //lastPrintable = 401 // Integer.decode(val.substring(pos).trim()).intValue(); 402 StringTokenizer st = 403 new StringTokenizer(val.substring(0, pos), ","); 404 final List<String> values = new ArrayList<>(st.countTokens()); 405 while (st.hasMoreTokens()) { 406 values.add(st.nextToken()); 407 } 408 return values; 409 } 410 411 // can be called in main() to help debugging. 412 // Prints out all available charsets and their recognized aliases 413 // as returned by the Charset API. 414 private static void printAllCharsets() { 415 Map<String, Charset> all = Charset.availableCharsets(); 416 System.out.println("\n=========================================\n"); 417 for (String can : all.keySet()) { 418 System.out.println(can + ": " + all.get(can).aliases()); 419 } 420 } 421 }