1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 
  22 package com.sun.org.apache.xml.internal.serialize;
  23 
  24 
  25 import com.sun.org.apache.xerces.internal.util.EncodingMap;
  26 import java.io.UnsupportedEncodingException;
  27 import java.util.Locale;
  28 import java.util.Map;
  29 import java.util.concurrent.ConcurrentHashMap;
  30 
  31 
  32 /**
  33  * Provides information about encodings. Depends on the Java runtime
  34  * to provides writers for the different encodings, but can be used
  35  * to override encoding names and provide the last printable character
  36  * for each encoding.
  37  *
  38  * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
  39  *
  40  * @deprecated As of JDK 1.9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation
  41  * is replaced by that of Xalan. Main class
  42  * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced
  43  * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}.
  44  */
  45 class Encodings
  46 {
  47 
  48 
  49     /**
  50      * The last printable character for unknown encodings.
  51      */
  52     static final int DEFAULT_LAST_PRINTABLE = 0x7F;
  53 
  54     // last printable character for Unicode-compatible encodings
  55     static final int LAST_PRINTABLE_UNICODE = 0xffff;
  56     // unicode-compliant encodings; can express plane 0
  57     static final String[] UNICODE_ENCODINGS = {
  58         "Unicode", "UnicodeBig", "UnicodeLittle", "GB2312", "UTF8", "UTF-16",
  59     };
  60     // default (Java) encoding if none supplied:
  61     static final String DEFAULT_ENCODING = "UTF8";
  62 
  63     // note that the size of this Map
  64     // is bounded by the number of encodings recognized by EncodingMap;
  65     // therefore it poses no static mutability risk.
  66     private static final Map<String, EncodingInfo> _encodings = new ConcurrentHashMap();
  67 
  68     /**
  69      * @param encoding a MIME charset name, or null.
  70      */
  71     static EncodingInfo getEncodingInfo(String encoding, boolean allowJavaNames) throws UnsupportedEncodingException {
  72         EncodingInfo eInfo = null;
  73         if (encoding == null) {
  74             if((eInfo = _encodings.get(DEFAULT_ENCODING)) != null)
  75                 return eInfo;
  76             eInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(DEFAULT_ENCODING), DEFAULT_ENCODING, LAST_PRINTABLE_UNICODE);
  77             _encodings.put(DEFAULT_ENCODING, eInfo);
  78             return eInfo;
  79         }
  80         // need to convert it to upper case:
  81         encoding = encoding.toUpperCase(Locale.ENGLISH);
  82         String jName = EncodingMap.getIANA2JavaMapping(encoding);
  83         if(jName == null) {
  84             // see if the encoding passed in is a Java encoding name.
  85             if(allowJavaNames ) {
  86                 EncodingInfo.testJavaEncodingName(encoding);
  87                 if((eInfo = _encodings.get(encoding)) != null)
  88                     return eInfo;
  89                 // is it known to be unicode-compliant?
  90                 int i=0;
  91                 for(; i<UNICODE_ENCODINGS.length; i++) {
  92                     if(UNICODE_ENCODINGS[i].equalsIgnoreCase(encoding)) {
  93                         eInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(encoding), encoding, LAST_PRINTABLE_UNICODE);
  94                         break;
  95                     }
  96                 }
  97                 if(i == UNICODE_ENCODINGS.length) {
  98                     eInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(encoding), encoding, DEFAULT_LAST_PRINTABLE);
  99                 }
 100                 _encodings.put(encoding, eInfo);
 101                 return eInfo;
 102             } else {
 103                 throw new UnsupportedEncodingException(encoding);
 104             }
 105         }
 106         if ((eInfo = _encodings.get(jName)) != null)
 107             return eInfo;
 108         // have to create one...
 109         // is it known to be unicode-compliant?
 110         int i=0;
 111         for(; i<UNICODE_ENCODINGS.length; i++) {
 112             if(UNICODE_ENCODINGS[i].equalsIgnoreCase(jName)) {
 113                 eInfo = new EncodingInfo(encoding, jName, LAST_PRINTABLE_UNICODE);
 114                 break;
 115             }
 116         }
 117         if(i == UNICODE_ENCODINGS.length) {
 118             eInfo = new EncodingInfo(encoding, jName, DEFAULT_LAST_PRINTABLE);
 119         }
 120         _encodings.put(jName, eInfo);
 121         return eInfo;
 122     }
 123 
 124     static final String JIS_DANGER_CHARS
 125     = "\\\u007e\u007f\u00a2\u00a3\u00a5\u00ac"
 126     +"\u2014\u2015\u2016\u2026\u203e\u203e\u2225\u222f\u301c"
 127     +"\uff3c\uff5e\uffe0\uffe1\uffe2\uffe3";
 128 
 129 }