1 /*
   2  * Copyright 2000-2008 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Sun designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.CharConversionException;
  29 import java.io.UnsupportedEncodingException;
  30 import java.lang.ref.SoftReference;
  31 import java.nio.ByteBuffer;
  32 import java.nio.CharBuffer;
  33 import java.nio.BufferOverflowException;
  34 import java.nio.BufferUnderflowException;
  35 import java.nio.charset.Charset;
  36 import java.nio.charset.CharsetDecoder;
  37 import java.nio.charset.CharsetEncoder;
  38 import java.nio.charset.CharacterCodingException;
  39 import java.nio.charset.CoderResult;
  40 import java.nio.charset.CodingErrorAction;
  41 import java.nio.charset.IllegalCharsetNameException;
  42 import java.nio.charset.MalformedInputException;
  43 import java.nio.charset.UnsupportedCharsetException;
  44 import java.util.Arrays;
  45 import sun.misc.MessageUtils;
  46 import sun.nio.cs.HistoricallyNamedCharset;
  47 
  48 /**
  49  * Utility class for string encoding and decoding.
  50  */
  51 
  52 class StringCoding {
  53 
  54     private StringCoding() { }
  55 
  56     /** The cached coders for each thread */
  57     private final static ThreadLocal<SoftReference<StringDecoder>> decoder =
  58         new ThreadLocal<SoftReference<StringDecoder>>();
  59     private final static ThreadLocal<SoftReference<StringEncoder>> encoder =
  60         new ThreadLocal<SoftReference<StringEncoder>>();
  61 
  62     private static boolean warnUnsupportedCharset = true;
  63 
  64     private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
  65         SoftReference<T> sr = tl.get();
  66         if (sr == null)
  67             return null;
  68         return sr.get();
  69     }
  70 
  71     private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
  72         tl.set(new SoftReference<T>(ob));
  73     }
  74 
  75     // Trim the given byte array to the given length
  76     //
  77     private static byte[] safeTrim(byte[] ba, int len, Charset cs) {
  78         if (len == ba.length
  79             && (System.getSecurityManager() == null
  80                 || cs.getClass().getClassLoader0() == null))
  81             return ba;
  82         else
  83             return Arrays.copyOf(ba, len);
  84     }
  85 
  86     // Trim the given char array to the given length
  87     //
  88     private static char[] safeTrim(char[] ca, int len, Charset cs) {
  89         if (len == ca.length
  90             && (System.getSecurityManager() == null
  91                 || cs.getClass().getClassLoader0() == null))
  92             return ca;
  93         else
  94             return Arrays.copyOf(ca, len);
  95     }
  96 
  97     private static int scale(int len, float expansionFactor) {
  98         // We need to perform double, not float, arithmetic; otherwise
  99         // we lose low order bits when len is larger than 2**24.
 100         return (int)(len * (double)expansionFactor);
 101     }
 102 
 103     private static Charset lookupCharset(String csn) {
 104         if (Charset.isSupported(csn)) {
 105             try {
 106                 return Charset.forName(csn);
 107             } catch (UnsupportedCharsetException x) {
 108                 throw new Error(x);
 109             }
 110         }
 111         return null;
 112     }
 113 
 114     private static void warnUnsupportedCharset(String csn) {
 115         if (warnUnsupportedCharset) {
 116             // Use sun.misc.MessageUtils rather than the Logging API or
 117             // System.err since this method may be called during VM
 118             // initialization before either is available.
 119             MessageUtils.err("WARNING: Default charset " + csn +
 120                              " not supported, using ISO-8859-1 instead");
 121             warnUnsupportedCharset = false;
 122         }
 123     }
 124 
 125 
 126     // -- Decoding --
 127     private static class StringDecoder {
 128         private final String requestedCharsetName;
 129         private final Charset cs;
 130         private final CharsetDecoder cd;
 131 
 132         private StringDecoder(Charset cs, String rcn) {
 133             this.requestedCharsetName = rcn;
 134             this.cs = cs;
 135             this.cd = cs.newDecoder()
 136                 .onMalformedInput(CodingErrorAction.REPLACE)
 137                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
 138         }
 139 
 140         String charsetName() {
 141             if (cs instanceof HistoricallyNamedCharset)
 142                 return ((HistoricallyNamedCharset)cs).historicalName();
 143             return cs.name();
 144         }
 145 
 146         final String requestedCharsetName() {
 147             return requestedCharsetName;
 148         }
 149 
 150         char[] decode(byte[] ba, int off, int len) {
 151             int en = scale(len, cd.maxCharsPerByte());
 152             char[] ca = new char[en];
 153             if (len == 0)
 154                 return ca;
 155             cd.reset();
 156             ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
 157             CharBuffer cb = CharBuffer.wrap(ca);
 158             try {
 159                 CoderResult cr = cd.decode(bb, cb, true);
 160                 if (!cr.isUnderflow())
 161                     cr.throwException();
 162                 cr = cd.flush(cb);
 163                 if (!cr.isUnderflow())
 164                     cr.throwException();
 165             } catch (CharacterCodingException x) {
 166                 // Substitution is always enabled,
 167                 // so this shouldn't happen
 168                 throw new Error(x);
 169             }
 170             return safeTrim(ca, cb.position(), cs);
 171         }
 172 
 173     }
 174 
 175     static char[] decode(String charsetName, byte[] ba, int off, int len)
 176         throws UnsupportedEncodingException
 177     {
 178         StringDecoder sd = deref(decoder);
 179         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
 180         if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
 181                               || csn.equals(sd.charsetName()))) {
 182             sd = null;
 183             try {
 184                 Charset cs = lookupCharset(csn);
 185                 if (cs != null)
 186                     sd = new StringDecoder(cs, csn);
 187             } catch (IllegalCharsetNameException x) {}
 188             if (sd == null)
 189                 throw new UnsupportedEncodingException(csn);
 190             set(decoder, sd);
 191         }
 192         return sd.decode(ba, off, len);
 193     }
 194 
 195     static char[] decode(Charset cs, byte[] ba, int off, int len) {
 196         StringDecoder sd = new StringDecoder(cs, cs.name());
 197         return sd.decode(Arrays.copyOfRange(ba, off, off + len), 0, len);
 198     }
 199 
 200     static char[] decode(byte[] ba, int off, int len) {
 201         String csn = Charset.defaultCharset().name();
 202         try {
 203             return decode(csn, ba, off, len);
 204         } catch (UnsupportedEncodingException x) {
 205             warnUnsupportedCharset(csn);
 206         }
 207         try {
 208             return decode("ISO-8859-1", ba, off, len);
 209         } catch (UnsupportedEncodingException x) {
 210             // If this code is hit during VM initialization, MessageUtils is
 211             // the only way we will be able to get any kind of error message.
 212             MessageUtils.err("ISO-8859-1 charset not available: "
 213                              + x.toString());
 214             // If we can not find ISO-8859-1 (a required encoding) then things
 215             // are seriously wrong with the installation.
 216             System.exit(1);
 217             return null;
 218         }
 219     }
 220 
 221 
 222 
 223 
 224     // -- Encoding --
 225     private static class StringEncoder {
 226         private Charset cs;
 227         private CharsetEncoder ce;
 228         private final String requestedCharsetName;
 229 
 230         private StringEncoder(Charset cs, String rcn) {
 231             this.requestedCharsetName = rcn;
 232             this.cs = cs;
 233             this.ce = cs.newEncoder()
 234                 .onMalformedInput(CodingErrorAction.REPLACE)
 235                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
 236         }
 237 
 238         String charsetName() {
 239             if (cs instanceof HistoricallyNamedCharset)
 240                 return ((HistoricallyNamedCharset)cs).historicalName();
 241             return cs.name();
 242         }
 243 
 244         final String requestedCharsetName() {
 245             return requestedCharsetName;
 246         }
 247 
 248         byte[] encode(char[] ca, int off, int len) {
 249             int en = scale(len, ce.maxBytesPerChar());
 250             byte[] ba = new byte[en];
 251             if (len == 0)
 252                 return ba;
 253 
 254             ce.reset();
 255             ByteBuffer bb = ByteBuffer.wrap(ba);
 256             CharBuffer cb = CharBuffer.wrap(ca, off, len);
 257             try {
 258                 CoderResult cr = ce.encode(cb, bb, true);
 259                 if (!cr.isUnderflow())
 260                     cr.throwException();
 261                 cr = ce.flush(bb);
 262                 if (!cr.isUnderflow())
 263                     cr.throwException();
 264             } catch (CharacterCodingException x) {
 265                 // Substitution is always enabled,
 266                 // so this shouldn't happen
 267                 throw new Error(x);
 268             }
 269             return safeTrim(ba, bb.position(), cs);
 270         }
 271     }
 272 
 273     static byte[] encode(String charsetName, char[] ca, int off, int len)
 274         throws UnsupportedEncodingException
 275     {
 276         StringEncoder se = deref(encoder);
 277         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
 278         if ((se == null) || !(csn.equals(se.requestedCharsetName())
 279                               || csn.equals(se.charsetName()))) {
 280             se = null;
 281             try {
 282                 Charset cs = lookupCharset(csn);
 283                 if (cs != null)
 284                     se = new StringEncoder(cs, csn);
 285             } catch (IllegalCharsetNameException x) {}
 286             if (se == null)
 287                 throw new UnsupportedEncodingException (csn);
 288             set(encoder, se);
 289         }
 290         return se.encode(ca, off, len);
 291     }
 292 
 293     static byte[] encode(Charset cs, char[] ca, int off, int len) {
 294         StringEncoder se = new StringEncoder(cs, cs.name());
 295         return se.encode(Arrays.copyOfRange(ca, off, off + len), 0, len);
 296     }
 297 
 298     static byte[] encode(char[] ca, int off, int len) {
 299         String csn = Charset.defaultCharset().name();
 300         try {
 301             return encode(csn, ca, off, len);
 302         } catch (UnsupportedEncodingException x) {
 303             warnUnsupportedCharset(csn);
 304         }
 305         try {
 306             return encode("ISO-8859-1", ca, off, len);
 307         } catch (UnsupportedEncodingException x) {
 308             // If this code is hit during VM initialization, MessageUtils is
 309             // the only way we will be able to get any kind of error message.
 310             MessageUtils.err("ISO-8859-1 charset not available: "
 311                              + x.toString());
 312             // If we can not find ISO-8859-1 (a required encoding) then things
 313             // are seriously wrong with the installation.
 314             System.exit(1);
 315             return null;
 316         }
 317     }
 318 }