1 /*
   2  * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.UnsupportedEncodingException;
  29 import java.lang.ref.SoftReference;
  30 import java.nio.ByteBuffer;
  31 import java.nio.CharBuffer;
  32 import java.nio.charset.Charset;
  33 import java.nio.charset.CharsetDecoder;
  34 import java.nio.charset.CharsetEncoder;
  35 import java.nio.charset.CharacterCodingException;
  36 import java.nio.charset.CoderResult;
  37 import java.nio.charset.CodingErrorAction;
  38 import java.nio.charset.IllegalCharsetNameException;
  39 import java.nio.charset.UnsupportedCharsetException;
  40 import java.util.Arrays;
  41 import sun.misc.MessageUtils;
  42 import sun.nio.cs.HistoricallyNamedCharset;
  43 import sun.nio.cs.ArrayDecoder;
  44 import sun.nio.cs.ArrayEncoder;
  45 
  46 /**
  47  * Utility class for string encoding and decoding.
  48  */
  49 
  50 class StringCoding {
  51 
  52     private StringCoding() { }
  53 
  54     /** The cached coders for each thread */
  55     private final static ThreadLocal<SoftReference<StringDecoder>> decoder =
  56         new ThreadLocal<SoftReference<StringDecoder>>();
  57     private final static ThreadLocal<SoftReference<StringEncoder>> encoder =
  58         new ThreadLocal<SoftReference<StringEncoder>>();
  59 
  60     private static boolean warnUnsupportedCharset = true;
  61 
  62     private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
  63         SoftReference<T> sr = tl.get();
  64         if (sr == null)
  65             return null;
  66         return sr.get();
  67     }
  68 
  69     private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
  70         tl.set(new SoftReference<T>(ob));
  71     }
  72 
  73     // Trim the given byte array to the given length
  74     //
  75     private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
  76         if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
  77             return ba;
  78         else
  79             return Arrays.copyOf(ba, len);
  80     }
  81 
  82     // Trim the given char array to the given length
  83     //
  84     private static char[] safeTrim(char[] ca, int len,
  85                                    Charset cs, boolean isTrusted) {
  86         if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
  87             return ca;
  88         else
  89             return Arrays.copyOf(ca, len);
  90     }
  91 
  92     private static int scale(int len, float expansionFactor) {
  93         // We need to perform double, not float, arithmetic; otherwise
  94         // we lose low order bits when len is larger than 2**24.
  95         return (int)(len * (double)expansionFactor);
  96     }
  97 
  98     private static Charset lookupCharset(String csn) {
  99         if (Charset.isSupported(csn)) {
 100             try {
 101                 return Charset.forName(csn);
 102             } catch (UnsupportedCharsetException x) {
 103                 throw new Error(x);
 104             }
 105         }
 106         return null;
 107     }
 108 
 109     private static void warnUnsupportedCharset(String csn) {
 110         if (warnUnsupportedCharset) {
 111             // Use sun.misc.MessageUtils rather than the Logging API or
 112             // System.err since this method may be called during VM
 113             // initialization before either is available.
 114             MessageUtils.err("WARNING: Default charset " + csn +
 115                              " not supported, using ISO-8859-1 instead");
 116             warnUnsupportedCharset = false;
 117         }
 118     }
 119 
 120 
 121     // -- Decoding --
 122     private static class StringDecoder {
 123         private final String requestedCharsetName;
 124         private final Charset cs;
 125         private final CharsetDecoder cd;
 126         private final boolean isTrusted;
 127 
 128         private StringDecoder(Charset cs, String rcn) {
 129             this.requestedCharsetName = rcn;
 130             this.cs = cs;
 131             this.cd = cs.newDecoder()
 132                 .onMalformedInput(CodingErrorAction.REPLACE)
 133                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
 134             this.isTrusted = (cs.getClass().getClassLoader0() == null);
 135         }
 136 
 137         String charsetName() {
 138             if (cs instanceof HistoricallyNamedCharset)
 139                 return ((HistoricallyNamedCharset)cs).historicalName();
 140             return cs.name();
 141         }
 142 
 143         final String requestedCharsetName() {
 144             return requestedCharsetName;
 145         }
 146 
 147         char[] decode(byte[] ba, int off, int len) {
 148             int en = scale(len, cd.maxCharsPerByte());
 149             char[] ca = new char[en];
 150             if (len == 0)
 151                 return ca;
 152             if (cd instanceof ArrayDecoder) {
 153                 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
 154                 return safeTrim(ca, clen, cs, isTrusted);
 155             } else {
 156                 cd.reset();
 157                 ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
 158                 CharBuffer cb = CharBuffer.wrap(ca);
 159                 try {
 160                     CoderResult cr = cd.decode(bb, cb, true);
 161                     if (!cr.isUnderflow())
 162                         cr.throwException();
 163                     cr = cd.flush(cb);
 164                     if (!cr.isUnderflow())
 165                         cr.throwException();
 166                 } catch (CharacterCodingException x) {
 167                     // Substitution is always enabled,
 168                     // so this shouldn't happen
 169                     throw new Error(x);
 170                 }
 171                 return safeTrim(ca, cb.position(), cs, isTrusted);
 172             }
 173         }
 174     }
 175 
 176     static char[] decode(String charsetName, byte[] ba, int off, int len)
 177         throws UnsupportedEncodingException
 178     {
 179         StringDecoder sd = deref(decoder);
 180         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
 181         if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
 182                               || csn.equals(sd.charsetName()))) {
 183             sd = null;
 184             try {
 185                 Charset cs = lookupCharset(csn);
 186                 if (cs != null)
 187                     sd = new StringDecoder(cs, csn);
 188             } catch (IllegalCharsetNameException x) {}
 189             if (sd == null)
 190                 throw new UnsupportedEncodingException(csn);
 191             set(decoder, sd);
 192         }
 193         return sd.decode(ba, off, len);
 194     }
 195 
 196     static char[] decode(Charset cs, byte[] ba, int off, int len) {
 197         // (1)We never cache the "external" cs, the only benefit of creating
 198         // an additional StringDe/Encoder object to wrap it is to share the
 199         // de/encode() method. These SD/E objects are short-lifed, the young-gen
 200         // gc should be able to take care of them well. But the best approash
 201         // is still not to generate them if not really necessary.
 202         // (2)The defensive copy of the input byte/char[] has a big performance
 203         // impact, as well as the outgoing result byte/char[]. Need to do the
 204         // optimization check of (sm==null && classLoader0==null) for both.
 205         // (3)getClass().getClassLoader0() is expensive
 206         // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
 207         // is only chcked (and then isTrusted gets set) when (SM==null). It is
 208         // possible that the SM==null for now but then SM is NOT null later
 209         // when safeTrim() is invoked...the "safe" way to do is to redundant
 210         // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
 211         // but it then can be argued that the SM is null when the opertaion
 212         // is started...
 213         CharsetDecoder cd = cs.newDecoder();
 214         int en = scale(len, cd.maxCharsPerByte());
 215         char[] ca = new char[en];
 216         if (len == 0)
 217             return ca;
 218         boolean isTrusted = false;
 219         if (System.getSecurityManager() != null) {
 220             if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
 221                 ba =  Arrays.copyOfRange(ba, off, off + len);
 222                 off = 0;
 223             }
 224         }
 225         if (cd instanceof ArrayDecoder) {
 226             int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
 227             return safeTrim(ca, clen, cs, isTrusted);
 228         } else {
 229             cd.onMalformedInput(CodingErrorAction.REPLACE)
 230               .onUnmappableCharacter(CodingErrorAction.REPLACE)
 231               .reset();
 232             ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
 233             CharBuffer cb = CharBuffer.wrap(ca);
 234             try {
 235                 CoderResult cr = cd.decode(bb, cb, true);
 236                 if (!cr.isUnderflow())
 237                     cr.throwException();
 238                 cr = cd.flush(cb);
 239                 if (!cr.isUnderflow())
 240                     cr.throwException();
 241             } catch (CharacterCodingException x) {
 242                 // Substitution is always enabled,
 243                 // so this shouldn't happen
 244                 throw new Error(x);
 245             }
 246             return safeTrim(ca, cb.position(), cs, isTrusted);
 247         }
 248     }
 249 
 250     static char[] decode(byte[] ba, int off, int len) {
 251         String csn = Charset.defaultCharset().name();
 252         try {
 253             return decode(csn, ba, off, len);
 254         } catch (UnsupportedEncodingException x) {
 255             warnUnsupportedCharset(csn);
 256         }
 257         try {
 258             return decode("ISO-8859-1", ba, off, len);
 259         } catch (UnsupportedEncodingException x) {
 260             // If this code is hit during VM initialization, MessageUtils is
 261             // the only way we will be able to get any kind of error message.
 262             MessageUtils.err("ISO-8859-1 charset not available: "
 263                              + x.toString());
 264             // If we can not find ISO-8859-1 (a required encoding) then things
 265             // are seriously wrong with the installation.
 266             System.exit(1);
 267             return null;
 268         }
 269     }
 270 
 271     // -- Encoding --
 272     private static class StringEncoder {
 273         private Charset cs;
 274         private CharsetEncoder ce;
 275         private final String requestedCharsetName;
 276         private final boolean isTrusted;
 277 
 278         private StringEncoder(Charset cs, String rcn) {
 279             this.requestedCharsetName = rcn;
 280             this.cs = cs;
 281             this.ce = cs.newEncoder()
 282                 .onMalformedInput(CodingErrorAction.REPLACE)
 283                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
 284             this.isTrusted = (cs.getClass().getClassLoader0() == null);
 285         }
 286 
 287         String charsetName() {
 288             if (cs instanceof HistoricallyNamedCharset)
 289                 return ((HistoricallyNamedCharset)cs).historicalName();
 290             return cs.name();
 291         }
 292 
 293         final String requestedCharsetName() {
 294             return requestedCharsetName;
 295         }
 296 
 297         byte[] encode(char[] ca, int off, int len) {
 298             int en = scale(len, ce.maxBytesPerChar());
 299             byte[] ba = new byte[en];
 300             if (len == 0)
 301                 return ba;
 302             if (ce instanceof ArrayEncoder) {
 303                 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
 304                 return safeTrim(ba, blen, cs, isTrusted);
 305             } else {
 306                 ce.reset();
 307                 ByteBuffer bb = ByteBuffer.wrap(ba);
 308                 CharBuffer cb = CharBuffer.wrap(ca, off, len);
 309                 try {
 310                     CoderResult cr = ce.encode(cb, bb, true);
 311                     if (!cr.isUnderflow())
 312                         cr.throwException();
 313                     cr = ce.flush(bb);
 314                     if (!cr.isUnderflow())
 315                         cr.throwException();
 316                 } catch (CharacterCodingException x) {
 317                     // Substitution is always enabled,
 318                     // so this shouldn't happen
 319                     throw new Error(x);
 320                 }
 321                 return safeTrim(ba, bb.position(), cs, isTrusted);
 322             }
 323         }
 324     }
 325 
 326     static byte[] encode(String charsetName, char[] ca, int off, int len)
 327         throws UnsupportedEncodingException
 328     {
 329         StringEncoder se = deref(encoder);
 330         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
 331         if ((se == null) || !(csn.equals(se.requestedCharsetName())
 332                               || csn.equals(se.charsetName()))) {
 333             se = null;
 334             try {
 335                 Charset cs = lookupCharset(csn);
 336                 if (cs != null)
 337                     se = new StringEncoder(cs, csn);
 338             } catch (IllegalCharsetNameException x) {}
 339             if (se == null)
 340                 throw new UnsupportedEncodingException (csn);
 341             set(encoder, se);
 342         }
 343         return se.encode(ca, off, len);
 344     }
 345 
 346     static byte[] encode(Charset cs, char[] ca, int off, int len) {
 347         CharsetEncoder ce = cs.newEncoder();
 348         int en = scale(len, ce.maxBytesPerChar());
 349         byte[] ba = new byte[en];
 350         if (len == 0)
 351             return ba;
 352         boolean isTrusted = false;
 353         if (System.getSecurityManager() != null) {
 354             if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
 355                 ca =  Arrays.copyOfRange(ca, off, off + len);
 356                 off = 0;
 357             }
 358         }
 359         if (ce instanceof ArrayEncoder) {
 360             int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
 361             return safeTrim(ba, blen, cs, isTrusted);
 362         } else {
 363             ce.onMalformedInput(CodingErrorAction.REPLACE)
 364               .onUnmappableCharacter(CodingErrorAction.REPLACE)
 365               .reset();
 366             ByteBuffer bb = ByteBuffer.wrap(ba);
 367             CharBuffer cb = CharBuffer.wrap(ca, off, len);
 368             try {
 369                 CoderResult cr = ce.encode(cb, bb, true);
 370                 if (!cr.isUnderflow())
 371                     cr.throwException();
 372                 cr = ce.flush(bb);
 373                 if (!cr.isUnderflow())
 374                     cr.throwException();
 375             } catch (CharacterCodingException x) {
 376                 throw new Error(x);
 377             }
 378             return safeTrim(ba, bb.position(), cs, isTrusted);
 379         }
 380     }
 381 
 382     static byte[] encode(char[] ca, int off, int len) {
 383         String csn = Charset.defaultCharset().name();
 384         try {
 385             return encode(csn, ca, off, len);
 386         } catch (UnsupportedEncodingException x) {
 387             warnUnsupportedCharset(csn);
 388         }
 389         try {
 390             return encode("ISO-8859-1", ca, off, len);
 391         } catch (UnsupportedEncodingException x) {
 392             // If this code is hit during VM initialization, MessageUtils is
 393             // the only way we will be able to get any kind of error message.
 394             MessageUtils.err("ISO-8859-1 charset not available: "
 395                              + x.toString());
 396             // If we can not find ISO-8859-1 (a required encoding) then things
 397             // are seriously wrong with the installation.
 398             System.exit(1);
 399             return null;
 400         }
 401     }
 402 }