1 /*
   2  * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.UnsupportedEncodingException;
  29 import java.lang.ref.SoftReference;
  30 import java.nio.ByteBuffer;
  31 import java.nio.CharBuffer;
  32 import java.nio.charset.Charset;
  33 import java.nio.charset.CharsetDecoder;
  34 import java.nio.charset.CharsetEncoder;
  35 import java.nio.charset.CharacterCodingException;
  36 import java.nio.charset.CoderResult;
  37 import java.nio.charset.CodingErrorAction;
  38 import java.nio.charset.IllegalCharsetNameException;
  39 import java.nio.charset.UnsupportedCharsetException;
  40 import java.util.Arrays;
  41 import sun.misc.MessageUtils;
  42 import sun.nio.cs.HistoricallyNamedCharset;
  43 import sun.nio.cs.ArrayDecoder;
  44 import sun.nio.cs.ArrayEncoder;
  45 
  46 /**
  47  * Utility class for string encoding and decoding.
  48  */
  49 
  50 class StringCoding {
  51 
  52     private StringCoding() { }
  53 
  54     /** The cached coders for each thread */
  55     private static final ThreadLocal<SoftReference<StringDecoder>> decoder =
  56         new ThreadLocal<>();
  57     private static final ThreadLocal<SoftReference<StringEncoder>> encoder =
  58         new ThreadLocal<>();
  59 
  60     private static boolean warnUnsupportedCharset = true;
  61 
  62     private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
  63         SoftReference<T> sr = tl.get();
  64         if (sr == null)
  65             return null;
  66         return sr.get();
  67     }
  68 
  69     private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
  70         tl.set(new SoftReference<>(ob));
  71     }
  72 
  73     // Trim the given byte array to the given length
  74     //
  75     private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
  76         if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
  77             return ba;
  78         else
  79             return Arrays.copyOf(ba, len);
  80     }
  81 
  82     // Trim the given char array to the given length
  83     //
  84     private static char[] safeTrim(char[] ca, int len,
  85                                    Charset cs, boolean isTrusted) {
  86         if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
  87             return ca;
  88         else
  89             return Arrays.copyOf(ca, len);
  90     }
  91 
  92     private static int scale(int len, float expansionFactor) {
  93         // We need to perform double, not float, arithmetic; otherwise
  94         // we lose low order bits when len is larger than 2**24.
  95         return (int)(len * (double)expansionFactor);
  96     }
  97 
  98     private static Charset lookupCharset(String csn) {
  99         if (Charset.isSupported(csn)) {
 100             try {
 101                 return Charset.forName(csn);
 102             } catch (UnsupportedCharsetException x) {
 103                 throw new Error(x);
 104             }
 105         }
 106         return null;
 107     }
 108 
 109     private static void warnUnsupportedCharset(String csn) {
 110         if (warnUnsupportedCharset) {
 111             // Use sun.misc.MessageUtils rather than the Logging API or
 112             // System.err since this method may be called during VM
 113             // initialization before either is available.
 114             MessageUtils.err("WARNING: Default charset " + csn +
 115                              " not supported, using ISO-8859-1 instead");
 116             warnUnsupportedCharset = false;
 117         }
 118     }
 119 
 120 
 121     // -- Decoding --
 122     private static class StringDecoder {
 123         private final String requestedCharsetName;
 124         private final Charset cs;
 125         private final CharsetDecoder cd;
 126         private final boolean isTrusted;
 127 
 128         private StringDecoder(Charset cs, String rcn) {
 129             this.requestedCharsetName = rcn;
 130             this.cs = cs;
 131             this.cd = cs.newDecoder()
 132                 .onMalformedInput(CodingErrorAction.REPLACE)
 133                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
 134             this.isTrusted = (cs.getClass().getClassLoader0() == null);
 135         }
 136 
 137         String charsetName() {
 138             if (cs instanceof HistoricallyNamedCharset)
 139                 return ((HistoricallyNamedCharset)cs).historicalName();
 140             return cs.name();
 141         }
 142 
 143         final String requestedCharsetName() {
 144             return requestedCharsetName;
 145         }
 146 
 147         char[] decode(byte[] ba, int off, int len) {
 148             int en = scale(len, cd.maxCharsPerByte());
 149             char[] ca = new char[en];
 150             if (len == 0)
 151                 return ca;
 152             if (cd instanceof ArrayDecoder) {
 153                 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
 154                 return safeTrim(ca, clen, cs, isTrusted);
 155             } else {
 156                 cd.reset();
 157                 ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
 158                 CharBuffer cb = CharBuffer.wrap(ca);
 159                 try {
 160                     CoderResult cr = cd.decode(bb, cb, true);
 161                     if (!cr.isUnderflow())
 162                         cr.throwException();
 163                     cr = cd.flush(cb);
 164                     if (!cr.isUnderflow())
 165                         cr.throwException();
 166                 } catch (CharacterCodingException x) {
 167                     // Substitution is always enabled,
 168                     // so this shouldn't happen
 169                     throw new Error(x);
 170                 }
 171                 return safeTrim(ca, cb.position(), cs, isTrusted);
 172             }
 173         }
 174     }
 175 
 176     static char[] decode(String charsetName, byte[] ba, int off, int len)
 177         throws UnsupportedEncodingException
 178     {
 179         StringDecoder sd = deref(decoder);
 180         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
 181         if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
 182                               || csn.equals(sd.charsetName()))) {
 183             sd = null;
 184             try {
 185                 Charset cs = lookupCharset(csn);
 186                 if (cs != null)
 187                     sd = new StringDecoder(cs, csn);
 188             } catch (IllegalCharsetNameException x) {}
 189             if (sd == null)
 190                 throw new UnsupportedEncodingException(csn);
 191             set(decoder, sd);
 192         }
 193         return sd.decode(ba, off, len);
 194     }
 195 
 196     static char[] decode(Charset cs, byte[] ba, int off, int len) {
 197         // (1)We never cache the "external" cs, the only benefit of creating
 198         // an additional StringDe/Encoder object to wrap it is to share the
 199         // de/encode() method. These SD/E objects are short-lived, the young-gen
 200         // gc should be able to take care of them well. But the best approach
 201         // is still not to generate them if not really necessary.
 202         // (2)The defensive copy of the input byte/char[] has a big performance
 203         // impact, as well as the outgoing result byte/char[]. Need to do the
 204         // optimization check of (sm==null && classLoader0==null) for both.
 205         // (3)getClass().getClassLoader0() is expensive
 206         // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
 207         // is only checked (and then isTrusted gets set) when (SM==null). It is
 208         // possible that the SM==null for now but then SM is NOT null later
 209         // when safeTrim() is invoked...the "safe" way to do is to redundant
 210         // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
 211         // but it then can be argued that the SM is null when the operation
 212         // is started...
 213         CharsetDecoder cd = cs.newDecoder();
 214         int en = scale(len, cd.maxCharsPerByte());
 215         char[] ca = new char[en];
 216         if (len == 0)
 217             return ca;
 218         boolean isTrusted = false;
 219         if (System.getSecurityManager() != null) {
 220             if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
 221                 ba =  Arrays.copyOfRange(ba, off, off + len);
 222                 off = 0;
 223             }
 224         }
 225         cd.onMalformedInput(CodingErrorAction.REPLACE)
 226           .onUnmappableCharacter(CodingErrorAction.REPLACE)
 227           .reset();
 228         if (cd instanceof ArrayDecoder) {
 229             int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
 230             return safeTrim(ca, clen, cs, isTrusted);
 231         } else {
 232             ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
 233             CharBuffer cb = CharBuffer.wrap(ca);
 234             try {
 235                 CoderResult cr = cd.decode(bb, cb, true);
 236                 if (!cr.isUnderflow())
 237                     cr.throwException();
 238                 cr = cd.flush(cb);
 239                 if (!cr.isUnderflow())
 240                     cr.throwException();
 241             } catch (CharacterCodingException x) {
 242                 // Substitution is always enabled,
 243                 // so this shouldn't happen
 244                 throw new Error(x);
 245             }
 246             return safeTrim(ca, cb.position(), cs, isTrusted);
 247         }
 248     }
 249 
 250     static char[] decode(byte[] ba, int off, int len) {
 251         String csn = Charset.defaultCharset().name();
 252         try {
 253             // use charset name decode() variant which provides caching.
 254             return decode(csn, ba, off, len);
 255         } catch (UnsupportedEncodingException x) {
 256             warnUnsupportedCharset(csn);
 257         }
 258         try {
 259             return decode("ISO-8859-1", ba, off, len);
 260         } catch (UnsupportedEncodingException x) {
 261             // If this code is hit during VM initialization, MessageUtils is
 262             // the only way we will be able to get any kind of error message.
 263             MessageUtils.err("ISO-8859-1 charset not available: "
 264                              + x.toString());
 265             // If we can not find ISO-8859-1 (a required encoding) then things
 266             // are seriously wrong with the installation.
 267             System.exit(1);
 268             return null;
 269         }
 270     }
 271 
 272     // -- Encoding --
 273     private static class StringEncoder {
 274         private Charset cs;
 275         private CharsetEncoder ce;
 276         private final String requestedCharsetName;
 277         private final boolean isTrusted;
 278 
 279         private StringEncoder(Charset cs, String rcn) {
 280             this.requestedCharsetName = rcn;
 281             this.cs = cs;
 282             this.ce = cs.newEncoder()
 283                 .onMalformedInput(CodingErrorAction.REPLACE)
 284                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
 285             this.isTrusted = (cs.getClass().getClassLoader0() == null);
 286         }
 287 
 288         String charsetName() {
 289             if (cs instanceof HistoricallyNamedCharset)
 290                 return ((HistoricallyNamedCharset)cs).historicalName();
 291             return cs.name();
 292         }
 293 
 294         final String requestedCharsetName() {
 295             return requestedCharsetName;
 296         }
 297 
 298         byte[] encode(char[] ca, int off, int len) {
 299             int en = scale(len, ce.maxBytesPerChar());
 300             byte[] ba = new byte[en];
 301             if (len == 0)
 302                 return ba;
 303             if (ce instanceof ArrayEncoder) {
 304                 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
 305                 return safeTrim(ba, blen, cs, isTrusted);
 306             } else {
 307                 ce.reset();
 308                 ByteBuffer bb = ByteBuffer.wrap(ba);
 309                 CharBuffer cb = CharBuffer.wrap(ca, off, len);
 310                 try {
 311                     CoderResult cr = ce.encode(cb, bb, true);
 312                     if (!cr.isUnderflow())
 313                         cr.throwException();
 314                     cr = ce.flush(bb);
 315                     if (!cr.isUnderflow())
 316                         cr.throwException();
 317                 } catch (CharacterCodingException x) {
 318                     // Substitution is always enabled,
 319                     // so this shouldn't happen
 320                     throw new Error(x);
 321                 }
 322                 return safeTrim(ba, bb.position(), cs, isTrusted);
 323             }
 324         }
 325     }
 326 
 327     static byte[] encode(String charsetName, char[] ca, int off, int len)
 328         throws UnsupportedEncodingException
 329     {
 330         StringEncoder se = deref(encoder);
 331         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
 332         if ((se == null) || !(csn.equals(se.requestedCharsetName())
 333                               || csn.equals(se.charsetName()))) {
 334             se = null;
 335             try {
 336                 Charset cs = lookupCharset(csn);
 337                 if (cs != null)
 338                     se = new StringEncoder(cs, csn);
 339             } catch (IllegalCharsetNameException x) {}
 340             if (se == null)
 341                 throw new UnsupportedEncodingException (csn);
 342             set(encoder, se);
 343         }
 344         return se.encode(ca, off, len);
 345     }
 346 
 347     static byte[] encode(Charset cs, char[] ca, int off, int len) {
 348         CharsetEncoder ce = cs.newEncoder();
 349         int en = scale(len, ce.maxBytesPerChar());
 350         byte[] ba = new byte[en];
 351         if (len == 0)
 352             return ba;
 353         boolean isTrusted = false;
 354         if (System.getSecurityManager() != null) {
 355             if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
 356                 ca =  Arrays.copyOfRange(ca, off, off + len);
 357                 off = 0;
 358             }
 359         }
 360         ce.onMalformedInput(CodingErrorAction.REPLACE)
 361           .onUnmappableCharacter(CodingErrorAction.REPLACE)
 362           .reset();
 363         if (ce instanceof ArrayEncoder) {
 364             int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
 365             return safeTrim(ba, blen, cs, isTrusted);
 366         } else {
 367             ByteBuffer bb = ByteBuffer.wrap(ba);
 368             CharBuffer cb = CharBuffer.wrap(ca, off, len);
 369             try {
 370                 CoderResult cr = ce.encode(cb, bb, true);
 371                 if (!cr.isUnderflow())
 372                     cr.throwException();
 373                 cr = ce.flush(bb);
 374                 if (!cr.isUnderflow())
 375                     cr.throwException();
 376             } catch (CharacterCodingException x) {
 377                 throw new Error(x);
 378             }
 379             return safeTrim(ba, bb.position(), cs, isTrusted);
 380         }
 381     }
 382 
 383     static byte[] encode(char[] ca, int off, int len) {
 384         String csn = Charset.defaultCharset().name();
 385         try {
 386             // use charset name encode() variant which provides caching.
 387             return encode(csn, ca, off, len);
 388         } catch (UnsupportedEncodingException x) {
 389             warnUnsupportedCharset(csn);
 390         }
 391         try {
 392             return encode("ISO-8859-1", ca, off, len);
 393         } catch (UnsupportedEncodingException x) {
 394             // If this code is hit during VM initialization, MessageUtils is
 395             // the only way we will be able to get any kind of error message.
 396             MessageUtils.err("ISO-8859-1 charset not available: "
 397                              + x.toString());
 398             // If we can not find ISO-8859-1 (a required encoding) then things
 399             // are seriously wrong with the installation.
 400             System.exit(1);
 401             return null;
 402         }
 403     }
 404 }