1 /*
   2  * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.UnsupportedEncodingException;
  29 import java.lang.ref.SoftReference;
  30 import java.nio.ByteBuffer;
  31 import java.nio.CharBuffer;
  32 import java.nio.charset.Charset;
  33 import java.nio.charset.CharsetDecoder;
  34 import java.nio.charset.CharsetEncoder;
  35 import java.nio.charset.CharacterCodingException;
  36 import java.nio.charset.CoderResult;
  37 import java.nio.charset.CodingErrorAction;
  38 import java.nio.charset.IllegalCharsetNameException;
  39 import java.nio.charset.UnsupportedCharsetException;
  40 import java.util.Arrays;
  41 import sun.misc.MessageUtils;
  42 import sun.nio.cs.HistoricallyNamedCharset;
  43 import sun.nio.cs.ArrayDecoder;
  44 import sun.nio.cs.ArrayEncoder;
  45 
  46 /**
  47  * Utility class for string encoding and decoding.
  48  */
  49 
  50 class StringCoding {
  51 
  52     private StringCoding() { }
  53 
  54     /** The cached coders for each thread */
  55     private final static ThreadLocal<SoftReference<StringDecoder>> decoder =
  56         new ThreadLocal<>();
  57     private final static ThreadLocal<SoftReference<StringEncoder>> encoder =
  58         new ThreadLocal<>();
  59 
  60     private static boolean warnUnsupportedCharset = true;
  61 
  62     private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
  63         SoftReference<T> sr = tl.get();
  64         if (sr == null)
  65             return null;
  66         return sr.get();
  67     }
  68 
  69     private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
  70         tl.set(new SoftReference<>(ob));
  71     }
  72 
  73     // Trim the given byte array to the given length
  74     //
  75     private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
  76         if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
  77             return ba;
  78         else
  79             return Arrays.copyOf(ba, len);
  80     }
  81 
  82     // Trim the given char array to the given length
  83     //
  84     private static char[] safeTrim(char[] ca, int len,
  85                                    Charset cs, boolean isTrusted) {
  86         if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
  87             return ca;
  88         else
  89             return Arrays.copyOf(ca, len);
  90     }
  91 
  92     private static int scale(int len, float expansionFactor) {
  93         // We need to perform double, not float, arithmetic; otherwise
  94         // we lose low order bits when len is larger than 2**24.
  95         return (int)(len * (double)expansionFactor);
  96     }
  97 
  98     private static Charset lookupCharset(String csn) {
  99         if (Charset.isSupported(csn)) {
 100             try {
 101                 return Charset.forName(csn);
 102             } catch (UnsupportedCharsetException x) {
 103                 throw new Error(x);
 104             }
 105         }
 106         return null;
 107     }
 108 
 109     private static void warnUnsupportedCharset(String csn) {
 110         if (warnUnsupportedCharset) {
 111             // Use sun.misc.MessageUtils rather than the Logging API or
 112             // System.err since this method may be called during VM
 113             // initialization before either is available.
 114             MessageUtils.err("WARNING: Default charset " + csn +
 115                              " not supported, using ISO-8859-1 instead");
 116             warnUnsupportedCharset = false;
 117         }
 118     }
 119 
 120 
 121     // -- Decoding --
 122     private static class StringDecoder {
 123         private final String requestedCharsetName;
 124         private final Charset cs;
 125         private final CharsetDecoder cd;
 126         private final boolean isTrusted;
 127 
 128         private StringDecoder(Charset cs, String rcn) {
 129             this.requestedCharsetName = rcn;
 130             this.cs = cs;
 131             this.cd = cs.newDecoder()
 132                 .onMalformedInput(CodingErrorAction.REPLACE)
 133                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
 134             this.isTrusted = (cs.getClass().getClassLoader0() == null);
 135         }
 136 
 137         String charsetName() {
 138             if (cs instanceof HistoricallyNamedCharset)
 139                 return ((HistoricallyNamedCharset)cs).historicalName();
 140             return cs.name();
 141         }
 142 
 143         final String requestedCharsetName() {
 144             return requestedCharsetName;
 145         }
 146 
 147         char[] decode(byte[] ba, int off, int len) {
 148             int en = scale(len, cd.maxCharsPerByte());
 149             char[] ca = new char[en];
 150             if (len == 0)
 151                 return ca;
 152             if (cd instanceof ArrayDecoder) {
 153                 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
 154                 return safeTrim(ca, clen, cs, isTrusted);
 155             } else {
 156                 cd.reset();
 157                 ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
 158                 CharBuffer cb = CharBuffer.wrap(ca);
 159                 try {
 160                     CoderResult cr = cd.decode(bb, cb, true);
 161                     if (!cr.isUnderflow())
 162                         cr.throwException();
 163                     cr = cd.flush(cb);
 164                     if (!cr.isUnderflow())
 165                         cr.throwException();
 166                 } catch (CharacterCodingException x) {
 167                     // Substitution is always enabled,
 168                     // so this shouldn't happen
 169                     throw new Error(x);
 170                 }
 171                 return safeTrim(ca, cb.position(), cs, isTrusted);
 172             }
 173         }
 174     }
 175 
 176     static char[] decode(String charsetName, byte[] ba, int off, int len)
 177         throws UnsupportedEncodingException
 178     {
 179         StringDecoder sd = deref(decoder);
 180         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
 181         if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
 182                               || csn.equals(sd.charsetName()))) {
 183             sd = null;
 184             try {
 185                 Charset cs = lookupCharset(csn);
 186                 if (cs != null)
 187                     sd = new StringDecoder(cs, csn);
 188             } catch (IllegalCharsetNameException x) {}
 189             if (sd == null)
 190                 throw new UnsupportedEncodingException(csn);
 191             set(decoder, sd);
 192         }
 193         return sd.decode(ba, off, len);
 194     }
 195 
 196     static char[] decode(Charset cs, byte[] ba, int off, int len) {
 197         // (1)We never cache the "external" cs, the only benefit of creating
 198         // an additional StringDe/Encoder object to wrap it is to share the
 199         // de/encode() method. These SD/E objects are short-lived, the young-gen
 200         // gc should be able to take care of them well. But the best approach
 201         // is still not to generate them if not really necessary.
 202         // (2)The defensive copy of the input byte/char[] has a big performance
 203         // impact, as well as the outgoing result byte/char[]. Need to do the
 204         // optimization check of (sm==null && classLoader0==null) for both.
 205         // (3)getClass().getClassLoader0() is expensive
 206         // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
 207         // is only checked (and then isTrusted gets set) when (SM==null). It is
 208         // possible that the SM==null for now but then SM is NOT null later
 209         // when safeTrim() is invoked...the "safe" way to do is to redundant
 210         // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
 211         // but it then can be argued that the SM is null when the operation
 212         // is started...
 213         CharsetDecoder cd = cs.newDecoder();
 214         int en = scale(len, cd.maxCharsPerByte());
 215         char[] ca = new char[en];
 216         if (len == 0)
 217             return ca;
 218         
 219         boolean isTrusted = isTrusted(cs);
 220         if (!isTrusted) {
 221             ba =  Arrays.copyOfRange(ba, off, off + len);
 222             off = 0;
 223         }
 224 
 225         setupDecoder(cd);
 226 
 227         if (cd instanceof ArrayDecoder) {
 228             int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
 229             return safeTrim(ca, clen, cs, isTrusted);
 230         } else {
 231             ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
 232             return performDecode(cs, isTrusted, ca, cd, bb);
 233         }
 234     }
 235     
 236     static char[] decode(Charset cs, ByteBuffer bb, int off, int len) {
 237         // See comment at top of decode(Charset,byte[],int,int)
 238         CharsetDecoder cd = cs.newDecoder();
 239         int en = scale(len, cd.maxCharsPerByte());
 240         char[] ca = new char[en];
 241         if (len == 0)
 242             return ca;
 243 
 244         boolean isTrusted = isTrusted(cs);
 245         if (!isTrusted) {
 246             // setup the bytebuffer for copying
 247             ByteBuffer originalByteBuffer = bb;
 248             int originalPosition = originalByteBuffer.position();
 249             int originalLimit = originalByteBuffer.limit();
 250             originalByteBuffer.position(off)
 251                               .limit(off + len);
 252 
 253             // copy the bytebuffer
 254             bb = ByteBuffer.allocateDirect(len)
 255                            .put(originalByteBuffer);
 256             bb.position(0);
 257             
 258             // reset the original bytebuffer
 259             originalByteBuffer.position(originalPosition)
 260                               .limit(originalLimit);
 261 
 262             off = 0;
 263         }
 264 
 265         setupDecoder(cd);
 266 
 267         return performDecode(cs, isTrusted, ca, cd, bb);
 268     }
 269 
 270     private static boolean isTrusted(Charset cs) {
 271         return System.getSecurityManager() != null
 272             && cs.getClass().getClassLoader0() == null;
 273     }
 274 
 275     private static void setupDecoder(CharsetDecoder cd) {
 276         cd.onMalformedInput(CodingErrorAction.REPLACE)
 277           .onUnmappableCharacter(CodingErrorAction.REPLACE)
 278           .reset();
 279     }
 280 
 281     static char[] performDecode(
 282         Charset cs, boolean isTrusted, char[] ca, 
 283         CharsetDecoder cd, ByteBuffer bb) {
 284 
 285         CharBuffer cb = CharBuffer.wrap(ca);
 286         try {
 287             CoderResult cr = cd.decode(bb, cb, true);
 288             if (!cr.isUnderflow())
 289                 cr.throwException();
 290             cr = cd.flush(cb);
 291             if (!cr.isUnderflow())
 292                 cr.throwException();
 293         } catch (CharacterCodingException x) {
 294             // Substitution is always enabled,
 295             // so this shouldn't happen
 296             throw new Error(x);
 297         }
 298         return safeTrim(ca, cb.position(), cs, isTrusted);
 299     }
 300 
 301     static char[] decode(byte[] ba, int off, int len) {
 302         String csn = Charset.defaultCharset().name();
 303         try {
 304             // use charset name decode() variant which provides caching.
 305             return decode(csn, ba, off, len);
 306         } catch (UnsupportedEncodingException x) {
 307             warnUnsupportedCharset(csn);
 308         }
 309         try {
 310             return decode("ISO-8859-1", ba, off, len);
 311         } catch (UnsupportedEncodingException x) {
 312             // If this code is hit during VM initialization, MessageUtils is
 313             // the only way we will be able to get any kind of error message.
 314             MessageUtils.err("ISO-8859-1 charset not available: "
 315                              + x.toString());
 316             // If we can not find ISO-8859-1 (a required encoding) then things
 317             // are seriously wrong with the installation.
 318             System.exit(1);
 319             return null;
 320         }
 321     }
 322 
 323     // -- Encoding --
 324     private static class StringEncoder {
 325         private Charset cs;
 326         private CharsetEncoder ce;
 327         private final String requestedCharsetName;
 328         private final boolean isTrusted;
 329 
 330         private StringEncoder(Charset cs, String rcn) {
 331             this.requestedCharsetName = rcn;
 332             this.cs = cs;
 333             this.ce = cs.newEncoder()
 334                 .onMalformedInput(CodingErrorAction.REPLACE)
 335                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
 336             this.isTrusted = (cs.getClass().getClassLoader0() == null);
 337         }
 338 
 339         String charsetName() {
 340             if (cs instanceof HistoricallyNamedCharset)
 341                 return ((HistoricallyNamedCharset)cs).historicalName();
 342             return cs.name();
 343         }
 344 
 345         final String requestedCharsetName() {
 346             return requestedCharsetName;
 347         }
 348 
 349         byte[] encode(char[] ca, int off, int len) {
 350             int en = scale(len, ce.maxBytesPerChar());
 351             byte[] ba = new byte[en];
 352             if (len == 0)
 353                 return ba;
 354             if (ce instanceof ArrayEncoder) {
 355                 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
 356                 return safeTrim(ba, blen, cs, isTrusted);
 357             } else {
 358                 ce.reset();
 359                 ByteBuffer bb = ByteBuffer.wrap(ba);
 360                 CharBuffer cb = CharBuffer.wrap(ca, off, len);
 361                 try {
 362                     CoderResult cr = ce.encode(cb, bb, true);
 363                     if (!cr.isUnderflow())
 364                         cr.throwException();
 365                     cr = ce.flush(bb);
 366                     if (!cr.isUnderflow())
 367                         cr.throwException();
 368                 } catch (CharacterCodingException x) {
 369                     // Substitution is always enabled,
 370                     // so this shouldn't happen
 371                     throw new Error(x);
 372                 }
 373                 return safeTrim(ba, bb.position(), cs, isTrusted);
 374             }
 375         }
 376     }
 377 
 378     static byte[] encode(String charsetName, char[] ca, int off, int len)
 379         throws UnsupportedEncodingException
 380     {
 381         StringEncoder se = deref(encoder);
 382         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
 383         if ((se == null) || !(csn.equals(se.requestedCharsetName())
 384                               || csn.equals(se.charsetName()))) {
 385             se = null;
 386             try {
 387                 Charset cs = lookupCharset(csn);
 388                 if (cs != null)
 389                     se = new StringEncoder(cs, csn);
 390             } catch (IllegalCharsetNameException x) {}
 391             if (se == null)
 392                 throw new UnsupportedEncodingException (csn);
 393             set(encoder, se);
 394         }
 395         return se.encode(ca, off, len);
 396     }
 397 
 398     static byte[] encode(Charset cs, char[] ca, int off, int len) {
 399         CharsetEncoder ce = cs.newEncoder();
 400         int en = scale(len, ce.maxBytesPerChar());
 401         byte[] ba = new byte[en];
 402         if (len == 0)
 403             return ba;
 404         boolean isTrusted = isTrusted(cs);
 405         if (!isTrusted) {
 406             ca =  Arrays.copyOfRange(ca, off, off + len);
 407             off = 0;
 408         }
 409         setupEncoder(ce);
 410         if (ce instanceof ArrayEncoder) {
 411             int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
 412             return safeTrim(ba, blen, cs, isTrusted);
 413         } else {
 414             ByteBuffer bb = ByteBuffer.wrap(ba);
 415             CharBuffer cb = CharBuffer.wrap(ca, off, len);
 416             try {
 417                 CoderResult cr = ce.encode(cb, bb, true);
 418                 if (!cr.isUnderflow())
 419                     cr.throwException();
 420                 cr = ce.flush(bb);
 421                 if (!cr.isUnderflow())
 422                     cr.throwException();
 423             } catch (CharacterCodingException x) {
 424                 throw new Error(x);
 425             }
 426             return safeTrim(ba, bb.position(), cs, isTrusted);
 427         }
 428     }
 429     
 430     static int encode(Charset cs, char[] ca, int off, int len, byte[] destBuffer, int destOffset) {
 431         ByteBuffer bb = ByteBuffer.wrap(destBuffer, destOffset, destBuffer.length - destOffset);
 432         return encode(cs, ca, off, len, bb) - destOffset;
 433     }
 434 
 435     static int encode(Charset cs, char[] ca, int off, int len, ByteBuffer destBuffer, int destOffset) {
 436         int originalPosition = destBuffer.position();
 437         destBuffer.position(destOffset);
 438         try {
 439             return encode(cs, ca, off, len, destBuffer) - destOffset;
 440         } finally {
 441             destBuffer.position(originalPosition);
 442         }
 443     }
 444 
 445     private static int encode(Charset cs, char[] ca, int off, int len, ByteBuffer destBuffer) {
 446         CharsetEncoder ce = cs.newEncoder();
 447         CharBuffer cb = CharBuffer.wrap(ca, off, len);
 448         try {
 449             CoderResult cr = ce.encode(cb, destBuffer, true);
 450             if (!cr.isUnderflow())
 451                 cr.throwException();
 452             cr = ce.flush(destBuffer);
 453             if (!cr.isUnderflow())
 454                 cr.throwException();
 455         } catch (CharacterCodingException x) {
 456             throw new Error(x);
 457         }
 458         return destBuffer.position();
 459     }
 460 
 461     private static void setupEncoder(CharsetEncoder encoder) {
 462         encoder.onMalformedInput(CodingErrorAction.REPLACE)
 463                .onUnmappableCharacter(CodingErrorAction.REPLACE)
 464                .reset();
 465     }
 466 
 467     static byte[] encode(char[] ca, int off, int len) {
 468         String csn = Charset.defaultCharset().name();
 469         try {
 470             // use charset name encode() variant which provides caching.
 471             return encode(csn, ca, off, len);
 472         } catch (UnsupportedEncodingException x) {
 473             warnUnsupportedCharset(csn);
 474         }
 475         try {
 476             return encode("ISO-8859-1", ca, off, len);
 477         } catch (UnsupportedEncodingException x) {
 478             // If this code is hit during VM initialization, MessageUtils is
 479             // the only way we will be able to get any kind of error message.
 480             MessageUtils.err("ISO-8859-1 charset not available: "
 481                              + x.toString());
 482             // If we can not find ISO-8859-1 (a required encoding) then things
 483             // are seriously wrong with the installation.
 484             System.exit(1);
 485             return null;
 486         }
 487     }
 488 }