/* * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package java.lang; import java.io.UnsupportedEncodingException; import java.lang.ref.SoftReference; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CharacterCodingException; import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.UnsupportedCharsetException; import java.util.Arrays; import sun.misc.MessageUtils; import sun.nio.cs.HistoricallyNamedCharset; import sun.nio.cs.ArrayDecoder; import sun.nio.cs.ArrayEncoder; /** * Utility class for string encoding and decoding. */ class StringCoding { private StringCoding() { } /** The cached coders for each thread */ private final static ThreadLocal> decoder = new ThreadLocal<>(); private final static ThreadLocal> encoder = new ThreadLocal<>(); private static boolean warnUnsupportedCharset = true; private static T deref(ThreadLocal> tl) { SoftReference sr = tl.get(); if (sr == null) return null; return sr.get(); } private static void set(ThreadLocal> tl, T ob) { tl.set(new SoftReference<>(ob)); } // Trim the given byte array to the given length // private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) { if (len == ba.length && (isTrusted || System.getSecurityManager() == null)) return ba; else return Arrays.copyOf(ba, len); } // Trim the given char array to the given length // private static char[] safeTrim(char[] ca, int len, Charset cs, boolean isTrusted) { if (len == ca.length && (isTrusted || System.getSecurityManager() == null)) return ca; else return Arrays.copyOf(ca, len); } private static int scale(int len, float expansionFactor) { // We need to perform double, not float, arithmetic; otherwise // we lose low order bits when len is larger than 2**24. return (int)(len * (double)expansionFactor); } private static Charset lookupCharset(String csn) { if (Charset.isSupported(csn)) { try { return Charset.forName(csn); } catch (UnsupportedCharsetException x) { throw new Error(x); } } return null; } private static void warnUnsupportedCharset(String csn) { if (warnUnsupportedCharset) { // Use sun.misc.MessageUtils rather than the Logging API or // System.err since this method may be called during VM // initialization before either is available. MessageUtils.err("WARNING: Default charset " + csn + " not supported, using ISO-8859-1 instead"); warnUnsupportedCharset = false; } } // -- Decoding -- private static class StringDecoder { private final String requestedCharsetName; private final Charset cs; private final CharsetDecoder cd; private final boolean isTrusted; private StringDecoder(Charset cs, String rcn) { this.requestedCharsetName = rcn; this.cs = cs; this.cd = cs.newDecoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); this.isTrusted = (cs.getClass().getClassLoader0() == null); } String charsetName() { if (cs instanceof HistoricallyNamedCharset) return ((HistoricallyNamedCharset)cs).historicalName(); return cs.name(); } final String requestedCharsetName() { return requestedCharsetName; } char[] decode(byte[] ba, int off, int len) { int en = scale(len, cd.maxCharsPerByte()); char[] ca = new char[en]; if (len == 0) return ca; if (cd instanceof ArrayDecoder) { int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); return safeTrim(ca, clen, cs, isTrusted); } else { cd.reset(); ByteBuffer bb = ByteBuffer.wrap(ba, off, len); CharBuffer cb = CharBuffer.wrap(ca); try { CoderResult cr = cd.decode(bb, cb, true); if (!cr.isUnderflow()) cr.throwException(); cr = cd.flush(cb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { // Substitution is always enabled, // so this shouldn't happen throw new Error(x); } return safeTrim(ca, cb.position(), cs, isTrusted); } } } static char[] decode(String charsetName, byte[] ba, int off, int len) throws UnsupportedEncodingException { StringDecoder sd = deref(decoder); String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; if ((sd == null) || !(csn.equals(sd.requestedCharsetName()) || csn.equals(sd.charsetName()))) { sd = null; try { Charset cs = lookupCharset(csn); if (cs != null) sd = new StringDecoder(cs, csn); } catch (IllegalCharsetNameException x) {} if (sd == null) throw new UnsupportedEncodingException(csn); set(decoder, sd); } return sd.decode(ba, off, len); } static char[] decode(Charset cs, byte[] ba, int off, int len) { // (1)We never cache the "external" cs, the only benefit of creating // an additional StringDe/Encoder object to wrap it is to share the // de/encode() method. These SD/E objects are short-lived, the young-gen // gc should be able to take care of them well. But the best approach // is still not to generate them if not really necessary. // (2)The defensive copy of the input byte/char[] has a big performance // impact, as well as the outgoing result byte/char[]. Need to do the // optimization check of (sm==null && classLoader0==null) for both. // (3)getClass().getClassLoader0() is expensive // (4)There might be a timing gap in isTrusted setting. getClassLoader0() // is only checked (and then isTrusted gets set) when (SM==null). It is // possible that the SM==null for now but then SM is NOT null later // when safeTrim() is invoked...the "safe" way to do is to redundant // check (... && (isTrusted || SM == null || getClassLoader0())) in trim // but it then can be argued that the SM is null when the operation // is started... CharsetDecoder cd = cs.newDecoder(); int en = scale(len, cd.maxCharsPerByte()); char[] ca = new char[en]; if (len == 0) return ca; boolean isTrusted = isTrusted(cs); if (!isTrusted) { ba = Arrays.copyOfRange(ba, off, off + len); off = 0; } setupDecoder(cd); if (cd instanceof ArrayDecoder) { int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); return safeTrim(ca, clen, cs, isTrusted); } else { ByteBuffer bb = ByteBuffer.wrap(ba, off, len); return performDecode(cs, isTrusted, ca, cd, bb); } } static char[] decode(Charset cs, ByteBuffer bb, int off, int len) { // See comment at top of decode(Charset,byte[],int,int) CharsetDecoder cd = cs.newDecoder(); int en = scale(len, cd.maxCharsPerByte()); char[] ca = new char[en]; if (len == 0) return ca; boolean isTrusted = isTrusted(cs); if (!isTrusted) { // setup the bytebuffer for copying ByteBuffer originalByteBuffer = bb; int originalPosition = originalByteBuffer.position(); int originalLimit = originalByteBuffer.limit(); originalByteBuffer.position(off) .limit(off + len); // copy the bytebuffer bb = ByteBuffer.allocateDirect(len) .put(originalByteBuffer); bb.position(0); // reset the original bytebuffer originalByteBuffer.position(originalPosition) .limit(originalLimit); off = 0; } setupDecoder(cd); return performDecode(cs, isTrusted, ca, cd, bb); } private static boolean isTrusted(Charset cs) { return System.getSecurityManager() != null && cs.getClass().getClassLoader0() == null; } private static void setupDecoder(CharsetDecoder cd) { cd.onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE) .reset(); } static char[] performDecode( Charset cs, boolean isTrusted, char[] ca, CharsetDecoder cd, ByteBuffer bb) { CharBuffer cb = CharBuffer.wrap(ca); try { CoderResult cr = cd.decode(bb, cb, true); if (!cr.isUnderflow()) cr.throwException(); cr = cd.flush(cb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { // Substitution is always enabled, // so this shouldn't happen throw new Error(x); } return safeTrim(ca, cb.position(), cs, isTrusted); } static char[] decode(byte[] ba, int off, int len) { String csn = Charset.defaultCharset().name(); try { // use charset name decode() variant which provides caching. return decode(csn, ba, off, len); } catch (UnsupportedEncodingException x) { warnUnsupportedCharset(csn); } try { return decode("ISO-8859-1", ba, off, len); } catch (UnsupportedEncodingException x) { // If this code is hit during VM initialization, MessageUtils is // the only way we will be able to get any kind of error message. MessageUtils.err("ISO-8859-1 charset not available: " + x.toString()); // If we can not find ISO-8859-1 (a required encoding) then things // are seriously wrong with the installation. System.exit(1); return null; } } // -- Encoding -- private static class StringEncoder { private Charset cs; private CharsetEncoder ce; private final String requestedCharsetName; private final boolean isTrusted; private StringEncoder(Charset cs, String rcn) { this.requestedCharsetName = rcn; this.cs = cs; this.ce = cs.newEncoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); this.isTrusted = (cs.getClass().getClassLoader0() == null); } String charsetName() { if (cs instanceof HistoricallyNamedCharset) return ((HistoricallyNamedCharset)cs).historicalName(); return cs.name(); } final String requestedCharsetName() { return requestedCharsetName; } byte[] encode(char[] ca, int off, int len) { int en = scale(len, ce.maxBytesPerChar()); byte[] ba = new byte[en]; if (len == 0) return ba; if (ce instanceof ArrayEncoder) { int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); return safeTrim(ba, blen, cs, isTrusted); } else { ce.reset(); ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca, off, len); try { CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) cr.throwException(); cr = ce.flush(bb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { // Substitution is always enabled, // so this shouldn't happen throw new Error(x); } return safeTrim(ba, bb.position(), cs, isTrusted); } } } static byte[] encode(String charsetName, char[] ca, int off, int len) throws UnsupportedEncodingException { StringEncoder se = deref(encoder); String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; if ((se == null) || !(csn.equals(se.requestedCharsetName()) || csn.equals(se.charsetName()))) { se = null; try { Charset cs = lookupCharset(csn); if (cs != null) se = new StringEncoder(cs, csn); } catch (IllegalCharsetNameException x) {} if (se == null) throw new UnsupportedEncodingException (csn); set(encoder, se); } return se.encode(ca, off, len); } static byte[] encode(Charset cs, char[] ca, int off, int len) { CharsetEncoder ce = cs.newEncoder(); int en = scale(len, ce.maxBytesPerChar()); byte[] ba = new byte[en]; if (len == 0) return ba; boolean isTrusted = false; if (System.getSecurityManager() != null) { if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) { ca = Arrays.copyOfRange(ca, off, off + len); off = 0; } } ce.onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE) .reset(); if (ce instanceof ArrayEncoder) { int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); return safeTrim(ba, blen, cs, isTrusted); } else { ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca, off, len); try { CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) cr.throwException(); cr = ce.flush(bb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { throw new Error(x); } return safeTrim(ba, bb.position(), cs, isTrusted); } } static byte[] encode(char[] ca, int off, int len) { String csn = Charset.defaultCharset().name(); try { // use charset name encode() variant which provides caching. return encode(csn, ca, off, len); } catch (UnsupportedEncodingException x) { warnUnsupportedCharset(csn); } try { return encode("ISO-8859-1", ca, off, len); } catch (UnsupportedEncodingException x) { // If this code is hit during VM initialization, MessageUtils is // the only way we will be able to get any kind of error message. MessageUtils.err("ISO-8859-1 charset not available: " + x.toString()); // If we can not find ISO-8859-1 (a required encoding) then things // are seriously wrong with the installation. System.exit(1); return null; } } }