src/java.base/share/classes/java/lang/StringCoder.java

Print this page

        

*** 1,7 **** /* ! * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this --- 1,7 ---- /* ! * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this
*** 51,87 **** import static java.lang.Character.highSurrogate; import static java.lang.Character.lowSurrogate; import static java.lang.Character.isSupplementaryCodePoint; import static java.lang.StringUTF16.putChar; /** * Utility class for string encoding and decoding. */ ! class StringCoding { ! ! private StringCoding() { } ! /** The cached coders for each thread */ ! private static final ThreadLocal<SoftReference<StringDecoder>> decoder = ! new ThreadLocal<>(); ! private static final ThreadLocal<SoftReference<StringEncoder>> encoder = new ThreadLocal<>(); private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE; private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE; private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE; ! private static <T> T deref(ThreadLocal<SoftReference<T>> tl) { ! SoftReference<T> sr = tl.get(); if (sr == null) return null; return sr.get(); } ! private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) { ! tl.set(new SoftReference<>(ob)); } // Trim the given byte array to the given length private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) { if (len == ba.length && (isTrusted || System.getSecurityManager() == null)) --- 51,87 ---- import static java.lang.Character.highSurrogate; import static java.lang.Character.lowSurrogate; import static java.lang.Character.isSupplementaryCodePoint; import static java.lang.StringUTF16.putChar; + //import static java.lang.StringCoding.Result; + /** * Utility class for string encoding and decoding. */ ! class StringCoder { ! /** The cached coder for each thread */ ! private static final ThreadLocal<SoftReference<StringCoder>> cachedCoder = new ThreadLocal<>(); private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE; private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE; private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE; ! // private static StringCoder ssc = new UTF_8(cs, csn); ! ! 
private static StringCoder deref() { ! SoftReference<StringCoder> sr = cachedCoder.get(); if (sr == null) return null; return sr.get(); } ! private static void setref(StringCoder sc) { ! cachedCoder.set(new SoftReference<>(sc)); } // Trim the given byte array to the given length private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) { if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
*** 94,112 **** // We need to perform double, not float, arithmetic; otherwise // we lose low order bits when len is larger than 2**24. return (int)(len * (double)expansionFactor); } ! private static Charset lookupCharset(String csn) { if (Charset.isSupported(csn)) { try { ! return Charset.forName(csn); } catch (UnsupportedCharsetException x) { throw new Error(x); } } ! return null; } static class Result { byte[] value; byte coder; --- 94,146 ---- // We need to perform double, not float, arithmetic; otherwise // we lose low order bits when len is larger than 2**24. return (int)(len * (double)expansionFactor); } ! private static StringCoder newCoder(Charset cs) { ! if (cs == UTF_8) ! return new SC_UTF_8(cs, cs.name()); ! if (cs == ISO_8859_1) ! return new SC_8859_1(cs, cs.name()); ! if (cs == US_ASCII) ! return new SC_ASCII(cs, cs.name()); ! return new StringCoder(cs, cs.name()); ! } ! ! private static StringCoder getCoder(Charset cs) { ! // cache any cs implements HNC ! if (cs instanceof HistoricallyNamedCharset) { ! StringCoder sc = deref(); ! if (sc != null && sc.cs == cs) ! return sc; ! setref(newCoder(cs)); ! return sc; ! } ! return null; ! } ! ! private static StringCoder getCoder(String csn) ! throws UnsupportedEncodingException { ! ! StringCoder sc = deref(); ! if ((sc == null) || !(csn.equals(sc.requestedCharsetName()) || ! csn.equals(sc.charsetName()))) { ! sc = null; ! try { if (Charset.isSupported(csn)) { try { ! sc = newCoder(Charset.forName(csn)); } catch (UnsupportedCharsetException x) { throw new Error(x); } } ! } catch (IllegalCharsetNameException x) {} ! if (sc == null) ! throw new UnsupportedEncodingException(csn); ! setref(sc); ! } ! return sc; } static class Result { byte[] value; byte coder;
*** 146,185 **** } } return false; } ! // -- Decoding -- ! static class StringDecoder { ! private final String requestedCharsetName; ! private final Charset cs; ! private final boolean isASCIICompatible; ! private final CharsetDecoder cd; ! protected final Result result; ! StringDecoder(Charset cs, String rcn) { ! this.requestedCharsetName = rcn; this.cs = cs; ! this.cd = cs.newDecoder() ! .onMalformedInput(CodingErrorAction.REPLACE) ! .onUnmappableCharacter(CodingErrorAction.REPLACE); this.result = new Result(); ! this.isASCIICompatible = (cd instanceof ArrayDecoder) && ! ((ArrayDecoder)cd).isASCIICompatible(); } ! String charsetName() { if (cs instanceof HistoricallyNamedCharset) return ((HistoricallyNamedCharset)cs).historicalName(); return cs.name(); } final String requestedCharsetName() { return requestedCharsetName; } ! Result decode(byte[] ba, int off, int len) { if (len == 0) { return result.with(); } // fastpath for ascii compatible if (isASCIICompatible && !hasNegatives(ba, off, len)) { --- 180,240 ---- } } return false; } ! //////////////////////////////////////////////////////////////////////// ! final Charset cs; ! final String requestedCharsetName; ! final boolean isASCIICompatible; ! final Result result; ! final boolean isTrusted; ! final CharsetDecoder dec; ! CharsetEncoder enc; ! ! private StringCoder(Charset cs, ! String rcn, ! CharsetDecoder dec, ! CharsetEncoder enc) { this.cs = cs; ! this.requestedCharsetName = rcn; ! this.dec = dec; ! this.enc = enc; ! this.isTrusted = (cs.getClass().getClassLoader0() == null); this.result = new Result(); ! if (dec != null && enc != null) { ! this.isASCIICompatible = (dec instanceof ArrayDecoder) && ! ((ArrayDecoder)dec).isASCIICompatible(); ! assert(isASCIICompatible == // '==' binds tighter than '&&', so the encoder check must be grouped ! ((enc instanceof ArrayEncoder) && ((ArrayEncoder)enc).isASCIICompatible())); ! } else { ! this.isASCIICompatible = true; ! } ! } ! ! private StringCoder(Charset cs, String rcn) { ! this(cs, rcn, ! cs.newDecoder() ! 
.onMalformedInput(CodingErrorAction.REPLACE) ! .onUnmappableCharacter(CodingErrorAction.REPLACE), ! cs.newEncoder() ! .onMalformedInput(CodingErrorAction.REPLACE) ! .onUnmappableCharacter(CodingErrorAction.REPLACE)); } ! final String charsetName() { if (cs instanceof HistoricallyNamedCharset) return ((HistoricallyNamedCharset)cs).historicalName(); return cs.name(); } final String requestedCharsetName() { return requestedCharsetName; } ! Result decode0(byte[] ba, int off, int len) { if (len == 0) { return result.with(); } // fastpath for ascii compatible if (isASCIICompatible && !hasNegatives(ba, off, len)) {
*** 188,526 **** LATIN1); } else { return result.with(StringLatin1.inflate(ba, off, len), UTF16); } } ! int en = scale(len, cd.maxCharsPerByte()); char[] ca = new char[en]; ! if (cd instanceof ArrayDecoder) { ! int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); return result.with(ca, 0, clen); } ! cd.reset(); ByteBuffer bb = ByteBuffer.wrap(ba, off, len); CharBuffer cb = CharBuffer.wrap(ca); try { ! CoderResult cr = cd.decode(bb, cb, true); if (!cr.isUnderflow()) cr.throwException(); ! cr = cd.flush(cb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { // Substitution is always enabled, // so this shouldn't happen throw new Error(x); } return result.with(ca, 0, cb.position()); } - } ! static Result decode(String charsetName, byte[] ba, int off, int len) ! throws UnsupportedEncodingException ! { ! StringDecoder sd = deref(decoder); ! String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; ! if ((sd == null) || !(csn.equals(sd.requestedCharsetName()) ! || csn.equals(sd.charsetName()))) { ! sd = null; ! try { ! Charset cs = lookupCharset(csn); ! if (cs != null) { ! if (cs == UTF_8) { ! return decodeUTF8(ba, off, len, true); } ! if (cs == ISO_8859_1) { ! return decodeLatin1(ba, off, len); } ! if (cs == US_ASCII) { ! return decodeASCII(ba, off, len); } - sd = new StringDecoder(cs, csn); } ! } catch (IllegalCharsetNameException x) {} ! if (sd == null) ! throw new UnsupportedEncodingException(csn); ! set(decoder, sd); } ! return sd.decode(ba, off, len); } ! static Result decode(Charset cs, byte[] ba, int off, int len) { ! if (cs == UTF_8) { ! return decodeUTF8(ba, off, len, true); } ! if (cs == ISO_8859_1) { ! return decodeLatin1(ba, off, len); } ! if (cs == US_ASCII) { ! return decodeASCII(ba, off, len); } ! // (1)We never cache the "external" cs, the only benefit of creating ! // an additional StringDe/Encoder object to wrap it is to share the ! // de/encode() method. These SD/E objects are short-lived, the young-gen ! 
// gc should be able to take care of them well. But the best approach ! // is still not to generate them if not really necessary. ! // (2)The defensive copy of the input byte/char[] has a big performance ! // impact, as well as the outgoing result byte/char[]. Need to do the ! // optimization check of (sm==null && classLoader0==null) for both. ! // (3)There might be a timing gap in isTrusted setting. getClassLoader0() ! // is only checked (and then isTrusted gets set) when (SM==null). It is ! // possible that the SM==null for now but then SM is NOT null later ! // when safeTrim() is invoked...the "safe" way to do is to redundant ! // check (... && (isTrusted || SM == null || getClassLoader0())) in trim ! // but it then can be argued that the SM is null when the operation ! // is started... ! CharsetDecoder cd = cs.newDecoder(); ! // ascii fastpath ! if ((cd instanceof ArrayDecoder) && ! ((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) { ! return decodeLatin1(ba, off, len); } ! int en = scale(len, cd.maxCharsPerByte()); if (len == 0) { return new Result().with(); } - cd.onMalformedInput(CodingErrorAction.REPLACE) - .onUnmappableCharacter(CodingErrorAction.REPLACE) - .reset(); char[] ca = new char[en]; ! if (cd instanceof ArrayDecoder) { ! int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); ! return new Result().with(ca, 0, clen); ! } if (cs.getClass().getClassLoader0() != null && System.getSecurityManager() != null) { ba = Arrays.copyOfRange(ba, off, off + len); off = 0; } ByteBuffer bb = ByteBuffer.wrap(ba, off, len); CharBuffer cb = CharBuffer.wrap(ca); try { ! CoderResult cr = cd.decode(bb, cb, true); if (!cr.isUnderflow()) cr.throwException(); ! cr = cd.flush(cb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { ! // Substitution is always enabled, ! 
// so this shouldn't happen throw new Error(x); } return new Result().with(ca, 0, cb.position()); } static Result decode(byte[] ba, int off, int len) { Charset cs = Charset.defaultCharset(); ! if (cs == UTF_8) { ! return decodeUTF8(ba, off, len, true); ! } ! if (cs == ISO_8859_1) { ! return decodeLatin1(ba, off, len); ! } ! if (cs == US_ASCII) { ! return decodeASCII(ba, off, len); ! } ! StringDecoder sd = deref(decoder); ! if (sd == null || !cs.name().equals(sd.cs.name())) { ! sd = new StringDecoder(cs, cs.name()); ! set(decoder, sd); } - return sd.decode(ba, off, len); } ! // -- Encoding -- ! private static class StringEncoder { ! private Charset cs; ! private CharsetEncoder ce; ! private final boolean isASCIICompatible; ! private final String requestedCharsetName; ! private final boolean isTrusted; ! ! private StringEncoder(Charset cs, String rcn) { ! this.requestedCharsetName = rcn; ! this.cs = cs; ! this.ce = cs.newEncoder() ! .onMalformedInput(CodingErrorAction.REPLACE) ! .onUnmappableCharacter(CodingErrorAction.REPLACE); ! this.isTrusted = (cs.getClass().getClassLoader0() == null); ! this.isASCIICompatible = (ce instanceof ArrayEncoder) && ! ((ArrayEncoder)ce).isASCIICompatible(); ! } ! ! String charsetName() { ! if (cs instanceof HistoricallyNamedCharset) ! return ((HistoricallyNamedCharset)cs).historicalName(); ! return cs.name(); } ! ! final String requestedCharsetName() { ! return requestedCharsetName; } ! byte[] encode(byte coder, byte[] val) { ! // fastpath for ascii compatible ! if (coder == LATIN1 && isASCIICompatible && ! !hasNegatives(val, 0, val.length)) { ! return Arrays.copyOf(val, val.length); ! } ! int len = val.length >> coder; // assume LATIN1=0/UTF16=1; ! int en = scale(len, ce.maxBytesPerChar()); ! byte[] ba = new byte[en]; ! if (len == 0) { ! return ba; ! } ! if (ce instanceof ArrayEncoder) { ! int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba) ! : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba); ! 
if (blen != -1) { ! return safeTrim(ba, blen, isTrusted); ! } ! } ! char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val) ! : StringUTF16.toChars(val); ! ce.reset(); ! ByteBuffer bb = ByteBuffer.wrap(ba); ! CharBuffer cb = CharBuffer.wrap(ca, 0, len); ! try { ! CoderResult cr = ce.encode(cb, bb, true); ! if (!cr.isUnderflow()) ! cr.throwException(); ! cr = ce.flush(bb); ! if (!cr.isUnderflow()) ! cr.throwException(); ! } catch (CharacterCodingException x) { ! // Substitution is always enabled, ! // so this shouldn't happen ! throw new Error(x); ! } ! return safeTrim(ba, bb.position(), isTrusted); ! } ! } ! static byte[] encode(String charsetName, byte coder, byte[] val) throws UnsupportedEncodingException { ! StringEncoder se = deref(encoder); ! String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; ! if ((se == null) || !(csn.equals(se.requestedCharsetName()) ! || csn.equals(se.charsetName()))) { ! se = null; ! try { ! Charset cs = lookupCharset(csn); ! if (cs != null) { ! if (cs == UTF_8) { ! return encodeUTF8(coder, val, true); ! } ! if (cs == ISO_8859_1) { ! return encode8859_1(coder, val); ! } ! if (cs == US_ASCII) { ! return encodeASCII(coder, val); ! } ! se = new StringEncoder(cs, csn); ! } ! } catch (IllegalCharsetNameException x) {} ! if (se == null) { ! throw new UnsupportedEncodingException (csn); ! } ! set(encoder, se); ! } ! return se.encode(coder, val); } static byte[] encode(Charset cs, byte coder, byte[] val) { ! if (cs == UTF_8) { ! return encodeUTF8(coder, val, true); ! } ! if (cs == ISO_8859_1) { ! return encode8859_1(coder, val); ! } ! if (cs == US_ASCII) { ! return encodeASCII(coder, val); ! } ! CharsetEncoder ce = cs.newEncoder(); ! // fastpath for ascii compatible ! if (coder == LATIN1 && (((ce instanceof ArrayEncoder) && ! ((ArrayEncoder)ce).isASCIICompatible() && ! !hasNegatives(val, 0, val.length)))) { ! return Arrays.copyOf(val, val.length); } int len = val.length >> coder; // assume LATIN1=0/UTF16=1; ! 
int en = scale(len, ce.maxBytesPerChar()); byte[] ba = new byte[en]; if (len == 0) { return ba; } - ce.onMalformedInput(CodingErrorAction.REPLACE) - .onUnmappableCharacter(CodingErrorAction.REPLACE) - .reset(); - if (ce instanceof ArrayEncoder) { - int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba) - : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba); - if (blen != -1) { - return safeTrim(ba, blen, true); - } - } - boolean isTrusted = cs.getClass().getClassLoader0() == null || - System.getSecurityManager() == null; char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val) : StringUTF16.toChars(val); ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca, 0, len); try { ! CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) cr.throwException(); ! cr = ce.flush(bb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { throw new Error(x); } ! return safeTrim(ba, bb.position(), isTrusted); } static byte[] encode(byte coder, byte[] val) { Charset cs = Charset.defaultCharset(); ! if (cs == UTF_8) { ! return encodeUTF8(coder, val, true); ! } ! if (cs == ISO_8859_1) { ! return encode8859_1(coder, val); } - if (cs == US_ASCII) { - return encodeASCII(coder, val); } ! StringEncoder se = deref(encoder); ! if (se == null || !cs.name().equals(se.cs.name())) { ! se = new StringEncoder(cs, cs.name()); ! set(encoder, se); } ! return se.encode(coder, val); } /** * Print a message directly to stderr, bypassing all character conversion * methods. * @param msg message to print */ private static native void err(String msg); /* The cached Result for each thread */ ! private static final ThreadLocal<StringCoding.Result> ! resultCached = new ThreadLocal<>() { ! protected StringCoding.Result initialValue() { ! return new StringCoding.Result(); }}; ////////////////////////// ascii ////////////////////////////// ! private static Result decodeASCII(byte[] ba, int off, int len) { ! 
Result result = resultCached.get(); if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) { return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1); } byte[] dst = new byte[len<<1]; --- 243,519 ---- LATIN1); } else { return result.with(StringLatin1.inflate(ba, off, len), UTF16); } } ! ! int en = scale(len, dec.maxCharsPerByte()); char[] ca = new char[en]; ! dec.reset(); ! if (dec instanceof ArrayDecoder) { ! int clen = ((ArrayDecoder)dec).decode(ba, off, len, ca); return result.with(ca, 0, clen); } ! ByteBuffer bb = ByteBuffer.wrap(ba, off, len); CharBuffer cb = CharBuffer.wrap(ca); try { ! CoderResult cr = dec.decode(bb, cb, true); if (!cr.isUnderflow()) cr.throwException(); ! cr = dec.flush(cb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { // Substitution is always enabled, // so this shouldn't happen throw new Error(x); } return result.with(ca, 0, cb.position()); } ! byte[] encode0(byte coder, byte[] val) { ! // fastpath for ascii compatible ! if (coder == LATIN1 && isASCIICompatible && ! !hasNegatives(val, 0, val.length)) { ! return Arrays.copyOf(val, val.length); } ! int len = val.length >> coder; // assume LATIN1=0/UTF16=1; ! ! int en = scale(len, enc.maxBytesPerChar()); ! byte[] ba = new byte[en]; ! if (len == 0) { ! return ba; } ! enc.reset(); ! if (enc instanceof ArrayEncoder) { ! int blen = (coder == LATIN1 ) ! ? ((ArrayEncoder)enc).encodeFromLatin1(val, 0, len, ba) ! : ((ArrayEncoder)enc).encodeFromUTF16(val, 0, len, ba); ! if (blen != -1) { ! return safeTrim(ba, blen, isTrusted); } } ! char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val) ! : StringUTF16.toChars(val); ! ByteBuffer bb = ByteBuffer.wrap(ba); ! CharBuffer cb = CharBuffer.wrap(ca, 0, len); ! try { ! CoderResult cr = enc.encode(cb, bb, true); ! if (!cr.isUnderflow()) ! cr.throwException(); ! cr = enc.flush(bb); ! if (!cr.isUnderflow()) ! cr.throwException(); ! } catch (CharacterCodingException x) { ! // Substitution is always enabled, ! 
// so this shouldn't happen ! throw new Error(x); } ! return safeTrim(ba, bb.position(), isTrusted); } ! int encode0(byte coder, byte[] val, int srcBegin, int srcEnd, ByteBuffer dst) { ! ! // slow String.byte[]->char[]->CharBuffer path ! // TBD: ArrayEncoder.encodeXXX(val, Bytebuffer/dst); ! char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val) ! : StringUTF16.toChars(val); ! CharBuffer cb = CharBuffer.wrap(ca, srcBegin, srcEnd - srcBegin); ! ! enc.reset(); ! CoderResult cr = enc.reset().encode(cb, dst, true); ! // TBD: seems nothing can benn done for overflow, just return. ! if (cr.isUnderflow()) { ! cr = enc.flush(dst); } ! return cb.position() - srcBegin; } ! ! //////////////////////////////////////////////////////////////////////// ! ! static Result decode(String charsetName, byte[] ba, int off, int len) ! throws UnsupportedEncodingException ! { ! return getCoder(charsetName).decode0(ba, off, len); } ! static Result decode(Charset cs, byte[] ba, int off, int len) { ! StringCoder sc = getCoder(cs); ! if (sc != null && sc.cs == cs) { ! return sc.decode0(ba, off, len); } ! ! // (1) we never cache the "external" cs. new CharsetDecoder every time ! // (2) defensive copy of the input byte[] does have a performance impact ! // so only when (sm!=null && classLoader0!=null) ! CharsetDecoder dec = cs.newDecoder() ! .onMalformedInput(CodingErrorAction.REPLACE) ! .onUnmappableCharacter(CodingErrorAction.REPLACE); ! int en = scale(len, dec.maxCharsPerByte()); if (len == 0) { return new Result().with(); } char[] ca = new char[en]; ! dec.reset(); if (cs.getClass().getClassLoader0() != null && System.getSecurityManager() != null) { + // make defensive copy before passing to untrusted ba = Arrays.copyOfRange(ba, off, off + len); off = 0; } ByteBuffer bb = ByteBuffer.wrap(ba, off, len); CharBuffer cb = CharBuffer.wrap(ca); try { ! CoderResult cr = dec.decode(bb, cb, true); if (!cr.isUnderflow()) cr.throwException(); ! 
cr = dec.flush(cb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { ! // replacement is enabled, so should never be here throw new Error(x); } return new Result().with(ca, 0, cb.position()); } static Result decode(byte[] ba, int off, int len) { + // return getCoder(Charset.defaultCharset()).decode0(ba, off, len); Charset cs = Charset.defaultCharset(); ! StringCoder sc = getCoder(cs); ! if (sc != null) ! return sc.decode0(ba, off, len); ! try { ! return getCoder(cs.name()).decode0(ba, off, len); ! } catch (UnsupportedEncodingException x) { ! throw new Error(x); // should never be here } } ! static Result decode(Charset cs, ByteBuffer src) { ! if (src.hasArray()) { ! Result ret = decode(cs, ! src.array(), ! src.arrayOffset() + src.position(), ! src.remaining()); ! src.position(src.limit()); ! return ret; } ! // TBD: there is optimization opportinity here to use the ! // "buf" directly for ascii-only input, if the decoder is ! // "trustful" and "ascii-compatible. ! byte[] buf = new byte[src.remaining()]; ! src.get(buf); ! return decode(cs, buf, 0, buf.length); } ! ///////////////////////////////////////////////////////////////////// ! static byte[] encode(String csn, byte coder, byte[] val) throws UnsupportedEncodingException { ! return getCoder(csn).encode0(coder, val); } static byte[] encode(Charset cs, byte coder, byte[] val) { ! StringCoder sc = getCoder(cs); ! if (sc != null && sc.cs == cs) { ! return sc.encode0(coder, val); } + + CharsetEncoder enc = cs.newEncoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE); + // "external" charset comes here int len = val.length >> coder; // assume LATIN1=0/UTF16=1; ! int en = scale(len, enc.maxBytesPerChar()); byte[] ba = new byte[en]; if (len == 0) { return ba; } char[] ca = (coder == LATIN1 ) ? 
StringLatin1.toChars(val) : StringUTF16.toChars(val); ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca, 0, len); try { ! CoderResult cr = enc.encode(cb, bb, true); if (!cr.isUnderflow()) cr.throwException(); ! cr = enc.flush(bb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { throw new Error(x); } ! ! return safeTrim(ba, bb.position(), ! cs.getClass().getClassLoader0() == null); } static byte[] encode(byte coder, byte[] val) { Charset cs = Charset.defaultCharset(); ! StringCoder sc = getCoder(cs); ! if (sc != null) ! return sc.encode0(coder, val); ! try { ! return getCoder(cs.name()).encode0(coder, val); ! } catch (UnsupportedEncodingException x) { ! throw new Error(x); // should never be here } } ! ! static int encode(Charset cs, byte coder, byte[] val, ! int srcBegin, int srcEnd, ByteBuffer dst) { ! ! StringCoder sc = getCoder(cs); ! if (sc != null) ! return sc.encode0(coder, val, srcBegin, srcEnd, dst); ! ! CharsetEncoder enc = cs.newEncoder() ! .onMalformedInput(CodingErrorAction.REPLACE) ! .onUnmappableCharacter(CodingErrorAction.REPLACE); ! ! char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val) ! : StringUTF16.toChars(val); ! // slow String.byte[]->char[]->CharBuffer path ! CharBuffer cb = CharBuffer.wrap(ca, srcBegin, srcEnd - srcBegin); ! CoderResult cr = enc.reset().encode(cb, dst, true); ! // TBD: seems nothing can benn done for overflow, just return. ! // @See StringEncoder.encode(), which should never overflow ! if (cr.isUnderflow()) { ! cr = enc.flush(dst); } ! return cb.position() - srcBegin; } /** * Print a message directly to stderr, bypassing all character conversion * methods. * @param msg message to print */ private static native void err(String msg); /* The cached Result for each thread */ ! // private static final ThreadLocal<StringCoder.Result> resultCached = ! private static final ThreadLocal<Result> resultCached = ! new ThreadLocal<>() { ! protected Result initialValue() { ! 
return new Result(); }}; + private static char repl = '\ufffd'; + ////////////////////////// ascii ////////////////////////////// ! private static class SC_ASCII extends StringCoder { ! SC_ASCII(Charset cs, String rcn) { ! super(cs, rcn, null, null); ! } ! ! Result decode0(byte[] ba, int off, int len) { ! // Result result = resultCached.get(); if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) { return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1); } byte[] dst = new byte[len<<1];
*** 530,540 **** putChar(dst, dp++, (b >= 0) ? (char)b : repl); } return result.with(dst, UTF16); } ! private static byte[] encodeASCII(byte coder, byte[] val) { if (coder == LATIN1) { byte[] dst = new byte[val.length]; for (int i = 0; i < val.length; i++) { if (val[i] < 0) { dst[i] = '?'; --- 523,533 ---- putChar(dst, dp++, (b >= 0) ? (char)b : repl); } return result.with(dst, UTF16); } ! byte[] encode0(byte coder, byte[] val) { if (coder == LATIN1) { byte[] dst = new byte[val.length]; for (int i = 0; i < val.length; i++) { if (val[i] < 0) { dst[i] = '?';
*** 563,582 **** return dst; } return Arrays.copyOf(dst, dp); } ! ////////////////////////// latin1/8859_1 /////////////////////////// ! ! private static Result decodeLatin1(byte[] ba, int off, int len) { ! Result result = resultCached.get(); ! if (COMPACT_STRINGS) { ! return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1); } else { ! return result.with(StringLatin1.inflate(ba, off, len), UTF16); } } @HotSpotIntrinsicCandidate private static int implEncodeISOArray(byte[] sa, int sp, byte[] da, int dp, int len) { int i = 0; --- 556,596 ---- return dst; } return Arrays.copyOf(dst, dp); } ! int encode0(byte coder, byte[] val, int srcBegin, int srcEnd, ByteBuffer dst) { ! int sp = srcBegin; ! int sl = srcBegin + Math.min(srcEnd - srcBegin, dst.remaining()); ! if (coder == LATIN1) { ! while (sp < sl) { ! if (val[sp] < 0) { ! dst.put((byte)'?'); } else { ! dst.put(val[sp]); ! } ! sp++; ! } ! return sp - srcBegin; ! } ! while (sp < sl) { ! char c = StringUTF16.getChar(val, sp++); ! if (c < 0x80) { ! dst.put((byte)c); ! continue; ! } ! if (Character.isHighSurrogate(c) && sp < sl && ! Character.isLowSurrogate(StringUTF16.getChar(val, sp))) { ! sp++; ! } ! dst.put((byte)'?'); } + return sp - srcBegin; } + } + + ////////////////////////// latin1/8859_1 /////////////////////////// @HotSpotIntrinsicCandidate private static int implEncodeISOArray(byte[] sa, int sp, byte[] da, int dp, int len) { int i = 0;
*** 587,597 **** da[dp++] = (byte)c; } return i; } ! private static byte[] encode8859_1(byte coder, byte[] val) { if (coder == LATIN1) { return Arrays.copyOf(val, val.length); } int len = val.length >> 1; byte[] dst = new byte[len]; --- 601,628 ---- da[dp++] = (byte)c; } return i; } ! private static class SC_8859_1 extends StringCoder { ! ! SC_8859_1(Charset cs, String rcn) { ! super(cs, rcn, null, null); ! } ! ! Result decode0(byte[] ba, int off, int len) { ! // Result result = resultCached.get(); ! if (COMPACT_STRINGS) { ! return result.with(Arrays.copyOfRange(ba, off, off + len), ! LATIN1); ! } else { ! return result.with(StringLatin1.inflate(ba, off, len), ! UTF16); ! } ! } ! ! byte[] encode0(byte coder, byte[] val) { if (coder == LATIN1) { return Arrays.copyOf(val, val.length); } int len = val.length >> 1; byte[] dst = new byte[len];
*** 616,626 **** return dst; } return Arrays.copyOf(dst, dp); } ! //////////////////////////////// utf8 //////////////////////////////////// private static boolean isNotContinuation(int b) { return (b & 0xc0) != 0x80; } --- 647,728 ---- return dst; } return Arrays.copyOf(dst, dp); } ! int encode0(byte coder, byte[] val, int srcBegin, int srcEnd, ByteBuffer dst) { ! if (coder == LATIN1) { ! int len = Math.min(srcEnd - srcBegin, dst.remaining()); ! dst.put(val, srcBegin, len); ! return len; ! } ! int sp = srcBegin; ! int sl = srcEnd; ! int dr = dst.remaining(); ! while (sp < sl && dr-- > 0) { ! char c = StringUTF16.getChar(val, sp++); ! if (c < '\u0100') { ! dst.put((byte)c); ! } else { ! if (Character.isHighSurrogate(c) && sp < sl && ! Character.isLowSurrogate(StringUTF16.getChar(val, sp))) { ! sp++; ! } ! dst.put((byte)'?'); ! } ! } ! return sp - srcBegin; ! } ! } ! ! private static class SC_UTF_8 extends StringCoder { ! ! SC_UTF_8(Charset cs, String rcn) { ! super(cs, rcn, null, null); ! } ! ! Result decode0(byte[] src, int sp, int len) { ! // Result result = resultCached.get(); ! // ascii-biased, which has a negative impact if there is non-ascii input ! if (COMPACT_STRINGS && !hasNegatives(src, sp, len)) ! return result.with(Arrays.copyOfRange(src, sp, sp + len), LATIN1); ! return decodeUTF8(src, sp, len, true); ! } ! ! byte[] encode0(byte coder, byte[] val) { ! return encodeUTF8(coder, val, true); ! } ! ! int encode0(byte coder, byte[] val, int srcBegin, int srcEnd, ByteBuffer dst) { ! ! if (coder == UTF16) ! return encodeUTF8_UTF16(val, srcBegin, srcEnd, dst); ! ! int len = Math.min(srcEnd - srcBegin, dst.remaining()); ! if (!hasNegatives(val, srcBegin, len)) { ! dst.put(val, srcBegin, len); ! return len; ! } ! int sp = srcBegin; ! int dr = dst.remaining(); ! while (sp < srcEnd && dr-- > 0) { ! byte c = val[sp++]; ! if (c < 0) { ! if (dr-- <= 0) ! return sp - 1 - srcBegin; // no room for the 2-byte form: val[sp-1] was not written, so do not count it ! dst.put((byte)(0xc0 | ((c & 0xff) >> 6))); ! dst.put((byte)(0x80 | (c & 0x3f))); ! } else { ! dst.put(c); ! } ! } ! 
return sp - srcBegin; ! } ! ! } ! ! ////////////////////////////////////////////////////// private static boolean isNotContinuation(int b) { return (b & 0xc0) != 0x80; }
*** 675,695 **** private static void throwMalformed(int off, int nb) { throw new IllegalArgumentException("malformed input off : " + off + ", length : " + nb); } ! private static char repl = '\ufffd'; ! ! private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) { ! // ascii-bais, which has a relative impact to the non-ascii-only bytes ! if (COMPACT_STRINGS && !hasNegatives(src, sp, len)) ! return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len), ! LATIN1); ! return decodeUTF8_0(src, sp, len, doReplace); ! } ! ! private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) { Result ret = resultCached.get(); int sl = sp + len; int dp = 0; byte[] dst = new byte[len]; --- 777,787 ---- private static void throwMalformed(int off, int nb) { throw new IllegalArgumentException("malformed input off : " + off + ", length : " + nb); } ! static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) { Result ret = resultCached.get(); int sl = sp + len; int dp = 0; byte[] dst = new byte[len];
*** 853,863 **** dst = Arrays.copyOf(dst, dp << 1); } return ret.with(dst, UTF16); } ! private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) { if (coder == UTF16) return encodeUTF8_UTF16(val, doReplace); if (!hasNegatives(val, 0, val.length)) return Arrays.copyOf(val, val.length); --- 945,955 ---- dst = Arrays.copyOf(dst, dp << 1); } return ret.with(dst, UTF16); } ! static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) { if (coder == UTF16) return encodeUTF8_UTF16(val, doReplace); if (!hasNegatives(val, 0, val.length)) return Arrays.copyOf(val, val.length);
*** 876,886 **** if (dp == dst.length) return dst; return Arrays.copyOf(dst, dp); } ! private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) { int dp = 0; int sp = 0; int sl = val.length >> 1; byte[] dst = new byte[sl * 3]; char c; --- 968,978 ---- if (dp == dst.length) return dst; return Arrays.copyOf(dst, dp); } ! static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) { int dp = 0; int sp = 0; int sl = val.length >> 1; byte[] dst = new byte[sl * 3]; char c;
*** 927,945 **** return dst; } return Arrays.copyOf(dst, dp); } ////////////////////// for j.u.z.ZipCoder ////////////////////////// /* * Throws iae, instead of replacing, if malformed or unmappble. */ static String newStringUTF8NoRepl(byte[] src, int off, int len) { if (COMPACT_STRINGS && !hasNegatives(src, off, len)) return new String(Arrays.copyOfRange(src, off, off + len), LATIN1); ! Result ret = decodeUTF8_0(src, off, len, false); return new String(ret.value, ret.coder); } /* * Throws iae, instead of replacing, if unmappble. --- 1019,1094 ---- return dst; } return Arrays.copyOf(dst, dp); } + static int encodeUTF8_UTF16(byte[] val, int srcBegin, int srcEnd, ByteBuffer dst) + { + int sp = srcBegin; + int sl = srcEnd; + int dr = dst.remaining(); + + while (sp < sl) { + char c = StringUTF16.getChar(val, sp++); + if (c < 0x80) { + if (dr < 1) + break; + dst.put((byte)c); + dr--; + } else if (c < 0x800) { + if (dr < 2) + break; + dst.put((byte)(0xc0 | (c >> 6))); + dst.put((byte)(0x80 | (c & 0x3f))); + dr -= 2; + } else if (Character.isSurrogate(c)) { + int uc = -1; + char c2; + + if (sp == sl && sl < (val.length >> 1)) + return sp - srcBegin; + + if (Character.isHighSurrogate(c) && sp < sl && + Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) { + uc = Character.toCodePoint(c, c2); + } + if (uc < 0) { + if (dr-- <= 0) + break; + dst.put((byte)'?'); + } else { + if (dr < 4) + break; + dst.put((byte)(0xf0 | ((uc >> 18)))); + dst.put((byte)(0x80 | ((uc >> 12) & 0x3f))); + dst.put((byte)(0x80 | ((uc >> 6) & 0x3f))); + dst.put((byte)(0x80 | (uc & 0x3f))); + dr -= 4; + sp++; // 2 chars + } + } else { + if (dr < 3) + break; + // 3 bytes, 16 bits + dst.put((byte)(0xe0 | ((c >> 12)))); + dst.put((byte)(0x80 | ((c >> 6) & 0x3f))); + dst.put((byte)(0x80 | (c & 0x3f))); + dr -= 3; + } + } + return sp - srcBegin; + } + ////////////////////// for j.u.z.ZipCoder ////////////////////////// /* * Throws iae, instead of replacing, if malformed or unmappble. 
*/ static String newStringUTF8NoRepl(byte[] src, int off, int len) { if (COMPACT_STRINGS && !hasNegatives(src, off, len)) return new String(Arrays.copyOfRange(src, off, off + len), LATIN1); ! Result ret = decodeUTF8(src, off, len, false); return new String(ret.value, ret.coder); } /* * Throws iae, instead of replacing, if unmappble.