src/java.base/share/classes/java/lang/StringCoder.java
Print this page
*** 1,7 ****
/*
! * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
--- 1,7 ----
/*
! * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
*** 51,87 ****
import static java.lang.Character.highSurrogate;
import static java.lang.Character.lowSurrogate;
import static java.lang.Character.isSupplementaryCodePoint;
import static java.lang.StringUTF16.putChar;
/**
* Utility class for string encoding and decoding.
*/
! class StringCoding {
!
! private StringCoding() { }
! /** The cached coders for each thread */
! private static final ThreadLocal<SoftReference<StringDecoder>> decoder =
! new ThreadLocal<>();
! private static final ThreadLocal<SoftReference<StringEncoder>> encoder =
new ThreadLocal<>();
private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
! private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
! SoftReference<T> sr = tl.get();
if (sr == null)
return null;
return sr.get();
}
! private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
! tl.set(new SoftReference<>(ob));
}
// Trim the given byte array to the given length
private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
--- 51,87 ----
import static java.lang.Character.highSurrogate;
import static java.lang.Character.lowSurrogate;
import static java.lang.Character.isSupplementaryCodePoint;
import static java.lang.StringUTF16.putChar;
+ //import static java.lang.StringCoding.Result;
+
/**
* Utility class for string encoding and decoding.
*/
! class StringCoder {
! /** The cached coder for each thread */
! private static final ThreadLocal<SoftReference<StringCoder>> cachedCoder =
new ThreadLocal<>();
private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
! // private static StringCoder ssc = new UTF_8(cs, csn);
!
! private static StringCoder deref() {
! SoftReference<StringCoder> sr = cachedCoder.get();
if (sr == null)
return null;
return sr.get();
}
! private static void setref(StringCoder sc) {
! cachedCoder.set(new SoftReference<>(sc));
}
// Trim the given byte array to the given length
private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
*** 94,112 ****
// We need to perform double, not float, arithmetic; otherwise
// we lose low order bits when len is larger than 2**24.
return (int)(len * (double)expansionFactor);
}
! private static Charset lookupCharset(String csn) {
if (Charset.isSupported(csn)) {
try {
! return Charset.forName(csn);
} catch (UnsupportedCharsetException x) {
throw new Error(x);
}
}
! return null;
}
static class Result {
byte[] value;
byte coder;
--- 94,146 ----
// We need to perform double, not float, arithmetic; otherwise
// we lose low order bits when len is larger than 2**24.
return (int)(len * (double)expansionFactor);
}
! /**
!  * Creates a new coder for the given charset. UTF-8, ISO-8859-1 and
!  * US-ASCII get their specialized subclasses; every other charset gets the
!  * generic coder. The charset's own name is recorded as the requested name.
!  */
! private static StringCoder newCoder(Charset cs) {
!     final String name = cs.name();
!     if (cs == UTF_8) {
!         return new SC_UTF_8(cs, name);
!     } else if (cs == ISO_8859_1) {
!         return new SC_8859_1(cs, name);
!     } else if (cs == US_ASCII) {
!         return new SC_ASCII(cs, name);
!     }
!     return new StringCoder(cs, name);
! }
!
! /**
!  * Returns the calling thread's cached coder for {@code cs}, creating and
!  * caching a new one when the cache is empty or holds a coder for a
!  * different charset.
!  *
!  * @param cs the charset to look up
!  * @return the coder for {@code cs}, or {@code null} if {@code cs} is not a
!  *         {@code HistoricallyNamedCharset}; nothing is cached in that case
!  */
! private static StringCoder getCoder(Charset cs) {
!     // cache any cs implements HNC
!     if (!(cs instanceof HistoricallyNamedCharset)) {
!         return null;
!     }
!     StringCoder sc = deref();
!     if (sc == null || sc.cs != cs) {
!         // Cache miss: return the coder that was just created, not the
!         // stale (or null) entry that was read from the cache.
!         sc = newCoder(cs);
!         setref(sc);
!     }
!     return sc;
! }
!
! private static StringCoder getCoder(String csn)
! throws UnsupportedEncodingException {
!
! StringCoder sc = deref();
! if ((sc == null) || !(csn.equals(sc.requestedCharsetName()) ||
! csn.equals(sc.charsetName()))) {
! sc = null;
! try {
if (Charset.isSupported(csn)) {
try {
! sc = newCoder(Charset.forName(csn));
} catch (UnsupportedCharsetException x) {
throw new Error(x);
}
}
! } catch (IllegalCharsetNameException x) {}
! if (sc == null)
! throw new UnsupportedEncodingException(csn);
! setref(sc);
! }
! return sc;
}
static class Result {
byte[] value;
byte coder;
*** 146,185 ****
}
}
return false;
}
! // -- Decoding --
! static class StringDecoder {
! private final String requestedCharsetName;
! private final Charset cs;
! private final boolean isASCIICompatible;
! private final CharsetDecoder cd;
! protected final Result result;
! StringDecoder(Charset cs, String rcn) {
! this.requestedCharsetName = rcn;
this.cs = cs;
! this.cd = cs.newDecoder()
! .onMalformedInput(CodingErrorAction.REPLACE)
! .onUnmappableCharacter(CodingErrorAction.REPLACE);
this.result = new Result();
! this.isASCIICompatible = (cd instanceof ArrayDecoder) &&
! ((ArrayDecoder)cd).isASCIICompatible();
}
! String charsetName() {
if (cs instanceof HistoricallyNamedCharset)
return ((HistoricallyNamedCharset)cs).historicalName();
return cs.name();
}
final String requestedCharsetName() {
return requestedCharsetName;
}
! Result decode(byte[] ba, int off, int len) {
if (len == 0) {
return result.with();
}
// fastpath for ascii compatible
if (isASCIICompatible && !hasNegatives(ba, off, len)) {
--- 180,240 ----
}
}
return false;
}
! ////////////////////////////////////////////////////////////////////////
! final Charset cs;
! final String requestedCharsetName;
! final boolean isASCIICompatible;
! final Result result;
! final boolean isTrusted;
! final CharsetDecoder dec;
! CharsetEncoder enc;
!
! /**
!  * Creates a coder on top of the given decoder/encoder pair.
!  *
!  * @param cs  the charset this coder handles
!  * @param rcn the charset name this coder was requested under
!  * @param dec the decoder to use; if either {@code dec} or {@code enc} is
!  *            null the coder is flagged as ASCII compatible
!  * @param enc the encoder to use, see {@code dec}
!  */
! private StringCoder(Charset cs,
!                     String rcn,
!                     CharsetDecoder dec,
!                     CharsetEncoder enc) {
      this.cs = cs;
!     this.requestedCharsetName = rcn;
!     this.dec = dec;
!     this.enc = enc;
!     this.isTrusted = (cs.getClass().getClassLoader0() == null);
      this.result = new Result();
!     if (dec != null && enc != null) {
!         this.isASCIICompatible = (dec instanceof ArrayDecoder) &&
!             ((ArrayDecoder)dec).isASCIICompatible();
!         // '==' binds tighter than '&&': the encoder side must be wrapped
!         // in parentheses, otherwise the cast below is reached (and
!         // fails) for an encoder that is not an ArrayEncoder.
!         assert isASCIICompatible ==
!             ((enc instanceof ArrayEncoder) &&
!              ((ArrayEncoder)enc).isASCIICompatible());
!     } else {
!         this.isASCIICompatible = true;
!     }
! }
!
! private StringCoder(Charset cs, String rcn) {
! this(cs, rcn,
! cs.newDecoder()
! .onMalformedInput(CodingErrorAction.REPLACE)
! .onUnmappableCharacter(CodingErrorAction.REPLACE),
! cs.newEncoder()
! .onMalformedInput(CodingErrorAction.REPLACE)
! .onUnmappableCharacter(CodingErrorAction.REPLACE));
}
! final String charsetName() {
if (cs instanceof HistoricallyNamedCharset)
return ((HistoricallyNamedCharset)cs).historicalName();
return cs.name();
}
final String requestedCharsetName() {
return requestedCharsetName;
}
! Result decode0(byte[] ba, int off, int len) {
if (len == 0) {
return result.with();
}
// fastpath for ascii compatible
if (isASCIICompatible && !hasNegatives(ba, off, len)) {
*** 188,526 ****
LATIN1);
} else {
return result.with(StringLatin1.inflate(ba, off, len), UTF16);
}
}
! int en = scale(len, cd.maxCharsPerByte());
char[] ca = new char[en];
! if (cd instanceof ArrayDecoder) {
! int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
return result.with(ca, 0, clen);
}
! cd.reset();
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
CharBuffer cb = CharBuffer.wrap(ca);
try {
! CoderResult cr = cd.decode(bb, cb, true);
if (!cr.isUnderflow())
cr.throwException();
! cr = cd.flush(cb);
if (!cr.isUnderflow())
cr.throwException();
} catch (CharacterCodingException x) {
// Substitution is always enabled,
// so this shouldn't happen
throw new Error(x);
}
return result.with(ca, 0, cb.position());
}
- }
! static Result decode(String charsetName, byte[] ba, int off, int len)
! throws UnsupportedEncodingException
! {
! StringDecoder sd = deref(decoder);
! String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
! if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
! || csn.equals(sd.charsetName()))) {
! sd = null;
! try {
! Charset cs = lookupCharset(csn);
! if (cs != null) {
! if (cs == UTF_8) {
! return decodeUTF8(ba, off, len, true);
}
! if (cs == ISO_8859_1) {
! return decodeLatin1(ba, off, len);
}
! if (cs == US_ASCII) {
! return decodeASCII(ba, off, len);
}
- sd = new StringDecoder(cs, csn);
}
! } catch (IllegalCharsetNameException x) {}
! if (sd == null)
! throw new UnsupportedEncodingException(csn);
! set(decoder, sd);
}
! return sd.decode(ba, off, len);
}
! static Result decode(Charset cs, byte[] ba, int off, int len) {
! if (cs == UTF_8) {
! return decodeUTF8(ba, off, len, true);
}
! if (cs == ISO_8859_1) {
! return decodeLatin1(ba, off, len);
}
! if (cs == US_ASCII) {
! return decodeASCII(ba, off, len);
}
! // (1)We never cache the "external" cs, the only benefit of creating
! // an additional StringDe/Encoder object to wrap it is to share the
! // de/encode() method. These SD/E objects are short-lived, the young-gen
! // gc should be able to take care of them well. But the best approach
! // is still not to generate them if not really necessary.
! // (2)The defensive copy of the input byte/char[] has a big performance
! // impact, as well as the outgoing result byte/char[]. Need to do the
! // optimization check of (sm==null && classLoader0==null) for both.
! // (3)There might be a timing gap in isTrusted setting. getClassLoader0()
! // is only checked (and then isTrusted gets set) when (SM==null). It is
! // possible that the SM==null for now but then SM is NOT null later
! // when safeTrim() is invoked...the "safe" way to do is to redundant
! // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
! // but it then can be argued that the SM is null when the operation
! // is started...
! CharsetDecoder cd = cs.newDecoder();
! // ascii fastpath
! if ((cd instanceof ArrayDecoder) &&
! ((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) {
! return decodeLatin1(ba, off, len);
}
! int en = scale(len, cd.maxCharsPerByte());
if (len == 0) {
return new Result().with();
}
- cd.onMalformedInput(CodingErrorAction.REPLACE)
- .onUnmappableCharacter(CodingErrorAction.REPLACE)
- .reset();
char[] ca = new char[en];
! if (cd instanceof ArrayDecoder) {
! int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
! return new Result().with(ca, 0, clen);
! }
if (cs.getClass().getClassLoader0() != null &&
System.getSecurityManager() != null) {
ba = Arrays.copyOfRange(ba, off, off + len);
off = 0;
}
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
CharBuffer cb = CharBuffer.wrap(ca);
try {
! CoderResult cr = cd.decode(bb, cb, true);
if (!cr.isUnderflow())
cr.throwException();
! cr = cd.flush(cb);
if (!cr.isUnderflow())
cr.throwException();
} catch (CharacterCodingException x) {
! // Substitution is always enabled,
! // so this shouldn't happen
throw new Error(x);
}
return new Result().with(ca, 0, cb.position());
}
static Result decode(byte[] ba, int off, int len) {
Charset cs = Charset.defaultCharset();
! if (cs == UTF_8) {
! return decodeUTF8(ba, off, len, true);
! }
! if (cs == ISO_8859_1) {
! return decodeLatin1(ba, off, len);
! }
! if (cs == US_ASCII) {
! return decodeASCII(ba, off, len);
! }
! StringDecoder sd = deref(decoder);
! if (sd == null || !cs.name().equals(sd.cs.name())) {
! sd = new StringDecoder(cs, cs.name());
! set(decoder, sd);
}
- return sd.decode(ba, off, len);
}
! // -- Encoding --
! private static class StringEncoder {
! private Charset cs;
! private CharsetEncoder ce;
! private final boolean isASCIICompatible;
! private final String requestedCharsetName;
! private final boolean isTrusted;
!
! private StringEncoder(Charset cs, String rcn) {
! this.requestedCharsetName = rcn;
! this.cs = cs;
! this.ce = cs.newEncoder()
! .onMalformedInput(CodingErrorAction.REPLACE)
! .onUnmappableCharacter(CodingErrorAction.REPLACE);
! this.isTrusted = (cs.getClass().getClassLoader0() == null);
! this.isASCIICompatible = (ce instanceof ArrayEncoder) &&
! ((ArrayEncoder)ce).isASCIICompatible();
! }
!
! String charsetName() {
! if (cs instanceof HistoricallyNamedCharset)
! return ((HistoricallyNamedCharset)cs).historicalName();
! return cs.name();
}
!
! final String requestedCharsetName() {
! return requestedCharsetName;
}
! byte[] encode(byte coder, byte[] val) {
! // fastpath for ascii compatible
! if (coder == LATIN1 && isASCIICompatible &&
! !hasNegatives(val, 0, val.length)) {
! return Arrays.copyOf(val, val.length);
! }
! int len = val.length >> coder; // assume LATIN1=0/UTF16=1;
! int en = scale(len, ce.maxBytesPerChar());
! byte[] ba = new byte[en];
! if (len == 0) {
! return ba;
! }
! if (ce instanceof ArrayEncoder) {
! int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
! : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
! if (blen != -1) {
! return safeTrim(ba, blen, isTrusted);
! }
! }
! char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
! : StringUTF16.toChars(val);
! ce.reset();
! ByteBuffer bb = ByteBuffer.wrap(ba);
! CharBuffer cb = CharBuffer.wrap(ca, 0, len);
! try {
! CoderResult cr = ce.encode(cb, bb, true);
! if (!cr.isUnderflow())
! cr.throwException();
! cr = ce.flush(bb);
! if (!cr.isUnderflow())
! cr.throwException();
! } catch (CharacterCodingException x) {
! // Substitution is always enabled,
! // so this shouldn't happen
! throw new Error(x);
! }
! return safeTrim(ba, bb.position(), isTrusted);
! }
! }
! static byte[] encode(String charsetName, byte coder, byte[] val)
throws UnsupportedEncodingException
{
! StringEncoder se = deref(encoder);
! String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
! if ((se == null) || !(csn.equals(se.requestedCharsetName())
! || csn.equals(se.charsetName()))) {
! se = null;
! try {
! Charset cs = lookupCharset(csn);
! if (cs != null) {
! if (cs == UTF_8) {
! return encodeUTF8(coder, val, true);
! }
! if (cs == ISO_8859_1) {
! return encode8859_1(coder, val);
! }
! if (cs == US_ASCII) {
! return encodeASCII(coder, val);
! }
! se = new StringEncoder(cs, csn);
! }
! } catch (IllegalCharsetNameException x) {}
! if (se == null) {
! throw new UnsupportedEncodingException (csn);
! }
! set(encoder, se);
! }
! return se.encode(coder, val);
}
static byte[] encode(Charset cs, byte coder, byte[] val) {
! if (cs == UTF_8) {
! return encodeUTF8(coder, val, true);
! }
! if (cs == ISO_8859_1) {
! return encode8859_1(coder, val);
! }
! if (cs == US_ASCII) {
! return encodeASCII(coder, val);
! }
! CharsetEncoder ce = cs.newEncoder();
! // fastpath for ascii compatible
! if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
! ((ArrayEncoder)ce).isASCIICompatible() &&
! !hasNegatives(val, 0, val.length)))) {
! return Arrays.copyOf(val, val.length);
}
int len = val.length >> coder; // assume LATIN1=0/UTF16=1;
! int en = scale(len, ce.maxBytesPerChar());
byte[] ba = new byte[en];
if (len == 0) {
return ba;
}
- ce.onMalformedInput(CodingErrorAction.REPLACE)
- .onUnmappableCharacter(CodingErrorAction.REPLACE)
- .reset();
- if (ce instanceof ArrayEncoder) {
- int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
- : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
- if (blen != -1) {
- return safeTrim(ba, blen, true);
- }
- }
- boolean isTrusted = cs.getClass().getClassLoader0() == null ||
- System.getSecurityManager() == null;
char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
: StringUTF16.toChars(val);
ByteBuffer bb = ByteBuffer.wrap(ba);
CharBuffer cb = CharBuffer.wrap(ca, 0, len);
try {
! CoderResult cr = ce.encode(cb, bb, true);
if (!cr.isUnderflow())
cr.throwException();
! cr = ce.flush(bb);
if (!cr.isUnderflow())
cr.throwException();
} catch (CharacterCodingException x) {
throw new Error(x);
}
! return safeTrim(ba, bb.position(), isTrusted);
}
static byte[] encode(byte coder, byte[] val) {
Charset cs = Charset.defaultCharset();
! if (cs == UTF_8) {
! return encodeUTF8(coder, val, true);
! }
! if (cs == ISO_8859_1) {
! return encode8859_1(coder, val);
}
- if (cs == US_ASCII) {
- return encodeASCII(coder, val);
}
! StringEncoder se = deref(encoder);
! if (se == null || !cs.name().equals(se.cs.name())) {
! se = new StringEncoder(cs, cs.name());
! set(encoder, se);
}
! return se.encode(coder, val);
}
/**
* Print a message directly to stderr, bypassing all character conversion
* methods.
* @param msg message to print
*/
private static native void err(String msg);
/* The cached Result for each thread */
! private static final ThreadLocal<StringCoding.Result>
! resultCached = new ThreadLocal<>() {
! protected StringCoding.Result initialValue() {
! return new StringCoding.Result();
}};
////////////////////////// ascii //////////////////////////////
! private static Result decodeASCII(byte[] ba, int off, int len) {
! Result result = resultCached.get();
if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) {
return result.with(Arrays.copyOfRange(ba, off, off + len),
LATIN1);
}
byte[] dst = new byte[len<<1];
--- 243,519 ----
LATIN1);
} else {
return result.with(StringLatin1.inflate(ba, off, len), UTF16);
}
}
!
! int en = scale(len, dec.maxCharsPerByte());
char[] ca = new char[en];
! dec.reset();
! if (dec instanceof ArrayDecoder) {
! int clen = ((ArrayDecoder)dec).decode(ba, off, len, ca);
return result.with(ca, 0, clen);
}
!
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
CharBuffer cb = CharBuffer.wrap(ca);
try {
! CoderResult cr = dec.decode(bb, cb, true);
if (!cr.isUnderflow())
cr.throwException();
! cr = dec.flush(cb);
if (!cr.isUnderflow())
cr.throwException();
} catch (CharacterCodingException x) {
// Substitution is always enabled,
// so this shouldn't happen
throw new Error(x);
}
return result.with(ca, 0, cb.position());
}
! /**
!  * Encodes the whole string value with this coder's encoder.
!  *
!  * @param coder LATIN1 or UTF16, the coder of {@code val}
!  * @param val   the string's internal bytes
!  * @return the encoded bytes, trimmed via safeTrim
!  */
! byte[] encode0(byte coder, byte[] val) {
!     final boolean latin1 = (coder == LATIN1);
!     // fastpath for ascii compatible
!     if (latin1 && isASCIICompatible && !hasNegatives(val, 0, val.length)) {
!         return Arrays.copyOf(val, val.length);
      }
!     int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
!
!     byte[] ba = new byte[scale(len, enc.maxBytesPerChar())];
!     if (len == 0) {
!         return ba;
      }
!     enc.reset();
!     if (enc instanceof ArrayEncoder) {
!         ArrayEncoder ae = (ArrayEncoder)enc;
!         int blen;
!         if (latin1) {
!             blen = ae.encodeFromLatin1(val, 0, len, ba);
!         } else {
!             blen = ae.encodeFromUTF16(val, 0, len, ba);
          }
!         // -1: the array encoder declined, fall through to the buffer path
!         if (blen != -1) {
!             return safeTrim(ba, blen, isTrusted);
          }
      }
!     char[] ca;
!     if (latin1) {
!         ca = StringLatin1.toChars(val);
!     } else {
!         ca = StringUTF16.toChars(val);
      }
!     ByteBuffer bb = ByteBuffer.wrap(ba);
!     CharBuffer cb = CharBuffer.wrap(ca, 0, len);
!     try {
!         CoderResult encoded = enc.encode(cb, bb, true);
!         if (!encoded.isUnderflow())
!             encoded.throwException();
!         CoderResult flushed = enc.flush(bb);
!         if (!flushed.isUnderflow())
!             flushed.throwException();
!     } catch (CharacterCodingException x) {
!         // Substitution is always enabled,
!         // so this shouldn't happen
!         throw new Error(x);
      }
!     return safeTrim(ba, bb.position(), isTrusted);
  }
! /**
!  * Encodes the chars in [srcBegin, srcEnd) of the string value into
!  * {@code dst} with this coder's encoder.
!  *
!  * @param coder    LATIN1 or UTF16, the coder of {@code val}
!  * @param val      the string's internal bytes
!  * @param srcBegin index of the first char to encode
!  * @param srcEnd   index after the last char to encode
!  * @param dst      the buffer receiving the encoded bytes
!  * @return how far the char buffer's position advanced past srcBegin
!  */
! int encode0(byte coder, byte[] val, int srcBegin, int srcEnd, ByteBuffer dst) {
!
!     // slow String.byte[]->char[]->CharBuffer path
!     // TBD: ArrayEncoder.encodeXXX(val, Bytebuffer/dst);
!     char[] ca = (coder == LATIN1) ? StringLatin1.toChars(val)
!                                   : StringUTF16.toChars(val);
!     CharBuffer cb = CharBuffer.wrap(ca, srcBegin, srcEnd - srcBegin);
!
!     // One reset is enough; reset() returns the encoder for chaining.
!     CoderResult cr = enc.reset().encode(cb, dst, true);
!     // TBD: seems nothing can be done for overflow, just return.
!     if (cr.isUnderflow()) {
!         enc.flush(dst);
!     }
!     return cb.position() - srcBegin;
! }
!
! ////////////////////////////////////////////////////////////////////////
!
! static Result decode(String charsetName, byte[] ba, int off, int len)
! throws UnsupportedEncodingException
! {
! return getCoder(charsetName).decode0(ba, off, len);
}
! static Result decode(Charset cs, byte[] ba, int off, int len) {
! StringCoder sc = getCoder(cs);
! if (sc != null && sc.cs == cs) {
! return sc.decode0(ba, off, len);
}
!
! // (1) we never cache the "external" cs. new CharsetDecoder every time
! // (2) defensive copy of the input byte[] does have a performance impact
! // so only when (sm!=null && classLoader0!=null)
! CharsetDecoder dec = cs.newDecoder()
! .onMalformedInput(CodingErrorAction.REPLACE)
! .onUnmappableCharacter(CodingErrorAction.REPLACE);
! int en = scale(len, dec.maxCharsPerByte());
if (len == 0) {
return new Result().with();
}
char[] ca = new char[en];
! dec.reset();
if (cs.getClass().getClassLoader0() != null &&
System.getSecurityManager() != null) {
+ // make defensive copy before passing to untrusted
ba = Arrays.copyOfRange(ba, off, off + len);
off = 0;
}
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
CharBuffer cb = CharBuffer.wrap(ca);
try {
! CoderResult cr = dec.decode(bb, cb, true);
if (!cr.isUnderflow())
cr.throwException();
! cr = dec.flush(cb);
if (!cr.isUnderflow())
cr.throwException();
} catch (CharacterCodingException x) {
! // replacement is enabled, so should never be here
throw new Error(x);
}
return new Result().with(ca, 0, cb.position());
}
+ /**
+  * Decodes {@code len} bytes of {@code ba}, starting at {@code off}, with
+  * the charset returned by {@code Charset.defaultCharset()}.
+  */
  static Result decode(byte[] ba, int off, int len) {
+     // return getCoder(Charset.defaultCharset()).decode0(ba, off, len);
      Charset cs = Charset.defaultCharset();
!     StringCoder sc = getCoder(cs);
!     // Use the coder only when it really is for cs, the same guard
!     // decode(Charset, byte[], int, int) applies; otherwise look it up
!     // by name.
!     if (sc != null && sc.cs == cs)
!         return sc.decode0(ba, off, len);
!     try {
!         return getCoder(cs.name()).decode0(ba, off, len);
!     } catch (UnsupportedEncodingException x) {
!         throw new Error(x); // should never be here
      }
  }
! /**
!  * Decodes the remaining bytes of {@code src} with {@code cs}. All
!  * remaining bytes of {@code src} are consumed.
!  */
! static Result decode(Charset cs, ByteBuffer src) {
!     if (!src.hasArray()) {
!         // TBD: there is an optimization opportunity here to use the
!         // "buf" directly for ascii-only input, if the decoder is
!         // "trusted" and "ascii-compatible".
!         byte[] copy = new byte[src.remaining()];
!         src.get(copy);
!         return decode(cs, copy, 0, copy.length);
!     }
!     // array-backed: decode straight out of the backing array
!     byte[] backing = src.array();
!     int start = src.arrayOffset() + src.position();
!     Result decoded = decode(cs, backing, start, src.remaining());
!     src.position(src.limit());
!     return decoded;
! }
! /////////////////////////////////////////////////////////////////////
! static byte[] encode(String csn, byte coder, byte[] val)
throws UnsupportedEncodingException
{
! return getCoder(csn).encode0(coder, val);
}
static byte[] encode(Charset cs, byte coder, byte[] val) {
! StringCoder sc = getCoder(cs);
! if (sc != null && sc.cs == cs) {
! return sc.encode0(coder, val);
}
+
+ CharsetEncoder enc = cs.newEncoder()
+ .onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE);
+ // "external" charset comes here
int len = val.length >> coder; // assume LATIN1=0/UTF16=1;
! int en = scale(len, enc.maxBytesPerChar());
byte[] ba = new byte[en];
if (len == 0) {
return ba;
}
char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
: StringUTF16.toChars(val);
ByteBuffer bb = ByteBuffer.wrap(ba);
CharBuffer cb = CharBuffer.wrap(ca, 0, len);
try {
! CoderResult cr = enc.encode(cb, bb, true);
if (!cr.isUnderflow())
cr.throwException();
! cr = enc.flush(bb);
if (!cr.isUnderflow())
cr.throwException();
} catch (CharacterCodingException x) {
throw new Error(x);
}
!
! return safeTrim(ba, bb.position(),
! cs.getClass().getClassLoader0() == null);
}
+ /**
+  * Encodes the string value with the charset returned by
+  * {@code Charset.defaultCharset()}.
+  *
+  * @param coder LATIN1 or UTF16, the coder of {@code val}
+  * @param val   the string's internal bytes
+  */
  static byte[] encode(byte coder, byte[] val) {
      Charset cs = Charset.defaultCharset();
!     StringCoder sc = getCoder(cs);
!     // Use the coder only when it really is for cs, the same guard
!     // encode(Charset, byte, byte[]) applies; otherwise look it up by
!     // name.
!     if (sc != null && sc.cs == cs)
!         return sc.encode0(coder, val);
!     try {
!         return getCoder(cs.name()).encode0(coder, val);
!     } catch (UnsupportedEncodingException x) {
!         throw new Error(x); // should never be here
      }
  }
!
! /**
!  * Encodes the chars in [srcBegin, srcEnd) of the string value into
!  * {@code dst} using {@code cs}.
!  *
!  * @param cs       the charset to encode with
!  * @param coder    LATIN1 or UTF16, the coder of {@code val}
!  * @param val      the string's internal bytes
!  * @param srcBegin index of the first char to encode
!  * @param srcEnd   index after the last char to encode
!  * @param dst      the buffer receiving the encoded bytes
!  * @return the count reported by the coder, or how far the char buffer's
!  *         position advanced past srcBegin on the slow path
!  */
! static int encode(Charset cs, byte coder, byte[] val,
!                   int srcBegin, int srcEnd, ByteBuffer dst) {
!
!     StringCoder sc = getCoder(cs);
!     // Use the coder only when it really is for cs, the same guard
!     // encode(Charset, byte, byte[]) applies.
!     if (sc != null && sc.cs == cs)
!         return sc.encode0(coder, val, srcBegin, srcEnd, dst);
!
!     CharsetEncoder enc = cs.newEncoder()
!         .onMalformedInput(CodingErrorAction.REPLACE)
!         .onUnmappableCharacter(CodingErrorAction.REPLACE);
!
!     char[] ca = (coder == LATIN1) ? StringLatin1.toChars(val)
!                                   : StringUTF16.toChars(val);
!     // slow String.byte[]->char[]->CharBuffer path
!     CharBuffer cb = CharBuffer.wrap(ca, srcBegin, srcEnd - srcBegin);
!     CoderResult cr = enc.reset().encode(cb, dst, true);
!     // TBD: seems nothing can be done for overflow, just return.
!     // Contrast with encode(Charset, byte, byte[]), which sizes its own
!     // output array from maxBytesPerChar().
!     if (cr.isUnderflow()) {
!         enc.flush(dst);
!     }
!     return cb.position() - srcBegin;
! }
/**
* Print a message directly to stderr, bypassing all character conversion
* methods.
* @param msg message to print
*/
private static native void err(String msg);
/* The cached Result for each thread */
! // private static final ThreadLocal<StringCoder.Result> resultCached =
! private static final ThreadLocal<Result> resultCached =
! new ThreadLocal<>() {
! protected Result initialValue() {
! return new Result();
}};
+ private static char repl = '\ufffd';
+
////////////////////////// ascii //////////////////////////////
! private static class SC_ASCII extends StringCoder {
! SC_ASCII(Charset cs, String rcn) {
! super(cs, rcn, null, null);
! }
!
! Result decode0(byte[] ba, int off, int len) {
! // Result result = resultCached.get();
if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) {
return result.with(Arrays.copyOfRange(ba, off, off + len),
LATIN1);
}
byte[] dst = new byte[len<<1];
*** 530,540 ****
putChar(dst, dp++, (b >= 0) ? (char)b : repl);
}
return result.with(dst, UTF16);
}
! private static byte[] encodeASCII(byte coder, byte[] val) {
if (coder == LATIN1) {
byte[] dst = new byte[val.length];
for (int i = 0; i < val.length; i++) {
if (val[i] < 0) {
dst[i] = '?';
--- 523,533 ----
putChar(dst, dp++, (b >= 0) ? (char)b : repl);
}
return result.with(dst, UTF16);
}
! byte[] encode0(byte coder, byte[] val) {
if (coder == LATIN1) {
byte[] dst = new byte[val.length];
for (int i = 0; i < val.length; i++) {
if (val[i] < 0) {
dst[i] = '?';
*** 563,582 ****
return dst;
}
return Arrays.copyOf(dst, dp);
}
! ////////////////////////// latin1/8859_1 ///////////////////////////
!
! private static Result decodeLatin1(byte[] ba, int off, int len) {
! Result result = resultCached.get();
! if (COMPACT_STRINGS) {
! return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
} else {
! return result.with(StringLatin1.inflate(ba, off, len), UTF16);
}
}
@HotSpotIntrinsicCandidate
private static int implEncodeISOArray(byte[] sa, int sp,
byte[] da, int dp, int len) {
int i = 0;
--- 556,596 ----
return dst;
}
return Arrays.copyOf(dst, dp);
}
! int encode0(byte coder, byte[] val, int srcBegin, int srcEnd, ByteBuffer dst) {
! int sp = srcBegin;
! int sl = srcBegin + Math.min(srcEnd - srcBegin, dst.remaining());
! if (coder == LATIN1) {
! while (sp < sl) {
! if (val[sp] < 0) {
! dst.put((byte)'?');
} else {
! dst.put(val[sp]);
! }
! sp++;
! }
! return sp - srcBegin;
! }
! while (sp < sl) {
! char c = StringUTF16.getChar(val, sp++);
! if (c < 0x80) {
! dst.put((byte)c);
! continue;
! }
! if (Character.isHighSurrogate(c) && sp < sl &&
! Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
! sp++;
! }
! dst.put((byte)'?');
}
+ return sp - srcBegin;
}
+ }
+
+ ////////////////////////// latin1/8859_1 ///////////////////////////
@HotSpotIntrinsicCandidate
private static int implEncodeISOArray(byte[] sa, int sp,
byte[] da, int dp, int len) {
int i = 0;
*** 587,597 ****
da[dp++] = (byte)c;
}
return i;
}
! private static byte[] encode8859_1(byte coder, byte[] val) {
if (coder == LATIN1) {
return Arrays.copyOf(val, val.length);
}
int len = val.length >> 1;
byte[] dst = new byte[len];
--- 601,628 ----
da[dp++] = (byte)c;
}
return i;
}
! private static class SC_8859_1 extends StringCoder {
!
! SC_8859_1(Charset cs, String rcn) {
! super(cs, rcn, null, null);
! }
!
! Result decode0(byte[] ba, int off, int len) {
! // Result result = resultCached.get();
! if (COMPACT_STRINGS) {
! return result.with(Arrays.copyOfRange(ba, off, off + len),
! LATIN1);
! } else {
! return result.with(StringLatin1.inflate(ba, off, len),
! UTF16);
! }
! }
!
! byte[] encode0(byte coder, byte[] val) {
if (coder == LATIN1) {
return Arrays.copyOf(val, val.length);
}
int len = val.length >> 1;
byte[] dst = new byte[len];
*** 616,626 ****
return dst;
}
return Arrays.copyOf(dst, dp);
}
! //////////////////////////////// utf8 ////////////////////////////////////
private static boolean isNotContinuation(int b) {
return (b & 0xc0) != 0x80;
}
--- 647,728 ----
return dst;
}
return Arrays.copyOf(dst, dp);
}
! /**
!  * Encodes the chars in [srcBegin, srcEnd) into {@code dst} as
!  * ISO-8859-1, writing one byte per char, or one '?' for a char at or
!  * above U+0100 (a surrogate pair counts as one such char).
!  *
!  * @return the number of chars consumed from {@code val}
!  */
! int encode0(byte coder, byte[] val, int srcBegin, int srcEnd, ByteBuffer dst) {
!     if (coder == LATIN1) {
!         // Latin1 bytes are already the encoded form: bulk copy what fits
!         int n = Math.min(srcEnd - srcBegin, dst.remaining());
!         dst.put(val, srcBegin, n);
!         return n;
!     }
!     int sp = srcBegin;
!     // every iteration writes exactly one byte, so count down the room
!     for (int room = dst.remaining(); sp < srcEnd && room > 0; room--) {
!         char c = StringUTF16.getChar(val, sp++);
!         byte out = (byte)'?';
!         if (c < '\u0100') {
!             out = (byte)c;
!         } else if (Character.isHighSurrogate(c) && sp < srcEnd &&
!                    Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
!             // swallow the low half so the pair yields a single '?'
!             sp++;
!         }
!         dst.put(out);
!     }
!     return sp - srcBegin;
! }
! }
!
! /**
!  * Coder specialized for UTF-8. It passes null for both the decoder and
!  * the encoder and works on the string's internal byte[] directly.
!  */
! private static class SC_UTF_8 extends StringCoder {
!
!     SC_UTF_8(Charset cs, String rcn) {
!         super(cs, rcn, null, null);
!     }
!
!     /** Decodes {@code len} bytes of {@code src} starting at {@code sp}. */
!     Result decode0(byte[] src, int sp, int len) {
!         // Result result = resultCached.get();
!         // ascii-bias, which has a negative impact if there is non-ascii
!         if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
!             return result.with(Arrays.copyOfRange(src, sp, sp + len), LATIN1);
!         return decodeUTF8(src, sp, len, true);
!     }
!
!     /** Encodes the whole string value; delegates to encodeUTF8. */
!     byte[] encode0(byte coder, byte[] val) {
!         return encodeUTF8(coder, val, true);
!     }
!
!     /**
!      * Encodes the chars in [srcBegin, srcEnd) into {@code dst}, stopping
!      * in front of the first char that no longer fits.
!      *
!      * @return the number of chars written to {@code dst}; a char that
!      *         did not fit is not counted
!      */
!     int encode0(byte coder, byte[] val, int srcBegin, int srcEnd, ByteBuffer dst) {
!
!         if (coder == UTF16)
!             return encodeUTF8_UTF16(val, srcBegin, srcEnd, dst);
!
!         int len = Math.min(srcEnd - srcBegin, dst.remaining());
!         if (!hasNegatives(val, srcBegin, len)) {
!             // pure ascii: the bytes are already valid UTF-8
!             dst.put(val, srcBegin, len);
!             return len;
!         }
!         int sp = srcBegin;
!         int dr = dst.remaining();
!         while (sp < srcEnd) {
!             byte c = val[sp];
!             if (c < 0) {
!                 // 0x80..0xff takes two bytes; check the room before
!                 // consuming the char so it is not reported as encoded
!                 if (dr < 2)
!                     break;
!                 dst.put((byte)(0xc0 | ((c & 0xff) >> 6)));
!                 dst.put((byte)(0x80 | (c & 0x3f)));
!                 dr -= 2;
!             } else {
!                 if (dr < 1)
!                     break;
!                 dst.put(c);
!                 dr--;
!             }
!             sp++;
!         }
!         return sp - srcBegin;
!     }
!
! }
!
! //////////////////////////////////////////////////////
private static boolean isNotContinuation(int b) {
return (b & 0xc0) != 0x80;
}
*** 675,695 ****
private static void throwMalformed(int off, int nb) {
throw new IllegalArgumentException("malformed input off : " + off +
", length : " + nb);
}
! private static char repl = '\ufffd';
!
! private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
! // ascii-bais, which has a relative impact to the non-ascii-only bytes
! if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
! return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
! LATIN1);
! return decodeUTF8_0(src, sp, len, doReplace);
! }
!
! private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
Result ret = resultCached.get();
int sl = sp + len;
int dp = 0;
byte[] dst = new byte[len];
--- 777,787 ----
private static void throwMalformed(int off, int nb) {
throw new IllegalArgumentException("malformed input off : " + off +
", length : " + nb);
}
! static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
Result ret = resultCached.get();
int sl = sp + len;
int dp = 0;
byte[] dst = new byte[len];
*** 853,863 ****
dst = Arrays.copyOf(dst, dp << 1);
}
return ret.with(dst, UTF16);
}
! private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
if (coder == UTF16)
return encodeUTF8_UTF16(val, doReplace);
if (!hasNegatives(val, 0, val.length))
return Arrays.copyOf(val, val.length);
--- 945,955 ----
dst = Arrays.copyOf(dst, dp << 1);
}
return ret.with(dst, UTF16);
}
! static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
if (coder == UTF16)
return encodeUTF8_UTF16(val, doReplace);
if (!hasNegatives(val, 0, val.length))
return Arrays.copyOf(val, val.length);
*** 876,886 ****
if (dp == dst.length)
return dst;
return Arrays.copyOf(dst, dp);
}
! private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
int dp = 0;
int sp = 0;
int sl = val.length >> 1;
byte[] dst = new byte[sl * 3];
char c;
--- 968,978 ----
if (dp == dst.length)
return dst;
return Arrays.copyOf(dst, dp);
}
! static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
int dp = 0;
int sp = 0;
int sl = val.length >> 1;
byte[] dst = new byte[sl * 3];
char c;
*** 927,945 ****
return dst;
}
return Arrays.copyOf(dst, dp);
}
////////////////////// for j.u.z.ZipCoder //////////////////////////
/*
* Throws iae, instead of replacing, if malformed or unmappble.
*/
static String newStringUTF8NoRepl(byte[] src, int off, int len) {
if (COMPACT_STRINGS && !hasNegatives(src, off, len))
return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
! Result ret = decodeUTF8_0(src, off, len, false);
return new String(ret.value, ret.coder);
}
/*
* Throws iae, instead of replacing, if unmappble.
--- 1019,1094 ----
return dst;
}
return Arrays.copyOf(dst, dp);
}
+ /**
+  * Encodes the UTF16 chars in [srcBegin, srcEnd) of {@code val} into
+  * {@code dst} as UTF-8, stopping in front of the first char (or
+  * surrogate pair) whose encoded form no longer fits. An unpaired
+  * surrogate is written as '?'.
+  *
+  * @param val      the string's internal bytes, UTF16 coded
+  * @param srcBegin index of the first char to encode
+  * @param srcEnd   index after the last char to encode
+  * @param dst      the buffer receiving the encoded bytes
+  * @return the number of chars consumed; a char that did not fit is not
+  *         counted
+  */
+ static int encodeUTF8_UTF16(byte[] val, int srcBegin, int srcEnd, ByteBuffer dst)
+ {
+     int sp = srcBegin;
+     int sl = srcEnd;
+     int dr = dst.remaining();
+
+     // sp is advanced only after the bytes were written, so a break on a
+     // full buffer leaves the pending char out of the returned count.
+     while (sp < sl) {
+         char c = StringUTF16.getChar(val, sp);
+         if (c < 0x80) {
+             if (dr < 1)
+                 break;
+             dst.put((byte)c);
+             dr--;
+             sp++;
+         } else if (c < 0x800) {
+             if (dr < 2)
+                 break;
+             dst.put((byte)(0xc0 | (c >> 6)));
+             dst.put((byte)(0x80 | (c & 0x3f)));
+             dr -= 2;
+             sp++;
+         } else if (Character.isSurrogate(c)) {
+             int uc = -1;
+             char c2;
+
+             // NOTE(review): a surrogate that is the last char of the range
+             // while the string continues is counted as consumed without
+             // any output, as before -- confirm that this is intended.
+             if (sp + 1 == sl && sl < (val.length >> 1))
+                 return sp + 1 - srcBegin;
+
+             if (Character.isHighSurrogate(c) && sp + 1 < sl &&
+                 Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp + 1))) {
+                 uc = Character.toCodePoint(c, c2);
+             }
+             if (uc < 0) {
+                 if (dr < 1)
+                     break;
+                 dst.put((byte)'?');
+                 dr--;
+                 sp++;
+             } else {
+                 if (dr < 4)
+                     break;
+                 dst.put((byte)(0xf0 | ((uc >> 18))));
+                 dst.put((byte)(0x80 | ((uc >> 12) & 0x3f)));
+                 dst.put((byte)(0x80 | ((uc >> 6) & 0x3f)));
+                 dst.put((byte)(0x80 | (uc & 0x3f)));
+                 dr -= 4;
+                 sp += 2;  // 2 chars
+             }
+         } else {
+             if (dr < 3)
+                 break;
+             // 3 bytes, 16 bits
+             dst.put((byte)(0xe0 | ((c >> 12))));
+             dst.put((byte)(0x80 | ((c >> 6) & 0x3f)));
+             dst.put((byte)(0x80 | (c & 0x3f)));
+             dr -= 3;
+             sp++;
+         }
+     }
+     return sp - srcBegin;
+ }
+
////////////////////// for j.u.z.ZipCoder //////////////////////////
/*
* Throws iae, instead of replacing, if malformed or unmappble.
*/
static String newStringUTF8NoRepl(byte[] src, int off, int len) {
if (COMPACT_STRINGS && !hasNegatives(src, off, len))
return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
! Result ret = decodeUTF8(src, off, len, false);
return new String(ret.value, ret.coder);
}
/*
* Throws iae, instead of replacing, if unmappble.