src/java.base/share/classes/java/lang/StringCoding.java
Print this page
*** 1,7 ****
/*
! * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
--- 1,7 ----
/*
! * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
*** 45,54 ****
--- 45,59 ----
import sun.nio.cs.StandardCharsets;
import static java.lang.String.LATIN1;
import static java.lang.String.UTF16;
import static java.lang.String.COMPACT_STRINGS;
+ import static java.lang.Character.isSurrogate;
+ import static java.lang.Character.highSurrogate;
+ import static java.lang.Character.lowSurrogate;
+ import static java.lang.Character.isSupplementaryCodePoint;
+ import static java.lang.StringUTF16.putChar;
/**
* Utility class for string encoding and decoding.
*/
*** 64,75 ****
private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
- private static boolean warnUnsupportedCharset = true;
-
private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
SoftReference<T> sr = tl.get();
if (sr == null)
return null;
return sr.get();
--- 69,78 ----
*** 78,88 ****
private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) { // stores ob in tl behind a newly created SoftReference
tl.set(new SoftReference<>(ob));
}
// Trim the given byte array to the given length
- //
private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
return ba;
else
return Arrays.copyOf(ba, len);
--- 81,90 ----
*** 103,123 ****
}
}
return null;
}
- private static void warnUnsupportedCharset(String csn) { // prints a warning naming csn, guarded by the warnUnsupportedCharset flag
- if (warnUnsupportedCharset) {
- // Use err(String) rather than the Logging API or System.err
- // since this method may be called during VM initialization
- // before either is available.
- err("WARNING: Default charset " + csn +
- " not supported, using ISO-8859-1 instead\n");
- warnUnsupportedCharset = false; // clear the flag so later calls print nothing
- }
- }
-
static class Result {
byte[] value;
byte coder;
Result with() {
--- 105,114 ----
*** 222,244 ****
}
return result.with(ca, 0, cb.position());
}
}
- private static class StringDecoder8859_1 extends StringDecoder { // StringDecoder subclass with its own decode(); removed by this change
- StringDecoder8859_1(Charset cs, String rcn) {
- super(cs, rcn);
- }
- Result decode(byte[] ba, int off, int len) { // produces a Result for ba[off, off+len) without running a CharsetDecoder
- if (COMPACT_STRINGS) {
- return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1); // bytes are copied unchanged and tagged LATIN1
- } else {
- return result.with(StringLatin1.inflate(ba, off, len), UTF16); // otherwise the inflated form is tagged UTF16
- }
- }
- }
-
static Result decode(String charsetName, byte[] ba, int off, int len)
throws UnsupportedEncodingException
{
StringDecoder sd = deref(decoder);
String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
--- 213,222 ----
*** 247,272 ****
sd = null;
try {
Charset cs = lookupCharset(csn);
if (cs != null) {
if (cs == UTF_8) {
! sd = new StringDecoderUTF8(cs, csn);
! } else if (cs == ISO_8859_1) {
! sd = new StringDecoder8859_1(cs, csn);
! } else {
! sd = new StringDecoder(cs, csn);
}
}
} catch (IllegalCharsetNameException x) {}
if (sd == null)
throw new UnsupportedEncodingException(csn);
set(decoder, sd);
}
return sd.decode(ba, off, len);
}
static Result decode(Charset cs, byte[] ba, int off, int len) {
// (1)We never cache the "external" cs, the only benefit of creating
// an additional StringDe/Encoder object to wrap it is to share the
// de/encode() method. These SD/E objects are short-lived, the young-gen
// gc should be able to take care of them well. But the best approach
// is still not to generate them if not really necessary.
--- 225,263 ----
sd = null;
try {
Charset cs = lookupCharset(csn);
if (cs != null) {
if (cs == UTF_8) {
! return decodeUTF8(ba, off, len, true);
! }
! if (cs == ISO_8859_1) {
! return decodeLatin1(ba, off, len);
! }
! if (cs == US_ASCII) {
! return decodeASCII(ba, off, len);
}
+ sd = new StringDecoder(cs, csn);
}
} catch (IllegalCharsetNameException x) {}
if (sd == null)
throw new UnsupportedEncodingException(csn);
set(decoder, sd);
}
return sd.decode(ba, off, len);
}
static Result decode(Charset cs, byte[] ba, int off, int len) {
+ if (cs == UTF_8) {
+ return decodeUTF8(ba, off, len, true);
+ }
+ if (cs == ISO_8859_1) {
+ return decodeLatin1(ba, off, len);
+ }
+ if (cs == US_ASCII) {
+ return decodeASCII(ba, off, len);
+ }
+
// (1)We never cache the "external" cs, the only benefit of creating
// an additional StringDe/Encoder object to wrap it is to share the
// de/encode() method. These SD/E objects are short-lived, the young-gen
// gc should be able to take care of them well. But the best approach
// is still not to generate them if not really necessary.
*** 278,320 ****
// possible that the SM==null for now but then SM is NOT null later
// when safeTrim() is invoked...the "safe" way to do is to redundant
// check (... && (isTrusted || SM == null || getClassLoader0())) in trim
// but it then can be argued that the SM is null when the operation
// is started...
- if (cs == UTF_8) {
- return StringDecoderUTF8.decode(ba, off, len, new Result());
- }
CharsetDecoder cd = cs.newDecoder();
// ascii fastpath
! if (cs == ISO_8859_1 || ((cd instanceof ArrayDecoder) &&
! ((ArrayDecoder)cd).isASCIICompatible() &&
! !hasNegatives(ba, off, len))) {
! if (COMPACT_STRINGS) {
! return new Result().with(Arrays.copyOfRange(ba, off, off + len),
! LATIN1);
! } else {
! return new Result().with(StringLatin1.inflate(ba, off, len), UTF16);
! }
}
int en = scale(len, cd.maxCharsPerByte());
if (len == 0) {
return new Result().with();
}
- if (cs.getClass().getClassLoader0() != null &&
- System.getSecurityManager() != null) {
- ba = Arrays.copyOfRange(ba, off, off + len);
- off = 0;
- }
cd.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE)
.reset();
-
char[] ca = new char[en];
if (cd instanceof ArrayDecoder) {
int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
return new Result().with(ca, 0, clen);
}
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
CharBuffer cb = CharBuffer.wrap(ca);
try {
CoderResult cr = cd.decode(bb, cb, true);
if (!cr.isUnderflow())
--- 269,301 ----
// possible that the SM==null for now but then SM is NOT null later
// when safeTrim() is invoked...the "safe" way to do is to redundant
// check (... && (isTrusted || SM == null || getClassLoader0())) in trim
// but it then can be argued that the SM is null when the operation
// is started...
CharsetDecoder cd = cs.newDecoder();
// ascii fastpath
! if ((cd instanceof ArrayDecoder) &&
! ((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) {
! return decodeLatin1(ba, off, len);
}
int en = scale(len, cd.maxCharsPerByte());
if (len == 0) {
return new Result().with();
}
cd.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE)
.reset();
char[] ca = new char[en];
if (cd instanceof ArrayDecoder) {
int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
return new Result().with(ca, 0, clen);
}
+ if (cs.getClass().getClassLoader0() != null &&
+ System.getSecurityManager() != null) {
+ ba = Arrays.copyOfRange(ba, off, off + len);
+ off = 0;
+ }
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
CharBuffer cb = CharBuffer.wrap(ca);
try {
CoderResult cr = cd.decode(bb, cb, true);
if (!cr.isUnderflow())
*** 329,356 ****
}
return new Result().with(ca, 0, cb.position());
}
static Result decode(byte[] ba, int off, int len) { // decodes ba[off, off+len) with the default charset (old revision)
! String csn = Charset.defaultCharset().name();
! try {
! // use charset name decode() variant which provides caching.
! return decode(csn, ba, off, len);
! } catch (UnsupportedEncodingException x) {
! warnUnsupportedCharset(csn); // then fall through to the ISO-8859-1 attempt below
}
! try {
! return decode("ISO-8859-1", ba, off, len);
! } catch (UnsupportedEncodingException x) {
! // If this code is hit during VM initialization, err(String) is
! // the only way we will be able to get any kind of error message.
! err("ISO-8859-1 charset not available: " + x.toString() + "\n");
! // If we can not find ISO-8859-1 (a required encoding) then things
! // are seriously wrong with the installation.
! System.exit(1);
! return null; // placed after System.exit(1) so this path has a return statement
}
}
// -- Encoding --
private static class StringEncoder {
private Charset cs;
--- 310,335 ----
}
return new Result().with(ca, 0, cb.position());
}
static Result decode(byte[] ba, int off, int len) { // decodes ba[off, off+len) with the default charset (new revision)
! Charset cs = Charset.defaultCharset();
! if (cs == UTF_8) { // identity comparison against the UTF_8 constant of this class
! return decodeUTF8(ba, off, len, true); // true = doReplace
}
! if (cs == ISO_8859_1) {
! return decodeLatin1(ba, off, len);
! }
! if (cs == US_ASCII) {
! return decodeASCII(ba, off, len);
! }
! StringDecoder sd = deref(decoder); // cached decoder, or null if none is cached / the reference was cleared
! if (sd == null || !cs.name().equals(sd.cs.name())) { // reuse only when the cached decoder's charset name matches the default
! sd = new StringDecoder(cs, cs.name());
! set(decoder, sd);
}
+ return sd.decode(ba, off, len);
}
// -- Encoding --
private static class StringEncoder {
private Charset cs;
*** 391,403 ****
byte[] ba = new byte[en];
if (len == 0) {
return ba;
}
if (ce instanceof ArrayEncoder) {
- if (!isTrusted) {
- val = Arrays.copyOf(val, val.length);
- }
int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
: ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
if (blen != -1) {
return safeTrim(ba, blen, isTrusted);
}
--- 370,379 ----
*** 421,430 ****
--- 397,583 ----
}
return safeTrim(ba, bb.position(), isTrusted);
}
}
+ static byte[] encode(String charsetName, byte coder, byte[] val) // encodes val (interpreted per coder) using the charset named charsetName
+ throws UnsupportedEncodingException // thrown when lookupCharset yields null or the name is illegal
+ {
+ StringEncoder se = deref(encoder); // cached encoder, possibly null
+ String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; // a null name selects ISO-8859-1
+ if ((se == null) || !(csn.equals(se.requestedCharsetName())
+ || csn.equals(se.charsetName()))) { // cache miss: csn matches neither name recorded on the cached encoder
+ se = null;
+ try {
+ Charset cs = lookupCharset(csn);
+ if (cs != null) {
+ if (cs == UTF_8) { // the three fast paths return before set(encoder, se), leaving the cache untouched
+ return encodeUTF8(coder, val, true);
+ }
+ if (cs == ISO_8859_1) {
+ return encode8859_1(coder, val);
+ }
+ if (cs == US_ASCII) {
+ return encodeASCII(coder, val);
+ }
+ se = new StringEncoder(cs, csn);
+ }
+ } catch (IllegalCharsetNameException x) {} // deliberately ignored: se stays null, so the check below throws
+ if (se == null) {
+ throw new UnsupportedEncodingException (csn);
+ }
+ set(encoder, se); // remember the new encoder for later calls
+ }
+ return se.encode(coder, val);
+ }
+
+ static byte[] encode(Charset cs, byte coder, byte[] val) { // encodes val (interpreted per coder) with cs, using REPLACE for malformed/unmappable input
+ if (cs == UTF_8) {
+ return encodeUTF8(coder, val, true); // true = doReplace
+ }
+ if (cs == ISO_8859_1) {
+ return encode8859_1(coder, val);
+ }
+ if (cs == US_ASCII) {
+ return encodeASCII(coder, val);
+ }
+ CharsetEncoder ce = cs.newEncoder();
+ // fastpath for ascii compatible
+ if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
+ ((ArrayEncoder)ce).isASCIICompatible() &&
+ !hasNegatives(val, 0, val.length)))) {
+ return Arrays.copyOf(val, val.length); // output bytes equal the input bytes; return a copy
+ }
+ int len = val.length >> coder; // assume LATIN1=0/UTF16=1;
+ int en = scale(len, ce.maxBytesPerChar()); // output capacity derived from len and the encoder's maxBytesPerChar
+ byte[] ba = new byte[en];
+ if (len == 0) {
+ return ba; // nothing to encode: return the freshly allocated array
+ }
+ ce.onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .reset();
+ if (ce instanceof ArrayEncoder) {
+ int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
+ : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
+ if (blen != -1) { // presumably -1 means the array encoder declined; then the buffer path below runs -- TODO confirm
+ return safeTrim(ba, blen, true);
+ }
+ }
+ boolean isTrusted = cs.getClass().getClassLoader0() == null ||
+ System.getSecurityManager() == null; // passed to safeTrim below; only needed on the buffer-based path
+ char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
+ : StringUTF16.toChars(val);
+ ByteBuffer bb = ByteBuffer.wrap(ba);
+ CharBuffer cb = CharBuffer.wrap(ca, 0, len);
+ try {
+ CoderResult cr = ce.encode(cb, bb, true); // true = endOfInput
+ if (!cr.isUnderflow())
+ cr.throwException();
+ cr = ce.flush(bb);
+ if (!cr.isUnderflow())
+ cr.throwException();
+ } catch (CharacterCodingException x) {
+ throw new Error(x); // presumably not expected with REPLACE actions; rethrown unchecked with the cause kept
+ }
+ return safeTrim(ba, bb.position(), isTrusted); // bb.position() is the number of bytes written
+ }
+
+ static byte[] encode(byte coder, byte[] val) { // encodes val (interpreted per coder) with the default charset
+ Charset cs = Charset.defaultCharset();
+ if (cs == UTF_8) {
+ return encodeUTF8(coder, val, true); // true = doReplace
+ }
+ if (cs == ISO_8859_1) {
+ return encode8859_1(coder, val);
+ }
+ if (cs == US_ASCII) {
+ return encodeASCII(coder, val);
+ }
+ StringEncoder se = deref(encoder); // cached encoder, possibly null
+ if (se == null || !cs.name().equals(se.cs.name())) { // reuse only when the cached encoder's charset name matches the default
+ se = new StringEncoder(cs, cs.name());
+ set(encoder, se);
+ }
+ return se.encode(coder, val);
+ }
+
+ /**
+ * Print a message directly to stderr, bypassing all character conversion
+ * methods.
+ * @param msg message to print
+ */
+ private static native void err(String msg); // native declaration: no Java body in this file
+
+ /* The cached Result for each thread. NOTE(review): the decode helpers in this file return this same instance, so a later decode on the same thread presumably overwrites it -- confirm callers consume the Result immediately. */
+ private static final ThreadLocal<StringCoding.Result>
+ resultCached = new ThreadLocal<>() {
+ protected StringCoding.Result initialValue() { // supplies the per-thread instance
+ return new StringCoding.Result();
+ }};
+
+ ////////////////////////// ascii //////////////////////////////
+
+ private static Result decodeASCII(byte[] ba, int off, int len) { // decodes ba[off, off+len) as US-ASCII; negative bytes become repl
+ Result result = resultCached.get(); // per-thread Result, filled in and returned below
+ if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) {
+ return result.with(Arrays.copyOfRange(ba, off, off + len),
+ LATIN1); // bytes are copied unchanged and tagged LATIN1
+ }
+ byte[] dst = new byte[len<<1]; // two bytes per char for the UTF16 coder
+ int dp = 0; // index of the next char to write
+ while (dp < len) {
+ int b = ba[off++];
+ putChar(dst, dp++, (b >= 0) ? (char)b : repl); // a negative byte has its high bit set, so it is outside 0..0x7f
+ }
+ return result.with(dst, UTF16);
+ }
+
+ private static byte[] encodeASCII(byte coder, byte[] val) { // encodes val as US-ASCII, writing '?' for anything above 0x7f
+ if (coder == LATIN1) {
+ byte[] dst = new byte[val.length]; // one output byte per input byte
+ for (int i = 0; i < val.length; i++) {
+ if (val[i] < 0) { // high bit set: value is 0x80..0xff
+ dst[i] = '?';
+ } else {
+ dst[i] = val[i];
+ }
+ }
+ return dst;
+ }
+ int len = val.length >> 1; // number of chars: two bytes per char on this path
+ byte[] dst = new byte[len];
+ int dp = 0; // number of output bytes written so far
+ for (int i = 0; i < len; i++) {
+ char c = StringUTF16.getChar(val, i);
+ if (c < 0x80) {
+ dst[dp++] = (byte)c;
+ continue;
+ }
+ if (Character.isHighSurrogate(c) && i + 1 < len &&
+ Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
+ i++; // a valid surrogate pair is consumed together and yields a single '?'
+ }
+ dst[dp++] = '?';
+ }
+ if (len == dp) {
+ return dst; // every char produced one byte: no trimming needed
+ }
+ return Arrays.copyOf(dst, dp); // shorter because surrogate pairs collapsed to one byte
+ }
+
+ ////////////////////////// latin1/8859_1 ///////////////////////////
+
+ private static Result decodeLatin1(byte[] ba, int off, int len) { // decodes ba[off, off+len) as ISO-8859-1 into the per-thread Result
+ Result result = resultCached.get();
+ if (COMPACT_STRINGS) {
+ return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1); // bytes are copied unchanged and tagged LATIN1
+ } else {
+ return result.with(StringLatin1.inflate(ba, off, len), UTF16); // otherwise the inflated form is tagged UTF16
+ }
+ }
+
@HotSpotIntrinsicCandidate
private static int implEncodeISOArray(byte[] sa, int sp,
byte[] da, int dp, int len) {
int i = 0;
for (; i < len; i++) {
*** 434,444 ****
da[dp++] = (byte)c;
}
return i;
}
! static byte[] encode8859_1(byte coder, byte[] val) {
if (coder == LATIN1) {
return Arrays.copyOf(val, val.length);
}
int len = val.length >> 1;
byte[] dst = new byte[len];
--- 587,597 ----
da[dp++] = (byte)c;
}
return i;
}
! private static byte[] encode8859_1(byte coder, byte[] val) {
if (coder == LATIN1) {
return Arrays.copyOf(val, val.length);
}
int len = val.length >> 1;
byte[] dst = new byte[len];
*** 463,523 ****
return dst;
}
return Arrays.copyOf(dst, dp);
}
! static byte[] encodeASCII(byte coder, byte[] val) { // old-revision position of encodeASCII: writes '?' for anything above 0x7f
! if (coder == LATIN1) {
! byte[] dst = new byte[val.length]; // one output byte per input byte
! for (int i = 0; i < val.length; i++) {
! if (val[i] < 0) { // high bit set: value is 0x80..0xff
! dst[i] = '?';
! } else {
! dst[i] = val[i];
}
}
! return dst;
}
! int len = val.length >> 1; // number of chars: two bytes per char on this path
! byte[] dst = new byte[len];
int dp = 0;
! for (int i = 0; i < len; i++) {
! char c = StringUTF16.getChar(val, i);
! if (c < 0x80) {
! dst[dp++] = (byte)c;
continue;
}
! if (Character.isHighSurrogate(c) && i + 1 < len &&
! Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
! i++; // a valid surrogate pair is consumed together and yields a single '?'
}
- dst[dp++] = '?';
}
! if (len == dp) {
! return dst;
}
! return Arrays.copyOf(dst, dp); // shorter because surrogate pairs collapsed to one byte
}
- static byte[] encodeUTF8(byte coder, byte[] val) {
int dp = 0;
! byte[] dst;
! if (coder == LATIN1) {
! dst = new byte[val.length << 1];
for (int sp = 0; sp < val.length; sp++) {
byte c = val[sp];
if (c < 0) {
dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));
dst[dp++] = (byte)(0x80 | (c & 0x3f));
} else {
dst[dp++] = c;
}
}
! } else {
int sp = 0;
int sl = val.length >> 1;
! dst = new byte[sl * 3];
char c;
while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
// ascii fast loop;
dst[dp++] = (byte)c;
sp++;
--- 616,890 ----
return dst;
}
return Arrays.copyOf(dst, dp);
}
! //////////////////////////////// utf8 ////////////////////////////////////
!
! private static boolean isNotContinuation(int b) { // true unless the low 8 bits of b have the form 10xxxxxx
! return (b & 0xc0) != 0x80;
}
+
+ private static boolean isMalformed3(int b1, int b2, int b3) { // checks a 3-byte sequence; b1 is compared as a sign-extended byte
+ return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || // lead 0xE0 with b2 in 0x80..0x9f (overlong form)
+ (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80; // or b2/b3 is not of the form 10xxxxxx
}
!
! private static boolean isMalformed3_2(int b1, int b2) { // same checks as isMalformed3, limited to the first two bytes
! return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || // lead 0xE0 with b2 in 0x80..0x9f (overlong form)
! (b2 & 0xc0) != 0x80; // or b2 is not of the form 10xxxxxx
}
!
! private static boolean isMalformed4(int b2, int b3, int b4) { // true if any trailing byte of a 4-byte sequence is not of the form 10xxxxxx
! return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
! (b4 & 0xc0) != 0x80;
! }
!
! private static boolean isMalformed4_2(int b1, int b2) { // checks the first two bytes of a 4-byte sequence; compares against 0xf0/0xf4, so values must be unsigned (0..0xff)
! return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || // lead 0xF0 needs b2 in 0x90..0xbf
! (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || // lead 0xF4 needs b2 in 0x80..0x8f
! (b2 & 0xc0) != 0x80; // otherwise b2 must be of the form 10xxxxxx
! }
!
! private static boolean isMalformed4_3(int b3) { // true if b3 is not of the form 10xxxxxx
! return (b3 & 0xc0) != 0x80;
! }
!
! // Only for nb == 3 or nb == 4: returns the byte count of the malformed prefix starting at src[sp]
! private static int malformedN(byte[] src, int sp, int nb) {
! if (nb == 3) {
! int b1 = src[sp++];
! int b2 = src[sp++]; // no need to lookup b3
! return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
! isNotContinuation(b2)) ? 1 : 2; // 1 when b2 is already invalid after b1, otherwise 2
! } else if (nb == 4) { // speed is not a concern on this path
! int b1 = src[sp++] & 0xff; // unsigned values, so they can be compared with 0xf0/0xf4 directly
! int b2 = src[sp++] & 0xff;
! if (b1 > 0xf4 ||
! (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
! (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
! isNotContinuation(b2))
! return 1;
! if (isNotContinuation(src[sp++]))
! return 2;
! return 3;
! }
! assert false; // any other nb is a caller error
! return -1;
! }
!
! private static void throwMalformed(int off, int nb) { // always throws IllegalArgumentException carrying off and nb in its message
! throw new IllegalArgumentException("malformed input off : " + off +
! ", length : " + nb);
! }
!
! private static char repl = '\ufffd'; // char written in place of malformed input; NOTE(review): never reassigned in the visible code, could likely be final
!
! private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) { // decodes src[sp, sp+len) as UTF-8 into the per-thread Result
! // ASCII-biased fast path; the extra scan adds some relative cost for input that is not ASCII-only
! if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
! return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
! LATIN1); // bytes are copied unchanged and tagged LATIN1
! return decodeUTF8_0(src, sp, len, doReplace); // general path
! }
!
! private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) { // decodes src[sp, sp+len) as UTF-8; malformed input becomes repl, or throws via throwMalformed when doReplace is false
! Result ret = resultCached.get(); // per-thread Result, filled in and returned below
!
! int sl = sp + len; // exclusive end index of the input
int dp = 0;
! byte[] dst = new byte[len]; // one byte per char while the output still fits LATIN1
!
! if (COMPACT_STRINGS) { // first pass: try to produce a LATIN1 result
! while (sp < sl) {
! int b1 = src[sp];
! if (b1 >= 0) { // single-byte (0x00..0x7f) value
! dst[dp++] = (byte)b1;
! sp++;
continue;
}
! if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) && // these two lead bytes decode to 0x80..0xff, which one byte can hold
! sp + 1 < sl) {
! int b2 = src[sp + 1];
! if (!isNotContinuation(b2)) {
! dst[dp++] = (byte)(((b1 << 6) ^ b2)^
! (((byte) 0xC0 << 6) ^
! ((byte) 0x80 << 0)));
! sp += 2;
! continue;
}
}
! // anything not a latin1, including the repl
! // we have to go with the utf16
! break;
}
! if (sp == sl) { // the whole input was consumed by the LATIN1 pass
! if (dp != dst.length) {
! dst = Arrays.copyOf(dst, dp); // trim: two-byte sequences produced one output byte each
! }
! return ret.with(dst, LATIN1);
! }
! }
! if (dp == 0) { // nothing decoded yet: just allocate two bytes per char
! dst = new byte[len << 1];
! } else {
! byte[] buf = new byte[len << 1];
! StringLatin1.inflate(dst, 0, buf, 0, dp); // carry over the dp chars decoded by the LATIN1 pass
! dst = buf;
! }
! while (sp < sl) { // second pass: from here on dp counts chars written with putChar
! int b1 = src[sp++];
! if (b1 >= 0) {
! putChar(dst, dp++, (char) b1);
! } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { // 2-byte lead 110xxxxx; the mask test rejects 0xC0 and 0xC1
! if (sp < sl) {
! int b2 = src[sp++];
! if (isNotContinuation(b2)) {
! if (!doReplace) {
! throwMalformed(sp - 1, 1);
! }
! putChar(dst, dp++, repl);
! sp--; // step back so b2 is examined again as the start of the next sequence
! } else {
! putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
! (((byte) 0xC0 << 6) ^
! ((byte) 0x80 << 0))));
! }
! continue;
! }
! if (!doReplace) {
! throwMalformed(sp, 1); // underflow()
! }
! putChar(dst, dp++, repl);
! break;
! } else if ((b1 >> 4) == -2) { // 3-byte lead 1110xxxx
! if (sp + 1 < sl) { // both trailing bytes are available
! int b2 = src[sp++];
! int b3 = src[sp++];
! if (isMalformed3(b1, b2, b3)) {
! if (!doReplace) {
! throwMalformed(sp - 3, 3);
! }
! putChar(dst, dp++, repl);
! sp -= 3; // rewind to the lead byte ...
! sp += malformedN(src, sp, 3); // ... and skip only the bytes malformedN counts as bad
! } else {
! char c = (char)((b1 << 12) ^
! (b2 << 6) ^
! (b3 ^
! (((byte) 0xE0 << 12) ^
! ((byte) 0x80 << 6) ^
! ((byte) 0x80 << 0))));
! if (isSurrogate(c)) { // a decoded surrogate value is treated as malformed
! if (!doReplace) {
! throwMalformed(sp - 3, 3);
! }
! putChar(dst, dp++, repl);
! } else {
! putChar(dst, dp++, c);
! }
! }
! continue;
! }
! if (sp < sl && isMalformed3_2(b1, src[sp])) { // exactly one byte left and it cannot follow b1
! if (!doReplace) {
! throwMalformed(sp - 1, 2);
! }
! putChar(dst, dp++, repl);
! continue;
}
+ if (!doReplace){
+ throwMalformed(sp, 1);
+ }
+ putChar(dst, dp++, repl); // input ends inside the 3-byte sequence: one repl, then stop
+ break;
+ } else if ((b1 >> 3) == -2) { // 4-byte lead 11110xxx
+ if (sp + 2 < sl) { // all three trailing bytes are available
+ int b2 = src[sp++];
+ int b3 = src[sp++];
+ int b4 = src[sp++];
+ int uc = ((b1 << 18) ^
+ (b2 << 12) ^
+ (b3 << 6) ^
+ (b4 ^
+ (((byte) 0xF0 << 18) ^
+ ((byte) 0x80 << 12) ^
+ ((byte) 0x80 << 6) ^
+ ((byte) 0x80 << 0))));
+ if (isMalformed4(b2, b3, b4) ||
+ !isSupplementaryCodePoint(uc)) { // shortest form check
+ if (!doReplace) {
+ throwMalformed(sp - 4, 4);
+ }
+ putChar(dst, dp++, repl);
+ sp -= 4; // rewind to the lead byte ...
+ sp += malformedN(src, sp, 4); // ... and skip only the bytes malformedN counts as bad
+ } else {
+ putChar(dst, dp++, highSurrogate(uc)); // supplementary code point: written as two chars
+ putChar(dst, dp++, lowSurrogate(uc));
+ }
+ continue;
+ }
+ b1 &= 0xff; // unsigned value for the comparisons with 0xf4 and in isMalformed4_2
+ if (b1 > 0xf4 ||
+ sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
+ if (!doReplace) {
+ throwMalformed(sp - 1, 1); // or 2
+ }
+ putChar(dst, dp++, repl);
+ continue;
+ }
+ if (!doReplace) {
+ throwMalformed(sp - 1, 1);
+ }
+ sp++; // step over the second byte (already checked when present)
+ putChar(dst, dp++, repl);
+ if (sp < sl && isMalformed4_3(src[sp])) { // a remaining byte that is not 10xxxxxx is decoded on its own
+ continue;
+ }
+ break;
+ } else { // b1 matches none of the lead-byte patterns above
+ if (!doReplace) {
+ throwMalformed(sp - 1, 1);
+ }
+ putChar(dst, dp++, repl);
+ }
+ }
+ if (dp != len) { // dst holds len chars; trim to the dp chars actually written
+ dst = Arrays.copyOf(dst, dp << 1);
+ }
+ return ret.with(dst, UTF16);
+ }
+
+ private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) { // encodes val as UTF-8; doReplace is only forwarded to the UTF16 path
+ if (coder == UTF16)
+ return encodeUTF8_UTF16(val, doReplace);
+
+ if (!hasNegatives(val, 0, val.length))
+ return Arrays.copyOf(val, val.length); // output bytes equal the input bytes; return a copy
int dp = 0;
! byte[] dst = new byte[val.length << 1]; // worst case: every input byte becomes two output bytes
for (int sp = 0; sp < val.length; sp++) {
byte c = val[sp];
if (c < 0) { // 0x80..0xff is written as a two-byte sequence
dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));
dst[dp++] = (byte)(0x80 | (c & 0x3f));
} else {
dst[dp++] = c;
}
}
! if (dp == dst.length)
! return dst;
! return Arrays.copyOf(dst, dp); // trim to the bytes actually written
! }
!
! private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
! int dp = 0;
int sp = 0;
int sl = val.length >> 1;
! byte[] dst = new byte[sl * 3];
char c;
while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
// ascii fast loop;
dst[dp++] = (byte)c;
sp++;
*** 535,546 ****
--- 902,917 ----
if (Character.isHighSurrogate(c) && sp < sl &&
Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
uc = Character.toCodePoint(c, c2);
}
if (uc < 0) {
+ if (doReplace) {
dst[dp++] = '?';
} else {
+ throwMalformed(sp - 1, 1); // or 2, does not matter here
+ }
+ } else {
dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
dst[dp++] = (byte)(0x80 | (uc & 0x3f));
sp++; // 2 chars
*** 550,671 ****
dst[dp++] = (byte)(0xe0 | ((c >> 12)));
dst[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));
dst[dp++] = (byte)(0x80 | (c & 0x3f));
}
}
- }
if (dp == dst.length) {
return dst;
}
return Arrays.copyOf(dst, dp);
}
! static byte[] encode(String charsetName, byte coder, byte[] val) // old-revision position of encode(String, ...); encodes val using the charset named charsetName
! throws UnsupportedEncodingException
! {
! StringEncoder se = deref(encoder); // cached encoder, possibly null
! String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; // a null name selects ISO-8859-1
! if ((se == null) || !(csn.equals(se.requestedCharsetName())
! || csn.equals(se.charsetName()))) {
! se = null;
! try {
! Charset cs = lookupCharset(csn);
! if (cs != null) {
! if (cs == UTF_8) {
! return encodeUTF8(coder, val); // two-argument form, before the doReplace parameter was added
! } else if (cs == ISO_8859_1) {
! return encode8859_1(coder, val);
! } else if (cs == US_ASCII) {
! return encodeASCII(coder, val);
! }
! se = new StringEncoder(cs, csn);
! }
! } catch (IllegalCharsetNameException x) {} // deliberately ignored: se stays null, so the check below throws
! if (se == null) {
! throw new UnsupportedEncodingException (csn);
! }
! set(encoder, se);
! }
! return se.encode(coder, val);
! }
! static byte[] encode(Charset cs, byte coder, byte[] val) { // old-revision encode(Charset, ...); encodes val with cs using REPLACE actions
! if (cs == UTF_8) {
! return encodeUTF8(coder, val);
! } else if (cs == ISO_8859_1) {
! return encode8859_1(coder, val);
! } else if (cs == US_ASCII) {
! return encodeASCII(coder, val);
! }
! CharsetEncoder ce = cs.newEncoder();
! // fastpath for ascii compatible
! if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
! ((ArrayEncoder)ce).isASCIICompatible() &&
! !hasNegatives(val, 0, val.length)))) {
! return Arrays.copyOf(val, val.length);
! }
! int len = val.length >> coder; // assume LATIN1=0/UTF16=1;
! int en = scale(len, ce.maxBytesPerChar());
! byte[] ba = new byte[en];
! if (len == 0) {
! return ba;
! }
! boolean isTrusted = cs.getClass().getClassLoader0() == null ||
! System.getSecurityManager() == null;
! ce.onMalformedInput(CodingErrorAction.REPLACE)
! .onUnmappableCharacter(CodingErrorAction.REPLACE)
! .reset();
! if (ce instanceof ArrayEncoder) {
! if (!isTrusted) {
! val = Arrays.copyOf(val, val.length); // hand the encoder a private copy when isTrusted is false
! }
! int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
! : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
! if (blen != -1) { // presumably -1 means the array encoder declined; then the buffer path below runs -- TODO confirm
! return safeTrim(ba, blen, isTrusted);
! }
! }
! char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
! : StringUTF16.toChars(val);
! ByteBuffer bb = ByteBuffer.wrap(ba);
! CharBuffer cb = CharBuffer.wrap(ca, 0, len);
! try {
! CoderResult cr = ce.encode(cb, bb, true); // true = endOfInput
! if (!cr.isUnderflow())
! cr.throwException();
! cr = ce.flush(bb);
! if (!cr.isUnderflow())
! cr.throwException();
! } catch (CharacterCodingException x) {
! throw new Error(x);
! }
! return safeTrim(ba, bb.position(), isTrusted); // bb.position() is the number of bytes written
! }
!
! static byte[] encode(byte coder, byte[] val) { // old-revision encode with the default charset, falling back to ISO-8859-1
! String csn = Charset.defaultCharset().name();
! try {
! // use charset name encode() variant which provides caching.
! return encode(csn, coder, val);
! } catch (UnsupportedEncodingException x) {
! warnUnsupportedCharset(csn); // then fall through to the ISO-8859-1 attempt below
! }
! try {
! return encode("ISO-8859-1", coder, val);
! } catch (UnsupportedEncodingException x) {
! // If this code is hit during VM initialization, err(String) is
! // the only way we will be able to get any kind of error message.
! err("ISO-8859-1 charset not available: " + x.toString() + "\n");
! // If we can not find ISO-8859-1 (a required encoding) then things
! // are seriously wrong with the installation.
! System.exit(1);
! return null; // placed after System.exit(1) so this path has a return statement
! }
}
! /**
! * Print a message directly to stderr, bypassing all character conversion
! * methods.
! * @param msg message to print
*/
! private static native void err(String msg);
}
--- 921,950 ----
dst[dp++] = (byte)(0xe0 | ((c >> 12)));
dst[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));
dst[dp++] = (byte)(0x80 | (c & 0x3f));
}
}
if (dp == dst.length) {
return dst;
}
return Arrays.copyOf(dst, dp);
}
! ////////////////////// for j.u.z.ZipCoder //////////////////////////
! /*
! * Throws IllegalArgumentException, instead of replacing, if malformed or unmappable.
! */
! static String newStringUTF8NoRepl(byte[] src, int off, int len) { // builds a String from UTF-8 bytes src[off, off+len) without replacement
! if (COMPACT_STRINGS && !hasNegatives(src, off, len))
! return new String(Arrays.copyOfRange(src, off, off + len), LATIN1); // bytes are copied unchanged and tagged LATIN1
! Result ret = decodeUTF8_0(src, off, len, false); // false = doReplace off: malformed input throws IllegalArgumentException
! return new String(ret.value, ret.coder);
}
! /*
! * Throws IllegalArgumentException, instead of replacing, if unmappable.
*/
! static byte[] getBytesUTF8NoRepl(String s) { // UTF-8 encodes s with doReplace set to false
! return encodeUTF8(s.coder(), s.value(), false);
! }
}