< prev index next >
src/java.base/share/classes/java/lang/StringCoding.java
Print this page
*** 34,43 ****
--- 34,44 ----
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException;
+ import java.nio.charset.MalformedInputException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Arrays;
import jdk.internal.HotSpotIntrinsicCandidate;
import sun.nio.cs.HistoricallyNamedCharset;
import sun.nio.cs.ArrayDecoder;
*** 408,418 ****
se = null;
try {
Charset cs = lookupCharset(csn);
if (cs != null) {
if (cs == UTF_8) {
! return encodeUTF8(coder, val, true);
}
if (cs == ISO_8859_1) {
return encode8859_1(coder, val);
}
if (cs == US_ASCII) {
--- 409,419 ----
se = null;
try {
Charset cs = lookupCharset(csn);
if (cs != null) {
if (cs == UTF_8) {
! return encodeUTF8(coder, val, true, false);
}
if (cs == ISO_8859_1) {
return encode8859_1(coder, val);
}
if (cs == US_ASCII) {
*** 429,439 ****
return se.encode(coder, val);
}
static byte[] encode(Charset cs, byte coder, byte[] val) {
if (cs == UTF_8) {
! return encodeUTF8(coder, val, true);
}
if (cs == ISO_8859_1) {
return encode8859_1(coder, val);
}
if (cs == US_ASCII) {
--- 430,440 ----
return se.encode(coder, val);
}
static byte[] encode(Charset cs, byte coder, byte[] val) {
if (cs == UTF_8) {
! return encodeUTF8(coder, val, true, false);
}
if (cs == ISO_8859_1) {
return encode8859_1(coder, val);
}
if (cs == US_ASCII) {
*** 482,492 ****
}
static byte[] encode(byte coder, byte[] val) {
Charset cs = Charset.defaultCharset();
if (cs == UTF_8) {
! return encodeUTF8(coder, val, true);
}
if (cs == ISO_8859_1) {
return encode8859_1(coder, val);
}
if (cs == US_ASCII) {
--- 483,493 ----
}
static byte[] encode(byte coder, byte[] val) {
Charset cs = Charset.defaultCharset();
if (cs == UTF_8) {
! return encodeUTF8(coder, val, true, false);
}
if (cs == ISO_8859_1) {
return encode8859_1(coder, val);
}
if (cs == US_ASCII) {
*** 587,600 ****
}
return i;
}
private static byte[] encode8859_1(byte coder, byte[] val) {
! return encode8859_1(coder, val, true);
}
! private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
if (coder == LATIN1) {
return Arrays.copyOf(val, val.length);
}
int len = val.length >> 1;
byte[] dst = new byte[len];
--- 588,601 ----
}
return i;
}
private static byte[] encode8859_1(byte coder, byte[] val) {
! return encode8859_1(coder, val, true, false); // doReplace = true, throwCCE = false
}
! private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace, boolean throwCCE) {
if (coder == LATIN1) {
return Arrays.copyOf(val, val.length);
}
int len = val.length >> 1;
byte[] dst = new byte[len];
*** 605,615 ****
int ret = implEncodeISOArray(val, sp, dst, dp, len);
sp = sp + ret;
dp = dp + ret;
if (ret != len) {
if (!doReplace) {
! throwMalformed(sp, 1);
}
char c = StringUTF16.getChar(val, sp++);
if (Character.isHighSurrogate(c) && sp < sl &&
Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
sp++;
--- 606,616 ----
int ret = implEncodeISOArray(val, sp, dst, dp, len);
sp = sp + ret;
dp = dp + ret;
if (ret != len) {
if (!doReplace) {
! throwUnmappable(sp, 1, throwCCE);
}
char c = StringUTF16.getChar(val, sp++);
if (Character.isHighSurrogate(c) && sp < sl &&
Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
sp++;
*** 676,707 ****
}
assert false;
return -1;
}
! private static void throwMalformed(int off, int nb) {
throw new IllegalArgumentException("malformed input off : " + off +
", length : " + nb);
}
! private static void throwMalformed(byte[] val) {
int dp = 0;
while (dp < val.length && val[dp] >=0) { dp++; }
! throwMalformed(dp, 1);
}
private static char repl = '\ufffd';
private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
// ascii-bais, which has a relative impact to the non-ascii-only bytes
if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
LATIN1);
! return decodeUTF8_0(src, sp, len, doReplace);
}
! private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
Result ret = resultCached.get();
int sl = sp + len;
int dp = 0;
byte[] dst = new byte[len];
--- 677,728 ----
}
assert false;
return -1;
}
! private static void throwMalformed(int off, int nb, boolean throwCCE) {
! // Default (throwCCE == false): keep the existing code path and throw an IAE that carries only a message built from off and nb.
! if (!throwCCE) {
throw new IllegalArgumentException("malformed input off : " + off +
", length : " + nb);
+ } else {
+ throw new IllegalArgumentException(new MalformedInputException(nb)); // IAE whose cause is the CCE; off is not passed along
+ }
}
! private static void throwMalformed(byte[] val, boolean throwCCE) {
int dp = 0;
while (dp < val.length && val[dp] >=0) { dp++; }
! throwMalformed(dp, 1, throwCCE); // dp: index of the first negative byte in val, or val.length if there is none
! }
!
! private static void throwUnmappable(int off, int nb, boolean throwCCE) {
! // Default (throwCCE == false): keep the existing code path, an IAE with the same "malformed input" message text that throwMalformed uses.
! if (!throwCCE) {
! throw new IllegalArgumentException("malformed input off : " + off +
! ", length : " + nb);
! } else {
! throwUnmappable(nb); // IAE whose cause is an UnmappableCharacterException; off is not passed along
! }
! }
!
! // Wraps an UnmappableCharacterException of length nb in an IAE; the offset may be added if UnmappableCharacterException can take it in the future.
! private static void throwUnmappable(int nb) {
! throw new IllegalArgumentException(new UnmappableCharacterException(nb)); // NOTE(review): no import of UnmappableCharacterException is visible in this diff's import hunk - confirm it is in scope
! }
private static char repl = '\ufffd';
private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
// ascii-bais, which has a relative impact to the non-ascii-only bytes
if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
LATIN1);
! return decodeUTF8_0(src, sp, len, doReplace, false); // throwCCE = false: when doReplace is false, malformed input raises the message-only IAE
}
! private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace, boolean throwCCE) {
Result ret = resultCached.get();
int sl = sp + len;
int dp = 0;
byte[] dst = new byte[len];
*** 750,760 ****
} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
if (sp < sl) {
int b2 = src[sp++];
if (isNotContinuation(b2)) {
if (!doReplace) {
! throwMalformed(sp - 1, 1);
}
putChar(dst, dp++, repl);
sp--;
} else {
putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
--- 771,781 ----
} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
if (sp < sl) {
int b2 = src[sp++];
if (isNotContinuation(b2)) {
if (!doReplace) {
! throwMalformed(sp - 1, 1, throwCCE);
}
putChar(dst, dp++, repl);
sp--;
} else {
putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
*** 762,782 ****
((byte) 0x80 << 0))));
}
continue;
}
if (!doReplace) {
! throwMalformed(sp, 1); // underflow()
}
putChar(dst, dp++, repl);
break;
} else if ((b1 >> 4) == -2) {
if (sp + 1 < sl) {
int b2 = src[sp++];
int b3 = src[sp++];
if (isMalformed3(b1, b2, b3)) {
if (!doReplace) {
! throwMalformed(sp - 3, 3);
}
putChar(dst, dp++, repl);
sp -= 3;
sp += malformedN(src, sp, 3);
} else {
--- 783,803 ----
((byte) 0x80 << 0))));
}
continue;
}
if (!doReplace) {
! throwMalformed(sp, 1, throwCCE); // underflow()
}
putChar(dst, dp++, repl);
break;
} else if ((b1 >> 4) == -2) {
if (sp + 1 < sl) {
int b2 = src[sp++];
int b3 = src[sp++];
if (isMalformed3(b1, b2, b3)) {
if (!doReplace) {
! throwMalformed(sp - 3, 3, throwCCE);
}
putChar(dst, dp++, repl);
sp -= 3;
sp += malformedN(src, sp, 3);
} else {
*** 786,813 ****
(((byte) 0xE0 << 12) ^
((byte) 0x80 << 6) ^
((byte) 0x80 << 0))));
if (isSurrogate(c)) {
if (!doReplace) {
! throwMalformed(sp - 3, 3);
}
putChar(dst, dp++, repl);
} else {
putChar(dst, dp++, c);
}
}
continue;
}
if (sp < sl && isMalformed3_2(b1, src[sp])) {
if (!doReplace) {
! throwMalformed(sp - 1, 2);
}
putChar(dst, dp++, repl);
continue;
}
if (!doReplace){
! throwMalformed(sp, 1);
}
putChar(dst, dp++, repl);
break;
} else if ((b1 >> 3) == -2) {
if (sp + 2 < sl) {
--- 807,834 ----
(((byte) 0xE0 << 12) ^
((byte) 0x80 << 6) ^
((byte) 0x80 << 0))));
if (isSurrogate(c)) {
if (!doReplace) {
! throwMalformed(sp - 3, 3, throwCCE);
}
putChar(dst, dp++, repl);
} else {
putChar(dst, dp++, c);
}
}
continue;
}
if (sp < sl && isMalformed3_2(b1, src[sp])) {
if (!doReplace) {
! throwMalformed(sp - 1, 2, throwCCE);
}
putChar(dst, dp++, repl);
continue;
}
if (!doReplace){
! throwMalformed(sp, 1, throwCCE);
}
putChar(dst, dp++, repl);
break;
} else if ((b1 >> 3) == -2) {
if (sp + 2 < sl) {
*** 823,833 ****
((byte) 0x80 << 6) ^
((byte) 0x80 << 0))));
if (isMalformed4(b2, b3, b4) ||
!isSupplementaryCodePoint(uc)) { // shortest form check
if (!doReplace) {
! throwMalformed(sp - 4, 4);
}
putChar(dst, dp++, repl);
sp -= 4;
sp += malformedN(src, sp, 4);
} else {
--- 844,854 ----
((byte) 0x80 << 6) ^
((byte) 0x80 << 0))));
if (isMalformed4(b2, b3, b4) ||
!isSupplementaryCodePoint(uc)) { // shortest form check
if (!doReplace) {
! throwMalformed(sp - 4, 4, throwCCE);
}
putChar(dst, dp++, repl);
sp -= 4;
sp += malformedN(src, sp, 4);
} else {
*** 838,877 ****
}
b1 &= 0xff;
if (b1 > 0xf4 ||
sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
if (!doReplace) {
! throwMalformed(sp - 1, 1); // or 2
}
putChar(dst, dp++, repl);
continue;
}
if (!doReplace) {
! throwMalformed(sp - 1, 1);
}
sp++;
putChar(dst, dp++, repl);
if (sp < sl && isMalformed4_3(src[sp])) {
continue;
}
break;
} else {
if (!doReplace) {
! throwMalformed(sp - 1, 1);
}
putChar(dst, dp++, repl);
}
}
if (dp != len) {
dst = Arrays.copyOf(dst, dp << 1);
}
return ret.with(dst, UTF16);
}
! private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
if (coder == UTF16)
! return encodeUTF8_UTF16(val, doReplace);
if (!hasNegatives(val, 0, val.length))
return Arrays.copyOf(val, val.length);
int dp = 0;
--- 859,898 ----
}
b1 &= 0xff;
if (b1 > 0xf4 ||
sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
if (!doReplace) {
! throwMalformed(sp - 1, 1, throwCCE); // or 2
}
putChar(dst, dp++, repl);
continue;
}
if (!doReplace) {
! throwMalformed(sp - 1, 1, throwCCE);
}
sp++;
putChar(dst, dp++, repl);
if (sp < sl && isMalformed4_3(src[sp])) {
continue;
}
break;
} else {
if (!doReplace) {
! throwMalformed(sp - 1, 1, throwCCE);
}
putChar(dst, dp++, repl);
}
}
if (dp != len) {
dst = Arrays.copyOf(dst, dp << 1);
}
return ret.with(dst, UTF16);
}
! private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace, boolean throwCCE) {
if (coder == UTF16)
! return encodeUTF8_UTF16(val, doReplace, throwCCE);
if (!hasNegatives(val, 0, val.length))
return Arrays.copyOf(val, val.length);
int dp = 0;
*** 888,898 ****
if (dp == dst.length)
return dst;
return Arrays.copyOf(dst, dp);
}
! private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
int dp = 0;
int sp = 0;
int sl = val.length >> 1;
byte[] dst = new byte[sl * 3];
char c;
--- 909,919 ----
if (dp == dst.length)
return dst;
return Arrays.copyOf(dst, dp);
}
! private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace, boolean throwCCE) {
int dp = 0;
int sp = 0;
int sl = val.length >> 1;
byte[] dst = new byte[sl * 3];
char c;
*** 917,927 ****
}
if (uc < 0) {
if (doReplace) {
dst[dp++] = '?';
} else {
! throwMalformed(sp - 1, 1); // or 2, does not matter here
}
} else {
dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
--- 938,948 ----
}
if (uc < 0) {
if (doReplace) {
dst[dp++] = '?';
} else {
! throwUnmappable(sp - 1, 1, throwCCE); // or 2, does not matter here
}
} else {
dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
*** 947,965 ****
* Throws iae, instead of replacing, if malformed or unmappable.
*/
static String newStringUTF8NoRepl(byte[] src, int off, int len) {
if (COMPACT_STRINGS && !hasNegatives(src, off, len))
return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
! Result ret = decodeUTF8_0(src, off, len, false);
return new String(ret.value, ret.coder);
}
/*
* Throws iae, instead of replacing, if unmappable.
*/
static byte[] getBytesUTF8NoRepl(String s) {
! return encodeUTF8(s.coder(), s.value(), false);
}
////////////////////// for j.n.f.Files //////////////////////////
private static boolean isASCII(byte[] src) {
--- 968,986 ----
* Throws iae, instead of replacing, if malformed or unmappable.
*/
static String newStringUTF8NoRepl(byte[] src, int off, int len) {
if (COMPACT_STRINGS && !hasNegatives(src, off, len))
return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
! Result ret = decodeUTF8_0(src, off, len, false, false); // doReplace = false, throwCCE = false
return new String(ret.value, ret.coder);
}
/*
* Throws iae, instead of replacing, if unmappable.
*/
static byte[] getBytesUTF8NoRepl(String s) {
! return encodeUTF8(s.coder(), s.value(), false, false); // doReplace = false, throwCCE = false
}
////////////////////// for j.n.f.Files //////////////////////////
private static boolean isASCII(byte[] src) {
*** 974,994 ****
static String newStringNoRepl(byte[] src, Charset cs) {
if (cs == UTF_8) {
if (COMPACT_STRINGS && isASCII(src))
return new String(src, LATIN1);
! Result ret = decodeUTF8_0(src, 0, src.length, false);
return new String(ret.value, ret.coder);
}
if (cs == ISO_8859_1) {
return newStringLatin1(src);
}
if (cs == US_ASCII) {
if (isASCII(src)) {
return newStringLatin1(src);
} else {
! throwMalformed(src);
}
}
CharsetDecoder cd = cs.newDecoder();
// ascii fastpath
--- 995,1015 ----
static String newStringNoRepl(byte[] src, Charset cs) {
if (cs == UTF_8) {
if (COMPACT_STRINGS && isASCII(src))
return new String(src, LATIN1);
! Result ret = decodeUTF8_0(src, 0, src.length, false, true);
return new String(ret.value, ret.coder);
}
if (cs == ISO_8859_1) {
return newStringLatin1(src);
}
if (cs == US_ASCII) {
if (isASCII(src)) {
return newStringLatin1(src);
} else {
! throwMalformed(src, true);
}
}
CharsetDecoder cd = cs.newDecoder();
// ascii fastpath
*** 1030,1053 ****
byte coder = s.coder();
if (cs == UTF_8) {
if (isASCII(val)) {
return val;
}
! return encodeUTF8(coder, val, false);
}
if (cs == ISO_8859_1) {
if (coder == LATIN1) {
return val;
}
! return encode8859_1(coder, val, false);
}
if (cs == US_ASCII) {
if (coder == LATIN1) {
if (isASCII(val)) {
return val;
} else {
! throwMalformed(val);
}
}
}
CharsetEncoder ce = cs.newEncoder();
// fastpath for ascii compatible
--- 1051,1074 ----
byte coder = s.coder();
if (cs == UTF_8) {
if (isASCII(val)) {
return val;
}
! return encodeUTF8(coder, val, false, true);
}
if (cs == ISO_8859_1) {
if (coder == LATIN1) {
return val;
}
! return encode8859_1(coder, val, false, true);
}
if (cs == US_ASCII) {
if (coder == LATIN1) {
if (isASCII(val)) {
return val;
} else {
! throwUnmappable(1);
}
}
}
CharsetEncoder ce = cs.newEncoder();
// fastpath for ascii compatible
*** 1081,1090 ****
cr.throwException();
cr = ce.flush(bb);
if (!cr.isUnderflow())
cr.throwException();
} catch (CharacterCodingException x) {
! throw new Error(x);
}
return safeTrim(ba, bb.position(), isTrusted);
}
}
--- 1102,1111 ----
cr.throwException();
cr = ce.flush(bb);
if (!cr.isUnderflow())
cr.throwException();
} catch (CharacterCodingException x) {
! throw new IllegalArgumentException(x);
}
return safeTrim(ba, bb.position(), isTrusted);
}
}
< prev index next >