--- old/src/java.base/share/classes/java/lang/StringCoding.java 2018-06-25 21:32:55.093196430 -0700 +++ new/src/java.base/share/classes/java/lang/StringCoding.java 2018-06-25 21:32:54.674150898 -0700 @@ -36,6 +36,7 @@ import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.MalformedInputException; import java.nio.charset.UnsupportedCharsetException; import java.util.Arrays; import jdk.internal.HotSpotIntrinsicCandidate; @@ -410,7 +411,7 @@ Charset cs = lookupCharset(csn); if (cs != null) { if (cs == UTF_8) { - return encodeUTF8(coder, val, true); + return encodeUTF8(coder, val, true, false); } if (cs == ISO_8859_1) { return encode8859_1(coder, val); @@ -431,7 +432,7 @@ static byte[] encode(Charset cs, byte coder, byte[] val) { if (cs == UTF_8) { - return encodeUTF8(coder, val, true); + return encodeUTF8(coder, val, true, false); } if (cs == ISO_8859_1) { return encode8859_1(coder, val); @@ -484,7 +485,7 @@ static byte[] encode(byte coder, byte[] val) { Charset cs = Charset.defaultCharset(); if (cs == UTF_8) { - return encodeUTF8(coder, val, true); + return encodeUTF8(coder, val, true, false); } if (cs == ISO_8859_1) { return encode8859_1(coder, val); @@ -589,10 +590,10 @@ } private static byte[] encode8859_1(byte coder, byte[] val) { - return encode8859_1(coder, val, true); + return encode8859_1(coder, val, true, false); } - private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) { + private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace, boolean throwCCE) { if (coder == LATIN1) { return Arrays.copyOf(val, val.length); } @@ -607,7 +608,7 @@ dp = dp + ret; if (ret != len) { if (!doReplace) { - throwMalformed(sp, 1); + throwUnmappable(sp, 1, throwCCE); } char c = StringUTF16.getChar(val, sp++); if (Character.isHighSurrogate(c) && sp < sl && @@ -678,15 +679,35 @@ return -1; } - private static void throwMalformed(int off, int nb) { - throw new IllegalArgumentException("malformed input off : " + off + - ", length : " + nb); + private static void throwMalformed(int off, int nb, boolean throwCCE) { + // keep the existing code path (throwing IAE with a msg) by default + if (!throwCCE) { + throw new IllegalArgumentException("malformed input off : " + off + + ", length : " + nb); + } else { + throw new IllegalArgumentException(new MalformedInputException(nb)); + } } - private static void throwMalformed(byte[] val) { + private static void throwMalformed(byte[] val, boolean throwCCE) { int dp = 0; while (dp < val.length && val[dp] >=0) { dp++; } - throwMalformed(dp, 1); + throwMalformed(dp, 1, throwCCE); + } + + private static void throwUnmappable(int off, int nb, boolean throwCCE) { + // keep the existing code path (throwing IAE with a msg) by default + if (!throwCCE) { + throw new IllegalArgumentException("malformed input off : " + off + + ", length : " + nb); + } else { + throwUnmappable(nb); + } + } + + // Wrap CCE in IAE; May add offset if UnmappableCharacterException can take it in the future + private static void throwUnmappable(int nb) { + throw new IllegalArgumentException(new UnmappableCharacterException(nb)); } private static char repl = '\ufffd'; @@ -696,10 +717,10 @@ if (COMPACT_STRINGS && !hasNegatives(src, sp, len)) return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len), LATIN1); - return decodeUTF8_0(src, sp, len, doReplace); + return decodeUTF8_0(src, sp, len, doReplace, false); } - private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) { + private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace, boolean throwCCE) { Result ret = resultCached.get(); int sl = sp + len; @@ -752,7 +773,7 @@ int b2 = src[sp++]; if (isNotContinuation(b2)) { if (!doReplace) { - throwMalformed(sp - 1, 1); + throwMalformed(sp - 1, 1, throwCCE); } putChar(dst, dp++, repl); sp--; @@ -764,7 +785,7 @@ continue; } if (!doReplace) { - throwMalformed(sp, 1); // underflow() + throwMalformed(sp, 1, throwCCE); // underflow() } putChar(dst, dp++, repl); break; @@ -774,7 +795,7 @@ int b3 = src[sp++]; if (isMalformed3(b1, b2, b3)) { if (!doReplace) { - throwMalformed(sp - 3, 3); + throwMalformed(sp - 3, 3, throwCCE); } putChar(dst, dp++, repl); sp -= 3; @@ -788,7 +809,7 @@ ((byte) 0x80 << 0)))); if (isSurrogate(c)) { if (!doReplace) { - throwMalformed(sp - 3, 3); + throwMalformed(sp - 3, 3, throwCCE); } putChar(dst, dp++, repl); } else { @@ -799,13 +820,13 @@ } if (sp < sl && isMalformed3_2(b1, src[sp])) { if (!doReplace) { - throwMalformed(sp - 1, 2); + throwMalformed(sp - 1, 2, throwCCE); } putChar(dst, dp++, repl); continue; } if (!doReplace){ - throwMalformed(sp, 1); + throwMalformed(sp, 1, throwCCE); } putChar(dst, dp++, repl); break; @@ -825,7 +846,7 @@ if (isMalformed4(b2, b3, b4) || !isSupplementaryCodePoint(uc)) { // shortest form check if (!doReplace) { - throwMalformed(sp - 4, 4); + throwMalformed(sp - 4, 4, throwCCE); } putChar(dst, dp++, repl); sp -= 4; @@ -840,13 +861,13 @@ if (b1 > 0xf4 || sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) { if (!doReplace) { - throwMalformed(sp - 1, 1); // or 2 + throwMalformed(sp - 1, 1, throwCCE); // or 2 } putChar(dst, dp++, repl); continue; } if (!doReplace) { - throwMalformed(sp - 1, 1); + throwMalformed(sp - 1, 1, throwCCE); } sp++; putChar(dst, dp++, repl); @@ -856,7 +877,7 @@ break; } else { if (!doReplace) { - throwMalformed(sp - 1, 1); + throwMalformed(sp - 1, 1, throwCCE); } putChar(dst, dp++, repl); } @@ -867,9 +888,9 @@ return ret.with(dst, UTF16); } - private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) { + private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace, boolean throwCCE) { if (coder == UTF16) - return encodeUTF8_UTF16(val, doReplace); + return encodeUTF8_UTF16(val, doReplace, throwCCE); if (!hasNegatives(val, 0, val.length)) return Arrays.copyOf(val, val.length); @@ -890,7 +911,7 @@ return Arrays.copyOf(dst, dp); } - private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) { + private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace, boolean throwCCE) { int dp = 0; int sp = 0; int sl = val.length >> 1; @@ -919,7 +940,7 @@ if (doReplace) { dst[dp++] = '?'; } else { - throwMalformed(sp - 1, 1); // or 2, does not matter here + throwUnmappable(sp - 1, 1, throwCCE); // or 2, does not matter here } } else { dst[dp++] = (byte)(0xf0 | ((uc >> 18))); @@ -949,7 +970,7 @@ static String newStringUTF8NoRepl(byte[] src, int off, int len) { if (COMPACT_STRINGS && !hasNegatives(src, off, len)) return new String(Arrays.copyOfRange(src, off, off + len), LATIN1); - Result ret = decodeUTF8_0(src, off, len, false); + Result ret = decodeUTF8_0(src, off, len, false, false); return new String(ret.value, ret.coder); } @@ -957,7 +978,7 @@ * Throws iae, instead of replacing, if unmappable. */ static byte[] getBytesUTF8NoRepl(String s) { - return encodeUTF8(s.coder(), s.value(), false); + return encodeUTF8(s.coder(), s.value(), false, false); } ////////////////////// for j.n.f.Files ////////////////////////// @@ -976,7 +997,7 @@ if (cs == UTF_8) { if (COMPACT_STRINGS && isASCII(src)) return new String(src, LATIN1); - Result ret = decodeUTF8_0(src, 0, src.length, false); + Result ret = decodeUTF8_0(src, 0, src.length, false, true); return new String(ret.value, ret.coder); } if (cs == ISO_8859_1) { @@ -986,7 +1007,7 @@ if (isASCII(src)) { return newStringLatin1(src); } else { - throwMalformed(src); + throwMalformed(src, true); } } @@ -1032,20 +1053,20 @@ if (isASCII(val)) { return val; } - return encodeUTF8(coder, val, false); + return encodeUTF8(coder, val, false, true); } if (cs == ISO_8859_1) { if (coder == LATIN1) { return val; } - return encode8859_1(coder, val, false); + return encode8859_1(coder, val, false, true); } if (cs == US_ASCII) { if (coder == LATIN1) { if (isASCII(val)) { return val; } else { - throwMalformed(val); + throwUnmappable(1); } } } @@ -1083,7 +1104,7 @@ if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { - throw new Error(x); + throw new IllegalArgumentException(x); } return safeTrim(ba, bb.position(), isTrusted); }