< prev index next >

src/java.base/share/classes/java/lang/StringCoding.java

Print this page

        

*** 34,43 **** --- 34,44 ---- import java.nio.charset.CharsetEncoder; import java.nio.charset.CharacterCodingException; import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; import java.nio.charset.IllegalCharsetNameException; + import java.nio.charset.MalformedInputException; import java.nio.charset.UnsupportedCharsetException; import java.util.Arrays; import jdk.internal.HotSpotIntrinsicCandidate; import sun.nio.cs.HistoricallyNamedCharset; import sun.nio.cs.ArrayDecoder;
*** 408,418 **** se = null; try { Charset cs = lookupCharset(csn); if (cs != null) { if (cs == UTF_8) { ! return encodeUTF8(coder, val, true); } if (cs == ISO_8859_1) { return encode8859_1(coder, val); } if (cs == US_ASCII) { --- 409,419 ---- se = null; try { Charset cs = lookupCharset(csn); if (cs != null) { if (cs == UTF_8) { ! return encodeUTF8(coder, val, true, false); } if (cs == ISO_8859_1) { return encode8859_1(coder, val); } if (cs == US_ASCII) {
*** 429,439 **** return se.encode(coder, val); } static byte[] encode(Charset cs, byte coder, byte[] val) { if (cs == UTF_8) { ! return encodeUTF8(coder, val, true); } if (cs == ISO_8859_1) { return encode8859_1(coder, val); } if (cs == US_ASCII) { --- 430,440 ---- return se.encode(coder, val); } static byte[] encode(Charset cs, byte coder, byte[] val) { if (cs == UTF_8) { ! return encodeUTF8(coder, val, true, false); } if (cs == ISO_8859_1) { return encode8859_1(coder, val); } if (cs == US_ASCII) {
*** 482,492 **** } static byte[] encode(byte coder, byte[] val) { Charset cs = Charset.defaultCharset(); if (cs == UTF_8) { ! return encodeUTF8(coder, val, true); } if (cs == ISO_8859_1) { return encode8859_1(coder, val); } if (cs == US_ASCII) { --- 483,493 ---- } static byte[] encode(byte coder, byte[] val) { Charset cs = Charset.defaultCharset(); if (cs == UTF_8) { ! return encodeUTF8(coder, val, true, false); } if (cs == ISO_8859_1) { return encode8859_1(coder, val); } if (cs == US_ASCII) {
*** 587,600 **** } return i; } private static byte[] encode8859_1(byte coder, byte[] val) { ! return encode8859_1(coder, val, true); } ! private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) { if (coder == LATIN1) { return Arrays.copyOf(val, val.length); } int len = val.length >> 1; byte[] dst = new byte[len]; --- 588,601 ---- } return i; } private static byte[] encode8859_1(byte coder, byte[] val) { ! return encode8859_1(coder, val, true, false); } ! private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace, boolean throwCCE) { if (coder == LATIN1) { return Arrays.copyOf(val, val.length); } int len = val.length >> 1; byte[] dst = new byte[len];
*** 605,615 **** int ret = implEncodeISOArray(val, sp, dst, dp, len); sp = sp + ret; dp = dp + ret; if (ret != len) { if (!doReplace) { ! throwMalformed(sp, 1); } char c = StringUTF16.getChar(val, sp++); if (Character.isHighSurrogate(c) && sp < sl && Character.isLowSurrogate(StringUTF16.getChar(val, sp))) { sp++; --- 606,616 ---- int ret = implEncodeISOArray(val, sp, dst, dp, len); sp = sp + ret; dp = dp + ret; if (ret != len) { if (!doReplace) { ! throwUnmappable(sp, 1, throwCCE); } char c = StringUTF16.getChar(val, sp++); if (Character.isHighSurrogate(c) && sp < sl && Character.isLowSurrogate(StringUTF16.getChar(val, sp))) { sp++;
*** 676,707 **** } assert false; return -1; } ! private static void throwMalformed(int off, int nb) { throw new IllegalArgumentException("malformed input off : " + off + ", length : " + nb); } ! private static void throwMalformed(byte[] val) { int dp = 0; while (dp < val.length && val[dp] >=0) { dp++; } ! throwMalformed(dp, 1); } private static char repl = '\ufffd'; private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) { // ascii-bais, which has a relative impact to the non-ascii-only bytes if (COMPACT_STRINGS && !hasNegatives(src, sp, len)) return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len), LATIN1); ! return decodeUTF8_0(src, sp, len, doReplace); } ! private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) { Result ret = resultCached.get(); int sl = sp + len; int dp = 0; byte[] dst = new byte[len]; --- 677,728 ---- } assert false; return -1; } ! private static void throwMalformed(int off, int nb, boolean throwCCE) { ! // keep the existing code path (throwing IAE with a msg) by default ! if (!throwCCE) { throw new IllegalArgumentException("malformed input off : " + off + ", length : " + nb); + } else { + throw new IllegalArgumentException(new MalformedInputException(nb)); + } } ! private static void throwMalformed(byte[] val, boolean throwCCE) { int dp = 0; while (dp < val.length && val[dp] >=0) { dp++; } ! throwMalformed(dp, 1, throwCCE); ! } ! ! private static void throwUnmappable(int off, int nb, boolean throwCCE) { ! // keep the existing code path (throwing IAE with a msg) by default ! if (!throwCCE) { ! throw new IllegalArgumentException("malformed input off : " + off + ! ", length : " + nb); ! } else { ! throwUnmappable(nb); ! } ! } ! ! // Wrap CCE in IAE; May add offset if UnmappableCharacterException can take it in the future ! private static void throwUnmappable(int nb) { ! throw new IllegalArgumentException(new UnmappableCharacterException(nb)); } private static char repl = '\ufffd'; private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) { // ascii-bais, which has a relative impact to the non-ascii-only bytes if (COMPACT_STRINGS && !hasNegatives(src, sp, len)) return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len), LATIN1); ! return decodeUTF8_0(src, sp, len, doReplace, false); } ! private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace, boolean throwCCE) { Result ret = resultCached.get(); int sl = sp + len; int dp = 0; byte[] dst = new byte[len];
*** 750,760 **** } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { if (sp < sl) { int b2 = src[sp++]; if (isNotContinuation(b2)) { if (!doReplace) { ! throwMalformed(sp - 1, 1); } putChar(dst, dp++, repl); sp--; } else { putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^ --- 771,781 ---- } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { if (sp < sl) { int b2 = src[sp++]; if (isNotContinuation(b2)) { if (!doReplace) { ! throwMalformed(sp - 1, 1, throwCCE); } putChar(dst, dp++, repl); sp--; } else { putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
*** 762,782 **** ((byte) 0x80 << 0)))); } continue; } if (!doReplace) { ! throwMalformed(sp, 1); // underflow() } putChar(dst, dp++, repl); break; } else if ((b1 >> 4) == -2) { if (sp + 1 < sl) { int b2 = src[sp++]; int b3 = src[sp++]; if (isMalformed3(b1, b2, b3)) { if (!doReplace) { ! throwMalformed(sp - 3, 3); } putChar(dst, dp++, repl); sp -= 3; sp += malformedN(src, sp, 3); } else { --- 783,803 ---- ((byte) 0x80 << 0)))); } continue; } if (!doReplace) { ! throwMalformed(sp, 1, throwCCE); // underflow() } putChar(dst, dp++, repl); break; } else if ((b1 >> 4) == -2) { if (sp + 1 < sl) { int b2 = src[sp++]; int b3 = src[sp++]; if (isMalformed3(b1, b2, b3)) { if (!doReplace) { ! throwMalformed(sp - 3, 3, throwCCE); } putChar(dst, dp++, repl); sp -= 3; sp += malformedN(src, sp, 3); } else {
*** 786,813 **** (((byte) 0xE0 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80 << 0)))); if (isSurrogate(c)) { if (!doReplace) { ! throwMalformed(sp - 3, 3); } putChar(dst, dp++, repl); } else { putChar(dst, dp++, c); } } continue; } if (sp < sl && isMalformed3_2(b1, src[sp])) { if (!doReplace) { ! throwMalformed(sp - 1, 2); } putChar(dst, dp++, repl); continue; } if (!doReplace){ ! throwMalformed(sp, 1); } putChar(dst, dp++, repl); break; } else if ((b1 >> 3) == -2) { if (sp + 2 < sl) { --- 807,834 ---- (((byte) 0xE0 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80 << 0)))); if (isSurrogate(c)) { if (!doReplace) { ! throwMalformed(sp - 3, 3, throwCCE); } putChar(dst, dp++, repl); } else { putChar(dst, dp++, c); } } continue; } if (sp < sl && isMalformed3_2(b1, src[sp])) { if (!doReplace) { ! throwMalformed(sp - 1, 2, throwCCE); } putChar(dst, dp++, repl); continue; } if (!doReplace){ ! throwMalformed(sp, 1, throwCCE); } putChar(dst, dp++, repl); break; } else if ((b1 >> 3) == -2) { if (sp + 2 < sl) {
*** 823,833 **** ((byte) 0x80 << 6) ^ ((byte) 0x80 << 0)))); if (isMalformed4(b2, b3, b4) || !isSupplementaryCodePoint(uc)) { // shortest form check if (!doReplace) { ! throwMalformed(sp - 4, 4); } putChar(dst, dp++, repl); sp -= 4; sp += malformedN(src, sp, 4); } else { --- 844,854 ---- ((byte) 0x80 << 6) ^ ((byte) 0x80 << 0)))); if (isMalformed4(b2, b3, b4) || !isSupplementaryCodePoint(uc)) { // shortest form check if (!doReplace) { ! throwMalformed(sp - 4, 4, throwCCE); } putChar(dst, dp++, repl); sp -= 4; sp += malformedN(src, sp, 4); } else {
*** 838,877 **** } b1 &= 0xff; if (b1 > 0xf4 || sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) { if (!doReplace) { ! throwMalformed(sp - 1, 1); // or 2 } putChar(dst, dp++, repl); continue; } if (!doReplace) { ! throwMalformed(sp - 1, 1); } sp++; putChar(dst, dp++, repl); if (sp < sl && isMalformed4_3(src[sp])) { continue; } break; } else { if (!doReplace) { ! throwMalformed(sp - 1, 1); } putChar(dst, dp++, repl); } } if (dp != len) { dst = Arrays.copyOf(dst, dp << 1); } return ret.with(dst, UTF16); } ! private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) { if (coder == UTF16) ! return encodeUTF8_UTF16(val, doReplace); if (!hasNegatives(val, 0, val.length)) return Arrays.copyOf(val, val.length); int dp = 0; --- 859,898 ---- } b1 &= 0xff; if (b1 > 0xf4 || sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) { if (!doReplace) { ! throwMalformed(sp - 1, 1, throwCCE); // or 2 } putChar(dst, dp++, repl); continue; } if (!doReplace) { ! throwMalformed(sp - 1, 1, throwCCE); } sp++; putChar(dst, dp++, repl); if (sp < sl && isMalformed4_3(src[sp])) { continue; } break; } else { if (!doReplace) { ! throwMalformed(sp - 1, 1, throwCCE); } putChar(dst, dp++, repl); } } if (dp != len) { dst = Arrays.copyOf(dst, dp << 1); } return ret.with(dst, UTF16); } ! private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace, boolean throwCCE) { if (coder == UTF16) ! return encodeUTF8_UTF16(val, doReplace, throwCCE); if (!hasNegatives(val, 0, val.length)) return Arrays.copyOf(val, val.length); int dp = 0;
*** 888,898 **** if (dp == dst.length) return dst; return Arrays.copyOf(dst, dp); } ! private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) { int dp = 0; int sp = 0; int sl = val.length >> 1; byte[] dst = new byte[sl * 3]; char c; --- 909,919 ---- if (dp == dst.length) return dst; return Arrays.copyOf(dst, dp); } ! private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace, boolean throwCCE) { int dp = 0; int sp = 0; int sl = val.length >> 1; byte[] dst = new byte[sl * 3]; char c;
*** 917,927 **** } if (uc < 0) { if (doReplace) { dst[dp++] = '?'; } else { ! throwMalformed(sp - 1, 1); // or 2, does not matter here } } else { dst[dp++] = (byte)(0xf0 | ((uc >> 18))); dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f)); dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f)); --- 938,948 ---- } if (uc < 0) { if (doReplace) { dst[dp++] = '?'; } else { ! throwUnmappable(sp - 1, 1, throwCCE); // or 2, does not matter here } } else { dst[dp++] = (byte)(0xf0 | ((uc >> 18))); dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f)); dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
*** 947,965 **** * Throws iae, instead of replacing, if malformed or unmappable. */ static String newStringUTF8NoRepl(byte[] src, int off, int len) { if (COMPACT_STRINGS && !hasNegatives(src, off, len)) return new String(Arrays.copyOfRange(src, off, off + len), LATIN1); ! Result ret = decodeUTF8_0(src, off, len, false); return new String(ret.value, ret.coder); } /* * Throws iae, instead of replacing, if unmappable. */ static byte[] getBytesUTF8NoRepl(String s) { ! return encodeUTF8(s.coder(), s.value(), false); } ////////////////////// for j.n.f.Files ////////////////////////// private static boolean isASCII(byte[] src) { --- 968,986 ---- * Throws iae, instead of replacing, if malformed or unmappable. */ static String newStringUTF8NoRepl(byte[] src, int off, int len) { if (COMPACT_STRINGS && !hasNegatives(src, off, len)) return new String(Arrays.copyOfRange(src, off, off + len), LATIN1); ! Result ret = decodeUTF8_0(src, off, len, false, false); return new String(ret.value, ret.coder); } /* * Throws iae, instead of replacing, if unmappable. */ static byte[] getBytesUTF8NoRepl(String s) { ! return encodeUTF8(s.coder(), s.value(), false, false); } ////////////////////// for j.n.f.Files ////////////////////////// private static boolean isASCII(byte[] src) {
*** 974,994 **** static String newStringNoRepl(byte[] src, Charset cs) { if (cs == UTF_8) { if (COMPACT_STRINGS && isASCII(src)) return new String(src, LATIN1); ! Result ret = decodeUTF8_0(src, 0, src.length, false); return new String(ret.value, ret.coder); } if (cs == ISO_8859_1) { return newStringLatin1(src); } if (cs == US_ASCII) { if (isASCII(src)) { return newStringLatin1(src); } else { ! throwMalformed(src); } } CharsetDecoder cd = cs.newDecoder(); // ascii fastpath --- 995,1015 ---- static String newStringNoRepl(byte[] src, Charset cs) { if (cs == UTF_8) { if (COMPACT_STRINGS && isASCII(src)) return new String(src, LATIN1); ! Result ret = decodeUTF8_0(src, 0, src.length, false, true); return new String(ret.value, ret.coder); } if (cs == ISO_8859_1) { return newStringLatin1(src); } if (cs == US_ASCII) { if (isASCII(src)) { return newStringLatin1(src); } else { ! throwMalformed(src, true); } } CharsetDecoder cd = cs.newDecoder(); // ascii fastpath
*** 1030,1053 **** byte coder = s.coder(); if (cs == UTF_8) { if (isASCII(val)) { return val; } ! return encodeUTF8(coder, val, false); } if (cs == ISO_8859_1) { if (coder == LATIN1) { return val; } ! return encode8859_1(coder, val, false); } if (cs == US_ASCII) { if (coder == LATIN1) { if (isASCII(val)) { return val; } else { ! throwMalformed(val); } } } CharsetEncoder ce = cs.newEncoder(); // fastpath for ascii compatible --- 1051,1074 ---- byte coder = s.coder(); if (cs == UTF_8) { if (isASCII(val)) { return val; } ! return encodeUTF8(coder, val, false, true); } if (cs == ISO_8859_1) { if (coder == LATIN1) { return val; } ! return encode8859_1(coder, val, false, true); } if (cs == US_ASCII) { if (coder == LATIN1) { if (isASCII(val)) { return val; } else { ! throwUnmappable(1); } } } CharsetEncoder ce = cs.newEncoder(); // fastpath for ascii compatible
*** 1081,1090 **** cr.throwException(); cr = ce.flush(bb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { ! throw new Error(x); } return safeTrim(ba, bb.position(), isTrusted); } } --- 1102,1111 ---- cr.throwException(); cr = ce.flush(bb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { ! throw new IllegalArgumentException(x); } return safeTrim(ba, bb.position(), isTrusted); } }
< prev index next >