--- old/src/share/classes/sun/nio/cs/UTF_8.java 2011-09-19 13:12:52.000000000 -0700 +++ new/src/share/classes/sun/nio/cs/UTF_8.java 2011-09-19 13:12:51.000000000 -0700 @@ -100,6 +100,13 @@ (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80; } + // only used when there is only one byte left in src buffer + private static boolean isMalformed3_2(int b1, int b2) { + return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || + (b2 & 0xc0) != 0x80; + } + + // [F0] [90..BF] [80..BF] [80..BF] // [F1..F3] [80..BF] [80..BF] [80..BF] // [F4] [80..8F] [80..BF] [80..BF] @@ -110,6 +117,17 @@ (b4 & 0xc0) != 0x80; } + // only used when there is less than 4 bytes left in src buffer + private static boolean isMalformed4_2(int b1, int b2) { + return (b1 == 0xf0 && b2 == 0x90) || + (b2 & 0xc0) != 0x80; + } + + private static boolean isMalformed4_3(int b3) { + return (b3 & 0xc0) != 0x80; + } + + private static CoderResult lookupN(ByteBuffer src, int n) { for (int i = 1; i < n; i++) { @@ -171,6 +189,7 @@ return cr; } + private static CoderResult malformed(ByteBuffer src, int mark, int nb) { @@ -180,6 +199,25 @@ return cr; } + private static CoderResult malformedForLength(ByteBuffer src, + int sp, + CharBuffer dst, + int dp, + int malformedNB) + { + updatePositions(src, sp, dst, dp); + return CoderResult.malformedForLength(malformedNB); + } + + private static CoderResult malformedForLength(ByteBuffer src, + int mark, + int malformedNB) + { + src.position(mark); + return CoderResult.malformedForLength(malformedNB); + } + + private static CoderResult xflow(Buffer src, int sp, int sl, Buffer dst, int dp, int nb) { updatePositions(src, sp, dst, dp); @@ -188,9 +226,10 @@ } private static CoderResult xflow(Buffer src, int mark, int nb) { - CoderResult cr = (nb == 0 || src.remaining() < (nb - 1)) - ?CoderResult.UNDERFLOW:CoderResult.OVERFLOW; src.position(mark); + CoderResult cr = (nb == 0 || src.remaining() < nb) + ?CoderResult.UNDERFLOW:CoderResult.OVERFLOW; + return cr; } @@ -210,7 +249,6 @@ // ASCII only loop while (dp < dlASCII && sa[sp] >= 0) da[dp++] = (char) sa[sp++]; - while (sp < sl) { int b1 = sa[sp]; if (b1 >= 0) { @@ -233,8 +271,12 @@ sp += 2; } else if ((b1 >> 4) == -2) { // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx - if (sl - sp < 3 || dp >= dl) + int srcRemaining = sl - sp; + if (srcRemaining < 3 || dp >= dl) { + if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1])) + return malformedForLength(src, sp, dst, dp, 1); return xflow(src, sp, sl, dst, dp, 3); + } int b2 = sa[sp + 1]; int b3 = sa[sp + 2]; if (isMalformed3(b1, b2, b3)) @@ -249,8 +291,14 @@ sp += 3; } else if ((b1 >> 3) == -2) { // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - if (sl - sp < 4 || dl - dp < 2) + int srcRemaining = sl - sp; + if (srcRemaining < 4 || dl - dp < 2) { + if (srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1])) + return malformedForLength(src, sp, dst, dp, 1); + if (srcRemaining > 2 && isMalformed4_3(sa[sp + 2])) + return malformedForLength(src, sp, dst, dp, 2); return xflow(src, sp, sl, dst, dp, 4); + } int b2 = sa[sp + 1]; int b3 = sa[sp + 2]; int b4 = sa[sp + 3]; @@ -303,8 +351,12 @@ mark += 2; } else if ((b1 >> 4) == -2) { // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx - if (limit - mark < 3 || dst.remaining() < 1) + int srcRemaining = limit - mark; + if (srcRemaining < 3 || dst.remaining() < 1) { + if (srcRemaining > 1 && isMalformed3_2(b1, src.get())) + return malformedForLength(src, mark, 1); return xflow(src, mark, 3); + } int b2 = src.get(); int b3 = src.get(); if (isMalformed3(b1, b2, b3)) @@ -319,8 +371,14 @@ mark += 3; } else if ((b1 >> 3) == -2) { // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - if (limit - mark < 4 || dst.remaining() < 2) + int srcRemaining = limit - mark; + if (srcRemaining < 4 || dst.remaining() < 2) { + if (srcRemaining > 1 && isMalformed4_2(b1, src.get())) + return malformedForLength(src, mark, 1); + if (srcRemaining > 2 && isMalformed4_3(src.get())) + return malformedForLength(src, mark, 2); return xflow(src, mark, 4); + } int b2 = src.get(); int b3 = src.get(); int b4 = src.get(); @@ -425,6 +483,11 @@ } if (malformedInputAction() != CodingErrorAction.REPLACE) return -1; + if (sp < sl && isMalformed3_2(b1, sa[sp])) { + da[dp++] = replacement().charAt(0); + continue; + + } da[dp++] = replacement().charAt(0); return dp; } else if ((b1 >> 3) == -2) { @@ -458,6 +521,16 @@ } if (malformedInputAction() != CodingErrorAction.REPLACE) return -1; + + if (sp < sl && isMalformed4_2(b1, sa[sp])) { + da[dp++] = replacement().charAt(0); + continue; + } + sp++; + if (sp < sl && isMalformed4_3(sa[sp])) { + da[dp++] = replacement().charAt(0); + continue; + } da[dp++] = replacement().charAt(0); return dp; } else { --- old/test/sun/nio/cs/TestUTF8.java 2011-09-19 13:12:53.000000000 -0700 +++ new/test/sun/nio/cs/TestUTF8.java 2011-09-19 13:12:53.000000000 -0700 @@ -266,6 +266,8 @@ {1, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones {1, (byte)0xE0, (byte)0xC0, (byte)0x80 }, // invalid second byte {1, (byte)0xE0, (byte)0x80, (byte)0xC0 }, // invalid first byte + {1, (byte)0xE0, (byte)0x41,}, // invalid second byte & 2 bytes + // Four-byte sequences {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded @@ -276,8 +278,13 @@ {1, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid second byte {1, (byte)0xF0, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte + {1, (byte)0xF0, (byte)41 }, // invalid second byte + // & only 2 bytes + {2, (byte)0xF0, (byte)0x90, (byte)0xC0, (byte)0x80 }, // invalid third byte - {3, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid third byte + {3, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid forth byte + {2, (byte)0xF0, (byte)0x90, (byte)0x41 }, // invalid third byte + // & 3 bytes input {1, (byte)0xF1, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte {2, (byte)0xF1, (byte)0x80, (byte)0xC0, (byte)0x80 }, // invalid third byte @@ -310,6 +317,7 @@ {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 }, }; + static void checkMalformed(String csn) throws Exception { boolean failed = false; System.out.printf(" Check malformed <%s>...%n", csn);