src/share/classes/sun/nio/cs/UTF_8.java
Print this page
@@ -70,11 +70,11 @@
public CharsetEncoder newEncoder() {
return new Encoder(this);
}
- static final void updatePositions(Buffer src, int sp,
+ private static final void updatePositions(Buffer src, int sp,
Buffer dst, int dp) {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
}
@@ -86,32 +86,43 @@
private static boolean isNotContinuation(int b) {
return (b & 0xc0) != 0x80;
}
- // [C2..DF] [80..BF]
- private static boolean isMalformed2(int b1, int b2) {
- return (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80;
- }
-
// [E0] [A0..BF] [80..BF]
// [E1..EF] [80..BF] [80..BF]
private static boolean isMalformed3(int b1, int b2, int b3) {
return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
(b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
}
+ // checks only b1 and b2; used when a complete 3-byte sequence cannot be decoded yet
+ private static boolean isMalformed3_2(int b1, int b2) {
+ return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
+ (b2 & 0xc0) != 0x80;
+ }
+
// [F0] [90..BF] [80..BF] [80..BF]
// [F1..F3] [80..BF] [80..BF] [80..BF]
// [F4] [80..8F] [80..BF] [80..BF]
// only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...]
// will be checked by Character.isSupplementaryCodePoint(uc)
private static boolean isMalformed4(int b2, int b3, int b4) {
return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
(b4 & 0xc0) != 0x80;
}
+ // Prefix check for a 4-byte sequence, used only when the full sequence
+ // cannot be decoded yet (fewer than 4 bytes left in src, or no room in
+ // dst), so only b1 and b2 are examined. Returns true if [b1 b2] cannot
+ // start a well-formed sequence. The legal prefixes are
+ //   [F0] [90..BF], [F1..F3] [80..BF], [F4] [80..8F]
+ // Callers pass sign-extended byte values, so both arguments are masked
+ // to 0..FF before being compared with the unsigned constants below.
+ private static boolean isMalformed4_2(int b1, int b2) {
+ b1 &= 0xff;
+ b2 &= 0xff;
+ return b1 > 0xf4 ||                 // F5..F7: beyond U+10FFFF
+ (b1 == 0xf0 && b2 < 0x90) ||        // F0 80..8F: overlong encoding
+ (b1 == 0xf4 && b2 > 0x8f) ||        // F4 90..BF: beyond U+10FFFF
+ (b2 & 0xc0) != 0x80;                // b2 is not a continuation byte
+ }
+
+ private static boolean isMalformed4_3(int b3) {
+ return (b3 & 0xc0) != 0x80;
+ }
+
private static CoderResult lookupN(ByteBuffer src, int n)
{
for (int i = 1; i < n; i++) {
if (isNotContinuation(src.get()))
return CoderResult.malformedForLength(i);
@@ -120,32 +131,18 @@
}
private static CoderResult malformedN(ByteBuffer src, int nb) {
switch (nb) {
case 1:
- int b1 = src.get();
- if ((b1 >> 2) == -2) {
- // 5 bytes 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
- if (src.remaining() < 4)
- return CoderResult.UNDERFLOW;
- return lookupN(src, 5);
- }
- if ((b1 >> 1) == -2) {
- // 6 bytes 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
- if (src.remaining() < 5)
- return CoderResult.UNDERFLOW;
- return lookupN(src, 6);
- }
- return CoderResult.malformedForLength(1);
case 2: // always 1
return CoderResult.malformedForLength(1);
case 3:
- b1 = src.get();
+ int b1 = src.get();
int b2 = src.get(); // no need to lookup b3
return CoderResult.malformedForLength(
((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
- isNotContinuation(b2))?1:2);
+ isNotContinuation(b2)) ? 1 : 2);
case 4: // we don't care the speed here
b1 = src.get() & 0xff;
b2 = src.get() & 0xff;
if (b1 > 0xf4 ||
(b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
@@ -169,31 +166,50 @@
CoderResult cr = malformedN(src, nb);
updatePositions(src, sp, dst, dp);
return cr;
}
+
private static CoderResult malformed(ByteBuffer src,
int mark, int nb)
{
src.position(mark);
CoderResult cr = malformedN(src, nb);
src.position(mark);
return cr;
}
+ private static CoderResult malformedForLength(ByteBuffer src,
+ int sp,
+ CharBuffer dst,
+ int dp,
+ int malformedNB)
+ {
+ updatePositions(src, sp, dst, dp);
+ return CoderResult.malformedForLength(malformedNB);
+ }
+
+ private static CoderResult malformedForLength(ByteBuffer src,
+ int mark,
+ int malformedNB)
+ {
+ src.position(mark);
+ return CoderResult.malformedForLength(malformedNB);
+ }
+
+
private static CoderResult xflow(Buffer src, int sp, int sl,
Buffer dst, int dp, int nb) {
updatePositions(src, sp, dst, dp);
return (nb == 0 || sl - sp < nb)
- ?CoderResult.UNDERFLOW:CoderResult.OVERFLOW;
+ ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
}
private static CoderResult xflow(Buffer src, int mark, int nb) {
- CoderResult cr = (nb == 0 || src.remaining() < (nb - 1))
- ?CoderResult.UNDERFLOW:CoderResult.OVERFLOW;
src.position(mark);
- return cr;
+ return (nb == 0 || src.remaining() < nb)
+ ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
}
private CoderResult decodeArrayLoop(ByteBuffer src,
CharBuffer dst)
{
@@ -208,51 +224,70 @@
int dlASCII = dp + Math.min(sl - sp, dl - dp);
// ASCII only loop
while (dp < dlASCII && sa[sp] >= 0)
da[dp++] = (char) sa[sp++];
-
while (sp < sl) {
int b1 = sa[sp];
if (b1 >= 0) {
// 1 byte, 7 bits: 0xxxxxxx
if (dp >= dl)
return xflow(src, sp, sl, dst, dp, 1);
da[dp++] = (char) b1;
sp++;
- } else if ((b1 >> 5) == -2) {
+ } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
+ // [C2..DF] [80..BF]
if (sl - sp < 2 || dp >= dl)
return xflow(src, sp, sl, dst, dp, 2);
int b2 = sa[sp + 1];
- if (isMalformed2(b1, b2))
- return malformed(src, sp, dst, dp, 2);
+ // The branch condition above already rejects the overlong lead
+ // bytes C0 and C1 via (b1 & 0x1e) != 0, so the previous check
+ // isMalformed2(b1, b2), i.e.
+ // (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80,
+ // is no longer needed here; only the second byte b2 has to be
+ // verified as a continuation byte.
+ if (isNotContinuation(b2))
+ return malformedForLength(src, sp, dst, dp, 1);
da[dp++] = (char) (((b1 << 6) ^ b2)
^
(((byte) 0xC0 << 6) ^
((byte) 0x80 << 0)));
sp += 2;
} else if ((b1 >> 4) == -2) {
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
- if (sl - sp < 3 || dp >= dl)
+ int srcRemaining = sl - sp;
+ if (srcRemaining < 3 || dp >= dl) {
+ if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1]))
+ return malformedForLength(src, sp, dst, dp, 1);
return xflow(src, sp, sl, dst, dp, 3);
+ }
int b2 = sa[sp + 1];
int b3 = sa[sp + 2];
if (isMalformed3(b1, b2, b3))
return malformed(src, sp, dst, dp, 3);
- da[dp++] = (char)
+ char c = (char)
((b1 << 12) ^
(b2 << 6) ^
(b3 ^
(((byte) 0xE0 << 12) ^
((byte) 0x80 << 6) ^
((byte) 0x80 << 0))));
+ if (Character.isSurrogate(c))
+ return malformedForLength(src, sp, dst, dp, 3);
+ da[dp++] = c;
sp += 3;
} else if ((b1 >> 3) == -2) {
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- if (sl - sp < 4 || dl - dp < 2)
+ int srcRemaining = sl - sp;
+ if (srcRemaining < 4 || dl - dp < 2) {
+ if (srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1]))
+ return malformedForLength(src, sp, dst, dp, 1);
+ if (srcRemaining > 2 && isMalformed4_3(sa[sp + 2]))
+ return malformedForLength(src, sp, dst, dp, 2);
return xflow(src, sp, sl, dst, dp, 4);
+ }
int b2 = sa[sp + 1];
int b3 = sa[sp + 2];
int b4 = sa[sp + 3];
int uc = ((b1 << 18) ^
(b2 << 12) ^
@@ -287,42 +322,55 @@
// 1 byte, 7 bits: 0xxxxxxx
if (dst.remaining() < 1)
return xflow(src, mark, 1); // overflow
dst.put((char) b1);
mark++;
- } else if ((b1 >> 5) == -2) {
+ } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
if (limit - mark < 2|| dst.remaining() < 1)
return xflow(src, mark, 2);
int b2 = src.get();
- if (isMalformed2(b1, b2))
- return malformed(src, mark, 2);
+ if (isNotContinuation(b2))
+ return malformedForLength(src, mark, 1);
dst.put((char) (((b1 << 6) ^ b2)
^
(((byte) 0xC0 << 6) ^
((byte) 0x80 << 0))));
mark += 2;
} else if ((b1 >> 4) == -2) {
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
- if (limit - mark < 3 || dst.remaining() < 1)
+ int srcRemaining = limit - mark;
+ if (srcRemaining < 3 || dst.remaining() < 1) {
+ if (srcRemaining > 1 && isMalformed3_2(b1, src.get()))
+ return malformedForLength(src, mark, 1);
return xflow(src, mark, 3);
+ }
int b2 = src.get();
int b3 = src.get();
if (isMalformed3(b1, b2, b3))
return malformed(src, mark, 3);
- dst.put((char)
+ char c = (char)
((b1 << 12) ^
(b2 << 6) ^
(b3 ^
(((byte) 0xE0 << 12) ^
((byte) 0x80 << 6) ^
- ((byte) 0x80 << 0)))));
+ ((byte) 0x80 << 0))));
+ if (Character.isSurrogate(c))
+ return malformedForLength(src, mark, 3);
+ dst.put(c);
mark += 3;
} else if ((b1 >> 3) == -2) {
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- if (limit - mark < 4 || dst.remaining() < 2)
+ int srcRemaining = limit - mark;
+ if (srcRemaining < 4 || dst.remaining() < 2) {
+ if (srcRemaining > 1 && isMalformed4_2(b1, src.get()))
+ return malformedForLength(src, mark, 1);
+ if (srcRemaining > 2 && isMalformed4_3(src.get()))
+ return malformedForLength(src, mark, 2);
return xflow(src, mark, 4);
+ }
int b2 = src.get();
int b3 = src.get();
int b4 = src.get();
int uc = ((b1 << 18) ^
(b2 << 12) ^
@@ -362,11 +410,11 @@
bb = ByteBuffer.wrap(ba);
bb.position(sp);
return bb;
}
- // returns -1 if there is malformed byte(s) and the
+ // returns -1 if there is/are malformed byte(s) and the
// "action" for malformed input is not REPLACE.
public int decode(byte[] sa, int sp, int len, char[] da) {
final int sl = sp + len;
int dp = 0;
int dlASCII = Math.min(len, da.length);
@@ -379,15 +427,15 @@
while (sp < sl) {
int b1 = sa[sp++];
if (b1 >= 0) {
// 1 byte, 7 bits: 0xxxxxxx
da[dp++] = (char) b1;
- } else if ((b1 >> 5) == -2) {
+ } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
if (sp < sl) {
int b2 = sa[sp++];
- if (isMalformed2(b1, b2)) {
+ if (isNotContinuation(b2)) {
if (malformedInputAction() != CodingErrorAction.REPLACE)
return -1;
da[dp++] = replacement().charAt(0);
sp--; // malformedN(bb, 2) always returns 1
} else {
@@ -408,25 +456,37 @@
int b3 = sa[sp++];
if (isMalformed3(b1, b2, b3)) {
if (malformedInputAction() != CodingErrorAction.REPLACE)
return -1;
da[dp++] = replacement().charAt(0);
- sp -=3;
+ sp -= 3;
bb = getByteBuffer(bb, sa, sp);
sp += malformedN(bb, 3).length();
} else {
- da[dp++] = (char)((b1 << 12) ^
+ char c = (char)((b1 << 12) ^
(b2 << 6) ^
(b3 ^
(((byte) 0xE0 << 12) ^
((byte) 0x80 << 6) ^
((byte) 0x80 << 0))));
+ if (Character.isSurrogate(c)) {
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ } else {
+ da[dp++] = c;
+ }
}
continue;
}
if (malformedInputAction() != CodingErrorAction.REPLACE)
return -1;
+ if (sp < sl && isMalformed3_2(b1, sa[sp])) {
+ da[dp++] = replacement().charAt(0);
+ continue;
+
+ }
da[dp++] = replacement().charAt(0);
return dp;
} else if ((b1 >> 3) == -2) {
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
if (sp + 2 < sl) {
@@ -456,32 +516,33 @@
}
continue;
}
if (malformedInputAction() != CodingErrorAction.REPLACE)
return -1;
+
+ if (sp < sl && isMalformed4_2(b1, sa[sp])) {
+ da[dp++] = replacement().charAt(0);
+ continue;
+ }
+ sp++;
+ if (sp < sl && isMalformed4_3(sa[sp])) {
+ da[dp++] = replacement().charAt(0);
+ continue;
+ }
da[dp++] = replacement().charAt(0);
return dp;
} else {
if (malformedInputAction() != CodingErrorAction.REPLACE)
return -1;
da[dp++] = replacement().charAt(0);
- sp--;
- bb = getByteBuffer(bb, sa, sp);
- CoderResult cr = malformedN(bb, 1);
- if (!cr.isError()) {
- // leading byte for 5 or 6-byte, but don't have enough
- // bytes in buffer to check. Consumed rest as malformed.
- return dp;
- }
- sp += cr.length();
}
}
return dp;
}
}
- private static class Encoder extends CharsetEncoder
+ private static final class Encoder extends CharsetEncoder
implements ArrayEncoder {
private Encoder(Charset cs) {
super(cs, 1.1f, 3.0f);
}