src/share/classes/sun/nio/cs/UTF_8.java
Print this page
*** 30,39 ****
--- 30,40 ----
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
+ import java.nio.charset.CodingErrorAction;
/* Legal UTF-8 Byte Sequences
*
* # Code Points Bits Bit/Byte pattern
* 1 7 0xxxxxxx
*** 75,85 ****
Buffer dst, int dp) {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
}
! private static class Decoder extends CharsetDecoder {
private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
private static boolean isNotContinuation(int b) {
--- 76,87 ----
Buffer dst, int dp) {
src.position(sp - src.arrayOffset());
dst.position(dp - dst.arrayOffset());
}
! private static class Decoder extends CharsetDecoder
! implements ArrayDecoder {
private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
private static boolean isNotContinuation(int b) {
*** 351,363 ****
if (src.hasArray() && dst.hasArray())
return decodeArrayLoop(src, dst);
else
return decodeBufferLoop(src, dst);
}
}
! private static class Encoder extends CharsetEncoder {
private Encoder(Charset cs) {
super(cs, 1.1f, 3.0f);
}
--- 353,488 ----
if (src.hasArray() && dst.hasArray())
return decodeArrayLoop(src, dst);
else
return decodeBufferLoop(src, dst);
}
+
+ private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp)
+ {
+ if (bb == null)
+ bb = ByteBuffer.wrap(ba);
+ bb.position(sp);
+ return bb;
+ }
+
+ // returns -1 if there is malformed byte(s) and the
+ // "action" for malformed input is not REPLACE.
+ public int decode(byte[] sa, int sp, int len, char[] da) {
+ final int sl = sp + len;
+ int dp = 0;
+ int dlASCII = Math.min(len, da.length);
+ ByteBuffer bb = null; // only necessary if malformed
+
+ // ASCII only optimized loop
+ while (dp < dlASCII && sa[sp] >= 0)
+ da[dp++] = (char) sa[sp++];
+
+ while (sp < sl) {
+ int b1 = sa[sp++];
+ if (b1 >= 0) {
+ // 1 byte, 7 bits: 0xxxxxxx
+ da[dp++] = (char) b1;
+ } else if ((b1 >> 5) == -2) {
+ // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
+ if (sp < sl) {
+ int b2 = sa[sp++];
+ if (isMalformed2(b1, b2)) {
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ sp--; // malformedN(bb, 2) always returns 1
+ } else {
+ da[dp++] = (char) (((b1 << 6) ^ b2)^
+ (((byte) 0xC0 << 6) ^
+ ((byte) 0x80 << 0)));
+ }
+ continue;
+ }
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ return dp;
+ } else if ((b1 >> 4) == -2) {
+ // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
+ if (sp + 1 < sl) {
+ int b2 = sa[sp++];
+ int b3 = sa[sp++];
+ if (isMalformed3(b1, b2, b3)) {
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ sp -=3;
+ bb = getByteBuffer(bb, sa, sp);
+ sp += malformedN(bb, 3).length();
+ } else {
+ da[dp++] = (char)((b1 << 12) ^
+ (b2 << 6) ^
+ (b3 ^
+ (((byte) 0xE0 << 12) ^
+ ((byte) 0x80 << 6) ^
+ ((byte) 0x80 << 0))));
+ }
+ continue;
+ }
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ return dp;
+ } else if ((b1 >> 3) == -2) {
+ // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ if (sp + 2 < sl) {
+ int b2 = sa[sp++];
+ int b3 = sa[sp++];
+ int b4 = sa[sp++];
+ int uc = ((b1 << 18) ^
+ (b2 << 12) ^
+ (b3 << 6) ^
+ (b4 ^
+ (((byte) 0xF0 << 18) ^
+ ((byte) 0x80 << 12) ^
+ ((byte) 0x80 << 6) ^
+ ((byte) 0x80 << 0))));
+ if (isMalformed4(b2, b3, b4) ||
+ // shortest form check
+ !Character.isSupplementaryCodePoint(uc)) {
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ sp -= 4;
+ bb = getByteBuffer(bb, sa, sp);
+ sp += malformedN(bb, 4).length();
+ } else {
+ da[dp++] = Character.highSurrogate(uc);
+ da[dp++] = Character.lowSurrogate(uc);
+ }
+ continue;
+ }
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ return dp;
+ } else {
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement().charAt(0);
+ sp--;
+ bb = getByteBuffer(bb, sa, sp);
+ CoderResult cr = malformedN(bb, 1);
+ if (!cr.isError()) {
+ // leading byte for 5 or 6-byte, but don't have enough
+ // bytes in buffer to check. Consumed rest as malformed.
+ return dp;
+ }
+ sp += cr.length();
+ }
+ }
+ return dp;
+ }
}
! private static class Encoder extends CharsetEncoder
! implements ArrayEncoder {
private Encoder(Charset cs) {
super(cs, 1.1f, 3.0f);
}
*** 493,499 ****
--- 618,669 ----
if (src.hasArray() && dst.hasArray())
return encodeArrayLoop(src, dst);
else
return encodeBufferLoop(src, dst);
}
+
+ // returns -1 if there is malformed char(s) and the
+ // "action" for malformed input is not REPLACE.
+ public int encode(char[] sa, int sp, int len, byte[] da) {
+ int sl = sp + len;
+ int dp = 0;
+ int dlASCII = dp + Math.min(len, da.length);
+
+ // ASCII only optimized loop
+ while (dp < dlASCII && sa[sp] < '\u0080')
+ da[dp++] = (byte) sa[sp++];
+
+ while (sp < sl) {
+ char c = sa[sp++];
+ if (c < 0x80) {
+ // Have at most seven bits
+ da[dp++] = (byte)c;
+ } else if (c < 0x800) {
+ // 2 bytes, 11 bits
+ da[dp++] = (byte)(0xc0 | (c >> 6));
+ da[dp++] = (byte)(0x80 | (c & 0x3f));
+ } else if (Character.isSurrogate(c)) {
+ if (sgp == null)
+ sgp = new Surrogate.Parser();
+ int uc = sgp.parse(c, sa, sp - 1, sl);
+ if (uc < 0) {
+ if (malformedInputAction() != CodingErrorAction.REPLACE)
+ return -1;
+ da[dp++] = replacement()[0];
+ } else {
+ da[dp++] = (byte)(0xf0 | ((uc >> 18)));
+ da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
+ da[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
+ da[dp++] = (byte)(0x80 | (uc & 0x3f));
+ sp++; // 2 chars
+ }
+ } else {
+ // 3 bytes, 16 bits
+ da[dp++] = (byte)(0xe0 | ((c >> 12)));
+ da[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));
+ da[dp++] = (byte)(0x80 | (c & 0x3f));
+ }
+ }
+ return dp;
+ }
}
}