55 */ 56 57 class UTF_8 extends Unicode 58 { 59 public UTF_8() { 60 super("UTF-8", StandardCharsets.aliases_UTF_8); 61 } 62 63 public String historicalName() { 64 return "UTF8"; 65 } 66 67 public CharsetDecoder newDecoder() { 68 return new Decoder(this); 69 } 70 71 public CharsetEncoder newEncoder() { 72 return new Encoder(this); 73 } 74 75 static final void updatePositions(Buffer src, int sp, 76 Buffer dst, int dp) { 77 src.position(sp - src.arrayOffset()); 78 dst.position(dp - dst.arrayOffset()); 79 } 80 81 private static class Decoder extends CharsetDecoder 82 implements ArrayDecoder { 83 private Decoder(Charset cs) { 84 super(cs, 1.0f, 1.0f); 85 } 86 87 private static boolean isNotContinuation(int b) { 88 return (b & 0xc0) != 0x80; 89 } 90 91 // [C2..DF] [80..BF] 92 private static boolean isMalformed2(int b1, int b2) { 93 return (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80; 94 } 95 96 // [E0] [A0..BF] [80..BF] 97 // [E1..EF] [80..BF] [80..BF] 98 private static boolean isMalformed3(int b1, int b2, int b3) { 99 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 100 (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80; 101 } 102 103 // [F0] [90..BF] [80..BF] [80..BF] 104 // [F1..F3] [80..BF] [80..BF] [80..BF] 105 // [F4] [80..8F] [80..BF] [80..BF] 106 // only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...] 107 // will be checked by Character.isSupplementaryCodePoint(uc) 108 private static boolean isMalformed4(int b2, int b3, int b4) { 109 return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 || 110 (b4 & 0xc0) != 0x80; 111 } 112 113 private static CoderResult lookupN(ByteBuffer src, int n) 114 { 115 for (int i = 1; i < n; i++) { 116 if (isNotContinuation(src.get())) 117 return CoderResult.malformedForLength(i); 118 } 119 return CoderResult.malformedForLength(n); 120 } 121 122 private static CoderResult malformedN(ByteBuffer src, int nb) { 123 switch (nb) { 124 case 1: 125 int b1 = src.get(); 126 if ((b1 >> 2) == -2) { 127 // 5 bytes 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 128 if (src.remaining() < 4) 129 return CoderResult.UNDERFLOW; 130 return lookupN(src, 5); 131 } 132 if ((b1 >> 1) == -2) { 133 // 6 bytes 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 134 if (src.remaining() < 5) 135 return CoderResult.UNDERFLOW; 136 return lookupN(src, 6); 137 } 138 return CoderResult.malformedForLength(1); 139 case 2: // always 1 140 return CoderResult.malformedForLength(1); 141 case 3: 142 b1 = src.get(); 143 int b2 = src.get(); // no need to lookup b3 144 return CoderResult.malformedForLength( 145 ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 146 isNotContinuation(b2))?1:2); 147 case 4: // we don't care the speed here 148 b1 = src.get() & 0xff; 149 b2 = src.get() & 0xff; 150 if (b1 > 0xf4 || 151 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || 152 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || 153 isNotContinuation(b2)) 154 return CoderResult.malformedForLength(1); 155 if (isNotContinuation(src.get())) 156 return CoderResult.malformedForLength(2); 157 return CoderResult.malformedForLength(3); 158 default: 159 assert false; 160 return null; 161 } 162 } 163 164 private static CoderResult malformed(ByteBuffer src, int sp, 165 CharBuffer dst, int dp, 166 int nb) 167 { 168 src.position(sp - src.arrayOffset()); 169 CoderResult cr = malformedN(src, nb); 170 updatePositions(src, sp, dst, dp); 171 return cr; 172 } 173 174 private static CoderResult malformed(ByteBuffer src, 175 int mark, int nb) 176 { 177 src.position(mark); 178 CoderResult cr = malformedN(src, nb); 179 src.position(mark); 180 return cr; 181 } 182 183 private static CoderResult xflow(Buffer src, int sp, int sl, 184 Buffer dst, int dp, int nb) { 185 updatePositions(src, sp, dst, dp); 186 return (nb == 0 || sl - sp < nb) 187 ?CoderResult.UNDERFLOW:CoderResult.OVERFLOW; 188 } 189 190 private static CoderResult xflow(Buffer src, int mark, int nb) { 191 CoderResult cr = (nb == 0 || src.remaining() < (nb - 1)) 192 ?CoderResult.UNDERFLOW:CoderResult.OVERFLOW; 193 src.position(mark); 194 return cr; 195 } 196 197 private CoderResult decodeArrayLoop(ByteBuffer src, 198 CharBuffer dst) 199 { 200 // This method is optimized for ASCII input. 201 byte[] sa = src.array(); 202 int sp = src.arrayOffset() + src.position(); 203 int sl = src.arrayOffset() + src.limit(); 204 205 char[] da = dst.array(); 206 int dp = dst.arrayOffset() + dst.position(); 207 int dl = dst.arrayOffset() + dst.limit(); 208 int dlASCII = dp + Math.min(sl - sp, dl - dp); 209 210 // ASCII only loop 211 while (dp < dlASCII && sa[sp] >= 0) 212 da[dp++] = (char) sa[sp++]; 213 214 while (sp < sl) { 215 int b1 = sa[sp]; 216 if (b1 >= 0) { 217 // 1 byte, 7 bits: 0xxxxxxx 218 if (dp >= dl) 219 return xflow(src, sp, sl, dst, dp, 1); 220 da[dp++] = (char) b1; 221 sp++; 222 } else if ((b1 >> 5) == -2) { 223 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 224 if (sl - sp < 2 || dp >= dl) 225 return xflow(src, sp, sl, dst, dp, 2); 226 int b2 = sa[sp + 1]; 227 if (isMalformed2(b1, b2)) 228 return malformed(src, sp, dst, dp, 2); 229 da[dp++] = (char) (((b1 << 6) ^ b2) 230 ^ 231 (((byte) 0xC0 << 6) ^ 232 ((byte) 0x80 << 0))); 233 sp += 2; 234 } else if ((b1 >> 4) == -2) { 235 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 236 if (sl - sp < 3 || dp >= dl) 237 return xflow(src, sp, sl, dst, dp, 3); 238 int b2 = sa[sp + 1]; 239 int b3 = sa[sp + 2]; 240 if (isMalformed3(b1, b2, b3)) 241 return malformed(src, sp, dst, dp, 3); 242 da[dp++] = (char) 243 ((b1 << 12) ^ 244 (b2 << 6) ^ 245 (b3 ^ 246 (((byte) 0xE0 << 12) ^ 247 ((byte) 0x80 << 6) ^ 248 ((byte) 0x80 << 0)))); 249 sp += 3; 250 } else if ((b1 >> 3) == -2) { 251 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 252 if (sl - sp < 4 || dl - dp < 2) 253 return xflow(src, sp, sl, dst, dp, 4); 254 int b2 = sa[sp + 1]; 255 int b3 = sa[sp + 2]; 256 int b4 = sa[sp + 3]; 257 int uc = ((b1 << 18) ^ 258 (b2 << 12) ^ 259 (b3 << 6) ^ 260 (b4 ^ 261 (((byte) 0xF0 << 18) ^ 262 ((byte) 0x80 << 12) ^ 263 ((byte) 0x80 << 6) ^ 264 ((byte) 0x80 << 0)))); 265 if (isMalformed4(b2, b3, b4) || 266 // shortest form check 267 !Character.isSupplementaryCodePoint(uc)) { 268 return malformed(src, sp, dst, dp, 4); 269 } 270 da[dp++] = Character.highSurrogate(uc); 271 da[dp++] = Character.lowSurrogate(uc); 272 sp += 4; 273 } else 274 return malformed(src, sp, dst, dp, 1); 275 } 276 return xflow(src, sp, sl, dst, dp, 0); 277 } 278 279 private CoderResult decodeBufferLoop(ByteBuffer src, 280 CharBuffer dst) 281 { 282 int mark = src.position(); 283 int limit = src.limit(); 284 while (mark < limit) { 285 int b1 = src.get(); 286 if (b1 >= 0) { 287 // 1 byte, 7 bits: 0xxxxxxx 288 if (dst.remaining() < 1) 289 return xflow(src, mark, 1); // overflow 290 dst.put((char) b1); 291 mark++; 292 } else if ((b1 >> 5) == -2) { 293 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 294 if (limit - mark < 2|| dst.remaining() < 1) 295 return xflow(src, mark, 2); 296 int b2 = src.get(); 297 if (isMalformed2(b1, b2)) 298 return malformed(src, mark, 2); 299 dst.put((char) (((b1 << 6) ^ b2) 300 ^ 301 (((byte) 0xC0 << 6) ^ 302 ((byte) 0x80 << 0)))); 303 mark += 2; 304 } else if ((b1 >> 4) == -2) { 305 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 306 if (limit - mark < 3 || dst.remaining() < 1) 307 return xflow(src, mark, 3); 308 int b2 = src.get(); 309 int b3 = src.get(); 310 if (isMalformed3(b1, b2, b3)) 311 return malformed(src, mark, 3); 312 dst.put((char) 313 ((b1 << 12) ^ 314 (b2 << 6) ^ 315 (b3 ^ 316 (((byte) 0xE0 << 12) ^ 317 ((byte) 0x80 << 6) ^ 318 ((byte) 0x80 << 0))))); 319 mark += 3; 320 } else if ((b1 >> 3) == -2) { 321 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 322 if (limit - mark < 4 || dst.remaining() < 2) 323 return xflow(src, mark, 4); 324 int b2 = src.get(); 325 int b3 = src.get(); 326 int b4 = src.get(); 327 int uc = ((b1 << 18) ^ 328 (b2 << 12) ^ 329 (b3 << 6) ^ 330 (b4 ^ 331 (((byte) 0xF0 << 18) ^ 332 ((byte) 0x80 << 12) ^ 333 ((byte) 0x80 << 6) ^ 334 ((byte) 0x80 << 0)))); 335 if (isMalformed4(b2, b3, b4) || 336 // shortest form check 337 !Character.isSupplementaryCodePoint(uc)) { 338 return malformed(src, mark, 4); 339 } 340 dst.put(Character.highSurrogate(uc)); 341 dst.put(Character.lowSurrogate(uc)); 342 mark += 4; 343 } else { 347 return xflow(src, mark, 0); 348 } 349 350 protected CoderResult decodeLoop(ByteBuffer src, 351 CharBuffer dst) 352 { 353 if (src.hasArray() && dst.hasArray()) 354 return decodeArrayLoop(src, dst); 355 else 356 return decodeBufferLoop(src, dst); 357 } 358 359 private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp) 360 { 361 if (bb == null) 362 bb = ByteBuffer.wrap(ba); 363 bb.position(sp); 364 return bb; 365 } 366 367 // returns -1 if there is malformed byte(s) and the 368 // "action" for malformed input is not REPLACE. 369 public int decode(byte[] sa, int sp, int len, char[] da) { 370 final int sl = sp + len; 371 int dp = 0; 372 int dlASCII = Math.min(len, da.length); 373 ByteBuffer bb = null; // only necessary if malformed 374 375 // ASCII only optimized loop 376 while (dp < dlASCII && sa[sp] >= 0) 377 da[dp++] = (char) sa[sp++]; 378 379 while (sp < sl) { 380 int b1 = sa[sp++]; 381 if (b1 >= 0) { 382 // 1 byte, 7 bits: 0xxxxxxx 383 da[dp++] = (char) b1; 384 } else if ((b1 >> 5) == -2) { 385 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 386 if (sp < sl) { 387 int b2 = sa[sp++]; 388 if (isMalformed2(b1, b2)) { 389 if (malformedInputAction() != CodingErrorAction.REPLACE) 390 return -1; 391 da[dp++] = replacement().charAt(0); 392 sp--; // malformedN(bb, 2) always returns 1 393 } else { 394 da[dp++] = (char) (((b1 << 6) ^ b2)^ 395 (((byte) 0xC0 << 6) ^ 396 ((byte) 0x80 << 0))); 397 } 398 continue; 399 } 400 if (malformedInputAction() != CodingErrorAction.REPLACE) 401 return -1; 402 da[dp++] = replacement().charAt(0); 403 return dp; 404 } else if ((b1 >> 4) == -2) { 405 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 406 if (sp + 1 < sl) { 407 int b2 = sa[sp++]; 408 int b3 = sa[sp++]; 409 if (isMalformed3(b1, b2, b3)) { 410 if (malformedInputAction() != CodingErrorAction.REPLACE) 411 return -1; 412 da[dp++] = replacement().charAt(0); 413 sp -=3; 414 bb = getByteBuffer(bb, sa, sp); 415 sp += malformedN(bb, 3).length(); 416 } else { 417 da[dp++] = (char)((b1 << 12) ^ 418 (b2 << 6) ^ 419 (b3 ^ 420 (((byte) 0xE0 << 12) ^ 421 ((byte) 0x80 << 6) ^ 422 ((byte) 0x80 << 0)))); 423 } 424 continue; 425 } 426 if (malformedInputAction() != CodingErrorAction.REPLACE) 427 return -1; 428 da[dp++] = replacement().charAt(0); 429 return dp; 430 } else if ((b1 >> 3) == -2) { 431 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 432 if (sp + 2 < sl) { 433 int b2 = sa[sp++]; 434 int b3 = sa[sp++]; 435 int b4 = sa[sp++]; 436 int uc = ((b1 << 18) ^ 437 (b2 << 12) ^ 438 (b3 << 6) ^ 439 (b4 ^ 440 (((byte) 0xF0 << 18) ^ 441 ((byte) 0x80 << 12) ^ 442 ((byte) 0x80 << 6) ^ 443 ((byte) 0x80 << 0)))); 444 if (isMalformed4(b2, b3, b4) || 445 // shortest form check 446 !Character.isSupplementaryCodePoint(uc)) { 447 if (malformedInputAction() != CodingErrorAction.REPLACE) 448 return -1; 449 da[dp++] = replacement().charAt(0); 450 sp -= 4; 451 bb = getByteBuffer(bb, sa, sp); 452 sp += malformedN(bb, 4).length(); 453 } else { 454 da[dp++] = Character.highSurrogate(uc); 455 da[dp++] = Character.lowSurrogate(uc); 456 } 457 continue; 458 } 459 if (malformedInputAction() != CodingErrorAction.REPLACE) 460 return -1; 461 da[dp++] = replacement().charAt(0); 462 return dp; 463 } else { 464 if (malformedInputAction() != CodingErrorAction.REPLACE) 465 return -1; 466 da[dp++] = replacement().charAt(0); 467 sp--; 468 bb = getByteBuffer(bb, sa, sp); 469 CoderResult cr = malformedN(bb, 1); 470 if (!cr.isError()) { 471 // leading byte for 5 or 6-byte, but don't have enough 472 // bytes in buffer to check. Consumed rest as malformed. 473 return dp; 474 } 475 sp += cr.length(); 476 } 477 } 478 return dp; 479 } 480 } 481 482 private static class Encoder extends CharsetEncoder 483 implements ArrayEncoder { 484 485 private Encoder(Charset cs) { 486 super(cs, 1.1f, 3.0f); 487 } 488 489 public boolean canEncode(char c) { 490 return !Character.isSurrogate(c); 491 } 492 493 public boolean isLegalReplacement(byte[] repl) { 494 return ((repl.length == 1 && repl[0] >= 0) || 495 super.isLegalReplacement(repl)); 496 } 497 498 private static CoderResult overflow(CharBuffer src, int sp, 499 ByteBuffer dst, int dp) { 500 updatePositions(src, sp, dst, dp); 501 return CoderResult.OVERFLOW; 502 } | 55 */ 56 57 class UTF_8 extends Unicode 58 { 59 public UTF_8() { 60 super("UTF-8", StandardCharsets.aliases_UTF_8); 61 } 62 63 public String historicalName() { 64 return "UTF8"; 65 } 66 67 public CharsetDecoder newDecoder() { 68 return new Decoder(this); 69 } 70 71 public CharsetEncoder newEncoder() { 72 return new Encoder(this); 73 } 74 75 private static final void updatePositions(Buffer src, int sp, 76 Buffer dst, int dp) { 77 src.position(sp - src.arrayOffset()); 78 dst.position(dp - dst.arrayOffset()); 79 } 80 81 private static class Decoder extends CharsetDecoder 82 implements ArrayDecoder { 83 private Decoder(Charset cs) { 84 super(cs, 1.0f, 1.0f); 85 } 86 87 private static boolean isNotContinuation(int b) { 88 return (b & 0xc0) != 0x80; 89 } 90 91 // [E0] [A0..BF] [80..BF] 92 // [E1..EF] [80..BF] [80..BF] 93 private static boolean isMalformed3(int b1, int b2, int b3) { 94 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 95 (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80; 96 } 97 98 // only used when there is only one byte left in src buffer 99 private static boolean isMalformed3_2(int b1, int b2) { 100 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 101 (b2 & 0xc0) != 0x80; 102 } 103 104 // [F0] [90..BF] [80..BF] [80..BF] 105 // [F1..F3] [80..BF] [80..BF] [80..BF] 106 // [F4] [80..8F] [80..BF] [80..BF] 107 // only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...] 108 // will be checked by Character.isSupplementaryCodePoint(uc) 109 private static boolean isMalformed4(int b2, int b3, int b4) { 110 return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 || 111 (b4 & 0xc0) != 0x80; 112 } 113 114 // only used when there is less than 4 bytes left in src buffer 115 private static boolean isMalformed4_2(int b1, int b2) { 116 return (b1 == 0xf0 && b2 == 0x90) || 117 (b2 & 0xc0) != 0x80; 118 } 119 120 private static boolean isMalformed4_3(int b3) { 121 return (b3 & 0xc0) != 0x80; 122 } 123 124 private static CoderResult lookupN(ByteBuffer src, int n) 125 { 126 for (int i = 1; i < n; i++) { 127 if (isNotContinuation(src.get())) 128 return CoderResult.malformedForLength(i); 129 } 130 return CoderResult.malformedForLength(n); 131 } 132 133 private static CoderResult malformedN(ByteBuffer src, int nb) { 134 switch (nb) { 135 case 1: 136 case 2: // always 1 137 return CoderResult.malformedForLength(1); 138 case 3: 139 int b1 = src.get(); 140 int b2 = src.get(); // no need to lookup b3 141 return CoderResult.malformedForLength( 142 ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 143 isNotContinuation(b2)) ? 1 : 2); 144 case 4: // we don't care the speed here 145 b1 = src.get() & 0xff; 146 b2 = src.get() & 0xff; 147 if (b1 > 0xf4 || 148 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || 149 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || 150 isNotContinuation(b2)) 151 return CoderResult.malformedForLength(1); 152 if (isNotContinuation(src.get())) 153 return CoderResult.malformedForLength(2); 154 return CoderResult.malformedForLength(3); 155 default: 156 assert false; 157 return null; 158 } 159 } 160 161 private static CoderResult malformed(ByteBuffer src, int sp, 162 CharBuffer dst, int dp, 163 int nb) 164 { 165 src.position(sp - src.arrayOffset()); 166 CoderResult cr = malformedN(src, nb); 167 updatePositions(src, sp, dst, dp); 168 return cr; 169 } 170 171 172 private static CoderResult malformed(ByteBuffer src, 173 int mark, int nb) 174 { 175 src.position(mark); 176 CoderResult cr = malformedN(src, nb); 177 src.position(mark); 178 return cr; 179 } 180 181 private static CoderResult malformedForLength(ByteBuffer src, 182 int sp, 183 CharBuffer dst, 184 int dp, 185 int malformedNB) 186 { 187 updatePositions(src, sp, dst, dp); 188 return CoderResult.malformedForLength(malformedNB); 189 } 190 191 private static CoderResult malformedForLength(ByteBuffer src, 192 int mark, 193 int malformedNB) 194 { 195 src.position(mark); 196 return CoderResult.malformedForLength(malformedNB); 197 } 198 199 200 private static CoderResult xflow(Buffer src, int sp, int sl, 201 Buffer dst, int dp, int nb) { 202 updatePositions(src, sp, dst, dp); 203 return (nb == 0 || sl - sp < nb) 204 ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW; 205 } 206 207 private static CoderResult xflow(Buffer src, int mark, int nb) { 208 src.position(mark); 209 return (nb == 0 || src.remaining() < nb) 210 ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW; 211 } 212 213 private CoderResult decodeArrayLoop(ByteBuffer src, 214 CharBuffer dst) 215 { 216 // This method is optimized for ASCII input. 217 byte[] sa = src.array(); 218 int sp = src.arrayOffset() + src.position(); 219 int sl = src.arrayOffset() + src.limit(); 220 221 char[] da = dst.array(); 222 int dp = dst.arrayOffset() + dst.position(); 223 int dl = dst.arrayOffset() + dst.limit(); 224 int dlASCII = dp + Math.min(sl - sp, dl - dp); 225 226 // ASCII only loop 227 while (dp < dlASCII && sa[sp] >= 0) 228 da[dp++] = (char) sa[sp++]; 229 while (sp < sl) { 230 int b1 = sa[sp]; 231 if (b1 >= 0) { 232 // 1 byte, 7 bits: 0xxxxxxx 233 if (dp >= dl) 234 return xflow(src, sp, sl, dst, dp, 1); 235 da[dp++] = (char) b1; 236 sp++; 237 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { 238 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 239 // [C2..DF] [80..BF] 240 if (sl - sp < 2 || dp >= dl) 241 return xflow(src, sp, sl, dst, dp, 2); 242 int b2 = sa[sp + 1]; 243 // Now we check the first byte of 2-byte sequence as 244 // if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) 245 // no longer need to check b1 against c1 & c0 for 246 // malformed as we did in previous version 247 // (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80; 248 // only need to check the second byte b2. 249 if (isNotContinuation(b2)) 250 return malformedForLength(src, sp, dst, dp, 1); 251 da[dp++] = (char) (((b1 << 6) ^ b2) 252 ^ 253 (((byte) 0xC0 << 6) ^ 254 ((byte) 0x80 << 0))); 255 sp += 2; 256 } else if ((b1 >> 4) == -2) { 257 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 258 int srcRemaining = sl - sp; 259 if (srcRemaining < 3 || dp >= dl) { 260 if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1])) 261 return malformedForLength(src, sp, dst, dp, 1); 262 return xflow(src, sp, sl, dst, dp, 3); 263 } 264 int b2 = sa[sp + 1]; 265 int b3 = sa[sp + 2]; 266 if (isMalformed3(b1, b2, b3)) 267 return malformed(src, sp, dst, dp, 3); 268 char c = (char) 269 ((b1 << 12) ^ 270 (b2 << 6) ^ 271 (b3 ^ 272 (((byte) 0xE0 << 12) ^ 273 ((byte) 0x80 << 6) ^ 274 ((byte) 0x80 << 0)))); 275 if (Character.isSurrogate(c)) 276 return malformedForLength(src, sp, dst, dp, 3); 277 da[dp++] = c; 278 sp += 3; 279 } else if ((b1 >> 3) == -2) { 280 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 281 int srcRemaining = sl - sp; 282 if (srcRemaining < 4 || dl - dp < 2) { 283 if (srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1])) 284 return malformedForLength(src, sp, dst, dp, 1); 285 if (srcRemaining > 2 && isMalformed4_3(sa[sp + 2])) 286 return malformedForLength(src, sp, dst, dp, 2); 287 return xflow(src, sp, sl, dst, dp, 4); 288 } 289 int b2 = sa[sp + 1]; 290 int b3 = sa[sp + 2]; 291 int b4 = sa[sp + 3]; 292 int uc = ((b1 << 18) ^ 293 (b2 << 12) ^ 294 (b3 << 6) ^ 295 (b4 ^ 296 (((byte) 0xF0 << 18) ^ 297 ((byte) 0x80 << 12) ^ 298 ((byte) 0x80 << 6) ^ 299 ((byte) 0x80 << 0)))); 300 if (isMalformed4(b2, b3, b4) || 301 // shortest form check 302 !Character.isSupplementaryCodePoint(uc)) { 303 return malformed(src, sp, dst, dp, 4); 304 } 305 da[dp++] = Character.highSurrogate(uc); 306 da[dp++] = Character.lowSurrogate(uc); 307 sp += 4; 308 } else 309 return malformed(src, sp, dst, dp, 1); 310 } 311 return xflow(src, sp, sl, dst, dp, 0); 312 } 313 314 private CoderResult decodeBufferLoop(ByteBuffer src, 315 CharBuffer dst) 316 { 317 int mark = src.position(); 318 int limit = src.limit(); 319 while (mark < limit) { 320 int b1 = src.get(); 321 if (b1 >= 0) { 322 // 1 byte, 7 bits: 0xxxxxxx 323 if (dst.remaining() < 1) 324 return xflow(src, mark, 1); // overflow 325 dst.put((char) b1); 326 mark++; 327 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { 328 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 329 if (limit - mark < 2|| dst.remaining() < 1) 330 return xflow(src, mark, 2); 331 int b2 = src.get(); 332 if (isNotContinuation(b2)) 333 return malformedForLength(src, mark, 1); 334 dst.put((char) (((b1 << 6) ^ b2) 335 ^ 336 (((byte) 0xC0 << 6) ^ 337 ((byte) 0x80 << 0)))); 338 mark += 2; 339 } else if ((b1 >> 4) == -2) { 340 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 341 int srcRemaining = limit - mark; 342 if (srcRemaining < 3 || dst.remaining() < 1) { 343 if (srcRemaining > 1 && isMalformed3_2(b1, src.get())) 344 return malformedForLength(src, mark, 1); 345 return xflow(src, mark, 3); 346 } 347 int b2 = src.get(); 348 int b3 = src.get(); 349 if (isMalformed3(b1, b2, b3)) 350 return malformed(src, mark, 3); 351 char c = (char) 352 ((b1 << 12) ^ 353 (b2 << 6) ^ 354 (b3 ^ 355 (((byte) 0xE0 << 12) ^ 356 ((byte) 0x80 << 6) ^ 357 ((byte) 0x80 << 0)))); 358 if (Character.isSurrogate(c)) 359 return malformedForLength(src, mark, 3); 360 dst.put(c); 361 mark += 3; 362 } else if ((b1 >> 3) == -2) { 363 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 364 int srcRemaining = limit - mark; 365 if (srcRemaining < 4 || dst.remaining() < 2) { 366 if (srcRemaining > 1 && isMalformed4_2(b1, src.get())) 367 return malformedForLength(src, mark, 1); 368 if (srcRemaining > 2 && isMalformed4_3(src.get())) 369 return malformedForLength(src, mark, 2); 370 return xflow(src, mark, 4); 371 } 372 int b2 = src.get(); 373 int b3 = src.get(); 374 int b4 = src.get(); 375 int uc = ((b1 << 18) ^ 376 (b2 << 12) ^ 377 (b3 << 6) ^ 378 (b4 ^ 379 (((byte) 0xF0 << 18) ^ 380 ((byte) 0x80 << 12) ^ 381 ((byte) 0x80 << 6) ^ 382 ((byte) 0x80 << 0)))); 383 if (isMalformed4(b2, b3, b4) || 384 // shortest form check 385 !Character.isSupplementaryCodePoint(uc)) { 386 return malformed(src, mark, 4); 387 } 388 dst.put(Character.highSurrogate(uc)); 389 dst.put(Character.lowSurrogate(uc)); 390 mark += 4; 391 } else { 395 return xflow(src, mark, 0); 396 } 397 398 protected CoderResult decodeLoop(ByteBuffer src, 399 CharBuffer dst) 400 { 401 if (src.hasArray() && dst.hasArray()) 402 return decodeArrayLoop(src, dst); 403 else 404 return decodeBufferLoop(src, dst); 405 } 406 407 private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp) 408 { 409 if (bb == null) 410 bb = ByteBuffer.wrap(ba); 411 bb.position(sp); 412 return bb; 413 } 414 415 // returns -1 if there is/are malformed byte(s) and the 416 // "action" for malformed input is not REPLACE. 417 public int decode(byte[] sa, int sp, int len, char[] da) { 418 final int sl = sp + len; 419 int dp = 0; 420 int dlASCII = Math.min(len, da.length); 421 ByteBuffer bb = null; // only necessary if malformed 422 423 // ASCII only optimized loop 424 while (dp < dlASCII && sa[sp] >= 0) 425 da[dp++] = (char) sa[sp++]; 426 427 while (sp < sl) { 428 int b1 = sa[sp++]; 429 if (b1 >= 0) { 430 // 1 byte, 7 bits: 0xxxxxxx 431 da[dp++] = (char) b1; 432 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { 433 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 434 if (sp < sl) { 435 int b2 = sa[sp++]; 436 if (isNotContinuation(b2)) { 437 if (malformedInputAction() != CodingErrorAction.REPLACE) 438 return -1; 439 da[dp++] = replacement().charAt(0); 440 sp--; // malformedN(bb, 2) always returns 1 441 } else { 442 da[dp++] = (char) (((b1 << 6) ^ b2)^ 443 (((byte) 0xC0 << 6) ^ 444 ((byte) 0x80 << 0))); 445 } 446 continue; 447 } 448 if (malformedInputAction() != CodingErrorAction.REPLACE) 449 return -1; 450 da[dp++] = replacement().charAt(0); 451 return dp; 452 } else if ((b1 >> 4) == -2) { 453 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 454 if (sp + 1 < sl) { 455 int b2 = sa[sp++]; 456 int b3 = sa[sp++]; 457 if (isMalformed3(b1, b2, b3)) { 458 if (malformedInputAction() != CodingErrorAction.REPLACE) 459 return -1; 460 da[dp++] = replacement().charAt(0); 461 sp -= 3; 462 bb = getByteBuffer(bb, sa, sp); 463 sp += malformedN(bb, 3).length(); 464 } else { 465 char c = (char)((b1 << 12) ^ 466 (b2 << 6) ^ 467 (b3 ^ 468 (((byte) 0xE0 << 12) ^ 469 ((byte) 0x80 << 6) ^ 470 ((byte) 0x80 << 0)))); 471 if (Character.isSurrogate(c)) { 472 if (malformedInputAction() != CodingErrorAction.REPLACE) 473 return -1; 474 da[dp++] = replacement().charAt(0); 475 } else { 476 da[dp++] = c; 477 } 478 } 479 continue; 480 } 481 if (malformedInputAction() != CodingErrorAction.REPLACE) 482 return -1; 483 if (sp < sl && isMalformed3_2(b1, sa[sp])) { 484 da[dp++] = replacement().charAt(0); 485 continue; 486 487 } 488 da[dp++] = replacement().charAt(0); 489 return dp; 490 } else if ((b1 >> 3) == -2) { 491 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 492 if (sp + 2 < sl) { 493 int b2 = sa[sp++]; 494 int b3 = sa[sp++]; 495 int b4 = sa[sp++]; 496 int uc = ((b1 << 18) ^ 497 (b2 << 12) ^ 498 (b3 << 6) ^ 499 (b4 ^ 500 (((byte) 0xF0 << 18) ^ 501 ((byte) 0x80 << 12) ^ 502 ((byte) 0x80 << 6) ^ 503 ((byte) 0x80 << 0)))); 504 if (isMalformed4(b2, b3, b4) || 505 // shortest form check 506 !Character.isSupplementaryCodePoint(uc)) { 507 if (malformedInputAction() != CodingErrorAction.REPLACE) 508 return -1; 509 da[dp++] = replacement().charAt(0); 510 sp -= 4; 511 bb = getByteBuffer(bb, sa, sp); 512 sp += malformedN(bb, 4).length(); 513 } else { 514 da[dp++] = Character.highSurrogate(uc); 515 da[dp++] = Character.lowSurrogate(uc); 516 } 517 continue; 518 } 519 if (malformedInputAction() != CodingErrorAction.REPLACE) 520 return -1; 521 522 if (sp < sl && isMalformed4_2(b1, sa[sp])) { 523 da[dp++] = replacement().charAt(0); 524 continue; 525 } 526 sp++; 527 if (sp < sl && isMalformed4_3(sa[sp])) { 528 da[dp++] = replacement().charAt(0); 529 continue; 530 } 531 da[dp++] = replacement().charAt(0); 532 return dp; 533 } else { 534 if (malformedInputAction() != CodingErrorAction.REPLACE) 535 return -1; 536 da[dp++] = replacement().charAt(0); 537 } 538 } 539 return dp; 540 } 541 } 542 543 private static final class Encoder extends CharsetEncoder 544 implements ArrayEncoder { 545 546 private Encoder(Charset cs) { 547 super(cs, 1.1f, 3.0f); 548 } 549 550 public boolean canEncode(char c) { 551 return !Character.isSurrogate(c); 552 } 553 554 public boolean isLegalReplacement(byte[] repl) { 555 return ((repl.length == 1 && repl[0] >= 0) || 556 super.isLegalReplacement(repl)); 557 } 558 559 private static CoderResult overflow(CharBuffer src, int sp, 560 ByteBuffer dst, int dp) { 561 updatePositions(src, sp, dst, dp); 562 return CoderResult.OVERFLOW; 563 } |