/*
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

package sun.nio.cs;

import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.BufferOverflowException;
import java.nio.BufferUnderflowException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;

/* Legal UTF-8 Byte Sequences
 *
 * #    Code Points      Bits   Bit/Byte pattern
 * 1                     7      0xxxxxxx
 *      U+0000..U+007F          00..7F
 *
 * 2                     11     110xxxxx    10xxxxxx
 *      U+0080..U+07FF          C2..DF      80..BF
 *
 * 3                     16     1110xxxx    10xxxxxx    10xxxxxx
 *      U+0800..U+0FFF          E0          A0..BF      80..BF
 *      U+1000..U+CFFF          E1..EC      80..BF      80..BF
 *      U+D000..U+D7FF          ED          80..9F      80..BF
 *      U+E000..U+FFFF          EE..EF      80..BF      80..BF
 *
 * 4                     21     11110xxx    10xxxxxx    10xxxxxx    10xxxxxx
 *     U+10000..U+3FFFF         F0          90..BF      80..BF      80..BF
 *     U+40000..U+FFFFF         F1..F3      80..BF      80..BF      80..BF
 *    U+100000..U+10FFFF        F4          80..8F      80..BF      80..BF
 *
 * Surrogate code points (U+D800..U+DFFF, i.e. ED A0..BF xx) are not legal
 * in UTF-8 and are rejected by the decoder below.
 */

/**
 * The UTF-8 charset: provides a strict decoder (shortest form only, at most
 * four bytes per character, no encoded surrogates) and an encoder that
 * turns UTF-16 chars, including surrogate pairs, into UTF-8 bytes.
 */
class UTF_8 extends Unicode
{
    public UTF_8() {
        super("UTF-8", StandardCharsets.aliases_UTF_8);
    }

    public String historicalName() {
        return "UTF8";
    }

    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /**
     * Writes array-relative indices back into the buffers as positions.
     *
     * @param src array-backed source buffer
     * @param sp  index into src's backing array (arrayOffset included)
     * @param dst array-backed destination buffer
     * @param dp  index into dst's backing array (arrayOffset included)
     */
    static final void updatePositions(Buffer src, int sp,
                                      Buffer dst, int dp) {
        src.position(sp - src.arrayOffset());
        dst.position(dp - dst.arrayOffset());
    }

    private static class Decoder extends CharsetDecoder {
        private Decoder(Charset cs) {
            super(cs, 1.0f, 1.0f);
        }

        // true when b is NOT of the form 10xxxxxx
        private static boolean isNotContinuation(int b) {
            return (b & 0xc0) != 0x80;
        }

        //  [C2..DF] [80..BF]
        // A lead byte of C0 or C1 (payload bits 1..4 all zero) could only
        // encode a value below U+0080, i.e. an overlong form.
        private static boolean isMalformed2(int b1, int b2) {
            return (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80;
        }

        //  [E0]     [A0..BF] [80..BF]
        //  [E1..EF] [80..BF] [80..BF]
        // E0 followed by 80..9F is an overlong form.  Encoded surrogates
        // (ED A0..BF xx) are NOT detected here; the decode loops check the
        // decoded char for that.
        private static boolean isMalformed3(int b1, int b2, int b3) {
            return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
                   (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
        }

        //  [F0]     [90..BF] [80..BF] [80..BF]
        //  [F1..F3] [80..BF] [80..BF] [80..BF]
        //  [F4]     [80..8F] [80..BF] [80..BF]
        //  only check the 80..BF range here, the [0xf0,0x80...] and
        //  [0xf4,0x90-...] cases are left to Surrogate.neededFor(uc)
        private static boolean isMalformed4(int b2, int b3, int b4) {
            return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
                   (b4 & 0xc0) != 0x80;
        }

        // Reads up to n - 1 trailing bytes from src and reports the
        // malformed length: the index of the first non-continuation byte,
        // or n when all of them are continuation bytes.
        private static CoderResult lookupN(ByteBuffer src, int n)
        {
            for (int i = 1; i < n; i++) {
                if (isNotContinuation(src.get()))
                    return CoderResult.malformedForLength(i);
            }
            return CoderResult.malformedForLength(n);
        }

        // Works out how many bytes to report as malformed for a sequence
        // whose lead byte announced nb bytes.  src must be positioned on
        // the lead byte; the position is advanced and must be restored by
        // the caller.
        private static CoderResult malformedN(ByteBuffer src, int nb) {
            int b1, b2;
            switch (nb) {
            case 1:
                b1 = src.get();
                if ((b1 >> 2) == -2) {
                    // 5 bytes 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                    if (src.remaining() < 4)
                        return CoderResult.UNDERFLOW;
                    return lookupN(src, 5);
                }
                if ((b1 >> 1) == -2) {
                    // 6 bytes 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                    if (src.remaining() < 5)
                        return CoderResult.UNDERFLOW;
                    return lookupN(src, 6);
                }
                return CoderResult.malformedForLength(1);
            case 2:                    // always 1
                return CoderResult.malformedForLength(1);
            case 3:
                b1 = src.get();
                b2 = src.get();        // no need to lookup b3
                return CoderResult.malformedForLength(
                    ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
                     isNotContinuation(b2)) ? 1 : 2);
            case 4:  // speed does not matter on this error path
                b1 = src.get() & 0xff;
                b2 = src.get() & 0xff;
                if (b1 > 0xf4 ||
                    (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
                    (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
                    isNotContinuation(b2))
                    return CoderResult.malformedForLength(1);
                if (isNotContinuation(src.get()))
                    return CoderResult.malformedForLength(2);
                return CoderResult.malformedForLength(3);
            default:
                assert false;
                return null;
            }
        }

        // Array variant: classify the malformed input starting at sp, then
        // leave both buffers positioned at (sp, dp).
        private static CoderResult malformed(ByteBuffer src, int sp,
                                             CharBuffer dst, int dp,
                                             int nb)
        {
            src.position(sp - src.arrayOffset());
            CoderResult cr = malformedN(src, nb);
            updatePositions(src, sp, dst, dp);
            return cr;
        }

        // Buffer variant: classify the malformed input starting at mark,
        // then rewind src to mark.
        private static CoderResult malformed(ByteBuffer src,
                                             int mark, int nb)
        {
            src.position(mark);
            CoderResult cr = malformedN(src, nb);
            src.position(mark);
            return cr;
        }

        // Array variant: report a malformed sequence of a known length
        // (used for encoded surrogates) with the buffers left at (sp, dp).
        private static CoderResult malformedForLength(ByteBuffer src, int sp,
                                                      CharBuffer dst, int dp,
                                                      int malformedNB)
        {
            updatePositions(src, sp, dst, dp);
            return CoderResult.malformedForLength(malformedNB);
        }

        // Buffer variant of the above; src is rewound to mark.
        private static CoderResult malformedForLength(ByteBuffer src,
                                                      int mark,
                                                      int malformedNB)
        {
            src.position(mark);
            return CoderResult.malformedForLength(malformedNB);
        }

        // Array variant: UNDERFLOW when the input is exhausted (nb == 0) or
        // holds fewer than nb bytes from sp, otherwise OVERFLOW.
        private static CoderResult xflow(Buffer src, int sp, int sl,
                                         Buffer dst, int dp, int nb) {
            updatePositions(src, sp, dst, dp);
            return (nb == 0 || sl - sp < nb)
                   ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
        }

        // Buffer variant: the lead byte has already been consumed when this
        // is called with nb > 0, hence the comparison against nb - 1.
        private static CoderResult xflow(Buffer src, int mark, int nb) {
            CoderResult cr = (nb == 0 || src.remaining() < (nb - 1))
                             ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
            src.position(mark);
            return cr;
        }

        private CoderResult decodeArrayLoop(ByteBuffer src,
                                            CharBuffer dst)
        {
            // This method is optimized for ASCII input.
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();
            int dlASCII = dp + Math.min(sl - sp, dl - dp);

            // ASCII only loop
            while (dp < dlASCII && sa[sp] >= 0)
                da[dp++] = (char)sa[sp++];

            // b1 is a sign-extended byte, so "(b1 >> k) == -2" matches a
            // lead byte whose top bits are 1...10.
            while (sp < sl) {
                int b1 = sa[sp];
                if (b1 >= 0) {
                    // 1 byte, 7 bits: 0xxxxxxx
                    if (dp >= dl)
                        return xflow(src, sp, sl, dst, dp, 1);
                    da[dp++] = (char)b1;
                    sp++;
                } else if ((b1 >> 5) == -2) {
                    // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
                    if (sl - sp < 2 || dp >= dl)
                        return xflow(src, sp, sl, dst, dp, 2);
                    int b2 = sa[sp + 1];
                    if (isMalformed2(b1, b2))
                        return malformed(src, sp, dst, dp, 2);
                    // the xor constant cancels the sign-extension and
                    // marker bits once the value is narrowed to char
                    da[dp++] = (char) (((b1 << 6) ^ b2) ^ 0x0f80);
                    sp += 2;
                } else if ((b1 >> 4) == -2) {
                    // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
                    if (sl - sp < 3 || dp >= dl)
                        return xflow(src, sp, sl, dst, dp, 3);
                    int b2 = sa[sp + 1];
                    int b3 = sa[sp + 2];
                    if (isMalformed3(b1, b2, b3))
                        return malformed(src, sp, dst, dp, 3);
                    char c = (char) (((b1 << 12) ^ (b2 << 6) ^ b3) ^ 0x1f80);
                    // U+D800..U+DFFF must never appear as encoded UTF-8
                    if (Surrogate.is(c))
                        return malformedForLength(src, sp, dst, dp, 3);
                    da[dp++] = c;
                    sp += 3;
                } else if ((b1 >> 3) == -2) {
                    // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    if (sl - sp < 4 || dl - dp < 2)
                        return xflow(src, sp, sl, dst, dp, 4);
                    int b2 = sa[sp + 1];
                    int b3 = sa[sp + 2];
                    int b4 = sa[sp + 3];
                    int uc = ((b1 & 0x07) << 18) |
                             ((b2 & 0x3f) << 12) |
                             ((b3 & 0x3f) << 06) |
                             (b4 & 0x3f);
                    if (isMalformed4(b2, b3, b4) ||
                        !Surrogate.neededFor(uc)) {   // shortest form check
                        return malformed(src, sp, dst, dp, 4);
                    }
                    da[dp++] = Surrogate.high(uc);
                    da[dp++] = Surrogate.low(uc);
                    sp += 4;
                } else
                    return malformed(src, sp, dst, dp, 1);
            }
            return xflow(src, sp, sl, dst, dp, 0);
        }

        private CoderResult decodeBufferLoop(ByteBuffer src,
                                             CharBuffer dst)
        {
            // mark always points at the first byte not yet fully decoded;
            // every exit path rewinds src to it.
            int mark = src.position();
            int limit = src.limit();
            while (mark < limit) {
                int b1 = src.get();
                if (b1 >= 0) {
                    // 1 byte, 7 bits: 0xxxxxxx
                    if (dst.remaining() < 1)
                        return xflow(src, mark, 1);  // overflow
                    dst.put((char)b1);
                    mark++;
                } else if ((b1 >> 5) == -2) {
                    // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
                    if (limit - mark < 2 || dst.remaining() < 1)
                        return xflow(src, mark, 2);
                    int b2 = src.get();
                    if (isMalformed2(b1, b2))
                        return malformed(src, mark, 2);
                    dst.put((char) (((b1 << 6) ^ b2) ^ 0x0f80));
                    mark += 2;
                } else if ((b1 >> 4) == -2) {
                    // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
                    if (limit - mark < 3 || dst.remaining() < 1)
                        return xflow(src, mark, 3);
                    int b2 = src.get();
                    int b3 = src.get();
                    if (isMalformed3(b1, b2, b3))
                        return malformed(src, mark, 3);
                    char c = (char) (((b1 << 12) ^ (b2 << 6) ^ b3) ^ 0x1f80);
                    // U+D800..U+DFFF must never appear as encoded UTF-8
                    if (Surrogate.is(c))
                        return malformedForLength(src, mark, 3);
                    dst.put(c);
                    mark += 3;
                } else if ((b1 >> 3) == -2) {
                    // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    if (limit - mark < 4 || dst.remaining() < 2)
                        return xflow(src, mark, 4);
                    int b2 = src.get();
                    int b3 = src.get();
                    int b4 = src.get();
                    int uc = ((b1 & 0x07) << 18) |
                             ((b2 & 0x3f) << 12) |
                             ((b3 & 0x3f) << 06) |
                             (b4 & 0x3f);
                    if (isMalformed4(b2, b3, b4) ||
                        !Surrogate.neededFor(uc)) {   // shortest form check
                        return malformed(src, mark, 4);
                    }
                    dst.put(Surrogate.high(uc));
                    dst.put(Surrogate.low(uc));
                    mark += 4;
                } else {
                    return malformed(src, mark, 1);
                }
            }
            return xflow(src, mark, 0);
        }

        @Override
        protected CoderResult decodeLoop(ByteBuffer src,
                                         CharBuffer dst)
        {
            // use direct array access when both buffers expose their arrays
            if (src.hasArray() && dst.hasArray())
                return decodeArrayLoop(src, dst);
            else
                return decodeBufferLoop(src, dst);
        }
    }

    private static class Encoder extends CharsetEncoder {

        private Encoder(Charset cs) {
            super(cs, 1.1f, 4.0f);
        }

        // A lone surrogate char has no UTF-8 encoding of its own.
        @Override
        public boolean canEncode(char c) {
            return !Surrogate.is(c);
        }

        // Any single ASCII byte is accepted directly; everything else is
        // delegated to the superclass check.
        @Override
        public boolean isLegalReplacement(byte[] repl) {
            return ((repl.length == 1 && repl[0] >= 0) ||
                    super.isLegalReplacement(repl));
        }

        private static CoderResult overflow(CharBuffer src, int sp,
                                            ByteBuffer dst, int dp) {
            updatePositions(src, sp, dst, dp);
            return CoderResult.OVERFLOW;
        }

        private static CoderResult overflow(CharBuffer src, int mark) {
            src.position(mark);
            return CoderResult.OVERFLOW;
        }

        // created lazily, the first time a surrogate is encountered
        private Surrogate.Parser sgp;

        private CoderResult encodeArrayLoop(CharBuffer src,
                                            ByteBuffer dst)
        {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();
            int dlASCII = dp + Math.min(sl - sp, dl - dp);

            // ASCII only loop
            while (dp < dlASCII && sa[sp] < '\u0080')
                da[dp++] = (byte) sa[sp++];
            while (sp < sl) {
                int c = sa[sp];
                if (c < 0x80) {
                    // Have at most seven bits
                    if (dp >= dl)
                        return overflow(src, sp, dst, dp);
                    da[dp++] = (byte)c;
                } else if (c < 0x800) {
                    // 2 bytes, 11 bits
                    if (dl - dp < 2)
                        return overflow(src, sp, dst, dp);
                    da[dp++] = (byte)(0xc0 | ((c >> 06)));
                    da[dp++] = (byte)(0x80 | (c & 0x3f));
                } else if (Surrogate.is(c)) {
                    // Have a surrogate pair
                    if (sgp == null)
                        sgp = new Surrogate.Parser();
                    int uc = sgp.parse((char)c, sa, sp, sl);
                    if (uc < 0) {
                        updatePositions(src, sp, dst, dp);
                        return sgp.error();
                    }
                    if (dl - dp < 4)
                        return overflow(src, sp, dst, dp);
                    da[dp++] = (byte)(0xf0 | ((uc >> 18)));
                    da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
                    da[dp++] = (byte)(0x80 | ((uc >> 06) & 0x3f));
                    da[dp++] = (byte)(0x80 | (uc & 0x3f));
                    sp++;  // 2 chars: the second one is skipped below
                } else {
                    // 3 bytes, 16 bits
                    if (dl - dp < 3)
                        return overflow(src, sp, dst, dp);
                    da[dp++] = (byte)(0xe0 | ((c >> 12)));
                    da[dp++] = (byte)(0x80 | ((c >> 06) & 0x3f));
                    da[dp++] = (byte)(0x80 | (c & 0x3f));
                }
                sp++;
            }
            updatePositions(src, sp, dst, dp);
            return CoderResult.UNDERFLOW;
        }

        private CoderResult encodeBufferLoop(CharBuffer src,
                                             ByteBuffer dst)
        {
            // mark tracks the first char not yet fully encoded; every exit
            // path sets src's position to it.
            int mark = src.position();
            while (src.hasRemaining()) {
                int c = src.get();
                if (c < 0x80) {
                    // Have at most seven bits
                    if (!dst.hasRemaining())
                        return overflow(src, mark);
                    dst.put((byte)c);
                } else if (c < 0x800) {
                    // 2 bytes, 11 bits
                    if (dst.remaining() < 2)
                        return overflow(src, mark);
                    dst.put((byte)(0xc0 | ((c >> 06))));
                    dst.put((byte)(0x80 | (c & 0x3f)));
                } else if (Surrogate.is(c)) {
                    // Have a surrogate pair
                    if (sgp == null)
                        sgp = new Surrogate.Parser();
                    int uc = sgp.parse((char)c, src);
                    if (uc < 0) {
                        src.position(mark);
                        return sgp.error();
                    }
                    if (dst.remaining() < 4)
                        return overflow(src, mark);
                    dst.put((byte)(0xf0 | ((uc >> 18))));
                    dst.put((byte)(0x80 | ((uc >> 12) & 0x3f)));
                    dst.put((byte)(0x80 | ((uc >> 06) & 0x3f)));
                    dst.put((byte)(0x80 | (uc & 0x3f)));
                    mark++;  // 2 chars: the second one is counted below
                } else {
                    // 3 bytes, 16 bits
                    if (dst.remaining() < 3)
                        return overflow(src, mark);
                    dst.put((byte)(0xe0 | ((c >> 12))));
                    dst.put((byte)(0x80 | ((c >> 06) & 0x3f)));
                    dst.put((byte)(0x80 | (c & 0x3f)));
                }
                mark++;
            }
            src.position(mark);
            return CoderResult.UNDERFLOW;
        }

        @Override
        protected final CoderResult encodeLoop(CharBuffer src,
                                               ByteBuffer dst)
        {
            // use direct array access when both buffers expose their arrays
            if (src.hasArray() && dst.hasArray())
                return encodeArrayLoop(src, dst);
            else
                return encodeBufferLoop(src, dst);
        }
    }
}