1 /* 2 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.nio.cs; 27 28 import java.nio.Buffer; 29 import java.nio.ByteBuffer; 30 import java.nio.CharBuffer; 31 import java.nio.charset.Charset; 32 import java.nio.charset.CharsetDecoder; 33 import java.nio.charset.CharsetEncoder; 34 import java.nio.charset.CoderResult; 35 import java.nio.charset.CodingErrorAction; 36 37 /* Legal UTF-8 Byte Sequences 38 * 39 * # Code Points Bits Bit/Byte pattern 40 * 1 7 0xxxxxxx 41 * U+0000..U+007F 00..7F 42 * 43 * 2 11 110xxxxx 10xxxxxx 44 * U+0080..U+07FF C2..DF 80..BF 45 * 46 * 3 16 1110xxxx 10xxxxxx 10xxxxxx 47 * U+0800..U+0FFF E0 A0..BF 80..BF 48 * U+1000..U+FFFF E1..EF 80..BF 80..BF 49 * 50 * 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 51 * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF 52 * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF 53 * U+100000..U10FFFF F4 80..8F 80..BF 80..BF 54 * 55 */ 56 57 class UTF_8 extends Unicode 58 { 59 public UTF_8() { 60 super("UTF-8", StandardCharsets.aliases_UTF_8); 61 } 62 63 public String historicalName() { 64 return "UTF8"; 65 } 66 67 public CharsetDecoder newDecoder() { 68 return new Decoder(this); 69 } 70 71 public CharsetEncoder newEncoder() { 72 return new Encoder(this); 73 } 74 75 private static final void updatePositions(Buffer src, int sp, 76 Buffer dst, int dp) { 77 src.position(sp - src.arrayOffset()); 78 dst.position(dp - dst.arrayOffset()); 79 } 80 81 private static class Decoder extends CharsetDecoder 82 implements ArrayDecoder { 83 private Decoder(Charset cs) { 84 super(cs, 1.0f, 1.0f); 85 } 86 87 private static boolean isNotContinuation(int b) { 88 return (b & 0xc0) != 0x80; 89 } 90 91 // [E0] [A0..BF] [80..BF] 92 // [E1..EF] [80..BF] [80..BF] 93 private static boolean isMalformed3(int b1, int b2, int b3) { 94 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 95 (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80; 96 } 97 98 // only used when there is only one byte left in src buffer 99 private static boolean isMalformed3_2(int b1, int b2) { 100 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 101 (b2 & 0xc0) != 0x80; 102 } 103 104 // [F0] [90..BF] [80..BF] [80..BF] 105 // [F1..F3] [80..BF] [80..BF] [80..BF] 106 // [F4] [80..8F] [80..BF] [80..BF] 107 // only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...] 108 // will be checked by Character.isSupplementaryCodePoint(uc) 109 private static boolean isMalformed4(int b2, int b3, int b4) { 110 return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 || 111 (b4 & 0xc0) != 0x80; 112 } 113 114 // only used when there is less than 4 bytes left in src buffer. 115 // both b1 and b2 should be "& 0xff" before passed in. 116 private static boolean isMalformed4_2(int b1, int b2) { 117 return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || 118 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || 119 (b2 & 0xc0) != 0x80; 120 } 121 122 // tests if b1 and b2 are malformed as the first 2 bytes of a 123 // legal`4-byte utf-8 byte sequence. 124 // only used when there is less than 4 bytes left in src buffer, 125 // after isMalformed4_2 has been invoked. 126 private static boolean isMalformed4_3(int b3) { 127 return (b3 & 0xc0) != 0x80; 128 } 129 130 private static CoderResult lookupN(ByteBuffer src, int n) 131 { 132 for (int i = 1; i < n; i++) { 133 if (isNotContinuation(src.get())) 134 return CoderResult.malformedForLength(i); 135 } 136 return CoderResult.malformedForLength(n); 137 } 138 139 private static CoderResult malformedN(ByteBuffer src, int nb) { 140 switch (nb) { 141 case 1: 142 case 2: // always 1 143 return CoderResult.malformedForLength(1); 144 case 3: 145 int b1 = src.get(); 146 int b2 = src.get(); // no need to lookup b3 147 return CoderResult.malformedForLength( 148 ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 149 isNotContinuation(b2)) ? 1 : 2); 150 case 4: // we don't care the speed here 151 b1 = src.get() & 0xff; 152 b2 = src.get() & 0xff; 153 if (b1 > 0xf4 || 154 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || 155 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || 156 isNotContinuation(b2)) 157 return CoderResult.malformedForLength(1); 158 if (isNotContinuation(src.get())) 159 return CoderResult.malformedForLength(2); 160 return CoderResult.malformedForLength(3); 161 default: 162 assert false; 163 return null; 164 } 165 } 166 167 private static CoderResult malformed(ByteBuffer src, int sp, 168 CharBuffer dst, int dp, 169 int nb) 170 { 171 src.position(sp - src.arrayOffset()); 172 CoderResult cr = malformedN(src, nb); 173 updatePositions(src, sp, dst, dp); 174 return cr; 175 } 176 177 178 private static CoderResult malformed(ByteBuffer src, 179 int mark, int nb) 180 { 181 src.position(mark); 182 CoderResult cr = malformedN(src, nb); 183 src.position(mark); 184 return cr; 185 } 186 187 private static CoderResult malformedForLength(ByteBuffer src, 188 int sp, 189 CharBuffer dst, 190 int dp, 191 int malformedNB) 192 { 193 updatePositions(src, sp, dst, dp); 194 return CoderResult.malformedForLength(malformedNB); 195 } 196 197 private static CoderResult malformedForLength(ByteBuffer src, 198 int mark, 199 int malformedNB) 200 { 201 src.position(mark); 202 return CoderResult.malformedForLength(malformedNB); 203 } 204 205 206 private static CoderResult xflow(Buffer src, int sp, int sl, 207 Buffer dst, int dp, int nb) { 208 updatePositions(src, sp, dst, dp); 209 return (nb == 0 || sl - sp < nb) 210 ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW; 211 } 212 213 private static CoderResult xflow(Buffer src, int mark, int nb) { 214 src.position(mark); 215 return (nb == 0 || src.remaining() < nb) 216 ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW; 217 } 218 219 private CoderResult decodeArrayLoop(ByteBuffer src, 220 CharBuffer dst) 221 { 222 // This method is optimized for ASCII input. 223 byte[] sa = src.array(); 224 int sp = src.arrayOffset() + src.position(); 225 int sl = src.arrayOffset() + src.limit(); 226 227 char[] da = dst.array(); 228 int dp = dst.arrayOffset() + dst.position(); 229 int dl = dst.arrayOffset() + dst.limit(); 230 int dlASCII = dp + Math.min(sl - sp, dl - dp); 231 232 // ASCII only loop 233 while (dp < dlASCII && sa[sp] >= 0) 234 da[dp++] = (char) sa[sp++]; 235 while (sp < sl) { 236 int b1 = sa[sp]; 237 if (b1 >= 0) { 238 // 1 byte, 7 bits: 0xxxxxxx 239 if (dp >= dl) 240 return xflow(src, sp, sl, dst, dp, 1); 241 da[dp++] = (char) b1; 242 sp++; 243 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { 244 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 245 // [C2..DF] [80..BF] 246 if (sl - sp < 2 || dp >= dl) 247 return xflow(src, sp, sl, dst, dp, 2); 248 int b2 = sa[sp + 1]; 249 // Now we check the first byte of 2-byte sequence as 250 // if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) 251 // no longer need to check b1 against c1 & c0 for 252 // malformed as we did in previous version 253 // (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80; 254 // only need to check the second byte b2. 255 if (isNotContinuation(b2)) 256 return malformedForLength(src, sp, dst, dp, 1); 257 da[dp++] = (char) (((b1 << 6) ^ b2) 258 ^ 259 (((byte) 0xC0 << 6) ^ 260 ((byte) 0x80 << 0))); 261 sp += 2; 262 } else if ((b1 >> 4) == -2) { 263 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 264 int srcRemaining = sl - sp; 265 if (srcRemaining < 3 || dp >= dl) { 266 if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1])) 267 return malformedForLength(src, sp, dst, dp, 1); 268 return xflow(src, sp, sl, dst, dp, 3); 269 } 270 int b2 = sa[sp + 1]; 271 int b3 = sa[sp + 2]; 272 if (isMalformed3(b1, b2, b3)) 273 return malformed(src, sp, dst, dp, 3); 274 char c = (char) 275 ((b1 << 12) ^ 276 (b2 << 6) ^ 277 (b3 ^ 278 (((byte) 0xE0 << 12) ^ 279 ((byte) 0x80 << 6) ^ 280 ((byte) 0x80 << 0)))); 281 if (Character.isSurrogate(c)) 282 return malformedForLength(src, sp, dst, dp, 3); 283 da[dp++] = c; 284 sp += 3; 285 } else if ((b1 >> 3) == -2) { 286 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 287 int srcRemaining = sl - sp; 288 if (srcRemaining < 4 || dl - dp < 2) { 289 b1 &= 0xff; 290 if (b1 > 0xf4 || 291 srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1] & 0xff)) 292 return malformedForLength(src, sp, dst, dp, 1); 293 if (srcRemaining > 2 && isMalformed4_3(sa[sp + 2])) 294 return malformedForLength(src, sp, dst, dp, 2); 295 return xflow(src, sp, sl, dst, dp, 4); 296 } 297 int b2 = sa[sp + 1]; 298 int b3 = sa[sp + 2]; 299 int b4 = sa[sp + 3]; 300 int uc = ((b1 << 18) ^ 301 (b2 << 12) ^ 302 (b3 << 6) ^ 303 (b4 ^ 304 (((byte) 0xF0 << 18) ^ 305 ((byte) 0x80 << 12) ^ 306 ((byte) 0x80 << 6) ^ 307 ((byte) 0x80 << 0)))); 308 if (isMalformed4(b2, b3, b4) || 309 // shortest form check 310 !Character.isSupplementaryCodePoint(uc)) { 311 return malformed(src, sp, dst, dp, 4); 312 } 313 da[dp++] = Character.highSurrogate(uc); 314 da[dp++] = Character.lowSurrogate(uc); 315 sp += 4; 316 } else 317 return malformed(src, sp, dst, dp, 1); 318 } 319 return xflow(src, sp, sl, dst, dp, 0); 320 } 321 322 private CoderResult decodeBufferLoop(ByteBuffer src, 323 CharBuffer dst) 324 { 325 int mark = src.position(); 326 int limit = src.limit(); 327 while (mark < limit) { 328 int b1 = src.get(); 329 if (b1 >= 0) { 330 // 1 byte, 7 bits: 0xxxxxxx 331 if (dst.remaining() < 1) 332 return xflow(src, mark, 1); // overflow 333 dst.put((char) b1); 334 mark++; 335 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { 336 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 337 if (limit - mark < 2|| dst.remaining() < 1) 338 return xflow(src, mark, 2); 339 int b2 = src.get(); 340 if (isNotContinuation(b2)) 341 return malformedForLength(src, mark, 1); 342 dst.put((char) (((b1 << 6) ^ b2) 343 ^ 344 (((byte) 0xC0 << 6) ^ 345 ((byte) 0x80 << 0)))); 346 mark += 2; 347 } else if ((b1 >> 4) == -2) { 348 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 349 int srcRemaining = limit - mark; 350 if (srcRemaining < 3 || dst.remaining() < 1) { 351 if (srcRemaining > 1 && isMalformed3_2(b1, src.get())) 352 return malformedForLength(src, mark, 1); 353 return xflow(src, mark, 3); 354 } 355 int b2 = src.get(); 356 int b3 = src.get(); 357 if (isMalformed3(b1, b2, b3)) 358 return malformed(src, mark, 3); 359 char c = (char) 360 ((b1 << 12) ^ 361 (b2 << 6) ^ 362 (b3 ^ 363 (((byte) 0xE0 << 12) ^ 364 ((byte) 0x80 << 6) ^ 365 ((byte) 0x80 << 0)))); 366 if (Character.isSurrogate(c)) 367 return malformedForLength(src, mark, 3); 368 dst.put(c); 369 mark += 3; 370 } else if ((b1 >> 3) == -2) { 371 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 372 int srcRemaining = limit - mark; 373 if (srcRemaining < 4 || dst.remaining() < 2) { 374 b1 &= 0xff; 375 if (b1 > 0xf4 || 376 srcRemaining > 1 && isMalformed4_2(b1, src.get() & 0xff)) 377 return malformedForLength(src, mark, 1); 378 if (srcRemaining > 2 && isMalformed4_3(src.get())) 379 return malformedForLength(src, mark, 2); 380 return xflow(src, mark, 4); 381 } 382 int b2 = src.get(); 383 int b3 = src.get(); 384 int b4 = src.get(); 385 int uc = ((b1 << 18) ^ 386 (b2 << 12) ^ 387 (b3 << 6) ^ 388 (b4 ^ 389 (((byte) 0xF0 << 18) ^ 390 ((byte) 0x80 << 12) ^ 391 ((byte) 0x80 << 6) ^ 392 ((byte) 0x80 << 0)))); 393 if (isMalformed4(b2, b3, b4) || 394 // shortest form check 395 !Character.isSupplementaryCodePoint(uc)) { 396 return malformed(src, mark, 4); 397 } 398 dst.put(Character.highSurrogate(uc)); 399 dst.put(Character.lowSurrogate(uc)); 400 mark += 4; 401 } else { 402 return malformed(src, mark, 1); 403 } 404 } 405 return xflow(src, mark, 0); 406 } 407 408 protected CoderResult decodeLoop(ByteBuffer src, 409 CharBuffer dst) 410 { 411 if (src.hasArray() && dst.hasArray()) 412 return decodeArrayLoop(src, dst); 413 else 414 return decodeBufferLoop(src, dst); 415 } 416 417 private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp) 418 { 419 if (bb == null) 420 bb = ByteBuffer.wrap(ba); 421 bb.position(sp); 422 return bb; 423 } 424 425 // returns -1 if there is/are malformed byte(s) and the 426 // "action" for malformed input is not REPLACE. 427 public int decode(byte[] sa, int sp, int len, char[] da) { 428 final int sl = sp + len; 429 int dp = 0; 430 int dlASCII = Math.min(len, da.length); 431 ByteBuffer bb = null; // only necessary if malformed 432 433 // ASCII only optimized loop 434 while (dp < dlASCII && sa[sp] >= 0) 435 da[dp++] = (char) sa[sp++]; 436 437 while (sp < sl) { 438 int b1 = sa[sp++]; 439 if (b1 >= 0) { 440 // 1 byte, 7 bits: 0xxxxxxx 441 da[dp++] = (char) b1; 442 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { 443 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 444 if (sp < sl) { 445 int b2 = sa[sp++]; 446 if (isNotContinuation(b2)) { 447 if (malformedInputAction() != CodingErrorAction.REPLACE) 448 return -1; 449 da[dp++] = replacement().charAt(0); 450 sp--; // malformedN(bb, 2) always returns 1 451 } else { 452 da[dp++] = (char) (((b1 << 6) ^ b2)^ 453 (((byte) 0xC0 << 6) ^ 454 ((byte) 0x80 << 0))); 455 } 456 continue; 457 } 458 if (malformedInputAction() != CodingErrorAction.REPLACE) 459 return -1; 460 da[dp++] = replacement().charAt(0); 461 return dp; 462 } else if ((b1 >> 4) == -2) { 463 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 464 if (sp + 1 < sl) { 465 int b2 = sa[sp++]; 466 int b3 = sa[sp++]; 467 if (isMalformed3(b1, b2, b3)) { 468 if (malformedInputAction() != CodingErrorAction.REPLACE) 469 return -1; 470 da[dp++] = replacement().charAt(0); 471 sp -= 3; 472 bb = getByteBuffer(bb, sa, sp); 473 sp += malformedN(bb, 3).length(); 474 } else { 475 char c = (char)((b1 << 12) ^ 476 (b2 << 6) ^ 477 (b3 ^ 478 (((byte) 0xE0 << 12) ^ 479 ((byte) 0x80 << 6) ^ 480 ((byte) 0x80 << 0)))); 481 if (Character.isSurrogate(c)) { 482 if (malformedInputAction() != CodingErrorAction.REPLACE) 483 return -1; 484 da[dp++] = replacement().charAt(0); 485 } else { 486 da[dp++] = c; 487 } 488 } 489 continue; 490 } 491 if (malformedInputAction() != CodingErrorAction.REPLACE) 492 return -1; 493 if (sp < sl && isMalformed3_2(b1, sa[sp])) { 494 da[dp++] = replacement().charAt(0); 495 continue; 496 497 } 498 da[dp++] = replacement().charAt(0); 499 return dp; 500 } else if ((b1 >> 3) == -2) { 501 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 502 if (sp + 2 < sl) { 503 int b2 = sa[sp++]; 504 int b3 = sa[sp++]; 505 int b4 = sa[sp++]; 506 int uc = ((b1 << 18) ^ 507 (b2 << 12) ^ 508 (b3 << 6) ^ 509 (b4 ^ 510 (((byte) 0xF0 << 18) ^ 511 ((byte) 0x80 << 12) ^ 512 ((byte) 0x80 << 6) ^ 513 ((byte) 0x80 << 0)))); 514 if (isMalformed4(b2, b3, b4) || 515 // shortest form check 516 !Character.isSupplementaryCodePoint(uc)) { 517 if (malformedInputAction() != CodingErrorAction.REPLACE) 518 return -1; 519 da[dp++] = replacement().charAt(0); 520 sp -= 4; 521 bb = getByteBuffer(bb, sa, sp); 522 sp += malformedN(bb, 4).length(); 523 } else { 524 da[dp++] = Character.highSurrogate(uc); 525 da[dp++] = Character.lowSurrogate(uc); 526 } 527 continue; 528 } 529 if (malformedInputAction() != CodingErrorAction.REPLACE) 530 return -1; 531 b1 &= 0xff; 532 if (b1 > 0xf4 || 533 sp < sl && isMalformed4_2(b1, sa[sp] & 0xff)) { 534 da[dp++] = replacement().charAt(0); 535 continue; 536 } 537 sp++; 538 if (sp < sl && isMalformed4_3(sa[sp])) { 539 da[dp++] = replacement().charAt(0); 540 continue; 541 } 542 da[dp++] = replacement().charAt(0); 543 return dp; 544 } else { 545 if (malformedInputAction() != CodingErrorAction.REPLACE) 546 return -1; 547 da[dp++] = replacement().charAt(0); 548 } 549 } 550 return dp; 551 } 552 553 public boolean isASCIICompatible() { 554 return true; 555 } 556 } 557 558 private static final class Encoder extends CharsetEncoder 559 implements ArrayEncoder { 560 561 private Encoder(Charset cs) { 562 super(cs, 1.1f, 3.0f); 563 } 564 565 public boolean canEncode(char c) { 566 return !Character.isSurrogate(c); 567 } 568 569 public boolean isLegalReplacement(byte[] repl) { 570 return ((repl.length == 1 && repl[0] >= 0) || 571 super.isLegalReplacement(repl)); 572 } 573 574 private static CoderResult overflow(CharBuffer src, int sp, 575 ByteBuffer dst, int dp) { 576 updatePositions(src, sp, dst, dp); 577 return CoderResult.OVERFLOW; 578 } 579 580 private static CoderResult overflow(CharBuffer src, int mark) { 581 src.position(mark); 582 return CoderResult.OVERFLOW; 583 } 584 585 private Surrogate.Parser sgp; 586 private CoderResult encodeArrayLoop(CharBuffer src, 587 ByteBuffer dst) 588 { 589 char[] sa = src.array(); 590 int sp = src.arrayOffset() + src.position(); 591 int sl = src.arrayOffset() + src.limit(); 592 593 byte[] da = dst.array(); 594 int dp = dst.arrayOffset() + dst.position(); 595 int dl = dst.arrayOffset() + dst.limit(); 596 int dlASCII = dp + Math.min(sl - sp, dl - dp); 597 598 // ASCII only loop 599 while (dp < dlASCII && sa[sp] < '\u0080') 600 da[dp++] = (byte) sa[sp++]; 601 while (sp < sl) { 602 char c = sa[sp]; 603 if (c < 0x80) { 604 // Have at most seven bits 605 if (dp >= dl) 606 return overflow(src, sp, dst, dp); 607 da[dp++] = (byte)c; 608 } else if (c < 0x800) { 609 // 2 bytes, 11 bits 610 if (dl - dp < 2) 611 return overflow(src, sp, dst, dp); 612 da[dp++] = (byte)(0xc0 | (c >> 6)); 613 da[dp++] = (byte)(0x80 | (c & 0x3f)); 614 } else if (Character.isSurrogate(c)) { 615 // Have a surrogate pair 616 if (sgp == null) 617 sgp = new Surrogate.Parser(); 618 int uc = sgp.parse(c, sa, sp, sl); 619 if (uc < 0) { 620 updatePositions(src, sp, dst, dp); 621 return sgp.error(); 622 } 623 if (dl - dp < 4) 624 return overflow(src, sp, dst, dp); 625 da[dp++] = (byte)(0xf0 | ((uc >> 18))); 626 da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f)); 627 da[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f)); 628 da[dp++] = (byte)(0x80 | (uc & 0x3f)); 629 sp++; // 2 chars 630 } else { 631 // 3 bytes, 16 bits 632 if (dl - dp < 3) 633 return overflow(src, sp, dst, dp); 634 da[dp++] = (byte)(0xe0 | ((c >> 12))); 635 da[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f)); 636 da[dp++] = (byte)(0x80 | (c & 0x3f)); 637 } 638 sp++; 639 } 640 updatePositions(src, sp, dst, dp); 641 return CoderResult.UNDERFLOW; 642 } 643 644 private CoderResult encodeBufferLoop(CharBuffer src, 645 ByteBuffer dst) 646 { 647 int mark = src.position(); 648 while (src.hasRemaining()) { 649 char c = src.get(); 650 if (c < 0x80) { 651 // Have at most seven bits 652 if (!dst.hasRemaining()) 653 return overflow(src, mark); 654 dst.put((byte)c); 655 } else if (c < 0x800) { 656 // 2 bytes, 11 bits 657 if (dst.remaining() < 2) 658 return overflow(src, mark); 659 dst.put((byte)(0xc0 | (c >> 6))); 660 dst.put((byte)(0x80 | (c & 0x3f))); 661 } else if (Character.isSurrogate(c)) { 662 // Have a surrogate pair 663 if (sgp == null) 664 sgp = new Surrogate.Parser(); 665 int uc = sgp.parse(c, src); 666 if (uc < 0) { 667 src.position(mark); 668 return sgp.error(); 669 } 670 if (dst.remaining() < 4) 671 return overflow(src, mark); 672 dst.put((byte)(0xf0 | ((uc >> 18)))); 673 dst.put((byte)(0x80 | ((uc >> 12) & 0x3f))); 674 dst.put((byte)(0x80 | ((uc >> 6) & 0x3f))); 675 dst.put((byte)(0x80 | (uc & 0x3f))); 676 mark++; // 2 chars 677 } else { 678 // 3 bytes, 16 bits 679 if (dst.remaining() < 3) 680 return overflow(src, mark); 681 dst.put((byte)(0xe0 | ((c >> 12)))); 682 dst.put((byte)(0x80 | ((c >> 6) & 0x3f))); 683 dst.put((byte)(0x80 | (c & 0x3f))); 684 } 685 mark++; 686 } 687 src.position(mark); 688 return CoderResult.UNDERFLOW; 689 } 690 691 protected final CoderResult encodeLoop(CharBuffer src, 692 ByteBuffer dst) 693 { 694 if (src.hasArray() && dst.hasArray()) 695 return encodeArrayLoop(src, dst); 696 else 697 return encodeBufferLoop(src, dst); 698 } 699 700 private byte repl = (byte)'?'; 701 protected void implReplaceWith(byte[] newReplacement) { 702 repl = newReplacement[0]; 703 } 704 705 // returns -1 if there is malformed char(s) and the 706 // "action" for malformed input is not REPLACE. 707 public int encode(char[] sa, int sp, int len, byte[] da) { 708 int sl = sp + len; 709 int dp = 0; 710 int dlASCII = dp + Math.min(len, da.length); 711 712 // ASCII only optimized loop 713 while (dp < dlASCII && sa[sp] < '\u0080') 714 da[dp++] = (byte) sa[sp++]; 715 716 while (sp < sl) { 717 char c = sa[sp++]; 718 if (c < 0x80) { 719 // Have at most seven bits 720 da[dp++] = (byte)c; 721 } else if (c < 0x800) { 722 // 2 bytes, 11 bits 723 da[dp++] = (byte)(0xc0 | (c >> 6)); 724 da[dp++] = (byte)(0x80 | (c & 0x3f)); 725 } else if (Character.isSurrogate(c)) { 726 if (sgp == null) 727 sgp = new Surrogate.Parser(); 728 int uc = sgp.parse(c, sa, sp - 1, sl); 729 if (uc < 0) { 730 if (malformedInputAction() != CodingErrorAction.REPLACE) 731 return -1; 732 da[dp++] = repl; 733 } else { 734 da[dp++] = (byte)(0xf0 | ((uc >> 18))); 735 da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f)); 736 da[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f)); 737 da[dp++] = (byte)(0x80 | (uc & 0x3f)); 738 sp++; // 2 chars 739 } 740 } else { 741 // 3 bytes, 16 bits 742 da[dp++] = (byte)(0xe0 | ((c >> 12))); 743 da[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f)); 744 da[dp++] = (byte)(0x80 | (c & 0x3f)); 745 } 746 } 747 return dp; 748 } 749 750 public boolean isASCIICompatible() { 751 return true; 752 } 753 } 754 }