1 /* 2 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.nio.cs; 27 28 import java.nio.Buffer; 29 import java.nio.ByteBuffer; 30 import java.nio.CharBuffer; 31 import java.nio.charset.Charset; 32 import java.nio.charset.CharsetDecoder; 33 import java.nio.charset.CharsetEncoder; 34 import java.nio.charset.CoderResult; 35 import java.nio.charset.CodingErrorAction; 36 37 /* Legal UTF-8 Byte Sequences 38 * 39 * # Code Points Bits Bit/Byte pattern 40 * 1 7 0xxxxxxx 41 * U+0000..U+007F 00..7F 42 * 43 * 2 11 110xxxxx 10xxxxxx 44 * U+0080..U+07FF C2..DF 80..BF 45 * 46 * 3 16 1110xxxx 10xxxxxx 10xxxxxx 47 * U+0800..U+0FFF E0 A0..BF 80..BF 48 * U+1000..U+FFFF E1..EF 80..BF 80..BF 49 * 50 * 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 51 * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF 52 * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF 53 * U+100000..U10FFFF F4 80..8F 80..BF 80..BF 54 * 55 */ 56 57 class UTF_8 extends Unicode 58 { 59 public UTF_8() { 60 super("UTF-8", StandardCharsets.aliases_UTF_8); 61 } 62 63 public String historicalName() { 64 return "UTF8"; 65 } 66 67 public CharsetDecoder newDecoder() { 68 return new Decoder(this); 69 } 70 71 public CharsetEncoder newEncoder() { 72 return new Encoder(this); 73 } 74 75 static final void updatePositions(Buffer src, int sp, 76 Buffer dst, int dp) { 77 src.position(sp - src.arrayOffset()); 78 dst.position(dp - dst.arrayOffset()); 79 } 80 81 private static class Decoder extends CharsetDecoder 82 implements ArrayDecoder { 83 private Decoder(Charset cs) { 84 super(cs, 1.0f, 1.0f); 85 } 86 87 private static boolean isNotContinuation(int b) { 88 return (b & 0xc0) != 0x80; 89 } 90 91 // [C2..DF] [80..BF] 92 private static boolean isMalformed2(int b1, int b2) { 93 return (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80; 94 } 95 96 // [E0] [A0..BF] [80..BF] 97 // [E1..EF] [80..BF] [80..BF] 98 private static boolean isMalformed3(int b1, int b2, int b3) { 99 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 100 (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80; 101 } 102 103 // [F0] [90..BF] [80..BF] [80..BF] 104 // [F1..F3] [80..BF] [80..BF] [80..BF] 105 // [F4] [80..8F] [80..BF] [80..BF] 106 // only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...] 107 // will be checked by Character.isSupplementaryCodePoint(uc) 108 private static boolean isMalformed4(int b2, int b3, int b4) { 109 return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 || 110 (b4 & 0xc0) != 0x80; 111 } 112 113 private static CoderResult lookupN(ByteBuffer src, int n) 114 { 115 for (int i = 1; i < n; i++) { 116 if (isNotContinuation(src.get())) 117 return CoderResult.malformedForLength(i); 118 } 119 return CoderResult.malformedForLength(n); 120 } 121 122 private static CoderResult malformedN(ByteBuffer src, int nb) { 123 switch (nb) { 124 case 1: 125 int b1 = src.get(); 126 if ((b1 >> 2) == -2) { 127 // 5 bytes 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 128 if (src.remaining() < 4) 129 return CoderResult.UNDERFLOW; 130 return lookupN(src, 5); 131 } 132 if ((b1 >> 1) == -2) { 133 // 6 bytes 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 134 if (src.remaining() < 5) 135 return CoderResult.UNDERFLOW; 136 return lookupN(src, 6); 137 } 138 return CoderResult.malformedForLength(1); 139 case 2: // always 1 140 return CoderResult.malformedForLength(1); 141 case 3: 142 b1 = src.get(); 143 int b2 = src.get(); // no need to lookup b3 144 return CoderResult.malformedForLength( 145 ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 146 isNotContinuation(b2))?1:2); 147 case 4: // we don't care the speed here 148 b1 = src.get() & 0xff; 149 b2 = src.get() & 0xff; 150 if (b1 > 0xf4 || 151 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || 152 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || 153 isNotContinuation(b2)) 154 return CoderResult.malformedForLength(1); 155 if (isNotContinuation(src.get())) 156 return CoderResult.malformedForLength(2); 157 return CoderResult.malformedForLength(3); 158 default: 159 assert false; 160 return null; 161 } 162 } 163 164 private static CoderResult malformed(ByteBuffer src, int sp, 165 CharBuffer dst, int dp, 166 int nb) 167 { 168 src.position(sp - src.arrayOffset()); 169 CoderResult cr = malformedN(src, nb); 170 updatePositions(src, sp, dst, dp); 171 return cr; 172 } 173 174 private static CoderResult malformed(ByteBuffer src, 175 int mark, int nb) 176 { 177 src.position(mark); 178 CoderResult cr = malformedN(src, nb); 179 src.position(mark); 180 return cr; 181 } 182 183 private static CoderResult xflow(Buffer src, int sp, int sl, 184 Buffer dst, int dp, int nb) { 185 updatePositions(src, sp, dst, dp); 186 return (nb == 0 || sl - sp < nb) 187 ?CoderResult.UNDERFLOW:CoderResult.OVERFLOW; 188 } 189 190 private static CoderResult xflow(Buffer src, int mark, int nb) { 191 CoderResult cr = (nb == 0 || src.remaining() < (nb - 1)) 192 ?CoderResult.UNDERFLOW:CoderResult.OVERFLOW; 193 src.position(mark); 194 return cr; 195 } 196 197 private CoderResult decodeArrayLoop(ByteBuffer src, 198 CharBuffer dst) 199 { 200 // This method is optimized for ASCII input. 201 byte[] sa = src.array(); 202 int sp = src.arrayOffset() + src.position(); 203 int sl = src.arrayOffset() + src.limit(); 204 205 char[] da = dst.array(); 206 int dp = dst.arrayOffset() + dst.position(); 207 int dl = dst.arrayOffset() + dst.limit(); 208 int dlASCII = dp + Math.min(sl - sp, dl - dp); 209 210 // ASCII only loop 211 while (dp < dlASCII && sa[sp] >= 0) 212 da[dp++] = (char) sa[sp++]; 213 214 while (sp < sl) { 215 int b1 = sa[sp]; 216 if (b1 >= 0) { 217 // 1 byte, 7 bits: 0xxxxxxx 218 if (dp >= dl) 219 return xflow(src, sp, sl, dst, dp, 1); 220 da[dp++] = (char) b1; 221 sp++; 222 } else if ((b1 >> 5) == -2) { 223 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 224 if (sl - sp < 2 || dp >= dl) 225 return xflow(src, sp, sl, dst, dp, 2); 226 int b2 = sa[sp + 1]; 227 if (isMalformed2(b1, b2)) 228 return malformed(src, sp, dst, dp, 2); 229 da[dp++] = (char) (((b1 << 6) ^ b2) 230 ^ 231 (((byte) 0xC0 << 6) ^ 232 ((byte) 0x80 << 0))); 233 sp += 2; 234 } else if ((b1 >> 4) == -2) { 235 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 236 if (sl - sp < 3 || dp >= dl) 237 return xflow(src, sp, sl, dst, dp, 3); 238 int b2 = sa[sp + 1]; 239 int b3 = sa[sp + 2]; 240 if (isMalformed3(b1, b2, b3)) 241 return malformed(src, sp, dst, dp, 3); 242 da[dp++] = (char) 243 ((b1 << 12) ^ 244 (b2 << 6) ^ 245 (b3 ^ 246 (((byte) 0xE0 << 12) ^ 247 ((byte) 0x80 << 6) ^ 248 ((byte) 0x80 << 0)))); 249 sp += 3; 250 } else if ((b1 >> 3) == -2) { 251 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 252 if (sl - sp < 4 || dl - dp < 2) 253 return xflow(src, sp, sl, dst, dp, 4); 254 int b2 = sa[sp + 1]; 255 int b3 = sa[sp + 2]; 256 int b4 = sa[sp + 3]; 257 int uc = ((b1 << 18) ^ 258 (b2 << 12) ^ 259 (b3 << 6) ^ 260 (b4 ^ 261 (((byte) 0xF0 << 18) ^ 262 ((byte) 0x80 << 12) ^ 263 ((byte) 0x80 << 6) ^ 264 ((byte) 0x80 << 0)))); 265 if (isMalformed4(b2, b3, b4) || 266 // shortest form check 267 !Character.isSupplementaryCodePoint(uc)) { 268 return malformed(src, sp, dst, dp, 4); 269 } 270 da[dp++] = Character.highSurrogate(uc); 271 da[dp++] = Character.lowSurrogate(uc); 272 sp += 4; 273 } else 274 return malformed(src, sp, dst, dp, 1); 275 } 276 return xflow(src, sp, sl, dst, dp, 0); 277 } 278 279 private CoderResult decodeBufferLoop(ByteBuffer src, 280 CharBuffer dst) 281 { 282 int mark = src.position(); 283 int limit = src.limit(); 284 while (mark < limit) { 285 int b1 = src.get(); 286 if (b1 >= 0) { 287 // 1 byte, 7 bits: 0xxxxxxx 288 if (dst.remaining() < 1) 289 return xflow(src, mark, 1); // overflow 290 dst.put((char) b1); 291 mark++; 292 } else if ((b1 >> 5) == -2) { 293 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 294 if (limit - mark < 2|| dst.remaining() < 1) 295 return xflow(src, mark, 2); 296 int b2 = src.get(); 297 if (isMalformed2(b1, b2)) 298 return malformed(src, mark, 2); 299 dst.put((char) (((b1 << 6) ^ b2) 300 ^ 301 (((byte) 0xC0 << 6) ^ 302 ((byte) 0x80 << 0)))); 303 mark += 2; 304 } else if ((b1 >> 4) == -2) { 305 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 306 if (limit - mark < 3 || dst.remaining() < 1) 307 return xflow(src, mark, 3); 308 int b2 = src.get(); 309 int b3 = src.get(); 310 if (isMalformed3(b1, b2, b3)) 311 return malformed(src, mark, 3); 312 dst.put((char) 313 ((b1 << 12) ^ 314 (b2 << 6) ^ 315 (b3 ^ 316 (((byte) 0xE0 << 12) ^ 317 ((byte) 0x80 << 6) ^ 318 ((byte) 0x80 << 0))))); 319 mark += 3; 320 } else if ((b1 >> 3) == -2) { 321 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 322 if (limit - mark < 4 || dst.remaining() < 2) 323 return xflow(src, mark, 4); 324 int b2 = src.get(); 325 int b3 = src.get(); 326 int b4 = src.get(); 327 int uc = ((b1 << 18) ^ 328 (b2 << 12) ^ 329 (b3 << 6) ^ 330 (b4 ^ 331 (((byte) 0xF0 << 18) ^ 332 ((byte) 0x80 << 12) ^ 333 ((byte) 0x80 << 6) ^ 334 ((byte) 0x80 << 0)))); 335 if (isMalformed4(b2, b3, b4) || 336 // shortest form check 337 !Character.isSupplementaryCodePoint(uc)) { 338 return malformed(src, mark, 4); 339 } 340 dst.put(Character.highSurrogate(uc)); 341 dst.put(Character.lowSurrogate(uc)); 342 mark += 4; 343 } else { 344 return malformed(src, mark, 1); 345 } 346 } 347 return xflow(src, mark, 0); 348 } 349 350 protected CoderResult decodeLoop(ByteBuffer src, 351 CharBuffer dst) 352 { 353 if (src.hasArray() && dst.hasArray()) 354 return decodeArrayLoop(src, dst); 355 else 356 return decodeBufferLoop(src, dst); 357 } 358 359 private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp) 360 { 361 if (bb == null) 362 bb = ByteBuffer.wrap(ba); 363 bb.position(sp); 364 return bb; 365 } 366 367 // returns -1 if there is malformed byte(s) and the 368 // "action" for malformed input is not REPLACE. 369 public int decode(byte[] sa, int sp, int len, char[] da) { 370 final int sl = sp + len; 371 int dp = 0; 372 int dlASCII = Math.min(len, da.length); 373 ByteBuffer bb = null; // only necessary if malformed 374 375 // ASCII only optimized loop 376 while (dp < dlASCII && sa[sp] >= 0) 377 da[dp++] = (char) sa[sp++]; 378 379 while (sp < sl) { 380 int b1 = sa[sp++]; 381 if (b1 >= 0) { 382 // 1 byte, 7 bits: 0xxxxxxx 383 da[dp++] = (char) b1; 384 } else if ((b1 >> 5) == -2) { 385 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 386 if (sp < sl) { 387 int b2 = sa[sp++]; 388 if (isMalformed2(b1, b2)) { 389 if (malformedInputAction() != CodingErrorAction.REPLACE) 390 return -1; 391 da[dp++] = replacement().charAt(0); 392 sp--; // malformedN(bb, 2) always returns 1 393 } else { 394 da[dp++] = (char) (((b1 << 6) ^ b2)^ 395 (((byte) 0xC0 << 6) ^ 396 ((byte) 0x80 << 0))); 397 } 398 continue; 399 } 400 if (malformedInputAction() != CodingErrorAction.REPLACE) 401 return -1; 402 da[dp++] = replacement().charAt(0); 403 return dp; 404 } else if ((b1 >> 4) == -2) { 405 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 406 if (sp + 1 < sl) { 407 int b2 = sa[sp++]; 408 int b3 = sa[sp++]; 409 if (isMalformed3(b1, b2, b3)) { 410 if (malformedInputAction() != CodingErrorAction.REPLACE) 411 return -1; 412 da[dp++] = replacement().charAt(0); 413 sp -=3; 414 bb = getByteBuffer(bb, sa, sp); 415 sp += malformedN(bb, 3).length(); 416 } else { 417 da[dp++] = (char)((b1 << 12) ^ 418 (b2 << 6) ^ 419 (b3 ^ 420 (((byte) 0xE0 << 12) ^ 421 ((byte) 0x80 << 6) ^ 422 ((byte) 0x80 << 0)))); 423 } 424 continue; 425 } 426 if (malformedInputAction() != CodingErrorAction.REPLACE) 427 return -1; 428 da[dp++] = replacement().charAt(0); 429 return dp; 430 } else if ((b1 >> 3) == -2) { 431 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 432 if (sp + 2 < sl) { 433 int b2 = sa[sp++]; 434 int b3 = sa[sp++]; 435 int b4 = sa[sp++]; 436 int uc = ((b1 << 18) ^ 437 (b2 << 12) ^ 438 (b3 << 6) ^ 439 (b4 ^ 440 (((byte) 0xF0 << 18) ^ 441 ((byte) 0x80 << 12) ^ 442 ((byte) 0x80 << 6) ^ 443 ((byte) 0x80 << 0)))); 444 if (isMalformed4(b2, b3, b4) || 445 // shortest form check 446 !Character.isSupplementaryCodePoint(uc)) { 447 if (malformedInputAction() != CodingErrorAction.REPLACE) 448 return -1; 449 da[dp++] = replacement().charAt(0); 450 sp -= 4; 451 bb = getByteBuffer(bb, sa, sp); 452 sp += malformedN(bb, 4).length(); 453 } else { 454 da[dp++] = Character.highSurrogate(uc); 455 da[dp++] = Character.lowSurrogate(uc); 456 } 457 continue; 458 } 459 if (malformedInputAction() != CodingErrorAction.REPLACE) 460 return -1; 461 da[dp++] = replacement().charAt(0); 462 return dp; 463 } else { 464 if (malformedInputAction() != CodingErrorAction.REPLACE) 465 return -1; 466 da[dp++] = replacement().charAt(0); 467 sp--; 468 bb = getByteBuffer(bb, sa, sp); 469 CoderResult cr = malformedN(bb, 1); 470 if (!cr.isError()) { 471 // leading byte for 5 or 6-byte, but don't have enough 472 // bytes in buffer to check. Consumed rest as malformed. 473 return dp; 474 } 475 sp += cr.length(); 476 } 477 } 478 return dp; 479 } 480 } 481 482 private static class Encoder extends CharsetEncoder 483 implements ArrayEncoder { 484 485 private Encoder(Charset cs) { 486 super(cs, 1.1f, 3.0f); 487 } 488 489 public boolean canEncode(char c) { 490 return !Character.isSurrogate(c); 491 } 492 493 public boolean isLegalReplacement(byte[] repl) { 494 return ((repl.length == 1 && repl[0] >= 0) || 495 super.isLegalReplacement(repl)); 496 } 497 498 private static CoderResult overflow(CharBuffer src, int sp, 499 ByteBuffer dst, int dp) { 500 updatePositions(src, sp, dst, dp); 501 return CoderResult.OVERFLOW; 502 } 503 504 private static CoderResult overflow(CharBuffer src, int mark) { 505 src.position(mark); 506 return CoderResult.OVERFLOW; 507 } 508 509 private Surrogate.Parser sgp; 510 private CoderResult encodeArrayLoop(CharBuffer src, 511 ByteBuffer dst) 512 { 513 char[] sa = src.array(); 514 int sp = src.arrayOffset() + src.position(); 515 int sl = src.arrayOffset() + src.limit(); 516 517 byte[] da = dst.array(); 518 int dp = dst.arrayOffset() + dst.position(); 519 int dl = dst.arrayOffset() + dst.limit(); 520 int dlASCII = dp + Math.min(sl - sp, dl - dp); 521 522 // ASCII only loop 523 while (dp < dlASCII && sa[sp] < '\u0080') 524 da[dp++] = (byte) sa[sp++]; 525 while (sp < sl) { 526 char c = sa[sp]; 527 if (c < 0x80) { 528 // Have at most seven bits 529 if (dp >= dl) 530 return overflow(src, sp, dst, dp); 531 da[dp++] = (byte)c; 532 } else if (c < 0x800) { 533 // 2 bytes, 11 bits 534 if (dl - dp < 2) 535 return overflow(src, sp, dst, dp); 536 da[dp++] = (byte)(0xc0 | (c >> 6)); 537 da[dp++] = (byte)(0x80 | (c & 0x3f)); 538 } else if (Character.isSurrogate(c)) { 539 // Have a surrogate pair 540 if (sgp == null) 541 sgp = new Surrogate.Parser(); 542 int uc = sgp.parse(c, sa, sp, sl); 543 if (uc < 0) { 544 updatePositions(src, sp, dst, dp); 545 return sgp.error(); 546 } 547 if (dl - dp < 4) 548 return overflow(src, sp, dst, dp); 549 da[dp++] = (byte)(0xf0 | ((uc >> 18))); 550 da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f)); 551 da[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f)); 552 da[dp++] = (byte)(0x80 | (uc & 0x3f)); 553 sp++; // 2 chars 554 } else { 555 // 3 bytes, 16 bits 556 if (dl - dp < 3) 557 return overflow(src, sp, dst, dp); 558 da[dp++] = (byte)(0xe0 | ((c >> 12))); 559 da[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f)); 560 da[dp++] = (byte)(0x80 | (c & 0x3f)); 561 } 562 sp++; 563 } 564 updatePositions(src, sp, dst, dp); 565 return CoderResult.UNDERFLOW; 566 } 567 568 private CoderResult encodeBufferLoop(CharBuffer src, 569 ByteBuffer dst) 570 { 571 int mark = src.position(); 572 while (src.hasRemaining()) { 573 char c = src.get(); 574 if (c < 0x80) { 575 // Have at most seven bits 576 if (!dst.hasRemaining()) 577 return overflow(src, mark); 578 dst.put((byte)c); 579 } else if (c < 0x800) { 580 // 2 bytes, 11 bits 581 if (dst.remaining() < 2) 582 return overflow(src, mark); 583 dst.put((byte)(0xc0 | (c >> 6))); 584 dst.put((byte)(0x80 | (c & 0x3f))); 585 } else if (Character.isSurrogate(c)) { 586 // Have a surrogate pair 587 if (sgp == null) 588 sgp = new Surrogate.Parser(); 589 int uc = sgp.parse(c, src); 590 if (uc < 0) { 591 src.position(mark); 592 return sgp.error(); 593 } 594 if (dst.remaining() < 4) 595 return overflow(src, mark); 596 dst.put((byte)(0xf0 | ((uc >> 18)))); 597 dst.put((byte)(0x80 | ((uc >> 12) & 0x3f))); 598 dst.put((byte)(0x80 | ((uc >> 6) & 0x3f))); 599 dst.put((byte)(0x80 | (uc & 0x3f))); 600 mark++; // 2 chars 601 } else { 602 // 3 bytes, 16 bits 603 if (dst.remaining() < 3) 604 return overflow(src, mark); 605 dst.put((byte)(0xe0 | ((c >> 12)))); 606 dst.put((byte)(0x80 | ((c >> 6) & 0x3f))); 607 dst.put((byte)(0x80 | (c & 0x3f))); 608 } 609 mark++; 610 } 611 src.position(mark); 612 return CoderResult.UNDERFLOW; 613 } 614 615 protected final CoderResult encodeLoop(CharBuffer src, 616 ByteBuffer dst) 617 { 618 if (src.hasArray() && dst.hasArray()) 619 return encodeArrayLoop(src, dst); 620 else 621 return encodeBufferLoop(src, dst); 622 } 623 624 // returns -1 if there is malformed char(s) and the 625 // "action" for malformed input is not REPLACE. 626 public int encode(char[] sa, int sp, int len, byte[] da) { 627 int sl = sp + len; 628 int dp = 0; 629 int dlASCII = dp + Math.min(len, da.length); 630 631 // ASCII only optimized loop 632 while (dp < dlASCII && sa[sp] < '\u0080') 633 da[dp++] = (byte) sa[sp++]; 634 635 while (sp < sl) { 636 char c = sa[sp++]; 637 if (c < 0x80) { 638 // Have at most seven bits 639 da[dp++] = (byte)c; 640 } else if (c < 0x800) { 641 // 2 bytes, 11 bits 642 da[dp++] = (byte)(0xc0 | (c >> 6)); 643 da[dp++] = (byte)(0x80 | (c & 0x3f)); 644 } else if (Character.isSurrogate(c)) { 645 if (sgp == null) 646 sgp = new Surrogate.Parser(); 647 int uc = sgp.parse(c, sa, sp - 1, sl); 648 if (uc < 0) { 649 if (malformedInputAction() != CodingErrorAction.REPLACE) 650 return -1; 651 da[dp++] = replacement()[0]; 652 } else { 653 da[dp++] = (byte)(0xf0 | ((uc >> 18))); 654 da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f)); 655 da[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f)); 656 da[dp++] = (byte)(0x80 | (uc & 0x3f)); 657 sp++; // 2 chars 658 } 659 } else { 660 // 3 bytes, 16 bits 661 da[dp++] = (byte)(0xe0 | ((c >> 12))); 662 da[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f)); 663 da[dp++] = (byte)(0x80 | (c & 0x3f)); 664 } 665 } 666 return dp; 667 } 668 } 669 }