1 /* 2 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.nio.cs; 27 28 import java.nio.Buffer; 29 import java.nio.ByteBuffer; 30 import java.nio.CharBuffer; 31 import java.nio.charset.Charset; 32 import java.nio.charset.CharsetDecoder; 33 import java.nio.charset.CharsetEncoder; 34 import java.nio.charset.CoderResult; 35 import java.nio.charset.CodingErrorAction; 36 37 /* Legal UTF-8 Byte Sequences 38 * 39 * # Code Points Bits Bit/Byte pattern 40 * 1 7 0xxxxxxx 41 * U+0000..U+007F 00..7F 42 * 43 * 2 11 110xxxxx 10xxxxxx 44 * U+0080..U+07FF C2..DF 80..BF 45 * 46 * 3 16 1110xxxx 10xxxxxx 10xxxxxx 47 * U+0800..U+0FFF E0 A0..BF 80..BF 48 * U+1000..U+FFFF E1..EF 80..BF 80..BF 49 * 50 * 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 51 * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF 52 * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF 53 * U+100000..U10FFFF F4 80..8F 80..BF 80..BF 54 * 55 */ 56 57 class UTF_8 extends Unicode 58 { 59 public UTF_8() { 60 super("UTF-8", StandardCharsets.aliases_UTF_8); 61 } 62 63 public String historicalName() { 64 return "UTF8"; 65 } 66 67 public CharsetDecoder newDecoder() { 68 return new Decoder(this); 69 } 70 71 public CharsetEncoder newEncoder() { 72 return new Encoder(this); 73 } 74 75 private static final void updatePositions(Buffer src, int sp, 76 Buffer dst, int dp) { 77 src.position(sp - src.arrayOffset()); 78 dst.position(dp - dst.arrayOffset()); 79 } 80 81 private static class Decoder extends CharsetDecoder 82 implements ArrayDecoder { 83 private Decoder(Charset cs) { 84 super(cs, 1.0f, 1.0f); 85 } 86 87 private static boolean isNotContinuation(int b) { 88 return (b & 0xc0) != 0x80; 89 } 90 91 // [E0] [A0..BF] [80..BF] 92 // [E1..EF] [80..BF] [80..BF] 93 private static boolean isMalformed3(int b1, int b2, int b3) { 94 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 95 (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80; 96 } 97 98 // only used when there is only one byte left in src buffer 99 private static boolean isMalformed3_2(int b1, int b2) { 100 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 101 (b2 & 0xc0) != 0x80; 102 } 103 104 // [F0] [90..BF] [80..BF] [80..BF] 105 // [F1..F3] [80..BF] [80..BF] [80..BF] 106 // [F4] [80..8F] [80..BF] [80..BF] 107 // only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...] 108 // will be checked by Character.isSupplementaryCodePoint(uc) 109 private static boolean isMalformed4(int b2, int b3, int b4) { 110 return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 || 111 (b4 & 0xc0) != 0x80; 112 } 113 114 // only used when there is less than 4 bytes left in src buffer 115 private static boolean isMalformed4_2(int b1, int b2) { 116 return (b1 == 0xf0 && b2 == 0x90) || 117 (b2 & 0xc0) != 0x80; 118 } 119 120 private static boolean isMalformed4_3(int b3) { 121 return (b3 & 0xc0) != 0x80; 122 } 123 124 private static CoderResult lookupN(ByteBuffer src, int n) 125 { 126 for (int i = 1; i < n; i++) { 127 if (isNotContinuation(src.get())) 128 return CoderResult.malformedForLength(i); 129 } 130 return CoderResult.malformedForLength(n); 131 } 132 133 private static CoderResult malformedN(ByteBuffer src, int nb) { 134 switch (nb) { 135 case 1: 136 case 2: // always 1 137 return CoderResult.malformedForLength(1); 138 case 3: 139 int b1 = src.get(); 140 int b2 = src.get(); // no need to lookup b3 141 return CoderResult.malformedForLength( 142 ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || 143 isNotContinuation(b2)) ? 1 : 2); 144 case 4: // we don't care the speed here 145 b1 = src.get() & 0xff; 146 b2 = src.get() & 0xff; 147 if (b1 > 0xf4 || 148 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || 149 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || 150 isNotContinuation(b2)) 151 return CoderResult.malformedForLength(1); 152 if (isNotContinuation(src.get())) 153 return CoderResult.malformedForLength(2); 154 return CoderResult.malformedForLength(3); 155 default: 156 assert false; 157 return null; 158 } 159 } 160 161 private static CoderResult malformed(ByteBuffer src, int sp, 162 CharBuffer dst, int dp, 163 int nb) 164 { 165 src.position(sp - src.arrayOffset()); 166 CoderResult cr = malformedN(src, nb); 167 updatePositions(src, sp, dst, dp); 168 return cr; 169 } 170 171 172 private static CoderResult malformed(ByteBuffer src, 173 int mark, int nb) 174 { 175 src.position(mark); 176 CoderResult cr = malformedN(src, nb); 177 src.position(mark); 178 return cr; 179 } 180 181 private static CoderResult malformedForLength(ByteBuffer src, 182 int sp, 183 CharBuffer dst, 184 int dp, 185 int malformedNB) 186 { 187 updatePositions(src, sp, dst, dp); 188 return CoderResult.malformedForLength(malformedNB); 189 } 190 191 private static CoderResult malformedForLength(ByteBuffer src, 192 int mark, 193 int malformedNB) 194 { 195 src.position(mark); 196 return CoderResult.malformedForLength(malformedNB); 197 } 198 199 200 private static CoderResult xflow(Buffer src, int sp, int sl, 201 Buffer dst, int dp, int nb) { 202 updatePositions(src, sp, dst, dp); 203 return (nb == 0 || sl - sp < nb) 204 ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW; 205 } 206 207 private static CoderResult xflow(Buffer src, int mark, int nb) { 208 src.position(mark); 209 return (nb == 0 || src.remaining() < nb) 210 ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW; 211 } 212 213 private CoderResult decodeArrayLoop(ByteBuffer src, 214 CharBuffer dst) 215 { 216 // This method is optimized for ASCII input. 217 byte[] sa = src.array(); 218 int sp = src.arrayOffset() + src.position(); 219 int sl = src.arrayOffset() + src.limit(); 220 221 char[] da = dst.array(); 222 int dp = dst.arrayOffset() + dst.position(); 223 int dl = dst.arrayOffset() + dst.limit(); 224 int dlASCII = dp + Math.min(sl - sp, dl - dp); 225 226 // ASCII only loop 227 while (dp < dlASCII && sa[sp] >= 0) 228 da[dp++] = (char) sa[sp++]; 229 while (sp < sl) { 230 int b1 = sa[sp]; 231 if (b1 >= 0) { 232 // 1 byte, 7 bits: 0xxxxxxx 233 if (dp >= dl) 234 return xflow(src, sp, sl, dst, dp, 1); 235 da[dp++] = (char) b1; 236 sp++; 237 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { 238 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 239 // [C2..DF] [80..BF] 240 if (sl - sp < 2 || dp >= dl) 241 return xflow(src, sp, sl, dst, dp, 2); 242 int b2 = sa[sp + 1]; 243 // Now we check the first byte of 2-byte sequence as 244 // if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) 245 // no longer need to check b1 against c1 & c0 for 246 // malformed as we did in previous version 247 // (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80; 248 // only need to check the second byte b2. 249 if (isNotContinuation(b2)) 250 return malformedForLength(src, sp, dst, dp, 1); 251 da[dp++] = (char) (((b1 << 6) ^ b2) 252 ^ 253 (((byte) 0xC0 << 6) ^ 254 ((byte) 0x80 << 0))); 255 sp += 2; 256 } else if ((b1 >> 4) == -2) { 257 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 258 int srcRemaining = sl - sp; 259 if (srcRemaining < 3 || dp >= dl) { 260 if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1])) 261 return malformedForLength(src, sp, dst, dp, 1); 262 return xflow(src, sp, sl, dst, dp, 3); 263 } 264 int b2 = sa[sp + 1]; 265 int b3 = sa[sp + 2]; 266 if (isMalformed3(b1, b2, b3)) 267 return malformed(src, sp, dst, dp, 3); 268 char c = (char) 269 ((b1 << 12) ^ 270 (b2 << 6) ^ 271 (b3 ^ 272 (((byte) 0xE0 << 12) ^ 273 ((byte) 0x80 << 6) ^ 274 ((byte) 0x80 << 0)))); 275 if (Character.isSurrogate(c)) 276 return malformedForLength(src, sp, dst, dp, 3); 277 da[dp++] = c; 278 sp += 3; 279 } else if ((b1 >> 3) == -2) { 280 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 281 int srcRemaining = sl - sp; 282 if (srcRemaining < 4 || dl - dp < 2) { 283 if (srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1])) 284 return malformedForLength(src, sp, dst, dp, 1); 285 if (srcRemaining > 2 && isMalformed4_3(sa[sp + 2])) 286 return malformedForLength(src, sp, dst, dp, 2); 287 return xflow(src, sp, sl, dst, dp, 4); 288 } 289 int b2 = sa[sp + 1]; 290 int b3 = sa[sp + 2]; 291 int b4 = sa[sp + 3]; 292 int uc = ((b1 << 18) ^ 293 (b2 << 12) ^ 294 (b3 << 6) ^ 295 (b4 ^ 296 (((byte) 0xF0 << 18) ^ 297 ((byte) 0x80 << 12) ^ 298 ((byte) 0x80 << 6) ^ 299 ((byte) 0x80 << 0)))); 300 if (isMalformed4(b2, b3, b4) || 301 // shortest form check 302 !Character.isSupplementaryCodePoint(uc)) { 303 return malformed(src, sp, dst, dp, 4); 304 } 305 da[dp++] = Character.highSurrogate(uc); 306 da[dp++] = Character.lowSurrogate(uc); 307 sp += 4; 308 } else 309 return malformed(src, sp, dst, dp, 1); 310 } 311 return xflow(src, sp, sl, dst, dp, 0); 312 } 313 314 private CoderResult decodeBufferLoop(ByteBuffer src, 315 CharBuffer dst) 316 { 317 int mark = src.position(); 318 int limit = src.limit(); 319 while (mark < limit) { 320 int b1 = src.get(); 321 if (b1 >= 0) { 322 // 1 byte, 7 bits: 0xxxxxxx 323 if (dst.remaining() < 1) 324 return xflow(src, mark, 1); // overflow 325 dst.put((char) b1); 326 mark++; 327 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { 328 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 329 if (limit - mark < 2|| dst.remaining() < 1) 330 return xflow(src, mark, 2); 331 int b2 = src.get(); 332 if (isNotContinuation(b2)) 333 return malformedForLength(src, mark, 1); 334 dst.put((char) (((b1 << 6) ^ b2) 335 ^ 336 (((byte) 0xC0 << 6) ^ 337 ((byte) 0x80 << 0)))); 338 mark += 2; 339 } else if ((b1 >> 4) == -2) { 340 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 341 int srcRemaining = limit - mark; 342 if (srcRemaining < 3 || dst.remaining() < 1) { 343 if (srcRemaining > 1 && isMalformed3_2(b1, src.get())) 344 return malformedForLength(src, mark, 1); 345 return xflow(src, mark, 3); 346 } 347 int b2 = src.get(); 348 int b3 = src.get(); 349 if (isMalformed3(b1, b2, b3)) 350 return malformed(src, mark, 3); 351 char c = (char) 352 ((b1 << 12) ^ 353 (b2 << 6) ^ 354 (b3 ^ 355 (((byte) 0xE0 << 12) ^ 356 ((byte) 0x80 << 6) ^ 357 ((byte) 0x80 << 0)))); 358 if (Character.isSurrogate(c)) 359 return malformedForLength(src, mark, 3); 360 dst.put(c); 361 mark += 3; 362 } else if ((b1 >> 3) == -2) { 363 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 364 int srcRemaining = limit - mark; 365 if (srcRemaining < 4 || dst.remaining() < 2) { 366 if (srcRemaining > 1 && isMalformed4_2(b1, src.get())) 367 return malformedForLength(src, mark, 1); 368 if (srcRemaining > 2 && isMalformed4_3(src.get())) 369 return malformedForLength(src, mark, 2); 370 return xflow(src, mark, 4); 371 } 372 int b2 = src.get(); 373 int b3 = src.get(); 374 int b4 = src.get(); 375 int uc = ((b1 << 18) ^ 376 (b2 << 12) ^ 377 (b3 << 6) ^ 378 (b4 ^ 379 (((byte) 0xF0 << 18) ^ 380 ((byte) 0x80 << 12) ^ 381 ((byte) 0x80 << 6) ^ 382 ((byte) 0x80 << 0)))); 383 if (isMalformed4(b2, b3, b4) || 384 // shortest form check 385 !Character.isSupplementaryCodePoint(uc)) { 386 return malformed(src, mark, 4); 387 } 388 dst.put(Character.highSurrogate(uc)); 389 dst.put(Character.lowSurrogate(uc)); 390 mark += 4; 391 } else { 392 return malformed(src, mark, 1); 393 } 394 } 395 return xflow(src, mark, 0); 396 } 397 398 protected CoderResult decodeLoop(ByteBuffer src, 399 CharBuffer dst) 400 { 401 if (src.hasArray() && dst.hasArray()) 402 return decodeArrayLoop(src, dst); 403 else 404 return decodeBufferLoop(src, dst); 405 } 406 407 private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp) 408 { 409 if (bb == null) 410 bb = ByteBuffer.wrap(ba); 411 bb.position(sp); 412 return bb; 413 } 414 415 // returns -1 if there is/are malformed byte(s) and the 416 // "action" for malformed input is not REPLACE. 417 public int decode(byte[] sa, int sp, int len, char[] da) { 418 final int sl = sp + len; 419 int dp = 0; 420 int dlASCII = Math.min(len, da.length); 421 ByteBuffer bb = null; // only necessary if malformed 422 423 // ASCII only optimized loop 424 while (dp < dlASCII && sa[sp] >= 0) 425 da[dp++] = (char) sa[sp++]; 426 427 while (sp < sl) { 428 int b1 = sa[sp++]; 429 if (b1 >= 0) { 430 // 1 byte, 7 bits: 0xxxxxxx 431 da[dp++] = (char) b1; 432 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { 433 // 2 bytes, 11 bits: 110xxxxx 10xxxxxx 434 if (sp < sl) { 435 int b2 = sa[sp++]; 436 if (isNotContinuation(b2)) { 437 if (malformedInputAction() != CodingErrorAction.REPLACE) 438 return -1; 439 da[dp++] = replacement().charAt(0); 440 sp--; // malformedN(bb, 2) always returns 1 441 } else { 442 da[dp++] = (char) (((b1 << 6) ^ b2)^ 443 (((byte) 0xC0 << 6) ^ 444 ((byte) 0x80 << 0))); 445 } 446 continue; 447 } 448 if (malformedInputAction() != CodingErrorAction.REPLACE) 449 return -1; 450 da[dp++] = replacement().charAt(0); 451 return dp; 452 } else if ((b1 >> 4) == -2) { 453 // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx 454 if (sp + 1 < sl) { 455 int b2 = sa[sp++]; 456 int b3 = sa[sp++]; 457 if (isMalformed3(b1, b2, b3)) { 458 if (malformedInputAction() != CodingErrorAction.REPLACE) 459 return -1; 460 da[dp++] = replacement().charAt(0); 461 sp -= 3; 462 bb = getByteBuffer(bb, sa, sp); 463 sp += malformedN(bb, 3).length(); 464 } else { 465 char c = (char)((b1 << 12) ^ 466 (b2 << 6) ^ 467 (b3 ^ 468 (((byte) 0xE0 << 12) ^ 469 ((byte) 0x80 << 6) ^ 470 ((byte) 0x80 << 0)))); 471 if (Character.isSurrogate(c)) { 472 if (malformedInputAction() != CodingErrorAction.REPLACE) 473 return -1; 474 da[dp++] = replacement().charAt(0); 475 } else { 476 da[dp++] = c; 477 } 478 } 479 continue; 480 } 481 if (malformedInputAction() != CodingErrorAction.REPLACE) 482 return -1; 483 if (sp < sl && isMalformed3_2(b1, sa[sp])) { 484 da[dp++] = replacement().charAt(0); 485 continue; 486 487 } 488 da[dp++] = replacement().charAt(0); 489 return dp; 490 } else if ((b1 >> 3) == -2) { 491 // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 492 if (sp + 2 < sl) { 493 int b2 = sa[sp++]; 494 int b3 = sa[sp++]; 495 int b4 = sa[sp++]; 496 int uc = ((b1 << 18) ^ 497 (b2 << 12) ^ 498 (b3 << 6) ^ 499 (b4 ^ 500 (((byte) 0xF0 << 18) ^ 501 ((byte) 0x80 << 12) ^ 502 ((byte) 0x80 << 6) ^ 503 ((byte) 0x80 << 0)))); 504 if (isMalformed4(b2, b3, b4) || 505 // shortest form check 506 !Character.isSupplementaryCodePoint(uc)) { 507 if (malformedInputAction() != CodingErrorAction.REPLACE) 508 return -1; 509 da[dp++] = replacement().charAt(0); 510 sp -= 4; 511 bb = getByteBuffer(bb, sa, sp); 512 sp += malformedN(bb, 4).length(); 513 } else { 514 da[dp++] = Character.highSurrogate(uc); 515 da[dp++] = Character.lowSurrogate(uc); 516 } 517 continue; 518 } 519 if (malformedInputAction() != CodingErrorAction.REPLACE) 520 return -1; 521 522 if (sp < sl && isMalformed4_2(b1, sa[sp])) { 523 da[dp++] = replacement().charAt(0); 524 continue; 525 } 526 sp++; 527 if (sp < sl && isMalformed4_3(sa[sp])) { 528 da[dp++] = replacement().charAt(0); 529 continue; 530 } 531 da[dp++] = replacement().charAt(0); 532 return dp; 533 } else { 534 if (malformedInputAction() != CodingErrorAction.REPLACE) 535 return -1; 536 da[dp++] = replacement().charAt(0); 537 } 538 } 539 return dp; 540 } 541 } 542 543 private static final class Encoder extends CharsetEncoder 544 implements ArrayEncoder { 545 546 private Encoder(Charset cs) { 547 super(cs, 1.1f, 3.0f); 548 } 549 550 public boolean canEncode(char c) { 551 return !Character.isSurrogate(c); 552 } 553 554 public boolean isLegalReplacement(byte[] repl) { 555 return ((repl.length == 1 && repl[0] >= 0) || 556 super.isLegalReplacement(repl)); 557 } 558 559 private static CoderResult overflow(CharBuffer src, int sp, 560 ByteBuffer dst, int dp) { 561 updatePositions(src, sp, dst, dp); 562 return CoderResult.OVERFLOW; 563 } 564 565 private static CoderResult overflow(CharBuffer src, int mark) { 566 src.position(mark); 567 return CoderResult.OVERFLOW; 568 } 569 570 private Surrogate.Parser sgp; 571 private CoderResult encodeArrayLoop(CharBuffer src, 572 ByteBuffer dst) 573 { 574 char[] sa = src.array(); 575 int sp = src.arrayOffset() + src.position(); 576 int sl = src.arrayOffset() + src.limit(); 577 578 byte[] da = dst.array(); 579 int dp = dst.arrayOffset() + dst.position(); 580 int dl = dst.arrayOffset() + dst.limit(); 581 int dlASCII = dp + Math.min(sl - sp, dl - dp); 582 583 // ASCII only loop 584 while (dp < dlASCII && sa[sp] < '\u0080') 585 da[dp++] = (byte) sa[sp++]; 586 while (sp < sl) { 587 char c = sa[sp]; 588 if (c < 0x80) { 589 // Have at most seven bits 590 if (dp >= dl) 591 return overflow(src, sp, dst, dp); 592 da[dp++] = (byte)c; 593 } else if (c < 0x800) { 594 // 2 bytes, 11 bits 595 if (dl - dp < 2) 596 return overflow(src, sp, dst, dp); 597 da[dp++] = (byte)(0xc0 | (c >> 6)); 598 da[dp++] = (byte)(0x80 | (c & 0x3f)); 599 } else if (Character.isSurrogate(c)) { 600 // Have a surrogate pair 601 if (sgp == null) 602 sgp = new Surrogate.Parser(); 603 int uc = sgp.parse(c, sa, sp, sl); 604 if (uc < 0) { 605 updatePositions(src, sp, dst, dp); 606 return sgp.error(); 607 } 608 if (dl - dp < 4) 609 return overflow(src, sp, dst, dp); 610 da[dp++] = (byte)(0xf0 | ((uc >> 18))); 611 da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f)); 612 da[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f)); 613 da[dp++] = (byte)(0x80 | (uc & 0x3f)); 614 sp++; // 2 chars 615 } else { 616 // 3 bytes, 16 bits 617 if (dl - dp < 3) 618 return overflow(src, sp, dst, dp); 619 da[dp++] = (byte)(0xe0 | ((c >> 12))); 620 da[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f)); 621 da[dp++] = (byte)(0x80 | (c & 0x3f)); 622 } 623 sp++; 624 } 625 updatePositions(src, sp, dst, dp); 626 return CoderResult.UNDERFLOW; 627 } 628 629 private CoderResult encodeBufferLoop(CharBuffer src, 630 ByteBuffer dst) 631 { 632 int mark = src.position(); 633 while (src.hasRemaining()) { 634 char c = src.get(); 635 if (c < 0x80) { 636 // Have at most seven bits 637 if (!dst.hasRemaining()) 638 return overflow(src, mark); 639 dst.put((byte)c); 640 } else if (c < 0x800) { 641 // 2 bytes, 11 bits 642 if (dst.remaining() < 2) 643 return overflow(src, mark); 644 dst.put((byte)(0xc0 | (c >> 6))); 645 dst.put((byte)(0x80 | (c & 0x3f))); 646 } else if (Character.isSurrogate(c)) { 647 // Have a surrogate pair 648 if (sgp == null) 649 sgp = new Surrogate.Parser(); 650 int uc = sgp.parse(c, src); 651 if (uc < 0) { 652 src.position(mark); 653 return sgp.error(); 654 } 655 if (dst.remaining() < 4) 656 return overflow(src, mark); 657 dst.put((byte)(0xf0 | ((uc >> 18)))); 658 dst.put((byte)(0x80 | ((uc >> 12) & 0x3f))); 659 dst.put((byte)(0x80 | ((uc >> 6) & 0x3f))); 660 dst.put((byte)(0x80 | (uc & 0x3f))); 661 mark++; // 2 chars 662 } else { 663 // 3 bytes, 16 bits 664 if (dst.remaining() < 3) 665 return overflow(src, mark); 666 dst.put((byte)(0xe0 | ((c >> 12)))); 667 dst.put((byte)(0x80 | ((c >> 6) & 0x3f))); 668 dst.put((byte)(0x80 | (c & 0x3f))); 669 } 670 mark++; 671 } 672 src.position(mark); 673 return CoderResult.UNDERFLOW; 674 } 675 676 protected final CoderResult encodeLoop(CharBuffer src, 677 ByteBuffer dst) 678 { 679 if (src.hasArray() && dst.hasArray()) 680 return encodeArrayLoop(src, dst); 681 else 682 return encodeBufferLoop(src, dst); 683 } 684 685 // returns -1 if there is malformed char(s) and the 686 // "action" for malformed input is not REPLACE. 687 public int encode(char[] sa, int sp, int len, byte[] da) { 688 int sl = sp + len; 689 int dp = 0; 690 int dlASCII = dp + Math.min(len, da.length); 691 692 // ASCII only optimized loop 693 while (dp < dlASCII && sa[sp] < '\u0080') 694 da[dp++] = (byte) sa[sp++]; 695 696 while (sp < sl) { 697 char c = sa[sp++]; 698 if (c < 0x80) { 699 // Have at most seven bits 700 da[dp++] = (byte)c; 701 } else if (c < 0x800) { 702 // 2 bytes, 11 bits 703 da[dp++] = (byte)(0xc0 | (c >> 6)); 704 da[dp++] = (byte)(0x80 | (c & 0x3f)); 705 } else if (Character.isSurrogate(c)) { 706 if (sgp == null) 707 sgp = new Surrogate.Parser(); 708 int uc = sgp.parse(c, sa, sp - 1, sl); 709 if (uc < 0) { 710 if (malformedInputAction() != CodingErrorAction.REPLACE) 711 return -1; 712 da[dp++] = replacement()[0]; 713 } else { 714 da[dp++] = (byte)(0xf0 | ((uc >> 18))); 715 da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f)); 716 da[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f)); 717 da[dp++] = (byte)(0x80 | (uc & 0x3f)); 718 sp++; // 2 chars 719 } 720 } else { 721 // 3 bytes, 16 bits 722 da[dp++] = (byte)(0xe0 | ((c >> 12))); 723 da[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f)); 724 da[dp++] = (byte)(0x80 | (c & 0x3f)); 725 } 726 } 727 return dp; 728 } 729 } 730 }