1 /*
   2  * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.nio.cs;
  27 
  28 import java.nio.Buffer;
  29 import java.nio.ByteBuffer;
  30 import java.nio.CharBuffer;
  31 import java.nio.charset.Charset;
  32 import java.nio.charset.CharsetDecoder;
  33 import java.nio.charset.CharsetEncoder;
  34 import java.nio.charset.CoderResult;
  35 import java.nio.charset.CodingErrorAction;
  36 
  37 /* Legal UTF-8 Byte Sequences
  38  *
  39  * #    Code Points      Bits   Bit/Byte pattern
  40  * 1                     7      0xxxxxxx
  41  *      U+0000..U+007F          00..7F
  42  *
  43  * 2                     11     110xxxxx    10xxxxxx
  44  *      U+0080..U+07FF          C2..DF      80..BF
  45  *
  46  * 3                     16     1110xxxx    10xxxxxx    10xxxxxx
  47  *      U+0800..U+0FFF          E0          A0..BF      80..BF
  48  *      U+1000..U+FFFF          E1..EF      80..BF      80..BF
  49  *
  50  * 4                     21     11110xxx    10xxxxxx    10xxxxxx    10xxxxxx
  51  *     U+10000..U+3FFFF         F0          90..BF      80..BF      80..BF
  52  *     U+40000..U+FFFFF         F1..F3      80..BF      80..BF      80..BF
  53  *    U+100000..U+10FFFF        F4          80..8F      80..BF      80..BF
  54  *
  55  */
  56 
  57 class UTF_8 extends Unicode
  58 {
  59     public UTF_8() {
  60         super("UTF-8", StandardCharsets.aliases_UTF_8);
  61     }
  62 
  63     public String historicalName() {
  64         return "UTF8";
  65     }
  66 
  67     public CharsetDecoder newDecoder() {
  68         return new Decoder(this);
  69     }
  70 
  71     public CharsetEncoder newEncoder() {
  72         return new Encoder(this);
  73     }
  74 
  75     private static final void updatePositions(Buffer src, int sp,
  76                                               Buffer dst, int dp) {
  77         src.position(sp - src.arrayOffset());
  78         dst.position(dp - dst.arrayOffset());
  79     }
  80 
  81     private static class Decoder extends CharsetDecoder
  82                                  implements ArrayDecoder {
  83         private Decoder(Charset cs) {
  84             super(cs, 1.0f, 1.0f);
  85         }
  86 
  87         private static boolean isNotContinuation(int b) {
  88             return (b & 0xc0) != 0x80;
  89         }
  90 
  91         //  [E0]     [A0..BF] [80..BF]
  92         //  [E1..EF] [80..BF] [80..BF]
  93         private static boolean isMalformed3(int b1, int b2, int b3) {
  94             return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
  95                    (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
  96         }
  97 
  98         // only used when there is only one byte left in src buffer
  99         private static boolean isMalformed3_2(int b1, int b2) {
 100             return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
 101                    (b2 & 0xc0) != 0x80;
 102         }
 103 
 104         //  [F0]     [90..BF] [80..BF] [80..BF]
 105         //  [F1..F3] [80..BF] [80..BF] [80..BF]
 106         //  [F4]     [80..8F] [80..BF] [80..BF]
 107         //  only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...]
 108         //  will be checked by Character.isSupplementaryCodePoint(uc)
 109         private static boolean isMalformed4(int b2, int b3, int b4) {
 110             return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
 111                    (b4 & 0xc0) != 0x80;
 112         }
 113 
 114         // only used when there is less than 4 bytes left in src buffer.
 115         // both b1 and b2 should be "& 0xff" before passed in.
 116         private static boolean isMalformed4_2(int b1, int b2) {
 117             return (b1 == 0xf0 && (b2  < 0x90 || b2 > 0xbf)) ||
 118                    (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
 119                    (b2 & 0xc0) != 0x80;
 120         }
 121 
 122         // only used when there is less than 4 bytes left in src buffer,
 123         // after isMalformed4_2 has been invoked.
 124         private static boolean isMalformed4_3(int b3) {
 125             return (b3 & 0xc0) != 0x80;
 126         }
 127 
 128         private static CoderResult lookupN(ByteBuffer src, int n)
 129         {
 130             for (int i = 1; i < n; i++) {
 131                if (isNotContinuation(src.get()))
 132                    return CoderResult.malformedForLength(i);
 133             }
 134             return CoderResult.malformedForLength(n);
 135         }
 136 
 137         private static CoderResult malformedN(ByteBuffer src, int nb) {
 138             switch (nb) {
 139             case 1:
 140             case 2:                    // always 1
 141                 return CoderResult.malformedForLength(1);
 142             case 3:
 143                 int b1 = src.get();
 144                 int b2 = src.get();    // no need to lookup b3
 145                 return CoderResult.malformedForLength(
 146                     ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
 147                      isNotContinuation(b2)) ? 1 : 2);
 148             case 4:  // we don't care the speed here
 149                 b1 = src.get() & 0xff;
 150                 b2 = src.get() & 0xff;
 151                 if (b1 > 0xf4 ||
 152                     (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
 153                     (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
 154                     isNotContinuation(b2))
 155                     return CoderResult.malformedForLength(1);
 156                 if (isNotContinuation(src.get()))
 157                     return CoderResult.malformedForLength(2);
 158                 return CoderResult.malformedForLength(3);
 159             default:
 160                 assert false;
 161                 return null;
 162             }
 163         }
 164 
 165         private static CoderResult malformed(ByteBuffer src, int sp,
 166                                              CharBuffer dst, int dp,
 167                                              int nb)
 168         {
 169             src.position(sp - src.arrayOffset());
 170             CoderResult cr = malformedN(src, nb);
 171             updatePositions(src, sp, dst, dp);
 172             return cr;
 173         }
 174 
 175 
 176         private static CoderResult malformed(ByteBuffer src,
 177                                              int mark, int nb)
 178         {
 179             src.position(mark);
 180             CoderResult cr = malformedN(src, nb);
 181             src.position(mark);
 182             return cr;
 183         }
 184 
 185         private static CoderResult malformedForLength(ByteBuffer src,
 186                                                       int sp,
 187                                                       CharBuffer dst,
 188                                                       int dp,
 189                                                       int malformedNB)
 190         {
 191             updatePositions(src, sp, dst, dp);
 192             return CoderResult.malformedForLength(malformedNB);
 193         }
 194 
 195         private static CoderResult malformedForLength(ByteBuffer src,
 196                                                       int mark,
 197                                                       int malformedNB)
 198         {
 199             src.position(mark);
 200             return CoderResult.malformedForLength(malformedNB);
 201         }
 202 
 203 
 204         private static CoderResult xflow(Buffer src, int sp, int sl,
 205                                          Buffer dst, int dp, int nb) {
 206             updatePositions(src, sp, dst, dp);
 207             return (nb == 0 || sl - sp < nb)
 208                    ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
 209         }
 210 
 211         private static CoderResult xflow(Buffer src, int mark, int nb) {
 212             src.position(mark);
 213             return (nb == 0 || src.remaining() < nb)
 214                    ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
 215         }
 216 
 217         private CoderResult decodeArrayLoop(ByteBuffer src,
 218                                             CharBuffer dst)
 219         {
 220             // This method is optimized for ASCII input.
 221             byte[] sa = src.array();
 222             int sp = src.arrayOffset() + src.position();
 223             int sl = src.arrayOffset() + src.limit();
 224 
 225             char[] da = dst.array();
 226             int dp = dst.arrayOffset() + dst.position();
 227             int dl = dst.arrayOffset() + dst.limit();
 228             int dlASCII = dp + Math.min(sl - sp, dl - dp);
 229 
 230             // ASCII only loop
 231             while (dp < dlASCII && sa[sp] >= 0)
 232                 da[dp++] = (char) sa[sp++];
 233             while (sp < sl) {
 234                 int b1 = sa[sp];
 235                 if (b1 >= 0) {
 236                     // 1 byte, 7 bits: 0xxxxxxx
 237                     if (dp >= dl)
 238                         return xflow(src, sp, sl, dst, dp, 1);
 239                     da[dp++] = (char) b1;
 240                     sp++;
 241                 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
 242                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 243                     //                   [C2..DF] [80..BF]
 244                     if (sl - sp < 2 || dp >= dl)
 245                         return xflow(src, sp, sl, dst, dp, 2);
 246                     int b2 = sa[sp + 1];
 247                     // Now we check the first byte of 2-byte sequence as
 248                     //     if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0)
 249                     // no longer need to check b1 against c1 & c0 for
 250                     // malformed as we did in previous version
 251                     //   (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80;
 252                     // only need to check the second byte b2.
 253                     if (isNotContinuation(b2))
 254                         return malformedForLength(src, sp, dst, dp, 1);
 255                     da[dp++] = (char) (((b1 << 6) ^ b2)
 256                                        ^
 257                                        (((byte) 0xC0 << 6) ^
 258                                         ((byte) 0x80 << 0)));
 259                     sp += 2;
 260                 } else if ((b1 >> 4) == -2) {
 261                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 262                     int srcRemaining = sl - sp;
 263                     if (srcRemaining < 3 || dp >= dl) {
 264                         if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1]))
 265                             return malformedForLength(src, sp, dst, dp, 1);
 266                         return xflow(src, sp, sl, dst, dp, 3);
 267                     }
 268                     int b2 = sa[sp + 1];
 269                     int b3 = sa[sp + 2];
 270                     if (isMalformed3(b1, b2, b3))
 271                         return malformed(src, sp, dst, dp, 3);
 272                     char c = (char)
 273                         ((b1 << 12) ^
 274                          (b2 <<  6) ^
 275                          (b3 ^
 276                           (((byte) 0xE0 << 12) ^
 277                            ((byte) 0x80 <<  6) ^
 278                            ((byte) 0x80 <<  0))));
 279                     if (Character.isSurrogate(c))
 280                         return malformedForLength(src, sp, dst, dp, 3);
 281                     da[dp++] = c;
 282                     sp += 3;
 283                 } else if ((b1 >> 3) == -2) {
 284                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 285                     int srcRemaining = sl - sp;
 286                     if (srcRemaining < 4 || dl - dp < 2) {
 287                         b1 &= 0xff;
 288                         if (b1 > 0xf4 ||
 289                             srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1] & 0xff))
 290                             return malformedForLength(src, sp, dst, dp, 1);
 291                         if (srcRemaining > 2 && isMalformed4_3(sa[sp + 2]))
 292                             return malformedForLength(src, sp, dst, dp, 2);
 293                         return xflow(src, sp, sl, dst, dp, 4);
 294                     }
 295                     int b2 = sa[sp + 1];
 296                     int b3 = sa[sp + 2];
 297                     int b4 = sa[sp + 3];
 298                     int uc = ((b1 << 18) ^
 299                               (b2 << 12) ^
 300                               (b3 <<  6) ^
 301                               (b4 ^
 302                                (((byte) 0xF0 << 18) ^
 303                                 ((byte) 0x80 << 12) ^
 304                                 ((byte) 0x80 <<  6) ^
 305                                 ((byte) 0x80 <<  0))));
 306                     if (isMalformed4(b2, b3, b4) ||
 307                         // shortest form check
 308                         !Character.isSupplementaryCodePoint(uc)) {
 309                         return malformed(src, sp, dst, dp, 4);
 310                     }
 311                     da[dp++] = Character.highSurrogate(uc);
 312                     da[dp++] = Character.lowSurrogate(uc);
 313                     sp += 4;
 314                 } else
 315                     return malformed(src, sp, dst, dp, 1);
 316             }
 317             return xflow(src, sp, sl, dst, dp, 0);
 318         }
 319 
 320         private CoderResult decodeBufferLoop(ByteBuffer src,
 321                                              CharBuffer dst)
 322         {
 323             int mark = src.position();
 324             int limit = src.limit();
 325             while (mark < limit) {
 326                 int b1 = src.get();
 327                 if (b1 >= 0) {
 328                     // 1 byte, 7 bits: 0xxxxxxx
 329                     if (dst.remaining() < 1)
 330                         return xflow(src, mark, 1); // overflow
 331                     dst.put((char) b1);
 332                     mark++;
 333                 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
 334                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 335                     if (limit - mark < 2|| dst.remaining() < 1)
 336                         return xflow(src, mark, 2);
 337                     int b2 = src.get();
 338                     if (isNotContinuation(b2))
 339                         return malformedForLength(src, mark, 1);
 340                      dst.put((char) (((b1 << 6) ^ b2)
 341                                     ^
 342                                     (((byte) 0xC0 << 6) ^
 343                                      ((byte) 0x80 << 0))));
 344                     mark += 2;
 345                 } else if ((b1 >> 4) == -2) {
 346                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 347                     int srcRemaining = limit - mark;
 348                     if (srcRemaining < 3 || dst.remaining() < 1) {
 349                         if (srcRemaining > 1 && isMalformed3_2(b1, src.get()))
 350                             return malformedForLength(src, mark, 1);
 351                         return xflow(src, mark, 3);
 352                     }
 353                     int b2 = src.get();
 354                     int b3 = src.get();
 355                     if (isMalformed3(b1, b2, b3))
 356                         return malformed(src, mark, 3);
 357                     char c = (char)
 358                         ((b1 << 12) ^
 359                          (b2 <<  6) ^
 360                          (b3 ^
 361                           (((byte) 0xE0 << 12) ^
 362                            ((byte) 0x80 <<  6) ^
 363                            ((byte) 0x80 <<  0))));
 364                     if (Character.isSurrogate(c))
 365                         return malformedForLength(src, mark, 3);
 366                     dst.put(c);
 367                     mark += 3;
 368                 } else if ((b1 >> 3) == -2) {
 369                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 370                     int srcRemaining = limit - mark;
 371                     if (srcRemaining < 4 || dst.remaining() < 2) {
 372                         b1 &= 0xff;
 373                         if (b1 > 0xf4 ||
 374                             srcRemaining > 1 && isMalformed4_2(b1, src.get() & 0xff))
 375                             return malformedForLength(src, mark, 1);
 376                         if (srcRemaining > 2 && isMalformed4_3(src.get()))
 377                             return malformedForLength(src, mark, 2);
 378                         return xflow(src, mark, 4);
 379                     }
 380                     int b2 = src.get();
 381                     int b3 = src.get();
 382                     int b4 = src.get();
 383                     int uc = ((b1 << 18) ^
 384                               (b2 << 12) ^
 385                               (b3 <<  6) ^
 386                               (b4 ^
 387                                (((byte) 0xF0 << 18) ^
 388                                 ((byte) 0x80 << 12) ^
 389                                 ((byte) 0x80 <<  6) ^
 390                                 ((byte) 0x80 <<  0))));
 391                     if (isMalformed4(b2, b3, b4) ||
 392                         // shortest form check
 393                         !Character.isSupplementaryCodePoint(uc)) {
 394                         return malformed(src, mark, 4);
 395                     }
 396                     dst.put(Character.highSurrogate(uc));
 397                     dst.put(Character.lowSurrogate(uc));
 398                     mark += 4;
 399                 } else {
 400                     return malformed(src, mark, 1);
 401                 }
 402             }
 403             return xflow(src, mark, 0);
 404         }
 405 
 406         protected CoderResult decodeLoop(ByteBuffer src,
 407                                          CharBuffer dst)
 408         {
 409             if (src.hasArray() && dst.hasArray())
 410                 return decodeArrayLoop(src, dst);
 411             else
 412                 return decodeBufferLoop(src, dst);
 413         }
 414 
 415         private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp)
 416         {
 417             if (bb == null)
 418                 bb = ByteBuffer.wrap(ba);
 419             bb.position(sp);
 420             return bb;
 421         }
 422 
 423         // returns -1 if there is/are malformed byte(s) and the
 424         // "action" for malformed input is not REPLACE.
 425         public int decode(byte[] sa, int sp, int len, char[] da) {
 426             final int sl = sp + len;
 427             int dp = 0;
 428             int dlASCII = Math.min(len, da.length);
 429             ByteBuffer bb = null;  // only necessary if malformed
 430 
 431             // ASCII only optimized loop
 432             while (dp < dlASCII && sa[sp] >= 0)
 433                 da[dp++] = (char) sa[sp++];
 434 
 435             while (sp < sl) {
 436                 int b1 = sa[sp++];
 437                 if (b1 >= 0) {
 438                     // 1 byte, 7 bits: 0xxxxxxx
 439                     da[dp++] = (char) b1;
 440                 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
 441                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 442                     if (sp < sl) {
 443                         int b2 = sa[sp++];
 444                         if (isNotContinuation(b2)) {
 445                             if (malformedInputAction() != CodingErrorAction.REPLACE)
 446                                 return -1;
 447                             da[dp++] = replacement().charAt(0);
 448                             sp--;            // malformedN(bb, 2) always returns 1
 449                         } else {
 450                             da[dp++] = (char) (((b1 << 6) ^ b2)^
 451                                            (((byte) 0xC0 << 6) ^
 452                                             ((byte) 0x80 << 0)));
 453                         }
 454                         continue;
 455                     }
 456                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 457                         return -1;
 458                     da[dp++] = replacement().charAt(0);
 459                     return dp;
 460                 } else if ((b1 >> 4) == -2) {
 461                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 462                     if (sp + 1 < sl) {
 463                         int b2 = sa[sp++];
 464                         int b3 = sa[sp++];
 465                         if (isMalformed3(b1, b2, b3)) {
 466                             if (malformedInputAction() != CodingErrorAction.REPLACE)
 467                                 return -1;
 468                             da[dp++] = replacement().charAt(0);
 469                             sp -= 3;
 470                             bb = getByteBuffer(bb, sa, sp);
 471                             sp += malformedN(bb, 3).length();
 472                         } else {
 473                             char c = (char)((b1 << 12) ^
 474                                               (b2 <<  6) ^
 475                                               (b3 ^
 476                                               (((byte) 0xE0 << 12) ^
 477                                               ((byte) 0x80 <<  6) ^
 478                                               ((byte) 0x80 <<  0))));
 479                             if (Character.isSurrogate(c)) {
 480                                 if (malformedInputAction() != CodingErrorAction.REPLACE)
 481                                     return -1;
 482                                 da[dp++] = replacement().charAt(0);
 483                             } else {
 484                                 da[dp++] = c;
 485                             }
 486                         }
 487                         continue;
 488                     }
 489                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 490                         return -1;
 491                     if (sp  < sl && isMalformed3_2(b1, sa[sp])) {
 492                         da[dp++] = replacement().charAt(0);
 493                         continue;
 494 
 495                     }
 496                     da[dp++] = replacement().charAt(0);
 497                     return dp;
 498                 } else if ((b1 >> 3) == -2) {
 499                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 500                     if (sp + 2 < sl) {
 501                         int b2 = sa[sp++];
 502                         int b3 = sa[sp++];
 503                         int b4 = sa[sp++];
 504                         int uc = ((b1 << 18) ^
 505                                   (b2 << 12) ^
 506                                   (b3 <<  6) ^
 507                                   (b4 ^
 508                                    (((byte) 0xF0 << 18) ^
 509                                    ((byte) 0x80 << 12) ^
 510                                    ((byte) 0x80 <<  6) ^
 511                                    ((byte) 0x80 <<  0))));
 512                         if (isMalformed4(b2, b3, b4) ||
 513                             // shortest form check
 514                             !Character.isSupplementaryCodePoint(uc)) {
 515                             if (malformedInputAction() != CodingErrorAction.REPLACE)
 516                                 return -1;
 517                             da[dp++] = replacement().charAt(0);
 518                             sp -= 4;
 519                             bb = getByteBuffer(bb, sa, sp);
 520                             sp += malformedN(bb, 4).length();
 521                         } else {
 522                             da[dp++] = Character.highSurrogate(uc);
 523                             da[dp++] = Character.lowSurrogate(uc);
 524                         }
 525                         continue;
 526                     }
 527                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 528                         return -1;
 529                     b1 &= 0xff;
 530                     if (b1 > 0xf4 ||
 531                         sp  < sl && isMalformed4_2(b1, sa[sp] & 0xff)) {
 532                         da[dp++] = replacement().charAt(0);
 533                         continue;
 534                     }
 535                     sp++;
 536                     if (sp  < sl && isMalformed4_3(sa[sp])) {
 537                         da[dp++] = replacement().charAt(0);
 538                         continue;
 539                     }
 540                     da[dp++] = replacement().charAt(0);
 541                     return dp;
 542                 } else {
 543                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 544                         return -1;
 545                     da[dp++] = replacement().charAt(0);
 546                 }
 547             }
 548             return dp;
 549         }
 550     }
 551 
 552     private static final class Encoder extends CharsetEncoder
 553                                  implements ArrayEncoder {
 554 
 555         private Encoder(Charset cs) {
 556             super(cs, 1.1f, 3.0f);
 557         }
 558 
 559         public boolean canEncode(char c) {
 560             return !Character.isSurrogate(c);
 561         }
 562 
 563         public boolean isLegalReplacement(byte[] repl) {
 564             return ((repl.length == 1 && repl[0] >= 0) ||
 565                     super.isLegalReplacement(repl));
 566         }
 567 
 568         private static CoderResult overflow(CharBuffer src, int sp,
 569                                             ByteBuffer dst, int dp) {
 570             updatePositions(src, sp, dst, dp);
 571             return CoderResult.OVERFLOW;
 572         }
 573 
 574         private static CoderResult overflow(CharBuffer src, int mark) {
 575             src.position(mark);
 576             return CoderResult.OVERFLOW;
 577         }
 578 
 579         private Surrogate.Parser sgp;
 580         private CoderResult encodeArrayLoop(CharBuffer src,
 581                                             ByteBuffer dst)
 582         {
 583             char[] sa = src.array();
 584             int sp = src.arrayOffset() + src.position();
 585             int sl = src.arrayOffset() + src.limit();
 586 
 587             byte[] da = dst.array();
 588             int dp = dst.arrayOffset() + dst.position();
 589             int dl = dst.arrayOffset() + dst.limit();
 590             int dlASCII = dp + Math.min(sl - sp, dl - dp);
 591 
 592             // ASCII only loop
 593             while (dp < dlASCII && sa[sp] < '\u0080')
 594                 da[dp++] = (byte) sa[sp++];
 595             while (sp < sl) {
 596                 char c = sa[sp];
 597                 if (c < 0x80) {
 598                     // Have at most seven bits
 599                     if (dp >= dl)
 600                         return overflow(src, sp, dst, dp);
 601                     da[dp++] = (byte)c;
 602                 } else if (c < 0x800) {
 603                     // 2 bytes, 11 bits
 604                     if (dl - dp < 2)
 605                         return overflow(src, sp, dst, dp);
 606                     da[dp++] = (byte)(0xc0 | (c >> 6));
 607                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 608                 } else if (Character.isSurrogate(c)) {
 609                     // Have a surrogate pair
 610                     if (sgp == null)
 611                         sgp = new Surrogate.Parser();
 612                     int uc = sgp.parse(c, sa, sp, sl);
 613                     if (uc < 0) {
 614                         updatePositions(src, sp, dst, dp);
 615                         return sgp.error();
 616                     }
 617                     if (dl - dp < 4)
 618                         return overflow(src, sp, dst, dp);
 619                     da[dp++] = (byte)(0xf0 | ((uc >> 18)));
 620                     da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 621                     da[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
 622                     da[dp++] = (byte)(0x80 | (uc & 0x3f));
 623                     sp++;  // 2 chars
 624                 } else {
 625                     // 3 bytes, 16 bits
 626                     if (dl - dp < 3)
 627                         return overflow(src, sp, dst, dp);
 628                     da[dp++] = (byte)(0xe0 | ((c >> 12)));
 629                     da[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
 630                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 631                 }
 632                 sp++;
 633             }
 634             updatePositions(src, sp, dst, dp);
 635             return CoderResult.UNDERFLOW;
 636         }
 637 
 638         private CoderResult encodeBufferLoop(CharBuffer src,
 639                                              ByteBuffer dst)
 640         {
 641             int mark = src.position();
 642             while (src.hasRemaining()) {
 643                 char c = src.get();
 644                 if (c < 0x80) {
 645                     // Have at most seven bits
 646                     if (!dst.hasRemaining())
 647                         return overflow(src, mark);
 648                     dst.put((byte)c);
 649                 } else if (c < 0x800) {
 650                     // 2 bytes, 11 bits
 651                     if (dst.remaining() < 2)
 652                         return overflow(src, mark);
 653                     dst.put((byte)(0xc0 | (c >> 6)));
 654                     dst.put((byte)(0x80 | (c & 0x3f)));
 655                 } else if (Character.isSurrogate(c)) {
 656                     // Have a surrogate pair
 657                     if (sgp == null)
 658                         sgp = new Surrogate.Parser();
 659                     int uc = sgp.parse(c, src);
 660                     if (uc < 0) {
 661                         src.position(mark);
 662                         return sgp.error();
 663                     }
 664                     if (dst.remaining() < 4)
 665                         return overflow(src, mark);
 666                     dst.put((byte)(0xf0 | ((uc >> 18))));
 667                     dst.put((byte)(0x80 | ((uc >> 12) & 0x3f)));
 668                     dst.put((byte)(0x80 | ((uc >>  6) & 0x3f)));
 669                     dst.put((byte)(0x80 | (uc & 0x3f)));
 670                     mark++;  // 2 chars
 671                 } else {
 672                     // 3 bytes, 16 bits
 673                     if (dst.remaining() < 3)
 674                         return overflow(src, mark);
 675                     dst.put((byte)(0xe0 | ((c >> 12))));
 676                     dst.put((byte)(0x80 | ((c >>  6) & 0x3f)));
 677                     dst.put((byte)(0x80 | (c & 0x3f)));
 678                 }
 679                 mark++;
 680             }
 681             src.position(mark);
 682             return CoderResult.UNDERFLOW;
 683         }
 684 
 685         protected final CoderResult encodeLoop(CharBuffer src,
 686                                                ByteBuffer dst)
 687         {
 688             if (src.hasArray() && dst.hasArray())
 689                 return encodeArrayLoop(src, dst);
 690             else
 691                 return encodeBufferLoop(src, dst);
 692         }
 693 
 694         private byte repl = (byte)'?';
 695         protected void implReplaceWith(byte[] newReplacement) {
 696             repl = newReplacement[0];
 697         }
 698 
 699         // returns -1 if there is malformed char(s) and the
 700         // "action" for malformed input is not REPLACE.
 701         public int encode(char[] sa, int sp, int len, byte[] da) {
 702             int sl = sp + len;
 703             int dp = 0;
 704             int dlASCII = dp + Math.min(len, da.length);
 705 
 706             // ASCII only optimized loop
 707             while (dp < dlASCII && sa[sp] < '\u0080')
 708                 da[dp++] = (byte) sa[sp++];
 709 
 710             while (sp < sl) {
 711                 char c = sa[sp++];
 712                 if (c < 0x80) {
 713                     // Have at most seven bits
 714                     da[dp++] = (byte)c;
 715                 } else if (c < 0x800) {
 716                     // 2 bytes, 11 bits
 717                     da[dp++] = (byte)(0xc0 | (c >> 6));
 718                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 719                 } else if (Character.isSurrogate(c)) {
 720                     if (sgp == null)
 721                         sgp = new Surrogate.Parser();
 722                     int uc = sgp.parse(c, sa, sp - 1, sl);
 723                     if (uc < 0) {
 724                         if (malformedInputAction() != CodingErrorAction.REPLACE)
 725                             return -1;
 726                         da[dp++] = repl;
 727                     } else {
 728                         da[dp++] = (byte)(0xf0 | ((uc >> 18)));
 729                         da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 730                         da[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
 731                         da[dp++] = (byte)(0x80 | (uc & 0x3f));
 732                         sp++;  // 2 chars
 733                     }
 734                 } else {
 735                     // 3 bytes, 16 bits
 736                     da[dp++] = (byte)(0xe0 | ((c >> 12)));
 737                     da[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
 738                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 739                 }
 740             }
 741             return dp;
 742         }
 743     }
 744 }