1 /*
   2  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.nio.cs;
  27 
  28 import java.nio.Buffer;
  29 import java.nio.ByteBuffer;
  30 import java.nio.CharBuffer;
  31 import java.nio.charset.Charset;
  32 import java.nio.charset.CharsetDecoder;
  33 import java.nio.charset.CharsetEncoder;
  34 import java.nio.charset.CoderResult;
  35 import java.nio.charset.CodingErrorAction;
  36 
  37 /* Legal UTF-8 Byte Sequences
  38  *
  39  * #    Code Points      Bits   Bit/Byte pattern
  40  * 1                     7      0xxxxxxx
  41  *      U+0000..U+007F          00..7F
  42  *
  43  * 2                     11     110xxxxx    10xxxxxx
  44  *      U+0080..U+07FF          C2..DF      80..BF
  45  *
  46  * 3                     16     1110xxxx    10xxxxxx    10xxxxxx
  47  *      U+0800..U+0FFF          E0          A0..BF      80..BF
  48  *      U+1000..U+FFFF          E1..EF      80..BF      80..BF
  49  *
  50  * 4                     21     11110xxx    10xxxxxx    10xxxxxx    10xxxxxx
  51  *     U+10000..U+3FFFF         F0          90..BF      80..BF      80..BF
  52  *     U+40000..U+FFFFF         F1..F3      80..BF      80..BF      80..BF
  53  *    U+100000..U+10FFFF        F4          80..8F      80..BF      80..BF
  54  *
  55  */
  56 
  57 class UTF_8 extends Unicode
  58 {
  59     public UTF_8() {
  60         super("UTF-8", StandardCharsets.aliases_UTF_8);
  61     }
  62 
  63     public String historicalName() {
  64         return "UTF8";
  65     }
  66 
  67     public CharsetDecoder newDecoder() {
  68         return new Decoder(this);
  69     }
  70 
  71     public CharsetEncoder newEncoder() {
  72         return new Encoder(this);
  73     }
  74 
  75     private static final void updatePositions(Buffer src, int sp,
  76                                               Buffer dst, int dp) {
  77         src.position(sp - src.arrayOffset());
  78         dst.position(dp - dst.arrayOffset());
  79     }
  80 
  81     private static class Decoder extends CharsetDecoder
  82                                  implements ArrayDecoder {
  83         private Decoder(Charset cs) {
  84             super(cs, 1.0f, 1.0f);
  85         }
  86 
  87         private static boolean isNotContinuation(int b) {
  88             return (b & 0xc0) != 0x80;
  89         }
  90 
  91         //  [E0]     [A0..BF] [80..BF]
  92         //  [E1..EF] [80..BF] [80..BF]
  93         private static boolean isMalformed3(int b1, int b2, int b3) {
  94             return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
  95                    (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
  96         }
  97 
  98         // only used when there is only one byte left in src buffer
  99         private static boolean isMalformed3_2(int b1, int b2) {
 100             return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
 101                    (b2 & 0xc0) != 0x80;
 102         }
 103 
 104         //  [F0]     [90..BF] [80..BF] [80..BF]
 105         //  [F1..F3] [80..BF] [80..BF] [80..BF]
 106         //  [F4]     [80..8F] [80..BF] [80..BF]
 107         //  only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...]
 108         //  will be checked by Character.isSupplementaryCodePoint(uc)
 109         private static boolean isMalformed4(int b2, int b3, int b4) {
 110             return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
 111                    (b4 & 0xc0) != 0x80;
 112         }
 113 
 114         // only used when there is less than 4 bytes left in src buffer
 115         private static boolean isMalformed4_2(int b1, int b2) {
 116             return (b1 == 0xf0 && b2 == 0x90) ||
 117                    (b2 & 0xc0) != 0x80;
 118         }
 119 
 120         private static boolean isMalformed4_3(int b3) {
 121             return (b3 & 0xc0) != 0x80;
 122         }
 123 
 124         private static CoderResult lookupN(ByteBuffer src, int n)
 125         {
 126             for (int i = 1; i < n; i++) {
 127                if (isNotContinuation(src.get()))
 128                    return CoderResult.malformedForLength(i);
 129             }
 130             return CoderResult.malformedForLength(n);
 131         }
 132 
 133         private static CoderResult malformedN(ByteBuffer src, int nb) {
 134             switch (nb) {
 135             case 1:
 136             case 2:                    // always 1
 137                 return CoderResult.malformedForLength(1);
 138             case 3:
 139                 int b1 = src.get();
 140                 int b2 = src.get();    // no need to lookup b3
 141                 return CoderResult.malformedForLength(
 142                     ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
 143                      isNotContinuation(b2)) ? 1 : 2);
 144             case 4:  // we don't care the speed here
 145                 b1 = src.get() & 0xff;
 146                 b2 = src.get() & 0xff;
 147                 if (b1 > 0xf4 ||
 148                     (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
 149                     (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
 150                     isNotContinuation(b2))
 151                     return CoderResult.malformedForLength(1);
 152                 if (isNotContinuation(src.get()))
 153                     return CoderResult.malformedForLength(2);
 154                 return CoderResult.malformedForLength(3);
 155             default:
 156                 assert false;
 157                 return null;
 158             }
 159         }
 160 
 161         private static CoderResult malformed(ByteBuffer src, int sp,
 162                                              CharBuffer dst, int dp,
 163                                              int nb)
 164         {
 165             src.position(sp - src.arrayOffset());
 166             CoderResult cr = malformedN(src, nb);
 167             updatePositions(src, sp, dst, dp);
 168             return cr;
 169         }
 170 
 171  
 172         private static CoderResult malformed(ByteBuffer src,
 173                                              int mark, int nb)
 174         {
 175             src.position(mark);
 176             CoderResult cr = malformedN(src, nb);
 177             src.position(mark);
 178             return cr;
 179         }
 180 
 181         private static CoderResult malformedForLength(ByteBuffer src,
 182                                                       int sp,
 183                                                       CharBuffer dst,
 184                                                       int dp,
 185                                                       int malformedNB)
 186         {
 187             updatePositions(src, sp, dst, dp);
 188             return CoderResult.malformedForLength(malformedNB);
 189         }
 190 
 191         private static CoderResult malformedForLength(ByteBuffer src,
 192                                                       int mark,
 193                                                       int malformedNB)
 194         {
 195             src.position(mark);
 196             return CoderResult.malformedForLength(malformedNB);
 197         }
 198 
 199 
 200         private static CoderResult xflow(Buffer src, int sp, int sl,
 201                                          Buffer dst, int dp, int nb) {
 202             updatePositions(src, sp, dst, dp);
 203             return (nb == 0 || sl - sp < nb)
 204                    ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
 205         }
 206 
 207         private static CoderResult xflow(Buffer src, int mark, int nb) {
 208             src.position(mark);
 209             return (nb == 0 || src.remaining() < nb)
 210                    ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
 211         }
 212 
 213         private CoderResult decodeArrayLoop(ByteBuffer src,
 214                                             CharBuffer dst)
 215         {
 216             // This method is optimized for ASCII input.
 217             byte[] sa = src.array();
 218             int sp = src.arrayOffset() + src.position();
 219             int sl = src.arrayOffset() + src.limit();
 220 
 221             char[] da = dst.array();
 222             int dp = dst.arrayOffset() + dst.position();
 223             int dl = dst.arrayOffset() + dst.limit();
 224             int dlASCII = dp + Math.min(sl - sp, dl - dp);
 225 
 226             // ASCII only loop
 227             while (dp < dlASCII && sa[sp] >= 0)
 228                 da[dp++] = (char) sa[sp++];
 229             while (sp < sl) {
 230                 int b1 = sa[sp];
 231                 if (b1 >= 0) {
 232                     // 1 byte, 7 bits: 0xxxxxxx
 233                     if (dp >= dl)
 234                         return xflow(src, sp, sl, dst, dp, 1);
 235                     da[dp++] = (char) b1;
 236                     sp++;
 237                 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
 238                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 239                     //                   [C2..DF] [80..BF]
 240                     if (sl - sp < 2 || dp >= dl)
 241                         return xflow(src, sp, sl, dst, dp, 2);
 242                     int b2 = sa[sp + 1];
 243                     // Now we check the first byte of 2-byte sequence as
 244                     //     if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0)
 245                     // no longer need to check b1 against c1 & c0 for
 246                     // malformed as we did in previous version
 247                     //   (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80;
 248                     // only need to check the second byte b2.
 249                     if (isNotContinuation(b2))
 250                         return malformedForLength(src, sp, dst, dp, 1);
 251                     da[dp++] = (char) (((b1 << 6) ^ b2)
 252                                        ^
 253                                        (((byte) 0xC0 << 6) ^
 254                                         ((byte) 0x80 << 0)));
 255                     sp += 2;
 256                 } else if ((b1 >> 4) == -2) {
 257                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 258                     int srcRemaining = sl - sp;
 259                     if (srcRemaining < 3 || dp >= dl) {
 260                         if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1]))
 261                             return malformedForLength(src, sp, dst, dp, 1);
 262                         return xflow(src, sp, sl, dst, dp, 3);
 263                     }
 264                     int b2 = sa[sp + 1];
 265                     int b3 = sa[sp + 2];
 266                     if (isMalformed3(b1, b2, b3))
 267                         return malformed(src, sp, dst, dp, 3);
 268                     char c = (char)
 269                         ((b1 << 12) ^
 270                          (b2 <<  6) ^
 271                          (b3 ^
 272                           (((byte) 0xE0 << 12) ^
 273                            ((byte) 0x80 <<  6) ^
 274                            ((byte) 0x80 <<  0))));
 275                     if (Character.isSurrogate(c))
 276                         return malformedForLength(src, sp, dst, dp, 3);
 277                     da[dp++] = c;
 278                     sp += 3;
 279                 } else if ((b1 >> 3) == -2) {
 280                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 281                     int srcRemaining = sl - sp;
 282                     if (srcRemaining < 4 || dl - dp < 2) {  
 283                         if (srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1]))
 284                             return malformedForLength(src, sp, dst, dp, 1);
 285                         if (srcRemaining > 2 && isMalformed4_3(sa[sp + 2]))
 286                             return malformedForLength(src, sp, dst, dp, 2);
 287                         return xflow(src, sp, sl, dst, dp, 4);
 288                     }
 289                     int b2 = sa[sp + 1];
 290                     int b3 = sa[sp + 2];
 291                     int b4 = sa[sp + 3];
 292                     int uc = ((b1 << 18) ^
 293                               (b2 << 12) ^
 294                               (b3 <<  6) ^
 295                               (b4 ^
 296                                (((byte) 0xF0 << 18) ^
 297                                 ((byte) 0x80 << 12) ^
 298                                 ((byte) 0x80 <<  6) ^
 299                                 ((byte) 0x80 <<  0))));
 300                     if (isMalformed4(b2, b3, b4) ||
 301                         // shortest form check
 302                         !Character.isSupplementaryCodePoint(uc)) {
 303                         return malformed(src, sp, dst, dp, 4);
 304                     }
 305                     da[dp++] = Character.highSurrogate(uc);
 306                     da[dp++] = Character.lowSurrogate(uc);
 307                     sp += 4;
 308                 } else
 309                     return malformed(src, sp, dst, dp, 1);
 310             }
 311             return xflow(src, sp, sl, dst, dp, 0);
 312         }
 313 
 314         private CoderResult decodeBufferLoop(ByteBuffer src,
 315                                              CharBuffer dst)
 316         {
 317             int mark = src.position();
 318             int limit = src.limit();
 319             while (mark < limit) {
 320                 int b1 = src.get();
 321                 if (b1 >= 0) {
 322                     // 1 byte, 7 bits: 0xxxxxxx
 323                     if (dst.remaining() < 1)
 324                         return xflow(src, mark, 1); // overflow
 325                     dst.put((char) b1);
 326                     mark++;
 327                 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
 328                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 329                     if (limit - mark < 2|| dst.remaining() < 1)
 330                         return xflow(src, mark, 2);
 331                     int b2 = src.get();
 332                     if (isNotContinuation(b2))
 333                         return malformedForLength(src, mark, 1);
 334                      dst.put((char) (((b1 << 6) ^ b2)
 335                                     ^
 336                                     (((byte) 0xC0 << 6) ^
 337                                      ((byte) 0x80 << 0))));
 338                     mark += 2;
 339                 } else if ((b1 >> 4) == -2) {
 340                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 341                     int srcRemaining = limit - mark;
 342                     if (srcRemaining < 3 || dst.remaining() < 1) {
 343                         if (srcRemaining > 1 && isMalformed3_2(b1, src.get()))
 344                             return malformedForLength(src, mark, 1);
 345                         return xflow(src, mark, 3);
 346                     }
 347                     int b2 = src.get();
 348                     int b3 = src.get();
 349                     if (isMalformed3(b1, b2, b3))
 350                         return malformed(src, mark, 3);
 351                     char c = (char)
 352                         ((b1 << 12) ^
 353                          (b2 <<  6) ^
 354                          (b3 ^
 355                           (((byte) 0xE0 << 12) ^
 356                            ((byte) 0x80 <<  6) ^
 357                            ((byte) 0x80 <<  0))));
 358                     if (Character.isSurrogate(c))
 359                         return malformedForLength(src, mark, 3);
 360                     dst.put(c);
 361                     mark += 3;
 362                 } else if ((b1 >> 3) == -2) {
 363                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 364                     int srcRemaining = limit - mark;
 365                     if (srcRemaining < 4 || dst.remaining() < 2) {
 366                         if (srcRemaining > 1 && isMalformed4_2(b1, src.get()))
 367                             return malformedForLength(src, mark, 1);
 368                         if (srcRemaining > 2 && isMalformed4_3(src.get()))
 369                             return malformedForLength(src, mark, 2);
 370                         return xflow(src, mark, 4);
 371                     }
 372                     int b2 = src.get();
 373                     int b3 = src.get();
 374                     int b4 = src.get();
 375                     int uc = ((b1 << 18) ^
 376                               (b2 << 12) ^
 377                               (b3 <<  6) ^
 378                               (b4 ^
 379                                (((byte) 0xF0 << 18) ^
 380                                 ((byte) 0x80 << 12) ^
 381                                 ((byte) 0x80 <<  6) ^
 382                                 ((byte) 0x80 <<  0))));
 383                     if (isMalformed4(b2, b3, b4) ||
 384                         // shortest form check
 385                         !Character.isSupplementaryCodePoint(uc)) {
 386                         return malformed(src, mark, 4);
 387                     }
 388                     dst.put(Character.highSurrogate(uc));
 389                     dst.put(Character.lowSurrogate(uc));
 390                     mark += 4;
 391                 } else {
 392                     return malformed(src, mark, 1);
 393                 }
 394             }
 395             return xflow(src, mark, 0);
 396         }
 397 
 398         protected CoderResult decodeLoop(ByteBuffer src,
 399                                          CharBuffer dst)
 400         {
 401             if (src.hasArray() && dst.hasArray())
 402                 return decodeArrayLoop(src, dst);
 403             else
 404                 return decodeBufferLoop(src, dst);
 405         }
 406 
 407         private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp)
 408         {
 409             if (bb == null)
 410                 bb = ByteBuffer.wrap(ba);
 411             bb.position(sp);
 412             return bb;
 413         }
 414 
 415         // returns -1 if there is/are malformed byte(s) and the
 416         // "action" for malformed input is not REPLACE.
 417         public int decode(byte[] sa, int sp, int len, char[] da) {
 418             final int sl = sp + len;
 419             int dp = 0;
 420             int dlASCII = Math.min(len, da.length);
 421             ByteBuffer bb = null;  // only necessary if malformed
 422 
 423             // ASCII only optimized loop
 424             while (dp < dlASCII && sa[sp] >= 0)
 425                 da[dp++] = (char) sa[sp++];
 426 
 427             while (sp < sl) {
 428                 int b1 = sa[sp++];
 429                 if (b1 >= 0) {
 430                     // 1 byte, 7 bits: 0xxxxxxx
 431                     da[dp++] = (char) b1;
 432                 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
 433                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 434                     if (sp < sl) {
 435                         int b2 = sa[sp++];
 436                         if (isNotContinuation(b2)) {
 437                             if (malformedInputAction() != CodingErrorAction.REPLACE)
 438                                 return -1;
 439                             da[dp++] = replacement().charAt(0);
 440                             sp--;            // malformedN(bb, 2) always returns 1
 441                         } else {
 442                             da[dp++] = (char) (((b1 << 6) ^ b2)^
 443                                            (((byte) 0xC0 << 6) ^
 444                                             ((byte) 0x80 << 0)));
 445                         }
 446                         continue;
 447                     }
 448                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 449                         return -1;
 450                     da[dp++] = replacement().charAt(0);
 451                     return dp;
 452                 } else if ((b1 >> 4) == -2) {
 453                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 454                     if (sp + 1 < sl) {
 455                         int b2 = sa[sp++];
 456                         int b3 = sa[sp++];
 457                         if (isMalformed3(b1, b2, b3)) {
 458                             if (malformedInputAction() != CodingErrorAction.REPLACE)
 459                                 return -1;
 460                             da[dp++] = replacement().charAt(0);
 461                             sp -= 3;
 462                             bb = getByteBuffer(bb, sa, sp);
 463                             sp += malformedN(bb, 3).length();
 464                         } else {
 465                             char c = (char)((b1 << 12) ^
 466                                               (b2 <<  6) ^
 467                                               (b3 ^
 468                                               (((byte) 0xE0 << 12) ^
 469                                               ((byte) 0x80 <<  6) ^
 470                                               ((byte) 0x80 <<  0))));
 471                             if (Character.isSurrogate(c)) {
 472                                 if (malformedInputAction() != CodingErrorAction.REPLACE)
 473                                     return -1;
 474                                 da[dp++] = replacement().charAt(0);
 475                             } else {
 476                                 da[dp++] = c;
 477                             }
 478                         }
 479                         continue;
 480                     }
 481                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 482                         return -1;
 483                     if (sp  < sl && isMalformed3_2(b1, sa[sp])) {
 484                         da[dp++] = replacement().charAt(0);
 485                         continue;
 486 
 487                     }
 488                     da[dp++] = replacement().charAt(0);
 489                     return dp;
 490                 } else if ((b1 >> 3) == -2) {
 491                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 492                     if (sp + 2 < sl) {
 493                         int b2 = sa[sp++];
 494                         int b3 = sa[sp++];
 495                         int b4 = sa[sp++];
 496                         int uc = ((b1 << 18) ^
 497                                   (b2 << 12) ^
 498                                   (b3 <<  6) ^
 499                                   (b4 ^
 500                                    (((byte) 0xF0 << 18) ^
 501                                    ((byte) 0x80 << 12) ^
 502                                    ((byte) 0x80 <<  6) ^
 503                                    ((byte) 0x80 <<  0))));
 504                         if (isMalformed4(b2, b3, b4) ||
 505                             // shortest form check
 506                             !Character.isSupplementaryCodePoint(uc)) {
 507                             if (malformedInputAction() != CodingErrorAction.REPLACE)
 508                                 return -1;
 509                             da[dp++] = replacement().charAt(0);
 510                             sp -= 4;
 511                             bb = getByteBuffer(bb, sa, sp);
 512                             sp += malformedN(bb, 4).length();
 513                         } else {
 514                             da[dp++] = Character.highSurrogate(uc);
 515                             da[dp++] = Character.lowSurrogate(uc);
 516                         }
 517                         continue;
 518                     }
 519                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 520                         return -1;
 521 
 522                     if (sp  < sl && isMalformed4_2(b1, sa[sp])) {
 523                         da[dp++] = replacement().charAt(0);
 524                         continue;
 525                     }
 526                     sp++;
 527                     if (sp  < sl && isMalformed4_3(sa[sp])) {
 528                         da[dp++] = replacement().charAt(0);
 529                         continue;
 530                     }
 531                     da[dp++] = replacement().charAt(0);
 532                     return dp;
 533                 } else {
 534                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 535                         return -1;
 536                     da[dp++] = replacement().charAt(0);
 537                 }
 538             }
 539             return dp;
 540         }
 541     }
 542 
 543     private static final class Encoder extends CharsetEncoder
 544                                  implements ArrayEncoder {
 545 
 546         private Encoder(Charset cs) {
 547             super(cs, 1.1f, 3.0f);
 548         }
 549 
 550         public boolean canEncode(char c) {
 551             return !Character.isSurrogate(c);
 552         }
 553 
 554         public boolean isLegalReplacement(byte[] repl) {
 555             return ((repl.length == 1 && repl[0] >= 0) ||
 556                     super.isLegalReplacement(repl));
 557         }
 558 
 559         private static CoderResult overflow(CharBuffer src, int sp,
 560                                             ByteBuffer dst, int dp) {
 561             updatePositions(src, sp, dst, dp);
 562             return CoderResult.OVERFLOW;
 563         }
 564 
 565         private static CoderResult overflow(CharBuffer src, int mark) {
 566             src.position(mark);
 567             return CoderResult.OVERFLOW;
 568         }
 569 
 570         private Surrogate.Parser sgp;
 571         private CoderResult encodeArrayLoop(CharBuffer src,
 572                                             ByteBuffer dst)
 573         {
 574             char[] sa = src.array();
 575             int sp = src.arrayOffset() + src.position();
 576             int sl = src.arrayOffset() + src.limit();
 577 
 578             byte[] da = dst.array();
 579             int dp = dst.arrayOffset() + dst.position();
 580             int dl = dst.arrayOffset() + dst.limit();
 581             int dlASCII = dp + Math.min(sl - sp, dl - dp);
 582 
 583             // ASCII only loop
 584             while (dp < dlASCII && sa[sp] < '\u0080')
 585                 da[dp++] = (byte) sa[sp++];
 586             while (sp < sl) {
 587                 char c = sa[sp];
 588                 if (c < 0x80) {
 589                     // Have at most seven bits
 590                     if (dp >= dl)
 591                         return overflow(src, sp, dst, dp);
 592                     da[dp++] = (byte)c;
 593                 } else if (c < 0x800) {
 594                     // 2 bytes, 11 bits
 595                     if (dl - dp < 2)
 596                         return overflow(src, sp, dst, dp);
 597                     da[dp++] = (byte)(0xc0 | (c >> 6));
 598                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 599                 } else if (Character.isSurrogate(c)) {
 600                     // Have a surrogate pair
 601                     if (sgp == null)
 602                         sgp = new Surrogate.Parser();
 603                     int uc = sgp.parse(c, sa, sp, sl);
 604                     if (uc < 0) {
 605                         updatePositions(src, sp, dst, dp);
 606                         return sgp.error();
 607                     }
 608                     if (dl - dp < 4)
 609                         return overflow(src, sp, dst, dp);
 610                     da[dp++] = (byte)(0xf0 | ((uc >> 18)));
 611                     da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 612                     da[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
 613                     da[dp++] = (byte)(0x80 | (uc & 0x3f));
 614                     sp++;  // 2 chars
 615                 } else {
 616                     // 3 bytes, 16 bits
 617                     if (dl - dp < 3)
 618                         return overflow(src, sp, dst, dp);
 619                     da[dp++] = (byte)(0xe0 | ((c >> 12)));
 620                     da[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
 621                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 622                 }
 623                 sp++;
 624             }
 625             updatePositions(src, sp, dst, dp);
 626             return CoderResult.UNDERFLOW;
 627         }
 628 
 629         private CoderResult encodeBufferLoop(CharBuffer src,
 630                                              ByteBuffer dst)
 631         {
 632             int mark = src.position();
 633             while (src.hasRemaining()) {
 634                 char c = src.get();
 635                 if (c < 0x80) {
 636                     // Have at most seven bits
 637                     if (!dst.hasRemaining())
 638                         return overflow(src, mark);
 639                     dst.put((byte)c);
 640                 } else if (c < 0x800) {
 641                     // 2 bytes, 11 bits
 642                     if (dst.remaining() < 2)
 643                         return overflow(src, mark);
 644                     dst.put((byte)(0xc0 | (c >> 6)));
 645                     dst.put((byte)(0x80 | (c & 0x3f)));
 646                 } else if (Character.isSurrogate(c)) {
 647                     // Have a surrogate pair
 648                     if (sgp == null)
 649                         sgp = new Surrogate.Parser();
 650                     int uc = sgp.parse(c, src);
 651                     if (uc < 0) {
 652                         src.position(mark);
 653                         return sgp.error();
 654                     }
 655                     if (dst.remaining() < 4)
 656                         return overflow(src, mark);
 657                     dst.put((byte)(0xf0 | ((uc >> 18))));
 658                     dst.put((byte)(0x80 | ((uc >> 12) & 0x3f)));
 659                     dst.put((byte)(0x80 | ((uc >>  6) & 0x3f)));
 660                     dst.put((byte)(0x80 | (uc & 0x3f)));
 661                     mark++;  // 2 chars
 662                 } else {
 663                     // 3 bytes, 16 bits
 664                     if (dst.remaining() < 3)
 665                         return overflow(src, mark);
 666                     dst.put((byte)(0xe0 | ((c >> 12))));
 667                     dst.put((byte)(0x80 | ((c >>  6) & 0x3f)));
 668                     dst.put((byte)(0x80 | (c & 0x3f)));
 669                 }
 670                 mark++;
 671             }
 672             src.position(mark);
 673             return CoderResult.UNDERFLOW;
 674         }
 675 
 676         protected final CoderResult encodeLoop(CharBuffer src,
 677                                                ByteBuffer dst)
 678         {
 679             if (src.hasArray() && dst.hasArray())
 680                 return encodeArrayLoop(src, dst);
 681             else
 682                 return encodeBufferLoop(src, dst);
 683         }
 684 
 685         // returns -1 if there is malformed char(s) and the
 686         // "action" for malformed input is not REPLACE.
 687         public int encode(char[] sa, int sp, int len, byte[] da) {
 688             int sl = sp + len;
 689             int dp = 0;
 690             int dlASCII = dp + Math.min(len, da.length);
 691 
 692             // ASCII only optimized loop
 693             while (dp < dlASCII && sa[sp] < '\u0080')
 694                 da[dp++] = (byte) sa[sp++];
 695 
 696             while (sp < sl) {
 697                 char c = sa[sp++];
 698                 if (c < 0x80) {
 699                     // Have at most seven bits
 700                     da[dp++] = (byte)c;
 701                 } else if (c < 0x800) {
 702                     // 2 bytes, 11 bits
 703                     da[dp++] = (byte)(0xc0 | (c >> 6));
 704                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 705                 } else if (Character.isSurrogate(c)) {
 706                     if (sgp == null)
 707                         sgp = new Surrogate.Parser();
 708                     int uc = sgp.parse(c, sa, sp - 1, sl);
 709                     if (uc < 0) {
 710                         if (malformedInputAction() != CodingErrorAction.REPLACE)
 711                             return -1;
 712                         da[dp++] = replacement()[0];
 713                     } else {
 714                         da[dp++] = (byte)(0xf0 | ((uc >> 18)));
 715                         da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 716                         da[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
 717                         da[dp++] = (byte)(0x80 | (uc & 0x3f));
 718                         sp++;  // 2 chars
 719                     }
 720                 } else {
 721                     // 3 bytes, 16 bits
 722                     da[dp++] = (byte)(0xe0 | ((c >> 12)));
 723                     da[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
 724                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 725                 }
 726             }
 727             return dp;
 728         }
 729     }
 730 }