1 /*
   2  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.nio.cs;
  27 
  28 import java.nio.Buffer;
  29 import java.nio.ByteBuffer;
  30 import java.nio.CharBuffer;
  31 import java.nio.charset.Charset;
  32 import java.nio.charset.CharsetDecoder;
  33 import java.nio.charset.CharsetEncoder;
  34 import java.nio.charset.CoderResult;
  35 import java.nio.charset.CodingErrorAction;
  36 
  37 /* Legal UTF-8 Byte Sequences
  38  *
  39  * #    Code Points      Bits   Bit/Byte pattern
  40  * 1                     7      0xxxxxxx
  41  *      U+0000..U+007F          00..7F
  42  *
  43  * 2                     11     110xxxxx    10xxxxxx
  44  *      U+0080..U+07FF          C2..DF      80..BF
  45  *
  46  * 3                     16     1110xxxx    10xxxxxx    10xxxxxx
  47  *      U+0800..U+0FFF          E0          A0..BF      80..BF
  48  *      U+1000..U+FFFF          E1..EF      80..BF      80..BF
  49  *
  50  * 4                     21     11110xxx    10xxxxxx    10xxxxxx    10xxxxxx
  51  *     U+10000..U+3FFFF         F0          90..BF      80..BF      80..BF
  52  *     U+40000..U+FFFFF         F1..F3      80..BF      80..BF      80..BF
 *    U+100000..U+10FFFF        F4          80..8F      80..BF      80..BF
  54  *
  55  */
  56 
  57 class UTF_8 extends Unicode
  58 {
  59     public UTF_8() {
  60         super("UTF-8", StandardCharsets.aliases_UTF_8);
  61     }
  62 
  63     public String historicalName() {
  64         return "UTF8";
  65     }
  66 
  67     public CharsetDecoder newDecoder() {
  68         return new Decoder(this);
  69     }
  70 
  71     public CharsetEncoder newEncoder() {
  72         return new Encoder(this);
  73     }
  74 
  75     static final void updatePositions(Buffer src, int sp,
  76                                       Buffer dst, int dp) {
  77         src.position(sp - src.arrayOffset());
  78         dst.position(dp - dst.arrayOffset());
  79     }
  80 
  81     private static class Decoder extends CharsetDecoder
  82                                  implements ArrayDecoder {
  83         private Decoder(Charset cs) {
  84             super(cs, 1.0f, 1.0f);
  85         }
  86 
  87         private static boolean isNotContinuation(int b) {
  88             return (b & 0xc0) != 0x80;
  89         }
  90 
  91         //  [C2..DF] [80..BF]
  92         private static boolean isMalformed2(int b1, int b2) {
  93             return (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80;
  94         }
  95 
  96         //  [E0]     [A0..BF] [80..BF]
  97         //  [E1..EF] [80..BF] [80..BF]
  98         private static boolean isMalformed3(int b1, int b2, int b3) {
  99             return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
 100                    (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
 101         }
 102 
 103         //  [F0]     [90..BF] [80..BF] [80..BF]
 104         //  [F1..F3] [80..BF] [80..BF] [80..BF]
 105         //  [F4]     [80..8F] [80..BF] [80..BF]
 106         //  only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...]
 107         //  will be checked by Character.isSupplementaryCodePoint(uc)
 108         private static boolean isMalformed4(int b2, int b3, int b4) {
 109             return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
 110                    (b4 & 0xc0) != 0x80;
 111         }
 112 
 113         private static CoderResult lookupN(ByteBuffer src, int n)
 114         {
 115             for (int i = 1; i < n; i++) {
 116                if (isNotContinuation(src.get()))
 117                    return CoderResult.malformedForLength(i);
 118             }
 119             return CoderResult.malformedForLength(n);
 120         }
 121 
 122         private static CoderResult malformedN(ByteBuffer src, int nb) {
 123             switch (nb) {
 124             case 1:
 125                 int b1 = src.get();
 126                 if ((b1 >> 2) == -2) {
 127                     // 5 bytes 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 128                     if (src.remaining() < 4)
 129                         return CoderResult.UNDERFLOW;
 130                     return lookupN(src, 5);
 131                 }
 132                 if ((b1 >> 1) == -2) {
 133                     // 6 bytes 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 134                     if (src.remaining() < 5)
 135                         return CoderResult.UNDERFLOW;
 136                     return lookupN(src, 6);
 137                 }
 138                 return CoderResult.malformedForLength(1);
 139             case 2:                    // always 1
 140                 return CoderResult.malformedForLength(1);
 141             case 3:
 142                 b1 = src.get();
 143                 int b2 = src.get();    // no need to lookup b3
 144                 return CoderResult.malformedForLength(
 145                     ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
 146                      isNotContinuation(b2))?1:2);
 147             case 4:  // we don't care the speed here
 148                 b1 = src.get() & 0xff;
 149                 b2 = src.get() & 0xff;
 150                 if (b1 > 0xf4 ||
 151                     (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
 152                     (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
 153                     isNotContinuation(b2))
 154                     return CoderResult.malformedForLength(1);
 155                 if (isNotContinuation(src.get()))
 156                     return CoderResult.malformedForLength(2);
 157                 return CoderResult.malformedForLength(3);
 158             default:
 159                 assert false;
 160                 return null;
 161             }
 162         }
 163 
 164         private static CoderResult malformed(ByteBuffer src, int sp,
 165                                              CharBuffer dst, int dp,
 166                                              int nb)
 167         {
 168             src.position(sp - src.arrayOffset());
 169             CoderResult cr = malformedN(src, nb);
 170             updatePositions(src, sp, dst, dp);
 171             return cr;
 172         }
 173 
 174         private static CoderResult malformed(ByteBuffer src,
 175                                              int mark, int nb)
 176         {
 177             src.position(mark);
 178             CoderResult cr = malformedN(src, nb);
 179             src.position(mark);
 180             return cr;
 181         }
 182 
 183         private static CoderResult xflow(Buffer src, int sp, int sl,
 184                                          Buffer dst, int dp, int nb) {
 185             updatePositions(src, sp, dst, dp);
 186             return (nb == 0 || sl - sp < nb)
 187                    ?CoderResult.UNDERFLOW:CoderResult.OVERFLOW;
 188         }
 189 
 190         private static CoderResult xflow(Buffer src, int mark, int nb) {
 191             CoderResult cr = (nb == 0 || src.remaining() < (nb - 1))
 192                              ?CoderResult.UNDERFLOW:CoderResult.OVERFLOW;
 193             src.position(mark);
 194             return cr;
 195         }
 196 
 197         private CoderResult decodeArrayLoop(ByteBuffer src,
 198                                             CharBuffer dst)
 199         {
 200             // This method is optimized for ASCII input.
 201             byte[] sa = src.array();
 202             int sp = src.arrayOffset() + src.position();
 203             int sl = src.arrayOffset() + src.limit();
 204 
 205             char[] da = dst.array();
 206             int dp = dst.arrayOffset() + dst.position();
 207             int dl = dst.arrayOffset() + dst.limit();
 208             int dlASCII = dp + Math.min(sl - sp, dl - dp);
 209 
 210             // ASCII only loop
 211             while (dp < dlASCII && sa[sp] >= 0)
 212                 da[dp++] = (char) sa[sp++];
 213 
 214             while (sp < sl) {
 215                 int b1 = sa[sp];
 216                 if (b1 >= 0) {
 217                     // 1 byte, 7 bits: 0xxxxxxx
 218                     if (dp >= dl)
 219                         return xflow(src, sp, sl, dst, dp, 1);
 220                     da[dp++] = (char) b1;
 221                     sp++;
 222                 } else if ((b1 >> 5) == -2) {
 223                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 224                     if (sl - sp < 2 || dp >= dl)
 225                         return xflow(src, sp, sl, dst, dp, 2);
 226                     int b2 = sa[sp + 1];
 227                     if (isMalformed2(b1, b2))
 228                         return malformed(src, sp, dst, dp, 2);
 229                     da[dp++] = (char) (((b1 << 6) ^ b2)
 230                                        ^
 231                                        (((byte) 0xC0 << 6) ^
 232                                         ((byte) 0x80 << 0)));
 233                     sp += 2;
 234                 } else if ((b1 >> 4) == -2) {
 235                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 236                     if (sl - sp < 3 || dp >= dl)
 237                         return xflow(src, sp, sl, dst, dp, 3);
 238                     int b2 = sa[sp + 1];
 239                     int b3 = sa[sp + 2];
 240                     if (isMalformed3(b1, b2, b3))
 241                         return malformed(src, sp, dst, dp, 3);
 242                     da[dp++] = (char)
 243                         ((b1 << 12) ^
 244                          (b2 <<  6) ^
 245                          (b3 ^
 246                           (((byte) 0xE0 << 12) ^
 247                            ((byte) 0x80 <<  6) ^
 248                            ((byte) 0x80 <<  0))));
 249                     sp += 3;
 250                 } else if ((b1 >> 3) == -2) {
 251                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 252                     if (sl - sp < 4 || dl - dp < 2)
 253                         return xflow(src, sp, sl, dst, dp, 4);
 254                     int b2 = sa[sp + 1];
 255                     int b3 = sa[sp + 2];
 256                     int b4 = sa[sp + 3];
 257                     int uc = ((b1 << 18) ^
 258                               (b2 << 12) ^
 259                               (b3 <<  6) ^
 260                               (b4 ^
 261                                (((byte) 0xF0 << 18) ^
 262                                 ((byte) 0x80 << 12) ^
 263                                 ((byte) 0x80 <<  6) ^
 264                                 ((byte) 0x80 <<  0))));
 265                     if (isMalformed4(b2, b3, b4) ||
 266                         // shortest form check
 267                         !Character.isSupplementaryCodePoint(uc)) {
 268                         return malformed(src, sp, dst, dp, 4);
 269                     }
 270                     da[dp++] = Character.highSurrogate(uc);
 271                     da[dp++] = Character.lowSurrogate(uc);
 272                     sp += 4;
 273                 } else
 274                     return malformed(src, sp, dst, dp, 1);
 275             }
 276             return xflow(src, sp, sl, dst, dp, 0);
 277         }
 278 
 279         private CoderResult decodeBufferLoop(ByteBuffer src,
 280                                              CharBuffer dst)
 281         {
 282             int mark = src.position();
 283             int limit = src.limit();
 284             while (mark < limit) {
 285                 int b1 = src.get();
 286                 if (b1 >= 0) {
 287                     // 1 byte, 7 bits: 0xxxxxxx
 288                     if (dst.remaining() < 1)
 289                         return xflow(src, mark, 1); // overflow
 290                     dst.put((char) b1);
 291                     mark++;
 292                 } else if ((b1 >> 5) == -2) {
 293                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 294                     if (limit - mark < 2|| dst.remaining() < 1)
 295                         return xflow(src, mark, 2);
 296                     int b2 = src.get();
 297                     if (isMalformed2(b1, b2))
 298                         return malformed(src, mark, 2);
 299                     dst.put((char) (((b1 << 6) ^ b2)
 300                                     ^
 301                                     (((byte) 0xC0 << 6) ^
 302                                      ((byte) 0x80 << 0))));
 303                     mark += 2;
 304                 } else if ((b1 >> 4) == -2) {
 305                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 306                     if (limit - mark < 3 || dst.remaining() < 1)
 307                         return xflow(src, mark, 3);
 308                     int b2 = src.get();
 309                     int b3 = src.get();
 310                     if (isMalformed3(b1, b2, b3))
 311                         return malformed(src, mark, 3);
 312                     dst.put((char)
 313                             ((b1 << 12) ^
 314                              (b2 <<  6) ^
 315                              (b3 ^
 316                               (((byte) 0xE0 << 12) ^
 317                                ((byte) 0x80 <<  6) ^
 318                                ((byte) 0x80 <<  0)))));
 319                     mark += 3;
 320                 } else if ((b1 >> 3) == -2) {
 321                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 322                     if (limit - mark < 4 || dst.remaining() < 2)
 323                         return xflow(src, mark, 4);
 324                     int b2 = src.get();
 325                     int b3 = src.get();
 326                     int b4 = src.get();
 327                     int uc = ((b1 << 18) ^
 328                               (b2 << 12) ^
 329                               (b3 <<  6) ^
 330                               (b4 ^
 331                                (((byte) 0xF0 << 18) ^
 332                                 ((byte) 0x80 << 12) ^
 333                                 ((byte) 0x80 <<  6) ^
 334                                 ((byte) 0x80 <<  0))));
 335                     if (isMalformed4(b2, b3, b4) ||
 336                         // shortest form check
 337                         !Character.isSupplementaryCodePoint(uc)) {
 338                         return malformed(src, mark, 4);
 339                     }
 340                     dst.put(Character.highSurrogate(uc));
 341                     dst.put(Character.lowSurrogate(uc));
 342                     mark += 4;
 343                 } else {
 344                     return malformed(src, mark, 1);
 345                 }
 346             }
 347             return xflow(src, mark, 0);
 348         }
 349 
 350         protected CoderResult decodeLoop(ByteBuffer src,
 351                                          CharBuffer dst)
 352         {
 353             if (src.hasArray() && dst.hasArray())
 354                 return decodeArrayLoop(src, dst);
 355             else
 356                 return decodeBufferLoop(src, dst);
 357         }
 358 
 359         private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp)
 360         {
 361             if (bb == null)
 362                 bb = ByteBuffer.wrap(ba);
 363             bb.position(sp);
 364             return bb;
 365         }
 366 
 367         // returns -1 if there is malformed byte(s) and the
 368         // "action" for malformed input is not REPLACE.
 369         public int decode(byte[] sa, int sp, int len, char[] da) {
 370             final int sl = sp + len;
 371             int dp = 0;
 372             int dlASCII = Math.min(len, da.length);
 373             ByteBuffer bb = null;  // only necessary if malformed
 374 
 375             // ASCII only optimized loop
 376             while (dp < dlASCII && sa[sp] >= 0)
 377                 da[dp++] = (char) sa[sp++];
 378 
 379             while (sp < sl) {
 380                 int b1 = sa[sp++];
 381                 if (b1 >= 0) {
 382                     // 1 byte, 7 bits: 0xxxxxxx
 383                     da[dp++] = (char) b1;
 384                 } else if ((b1 >> 5) == -2) {
 385                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 386                     if (sp < sl) {
 387                         int b2 = sa[sp++];
 388                         if (isMalformed2(b1, b2)) {
 389                             if (malformedInputAction() != CodingErrorAction.REPLACE)
 390                                 return -1;
 391                             da[dp++] = replacement().charAt(0);
 392                             sp--;            // malformedN(bb, 2) always returns 1
 393                         } else {
 394                             da[dp++] = (char) (((b1 << 6) ^ b2)^
 395                                            (((byte) 0xC0 << 6) ^
 396                                             ((byte) 0x80 << 0)));
 397                         }
 398                         continue;
 399                     }
 400                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 401                         return -1;
 402                     da[dp++] = replacement().charAt(0);
 403                     return dp;
 404                 } else if ((b1 >> 4) == -2) {
 405                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 406                     if (sp + 1 < sl) {
 407                         int b2 = sa[sp++];
 408                         int b3 = sa[sp++];
 409                         if (isMalformed3(b1, b2, b3)) {
 410                             if (malformedInputAction() != CodingErrorAction.REPLACE)
 411                                 return -1;
 412                             da[dp++] = replacement().charAt(0);
 413                             sp -=3;
 414                             bb = getByteBuffer(bb, sa, sp);
 415                             sp += malformedN(bb, 3).length();
 416                         } else {
 417                             da[dp++] = (char)((b1 << 12) ^
 418                                               (b2 <<  6) ^
 419                                               (b3 ^
 420                                               (((byte) 0xE0 << 12) ^
 421                                               ((byte) 0x80 <<  6) ^
 422                                               ((byte) 0x80 <<  0))));
 423                         }
 424                         continue;
 425                     }
 426                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 427                         return -1;
 428                     da[dp++] = replacement().charAt(0);
 429                     return dp;
 430                 } else if ((b1 >> 3) == -2) {
 431                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 432                     if (sp + 2 < sl) {
 433                         int b2 = sa[sp++];
 434                         int b3 = sa[sp++];
 435                         int b4 = sa[sp++];
 436                         int uc = ((b1 << 18) ^
 437                                   (b2 << 12) ^
 438                                   (b3 <<  6) ^
 439                                   (b4 ^
 440                                    (((byte) 0xF0 << 18) ^
 441                                    ((byte) 0x80 << 12) ^
 442                                    ((byte) 0x80 <<  6) ^
 443                                    ((byte) 0x80 <<  0))));
 444                         if (isMalformed4(b2, b3, b4) ||
 445                             // shortest form check
 446                             !Character.isSupplementaryCodePoint(uc)) {
 447                             if (malformedInputAction() != CodingErrorAction.REPLACE)
 448                                 return -1;
 449                             da[dp++] = replacement().charAt(0);
 450                             sp -= 4;
 451                             bb = getByteBuffer(bb, sa, sp);
 452                             sp += malformedN(bb, 4).length();
 453                         } else {
 454                             da[dp++] = Character.highSurrogate(uc);
 455                             da[dp++] = Character.lowSurrogate(uc);
 456                         }
 457                         continue;
 458                     }
 459                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 460                         return -1;
 461                     da[dp++] = replacement().charAt(0);
 462                     return dp;
 463                 } else {
 464                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 465                         return -1;
 466                     da[dp++] = replacement().charAt(0);
 467                     sp--;
 468                     bb = getByteBuffer(bb, sa, sp);
 469                     CoderResult cr = malformedN(bb, 1);
 470                     if (!cr.isError()) {
 471                         // leading byte for 5 or 6-byte, but don't have enough
 472                         // bytes in buffer to check. Consumed rest as malformed.
 473                         return dp;
 474                     }
 475                     sp +=  cr.length();
 476                 }
 477             }
 478             return dp;
 479         }
 480     }
 481 
 482     private static class Encoder extends CharsetEncoder
 483                                  implements ArrayEncoder {
 484 
 485         private Encoder(Charset cs) {
 486             super(cs, 1.1f, 3.0f);
 487         }
 488 
 489         public boolean canEncode(char c) {
 490             return !Character.isSurrogate(c);
 491         }
 492 
 493         public boolean isLegalReplacement(byte[] repl) {
 494             return ((repl.length == 1 && repl[0] >= 0) ||
 495                     super.isLegalReplacement(repl));
 496         }
 497 
 498         private static CoderResult overflow(CharBuffer src, int sp,
 499                                             ByteBuffer dst, int dp) {
 500             updatePositions(src, sp, dst, dp);
 501             return CoderResult.OVERFLOW;
 502         }
 503 
 504         private static CoderResult overflow(CharBuffer src, int mark) {
 505             src.position(mark);
 506             return CoderResult.OVERFLOW;
 507         }
 508 
 509         private Surrogate.Parser sgp;
 510         private CoderResult encodeArrayLoop(CharBuffer src,
 511                                             ByteBuffer dst)
 512         {
 513             char[] sa = src.array();
 514             int sp = src.arrayOffset() + src.position();
 515             int sl = src.arrayOffset() + src.limit();
 516 
 517             byte[] da = dst.array();
 518             int dp = dst.arrayOffset() + dst.position();
 519             int dl = dst.arrayOffset() + dst.limit();
 520             int dlASCII = dp + Math.min(sl - sp, dl - dp);
 521 
 522             // ASCII only loop
 523             while (dp < dlASCII && sa[sp] < '\u0080')
 524                 da[dp++] = (byte) sa[sp++];
 525             while (sp < sl) {
 526                 char c = sa[sp];
 527                 if (c < 0x80) {
 528                     // Have at most seven bits
 529                     if (dp >= dl)
 530                         return overflow(src, sp, dst, dp);
 531                     da[dp++] = (byte)c;
 532                 } else if (c < 0x800) {
 533                     // 2 bytes, 11 bits
 534                     if (dl - dp < 2)
 535                         return overflow(src, sp, dst, dp);
 536                     da[dp++] = (byte)(0xc0 | (c >> 6));
 537                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 538                 } else if (Character.isSurrogate(c)) {
 539                     // Have a surrogate pair
 540                     if (sgp == null)
 541                         sgp = new Surrogate.Parser();
 542                     int uc = sgp.parse(c, sa, sp, sl);
 543                     if (uc < 0) {
 544                         updatePositions(src, sp, dst, dp);
 545                         return sgp.error();
 546                     }
 547                     if (dl - dp < 4)
 548                         return overflow(src, sp, dst, dp);
 549                     da[dp++] = (byte)(0xf0 | ((uc >> 18)));
 550                     da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 551                     da[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
 552                     da[dp++] = (byte)(0x80 | (uc & 0x3f));
 553                     sp++;  // 2 chars
 554                 } else {
 555                     // 3 bytes, 16 bits
 556                     if (dl - dp < 3)
 557                         return overflow(src, sp, dst, dp);
 558                     da[dp++] = (byte)(0xe0 | ((c >> 12)));
 559                     da[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
 560                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 561                 }
 562                 sp++;
 563             }
 564             updatePositions(src, sp, dst, dp);
 565             return CoderResult.UNDERFLOW;
 566         }
 567 
 568         private CoderResult encodeBufferLoop(CharBuffer src,
 569                                              ByteBuffer dst)
 570         {
 571             int mark = src.position();
 572             while (src.hasRemaining()) {
 573                 char c = src.get();
 574                 if (c < 0x80) {
 575                     // Have at most seven bits
 576                     if (!dst.hasRemaining())
 577                         return overflow(src, mark);
 578                     dst.put((byte)c);
 579                 } else if (c < 0x800) {
 580                     // 2 bytes, 11 bits
 581                     if (dst.remaining() < 2)
 582                         return overflow(src, mark);
 583                     dst.put((byte)(0xc0 | (c >> 6)));
 584                     dst.put((byte)(0x80 | (c & 0x3f)));
 585                 } else if (Character.isSurrogate(c)) {
 586                     // Have a surrogate pair
 587                     if (sgp == null)
 588                         sgp = new Surrogate.Parser();
 589                     int uc = sgp.parse(c, src);
 590                     if (uc < 0) {
 591                         src.position(mark);
 592                         return sgp.error();
 593                     }
 594                     if (dst.remaining() < 4)
 595                         return overflow(src, mark);
 596                     dst.put((byte)(0xf0 | ((uc >> 18))));
 597                     dst.put((byte)(0x80 | ((uc >> 12) & 0x3f)));
 598                     dst.put((byte)(0x80 | ((uc >>  6) & 0x3f)));
 599                     dst.put((byte)(0x80 | (uc & 0x3f)));
 600                     mark++;  // 2 chars
 601                 } else {
 602                     // 3 bytes, 16 bits
 603                     if (dst.remaining() < 3)
 604                         return overflow(src, mark);
 605                     dst.put((byte)(0xe0 | ((c >> 12))));
 606                     dst.put((byte)(0x80 | ((c >>  6) & 0x3f)));
 607                     dst.put((byte)(0x80 | (c & 0x3f)));
 608                 }
 609                 mark++;
 610             }
 611             src.position(mark);
 612             return CoderResult.UNDERFLOW;
 613         }
 614 
 615         protected final CoderResult encodeLoop(CharBuffer src,
 616                                                ByteBuffer dst)
 617         {
 618             if (src.hasArray() && dst.hasArray())
 619                 return encodeArrayLoop(src, dst);
 620             else
 621                 return encodeBufferLoop(src, dst);
 622         }
 623 
 624         // returns -1 if there is malformed char(s) and the
 625         // "action" for malformed input is not REPLACE.
 626         public int encode(char[] sa, int sp, int len, byte[] da) {
 627             int sl = sp + len;
 628             int dp = 0;
 629             int dlASCII = dp + Math.min(len, da.length);
 630 
 631             // ASCII only optimized loop
 632             while (dp < dlASCII && sa[sp] < '\u0080')
 633                 da[dp++] = (byte) sa[sp++];
 634 
 635             while (sp < sl) {
 636                 char c = sa[sp++];
 637                 if (c < 0x80) {
 638                     // Have at most seven bits
 639                     da[dp++] = (byte)c;
 640                 } else if (c < 0x800) {
 641                     // 2 bytes, 11 bits
 642                     da[dp++] = (byte)(0xc0 | (c >> 6));
 643                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 644                 } else if (Character.isSurrogate(c)) {
 645                     if (sgp == null)
 646                         sgp = new Surrogate.Parser();
 647                     int uc = sgp.parse(c, sa, sp - 1, sl);
 648                     if (uc < 0) {
 649                         if (malformedInputAction() != CodingErrorAction.REPLACE)
 650                             return -1;
 651                         da[dp++] = replacement()[0];
 652                     } else {
 653                         da[dp++] = (byte)(0xf0 | ((uc >> 18)));
 654                         da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 655                         da[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
 656                         da[dp++] = (byte)(0x80 | (uc & 0x3f));
 657                         sp++;  // 2 chars
 658                     }
 659                 } else {
 660                     // 3 bytes, 16 bits
 661                     da[dp++] = (byte)(0xe0 | ((c >> 12)));
 662                     da[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
 663                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 664                 }
 665             }
 666             return dp;
 667         }
 668     }
 669 }