1 /*
   2  * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.nio.cs;
  27 
  28 import java.nio.Buffer;
  29 import java.nio.ByteBuffer;
  30 import java.nio.CharBuffer;
  31 import java.nio.charset.Charset;
  32 import java.nio.charset.CharsetDecoder;
  33 import java.nio.charset.CharsetEncoder;
  34 import java.nio.charset.CoderResult;
  35 import java.nio.charset.CodingErrorAction;
  36 
  37 /* Legal UTF-8 Byte Sequences
  38  *
  39  * #    Code Points      Bits   Bit/Byte pattern
  40  * 1                     7      0xxxxxxx
  41  *      U+0000..U+007F          00..7F
  42  *
  43  * 2                     11     110xxxxx    10xxxxxx
  44  *      U+0080..U+07FF          C2..DF      80..BF
  45  *
  46  * 3                     16     1110xxxx    10xxxxxx    10xxxxxx
  47  *      U+0800..U+0FFF          E0          A0..BF      80..BF
  48  *      U+1000..U+FFFF          E1..EF      80..BF      80..BF
  49  *
  50  * 4                     21     11110xxx    10xxxxxx    10xxxxxx    10xxxxxx
  51  *     U+10000..U+3FFFF         F0          90..BF      80..BF      80..BF
  52  *     U+40000..U+FFFFF         F1..F3      80..BF      80..BF      80..BF
  53  *    U+100000..U+10FFFF        F4          80..8F      80..BF      80..BF
  54  *
  55  */
  56 
  57 class UTF_8 extends Unicode
  58 {
  59     public UTF_8() {
             // Hands the charset name "UTF-8" and the alias list held in
             // StandardCharsets.aliases_UTF_8 to the Unicode superclass.
             super("UTF-8",
                   StandardCharsets.aliases_UTF_8);
         }
  62 
  63     public String historicalName() {
             // The fixed string "UTF8" (no hyphen) is this charset's
             // historical name.
             final String legacyName = "UTF8";
             return legacyName;
         }
  66 
  67     public CharsetDecoder newDecoder() {
             // Every call creates a new Decoder bound to this charset.
             final CharsetDecoder decoder = new Decoder(this);
             return decoder;
         }
  70 
  71     public CharsetEncoder newEncoder() {
             // Every call creates a new Encoder bound to this charset.
             final CharsetEncoder encoder = new Encoder(this);
             return encoder;
         }
  74 
  75     private static final void updatePositions(Buffer src, int sp,
                                                   Buffer dst, int dp) {
             // sp and dp are indices into the backing arrays; subtracting each
             // buffer's arrayOffset() turns them back into buffer positions.
             final int srcPosition = sp - src.arrayOffset();
             src.position(srcPosition);
             final int dstPosition = dp - dst.arrayOffset();
             dst.position(dstPosition);
         }
  80 
  81     private static class Decoder extends CharsetDecoder
  82                                  implements ArrayDecoder {
  83         private Decoder(Charset cs) {
                 // 1.0f is passed for both averageCharsPerByte and
                 // maxCharsPerByte of CharsetDecoder.
                 super(cs,
                       1.0f,
                       1.0f);
             }
  86 
  87         private static boolean isNotContinuation(int b) {
  88             return (b & 0xc0) != 0x80;
  89         }
  90 
  91         //  [E0]     [A0..BF] [80..BF]
             //  [E1..EF] [80..BF] [80..BF]
             //  Reports whether (b1, b2, b3) violates the patterns above.  b1 is
             //  compared with the sign-extended (byte)0xe0, so it has to be
             //  passed as a plain, unmasked byte value.
             private static boolean isMalformed3(int b1, int b2, int b3) {
                 // E0 followed by 80..9F would be an overlong encoding.
                 if (b1 == (byte) 0xe0 && (b2 & 0xe0) == 0x80) {
                     return true;
                 }
                 // Both trailing bytes have to look like 10xxxxxx.
                 if ((b2 & 0xc0) != 0x80) {
                     return true;
                 }
                 return (b3 & 0xc0) != 0x80;
             }
  97 
  98         // only used when there is only one byte left in src buffer
  99         private static boolean isMalformed3_2(int b1, int b2) {
 100             return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
 101                    (b2 & 0xc0) != 0x80;
 102         }
 103 
 104         //  [F0]     [90..BF] [80..BF] [80..BF]
             //  [F1..F3] [80..BF] [80..BF] [80..BF]
             //  [F4]     [80..8F] [80..BF] [80..BF]
             //  only checks that b2..b4 are in the 80..BF range here; the
             //  [0xf0, 0x80..0x8f] and [0xf4, 0x90..] cases are left to the
             //  callers' Character.isSupplementaryCodePoint(uc) check
             private static boolean isMalformed4(int b2, int b3, int b4) {
                 if ((b2 & 0xc0) != 0x80) {
                     return true;
                 }
                 if ((b3 & 0xc0) != 0x80) {
                     return true;
                 }
                 return (b4 & 0xc0) != 0x80;
             }
 113 
 114         // only used when there is less than 4 bytes left in src buffer.
             // both b1 and b2 should be "& 0xff" before passed in.
             // tests if b1 and b2 are malformed as the first 2 bytes of a
             // legal 4-byte utf-8 byte sequence.
             private static boolean isMalformed4_2(int b1, int b2) {
                 // F0 has to be followed by 90..BF.
                 if (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) {
                     return true;
                 }
                 // F4 has to be followed by 80..8F.
                 if (b1 == 0xf4 && (b2 & 0xf0) != 0x80) {
                     return true;
                 }
                 // Any other lead byte only needs a continuation byte next.
                 return (b2 & 0xc0) != 0x80;
             }
 121 
 122         // tests if b1 and b2 are malformed as the first 2 bytes of a
 123         // legal`4-byte utf-8 byte sequence.
 124         // only used when there is less than 4 bytes left in src buffer,
 125         // after isMalformed4_2 has been invoked.
 126         private static boolean isMalformed4_3(int b3) {
 127             return (b3 & 0xc0) != 0x80;
 128         }
 129 
 130         private static CoderResult lookupN(ByteBuffer src, int n)
 131         {
 132             for (int i = 1; i < n; i++) {
 133                if (isNotContinuation(src.get()))
 134                    return CoderResult.malformedForLength(i);
 135             }
 136             return CoderResult.malformedForLength(n);
 137         }
 138 
 139         private static CoderResult malformedN(ByteBuffer src, int nb) {
 140             switch (nb) {
 141             case 1:
 142             case 2:                    // always 1
 143                 return CoderResult.malformedForLength(1);
 144             case 3:
 145                 int b1 = src.get();
 146                 int b2 = src.get();    // no need to lookup b3
 147                 return CoderResult.malformedForLength(
 148                     ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
 149                      isNotContinuation(b2)) ? 1 : 2);
 150             case 4:  // we don't care the speed here
 151                 b1 = src.get() & 0xff;
 152                 b2 = src.get() & 0xff;
 153                 if (b1 > 0xf4 ||
 154                     (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
 155                     (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
 156                     isNotContinuation(b2))
 157                     return CoderResult.malformedForLength(1);
 158                 if (isNotContinuation(src.get()))
 159                     return CoderResult.malformedForLength(2);
 160                 return CoderResult.malformedForLength(3);
 161             default:
 162                 assert false;
 163                 return null;
 164             }
 165         }
 166 
 167         private static CoderResult malformed(ByteBuffer src, int sp,
 168                                              CharBuffer dst, int dp,
 169                                              int nb)
 170         {
 171             src.position(sp - src.arrayOffset());
 172             CoderResult cr = malformedN(src, nb);
 173             updatePositions(src, sp, dst, dp);
 174             return cr;
 175         }
 176 
 177 
 178         private static CoderResult malformed(ByteBuffer src,
 179                                              int mark, int nb)
 180         {
 181             src.position(mark);
 182             CoderResult cr = malformedN(src, nb);
 183             src.position(mark);
 184             return cr;
 185         }
 186 
 187         private static CoderResult malformedForLength(ByteBuffer src,
 188                                                       int sp,
 189                                                       CharBuffer dst,
 190                                                       int dp,
 191                                                       int malformedNB)
 192         {
 193             updatePositions(src, sp, dst, dp);
 194             return CoderResult.malformedForLength(malformedNB);
 195         }
 196 
 197         private static CoderResult malformedForLength(ByteBuffer src,
 198                                                       int mark,
 199                                                       int malformedNB)
 200         {
 201             src.position(mark);
 202             return CoderResult.malformedForLength(malformedNB);
 203         }
 204 
 205 
 206         private static CoderResult xflow(Buffer src, int sp, int sl,
 207                                          Buffer dst, int dp, int nb) {
 208             updatePositions(src, sp, dst, dp);
 209             return (nb == 0 || sl - sp < nb)
 210                    ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
 211         }
 212 
 213         private static CoderResult xflow(Buffer src, int mark, int nb) {
 214             src.position(mark);
 215             return (nb == 0 || src.remaining() < nb)
 216                    ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
 217         }
 218 
 219         private CoderResult decodeArrayLoop(ByteBuffer src,
 220                                             CharBuffer dst)
 221         {
                 // Decodes between the backing arrays of src and dst.  sp/sl and
                 // dp/dl are absolute array indices (arrayOffset() included); each
                 // return goes through a helper that stores them back as buffer
                 // positions.  Yields UNDERFLOW when the input is used up or stops
                 // inside a sequence, OVERFLOW when dst cannot take the next
                 // character, or a malformed-input result for the offending bytes.
 222             // This method is optimized for ASCII input.
 223             byte[] sa = src.array();
 224             int sp = src.arrayOffset() + src.position();
 225             int sl = src.arrayOffset() + src.limit();
 226
 227             char[] da = dst.array();
 228             int dp = dst.arrayOffset() + dst.position();
 229             int dl = dst.arrayOffset() + dst.limit();
                 // dlASCII stops the fast loop at whichever of src/dst runs out
                 // first, so that loop only has to compare dp.
 230             int dlASCII = dp + Math.min(sl - sp, dl - dp);
 231
 232             // ASCII only loop
 233             while (dp < dlASCII && sa[sp] >= 0)
 234                 da[dp++] = (char) sa[sp++];
 235             while (sp < sl) {
                     // b1 is sign-extended: every non-ASCII lead byte is negative.
 236                 int b1 = sa[sp];
 237                 if (b1 >= 0) {
 238                     // 1 byte, 7 bits: 0xxxxxxx
 239                     if (dp >= dl)
 240                         return xflow(src, sp, sl, dst, dp, 1);
 241                     da[dp++] = (char) b1;
 242                     sp++;
                     // (b1 >> 5) == -2 : top three bits are 110.
                     // (b1 & 0x1e) != 0 : rejects the lead bytes C0 and C1.
 243                 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
 244                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 245                     //                   [C2..DF] [80..BF]
 246                     if (sl - sp < 2 || dp >= dl)
 247                         return xflow(src, sp, sl, dst, dp, 2);
 248                     int b2 = sa[sp + 1];
 249                     // Now we check the first byte of 2-byte sequence as
 250                     //     if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0)
 251                     // no longer need to check b1 against c1 & c0 for
 252                     // malformed as we did in previous version
 253                     //   (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80;
 254                     // only need to check the second byte b2.
 255                     if (isNotContinuation(b2))
 256                         return malformedForLength(src, sp, dst, dp, 1);
                         // XOR with the marker bytes shifted the same way removes
                         // the 110/10 prefixes and the sign-extension bits at once.
 257                     da[dp++] = (char) (((b1 << 6) ^ b2)
 258                                        ^
 259                                        (((byte) 0xC0 << 6) ^
 260                                         ((byte) 0x80 << 0)));
 261                     sp += 2;
                     // (b1 >> 4) == -2 : top four bits are 1110 (E0..EF).
 262                 } else if ((b1 >> 4) == -2) {
 263                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 264                     int srcRemaining = sl - sp;
 265                     if (srcRemaining < 3 || dp >= dl) {
                             // Cannot decode now.  If the second byte is present
                             // and already invalid, report that instead of
                             // UNDERFLOW/OVERFLOW.
 266                         if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1]))
 267                             return malformedForLength(src, sp, dst, dp, 1);
 268                         return xflow(src, sp, sl, dst, dp, 3);
 269                     }
 270                     int b2 = sa[sp + 1];
 271                     int b3 = sa[sp + 2];
 272                     if (isMalformed3(b1, b2, b3))
 273                         return malformed(src, sp, dst, dp, 3);
 274                     char c = (char)
 275                         ((b1 << 12) ^
 276                          (b2 <<  6) ^
 277                          (b3 ^
 278                           (((byte) 0xE0 << 12) ^
 279                            ((byte) 0x80 <<  6) ^
 280                            ((byte) 0x80 <<  0))));
                         // A 3-byte sequence that decodes to a surrogate is
                         // reported as malformed, length 3.
 281                     if (Character.isSurrogate(c))
 282                         return malformedForLength(src, sp, dst, dp, 3);
 283                     da[dp++] = c;
 284                     sp += 3;
                     // (b1 >> 3) == -2 : top five bits are 11110 (F0..F7).
 285                 } else if ((b1 >> 3) == -2) {
 286                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 287                     int srcRemaining = sl - sp;
                         // Needs four input bytes and room for two chars (the
                         // result is stored as a surrogate pair).
 288                     if (srcRemaining < 4 || dl - dp < 2) {
                             // Validate whatever prefix is available before
                             // reporting UNDERFLOW/OVERFLOW.
 289                         b1 &= 0xff;
 290                         if (b1 > 0xf4 ||
 291                             srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1] & 0xff))
 292                             return malformedForLength(src, sp, dst, dp, 1);
 293                         if (srcRemaining > 2 && isMalformed4_3(sa[sp + 2]))
 294                             return malformedForLength(src, sp, dst, dp, 2);
 295                         return xflow(src, sp, sl, dst, dp, 4);
 296                     }
 297                     int b2 = sa[sp + 1];
 298                     int b3 = sa[sp + 2];
 299                     int b4 = sa[sp + 3];
 300                     int uc = ((b1 << 18) ^
 301                               (b2 << 12) ^
 302                               (b3 <<  6) ^
 303                               (b4 ^
 304                                (((byte) 0xF0 << 18) ^
 305                                 ((byte) 0x80 << 12) ^
 306                                 ((byte) 0x80 <<  6) ^
 307                                 ((byte) 0x80 <<  0))));
 308                     if (isMalformed4(b2, b3, b4) ||
 309                         // shortest form check
 310                         !Character.isSupplementaryCodePoint(uc)) {
                             // malformed() re-reads the bytes to decide how many
                             // of them make up the malformed prefix.
 311                         return malformed(src, sp, dst, dp, 4);
 312                     }
 313                     da[dp++] = Character.highSurrogate(uc);
 314                     da[dp++] = Character.lowSurrogate(uc);
 315                     sp += 4;
 316                 } else
                         // Remaining lead bytes (80..BF, C0, C1, F8..FF) can
                         // never start a sequence: malformed, length 1.
 317                     return malformed(src, sp, dst, dp, 1);
 318             }
                 // nb == 0 makes xflow return UNDERFLOW: all input was consumed.
 319             return xflow(src, sp, sl, dst, dp, 0);
 320         }
 321 
 322         private CoderResult decodeBufferLoop(ByteBuffer src,
 323                                              CharBuffer dst)
 324         {
                 // Variant used by decodeLoop when either buffer has no accessible
                 // backing array: bytes are read with relative get() and chars
                 // written with put().  mark is the buffer position of the first
                 // byte of the sequence being decoded; every exit goes through a
                 // helper that resets src's position to mark, so a partly read
                 // sequence is never left consumed.
 325             int mark = src.position();
 326             int limit = src.limit();
 327             while (mark < limit) {
                     // b1 is sign-extended: every non-ASCII lead byte is negative.
 328                 int b1 = src.get();
 329                 if (b1 >= 0) {
 330                     // 1 byte, 7 bits: 0xxxxxxx
 331                     if (dst.remaining() < 1)
 332                         return xflow(src, mark, 1); // overflow
 333                     dst.put((char) b1);
 334                     mark++;
                     // top three bits 110, and not one of the lead bytes C0/C1
 335                 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
 336                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 337                     if (limit - mark < 2|| dst.remaining() < 1)
 338                         return xflow(src, mark, 2);
 339                     int b2 = src.get();
 340                     if (isNotContinuation(b2))
 341                         return malformedForLength(src, mark, 1);
                          // XOR with the shifted marker bytes removes the 110/10
                          // prefixes and the sign-extension bits at once.
 342                      dst.put((char) (((b1 << 6) ^ b2)
 343                                     ^
 344                                     (((byte) 0xC0 << 6) ^
 345                                      ((byte) 0x80 << 0))));
 346                     mark += 2;
                     // top four bits 1110 (E0..EF)
 347                 } else if ((b1 >> 4) == -2) {
 348                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 349                     int srcRemaining = limit - mark;
 350                     if (srcRemaining < 3 || dst.remaining() < 1) {
                             // Cannot decode now.  If the second byte is present
                             // and already invalid, report that instead.
 351                         if (srcRemaining > 1 && isMalformed3_2(b1, src.get()))
 352                             return malformedForLength(src, mark, 1);
 353                         return xflow(src, mark, 3);
 354                     }
 355                     int b2 = src.get();
 356                     int b3 = src.get();
 357                     if (isMalformed3(b1, b2, b3))
 358                         return malformed(src, mark, 3);
 359                     char c = (char)
 360                         ((b1 << 12) ^
 361                          (b2 <<  6) ^
 362                          (b3 ^
 363                           (((byte) 0xE0 << 12) ^
 364                            ((byte) 0x80 <<  6) ^
 365                            ((byte) 0x80 <<  0))));
                         // A 3-byte sequence that decodes to a surrogate is
                         // reported as malformed, length 3.
 366                     if (Character.isSurrogate(c))
 367                         return malformedForLength(src, mark, 3);
 368                     dst.put(c);
 369                     mark += 3;
                     // top five bits 11110 (F0..F7)
 370                 } else if ((b1 >> 3) == -2) {
 371                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 372                     int srcRemaining = limit - mark;
                         // Needs four input bytes and room for two chars (the
                         // result is stored as a surrogate pair).
 373                     if (srcRemaining < 4 || dst.remaining() < 2) {
                             // Validate whatever prefix is available before
                             // reporting UNDERFLOW/OVERFLOW.
 374                         b1 &= 0xff;
 375                         if (b1 > 0xf4 ||
 376                             srcRemaining > 1 && isMalformed4_2(b1, src.get() & 0xff))
 377                             return malformedForLength(src, mark, 1);
 378                         if (srcRemaining > 2 && isMalformed4_3(src.get()))
 379                             return malformedForLength(src, mark, 2);
 380                         return xflow(src, mark, 4);
 381                     }
 382                     int b2 = src.get();
 383                     int b3 = src.get();
 384                     int b4 = src.get();
 385                     int uc = ((b1 << 18) ^
 386                               (b2 << 12) ^
 387                               (b3 <<  6) ^
 388                               (b4 ^
 389                                (((byte) 0xF0 << 18) ^
 390                                 ((byte) 0x80 << 12) ^
 391                                 ((byte) 0x80 <<  6) ^
 392                                 ((byte) 0x80 <<  0))));
 393                     if (isMalformed4(b2, b3, b4) ||
 394                         // shortest form check
 395                         !Character.isSupplementaryCodePoint(uc)) {
                             // malformed() re-reads the bytes from mark to decide
                             // how many of them make up the malformed prefix.
 396                         return malformed(src, mark, 4);
 397                     }
 398                     dst.put(Character.highSurrogate(uc));
 399                     dst.put(Character.lowSurrogate(uc));
 400                     mark += 4;
 401                 } else {
                         // Remaining lead bytes (80..BF, C0, C1, F8..FF) can
                         // never start a sequence: malformed, length 1.
 402                     return malformed(src, mark, 1);
 403                 }
 404             }
                 // nb == 0 makes xflow return UNDERFLOW: all input was consumed.
 405             return xflow(src, mark, 0);
 406         }
 407 
 408         protected CoderResult decodeLoop(ByteBuffer src,
 409                                          CharBuffer dst)
 410         {
 411             if (src.hasArray() && dst.hasArray())
 412                 return decodeArrayLoop(src, dst);
 413             else
 414                 return decodeBufferLoop(src, dst);
 415         }
 416 
 417         private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp)
 418         {
 419             if (bb == null)
 420                 bb = ByteBuffer.wrap(ba);
 421             bb.position(sp);
 422             return bb;
 423         }
 424 
 425         // returns -1 if there is/are malformed byte(s) and the
 426         // "action" for malformed input is not REPLACE.
             // Otherwise decodes len bytes of sa, starting at sp, into da from
             // index 0 and returns the number of chars written.  With REPLACE,
             // each malformed run is written as replacement().charAt(0).
             // da is only bounds-checked in the ASCII fast loop -- presumably the
             // caller sizes it for the worst case (TODO confirm against callers).
 427         public int decode(byte[] sa, int sp, int len, char[] da) {
 428             final int sl = sp + len;
 429             int dp = 0;
 430             int dlASCII = Math.min(len, da.length);
 431             ByteBuffer bb = null;  // only necessary if malformed
 432
 433             // ASCII only optimized loop
 434             while (dp < dlASCII && sa[sp] >= 0)
 435                 da[dp++] = (char) sa[sp++];
 436
 437             while (sp < sl) {
                     // Unlike the CoderResult loops, sp is advanced past every
                     // byte as it is read and stepped back when necessary.
 438                 int b1 = sa[sp++];
 439                 if (b1 >= 0) {
 440                     // 1 byte, 7 bits: 0xxxxxxx
 441                     da[dp++] = (char) b1;
                     // top three bits 110, and not one of the lead bytes C0/C1
 442                 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
 443                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 444                     if (sp < sl) {
 445                         int b2 = sa[sp++];
 446                         if (isNotContinuation(b2)) {
 447                             if (malformedInputAction() != CodingErrorAction.REPLACE)
 448                                 return -1;
 449                             da[dp++] = replacement().charAt(0);
 450                             sp--;            // malformedN(bb, 2) always returns 1
 451                         } else {
 452                             da[dp++] = (char) (((b1 << 6) ^ b2)^
 453                                            (((byte) 0xC0 << 6) ^
 454                                             ((byte) 0x80 << 0)));
 455                         }
 456                         continue;
 457                     }
                         // The input ends right after the lead byte.
 458                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 459                         return -1;
 460                     da[dp++] = replacement().charAt(0);
 461                     return dp;
                     // top four bits 1110 (E0..EF)
 462                 } else if ((b1 >> 4) == -2) {
 463                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 464                     if (sp + 1 < sl) {
 465                         int b2 = sa[sp++];
 466                         int b3 = sa[sp++];
 467                         if (isMalformed3(b1, b2, b3)) {
 468                             if (malformedInputAction() != CodingErrorAction.REPLACE)
 469                                 return -1;
 470                             da[dp++] = replacement().charAt(0);
                                 // Step back to the lead byte and skip only as
                                 // many bytes as malformedN counts as malformed.
 471                             sp -= 3;
 472                             bb = getByteBuffer(bb, sa, sp);
 473                             sp += malformedN(bb, 3).length();
 474                         } else {
 475                             char c = (char)((b1 << 12) ^
 476                                               (b2 <<  6) ^
 477                                               (b3 ^
 478                                               (((byte) 0xE0 << 12) ^
 479                                               ((byte) 0x80 <<  6) ^
 480                                               ((byte) 0x80 <<  0))));
                                 // A decoded surrogate is malformed; all three
                                 // bytes are consumed for one replacement char.
 481                             if (Character.isSurrogate(c)) {
 482                                 if (malformedInputAction() != CodingErrorAction.REPLACE)
 483                                     return -1;
 484                                 da[dp++] = replacement().charAt(0);
 485                             } else {
 486                                 da[dp++] = c;
 487                             }
 488                         }
 489                         continue;
 490                     }
                         // Fewer than three bytes are available for this sequence.
 491                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 492                         return -1;
                         // If the one remaining byte cannot be the second byte,
                         // replace the lead byte alone and decode that byte next.
 493                     if (sp  < sl && isMalformed3_2(b1, sa[sp])) {
 494                         da[dp++] = replacement().charAt(0);
 495                         continue;
 496
 497                     }
                         // Otherwise the truncated tail becomes one replacement.
 498                     da[dp++] = replacement().charAt(0);
 499                     return dp;
                     // top five bits 11110 (F0..F7)
 500                 } else if ((b1 >> 3) == -2) {
 501                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 502                     if (sp + 2 < sl) {
 503                         int b2 = sa[sp++];
 504                         int b3 = sa[sp++];
 505                         int b4 = sa[sp++];
 506                         int uc = ((b1 << 18) ^
 507                                   (b2 << 12) ^
 508                                   (b3 <<  6) ^
 509                                   (b4 ^
 510                                    (((byte) 0xF0 << 18) ^
 511                                    ((byte) 0x80 << 12) ^
 512                                    ((byte) 0x80 <<  6) ^
 513                                    ((byte) 0x80 <<  0))));
 514                         if (isMalformed4(b2, b3, b4) ||
 515                             // shortest form check
 516                             !Character.isSupplementaryCodePoint(uc)) {
 517                             if (malformedInputAction() != CodingErrorAction.REPLACE)
 518                                 return -1;
 519                             da[dp++] = replacement().charAt(0);
                                 // Step back to the lead byte and skip only as
                                 // many bytes as malformedN counts as malformed.
 520                             sp -= 4;
 521                             bb = getByteBuffer(bb, sa, sp);
 522                             sp += malformedN(bb, 4).length();
 523                         } else {
 524                             da[dp++] = Character.highSurrogate(uc);
 525                             da[dp++] = Character.lowSurrogate(uc);
 526                         }
 527                         continue;
 528                     }
                         // Fewer than four bytes are available for this sequence.
 529                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 530                         return -1;
 531                     b1 &= 0xff;
                         // Bad lead byte or bad second byte: replace the lead byte
                         // alone and resume at the byte after it.
 532                     if (b1 > 0xf4 ||
 533                         sp  < sl && isMalformed4_2(b1, sa[sp] & 0xff)) {
 534                         da[dp++] = replacement().charAt(0);
 535                         continue;
 536                     }
                         // Step over the second byte (sp may move past sl here;
                         // the sp < sl test below covers that).
 537                     sp++;
                         // Bad third byte: the first two bytes get one replacement
                         // and decoding resumes at the third byte.
 538                     if (sp  < sl && isMalformed4_3(sa[sp])) {
 539                         da[dp++] = replacement().charAt(0);
 540                         continue;
 541                     }
                         // Otherwise the truncated tail becomes one replacement.
 542                     da[dp++] = replacement().charAt(0);
 543                     return dp;
 544                 } else {
                         // Remaining lead bytes (80..BF, C0, C1, F8..FF): one
                         // replacement char per byte.
 545                     if (malformedInputAction() != CodingErrorAction.REPLACE)
 546                         return -1;
 547                     da[dp++] = replacement().charAt(0);
 548                 }
 549             }
 550             return dp;
 551         }
 552 
 553         public boolean isASCIICompatible() {
                 // Always true for this decoder: its ASCII-only fast loops copy
                 // the bytes 00..7F to chars unchanged.
                 final boolean asciiCompatible = true;
                 return asciiCompatible;
             }
 556     }
 557 
 558     private static final class Encoder extends CharsetEncoder
 559                                  implements ArrayEncoder {
 560 
 561         private Encoder(Charset cs) {
                 // 1.1f is the averageBytesPerChar estimate and 3.0f the
                 // maxBytesPerChar bound passed to CharsetEncoder.
                 super(cs,
                       1.1f,
                       3.0f);
             }
 564 
 565         public boolean canEncode(char c) {
                 // A single char can be encoded unless it is a surrogate.
                 if (Character.isSurrogate(c)) {
                     return false;
                 }
                 return true;
             }
 568 
 569         public boolean isLegalReplacement(byte[] repl) {
                 // A single byte in the range 00..7F is accepted directly; any
                 // other sequence is left to the superclass check.
                 if (repl.length == 1 && repl[0] >= 0) {
                     return true;
                 }
                 return super.isLegalReplacement(repl);
             }
 573 
 574         private static CoderResult overflow(CharBuffer src, int sp,
 575                                             ByteBuffer dst, int dp) {
 576             updatePositions(src, sp, dst, dp);
 577             return CoderResult.OVERFLOW;
 578         }
 579 
 580         private static CoderResult overflow(CharBuffer src, int mark) {
 581             src.position(mark);
 582             return CoderResult.OVERFLOW;
 583         }
 584 
 585         private Surrogate.Parser sgp;
             // sgp is created lazily, the first time a surrogate char is met, and
             // is shared by encodeArrayLoop, encodeBufferLoop and encode.
 586         private CoderResult encodeArrayLoop(CharBuffer src,
 587                                             ByteBuffer dst)
 588         {
                 // Encodes between the backing arrays of src and dst.  sp/sl and
                 // dp/dl are absolute array indices (arrayOffset() included) and
                 // are stored back as buffer positions on every return.  Yields
                 // UNDERFLOW once all chars are consumed, OVERFLOW when dst cannot
                 // hold the next encoded character, or the result of sgp.error()
                 // when a surrogate cannot be parsed.
 589             char[] sa = src.array();
 590             int sp = src.arrayOffset() + src.position();
 591             int sl = src.arrayOffset() + src.limit();
 592
 593             byte[] da = dst.array();
 594             int dp = dst.arrayOffset() + dst.position();
 595             int dl = dst.arrayOffset() + dst.limit();
                 // dlASCII stops the fast loop at whichever of src/dst runs out
                 // first, so that loop only has to compare dp.
 596             int dlASCII = dp + Math.min(sl - sp, dl - dp);
 597
 598             // ASCII only loop
 599             while (dp < dlASCII && sa[sp] < '\u0080')
 600                 da[dp++] = (byte) sa[sp++];
 601             while (sp < sl) {
 602                 char c = sa[sp];
 603                 if (c < 0x80) {
 604                     // Have at most seven bits
 605                     if (dp >= dl)
 606                         return overflow(src, sp, dst, dp);
 607                     da[dp++] = (byte)c;
 608                 } else if (c < 0x800) {
 609                     // 2 bytes, 11 bits
 610                     if (dl - dp < 2)
 611                         return overflow(src, sp, dst, dp);
 612                     da[dp++] = (byte)(0xc0 | (c >> 6));
 613                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 614                 } else if (Character.isSurrogate(c)) {
 615                     // Have a surrogate pair
 616                     if (sgp == null)
 617                         sgp = new Surrogate.Parser();
 618                     int uc = sgp.parse(c, sa, sp, sl);
                         // A negative value means no code point could be parsed;
                         // the parser's own CoderResult is returned in that case.
 619                     if (uc < 0) {
 620                         updatePositions(src, sp, dst, dp);
 621                         return sgp.error();
 622                     }
 623                     if (dl - dp < 4)
 624                         return overflow(src, sp, dst, dp);
                         // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 625                     da[dp++] = (byte)(0xf0 | ((uc >> 18)));
 626                     da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 627                     da[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
 628                     da[dp++] = (byte)(0x80 | (uc & 0x3f));
 629                     sp++;  // 2 chars
 630                 } else {
 631                     // 3 bytes, 16 bits
 632                     if (dl - dp < 3)
 633                         return overflow(src, sp, dst, dp);
 634                     da[dp++] = (byte)(0xe0 | ((c >> 12)));
 635                     da[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
 636                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 637                 }
                     // Advances past the char read at the top of the loop (the
                     // surrogate branch has already stepped once for the pair).
 638                 sp++;
 639             }
 640             updatePositions(src, sp, dst, dp);
 641             return CoderResult.UNDERFLOW;
 642         }
 643 
 644         private CoderResult encodeBufferLoop(CharBuffer src,
 645                                              ByteBuffer dst)
 646         {
                 // Variant used by encodeLoop when either buffer has no accessible
                 // backing array: chars are read with relative get() and bytes
                 // written with put().  mark is the buffer position of the first
                 // char not yet fully encoded; src is reset to it on every return.
 647             int mark = src.position();
 648             while (src.hasRemaining()) {
 649                 char c = src.get();
 650                 if (c < 0x80) {
 651                     // Have at most seven bits
 652                     if (!dst.hasRemaining())
 653                         return overflow(src, mark);
 654                     dst.put((byte)c);
 655                 } else if (c < 0x800) {
 656                     // 2 bytes, 11 bits
 657                     if (dst.remaining() < 2)
 658                         return overflow(src, mark);
 659                     dst.put((byte)(0xc0 | (c >> 6)));
 660                     dst.put((byte)(0x80 | (c & 0x3f)));
 661                 } else if (Character.isSurrogate(c)) {
 662                     // Have a surrogate pair
 663                     if (sgp == null)
 664                         sgp = new Surrogate.Parser();
 665                     int uc = sgp.parse(c, src);
                         // A negative value means no code point could be parsed;
                         // the parser's own CoderResult is returned in that case.
 666                     if (uc < 0) {
 667                         src.position(mark);
 668                         return sgp.error();
 669                     }
 670                     if (dst.remaining() < 4)
 671                         return overflow(src, mark);
                         // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 672                     dst.put((byte)(0xf0 | ((uc >> 18))));
 673                     dst.put((byte)(0x80 | ((uc >> 12) & 0x3f)));
 674                     dst.put((byte)(0x80 | ((uc >>  6) & 0x3f)));
 675                     dst.put((byte)(0x80 | (uc & 0x3f)));
                         // Presumably parse() has taken the second char of the
                         // pair from src; this extra step accounts for it
                         // (confirm in Surrogate.Parser).
 676                     mark++;  // 2 chars
 677                 } else {
 678                     // 3 bytes, 16 bits
 679                     if (dst.remaining() < 3)
 680                         return overflow(src, mark);
 681                     dst.put((byte)(0xe0 | ((c >> 12))));
 682                     dst.put((byte)(0x80 | ((c >>  6) & 0x3f)));
 683                     dst.put((byte)(0x80 | (c & 0x3f)));
 684                 }
 685                 mark++;
 686             }
 687             src.position(mark);
 688             return CoderResult.UNDERFLOW;
 689         }
 690 
 691         protected final CoderResult encodeLoop(CharBuffer src,
 692                                                ByteBuffer dst)
 693         {
 694             if (src.hasArray() && dst.hasArray())
 695                 return encodeArrayLoop(src, dst);
 696             else
 697                 return encodeBufferLoop(src, dst);
 698         }
 699 
 700         private byte[] repl = { (byte)'?' };
             // repl is the whole replacement sequence written by the array-based
             // encode() below when an unpaired surrogate is replaced.  Storing
             // only newReplacement[0] would cut a multi-byte replacement down to
             // its lead byte and leave ill-formed UTF-8 in the output.
             protected void implReplaceWith(byte[] newReplacement) {
                 // Private copy: later writes to the caller's array must not
                 // change what encode() emits.
                 repl = newReplacement.clone();
             }

             // Encodes len chars of sa, starting at sp, into da from index 0 and
             // returns the number of bytes written.
             // returns -1 if there is malformed char(s) and the
             // "action" for malformed input is not REPLACE.
             // da is only bounds-checked in the ASCII fast loop -- presumably the
             // caller sizes it with maxBytesPerChar() per input char, which also
             // covers the replacement, since replaceWith() rejects replacements
             // longer than maxBytesPerChar() (TODO confirm against callers).
             public int encode(char[] sa, int sp, int len, byte[] da) {
                 int sl = sp + len;
                 int dp = 0;
                 int dlASCII = dp + Math.min(len, da.length);

                 // ASCII only optimized loop
                 while (dp < dlASCII && sa[sp] < '\u0080')
                     da[dp++] = (byte) sa[sp++];

                 while (sp < sl) {
                     char c = sa[sp++];
                     if (c < 0x80) {
                         // Have at most seven bits
                         da[dp++] = (byte)c;
                     } else if (c < 0x800) {
                         // 2 bytes, 11 bits
                         da[dp++] = (byte)(0xc0 | (c >> 6));
                         da[dp++] = (byte)(0x80 | (c & 0x3f));
                     } else if (Character.isSurrogate(c)) {
                         if (sgp == null)
                             sgp = new Surrogate.Parser();
                         // sp was already advanced, so c sits at index sp - 1.
                         int uc = sgp.parse(c, sa, sp - 1, sl);
                         if (uc < 0) {
                             if (malformedInputAction() != CodingErrorAction.REPLACE)
                                 return -1;
                             // Write every byte of the replacement, not just
                             // the first one.
                             System.arraycopy(repl, 0, da, dp, repl.length);
                             dp += repl.length;
                         } else {
                             // 4 bytes, 21 bits
                             da[dp++] = (byte)(0xf0 | ((uc >> 18)));
                             da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
                             da[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
                             da[dp++] = (byte)(0x80 | (uc & 0x3f));
                             sp++;  // 2 chars
                         }
                     } else {
                         // 3 bytes, 16 bits
                         da[dp++] = (byte)(0xe0 | ((c >> 12)));
                         da[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
                         da[dp++] = (byte)(0x80 | (c & 0x3f));
                     }
                 }
                 return dp;
             }
 749 
 750         public boolean isASCIICompatible() {
                 // Always true for this encoder: its ASCII-only fast loops copy
                 // chars below U+0080 to single bytes unchanged.
                 final boolean asciiCompatible = true;
                 return asciiCompatible;
             }
 753     }
 754 }