src/share/classes/sun/nio/cs/UTF_8.java

Print this page




   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 package sun.nio.cs;
  27 

  28 import java.nio.ByteBuffer;
  29 import java.nio.CharBuffer;
  30 import java.nio.BufferOverflowException;
  31 import java.nio.BufferUnderflowException;
  32 import java.nio.charset.Charset;
  33 import java.nio.charset.CharsetDecoder;
  34 import java.nio.charset.CharsetEncoder;
  35 import java.nio.charset.CoderResult;
  36 import java.nio.charset.CharacterCodingException;
  37 import java.nio.charset.MalformedInputException;
  38 import java.nio.charset.UnmappableCharacterException;
  39 
  40 
  41 /*
  42  * # Bits   Bit pattern
  43  * 1    7   0xxxxxxx


  44  * 2   11   110xxxxx 10xxxxxx


  45  * 3   16   1110xxxx 10xxxxxx 10xxxxxx
  46  * 4   21   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  47  * 5   26   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
  48  * 6   31   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
  49  *
  50  * UCS-2 uses 1-3, UTF-16 uses 1-4, UCS-4 uses 1-6



  51  */
  52 
  53 class UTF_8 extends Unicode
  54 {
  55 
    /**
     * Creates the charset, registering it under the canonical name "UTF-8"
     * together with the alias list held in StandardCharsets.aliases_UTF_8.
     */
    public UTF_8() {
        super("UTF-8", StandardCharsets.aliases_UTF_8);
    }
  59 
    /**
     * Returns the historical name of this charset.
     *
     * @return the string "UTF8"
     */
    public String historicalName() {
        return "UTF8";
    }
  63 
  64     public CharsetDecoder newDecoder() {
  65         return new Decoder(this);
  66     }
  67 
  68     public CharsetEncoder newEncoder() {
  69         return new Encoder(this);
  70     }
  71 





  72 
  73     private static class Decoder extends CharsetDecoder {
  74         private Decoder(Charset cs) {
  75             super(cs, 1.0f, 1.0f);
  76         }
  77 
  78         private boolean isContinuation(int b) {
  79             return ((b & 0xc0) == 0x80);
  80         }
  81 
  82         private final Surrogate.Generator sgg = new Surrogate.Generator();



  83 





































































































  84         private CoderResult decodeArrayLoop(ByteBuffer src,
  85                                             CharBuffer dst)
  86         {

  87             byte[] sa = src.array();
  88             int sp = src.arrayOffset() + src.position();
  89             int sl = src.arrayOffset() + src.limit();
  90             assert (sp <= sl);
  91             sp = (sp <= sl ? sp : sl);
  92             char[] da = dst.array();
  93             int dp = dst.arrayOffset() + dst.position();
  94             int dl = dst.arrayOffset() + dst.limit();
  95             assert (dp <= dl);
  96             dp = (dp <= dl ? dp : dl);
  97 
  98             try {



  99                 while (sp < sl) {
 100                     int b1 = sa[sp];
 101                     int b2, b3;
 102                     switch ((b1 >> 4) & 0x0f) {
 103 
 104                     case 0: case 1: case 2: case 3:
 105                     case 4: case 5: case 6: case 7:
 106                         // 1 byte, 7 bits: 0xxxxxxx
 107                         if (dl - dp < 1)
 108                             return CoderResult.OVERFLOW;
 109                         da[dp++] = (char)(b1 & 0x7f);
 110                         sp++;
 111                         continue;
 112 
 113                     case 12: case 13:
 114                         // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 115                         if (sl - sp < 2)
 116                             return CoderResult.UNDERFLOW;
 117                         if (dl - dp < 1)
 118                             return CoderResult.OVERFLOW;
 119                         if (!isContinuation(b2 = sa[sp + 1]))
 120                             return CoderResult.malformedForLength(1);
 121                         da[dp++] = ((char)(((b1 & 0x1f) << 6) |
 122                                            ((b2 & 0x3f) << 0)));
 123                         sp += 2;
 124                         continue;
 125 
 126                     case 14:
 127                         // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 128                         if (sl - sp < 3)
 129                             return CoderResult.UNDERFLOW;
 130                         if (dl - dp < 1)
 131                             return CoderResult.OVERFLOW;
 132                         if (!isContinuation(b2 = sa[sp + 1]))
 133                             return CoderResult.malformedForLength(1);
 134                         if (!isContinuation(b3 = sa[sp + 2]))
 135                             return CoderResult.malformedForLength(2);
 136                         da[dp++] = ((char)(((b1 & 0x0f) << 12) |
 137                                            ((b2 & 0x3f) << 06) |
 138                                            ((b3 & 0x3f) << 0)));
 139                         sp += 3;
 140                         continue;
 141 
 142                     case 15:
 143                         // 4, 5, or 6 bytes
 144 
 145                         int b4, b5, b6, uc, n;
 146                         switch (b1 & 0x0f) {
 147 
 148                         case 0: case 1: case 2: case 3:
 149                         case 4: case 5: case 6: case 7:
 150                             // 4 bytes, 21 bits
 151                             if (sl - sp < 4)
 152                                 return CoderResult.UNDERFLOW;
 153                             if (!isContinuation(b2 = sa[sp + 1]))
 154                                 return CoderResult.malformedForLength(1);
 155                             if (!isContinuation(b3 = sa[sp + 2]))
 156                                 return CoderResult.malformedForLength(2);
 157                             if (!isContinuation(b4 = sa[sp + 3]))
 158                                 return CoderResult.malformedForLength(3);
 159                             uc = (((b1 & 0x07) << 18) |
 160                                   ((b2 & 0x3f) << 12) |
 161                                   ((b3 & 0x3f) << 06) |
 162                                   ((b4 & 0x3f) << 00));
 163                             n = 4;
 164                             break;
 165 
 166                         case 8: case 9: case 10: case 11:
 167                             // 5 bytes, 26 bits
 168                             if (sl - sp < 5)
 169                                 return CoderResult.UNDERFLOW;
 170                             if (!isContinuation(b2 = sa[sp + 1]))
 171                                 return CoderResult.malformedForLength(1);
 172                             if (!isContinuation(b3 = sa[sp + 2]))
 173                                 return CoderResult.malformedForLength(2);
 174                             if (!isContinuation(b4 = sa[sp + 3]))
 175                                 return CoderResult.malformedForLength(3);
 176                             if (!isContinuation(b5 = sa[sp + 4]))
 177                                 return CoderResult.malformedForLength(4);
 178                             uc = (((b1 & 0x03) << 24) |
 179                                   ((b2 & 0x3f) << 18) |
 180                                   ((b3 & 0x3f) << 12) |
 181                                   ((b4 & 0x3f) << 06) |
 182                                   ((b5 & 0x3f) << 00));
 183                             n = 5;
 184                             break;
 185 
 186                         case 12: case 13:
 187                             // 6 bytes, 31 bits
 188                             if (sl - sp < 6)
 189                                 return CoderResult.UNDERFLOW;
 190                             if (!isContinuation(b2 = sa[sp + 1]))
 191                                 return CoderResult.malformedForLength(1);
 192                             if (!isContinuation(b3 = sa[sp + 2]))
 193                                 return CoderResult.malformedForLength(2);
 194                             if (!isContinuation(b4 = sa[sp + 3]))
 195                                 return CoderResult.malformedForLength(3);
 196                             if (!isContinuation(b5 = sa[sp + 4]))
 197                                 return CoderResult.malformedForLength(4);
 198                             if (!isContinuation(b6 = sa[sp + 5]))
 199                                 return CoderResult.malformedForLength(5);
 200                             uc = (((b1 & 0x01) << 30) |
 201                                   ((b2 & 0x3f) << 24) |
 202                                   ((b3 & 0x3f) << 18) |
 203                                   ((b4 & 0x3f) << 12) |
 204                                   ((b5 & 0x3f) << 06) |
 205                                   ((b6 & 0x3f)));
 206                             n = 6;
 207                             break;
 208 
 209                         default:
 210                             return CoderResult.malformedForLength(1);
 211 
 212                         }
 213 
 214                         int gn = sgg.generate(uc, n, da, dp, dl);
 215                         if (gn < 0)
 216                             return sgg.error();
 217                         dp += gn;
 218                         sp += n;
 219                         continue;
 220 
 221                     default:
 222                         return CoderResult.malformedForLength(1);
 223 
 224                     }
 225 
 226                 }
 227 
 228                 return CoderResult.UNDERFLOW;
 229             } finally {
 230                 src.position(sp - src.arrayOffset());
 231                 dst.position(dp - dst.arrayOffset());
 232             }
 233         }
 234 
 235         private CoderResult decodeBufferLoop(ByteBuffer src,
 236                                              CharBuffer dst)
 237         {
 238             int mark = src.position();
 239             try {
 240                 while (src.hasRemaining()) {
 241                     int b1 = src.get();
 242                     int b2, b3;
 243                     switch ((b1 >> 4) & 0x0f) {
 244 
 245                     case 0: case 1: case 2: case 3:
 246                     case 4: case 5: case 6: case 7:
 247                         // 1 byte, 7 bits: 0xxxxxxx
 248                         if (dst.remaining() < 1)
 249                             return CoderResult.OVERFLOW;
 250                         dst.put((char)b1);
 251                         mark++;
 252                         continue;
 253 
 254                     case 12: case 13:
 255                         // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 256                         if (src.remaining() < 1)
 257                             return CoderResult.UNDERFLOW;
 258                         if (dst.remaining() < 1)
 259                             return CoderResult.OVERFLOW;
 260                         if (!isContinuation(b2 = src.get()))
 261                             return CoderResult.malformedForLength(1);
 262                         dst.put((char)(((b1 & 0x1f) << 6) |
 263                                        ((b2 & 0x3f) << 0)));
 264                         mark += 2;
 265                         continue;
 266 
 267                     case 14:
 268                         // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 269                         if (src.remaining() < 2)
 270                             return CoderResult.UNDERFLOW;
 271                         if (dst.remaining() < 1)
 272                             return CoderResult.OVERFLOW;
 273                         if (!isContinuation(b2 = src.get()))
 274                             return CoderResult.malformedForLength(1);
 275                         if (!isContinuation(b3 = src.get()))
 276                             return CoderResult.malformedForLength(2);
 277                         dst.put((char)(((b1 & 0x0f) << 12) |
 278                                        ((b2 & 0x3f) << 06) |
 279                                        ((b3 & 0x3f) << 0)));
 280                         mark += 3;
 281                         continue;
 282 
 283                     case 15:
 284                         // 4, 5, or 6 bytes
 285 
 286                         int b4, b5, b6, uc, n;
 287                         switch (b1 & 0x0f) {
 288 
 289                         case 0: case 1: case 2: case 3:
 290                         case 4: case 5: case 6: case 7:
 291                             // 4 bytes, 21 bits
 292                             if (src.remaining() < 3)
 293                                 return CoderResult.UNDERFLOW;
 294                             if (!isContinuation(b2 = src.get()))
 295                                 return CoderResult.malformedForLength(1);
 296                             if (!isContinuation(b3 = src.get()))
 297                                 return CoderResult.malformedForLength(2);
 298                             if (!isContinuation(b4 = src.get()))
 299                                 return CoderResult.malformedForLength(3);
 300                             uc = (((b1 & 0x07) << 18) |
 301                                   ((b2 & 0x3f) << 12) |
 302                                   ((b3 & 0x3f) << 06) |
 303                                   ((b4 & 0x3f) << 00));
 304                             n = 4;
 305                             break;
 306 
 307                         case 8: case 9: case 10: case 11:
 308                             // 5 bytes, 26 bits
 309                             if (src.remaining() < 4)
 310                                 return CoderResult.UNDERFLOW;
 311                             if (!isContinuation(b2 = src.get()))
 312                                 return CoderResult.malformedForLength(1);
 313                             if (!isContinuation(b3 = src.get()))
 314                                 return CoderResult.malformedForLength(2);
 315                             if (!isContinuation(b4 = src.get()))
 316                                 return CoderResult.malformedForLength(3);
 317                             if (!isContinuation(b5 = src.get()))
 318                                 return CoderResult.malformedForLength(4);
 319                             uc = (((b1 & 0x03) << 24) |
 320                                   ((b2 & 0x3f) << 18) |
 321                                   ((b3 & 0x3f) << 12) |
 322                                   ((b4 & 0x3f) << 06) |
 323                                   ((b5 & 0x3f) << 00));
 324                             n = 5;
 325                             break;
 326 
 327                         case 12: case 13:
 328                             // 6 bytes, 31 bits
 329                             if (src.remaining() < 5)
 330                                 return CoderResult.UNDERFLOW;
 331                             if (!isContinuation(b2 = src.get()))
 332                                 return CoderResult.malformedForLength(1);
 333                             if (!isContinuation(b3 = src.get()))
 334                                 return CoderResult.malformedForLength(2);
 335                             if (!isContinuation(b4 = src.get()))
 336                                 return CoderResult.malformedForLength(3);
 337                             if (!isContinuation(b5 = src.get()))
 338                                 return CoderResult.malformedForLength(4);
 339                             if (!isContinuation(b6 = src.get()))
 340                                 return CoderResult.malformedForLength(5);
 341                             uc = (((b1 & 0x01) << 30) |
 342                                   ((b2 & 0x3f) << 24) |
 343                                   ((b3 & 0x3f) << 18) |
 344                                   ((b4 & 0x3f) << 12) |
 345                                   ((b5 & 0x3f) << 06) |
 346                                   ((b6 & 0x3f)));
 347                             n = 6;
 348                             break;
 349 
 350                         default:
 351                             return CoderResult.malformedForLength(1);
 352 
 353                         }
 354 
 355                         if (sgg.generate(uc, n, dst) < 0)
 356                             return sgg.error();
 357                         mark += n;
 358                         continue;
 359 
 360                     default:
 361                         return CoderResult.malformedForLength(1);
 362 
 363                     }
 364 
 365                 }
 366                 return CoderResult.UNDERFLOW;
 367             } finally {
 368                 src.position(mark);
 369             }
 370         }
 371 
 372         protected CoderResult decodeLoop(ByteBuffer src,
 373                                          CharBuffer dst)
 374         {
 375             if (src.hasArray() && dst.hasArray())
 376                 return decodeArrayLoop(src, dst);
 377             else
 378                 return decodeBufferLoop(src, dst);
 379         }
 380 
 381     }
 382 
 383 
 384     private static class Encoder extends CharsetEncoder {
 385 
 386         private Encoder(Charset cs) {
 387             super(cs, 1.1f, 4.0f);
 388         }
 389 
 390         public boolean canEncode(char c) {
 391             return !Surrogate.is(c);
 392         }
 393 
 394         private final Surrogate.Parser sgp = new Surrogate.Parser();



 395 












 396         private CoderResult encodeArrayLoop(CharBuffer src,
 397                                             ByteBuffer dst)
 398         {
 399             char[] sa = src.array();
 400             int sp = src.arrayOffset() + src.position();
 401             int sl = src.arrayOffset() + src.limit();
 402             assert (sp <= sl);
 403             sp = (sp <= sl ? sp : sl);
 404             byte[] da = dst.array();
 405             int dp = dst.arrayOffset() + dst.position();
 406             int dl = dst.arrayOffset() + dst.limit();
 407             assert (dp <= dl);
 408             dp = (dp <= dl ? dp : dl);
 409 
 410             try {


 411                 while (sp < sl) {
 412                     char c = sa[sp];
 413 
 414                     if (c < 0x80) {
 415                         // Have at most seven bits
 416                         if (dp >= dl)
 417                             return CoderResult.OVERFLOW;
 418                         da[dp++] = (byte)c;
 419                         sp++;
 420                         continue;
 421                     }
 422 
 423                     if (!Surrogate.is(c)) {
 424                         // 2 bytes, 11 bits
 425                         if (c < 0x800) {
 426                             if (dl - dp < 2)
 427                                 return CoderResult.OVERFLOW;
 428                             da[dp++] = (byte)(0xc0 | ((c >> 06)));
 429                             da[dp++] = (byte)(0x80 | ((c >> 00) & 0x3f));
 430                             sp++;
 431                             continue;
 432                         }
 433                         if (c <= '\uFFFF') {
 434                             // 3 bytes, 16 bits
 435                             if (dl - dp < 3)
 436                                 return CoderResult.OVERFLOW;
 437                             da[dp++] = (byte)(0xe0 | ((c >> 12)));
 438                             da[dp++] = (byte)(0x80 | ((c >> 06) & 0x3f));
 439                             da[dp++] = (byte)(0x80 | ((c >> 00) & 0x3f));
 440                             sp++;
 441                             continue;
 442                         }
 443                     }
 444 
 445                     // Have a surrogate pair
 446                     int uc = sgp.parse(c, sa, sp, sl);
 447                     if (uc < 0)



 448                         return sgp.error();
 449                     if (uc < 0x200000) {
 450                         if (dl - dp < 4)
 451                             return CoderResult.OVERFLOW;
 452                         da[dp++] = (byte)(0xf0 | ((uc >> 18)));
 453                         da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 454                         da[dp++] = (byte)(0x80 | ((uc >> 06) & 0x3f));
 455                         da[dp++] = (byte)(0x80 | ((uc >> 00) & 0x3f));
 456                         sp += sgp.increment();
 457                         continue;






 458                     }
 459                     assert false;
 460 
 461                 }

 462                 return CoderResult.UNDERFLOW;
 463             } finally {
 464                 src.position(sp - src.arrayOffset());
 465                 dst.position(dp - dst.arrayOffset());
 466             }
 467         }
 468 
 469         private CoderResult encodeBufferLoop(CharBuffer src,
 470                                              ByteBuffer dst)
 471         {
 472             int mark = src.position();
 473             try {
 474                 while (src.hasRemaining()) {
 475                     char c = src.get();
 476 
 477                     if (c < 0x80) {
 478                         // Have at most seven bits
 479                         if (!dst.hasRemaining())
 480                             return CoderResult.OVERFLOW;
 481                         dst.put((byte)c);
 482                         mark++;
 483                         continue;
 484                     }
 485 
 486                     if (!Surrogate.is(c)) {
 487                         if (c < 0x800) {
 488                             // 2 bytes, 11 bits
 489                             if (dst.remaining() < 2)
 490                                 return CoderResult.OVERFLOW;
 491                             dst.put((byte)(0xc0 | ((c >> 06))));
 492                             dst.put((byte)(0x80 | ((c >> 00) & 0x3f)));
 493                             mark++;
 494                             continue;
 495                         }
 496                         if (c <= '\uFFFF') {
 497                             // 3 bytes, 16 bits
 498                             if (dst.remaining() < 3)
 499                                 return CoderResult.OVERFLOW;
 500                             dst.put((byte)(0xe0 | ((c >> 12))));
 501                             dst.put((byte)(0x80 | ((c >> 06) & 0x3f)));
 502                             dst.put((byte)(0x80 | ((c >> 00) & 0x3f)));
 503                             mark++;
 504                             continue;
 505                         }
 506                     }
 507 
 508                     // Have a surrogate pair
 509                     int uc = sgp.parse(c, src);
 510                     if (uc < 0)



 511                         return sgp.error();
 512                     if (uc < 0x200000) {
 513                         if (dst.remaining() < 4)
 514                             return CoderResult.OVERFLOW;
 515                         dst.put((byte)(0xf0 | ((uc >> 18))));
 516                         dst.put((byte)(0x80 | ((uc >> 12) & 0x3f)));
 517                         dst.put((byte)(0x80 | ((uc >> 06) & 0x3f)));
 518                         dst.put((byte)(0x80 | ((uc >> 00) & 0x3f)));
 519                         mark += sgp.increment();
 520                         continue;






 521                     }
 522                     assert false;
 523 
 524                 }
 525                 return CoderResult.UNDERFLOW;
 526             } finally {
 527                 src.position(mark);

 528             }
 529         }
 530 
 531         protected final CoderResult encodeLoop(CharBuffer src,
 532                                                ByteBuffer dst)
 533         {
 534             if (src.hasArray() && dst.hasArray())
 535                 return encodeArrayLoop(src, dst);
 536             else
 537                 return encodeBufferLoop(src, dst);
 538         }
 539 
 540     }
 541 
 542 }


   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 package sun.nio.cs;
  27 
  28 import java.nio.Buffer;
  29 import java.nio.ByteBuffer;
  30 import java.nio.CharBuffer;


  31 import java.nio.charset.Charset;
  32 import java.nio.charset.CharsetDecoder;
  33 import java.nio.charset.CharsetEncoder;
  34 import java.nio.charset.CoderResult;



  35 
  36 /*  Legal UTF-8 Byte Sequences
  37  *
  38  * #    Code Points      Bits   Bit/Byte pattern
  39  * 1                     7      0xxxxxxx
  40  *      U+0000..U+007F          00..7F
  41  *
  42  * 2                     11     110xxxxx    10xxxxxx
  43  *      U+0080..U+07FF          C2..DF      80..BF
  44  *
  45  * 3                     16     1110xxxx    10xxxxxx    10xxxxxx
  46  *      U+0800..U+0FFF          E0          A0..BF      80..BF
  47  *      U+1000..U+FFFF          E1..EF      80..BF      80..BF

  48  *
  49  * 4                     21     11110xxx    10xxxxxx    10xxxxxx    10xxxxxx
  50  *     U+10000..U+3FFFF         F0          90..BF      80..BF      80..BF
  51  *     U+40000..U+FFFFF         F1..F3      80..BF      80..BF      80..BF
  52  *    U+100000..U+10FFFF        F4          80..8F      80..BF      80..BF
  53  */
  54 
  55 class UTF_8 extends Unicode
  56 {

    /**
     * Creates the charset, registering it under the canonical name "UTF-8"
     * together with the alias list held in StandardCharsets.aliases_UTF_8.
     */
    public UTF_8() {
        super("UTF-8", StandardCharsets.aliases_UTF_8);
    }
  60 
    /**
     * Returns the historical name of this charset.
     *
     * @return the string "UTF8"
     */
    public String historicalName() {
        return "UTF8";
    }
  64 
  65     public CharsetDecoder newDecoder() {
  66         return new Decoder(this);
  67     }
  68 
  69     public CharsetEncoder newEncoder() {
  70         return new Encoder(this);
  71     }
  72 
  73     static final void updatePositions(Buffer src, int sp,
  74                                       Buffer dst, int dp) {
  75         src.position(sp - src.arrayOffset());
  76         dst.position(dp - dst.arrayOffset());
  77     }
  78 
  79     private static class Decoder extends CharsetDecoder {
        /**
         * Creates a decoder for the given charset; both float arguments
         * passed to the CharsetDecoder constructor are 1.0.
         */
        private Decoder(Charset cs) {
            super(cs, 1.0f, 1.0f);
        }
  83 
  84         private static boolean isNotContinuation(int b) {
  85             return (b & 0xc0) != 0x80;
  86         }
  87 
  88         //  [C2..DF] [80..BF]
  89         private static boolean isMalformed2(int b1, int b2) {
  90             return (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80;
  91         }
  92 
  93         //  [E0]     [A0..BF] [80..BF]
  94         //  [E1..EF] [80..BF] [80..BF]
  95         private static boolean isMalformed3(int b1, int b2, int b3) {
  96             return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
  97                    (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
  98         }
  99 
 100         //  [F0]     [90..BF] [80..BF] [80..BF]
 101         //  [F1..F3] [80..BF] [80..BF] [80..BF]
 102         //  [F4]     [80..8F] [80..BF] [80..BF]
 103         //  only check 80-bf range here, the [0xf0,0x80...] and [0xf4,0x90-...]
 104         //  will be checked by Surrogate.neededFor(uc)
 105         private static boolean isMalformed4(int b2, int b3, int b4) {
 106             return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
 107                    (b4 & 0xc0) != 0x80;
 108         }
 109 
 110         private static CoderResult lookupN(ByteBuffer src, int n)
 111         {
 112             for (int i = 1; i < n; i++) {
 113                if (isNotContinuation(src.get()))
 114                    return CoderResult.malformedForLength(i);
 115             }
 116             return CoderResult.malformedForLength(n);
 117         }
 118 
 119         private static CoderResult malformedN(ByteBuffer src, int nb) {
 120             switch (nb) {
 121             case 1:
 122                 int b1 = src.get();
 123                 if ((b1 >> 2) == -2) {
 124                     // 5 bytes 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 125                     if (src.remaining() < 4)
 126                         return CoderResult.UNDERFLOW;
 127                     return lookupN(src, 5);
 128                 }
 129                 if ((b1 >> 1) == -2) {
 130                     // 6 bytes 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 131                     if (src.remaining() < 5)
 132                         return CoderResult.UNDERFLOW;
 133                     return lookupN(src, 6);
 134                 }
 135                 return CoderResult.malformedForLength(1);
 136             case 2:                    // always 1
 137                 return CoderResult.malformedForLength(1);
 138             case 3:
 139                 b1 = src.get();
 140                 int b2 = src.get();    // no need to lookup b3
 141                 return CoderResult.malformedForLength(
 142                     ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
 143                      isNotContinuation(b2))?1:2);
 144             case 4:  // we don't care the speed here
 145                 b1 = src.get() & 0xff;
 146                 b2 = src.get() & 0xff;
 147                 if (b1 > 0xf4 ||
 148                     (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
 149                     (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
 150                     isNotContinuation(b2))
 151                     return CoderResult.malformedForLength(1);
 152                 if (isNotContinuation(src.get()))
 153                     return CoderResult.malformedForLength(2);
 154                 return CoderResult.malformedForLength(3);
 155             default:
 156                 assert false;
 157                 return null;
 158             }
 159         }
 160 
 161         private static CoderResult malformed(ByteBuffer src, int sp,
 162                                              CharBuffer dst, int dp,
 163                                              int nb)
 164         {
 165             src.position(sp - src.arrayOffset());
 166             CoderResult cr = malformedN(src, nb);
 167             updatePositions(src, sp, dst, dp);
 168             return cr;
 169         }
 170 
 171         private static CoderResult malformed(ByteBuffer src,
 172                                              int mark, int nb)
 173         {
 174             src.position(mark);
 175             CoderResult cr = malformedN(src, nb);
 176             src.position(mark);
 177             return cr;
 178         }
 179 
 180         private static CoderResult xflow(Buffer src, int sp, int sl,
 181                                          Buffer dst, int dp, int nb) {
 182             updatePositions(src, sp, dst, dp);
 183             return (nb == 0 || sl - sp < nb)
 184                    ?CoderResult.UNDERFLOW:CoderResult.OVERFLOW;
 185         }
 186 
 187         private static CoderResult xflow(Buffer src, int mark, int nb) {
 188             CoderResult cr = (nb == 0 || src.remaining() < (nb - 1))
 189                              ?CoderResult.UNDERFLOW:CoderResult.OVERFLOW;
 190             src.position(mark);
 191             return cr;
 192         }
 193 
 194         private CoderResult decodeArrayLoop(ByteBuffer src,
 195                                             CharBuffer dst)
 196         {
 197             // This method is optimized for ASCII input.
 198             byte[] sa = src.array();
 199             int sp = src.arrayOffset() + src.position();
 200             int sl = src.arrayOffset() + src.limit();
 201 

 202             char[] da = dst.array();
 203             int dp = dst.arrayOffset() + dst.position();
 204             int dl = dst.arrayOffset() + dst.limit();
 205             int dlASCII = dp + Math.min(sl - sp, dl - dp);

 206 
 207             // ASCII only loop          
 208             while (dp < dlASCII && sa[sp] >= 0)
 209                 da[dp++] = (char)sa[sp++];
 210 
 211             while (sp < sl) {
 212                 int b1 = sa[sp];
 213                 if (b1  >= 0) {




 214                     // 1 byte, 7 bits: 0xxxxxxx
 215                     if (dp >= dl)
 216                         return xflow(src, sp, sl, dst, dp, 1);
 217                     da[dp++] = (char)b1;
 218                     sp++;
 219                 } else if ((b1 >> 5) == -2) {


 220                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 221                     if (sl - sp < 2 || dp >= dl)
 222                         return xflow(src, sp, sl, dst, dp, 2);
 223                     int b2 = sa[sp + 1];
 224                     if (isMalformed2(b1, b2))
 225                         return malformed(src, sp, dst, dp, 2);
 226                     da[dp++] = (char) (((b1 << 6) ^ b2) ^ 0x0f80);


 227                     sp += 2;
 228                 } else if ((b1 >> 4) == -2) {


 229                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 230                     if (sl - sp < 3 || dp >= dl)
 231                         return xflow(src, sp, sl, dst, dp, 3);
 232                     int b2 = sa[sp + 1];
 233                     int b3 = sa[sp + 2];
 234                     if (isMalformed3(b1, b2, b3))
 235                         return malformed(src, sp, dst, dp, 3);
 236                     da[dp++] = (char) (((b1 << 12) ^ (b2 << 6) ^ b3) ^ 0x1f80);




 237                     sp += 3;
 238                 } else if ((b1 >> 3) == -2) {
 239                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 240                     if (sl - sp < 4 || dl - dp < 2)
 241                         return xflow(src, sp, sl, dst, dp, 4);
 242                     int b2 = sa[sp + 1];
 243                     int b3 = sa[sp + 2];
 244                     int b4 = sa[sp + 3];
 245                     int uc = ((b1 & 0x07) << 18) |












 246                              ((b2 & 0x3f) << 12) |
 247                              ((b3 & 0x3f) << 06) |
 248                              (b4 & 0x3f);
 249                     if (isMalformed4(b2, b3, b4) ||
 250                         !Surrogate.neededFor(uc)) {
 251                         return malformed(src, sp, dst, dp, 4);














































 252                     }
 253                     da[dp++] = Surrogate.high(uc);
 254                     da[dp++] = Surrogate.low(uc);
 255                     sp += 4;
 256                 } else 
 257                     return malformed(src, sp, dst, dp, 1);






 258             }
 259             return xflow(src, sp, sl, dst, dp, 0);
 260         }
 261 







 262         private CoderResult decodeBufferLoop(ByteBuffer src,
 263                                              CharBuffer dst)
 264         {
 265             int mark = src.position();
 266             int limit = src.limit();
 267             while (mark < limit) {
 268                 int b1 = src.get();
 269                 if (b1 >= 0) {




 270                     // 1 byte, 7 bits: 0xxxxxxx
 271                     if (dst.remaining() < 1)
 272                         return xflow(src, mark, 1);  //overflow
 273                     dst.put((char)b1);
 274                     mark++;
 275                 } else if ((b1 >> 5) == -2) {


 276                     // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
 277                     if (limit - mark < 2|| dst.remaining() < 1)
 278                         return xflow(src, mark, 2);
 279                     int b2 = src.get();
 280                     if (isMalformed2(b1, b2))
 281                         return malformed(src, mark, 2);
 282                     dst.put((char) (((b1 << 6) ^ b2) ^ 0x0f80));


 283                     mark += 2;
 284                 } else if ((b1 >> 4) == -2) {


 285                     // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
 286                     if (limit - mark < 3 || dst.remaining() < 1)
 287                         return xflow(src, mark, 3);
 288                     int b2 = src.get();
 289                     int b3 = src.get();
 290                     if (isMalformed3(b1, b2, b3))
 291                         return malformed(src, mark, 3);
 292                     dst.put((char) (((b1 << 12) ^ (b2 << 6) ^ b3) ^ 0x1f80));




 293                     mark += 3;
 294                 } else if ((b1 >> 3) == -2) {
 295                     // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 296                     if (limit - mark < 4 || dst.remaining() < 2)
 297                         return xflow(src, mark, 4);
 298                     int b2 = src.get();
 299                     int b3 = src.get();
 300                     int b4 = src.get();
 301                     int uc = ((b1 & 0x07) << 18) |












 302                              ((b2 & 0x3f) << 12) |
 303                              ((b3 & 0x3f) << 06) |
 304                              (b4 & 0x3f);
 305                     if (isMalformed4(b2, b3, b4) ||
 306                         !Surrogate.neededFor(uc)) { // shortest form check
 307                         return malformed(src, mark, 4);














































 308                     }
 309                     dst.put(Surrogate.high(uc));
 310                     dst.put(Surrogate.low(uc));
 311                     mark += 4;
 312                 } else {
 313                     return malformed(src, mark, 1);




 314                 }

 315             }
 316             return xflow(src, mark, 0);


 317         }

 318 
 319         protected CoderResult decodeLoop(ByteBuffer src,
 320                                          CharBuffer dst)
 321         {
 322             if (src.hasArray() && dst.hasArray())
 323                 return decodeArrayLoop(src, dst);
 324             else
 325                 return decodeBufferLoop(src, dst);
 326         }

 327     }
 328 

 329     private static class Encoder extends CharsetEncoder {
 330 
 331         private Encoder(Charset cs) {
 332             super(cs, 1.1f, 4.0f);
 333         }
 334 
 335         public boolean canEncode(char c) {
 336             return !Surrogate.is(c);
 337         }
 338 
 339         public boolean isLegalReplacement(byte[] repl) {
 340             return ((repl.length == 1 && repl[0] >= 0) ||
 341                     super.isLegalReplacement(repl));
 342         }
 343 
 344         private static CoderResult overflow(CharBuffer src, int sp,
 345                                             ByteBuffer dst, int dp) {
 346             updatePositions(src, sp, dst, dp);
 347             return CoderResult.OVERFLOW;
 348         }
 349 
 350         private static CoderResult overflow(CharBuffer src, int mark) {
 351             src.position(mark);
 352             return CoderResult.OVERFLOW;
 353         }
 354 
 355         private Surrogate.Parser sgp;
 356         private CoderResult encodeArrayLoop(CharBuffer src,
 357                                             ByteBuffer dst)
 358         {
 359             char[] sa = src.array();
 360             int sp = src.arrayOffset() + src.position();
 361             int sl = src.arrayOffset() + src.limit();
 362 

 363             byte[] da = dst.array();
 364             int dp = dst.arrayOffset() + dst.position();
 365             int dl = dst.arrayOffset() + dst.limit();
 366             int dlASCII = dp + Math.min(sl - sp, dl - dp);

 367 
 368             //ASCII only loop
 369             while (dp < dlASCII && sa[sp] < '\u0080')
 370                 da[dp++] = (byte) sa[sp++];
 371             while (sp < sl) {
 372                 int c = sa[sp];

 373                 if (c < 0x80) {
 374                     // Have at most seven bits
 375                     if (dp >= dl)
 376                         return overflow(src, sp, dst, dp);
 377                     da[dp++] = (byte)c;
 378                 } else if (c < 0x800) {




 379                     // 2 bytes, 11 bits

 380                     if (dl - dp < 2)
 381                         return overflow(src, sp, dst, dp);
 382                     da[dp++] = (byte)(0xc0 | ((c >> 06)));
 383                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 384                 } else if (Surrogate.is(c)) {














 385                     // Have a surrogate pair
 386                     if (sgp == null)
 387                         sgp = new Surrogate.Parser();
 388                     int uc = sgp.parse((char)c, sa, sp, sl);
 389                     if (uc < 0) {
 390                         updatePositions(src, sp, dst, dp);
 391                         return sgp.error();
 392                     }
 393                     if (dl - dp < 4)
 394                         return overflow(src, sp, dst, dp);
 395                     da[dp++] = (byte)(0xf0 | ((uc >> 18)));
 396                     da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 397                     da[dp++] = (byte)(0x80 | ((uc >> 06) & 0x3f));
 398                     da[dp++] = (byte)(0x80 | (uc & 0x3f));
 399                     sp++;  // 2 chars
 400                 } else {
 401                     // 3 bytes, 16 bits
 402                     if (dl - dp < 3)
 403                         return overflow(src, sp, dst, dp);
 404                     da[dp++] = (byte)(0xe0 | ((c >> 12)));
 405                     da[dp++] = (byte)(0x80 | ((c >> 06) & 0x3f));
 406                     da[dp++] = (byte)(0x80 | (c & 0x3f));
 407                 }
 408                 sp++;

 409             }
 410             updatePositions(src, sp, dst, dp);
 411             return CoderResult.UNDERFLOW;



 412         }

 413 
 414         private CoderResult encodeBufferLoop(CharBuffer src,
 415                                              ByteBuffer dst)
 416         {
 417             int mark = src.position();

 418             while (src.hasRemaining()) {
 419                 int c = src.get();

 420                 if (c < 0x80) {
 421                     // Have at most seven bits
 422                     if (!dst.hasRemaining())
 423                         return overflow(src, mark);
 424                     dst.put((byte)c);
 425                 } else if (c < 0x800) {





 426                     // 2 bytes, 11 bits
 427                     if (dst.remaining() < 2)
 428                         return overflow(src, mark);
 429                     dst.put((byte)(0xc0 | ((c >> 06))));
 430                     dst.put((byte)(0x80 | (c & 0x3f)));
 431                 } else if (Surrogate.is(c)) {














 432                     // Have a surrogate pair
 433                     if (sgp == null)
 434                         sgp = new Surrogate.Parser();
 435                     int uc = sgp.parse((char)c, src);
 436                     if (uc < 0) {
 437                         src.position(mark);
 438                         return sgp.error();
 439                     }
 440                     if (dst.remaining() < 4)
 441                         return overflow(src, mark);
 442                     dst.put((byte)(0xf0 | ((uc >> 18))));
 443                     dst.put((byte)(0x80 | ((uc >> 12) & 0x3f)));
 444                     dst.put((byte)(0x80 | ((uc >> 06) & 0x3f)));
 445                     dst.put((byte)(0x80 | (uc & 0x3f)));
 446                     mark++;  //2 chars
 447                 } else {
 448                     // 3 bytes, 16 bits
 449                     if (dst.remaining() < 3)
 450                         return overflow(src, mark);
 451                     dst.put((byte)(0xe0 | ((c >> 12))));
 452                     dst.put((byte)(0x80 | ((c >> 06) & 0x3f)));
 453                     dst.put((byte)(0x80 | (c & 0x3f)));
 454                 }
 455                 mark++;

 456             }


 457             src.position(mark);
 458             return CoderResult.UNDERFLOW;
 459         }

 460 
 461         protected final CoderResult encodeLoop(CharBuffer src,
 462                                                ByteBuffer dst)
 463         {
 464             if (src.hasArray() && dst.hasArray())
 465                 return encodeArrayLoop(src, dst);
 466             else
 467                 return encodeBufferLoop(src, dst);
 468         }

 469     }

 470 }