1 /*
   2  * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /*
  25  * @test
  26  * @bug 4486841 7040220 7096080 8039751
  27  * @summary Test UTF-8 charset
  28  */
  29 
  30 import java.nio.charset.*;
  31 import java.nio.*;
  32 import java.util.*;
  33 
  34 public class TestUTF8 {
  35     static char[] decode(byte[] bb, String csn, boolean testDirect)
  36         throws Exception {
  37         CharsetDecoder dec = Charset.forName(csn).newDecoder();
  38         ByteBuffer bbf;
  39         CharBuffer cbf;
  40         if (testDirect) {
  41             bbf = ByteBuffer.allocateDirect(bb.length);
  42             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  43             bbf.put(bb).flip();
  44         } else {
  45             bbf = ByteBuffer.wrap(bb);
  46             cbf = CharBuffer.allocate(bb.length);
  47         }
  48         CoderResult cr = dec.decode(bbf, cbf, true);
  49         if (cr != CoderResult.UNDERFLOW)
  50             throw new RuntimeException("Decoding err: " + csn);
  51         char[] cc = new char[cbf.position()];
  52         cbf.flip(); cbf.get(cc);
  53         return cc;
  54 
  55     }
  56 
  57     static CoderResult decodeCR(byte[] bb, String csn, boolean testDirect)
  58         throws Exception {
  59         CharsetDecoder dec = Charset.forName(csn).newDecoder();
  60         ByteBuffer bbf;
  61         CharBuffer cbf;
  62         if (testDirect) {
  63             bbf = ByteBuffer.allocateDirect(bb.length);
  64             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  65             bbf.put(bb).flip();
  66         } else {
  67             bbf = ByteBuffer.wrap(bb);
  68             cbf = CharBuffer.allocate(bb.length);
  69         }
  70         return dec.decode(bbf, cbf, true);
  71     }
  72 
  73     // copy/paste of the StringCoding.decode()
  74     static char[] decode(Charset cs, byte[] ba, int off, int len) {
  75         CharsetDecoder cd = cs.newDecoder();
  76         int en = (int)(len * cd.maxCharsPerByte());
  77         char[] ca = new char[en];
  78         if (len == 0)
  79             return ca;
  80         cd.onMalformedInput(CodingErrorAction.REPLACE)
  81           .onUnmappableCharacter(CodingErrorAction.REPLACE)
  82           .reset();
  83 
  84         ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
  85         CharBuffer cb = CharBuffer.wrap(ca);
  86         try {
  87             CoderResult cr = cd.decode(bb, cb, true);
  88             if (!cr.isUnderflow())
  89                 cr.throwException();
  90             cr = cd.flush(cb);
  91             if (!cr.isUnderflow())
  92                 cr.throwException();
  93         } catch (CharacterCodingException x) {
  94             throw new Error(x);
  95         }
  96         return Arrays.copyOf(ca, cb.position());
  97     }
  98 
  99     static byte[] encode(char[] cc, String csn, boolean testDirect)
 100         throws Exception {
 101         ByteBuffer bbf;
 102         CharBuffer cbf;
 103         CharsetEncoder enc = Charset.forName(csn).newEncoder();
 104         if (testDirect) {
 105             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 106             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 107             cbf.put(cc).flip();
 108         } else {
 109             bbf = ByteBuffer.allocate(cc.length * 4);
 110             cbf = CharBuffer.wrap(cc);
 111         }
 112 
 113         CoderResult cr = enc.encode(cbf, bbf, true);
 114         if (cr != CoderResult.UNDERFLOW)
 115             throw new RuntimeException("Encoding err: " + csn);
 116         byte[] bb = new byte[bbf.position()];
 117         bbf.flip(); bbf.get(bb);
 118         return bb;
 119     }
 120 
 121     static CoderResult encodeCR(char[] cc, String csn, boolean testDirect)
 122         throws Exception {
 123         ByteBuffer bbf;
 124         CharBuffer cbf;
 125         CharsetEncoder enc = Charset.forName(csn).newEncoder();
 126         if (testDirect) {
 127             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 128             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 129             cbf.put(cc).flip();
 130         } else {
 131             bbf = ByteBuffer.allocate(cc.length * 4);
 132             cbf = CharBuffer.wrap(cc);
 133         }
 134         return enc.encode(cbf, bbf, true);
 135     }
 136 
 137     static char[] getUTFChars() {
 138         char[] cc = new char[0x10000 - 0xe000 + 0xd800 + //bmp
 139                              (0x110000 - 0x10000) * 2];    //supp
 140         int pos = 0;
 141         int i = 0;
 142         for (i = 0; i < 0xd800; i++)
 143             cc[pos++] = (char)i;
 144         for (i = 0xe000; i < 0x10000; i++)
 145             cc[pos++] = (char)i;
 146         for (i = 0x10000; i < 0x110000; i++) {
 147             pos += Character.toChars(i, cc, pos);
 148         }
 149         return cc;
 150     }
 151 
 152     static int to3ByteUTF8(char c, byte[] bb, int pos) {
 153         bb[pos++] = (byte)(0xe0 | ((c >> 12)));
 154         bb[pos++] = (byte)(0x80 | ((c >> 06) & 0x3f));
 155         bb[pos++] = (byte)(0x80 | ((c >> 00) & 0x3f));
 156         return 3;
 157     }
 158 
 159     static int to4ByteUTF8(int uc, byte[] bb, int pos) {
 160         bb[pos++] = (byte)(0xf0 | ((uc >> 18)));
 161         bb[pos++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 162         bb[pos++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
 163         bb[pos++] = (byte)(0x80 | (uc & 0x3f));
 164         return 4;
 165     }
 166 
 167     static void checkRoundtrip(String csn) throws Exception {
 168         System.out.printf("    Check roundtrip <%s>...", csn);
 169         char[] cc = getUTFChars();
 170         byte[] bb = encode(cc, csn, false);
 171         char[] ccO = decode(bb, csn, false);
 172 
 173         if (!Arrays.equals(cc, ccO))
 174             System.out.printf("    non-direct failed");
 175         bb = encode(cc, csn, true);
 176         ccO = decode(bb, csn, true);
 177         if (!Arrays.equals(cc, ccO)) {
 178             System.out.print("    (direct) failed");
 179         }
 180         // String.getBytes()/toCharArray() goes to ArrayDe/Encoder path
 181         if (!Arrays.equals(bb, new String(cc).getBytes(csn))) {
 182             System.out.printf("    String.getBytes() failed");
 183         }
 184         if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
 185             System.out.printf("    String.toCharArray() failed");
 186         }
 187         System.out.println();
 188     }
 189 
 190     static void check4ByteSurrs(String csn) throws Exception {
 191         System.out.printf("    Check 4-byte Surrogates <%s>...%n", csn);
 192         byte[] bb = new byte[(0x110000 - 0x10000) * 4];
 193         char[] cc = new char[(0x110000 - 0x10000) * 2];
 194         int bpos = 0;
 195         int cpos = 0;
 196         for (int i = 0x10000; i < 0x110000; i++) {
 197             Character.toChars(i, cc, cpos);
 198             bpos += to4ByteUTF8(i, bb, bpos);
 199             cpos += 2;
 200         }
 201         checkSurrs(csn, bb, cc);
 202     }
 203 
 204 
 205     static void checkSurrs(String csn, byte[] bb, char[] cc)
 206         throws Exception
 207     {
 208         char[] ccO = decode(bb, csn, false);
 209         if (!Arrays.equals(cc, ccO)) {
 210             System.out.printf("    decoding failed%n");
 211         }
 212         ccO = decode(bb, csn, true);
 213         if (!Arrays.equals(cc, ccO)) {
 214             System.out.printf("    decoding(direct) failed%n");
 215         }
 216         if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
 217             System.out.printf("    String.toCharArray() failed");
 218         }
 219         if (!Arrays.equals(bb, new String(cc).getBytes(csn))) {
 220             System.out.printf("    String.getBytes() failed");
 221         }
 222     }
 223 
 224     static void check6ByteSurrs(String csn) throws Exception {
 225         System.out.printf("    Check 6-byte Surrogates <%s>...%n", csn);
 226         byte[] bb = new byte[(0x110000 - 0x10000) * 6];
 227         char[] cc = new char[(0x110000 - 0x10000) * 2];
 228         int bpos = 0;
 229         int cpos = 0;
 230         for (int i = 0x10000; i < 0x110000; i++) {
 231             Character.toChars(i, cc, cpos);
 232             bpos += to3ByteUTF8(cc[cpos], bb, bpos);
 233             bpos += to3ByteUTF8(cc[cpos + 1], bb, bpos);
 234             cpos += 2;
 235         }
 236         checkSurrs(csn, bb, cc);
 237     }
 238 
 239 
 240     static void compare(String csn1, String csn2) throws Exception {
 241         System.out.printf("    Diff <%s> <%s>...%n", csn1, csn2);
 242         char[] cc = getUTFChars();
 243 
 244         byte[] bb1 = encode(cc, csn1, false);
 245         byte[] bb2 = encode(cc, csn2, false);
 246         if (!Arrays.equals(bb1, bb2))
 247             System.out.printf("        encoding failed%n");
 248         char[] cc1 = decode(bb1, csn1, false);
 249         char[] cc2 = decode(bb1, csn2, false);
 250         if (!Arrays.equals(cc1, cc2)) {
 251             System.out.printf("        decoding failed%n");
 252         }
 253 
 254         bb1 = encode(cc, csn1, true);
 255         bb2 = encode(cc, csn2, true);
 256         if (!Arrays.equals(bb1, bb2))
 257             System.out.printf("        encoding (direct) failed%n");
 258         cc1 = decode(bb1, csn1, true);
 259         cc2 = decode(bb1, csn2, true);
 260         if (!Arrays.equals(cc1, cc2)) {
 261             System.out.printf("        decoding (direct) failed%n");
 262         }
 263     }
 264 
 265     // The first byte is the length of malformed bytes
 266     static byte[][] malformed = {
 267         // One-byte sequences:
 268         {1, (byte)0xFF },
 269         {1, (byte)0xC0 },
 270         {1, (byte)0x80 },
 271 
 272         {1, (byte)0xFF, (byte)0xFF}, // all ones
 273         {1, (byte)0xA0, (byte)0x80}, // 101x first byte first nibble
 274 
 275         // Two-byte sequences:
 276         {1, (byte)0xC0, (byte)0x80}, // invalid first byte
 277         {1, (byte)0xC1, (byte)0xBF}, // invalid first byte
 278         {1, (byte)0xC2, (byte)0x00}, // invalid second byte
 279         {1, (byte)0xC2, (byte)0xC0}, // invalid second byte
 280         {1, (byte)0xD0, (byte)0x00}, // invalid second byte
 281         {1, (byte)0xD0, (byte)0xC0}, // invalid second byte
 282         {1, (byte)0xDF, (byte)0x00}, // invalid second byte
 283         {1, (byte)0xDF, (byte)0xC0}, // invalid second byte
 284 
 285         // Three-byte sequences
 286         {1, (byte)0xE0, (byte)0x80, (byte)0x80},  // 111x first byte first nibble
 287         {1, (byte)0xE0, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 288         {1, (byte)0xE0, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 289         {1, (byte)0xE0, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 290 
 291         {1, (byte)0xE0, (byte)0xC0, (byte)0xBF }, // invalid second byte
 292         {2, (byte)0xE0, (byte)0xA0, (byte)0x7F }, // invalid third byte
 293         {2, (byte)0xE0, (byte)0xA0, (byte)0xC0 }, // invalid third byte
 294         {2, (byte)0xE1, (byte)0x80, (byte)0x42},  // invalid third byte
 295 
 296         {1, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
 297         {1, (byte)0xE0, (byte)0xC0, (byte)0x80 }, // invalid second byte
 298         {1, (byte)0xE0, (byte)0x80, (byte)0xC0 }, // invalid first byte
 299         {1, (byte)0xE0, (byte)0x41,},             // invalid second byte & 2 bytes
 300         {1, (byte)0xE1, (byte)0x40,},             // invalid second byte & 2 bytes
 301         {3, (byte)0xED, (byte)0xAE, (byte)0x80 }, // 3 bytes surrogate
 302         {3, (byte)0xED, (byte)0xB0, (byte)0x80 }, // 3 bytes surrogate
 303 
 304 
 305 
 306         // Four-byte sequences
 307         {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 308         {1, (byte)0xF0, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 309         {1, (byte)0xF0, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+007F zero-padded
 310         {1, (byte)0xF0, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+07FF zero-padded
 311 
 312         {1, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
 313         {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80},  // invalid second byte
 314         {1, (byte)0xF0, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
 315         {1, (byte)0xF0, (byte)41 },                           // invalid second byte
 316                                                               // & only 2 bytes
 317 
 318         {2, (byte)0xF0, (byte)0x90, (byte)0xC0, (byte)0x80 }, // invalid third byte
 319         {3, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid forth byte
 320         {2, (byte)0xF0, (byte)0x90, (byte)0x41 },             // invalid third byte
 321                                                               // & 3 bytes input
 322 
 323         {1, (byte)0xF1, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
 324         {2, (byte)0xF1, (byte)0x80, (byte)0xC0, (byte)0x80 }, // invalid third byte
 325         {3, (byte)0xF1, (byte)0x80, (byte)0x80, (byte)0xC0 }, // invalid forth byte
 326         {1, (byte)0xF4, (byte)0x90, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
 327         {1, (byte)0xF4, (byte)0xC0, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
 328         {1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
 329 
 330         // #8039751
 331         {1, (byte)0xF6, (byte)0x80, (byte)0x80, (byte)0x80 }, // out-range 1st byte
 332         {1, (byte)0xF6, (byte)0x80, (byte)0x80,  },
 333         {1, (byte)0xF6, (byte)0x80, },
 334         {1, (byte)0xF6, },
 335         {1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0x80 }, // out-range 1st byte
 336         {1, (byte)0xF5, (byte)0x80, (byte)0x80,  },
 337         {1, (byte)0xF5, (byte)0x80,  },
 338         {1, (byte)0xF5  },
 339 
 340         {1, (byte)0xF4, (byte)0x90, (byte)0x80, (byte)0x80 }, // out-range 2nd byte
 341         {1, (byte)0xF4, (byte)0x90, (byte)0x80 },
 342         {1, (byte)0xF4, (byte)0x90 },
 343 
 344         {1, (byte)0xF4, (byte)0x7f, (byte)0x80, (byte)0x80 }, // out-range/ascii 2nd byte
 345         {1, (byte)0xF4, (byte)0x7f, (byte)0x80 },
 346         {1, (byte)0xF4, (byte)0x7f },
 347 
 348         {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // out-range 2nd byte
 349         {1, (byte)0xF0, (byte)0x80, (byte)0x80 },
 350         {1, (byte)0xF0, (byte)0x80 },
 351 
 352         {1, (byte)0xF0, (byte)0xc0, (byte)0x80, (byte)0x80 }, // out-range 2nd byte
 353         {1, (byte)0xF0, (byte)0xc0, (byte)0x80 },
 354         {1, (byte)0xF0, (byte)0xc0 },
 355 
 356         // Five-byte sequences
 357         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80},  // invalid first byte
 358         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 359         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 360         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 361         {1, (byte)0xF8, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
 362 
 363         {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
 364         {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
 365         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
 366         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
 367 
 368         // Six-byte sequences
 369         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 370         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 371         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 372         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
 373         {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
 374         {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
 375         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
 376         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
 377         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
 378     };
 379 
 380    // The first byte is the length of malformed bytes
 381     static byte[][] malformed_cesu8 = {
 382         // One-byte sequences:
 383         {1, (byte)0xFF },
 384         {1, (byte)0xC0 },
 385         {1, (byte)0x80 },
 386 
 387         {1, (byte)0xFF, (byte)0xFF}, // all ones
 388         {1, (byte)0xA0, (byte)0x80}, // 101x first byte first nibble
 389 
 390         // Two-byte sequences:
 391         {1, (byte)0xC0, (byte)0x80}, // invalid first byte
 392         {1, (byte)0xC1, (byte)0xBF}, // invalid first byte
 393         {1, (byte)0xC2, (byte)0x00}, // invalid second byte
 394         {1, (byte)0xC2, (byte)0xC0}, // invalid second byte
 395         {1, (byte)0xD0, (byte)0x00}, // invalid second byte
 396         {1, (byte)0xD0, (byte)0xC0}, // invalid second byte
 397         {1, (byte)0xDF, (byte)0x00}, // invalid second byte
 398         {1, (byte)0xDF, (byte)0xC0}, // invalid second byte
 399 
 400         // Three-byte sequences
 401         {1, (byte)0xE0, (byte)0x80, (byte)0x80},  // 111x first byte first nibble
 402         {1, (byte)0xE0, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 403         {1, (byte)0xE0, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 404         {1, (byte)0xE0, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 405 
 406         {1, (byte)0xE0, (byte)0xC0, (byte)0xBF }, // invalid second byte
 407         {2, (byte)0xE0, (byte)0xA0, (byte)0x7F }, // invalid third byte
 408         {2, (byte)0xE0, (byte)0xA0, (byte)0xC0 }, // invalid third byte
 409         {1, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
 410         {1, (byte)0xE0, (byte)0xC0, (byte)0x80 }, // invalid second byte
 411         {1, (byte)0xE0, (byte)0x80, (byte)0xC0 }, // invalid first byte
 412         {1, (byte)0xE0, (byte)0x41,},             // invalid second byte & 2 bytes
 413 
 414         // CESU-8 does not have 4, 5, 6 bytes sequenc
 415         // Four-byte sequences
 416         {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 417         {1, (byte)0xF0, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 418         {1, (byte)0xF0, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+007F zero-padded
 419         {1, (byte)0xF0, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+07FF zero-padded
 420 
 421         {1, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
 422         {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80},  // invalid second byte
 423         {1, (byte)0xF0, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
 424         {1, (byte)0xF0, (byte)41 },                           // invalid second byte
 425                                                               // & only 2 bytes
 426         {1, (byte)0xF0, (byte)0x90, (byte)0xC0, (byte)0x80 }, // invalid third byte
 427         {1, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid forth byte
 428         {1, (byte)0xF0, (byte)0x90, (byte)0x41 },             // invalid third byte
 429                                                               // & 3 bytes input
 430 
 431         {1, (byte)0xF1, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
 432         {1, (byte)0xF1, (byte)0x80, (byte)0xC0, (byte)0x80 }, // invalid third byte
 433         {1, (byte)0xF1, (byte)0x80, (byte)0x80, (byte)0xC0 }, // invalid forth byte
 434         {1, (byte)0xF4, (byte)0x90, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
 435         {1, (byte)0xF4, (byte)0xC0, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
 436         {1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
 437 
 438         // Five-byte sequences
 439         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80},  // invalid first byte
 440         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 441         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 442         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 443         {1, (byte)0xF8, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
 444 
 445         {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
 446         {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
 447         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
 448         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
 449 
 450         // Six-byte sequences
 451         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 452         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 453         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 454         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
 455         {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
 456         {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
 457         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
 458         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
 459         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
 460     };
 461 
 462 
 463     static void checkMalformed(String csn, byte[][] malformed) throws Exception {
 464         boolean failed = false;
 465         System.out.printf("    Check malformed <%s>...%n", csn);
 466         Charset cs = Charset.forName(csn);
 467         for (boolean direct: new boolean[] {false, true}) {
 468             for (byte[] bins : malformed) {
 469                 int mlen = bins[0];
 470                 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
 471                 CoderResult cr = decodeCR(bin, csn, direct);
 472                 String ashex = "";
 473                 for (int i = 0; i < bin.length; i++) {
 474                     if (i > 0) ashex += " ";
 475                         ashex += Integer.toBinaryString((int)bin[i] & 0xff);
 476                 }
 477                 if (!cr.isMalformed()) {
 478                     System.out.printf("        FAIL(direct=%b): [%s] not malformed.%n", direct, ashex);
 479                     failed = true;
 480                 } else if (cr.length() != mlen) {
 481                     System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].%n", direct, ashex, cr.length());
 482                     failed = true;
 483                 }
 484                 if (!Arrays.equals(decode(cs, bin, 0, bin.length),
 485                                    new String(bin, csn).toCharArray())) {
 486                     System.out.printf("        FAIL(new String(bb, %s)) failed%n", csn);
 487                     failed = true;
 488                 }
 489             }
 490         }
 491         if (failed)
 492             throw new RuntimeException("Check malformed failed " + csn);
 493     }
 494 
 495     static boolean check(CharsetDecoder dec, byte[] utf8s, boolean direct, int[] flow) {
 496         int inPos = flow[0];
 497         int inLen = flow[1];
 498         int outPos = flow[2];
 499         int outLen = flow[3];
 500         int expedInPos = flow[4];
 501         int expedOutPos = flow[5];
 502         CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
 503                                           :CoderResult.OVERFLOW;
 504         ByteBuffer bbf;
 505         CharBuffer cbf;
 506         if (direct) {
 507             bbf = ByteBuffer.allocateDirect(inPos + utf8s.length);
 508             cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
 509         } else {
 510             bbf = ByteBuffer.allocate(inPos + utf8s.length);
 511             cbf = CharBuffer.allocate(outPos + outLen);
 512         }
 513         bbf.position(inPos);
 514         bbf.put(utf8s).flip().position(inPos).limit(inPos + inLen);
 515         cbf.position(outPos);
 516         dec.reset();
 517         CoderResult cr = dec.decode(bbf, cbf, false);
 518         if (cr != expedCR ||
 519             bbf.position() != expedInPos ||
 520             cbf.position() != expedOutPos) {
 521             System.out.printf("Expected(direct=%5b): [", direct);
 522             for (int i:flow) System.out.print(" " + i);
 523             System.out.println("]  CR=" + cr +
 524                                ", inPos=" + bbf.position() +
 525                                ", outPos=" + cbf.position());
 526             return false;
 527         }
 528         return true;
 529     }
 530 
 531     static void checkUnderOverflow(String csn) throws Exception {
 532         System.out.printf("    Check under/overflow <%s>...%n", csn);
 533         CharsetDecoder dec = Charset.forName(csn).newDecoder();
 534         boolean failed = false;
 535         byte[] utf8s = new String("\u007f\u07ff\ue000\ud800\udc00").getBytes("UTF-8");
 536         int    inlen = utf8s.length;
 537 
 538         for (int inoff = 0; inoff < 20; inoff++) {
 539             for (int outoff = 0; outoff < 20; outoff++) {
 540         int[][] Flows = {
 541             //inpos, inLen, outPos,  outLen, inPosEP,   outposEP,   under(0)/over(1)
 542             {inoff,  inlen, outoff,  1,      inoff + 1, outoff + 1, 1},
 543             {inoff,  inlen, outoff,  2,      inoff + 3, outoff + 2, 1},
 544             {inoff,  inlen, outoff,  3,      inoff + 6, outoff + 3, 1},
 545             {inoff,  inlen, outoff,  4,      inoff + 6, outoff + 3, 1},
 546             {inoff,  inlen, outoff,  5,      inoff + 10,outoff + 5, 0},
 547              // underflow
 548             {inoff,  1,     outoff,  5,      inoff + 1, outoff + 1, 0},
 549             {inoff,  2,     outoff,  5,      inoff + 1, outoff + 1, 0},
 550             {inoff,  3,     outoff,  5,      inoff + 3, outoff + 2, 0},
 551             {inoff,  4,     outoff,  5,      inoff + 3, outoff + 2, 0},
 552             {inoff,  5,     outoff,  5,      inoff + 3, outoff + 2, 0},
 553             {inoff,  6,     outoff,  5,      inoff + 6, outoff + 3, 0},
 554             {inoff,  7,     outoff,  5,      inoff + 6, outoff + 3, 0},
 555             {inoff,  8,     outoff,  5,      inoff + 6, outoff + 3, 0},
 556             {inoff,  9,     outoff,  5,      inoff + 6, outoff + 3, 0},
 557             {inoff,  10,    outoff,  5,      inoff + 10,outoff + 5, 0},
 558              // 2-byte underflow/overflow
 559             {inoff,  2,     outoff,  1,      inoff + 1, outoff + 1, 0},
 560             {inoff,  3,     outoff,  1,      inoff + 1, outoff + 1, 1},
 561              // 3-byte underflow/overflow
 562             {inoff,  4,     outoff,  2,      inoff + 3, outoff + 2, 0},
 563             {inoff,  5,     outoff,  2,      inoff + 3, outoff + 2, 0},
 564             {inoff,  6,     outoff,  2,      inoff + 3, outoff + 2, 1},
 565              // 4-byte underflow/overflow
 566             {inoff,  7,     outoff,  4,      inoff + 6, outoff + 3, 0},
 567             {inoff,  8,     outoff,  4,      inoff + 6, outoff + 3, 0},
 568             {inoff,  9,     outoff,  4,      inoff + 6, outoff + 3, 0},
 569             {inoff,  10,    outoff,  4,      inoff + 6, outoff + 3, 1},
 570         };
 571         for (boolean direct: new boolean[] {false, true}) {
 572             for (int[] flow: Flows) {
 573                 if (!check(dec, utf8s, direct, flow))
 574                     failed = true;
 575             }
 576         }}}
 577         if (failed)
 578             throw new RuntimeException("Check under/overflow failed " + csn);
 579     }
 580 
 581     public static void main(String[] args) throws Exception {
 582         checkRoundtrip("UTF-8");
 583         check4ByteSurrs("UTF-8");
 584         checkMalformed("UTF-8", malformed);
 585         checkUnderOverflow("UTF-8");
 586         checkRoundtrip("CESU-8");
 587         check6ByteSurrs("CESU-8");
 588         checkMalformed("CESU-8", malformed_cesu8);
 589     }
 590 }