1 /*
   2  * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /*
  25  * @test
  26  * @bug 4486841 7040220 7096080
  27  * @summary Test UTF-8 charset
  28  */
  29 
  30 import java.nio.charset.*;
  31 import java.nio.*;
  32 import java.util.*;
  33 
  34 public class TestUTF8 {
  35     static char[] decode(byte[] bb, String csn, boolean testDirect)
  36         throws Exception {
  37         CharsetDecoder dec = Charset.forName(csn).newDecoder();
  38         ByteBuffer bbf;
  39         CharBuffer cbf;
  40         if (testDirect) {
  41             bbf = ByteBuffer.allocateDirect(bb.length);
  42             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  43             bbf.put(bb).flip();
  44         } else {
  45             bbf = ByteBuffer.wrap(bb);
  46             cbf = CharBuffer.allocate(bb.length);
  47         }
  48         CoderResult cr = dec.decode(bbf, cbf, true);
  49         if (cr != CoderResult.UNDERFLOW)
  50             throw new RuntimeException("Decoding err: " + csn);
  51         char[] cc = new char[cbf.position()];
  52         cbf.flip(); cbf.get(cc);
  53         return cc;
  54 
  55     }
  56 
  57     static CoderResult decodeCR(byte[] bb, String csn, boolean testDirect)
  58         throws Exception {
  59         CharsetDecoder dec = Charset.forName(csn).newDecoder();
  60         ByteBuffer bbf;
  61         CharBuffer cbf;
  62         if (testDirect) {
  63             bbf = ByteBuffer.allocateDirect(bb.length);
  64             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  65             bbf.put(bb).flip();
  66         } else {
  67             bbf = ByteBuffer.wrap(bb);
  68             cbf = CharBuffer.allocate(bb.length);
  69         }
  70         return dec.decode(bbf, cbf, true);
  71     }
  72 
  73     // copy/paste of the StringCoding.decode()
  74     static char[] decode(Charset cs, byte[] ba, int off, int len) {
  75         CharsetDecoder cd = cs.newDecoder();
  76         int en = (int)(len * cd.maxCharsPerByte());
  77         char[] ca = new char[en];
  78         if (len == 0)
  79             return ca;
  80         cd.onMalformedInput(CodingErrorAction.REPLACE)
  81           .onUnmappableCharacter(CodingErrorAction.REPLACE)
  82           .reset();
  83 
  84         ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
  85         CharBuffer cb = CharBuffer.wrap(ca);
  86         try {
  87             CoderResult cr = cd.decode(bb, cb, true);
  88             if (!cr.isUnderflow())
  89                 cr.throwException();
  90             cr = cd.flush(cb);
  91             if (!cr.isUnderflow())
  92                 cr.throwException();
  93         } catch (CharacterCodingException x) {
  94             throw new Error(x);
  95         }
  96         return Arrays.copyOf(ca, cb.position());
  97     }
  98 
  99     static byte[] encode(char[] cc, String csn, boolean testDirect)
 100         throws Exception {
 101         ByteBuffer bbf;
 102         CharBuffer cbf;
 103         CharsetEncoder enc = Charset.forName(csn).newEncoder();
 104         if (testDirect) {
 105             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 106             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 107             cbf.put(cc).flip();
 108         } else {
 109             bbf = ByteBuffer.allocate(cc.length * 4);
 110             cbf = CharBuffer.wrap(cc);
 111         }
 112 
 113         CoderResult cr = enc.encode(cbf, bbf, true);
 114         if (cr != CoderResult.UNDERFLOW)
 115             throw new RuntimeException("Encoding err: " + csn);
 116         byte[] bb = new byte[bbf.position()];
 117         bbf.flip(); bbf.get(bb);
 118         return bb;
 119     }
 120 
 121     static CoderResult encodeCR(char[] cc, String csn, boolean testDirect)
 122         throws Exception {
 123         ByteBuffer bbf;
 124         CharBuffer cbf;
 125         CharsetEncoder enc = Charset.forName(csn).newEncoder();
 126         if (testDirect) {
 127             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 128             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 129             cbf.put(cc).flip();
 130         } else {
 131             bbf = ByteBuffer.allocate(cc.length * 4);
 132             cbf = CharBuffer.wrap(cc);
 133         }
 134         return enc.encode(cbf, bbf, true);
 135     }
 136 
 137     static char[] getUTFChars() {
 138         char[] cc = new char[0x10000 - 0xe000 + 0xd800 + //bmp
 139                              (0x110000 - 0x10000) * 2];    //supp
 140         int pos = 0;
 141         int i = 0;
 142         for (i = 0; i < 0xd800; i++)
 143             cc[pos++] = (char)i;
 144         for (i = 0xe000; i < 0x10000; i++)
 145             cc[pos++] = (char)i;
 146         for (i = 0x10000; i < 0x110000; i++) {
 147             pos += Character.toChars(i, cc, pos);
 148         }
 149         return cc;
 150     }
 151 
 152     static int to3ByteUTF8(char c, byte[] bb, int pos) {
 153         bb[pos++] = (byte)(0xe0 | ((c >> 12)));
 154         bb[pos++] = (byte)(0x80 | ((c >> 06) & 0x3f));
 155         bb[pos++] = (byte)(0x80 | ((c >> 00) & 0x3f));
 156         return 3;
 157     }
 158 
 159     static int to4ByteUTF8(int uc, byte[] bb, int pos) {
 160         bb[pos++] = (byte)(0xf0 | ((uc >> 18)));
 161         bb[pos++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
 162         bb[pos++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
 163         bb[pos++] = (byte)(0x80 | (uc & 0x3f));
 164         return 4;
 165     }
 166 
 167     static void checkRoundtrip(String csn) throws Exception {
 168         System.out.printf("    Check roundtrip <%s>...", csn);
 169         char[] cc = getUTFChars();
 170         byte[] bb = encode(cc, csn, false);
 171         char[] ccO = decode(bb, csn, false);
 172 
 173         if (!Arrays.equals(cc, ccO))
 174             System.out.printf("    non-direct failed");
 175         bb = encode(cc, csn, true);
 176         ccO = decode(bb, csn, true);
 177         if (!Arrays.equals(cc, ccO)) {
 178             System.out.print("    (direct) failed");
 179         }
 180         // String.getBytes()/toCharArray() goes to ArrayDe/Encoder path
 181         if (!Arrays.equals(bb, new String(cc).getBytes(csn))) {
 182             System.out.printf("    String.getBytes() failed");
 183         }
 184         if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
 185             System.out.printf("    String.toCharArray() failed");
 186         }
 187         System.out.println();
 188     }
 189 
 190     static void check4ByteSurrs(String csn) throws Exception {
 191         System.out.printf("    Check 4-byte Surrogates <%s>...%n", csn);
 192         byte[] bb = new byte[(0x110000 - 0x10000) * 4];
 193         char[] cc = new char[(0x110000 - 0x10000) * 2];
 194         int bpos = 0;
 195         int cpos = 0;
 196         for (int i = 0x10000; i < 0x110000; i++) {
 197             Character.toChars(i, cc, cpos);
 198             bpos += to4ByteUTF8(i, bb, bpos);
 199             cpos += 2;
 200         }
 201         checkSurrs(csn, bb, cc);
 202     }
 203 
 204 
 205     static void checkSurrs(String csn, byte[] bb, char[] cc)
 206         throws Exception
 207     {
 208         char[] ccO = decode(bb, csn, false);
 209         if (!Arrays.equals(cc, ccO)) {
 210             System.out.printf("    decoding failed%n");
 211         }
 212         ccO = decode(bb, csn, true);
 213         if (!Arrays.equals(cc, ccO)) {
 214             System.out.printf("    decoding(direct) failed%n");
 215         }
 216         if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
 217             System.out.printf("    String.toCharArray() failed");
 218         }
 219         if (!Arrays.equals(bb, new String(cc).getBytes(csn))) {
 220             System.out.printf("    String.getBytes() failed");
 221         }
 222     }
 223 
 224     static void check6ByteSurrs(String csn) throws Exception {
 225         System.out.printf("    Check 6-byte Surrogates <%s>...%n", csn);
 226         byte[] bb = new byte[(0x110000 - 0x10000) * 6];
 227         char[] cc = new char[(0x110000 - 0x10000) * 2];
 228         int bpos = 0;
 229         int cpos = 0;
 230         for (int i = 0x10000; i < 0x110000; i++) {
 231             Character.toChars(i, cc, cpos);
 232             bpos += to3ByteUTF8(cc[cpos], bb, bpos);
 233             bpos += to3ByteUTF8(cc[cpos + 1], bb, bpos);
 234             cpos += 2;
 235         }
 236         checkSurrs(csn, bb, cc);
 237     }
 238 
 239 
 240     static void compare(String csn1, String csn2) throws Exception {
 241         System.out.printf("    Diff <%s> <%s>...%n", csn1, csn2);
 242         char[] cc = getUTFChars();
 243 
 244         byte[] bb1 = encode(cc, csn1, false);
 245         byte[] bb2 = encode(cc, csn2, false);
 246         if (!Arrays.equals(bb1, bb2))
 247             System.out.printf("        encoding failed%n");
 248         char[] cc1 = decode(bb1, csn1, false);
 249         char[] cc2 = decode(bb1, csn2, false);
 250         if (!Arrays.equals(cc1, cc2)) {
 251             System.out.printf("        decoding failed%n");
 252         }
 253 
 254         bb1 = encode(cc, csn1, true);
 255         bb2 = encode(cc, csn2, true);
 256         if (!Arrays.equals(bb1, bb2))
 257             System.out.printf("        encoding (direct) failed%n");
 258         cc1 = decode(bb1, csn1, true);
 259         cc2 = decode(bb1, csn2, true);
 260         if (!Arrays.equals(cc1, cc2)) {
 261             System.out.printf("        decoding (direct) failed%n");
 262         }
 263     }
 264 
 265     // The first byte is the length of malformed bytes
 266     static byte[][] malformed = {
 267         // One-byte sequences:
 268         {1, (byte)0xFF },
 269         {1, (byte)0xC0 },
 270         {1, (byte)0x80 },
 271 
 272         {1, (byte)0xFF, (byte)0xFF}, // all ones
 273         {1, (byte)0xA0, (byte)0x80}, // 101x first byte first nibble
 274 
 275         // Two-byte sequences:
 276         {1, (byte)0xC0, (byte)0x80}, // invalid first byte
 277         {1, (byte)0xC1, (byte)0xBF}, // invalid first byte
 278         {1, (byte)0xC2, (byte)0x00}, // invalid second byte
 279         {1, (byte)0xC2, (byte)0xC0}, // invalid second byte
 280         {1, (byte)0xD0, (byte)0x00}, // invalid second byte
 281         {1, (byte)0xD0, (byte)0xC0}, // invalid second byte
 282         {1, (byte)0xDF, (byte)0x00}, // invalid second byte
 283         {1, (byte)0xDF, (byte)0xC0}, // invalid second byte
 284 
 285         // Three-byte sequences
 286         {1, (byte)0xE0, (byte)0x80, (byte)0x80},  // 111x first byte first nibble
 287         {1, (byte)0xE0, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 288         {1, (byte)0xE0, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 289         {1, (byte)0xE0, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 290 
 291         {1, (byte)0xE0, (byte)0xC0, (byte)0xBF }, // invalid second byte
 292         {2, (byte)0xE0, (byte)0xA0, (byte)0x7F }, // invalid third byte
 293         {2, (byte)0xE0, (byte)0xA0, (byte)0xC0 }, // invalid third byte
 294         {1, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
 295         {1, (byte)0xE0, (byte)0xC0, (byte)0x80 }, // invalid second byte
 296         {1, (byte)0xE0, (byte)0x80, (byte)0xC0 }, // invalid first byte
 297         {1, (byte)0xE0, (byte)0x41,},             // invalid second byte & 2 bytes
 298         {3, (byte)0xED, (byte)0xAE, (byte)0x80 }, // 3 bytes surrogate
 299         {3, (byte)0xED, (byte)0xB0, (byte)0x80 }, // 3 bytes surrogate
 300 
 301 
 302         // Four-byte sequences
 303         {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 304         {1, (byte)0xF0, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 305         {1, (byte)0xF0, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+007F zero-padded
 306         {1, (byte)0xF0, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+07FF zero-padded
 307 
 308         {1, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
 309         {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80},  // invalid second byte
 310         {1, (byte)0xF0, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
 311         {1, (byte)0xF0, (byte)41 },                           // invalid second byte
 312                                                               // & only 2 bytes
 313 
 314         {2, (byte)0xF0, (byte)0x90, (byte)0xC0, (byte)0x80 }, // invalid third byte
 315         {3, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid forth byte
 316         {2, (byte)0xF0, (byte)0x90, (byte)0x41 },             // invalid third byte
 317                                                               // & 3 bytes input
 318 
 319         {1, (byte)0xF1, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
 320         {2, (byte)0xF1, (byte)0x80, (byte)0xC0, (byte)0x80 }, // invalid third byte
 321         {3, (byte)0xF1, (byte)0x80, (byte)0x80, (byte)0xC0 }, // invalid forth byte
 322         {1, (byte)0xF4, (byte)0x90, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
 323         {1, (byte)0xF4, (byte)0xC0, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
 324         {1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
 325 
 326         // Five-byte sequences
 327         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80},  // invalid first byte
 328         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 329         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 330         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 331         {1, (byte)0xF8, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
 332 
 333         {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
 334         {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
 335         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
 336         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
 337 
 338         // Six-byte sequences
 339         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 340         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 341         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 342         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
 343         {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
 344         {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
 345         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
 346         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
 347         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
 348     };
 349 
 350    // The first byte is the length of malformed bytes
 351     static byte[][] malformed_cesu8 = {
 352         // One-byte sequences:
 353         {1, (byte)0xFF },
 354         {1, (byte)0xC0 },
 355         {1, (byte)0x80 },
 356 
 357         {1, (byte)0xFF, (byte)0xFF}, // all ones
 358         {1, (byte)0xA0, (byte)0x80}, // 101x first byte first nibble
 359 
 360         // Two-byte sequences:
 361         {1, (byte)0xC0, (byte)0x80}, // invalid first byte
 362         {1, (byte)0xC1, (byte)0xBF}, // invalid first byte
 363         {1, (byte)0xC2, (byte)0x00}, // invalid second byte
 364         {1, (byte)0xC2, (byte)0xC0}, // invalid second byte
 365         {1, (byte)0xD0, (byte)0x00}, // invalid second byte
 366         {1, (byte)0xD0, (byte)0xC0}, // invalid second byte
 367         {1, (byte)0xDF, (byte)0x00}, // invalid second byte
 368         {1, (byte)0xDF, (byte)0xC0}, // invalid second byte
 369 
 370         // Three-byte sequences
 371         {1, (byte)0xE0, (byte)0x80, (byte)0x80},  // 111x first byte first nibble
 372         {1, (byte)0xE0, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 373         {1, (byte)0xE0, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 374         {1, (byte)0xE0, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 375 
 376         {1, (byte)0xE0, (byte)0xC0, (byte)0xBF }, // invalid second byte
 377         {2, (byte)0xE0, (byte)0xA0, (byte)0x7F }, // invalid third byte
 378         {2, (byte)0xE0, (byte)0xA0, (byte)0xC0 }, // invalid third byte
 379         {1, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
 380         {1, (byte)0xE0, (byte)0xC0, (byte)0x80 }, // invalid second byte
 381         {1, (byte)0xE0, (byte)0x80, (byte)0xC0 }, // invalid first byte
 382         {1, (byte)0xE0, (byte)0x41,},             // invalid second byte & 2 bytes
 383 
 384         // CESU-8 does not have 4, 5, 6 bytes sequenc
 385         // Four-byte sequences
 386         {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 387         {1, (byte)0xF0, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 388         {1, (byte)0xF0, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+007F zero-padded
 389         {1, (byte)0xF0, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+07FF zero-padded
 390 
 391         {1, (byte)0xFF, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
 392         {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80},  // invalid second byte
 393         {1, (byte)0xF0, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
 394         {1, (byte)0xF0, (byte)41 },                           // invalid second byte
 395                                                               // & only 2 bytes
 396         {1, (byte)0xF0, (byte)0x90, (byte)0xC0, (byte)0x80 }, // invalid third byte
 397         {1, (byte)0xF0, (byte)0x90, (byte)0x80, (byte)0xC0 }, // invalid forth byte
 398         {1, (byte)0xF0, (byte)0x90, (byte)0x41 },             // invalid third byte
 399                                                               // & 3 bytes input
 400 
 401         {1, (byte)0xF1, (byte)0xC0, (byte)0x80, (byte)0x80 }, // invalid second byte
 402         {1, (byte)0xF1, (byte)0x80, (byte)0xC0, (byte)0x80 }, // invalid third byte
 403         {1, (byte)0xF1, (byte)0x80, (byte)0x80, (byte)0xC0 }, // invalid forth byte
 404         {1, (byte)0xF4, (byte)0x90, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
 405         {1, (byte)0xF4, (byte)0xC0, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
 406         {1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
 407 
 408         // Five-byte sequences
 409         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80},  // invalid first byte
 410         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 411         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 412         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 413         {1, (byte)0xF8, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
 414 
 415         {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
 416         {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
 417         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
 418         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
 419 
 420         // Six-byte sequences
 421         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 422         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 423         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 424         {1, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
 425         {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
 426         {1, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
 427         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
 428         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
 429         {1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
 430     };
 431 
 432 
 433     static void checkMalformed(String csn, byte[][] malformed) throws Exception {
 434         boolean failed = false;
 435         System.out.printf("    Check malformed <%s>...%n", csn);
 436         Charset cs = Charset.forName(csn);
 437         for (boolean direct: new boolean[] {false, true}) {
 438             for (byte[] bins : malformed) {
 439                 int mlen = bins[0];
 440                 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
 441                 CoderResult cr = decodeCR(bin, csn, direct);
 442                 String ashex = "";
 443                 for (int i = 0; i < bin.length; i++) {
 444                     if (i > 0) ashex += " ";
 445                         ashex += Integer.toBinaryString((int)bin[i] & 0xff);
 446                 }
 447                 if (!cr.isMalformed()) {
 448                     System.out.printf("        FAIL(direct=%b): [%s] not malformed.%n", direct, ashex);
 449                     failed = true;
 450                 } else if (cr.length() != mlen) {
 451                     System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].%n", direct, ashex, cr.length());
 452                     failed = true;
 453                 }
 454                 if (!Arrays.equals(decode(cs, bin, 0, bin.length),
 455                                    new String(bin, csn).toCharArray())) {
 456                     System.out.printf("        FAIL(new String(bb, %s)) failed%n", csn);
 457                     failed = true;
 458                 }
 459             }
 460         }
 461         if (failed)
 462             throw new RuntimeException("Check malformed failed " + csn);
 463     }
 464 
 465     static boolean check(CharsetDecoder dec, byte[] utf8s, boolean direct, int[] flow) {
 466         int inPos = flow[0];
 467         int inLen = flow[1];
 468         int outPos = flow[2];
 469         int outLen = flow[3];
 470         int expedInPos = flow[4];
 471         int expedOutPos = flow[5];
 472         CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
 473                                           :CoderResult.OVERFLOW;
 474         ByteBuffer bbf;
 475         CharBuffer cbf;
 476         if (direct) {
 477             bbf = ByteBuffer.allocateDirect(inPos + utf8s.length);
 478             cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
 479         } else {
 480             bbf = ByteBuffer.allocate(inPos + utf8s.length);
 481             cbf = CharBuffer.allocate(outPos + outLen);
 482         }
 483         bbf.position(inPos);
 484         bbf.put(utf8s).flip().position(inPos).limit(inPos + inLen);
 485         cbf.position(outPos);
 486         dec.reset();
 487         CoderResult cr = dec.decode(bbf, cbf, false);
 488         if (cr != expedCR ||
 489             bbf.position() != expedInPos ||
 490             cbf.position() != expedOutPos) {
 491             System.out.printf("Expected(direct=%5b): [", direct);
 492             for (int i:flow) System.out.print(" " + i);
 493             System.out.println("]  CR=" + cr +
 494                                ", inPos=" + bbf.position() +
 495                                ", outPos=" + cbf.position());
 496             return false;
 497         }
 498         return true;
 499     }
 500 
 501     static void checkUnderOverflow(String csn) throws Exception {
 502         System.out.printf("    Check under/overflow <%s>...%n", csn);
 503         CharsetDecoder dec = Charset.forName(csn).newDecoder();
 504         boolean failed = false;
 505         byte[] utf8s = new String("\u007f\u07ff\ue000\ud800\udc00").getBytes("UTF-8");
 506         int    inlen = utf8s.length;
 507 
 508         for (int inoff = 0; inoff < 20; inoff++) {
 509             for (int outoff = 0; outoff < 20; outoff++) {
 510         int[][] Flows = {
 511             //inpos, inLen, outPos,  outLen, inPosEP,   outposEP,   under(0)/over(1)
 512             {inoff,  inlen, outoff,  1,      inoff + 1, outoff + 1, 1},
 513             {inoff,  inlen, outoff,  2,      inoff + 3, outoff + 2, 1},
 514             {inoff,  inlen, outoff,  3,      inoff + 6, outoff + 3, 1},
 515             {inoff,  inlen, outoff,  4,      inoff + 6, outoff + 3, 1},
 516             {inoff,  inlen, outoff,  5,      inoff + 10,outoff + 5, 0},
 517              // underflow
 518             {inoff,  1,     outoff,  5,      inoff + 1, outoff + 1, 0},
 519             {inoff,  2,     outoff,  5,      inoff + 1, outoff + 1, 0},
 520             {inoff,  3,     outoff,  5,      inoff + 3, outoff + 2, 0},
 521             {inoff,  4,     outoff,  5,      inoff + 3, outoff + 2, 0},
 522             {inoff,  5,     outoff,  5,      inoff + 3, outoff + 2, 0},
 523             {inoff,  6,     outoff,  5,      inoff + 6, outoff + 3, 0},
 524             {inoff,  7,     outoff,  5,      inoff + 6, outoff + 3, 0},
 525             {inoff,  8,     outoff,  5,      inoff + 6, outoff + 3, 0},
 526             {inoff,  9,     outoff,  5,      inoff + 6, outoff + 3, 0},
 527             {inoff,  10,    outoff,  5,      inoff + 10,outoff + 5, 0},
 528              // 2-byte underflow/overflow
 529             {inoff,  2,     outoff,  1,      inoff + 1, outoff + 1, 0},
 530             {inoff,  3,     outoff,  1,      inoff + 1, outoff + 1, 1},
 531              // 3-byte underflow/overflow
 532             {inoff,  4,     outoff,  2,      inoff + 3, outoff + 2, 0},
 533             {inoff,  5,     outoff,  2,      inoff + 3, outoff + 2, 0},
 534             {inoff,  6,     outoff,  2,      inoff + 3, outoff + 2, 1},
 535              // 4-byte underflow/overflow
 536             {inoff,  7,     outoff,  4,      inoff + 6, outoff + 3, 0},
 537             {inoff,  8,     outoff,  4,      inoff + 6, outoff + 3, 0},
 538             {inoff,  9,     outoff,  4,      inoff + 6, outoff + 3, 0},
 539             {inoff,  10,    outoff,  4,      inoff + 6, outoff + 3, 1},
 540         };
 541         for (boolean direct: new boolean[] {false, true}) {
 542             for (int[] flow: Flows) {
 543                 if (!check(dec, utf8s, direct, flow))
 544                     failed = true;
 545             }
 546         }}}
 547         if (failed)
 548             throw new RuntimeException("Check under/overflow failed " + csn);
 549     }
 550 
 551     public static void main(String[] args) throws Exception {
 552         checkRoundtrip("UTF-8");
 553         check4ByteSurrs("UTF-8");
 554         checkMalformed("UTF-8", malformed);
 555         checkUnderOverflow("UTF-8");
 556 
 557         checkRoundtrip("CESU-8");
 558         check6ByteSurrs("CESU-8");
 559         checkMalformed("CESU-8", malformed_cesu8);
 560     }
 561 }