test/sun/nio/cs/TestUTF8.java

Print this page




   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /*
  25  * @test
  26  * @bug 4486841
  27  * @summary Test UTF-8 charset
  28  */
  29 
  30 import java.nio.charset.*;
  31 import java.nio.*;
  32 import java.util.*;
  33 
  34 public class TestUTF8 {
  35     static char[] decode(byte[] bb, String csn, boolean testDirect)
  36         throws Exception {
  37         CharsetDecoder dec = Charset.forName(csn).newDecoder();
  38         ByteBuffer bbf;
  39         CharBuffer cbf;
  40         if (testDirect) {
  41             bbf = ByteBuffer.allocateDirect(bb.length);
  42             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  43             bbf.put(bb).flip();
  44         } else {
  45             bbf = ByteBuffer.wrap(bb);
  46             cbf = CharBuffer.allocate(bb.length);


  53         return cc;
  54 
  55     }
  56 
  57     static CoderResult decodeCR(byte[] bb, String csn, boolean testDirect)
  58         throws Exception {
  59         CharsetDecoder dec = Charset.forName(csn).newDecoder();
  60         ByteBuffer bbf;
  61         CharBuffer cbf;
  62         if (testDirect) {
  63             bbf = ByteBuffer.allocateDirect(bb.length);
  64             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  65             bbf.put(bb).flip();
  66         } else {
  67             bbf = ByteBuffer.wrap(bb);
  68             cbf = CharBuffer.allocate(bb.length);
  69         }
  70         return dec.decode(bbf, cbf, true);
  71     }
  72 


























  73     static byte[] encode(char[] cc, String csn, boolean testDirect)
  74         throws Exception {
  75         ByteBuffer bbf;
  76         CharBuffer cbf;
  77         CharsetEncoder enc = Charset.forName(csn).newEncoder();
  78         if (testDirect) {
  79             bbf = ByteBuffer.allocateDirect(cc.length * 4);
  80             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
  81             cbf.put(cc).flip();
  82         } else {
  83             bbf = ByteBuffer.allocate(cc.length * 4);
  84             cbf = CharBuffer.wrap(cc);
  85         }
  86 
  87         CoderResult cr = enc.encode(cbf, bbf, true);
  88         if (cr != CoderResult.UNDERFLOW)
  89             throw new RuntimeException("Encoding err: " + csn);
  90         byte[] bb = new byte[bbf.position()];
  91         bbf.flip(); bbf.get(bb);
  92         return bb;


 125 
 126     static int to3ByteUTF8(char c, byte[] bb, int pos) {
 127         bb[pos++] = (byte)(0xe0 | ((c >> 12)));
 128         bb[pos++] = (byte)(0x80 | ((c >> 06) & 0x3f));
 129         bb[pos++] = (byte)(0x80 | ((c >> 00) & 0x3f));
 130         return 3;
 131     }
 132 
 133     static void checkRoundtrip(String csn) throws Exception {
 134         System.out.printf("    Check roundtrip <%s>...", csn);
 135         char[] cc = getUTFChars();
 136         byte[] bb = encode(cc, csn, false);
 137         char[] ccO = decode(bb, csn, false);
 138 
 139         if (!Arrays.equals(cc, ccO)) {
 140             System.out.printf("    non-direct failed");
 141         }
 142         bb = encode(cc, csn, true);
 143         ccO = decode(bb, csn, true);
 144         if (!Arrays.equals(cc, ccO)) {
 145             System.out.printf("    (direct) failed");







 146         }
 147         System.out.println();
 148     }
 149 
 150     static void check6ByteSurrs(String csn) throws Exception {
 151         System.out.printf("    Check 6-byte Surrogates <%s>...%n", csn);
 152         byte[] bb = new byte[(0x110000 - 0x10000) * 6];
 153         char[] cc = new char[(0x110000 - 0x10000) * 2];
 154         int bpos = 0;
 155         int cpos = 0;
 156         for (int i = 0x10000; i < 0x110000; i++) {
 157             Character.toChars(i, cc, cpos);
 158             bpos += to3ByteUTF8(cc[cpos], bb, bpos);
 159             bpos += to3ByteUTF8(cc[cpos + 1], bb, bpos);
 160             cpos += 2;
 161         }
 162 
 163         char[] ccO = decode(bb, csn, false);
 164         if (!Arrays.equals(cc, ccO)) {
 165             System.out.printf("    decoding failed%n");
 166         }
 167         ccO = decode(bb, csn, true);
 168         if (!Arrays.equals(cc, ccO)) {
 169             System.out.printf("    decoding(direct) failed%n");
 170         }






 171     }
 172 
 173     static void compare(String csn1, String csn2) throws Exception {
 174         System.out.printf("    Diff <%s> <%s>...%n", csn1, csn2);
 175         char[] cc = getUTFChars();
 176 
 177         byte[] bb1 = encode(cc, csn1, false);
 178         byte[] bb2 = encode(cc, csn2, false);
 179         if (!Arrays.equals(bb1, bb2))
 180             System.out.printf("        encoding failed%n");
 181         char[] cc1 = decode(bb1, csn1, false);
 182         char[] cc2 = decode(bb1, csn2, false);
 183         if (!Arrays.equals(cc1, cc2)) {
 184             System.out.printf("        decoding failed%n");
 185         }
 186 
 187         bb1 = encode(cc, csn1, true);
 188         bb2 = encode(cc, csn2, true);
 189         if (!Arrays.equals(bb1, bb2))
 190             System.out.printf("        encoding (direct) failed%n");


 257         {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
 258         {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
 259         {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
 260         {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
 261 
 262         // Six-byte sequences
 263         {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 264         {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 265         {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 266         {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
 267         {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
 268         {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
 269         {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
 270         {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
 271         {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
 272     };
 273 
 274     static void checkMalformed(String csn) throws Exception {
 275         boolean failed = false;
 276         System.out.printf("    Check malformed <%s>...%n", csn);

 277         for (boolean direct: new boolean[] {false, true}) {
 278             for (byte[] bins : malformed) {
 279                 int mlen = bins[0];
 280                 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
 281                 CoderResult cr = decodeCR(bin, csn, direct);
 282                 String ashex = "";
 283                 for (int i = 0; i < bin.length; i++) {
 284                     if (i > 0) ashex += " ";
 285                         ashex += Integer.toBinaryString((int)bin[i] & 0xff);
 286                 }
 287                 if (!cr.isMalformed()) {
 288                     System.out.printf("        FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
 289                     failed = true;
 290                 } else if (cr.length() != mlen) {
 291                     System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());





 292                     failed = true;
 293                 }
 294             }
 295         }
 296         if (failed)
 297             throw new RuntimeException("Check malformed failed " + csn);
 298     }
 299 
 300     static boolean check(CharsetDecoder dec, byte[] utf8s, boolean direct, int[] flow) {
 301         int inPos = flow[0];
 302         int inLen = flow[1];
 303         int outPos = flow[2];
 304         int outLen = flow[3];
 305         int expedInPos = flow[4];
 306         int expedOutPos = flow[5];
 307         CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
 308                                           :CoderResult.OVERFLOW;
 309         ByteBuffer bbf;
 310         CharBuffer cbf;
 311         if (direct) {




   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /*
  25  * @test
  26  * @bug 4486841 7040220
  27  * @summary Test UTF-8 charset
  28  */
  29 
  30 import java.nio.charset.*;
  31 import java.nio.*;
  32 import java.util.*;
  33 
  34 public class TestUTF8 {
  35     static char[] decode(byte[] bb, String csn, boolean testDirect)
  36         throws Exception {
  37         CharsetDecoder dec = Charset.forName(csn).newDecoder();
  38         ByteBuffer bbf;
  39         CharBuffer cbf;
  40         if (testDirect) {
  41             bbf = ByteBuffer.allocateDirect(bb.length);
  42             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  43             bbf.put(bb).flip();
  44         } else {
  45             bbf = ByteBuffer.wrap(bb);
  46             cbf = CharBuffer.allocate(bb.length);


  53         return cc;
  54 
  55     }
  56 
  57     static CoderResult decodeCR(byte[] bb, String csn, boolean testDirect)
  58         throws Exception {
  59         CharsetDecoder dec = Charset.forName(csn).newDecoder();
  60         ByteBuffer bbf;
  61         CharBuffer cbf;
  62         if (testDirect) {
  63             bbf = ByteBuffer.allocateDirect(bb.length);
  64             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  65             bbf.put(bb).flip();
  66         } else {
  67             bbf = ByteBuffer.wrap(bb);
  68             cbf = CharBuffer.allocate(bb.length);
  69         }
  70         return dec.decode(bbf, cbf, true);
  71     }
  72 
  73     // copy/paste of the StringCoding.decode()
  74     static char[] decode(Charset cs, byte[] ba, int off, int len) {
  75         CharsetDecoder cd = cs.newDecoder();
  76         int en = (int)(len * cd.maxCharsPerByte());
  77         char[] ca = new char[en];
  78         if (len == 0)
  79             return ca;
  80         cd.onMalformedInput(CodingErrorAction.REPLACE)
  81           .onUnmappableCharacter(CodingErrorAction.REPLACE)
  82           .reset();
  83 
  84         ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
  85         CharBuffer cb = CharBuffer.wrap(ca);
  86         try {
  87             CoderResult cr = cd.decode(bb, cb, true);
  88             if (!cr.isUnderflow())
  89                 cr.throwException();
  90             cr = cd.flush(cb);
  91             if (!cr.isUnderflow())
  92                 cr.throwException();
  93         } catch (CharacterCodingException x) {
  94             throw new Error(x);
  95         }
  96         return Arrays.copyOf(ca, cb.position());
  97     }
  98 
  99     static byte[] encode(char[] cc, String csn, boolean testDirect)
 100         throws Exception {
 101         ByteBuffer bbf;
 102         CharBuffer cbf;
 103         CharsetEncoder enc = Charset.forName(csn).newEncoder();
 104         if (testDirect) {
 105             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 106             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 107             cbf.put(cc).flip();
 108         } else {
 109             bbf = ByteBuffer.allocate(cc.length * 4);
 110             cbf = CharBuffer.wrap(cc);
 111         }
 112 
 113         CoderResult cr = enc.encode(cbf, bbf, true);
 114         if (cr != CoderResult.UNDERFLOW)
 115             throw new RuntimeException("Encoding err: " + csn);
 116         byte[] bb = new byte[bbf.position()];
 117         bbf.flip(); bbf.get(bb);
 118         return bb;


 151 
 152     static int to3ByteUTF8(char c, byte[] bb, int pos) {
 153         bb[pos++] = (byte)(0xe0 | ((c >> 12)));
 154         bb[pos++] = (byte)(0x80 | ((c >> 06) & 0x3f));
 155         bb[pos++] = (byte)(0x80 | ((c >> 00) & 0x3f));
 156         return 3;
 157     }
 158 
 159     static void checkRoundtrip(String csn) throws Exception {
 160         System.out.printf("    Check roundtrip <%s>...", csn);
 161         char[] cc = getUTFChars();
 162         byte[] bb = encode(cc, csn, false);
 163         char[] ccO = decode(bb, csn, false);
 164 
 165         if (!Arrays.equals(cc, ccO)) {
 166             System.out.printf("    non-direct failed");
 167         }
 168         bb = encode(cc, csn, true);
 169         ccO = decode(bb, csn, true);
 170         if (!Arrays.equals(cc, ccO)) {
 171             System.out.print("    (direct) failed");
 172         }
 173         // String.getBytes()/toCharArray() goes to ArrayDe/Encoder path
 174         if (!Arrays.equals(bb, new String(cc).getBytes(csn))) {
 175             System.out.printf("    String.getBytes() failed");
 176         }
 177         if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
 178             System.out.printf("    String.toCharArray() failed");
 179         }
 180         System.out.println();
 181     }
 182 
 183     static void check6ByteSurrs(String csn) throws Exception {
 184         System.out.printf("    Check 6-byte Surrogates <%s>...%n", csn);
 185         byte[] bb = new byte[(0x110000 - 0x10000) * 6];
 186         char[] cc = new char[(0x110000 - 0x10000) * 2];
 187         int bpos = 0;
 188         int cpos = 0;
 189         for (int i = 0x10000; i < 0x110000; i++) {
 190             Character.toChars(i, cc, cpos);
 191             bpos += to3ByteUTF8(cc[cpos], bb, bpos);
 192             bpos += to3ByteUTF8(cc[cpos + 1], bb, bpos);
 193             cpos += 2;
 194         }
 195 
 196         char[] ccO = decode(bb, csn, false);
 197         if (!Arrays.equals(cc, ccO)) {
 198             System.out.printf("    decoding failed%n");
 199         }
 200         ccO = decode(bb, csn, true);
 201         if (!Arrays.equals(cc, ccO)) {
 202             System.out.printf("    decoding(direct) failed%n");
 203         }
 204         // new String(bb, csn).getBytes(csn) will not return
 205         // the 6 bytes surrogates as in bb, so only test
 206         // toCharArray() here.
 207         if (!Arrays.equals(cc, new String(bb, csn).toCharArray())) {
 208             System.out.printf("    String.toCharArray() failed");
 209         }
 210     }
 211 
 212     static void compare(String csn1, String csn2) throws Exception {
 213         System.out.printf("    Diff <%s> <%s>...%n", csn1, csn2);
 214         char[] cc = getUTFChars();
 215 
 216         byte[] bb1 = encode(cc, csn1, false);
 217         byte[] bb2 = encode(cc, csn2, false);
 218         if (!Arrays.equals(bb1, bb2))
 219             System.out.printf("        encoding failed%n");
 220         char[] cc1 = decode(bb1, csn1, false);
 221         char[] cc2 = decode(bb1, csn2, false);
 222         if (!Arrays.equals(cc1, cc2)) {
 223             System.out.printf("        decoding failed%n");
 224         }
 225 
 226         bb1 = encode(cc, csn1, true);
 227         bb2 = encode(cc, csn2, true);
 228         if (!Arrays.equals(bb1, bb2))
 229             System.out.printf("        encoding (direct) failed%n");


 296         {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80},
 297         {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80 },
 298         {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF },
 299         {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0 },
 300 
 301         // Six-byte sequences
 302         {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
 303         {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
 304         {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xBF }, // U+07FF zero-padded
 305         {6, (byte)0xFC, (byte)0x80, (byte)0x80, (byte)0x8F, (byte)0xBF, (byte)0xBF }, // U+FFFF zero-padded
 306         {1, (byte)0xF8, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 },
 307         {2, (byte)0xF8, (byte)0x80, (byte)0xC0, (byte)0x80, (byte)0x80, (byte)0x80 },
 308         {3, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0xC1, (byte)0xBF, (byte)0x80 },
 309         {4, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0xC0, (byte)0x80 },
 310         {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
 311     };
 312 
 313     static void checkMalformed(String csn) throws Exception {
 314         boolean failed = false;
 315         System.out.printf("    Check malformed <%s>...%n", csn);
 316         Charset cs = Charset.forName(csn);
 317         for (boolean direct: new boolean[] {false, true}) {
 318             for (byte[] bins : malformed) {
 319                 int mlen = bins[0];
 320                 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
 321                 CoderResult cr = decodeCR(bin, csn, direct);
 322                 String ashex = "";
 323                 for (int i = 0; i < bin.length; i++) {
 324                     if (i > 0) ashex += " ";
 325                         ashex += Integer.toBinaryString((int)bin[i] & 0xff);
 326                 }
 327                 if (!cr.isMalformed()) {
 328                     System.out.printf("        FAIL(direct=%b): [%s] not malformed.%n", direct, ashex);
 329                     failed = true;
 330                 } else if (cr.length() != mlen) {
 331                     System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].%n", direct, ashex, cr.length());
 332                     failed = true;
 333                 }
 334                 if (!Arrays.equals(decode(cs, bin, 0, bin.length),
 335                                    new String(bin, csn).toCharArray())) {
 336                     System.out.printf("        FAIL(new String(bb, %s)) failed%n", csn);
 337                     failed = true;
 338                 }
 339             }
 340         }
 341         if (failed)
 342             throw new RuntimeException("Check malformed failed " + csn);
 343     }
 344 
 345     static boolean check(CharsetDecoder dec, byte[] utf8s, boolean direct, int[] flow) {
 346         int inPos = flow[0];
 347         int inLen = flow[1];
 348         int outPos = flow[2];
 349         int outLen = flow[3];
 350         int expedInPos = flow[4];
 351         int expedOutPos = flow[5];
 352         CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
 353                                           :CoderResult.OVERFLOW;
 354         ByteBuffer bbf;
 355         CharBuffer cbf;
 356         if (direct) {