1 /*
   2  * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  20  * CA 95054 USA or visit www.sun.com if you need additional information or
  21  * have any questions.
  22  */
  23 
  24 /*
  25  * @test
  26  * @bug 6831794 6229811
  27  * @summary Test EUC_TW charset
  28  */
  29 
  30 import java.nio.charset.*;
  31 import java.nio.*;
  32 import java.util.*;
  33 
  34 public class TestEUC_TW {
  35 
  36     static class Time {
  37         long t;
  38     }
  39     static int iteration = 1000;
  40 
  41     static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
  42         throws Exception {
  43         String csn = cs.name();
  44         CharsetDecoder dec = cs.newDecoder();
  45         ByteBuffer bbf;
  46         CharBuffer cbf;
  47         if (testDirect) {
  48             bbf = ByteBuffer.allocateDirect(bb.length);
  49             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  50             bbf.put(bb);
  51         } else {
  52             bbf = ByteBuffer.wrap(bb);
  53             cbf = CharBuffer.allocate(bb.length);
  54         }
  55         CoderResult cr = null;
  56         long t1 = System.nanoTime()/1000;
  57         for (int i = 0; i < iteration; i++) {
  58             bbf.rewind();
  59             cbf.clear();
  60             dec.reset();
  61             cr = dec.decode(bbf, cbf, true);
  62         }
  63         long t2 = System.nanoTime()/1000;
  64         if (t != null)
  65         t.t = (t2 - t1)/iteration;
  66         if (cr != CoderResult.UNDERFLOW) {
  67             System.out.println("DEC-----------------");
  68             int pos = bbf.position();
  69             System.out.printf("  cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n",
  70                               cr.toString(), pos, 
  71                               bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff);
  72             throw new RuntimeException("Decoding err: " + csn);
  73         }
  74         char[] cc = new char[cbf.position()];
  75         cbf.flip(); cbf.get(cc);
  76         return cc;
  77 
  78     }
  79 
  80     static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
  81         throws Exception {
  82         CharsetDecoder dec = cs.newDecoder();
  83         ByteBuffer bbf;
  84         CharBuffer cbf;
  85         if (testDirect) {
  86             bbf = ByteBuffer.allocateDirect(bb.length);
  87             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  88             bbf.put(bb).flip();
  89         } else {
  90             bbf = ByteBuffer.wrap(bb);
  91             cbf = CharBuffer.allocate(bb.length);
  92         }
  93         return dec.decode(bbf, cbf, true);
  94     }
  95 
  96     static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
  97         throws Exception {
  98         ByteBuffer bbf;
  99         CharBuffer cbf;
 100         CharsetEncoder enc = cs.newEncoder();
 101         String csn = cs.name();
 102         if (testDirect) {
 103             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 104             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 105             cbf.put(cc).flip();
 106         } else {
 107             bbf = ByteBuffer.allocate(cc.length * 4);
 108             cbf = CharBuffer.wrap(cc);
 109         }
 110         CoderResult cr = null;
 111         long t1 = System.nanoTime()/1000;
 112         for (int i = 0; i < iteration; i++) {
 113             cbf.rewind();
 114             bbf.clear();
 115             enc.reset();
 116             cr = enc.encode(cbf, bbf, true);
 117         }
 118         long t2 = System.nanoTime()/1000;
 119         if (t != null)
 120         t.t = (t2 - t1)/iteration;
 121         if (cr != CoderResult.UNDERFLOW) {
 122             System.out.println("ENC-----------------");
 123             int pos = cbf.position();
 124             System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n",
 125                               cr.toString(), pos, cc[pos]&0xffff);
 126             throw new RuntimeException("Encoding err: " + csn);
 127         }
 128         byte[] bb = new byte[bbf.position()];
 129         bbf.flip(); bbf.get(bb);
 130         return bb;
 131     }
 132 
 133     static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
 134         throws Exception {
 135         ByteBuffer bbf;
 136         CharBuffer cbf;
 137         CharsetEncoder enc = cs.newEncoder();
 138         if (testDirect) {
 139             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 140             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 141             cbf.put(cc).flip();
 142         } else {
 143             bbf = ByteBuffer.allocate(cc.length * 4);
 144             cbf = CharBuffer.wrap(cc);
 145         }
 146         return enc.encode(cbf, bbf, true);
 147     }
 148 
 149     static char[] getEUC_TWChars(boolean skipNR) {
 150         //CharsetEncoder encOLD = Charset.forName("EUC_TW_OLD").newEncoder();
 151         CharsetEncoder encOLD = new EUC_TW_OLD().newEncoder();
 152         CharsetEncoder enc = Charset.forName("EUC_TW").newEncoder();
 153         char[] cc = new char[0x20000];
 154         char[] c2 = new char[2];
 155         int pos = 0;
 156         int i = 0;
 157         //bmp
 158         for (i = 0; i < 0x10000; i++) {
 159             //SKIP these 3 NR codepoints if compared to EUC_TW
 160             if (skipNR && (i == 0x4ea0 || i == 0x51ab || i == 0x52f9))
 161                 continue;
 162             if (encOLD.canEncode((char)i) != enc.canEncode((char)i)) {
 163                 System.out.printf("  Err i=%x:  old=%b new=%b%n", i,
 164                                   encOLD.canEncode((char)i),
 165                                   enc.canEncode((char)i));
 166                 throw new RuntimeException("canEncode() err!");
 167             }
 168 
 169             if (enc.canEncode((char)i)) {
 170                 cc[pos++] = (char)i;
 171             }
 172         }
 173 
 174         //supp
 175         CharBuffer cb = CharBuffer.wrap(new char[2]);
 176         for (i = 0x20000; i < 0x30000; i++) {
 177             Character.toChars(i, c2, 0);
 178             cb.clear();cb.put(c2[0]);cb.put(c2[1]);cb.flip();
 179 
 180             if (encOLD.canEncode(cb) != enc.canEncode(cb)) {
 181                 throw new RuntimeException("canEncode() err!");
 182             }
 183 
 184             if (enc.canEncode(cb)) {
 185                 //System.out.printf("cp=%x,  (%x, %x) %n", i, c2[0] & 0xffff, c2[1] & 0xffff);
 186                 cc[pos++] = c2[0];
 187                 cc[pos++] = c2[1];
 188             }
 189         }
 190 
 191         return Arrays.copyOf(cc, pos);
 192     }
 193 
 194     static void checkRoundtrip(Charset cs) throws Exception {
 195         char[] cc = getEUC_TWChars(false);
 196         System.out.printf("Check roundtrip <%s>...", cs.name());
 197         byte[] bb = encode(cc, cs, false, null);
 198         char[] ccO = decode(bb, cs, false, null);
 199 
 200         if (!Arrays.equals(cc, ccO)) {
 201             System.out.printf("    non-direct failed");
 202         }
 203         bb = encode(cc, cs, true, null);
 204         ccO = decode(bb, cs, true, null);
 205         if (!Arrays.equals(cc, ccO)) {
 206             System.out.printf("    (direct) failed");
 207         }
 208         System.out.println();
 209     }
 210 
 211     static void checkInit(String csn) throws Exception {
 212         System.out.printf("Check init <%s>...%n", csn);
 213         Charset.forName("Big5");    // load in the ExtendedCharsets
 214         long t1 = System.nanoTime()/1000;
 215         Charset cs = Charset.forName(csn);
 216         long t2 = System.nanoTime()/1000;
 217         System.out.printf("    charset     :%d%n", t2 - t1);
 218         t1 = System.nanoTime()/1000;
 219             cs.newDecoder();
 220         t2 = System.nanoTime()/1000;
 221         System.out.printf("    new Decoder :%d%n", t2 - t1);
 222 
 223         t1 = System.nanoTime()/1000;
 224             cs.newEncoder();
 225         t2 = System.nanoTime()/1000;
 226         System.out.printf("    new Encoder :%d%n", t2 - t1);
 227     }
 228 
 229     static void compare(Charset cs1, Charset cs2) throws Exception {
 230         char[] cc = getEUC_TWChars(true);
 231 
 232         String csn1 = cs1.name();
 233         String csn2 = cs2.name();
 234         System.out.printf("Diff     <%s> <%s>...%n", csn1, csn2);
 235 
 236         Time t1 = new Time();
 237         Time t2 = new Time();
 238 
 239         byte[] bb1 = encode(cc, cs1, false, t1);
 240         byte[] bb2 = encode(cc, cs2, false, t2);
 241 
 242         System.out.printf("    Encoding TimeRatio %s/%s: %d,%d :%f%n",
 243                           csn2, csn1, 
 244                           t2.t, t1.t, 
 245                           (double)(t2.t)/(t1.t));
 246         if (!Arrays.equals(bb1, bb2)) {
 247             System.out.printf("        encoding failed%n");
 248         }
 249 
 250         char[] cc2 = decode(bb1, cs2, false, t2);
 251         char[] cc1 = decode(bb1, cs1, false, t1);
 252         System.out.printf("    Decoding TimeRatio %s/%s: %d,%d :%f%n",
 253                           csn2, csn1, 
 254                           t2.t, t1.t,
 255                           (double)(t2.t)/(t1.t));
 256         if (!Arrays.equals(cc1, cc2)) {
 257             System.out.printf("        decoding failed%n");
 258         }
 259 
 260         bb1 = encode(cc, cs1, true, t1);
 261         bb2 = encode(cc, cs2, true, t2);
 262 
 263         System.out.printf("    Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
 264                           csn2, csn1, 
 265                           t2.t, t1.t,
 266                           (double)(t2.t)/(t1.t));
 267 
 268         if (!Arrays.equals(bb1, bb2))
 269             System.out.printf("        encoding (direct) failed%n");
 270 
 271         cc1 = decode(bb1, cs1, true, t1);
 272         cc2 = decode(bb1, cs2, true, t2);
 273         System.out.printf("    Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
 274                           csn2, csn1, 
 275                           t2.t, t1.t,
 276                           (double)(t2.t)/(t1.t));
 277         if (!Arrays.equals(cc1, cc2)) {
 278             System.out.printf("        decoding (direct) failed%n");
 279         }
 280     }
 281  
 282     // The first byte is the length of malformed bytes
 283     static byte[][] malformed = {
 284         //{5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 }, 
 285     };
 286 
 287     static void checkMalformed(Charset cs) throws Exception {
 288         boolean failed = false;
 289         String csn = cs.name();
 290         System.out.printf("Check malformed <%s>...%n", csn);
 291         for (boolean direct: new boolean[] {false, true}) {
 292             for (byte[] bins : malformed) {
 293                 int mlen = bins[0];
 294                 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
 295                 CoderResult cr = decodeCR(bin, cs, direct);
 296                 String ashex = "";
 297                 for (int i = 0; i < bin.length; i++) {
 298                     if (i > 0) ashex += " ";
 299                         ashex += Integer.toBinaryString((int)bin[i] & 0xff);
 300                 }
 301                 if (!cr.isMalformed()) {
 302                     System.out.printf("        FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
 303                     failed = true;
 304                 } else if (cr.length() != mlen) {
 305                     System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
 306                     failed = true;
 307                 }
 308             }
 309         }
 310         if (failed)
 311             throw new RuntimeException("Check malformed failed " + csn);
 312     }
 313 
 314     static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
 315         int inPos = flow[0];
 316         int inLen = flow[1];
 317         int outPos = flow[2];
 318         int outLen = flow[3];
 319         int expedInPos = flow[4];
 320         int expedOutPos = flow[5];
 321         CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
 322                                           :CoderResult.OVERFLOW;
 323         ByteBuffer bbf;
 324         CharBuffer cbf;
 325         if (direct) {
 326             bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
 327             cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
 328         } else {
 329             bbf = ByteBuffer.allocate(inPos + bytes.length);
 330             cbf = CharBuffer.allocate(outPos + outLen);
 331         }
 332         bbf.position(inPos);
 333         bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
 334         cbf.position(outPos);
 335         dec.reset();
 336         CoderResult cr = dec.decode(bbf, cbf, false);
 337         if (cr != expedCR ||
 338             bbf.position() != expedInPos ||
 339             cbf.position() != expedOutPos) {
 340             System.out.printf("Expected(direct=%5b): [", direct);
 341             for (int i:flow) System.out.print(" " + i);
 342             System.out.println("]  CR=" + cr +
 343                                ", inPos=" + bbf.position() +
 344                                ", outPos=" + cbf.position());
 345             return false;
 346         }
 347         return true;
 348     }
 349 
 350     static void checkUnderOverflow(Charset cs) throws Exception {
 351         String csn = cs.name();
 352         System.out.printf("Check under/overflow <%s>...%n", csn);
 353         CharsetDecoder dec = cs.newDecoder();
 354         boolean failed = false;
 355         //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
 356         //0   1 2   3         7         11
 357         byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW");
 358         int    inlen = bytes.length;
 359 
 360         int MAXOFF = 20;
 361         for (int inoff = 0; inoff < MAXOFF; inoff++) {
 362             for (int outoff = 0; outoff < MAXOFF; outoff++) {
 363         int[][] Flows = {
 364             //inpos, inLen, outPos,  outLen, inPosEP,    outposEP,   under(0)/over(1)
 365             //overflow
 366             {inoff,  inlen, outoff,  1,      inoff + 1,  outoff + 1, 1},
 367             {inoff,  inlen, outoff,  2,      inoff + 3,  outoff + 2, 1},
 368             {inoff,  inlen, outoff,  3,      inoff + 7,  outoff + 3, 1},
 369             {inoff,  inlen, outoff,  4,      inoff + 11, outoff + 4, 1},
 370             {inoff,  inlen, outoff,  5,      inoff + 11, outoff + 4, 1},
 371             {inoff,  inlen, outoff,  6,      inoff + 15, outoff + 6, 0},
 372             //underflow
 373             {inoff,  1,     outoff,  6,      inoff + 1,  outoff + 1, 0},
 374             {inoff,  2,     outoff,  6,      inoff + 1,  outoff + 1, 0},
 375             {inoff,  3,     outoff,  6,      inoff + 3,  outoff + 2, 0},
 376             {inoff,  4,     outoff,  6,      inoff + 3,  outoff + 2, 0},
 377             {inoff,  5,     outoff,  6,      inoff + 3,  outoff + 2, 0},
 378             {inoff,  8,     outoff,  6,      inoff + 7,  outoff + 3, 0},
 379             {inoff,  9,     outoff,  6,      inoff + 7,  outoff + 3, 0},
 380             {inoff, 10,     outoff,  6,      inoff + 7,  outoff + 3, 0},
 381             {inoff, 11,     outoff,  6,      inoff +11,  outoff + 4, 0},
 382             {inoff, 12,     outoff,  6,      inoff +11,  outoff + 4, 0},
 383             {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
 384             // 2-byte under/overflow
 385             {inoff,  2,     outoff,  1,      inoff + 1,  outoff + 1, 0},
 386             {inoff,  3,     outoff,  1,      inoff + 1,  outoff + 1, 1},
 387             {inoff,  3,     outoff,  2,      inoff + 3,  outoff + 2, 0},
 388             // 4-byte  under/overflow
 389             {inoff,  4,     outoff,  2,      inoff + 3,  outoff + 2, 0},
 390             {inoff,  5,     outoff,  2,      inoff + 3,  outoff + 2, 0},
 391             {inoff,  6,     outoff,  2,      inoff + 3,  outoff + 2, 0},
 392             {inoff,  7,     outoff,  2,      inoff + 3,  outoff + 2, 1},
 393             {inoff,  7,     outoff,  3,      inoff + 7,  outoff + 3, 0},
 394             // 4-byte  under/overflow
 395             {inoff,  8,     outoff,  3,      inoff + 7,  outoff + 3, 0},
 396             {inoff,  9,     outoff,  3,      inoff + 7,  outoff + 3, 0},
 397             {inoff, 10,     outoff,  3,      inoff + 7,  outoff + 3, 0},
 398             {inoff, 11,     outoff,  3,      inoff + 7,  outoff + 3, 1},
 399             {inoff, 11,     outoff,  4,      inoff +11,  outoff + 4, 0},
 400             // 4-byte/supp  under/overflow
 401             {inoff, 11,     outoff,  4,      inoff +11,  outoff + 4, 0},
 402             {inoff, 12,     outoff,  4,      inoff +11,  outoff + 4, 0},
 403             {inoff, 13,     outoff,  4,      inoff +11,  outoff + 4, 0},
 404             {inoff, 14,     outoff,  4,      inoff +11,  outoff + 4, 0},
 405             {inoff, 15,     outoff,  4,      inoff +11,  outoff + 4, 1},
 406             {inoff, 15,     outoff,  5,      inoff +11,  outoff + 4, 1},
 407             {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
 408         };
 409         for (boolean direct: new boolean[] {false, true}) {
 410             for (int[] flow: Flows) {
 411                 if (!check(dec, bytes, direct, flow))
 412                     failed = true;
 413             }
 414         }}}
 415         if (failed)
 416             throw new RuntimeException("Check under/overflow failed " + csn);
 417     }
 418         
 419     public static void main(String[] args) throws Exception {
 420         // be the first one
 421         //checkInit("EUC_TW_OLD");
 422         checkInit("EUC_TW");
 423         Charset euctw = Charset.forName("EUC_TW");
 424         checkRoundtrip(euctw);
 425         compare(euctw, new EUC_TW_OLD());
 426         checkMalformed(euctw);
 427         checkUnderOverflow(euctw);
 428     }
 429 }
 430