1 /*
   2  * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /*
  25  * @test
  26  * @bug 6831794 6229811
  27  * @summary Test EUC_TW charset
  28  * @modules java.base/sun.nio.cs
  29  */
  30 
  31 import java.nio.charset.*;
  32 import java.nio.*;
  33 import java.util.*;
  34 
  35 public class TestEUC_TW {
  36 
  37     static class Time {
  38         long t;
  39     }
  40     static int iteration = 100;
  41 
  42     static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
  43         throws Exception {
  44         String csn = cs.name();
  45         CharsetDecoder dec = cs.newDecoder();
  46         ByteBuffer bbf;
  47         CharBuffer cbf;
  48         if (testDirect) {
  49             bbf = ByteBuffer.allocateDirect(bb.length);
  50             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  51             bbf.put(bb);
  52         } else {
  53             bbf = ByteBuffer.wrap(bb);
  54             cbf = CharBuffer.allocate(bb.length);
  55         }
  56         CoderResult cr = null;
  57         long t1 = System.nanoTime()/1000;
  58         for (int i = 0; i < iteration; i++) {
  59             bbf.rewind();
  60             cbf.clear();
  61             dec.reset();
  62             cr = dec.decode(bbf, cbf, true);
  63         }
  64         long t2 = System.nanoTime()/1000;
  65         if (t != null)
  66         t.t = (t2 - t1)/iteration;
  67         if (cr != CoderResult.UNDERFLOW) {
  68             System.out.println("DEC-----------------");
  69             int pos = bbf.position();
  70             System.out.printf("  cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n",
  71                               cr.toString(), pos,
  72                               bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff);
  73             throw new RuntimeException("Decoding err: " + csn);
  74         }
  75         char[] cc = new char[cbf.position()];
  76         cbf.flip(); cbf.get(cc);
  77         return cc;
  78 
  79     }
  80 
  81     static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
  82         throws Exception {
  83         CharsetDecoder dec = cs.newDecoder();
  84         ByteBuffer bbf;
  85         CharBuffer cbf;
  86         if (testDirect) {
  87             bbf = ByteBuffer.allocateDirect(bb.length);
  88             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  89             bbf.put(bb).flip();
  90         } else {
  91             bbf = ByteBuffer.wrap(bb);
  92             cbf = CharBuffer.allocate(bb.length);
  93         }
  94         return dec.decode(bbf, cbf, true);
  95     }
  96 
  97     static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
  98         throws Exception {
  99         ByteBuffer bbf;
 100         CharBuffer cbf;
 101         CharsetEncoder enc = cs.newEncoder();
 102         String csn = cs.name();
 103         if (testDirect) {
 104             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 105             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 106             cbf.put(cc).flip();
 107         } else {
 108             bbf = ByteBuffer.allocate(cc.length * 4);
 109             cbf = CharBuffer.wrap(cc);
 110         }
 111         CoderResult cr = null;
 112         long t1 = System.nanoTime()/1000;
 113         for (int i = 0; i < iteration; i++) {
 114             cbf.rewind();
 115             bbf.clear();
 116             enc.reset();
 117             cr = enc.encode(cbf, bbf, true);
 118         }
 119         long t2 = System.nanoTime()/1000;
 120         if (t != null)
 121         t.t = (t2 - t1)/iteration;
 122         if (cr != CoderResult.UNDERFLOW) {
 123             System.out.println("ENC-----------------");
 124             int pos = cbf.position();
 125             System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n",
 126                               cr.toString(), pos, cc[pos]&0xffff);
 127             throw new RuntimeException("Encoding err: " + csn);
 128         }
 129         byte[] bb = new byte[bbf.position()];
 130         bbf.flip(); bbf.get(bb);
 131         return bb;
 132     }
 133 
 134     static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
 135         throws Exception {
 136         ByteBuffer bbf;
 137         CharBuffer cbf;
 138         CharsetEncoder enc = cs.newEncoder();
 139         if (testDirect) {
 140             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 141             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 142             cbf.put(cc).flip();
 143         } else {
 144             bbf = ByteBuffer.allocate(cc.length * 4);
 145             cbf = CharBuffer.wrap(cc);
 146         }
 147         return enc.encode(cbf, bbf, true);
 148     }
 149 
 150     static char[] getEUC_TWChars(boolean skipNR) {
 151         //CharsetEncoder encOLD = Charset.forName("EUC_TW_OLD").newEncoder();
 152         CharsetEncoder encOLD = new EUC_TW_OLD().newEncoder();
 153         CharsetEncoder enc = Charset.forName("EUC_TW").newEncoder();
 154         char[] cc = new char[0x20000];
 155         char[] c2 = new char[2];
 156         int pos = 0;
 157         int i = 0;
 158         //bmp
 159         for (i = 0; i < 0x10000; i++) {
 160             //SKIP these 3 NR codepoints if compared to EUC_TW
 161             if (skipNR && (i == 0x4ea0 || i == 0x51ab || i == 0x52f9))
 162                 continue;
 163             if (encOLD.canEncode((char)i) != enc.canEncode((char)i)) {
 164                 System.out.printf("  Err i=%x:  old=%b new=%b%n", i,
 165                                   encOLD.canEncode((char)i),
 166                                   enc.canEncode((char)i));
 167                 throw new RuntimeException("canEncode() err!");
 168             }
 169 
 170             if (enc.canEncode((char)i)) {
 171                 cc[pos++] = (char)i;
 172             }
 173         }
 174 
 175         //supp
 176         CharBuffer cb = CharBuffer.wrap(new char[2]);
 177         for (i = 0x20000; i < 0x30000; i++) {
 178             Character.toChars(i, c2, 0);
 179             cb.clear();cb.put(c2[0]);cb.put(c2[1]);cb.flip();
 180 
 181             if (encOLD.canEncode(cb) != enc.canEncode(cb)) {
 182                 throw new RuntimeException("canEncode() err!");
 183             }
 184 
 185             if (enc.canEncode(cb)) {
 186                 //System.out.printf("cp=%x,  (%x, %x) %n", i, c2[0] & 0xffff, c2[1] & 0xffff);
 187                 cc[pos++] = c2[0];
 188                 cc[pos++] = c2[1];
 189             }
 190         }
 191 
 192         return Arrays.copyOf(cc, pos);
 193     }
 194 
 195     static void checkRoundtrip(Charset cs) throws Exception {
 196         char[] cc = getEUC_TWChars(false);
 197         System.out.printf("Check roundtrip <%s>...", cs.name());
 198         byte[] bb = encode(cc, cs, false, null);
 199         char[] ccO = decode(bb, cs, false, null);
 200 
 201         if (!Arrays.equals(cc, ccO)) {
 202             System.out.printf("    non-direct failed");
 203         }
 204         bb = encode(cc, cs, true, null);
 205         ccO = decode(bb, cs, true, null);
 206         if (!Arrays.equals(cc, ccO)) {
 207             System.out.printf("    (direct) failed");
 208         }
 209         System.out.println();
 210     }
 211 
 212     static void checkInit(String csn) throws Exception {
 213         System.out.printf("Check init <%s>...%n", csn);
 214         Charset.forName("Big5");    // load in the ExtendedCharsets
 215         long t1 = System.nanoTime()/1000;
 216         Charset cs = Charset.forName(csn);
 217         long t2 = System.nanoTime()/1000;
 218         System.out.printf("    charset     :%d%n", t2 - t1);
 219         t1 = System.nanoTime()/1000;
 220             cs.newDecoder();
 221         t2 = System.nanoTime()/1000;
 222         System.out.printf("    new Decoder :%d%n", t2 - t1);
 223 
 224         t1 = System.nanoTime()/1000;
 225             cs.newEncoder();
 226         t2 = System.nanoTime()/1000;
 227         System.out.printf("    new Encoder :%d%n", t2 - t1);
 228     }
 229 
 230     static void compare(Charset cs1, Charset cs2) throws Exception {
 231         char[] cc = getEUC_TWChars(true);
 232 
 233         String csn1 = cs1.name();
 234         String csn2 = cs2.name();
 235         System.out.printf("Diff     <%s> <%s>...%n", csn1, csn2);
 236 
 237         Time t1 = new Time();
 238         Time t2 = new Time();
 239 
 240         byte[] bb1 = encode(cc, cs1, false, t1);
 241         byte[] bb2 = encode(cc, cs2, false, t2);
 242 
 243         System.out.printf("    Encoding TimeRatio %s/%s: %d,%d :%f%n",
 244                           csn2, csn1,
 245                           t2.t, t1.t,
 246                           (double)(t2.t)/(t1.t));
 247         if (!Arrays.equals(bb1, bb2)) {
 248             System.out.printf("        encoding failed%n");
 249         }
 250 
 251         char[] cc2 = decode(bb1, cs2, false, t2);
 252         char[] cc1 = decode(bb1, cs1, false, t1);
 253         System.out.printf("    Decoding TimeRatio %s/%s: %d,%d :%f%n",
 254                           csn2, csn1,
 255                           t2.t, t1.t,
 256                           (double)(t2.t)/(t1.t));
 257         if (!Arrays.equals(cc1, cc2)) {
 258             System.out.printf("        decoding failed%n");
 259         }
 260 
 261         bb1 = encode(cc, cs1, true, t1);
 262         bb2 = encode(cc, cs2, true, t2);
 263 
 264         System.out.printf("    Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
 265                           csn2, csn1,
 266                           t2.t, t1.t,
 267                           (double)(t2.t)/(t1.t));
 268 
 269         if (!Arrays.equals(bb1, bb2))
 270             System.out.printf("        encoding (direct) failed%n");
 271 
 272         cc1 = decode(bb1, cs1, true, t1);
 273         cc2 = decode(bb1, cs2, true, t2);
 274         System.out.printf("    Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
 275                           csn2, csn1,
 276                           t2.t, t1.t,
 277                           (double)(t2.t)/(t1.t));
 278         if (!Arrays.equals(cc1, cc2)) {
 279             System.out.printf("        decoding (direct) failed%n");
 280         }
 281     }
 282 
 283     // The first byte is the length of malformed bytes
 284     static byte[][] malformed = {
 285         //{5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
 286     };
 287 
 288     static void checkMalformed(Charset cs) throws Exception {
 289         boolean failed = false;
 290         String csn = cs.name();
 291         System.out.printf("Check malformed <%s>...%n", csn);
 292         for (boolean direct: new boolean[] {false, true}) {
 293             for (byte[] bins : malformed) {
 294                 int mlen = bins[0];
 295                 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
 296                 CoderResult cr = decodeCR(bin, cs, direct);
 297                 String ashex = "";
 298                 for (int i = 0; i < bin.length; i++) {
 299                     if (i > 0) ashex += " ";
 300                         ashex += Integer.toBinaryString((int)bin[i] & 0xff);
 301                 }
 302                 if (!cr.isMalformed()) {
 303                     System.out.printf("        FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
 304                     failed = true;
 305                 } else if (cr.length() != mlen) {
 306                     System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
 307                     failed = true;
 308                 }
 309             }
 310         }
 311         if (failed)
 312             throw new RuntimeException("Check malformed failed " + csn);
 313     }
 314 
 315     static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
 316         int inPos = flow[0];
 317         int inLen = flow[1];
 318         int outPos = flow[2];
 319         int outLen = flow[3];
 320         int expedInPos = flow[4];
 321         int expedOutPos = flow[5];
 322         CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
 323                                           :CoderResult.OVERFLOW;
 324         ByteBuffer bbf;
 325         CharBuffer cbf;
 326         if (direct) {
 327             bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
 328             cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
 329         } else {
 330             bbf = ByteBuffer.allocate(inPos + bytes.length);
 331             cbf = CharBuffer.allocate(outPos + outLen);
 332         }
 333         bbf.position(inPos);
 334         bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
 335         cbf.position(outPos);
 336         dec.reset();
 337         CoderResult cr = dec.decode(bbf, cbf, false);
 338         if (cr != expedCR ||
 339             bbf.position() != expedInPos ||
 340             cbf.position() != expedOutPos) {
 341             System.out.printf("Expected(direct=%5b): [", direct);
 342             for (int i:flow) System.out.print(" " + i);
 343             System.out.println("]  CR=" + cr +
 344                                ", inPos=" + bbf.position() +
 345                                ", outPos=" + cbf.position());
 346             return false;
 347         }
 348         return true;
 349     }
 350 
 351     static void checkUnderOverflow(Charset cs) throws Exception {
 352         String csn = cs.name();
 353         System.out.printf("Check under/overflow <%s>...%n", csn);
 354         CharsetDecoder dec = cs.newDecoder();
 355         boolean failed = false;
 356         //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
 357         //0   1 2   3         7         11
 358         byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW");
 359         int    inlen = bytes.length;
 360 
 361         int MAXOFF = 20;
 362         for (int inoff = 0; inoff < MAXOFF; inoff++) {
 363             for (int outoff = 0; outoff < MAXOFF; outoff++) {
 364         int[][] Flows = {
 365             //inpos, inLen, outPos,  outLen, inPosEP,    outposEP,   under(0)/over(1)
 366             //overflow
 367             {inoff,  inlen, outoff,  1,      inoff + 1,  outoff + 1, 1},
 368             {inoff,  inlen, outoff,  2,      inoff + 3,  outoff + 2, 1},
 369             {inoff,  inlen, outoff,  3,      inoff + 7,  outoff + 3, 1},
 370             {inoff,  inlen, outoff,  4,      inoff + 11, outoff + 4, 1},
 371             {inoff,  inlen, outoff,  5,      inoff + 11, outoff + 4, 1},
 372             {inoff,  inlen, outoff,  6,      inoff + 15, outoff + 6, 0},
 373             //underflow
 374             {inoff,  1,     outoff,  6,      inoff + 1,  outoff + 1, 0},
 375             {inoff,  2,     outoff,  6,      inoff + 1,  outoff + 1, 0},
 376             {inoff,  3,     outoff,  6,      inoff + 3,  outoff + 2, 0},
 377             {inoff,  4,     outoff,  6,      inoff + 3,  outoff + 2, 0},
 378             {inoff,  5,     outoff,  6,      inoff + 3,  outoff + 2, 0},
 379             {inoff,  8,     outoff,  6,      inoff + 7,  outoff + 3, 0},
 380             {inoff,  9,     outoff,  6,      inoff + 7,  outoff + 3, 0},
 381             {inoff, 10,     outoff,  6,      inoff + 7,  outoff + 3, 0},
 382             {inoff, 11,     outoff,  6,      inoff +11,  outoff + 4, 0},
 383             {inoff, 12,     outoff,  6,      inoff +11,  outoff + 4, 0},
 384             {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
 385             // 2-byte under/overflow
 386             {inoff,  2,     outoff,  1,      inoff + 1,  outoff + 1, 0},
 387             {inoff,  3,     outoff,  1,      inoff + 1,  outoff + 1, 1},
 388             {inoff,  3,     outoff,  2,      inoff + 3,  outoff + 2, 0},
 389             // 4-byte  under/overflow
 390             {inoff,  4,     outoff,  2,      inoff + 3,  outoff + 2, 0},
 391             {inoff,  5,     outoff,  2,      inoff + 3,  outoff + 2, 0},
 392             {inoff,  6,     outoff,  2,      inoff + 3,  outoff + 2, 0},
 393             {inoff,  7,     outoff,  2,      inoff + 3,  outoff + 2, 1},
 394             {inoff,  7,     outoff,  3,      inoff + 7,  outoff + 3, 0},
 395             // 4-byte  under/overflow
 396             {inoff,  8,     outoff,  3,      inoff + 7,  outoff + 3, 0},
 397             {inoff,  9,     outoff,  3,      inoff + 7,  outoff + 3, 0},
 398             {inoff, 10,     outoff,  3,      inoff + 7,  outoff + 3, 0},
 399             {inoff, 11,     outoff,  3,      inoff + 7,  outoff + 3, 1},
 400             {inoff, 11,     outoff,  4,      inoff +11,  outoff + 4, 0},
 401             // 4-byte/supp  under/overflow
 402             {inoff, 11,     outoff,  4,      inoff +11,  outoff + 4, 0},
 403             {inoff, 12,     outoff,  4,      inoff +11,  outoff + 4, 0},
 404             {inoff, 13,     outoff,  4,      inoff +11,  outoff + 4, 0},
 405             {inoff, 14,     outoff,  4,      inoff +11,  outoff + 4, 0},
 406             {inoff, 15,     outoff,  4,      inoff +11,  outoff + 4, 1},
 407             {inoff, 15,     outoff,  5,      inoff +11,  outoff + 4, 1},
 408             {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
 409         };
 410         for (boolean direct: new boolean[] {false, true}) {
 411             for (int[] flow: Flows) {
 412                 if (!check(dec, bytes, direct, flow))
 413                     failed = true;
 414             }
 415         }}}
 416         if (failed)
 417             throw new RuntimeException("Check under/overflow failed " + csn);
 418     }
 419 
 420     public static void main(String[] args) throws Exception {
 421         // be the first one
 422         //checkInit("EUC_TW_OLD");
 423         checkInit("EUC_TW");
 424         Charset euctw = Charset.forName("EUC_TW");
 425         checkRoundtrip(euctw);
 426         compare(euctw, new EUC_TW_OLD());
 427         checkMalformed(euctw);
 428         checkUnderOverflow(euctw);
 429     }
 430 }