1 /*
   2  * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /*
  25  * @test
  26  * @bug 6843578
  27  * @summary Test IBM DB charsets
  28  * @build IBM930_OLD IBM933_OLD IBM935_OLD IBM937_OLD IBM939_OLD IBM942_OLD IBM943_OLD IBM948_OLD IBM949_OLD IBM950_OLD IBM970_OLD IBM942C_OLD IBM943C_OLD IBM949C_OLD IBM1381_OLD IBM1383_OLD EUC_CN_OLD EUC_KR_OLD GBK_OLD Johab_OLD MS932_OLD MS936_OLD MS949_OLD MS950_OLD
  29  */
  30 
  31 import java.nio.charset.*;
  32 import java.nio.*;
  33 import java.util.*;
  34 
  35 public class TestIBMDB {
  36     static class Time {
  37         long t;
  38     }
  39     static int iteration = 200;
  40 
  41     static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
  42         throws Exception {
  43         String csn = cs.name();
  44         CharsetDecoder dec = cs.newDecoder();
  45         ByteBuffer bbf;
  46         CharBuffer cbf;
  47         if (testDirect) {
  48             bbf = ByteBuffer.allocateDirect(bb.length);
  49             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  50             bbf.put(bb);
  51         } else {
  52             bbf = ByteBuffer.wrap(bb);
  53             cbf = CharBuffer.allocate(bb.length);
  54         }
  55         CoderResult cr = null;
  56         long t1 = System.nanoTime()/1000;
  57         for (int i = 0; i < iteration; i++) {
  58             bbf.rewind();
  59             cbf.clear();
  60             dec.reset();
  61             cr = dec.decode(bbf, cbf, true);
  62         }
  63         long t2 = System.nanoTime()/1000;
  64         t.t = (t2 - t1)/iteration;
  65         if (cr != CoderResult.UNDERFLOW) {
  66             System.out.println("DEC-----------------");
  67             int pos = bbf.position();
  68             System.out.printf("  cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n",
  69                               cr.toString(), pos,
  70                               bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff);
  71             throw new RuntimeException("Decoding err: " + csn);
  72         }
  73         char[] cc = new char[cbf.position()];
  74         cbf.flip(); cbf.get(cc);
  75         return cc;
  76 
  77     }
  78 
  79     static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
  80         throws Exception {
  81         CharsetDecoder dec = cs.newDecoder();
  82         ByteBuffer bbf;
  83         CharBuffer cbf;
  84         if (testDirect) {
  85             bbf = ByteBuffer.allocateDirect(bb.length);
  86             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  87             bbf.put(bb).flip();
  88         } else {
  89             bbf = ByteBuffer.wrap(bb);
  90             cbf = CharBuffer.allocate(bb.length);
  91         }
  92         CoderResult cr = null;
  93         for (int i = 0; i < iteration; i++) {
  94             bbf.rewind();
  95             cbf.clear();
  96             dec.reset();
  97             cr = dec.decode(bbf, cbf, true);
  98         }
  99         return cr;
 100     }
 101 
 102     static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
 103         throws Exception {
 104         ByteBuffer bbf;
 105         CharBuffer cbf;
 106         CharsetEncoder enc = cs.newEncoder();
 107         String csn = cs.name();
 108         if (testDirect) {
 109             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 110             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 111             cbf.put(cc).flip();
 112         } else {
 113             bbf = ByteBuffer.allocate(cc.length * 4);
 114             cbf = CharBuffer.wrap(cc);
 115         }
 116         CoderResult cr = null;
 117         long t1 = System.nanoTime()/1000;
 118         for (int i = 0; i < iteration; i++) {
 119             cbf.rewind();
 120             bbf.clear();
 121             enc.reset();
 122             cr = enc.encode(cbf, bbf, true);
 123         }
 124         long t2 = System.nanoTime()/1000;
 125         t.t = (t2 - t1)/iteration;
 126         if (cr != CoderResult.UNDERFLOW) {
 127             System.out.println("ENC-----------------");
 128             int pos = cbf.position();
 129             System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n",
 130                               cr.toString(), pos, cc[pos]&0xffff);
 131             throw new RuntimeException("Encoding err: " + csn);
 132         }
 133         byte[] bb = new byte[bbf.position()];
 134         bbf.flip(); bbf.get(bb);
 135         return bb;
 136     }
 137 
 138     static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
 139         throws Exception {
 140         ByteBuffer bbf;
 141         CharBuffer cbf;
 142         CharsetEncoder enc = cs.newEncoder();
 143         if (testDirect) {
 144             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 145             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 146             cbf.put(cc).flip();
 147         } else {
 148             bbf = ByteBuffer.allocate(cc.length * 4);
 149             cbf = CharBuffer.wrap(cc);
 150         }
 151         CoderResult cr = null;
 152         for (int i = 0; i < iteration; i++) {
 153             cbf.rewind();
 154             bbf.clear();
 155             enc.reset();
 156             cr = enc.encode(cbf, bbf, true);
 157         }
 158         return cr;
 159     }
 160 
 161     static void printEntry(char c, Charset cs) {
 162         byte[] bb = new String(new char[] {c}).getBytes(cs);
 163         for (byte b:bb)
 164             System.out.printf("%x", b&0xff);
 165         System.out.printf("    %x", c & 0xffff);
 166         String s2 = new String(bb, cs);
 167         System.out.printf("    %x%n", s2.charAt(0) & 0xffff);
 168     }
 169 
 170     // check and compare canEncoding/Encoding
 171     static char[] checkEncoding(Charset oldCS, Charset newCS)
 172         throws Exception {
 173         System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name());
 174         CharsetEncoder encOLD = oldCS.newEncoder();
 175         CharsetEncoder encNew = newCS.newEncoder();
 176         char[] cc = new char[0x10000];
 177         int pos = 0;
 178         boolean is970 = "x-IBM970-Old".equals(oldCS.name());
 179 
 180         for (char c = 0; c < 0xffff; c++) {
 181             boolean canOld = encOLD.canEncode(c);
 182             boolean canNew = encNew.canEncode(c);
 183 
 184             if (is970 && c == 0x2299)
 185                 continue;
 186 
 187             if (canOld != canNew) {
 188                 if (canNew) {
 189                     System.out.printf("      NEW(only): ");
 190                     printEntry(c, newCS);
 191                 } else {
 192                     if (is970) {
 193                         byte[] bb = new String(new char[] {c}).getBytes(oldCS);
 194                         if (bb.length == 2 && bb[0] == (byte)0xa2 && bb[1] == (byte)0xc1) {
 195                         // we know 970 has bogus nnnn -> a2c1 -> 2299
 196                             continue;
 197                         }
 198                     }
 199                     System.out.printf("      OLD(only): ");
 200                     printEntry(c, oldCS);
 201                 }
 202             } else if (canNew) {
 203                 byte[] bbNew = new String(new char[] {c}).getBytes(newCS);
 204                 byte[] bbOld = new String(new char[] {c}).getBytes(oldCS);
 205                 if (!Arrays.equals(bbNew, bbOld)) {
 206                     System.out.printf("      c->b NEW: ");
 207                     printEntry(c, newCS);
 208                     System.out.printf("      c->b OLD: ");
 209                     printEntry(c, oldCS);
 210                 } else {
 211                     String sNew = new String(bbNew, newCS);
 212                     String sOld = new String(bbOld, oldCS);
 213                     if (!sNew.equals(sOld)) {
 214                         System.out.printf("      b2c NEW (c=%x):", c&0xffff);
 215                         printEntry(sNew.charAt(0), newCS);
 216                         System.out.printf("      b2c OLD:");
 217                         printEntry(sOld.charAt(0), oldCS);
 218                     }
 219                 }
 220             }
 221             if (canNew & canOld) {  // added only both for now
 222                 cc[pos++] = c;
 223             }
 224         }
 225         return Arrays.copyOf(cc, pos);
 226     }
 227 
 228 
 229     // check and compare canEncoding/Encoding
 230     static void checkDecoding(Charset oldCS, Charset newCS)
 231         throws Exception
 232     {
 233         System.out.printf("Decoding <%s> <%s>...%n", oldCS.name(), newCS.name());
 234         boolean isEBCDIC = oldCS.name().startsWith("x-IBM93");
 235 
 236         //Try singlebyte first
 237         byte[] bb = new byte[1];
 238         System.out.printf("       trying SB...%n");
 239         for (int b = 0; b < 0x100; b++) {
 240             bb[0] = (byte)b;
 241             String sOld = new String(bb, oldCS);
 242             String sNew = new String(bb, newCS);
 243             if (!sOld.equals(sNew)) {
 244                 System.out.printf("        b=%x:  %x/%d(old)  %x/%d(new)%n",
 245                                   b& 0xff,
 246                                   sOld.charAt(0) & 0xffff, sOld.length(),
 247                                   sNew.charAt(0) & 0xffff, sNew.length());
 248             }
 249         }
 250 
 251         System.out.printf("       trying DB...%n");
 252         bb = new byte[isEBCDIC?4:2];
 253         int b1Min = 0x40;
 254         int b1Max = 0xfe;
 255         for (int b1 = 0x40; b1 < 0xff; b1++) {
 256             if (!isEBCDIC) {
 257                 // decodable singlebyte b1
 258                 bb[0] = (byte)b1;
 259                 String sOld = new String(bb, oldCS);
 260                 String sNew = new String(bb, newCS);
 261                 if (!sOld.equals(sNew)) {
 262                     if (sOld.length() != 2 && sOld.charAt(0) != 0) {
 263                         // only prints we are NOT expected. above two are known issue
 264                         System.out.printf("        b1=%x:  %x/%d(old)  %x/%d(new)%n",
 265                                           b1 & 0xff,
 266                                           sOld.charAt(0) & 0xffff, sOld.length(),
 267                                           sNew.charAt(0) & 0xffff, sNew.length());
 268                         continue;
 269                     }
 270                 }
 271             }
 272             for (int b2 = 0x40; b2 < 0xff; b2++) {
 273                 if (isEBCDIC) {
 274                     bb[0] = 0x0e;
 275                     bb[1] = (byte)b1;
 276                     bb[2] = (byte)b2;
 277                     bb[3] = 0x0f;
 278                 } else {
 279                     bb[0] = (byte)b1;
 280                     bb[1] = (byte)b2;
 281                 }
 282                 String sOld = new String(bb, oldCS);
 283                 String sNew = new String(bb, newCS);
 284                 //if (!sOld.equals(sNew)) {
 285                 if (sOld.charAt(0) != sNew.charAt(0)) {
 286 
 287 if (sOld.charAt(0) == 0 && sNew.charAt(0) == 0xfffd)
 288     continue; // known issude in old implementation
 289 
 290                     System.out.printf("        bb=<%x,%x>  c(old)=%x,  c(new)=%x%n",
 291                         b1, b2, sOld.charAt(0) & 0xffff, sNew.charAt(0) & 0xffff);
 292                 }
 293             }
 294         }
 295     }
 296 
 297     static void checkInit(String csn) throws Exception {
 298         System.out.printf("Check init <%s>...%n", csn);
 299         Charset.forName("Big5");    // load in the ExtendedCharsets
 300         long t1 = System.nanoTime()/1000;
 301         Charset cs = Charset.forName(csn);
 302         long t2 = System.nanoTime()/1000;
 303         System.out.printf("    charset     :%d%n", t2 - t1);
 304         t1 = System.nanoTime()/1000;
 305             cs.newDecoder();
 306         t2 = System.nanoTime()/1000;
 307         System.out.printf("    new Decoder :%d%n", t2 - t1);
 308 
 309         t1 = System.nanoTime()/1000;
 310             cs.newEncoder();
 311         t2 = System.nanoTime()/1000;
 312         System.out.printf("    new Encoder :%d%n", t2 - t1);
 313     }
 314 
 315     static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception {
 316         System.gc();    // enqueue finalizable objects
 317         Thread.sleep(1000);
 318         System.gc();    // enqueue finalizable objects
 319 
 320         String csn1 = cs1.name();
 321         String csn2 = cs2.name();
 322         System.out.printf("Diff     <%s> <%s>...%n", csn1, csn2);
 323 
 324         Time t1 = new Time();
 325         Time t2 = new Time();
 326 
 327         byte[] bb1 = encode(cc, cs1, false, t1);
 328         byte[] bb2 = encode(cc, cs2, false, t2);
 329 
 330         System.out.printf("    Encoding TimeRatio %s/%s: %d,%d :%f%n",
 331                           csn2, csn1,
 332                           t2.t, t1.t,
 333                           (double)(t2.t)/(t1.t));
 334         if (!Arrays.equals(bb1, bb2)) {
 335             System.out.printf("        encoding failed%n");
 336         }
 337 
 338         char[] cc2 = decode(bb1, cs2, false, t2);
 339         char[] cc1 = decode(bb1, cs1, false, t1);
 340         System.out.printf("    Decoding TimeRatio %s/%s: %d,%d :%f%n",
 341                           csn2, csn1,
 342                           t2.t, t1.t,
 343                           (double)(t2.t)/(t1.t));
 344         if (!Arrays.equals(cc1, cc2)) {
 345             System.out.printf("        decoding failed%n");
 346         }
 347 
 348         bb1 = encode(cc, cs1, true, t1);
 349         bb2 = encode(cc, cs2, true, t2);
 350 
 351         System.out.printf("    Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
 352                           csn2, csn1,
 353                           t2.t, t1.t,
 354                           (double)(t2.t)/(t1.t));
 355 
 356         if (!Arrays.equals(bb1, bb2))
 357             System.out.printf("        encoding (direct) failed%n");
 358 
 359         cc1 = decode(bb1, cs1, true, t1);
 360         cc2 = decode(bb1, cs2, true, t2);
 361         System.out.printf("    Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
 362                           csn2, csn1,
 363                           t2.t, t1.t,
 364                           (double)(t2.t)/(t1.t));
 365         if (!Arrays.equals(cc1, cc2)) {
 366             System.out.printf("        decoding (direct) failed%n");
 367         }
 368     }
 369 
 370     /* The first byte is the length of malformed bytes
 371         byte[][] malformed = {
 372             {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
 373         };
 374     */
 375 
 376     static void checkMalformed(Charset cs, byte[][] malformed)
 377         throws Exception
 378     {
 379         boolean failed = false;
 380         String csn = cs.name();
 381         System.out.printf("Check malformed <%s>...%n", csn);
 382         for (boolean direct: new boolean[] {false, true}) {
 383             for (byte[] bins : malformed) {
 384                 int mlen = bins[0];
 385                 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
 386                 CoderResult cr = decodeCR(bin, cs, direct);
 387                 String ashex = "";
 388                 for (int i = 0; i < bin.length; i++) {
 389                     if (i > 0) ashex += " ";
 390                         ashex += Integer.toString((int)bin[i] & 0xff, 16);
 391                 }
 392                 if (!cr.isMalformed()) {
 393                     System.out.printf("        FAIL(direct=%b): [%s] not malformed. -->cr=%s\n", direct, ashex, cr.toString());
 394                     failed = true;
 395                 } else if (cr.length() != mlen) {
 396                     System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
 397                     failed = true;
 398                 }
 399             }
 400         }
 401         if (failed)
 402             throw new RuntimeException("Check malformed failed " + csn);
 403     }
 404 
 405     static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
 406         int inPos = flow[0];
 407         int inLen = flow[1];
 408         int outPos = flow[2];
 409         int outLen = flow[3];
 410         int expedInPos = flow[4];
 411         int expedOutPos = flow[5];
 412         CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
 413                                           :CoderResult.OVERFLOW;
 414         ByteBuffer bbf;
 415         CharBuffer cbf;
 416         if (direct) {
 417             bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
 418             cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
 419         } else {
 420             bbf = ByteBuffer.allocate(inPos + bytes.length);
 421             cbf = CharBuffer.allocate(outPos + outLen);
 422         }
 423         bbf.position(inPos);
 424         bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
 425         cbf.position(outPos);
 426         dec.reset();
 427         CoderResult cr = dec.decode(bbf, cbf, false);
 428         if (cr != expedCR ||
 429             bbf.position() != expedInPos ||
 430             cbf.position() != expedOutPos) {
 431             System.out.printf("Expected(direct=%5b): [", direct);
 432             for (int i:flow) System.out.print(" " + i);
 433             System.out.println("]  CR=" + cr +
 434                                ", inPos=" + bbf.position() +
 435                                ", outPos=" + cbf.position());
 436             return false;
 437         }
 438         return true;
 439     }
 440 
 441     static void checkUnderOverflow(Charset cs) throws Exception {
 442         String csn = cs.name();
 443         System.out.printf("Check under/overflow <%s>...%n", csn);
 444         CharsetDecoder dec = cs.newDecoder();
 445         boolean failed = false;
 446 
 447         //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
 448         //0   1 2   3         7         11
 449         byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW");
 450         int    inlen = bytes.length;
 451 
 452         int MAXOFF = 20;
 453         for (int inoff = 0; inoff < MAXOFF; inoff++) {
 454             for (int outoff = 0; outoff < MAXOFF; outoff++) {
 455         int[][] Flows = {
 456             //inpos, inLen, outPos,  outLen, inPosEP,    outposEP,   under(0)/over(1)
 457             //overflow
 458             {inoff,  inlen, outoff,  1,      inoff + 1,  outoff + 1, 1},
 459             {inoff,  inlen, outoff,  2,      inoff + 3,  outoff + 2, 1},
 460             {inoff,  inlen, outoff,  3,      inoff + 7,  outoff + 3, 1},
 461             {inoff,  inlen, outoff,  4,      inoff + 11, outoff + 4, 1},
 462             {inoff,  inlen, outoff,  5,      inoff + 11, outoff + 4, 1},
 463             {inoff,  inlen, outoff,  6,      inoff + 15, outoff + 6, 0},
 464             //underflow
 465             {inoff,  1,     outoff,  6,      inoff + 1,  outoff + 1, 0},
 466             {inoff,  2,     outoff,  6,      inoff + 1,  outoff + 1, 0},
 467             {inoff,  3,     outoff,  6,      inoff + 3,  outoff + 2, 0},
 468             {inoff,  4,     outoff,  6,      inoff + 3,  outoff + 2, 0},
 469             {inoff,  5,     outoff,  6,      inoff + 3,  outoff + 2, 0},
 470             {inoff,  8,     outoff,  6,      inoff + 7,  outoff + 3, 0},
 471             {inoff,  9,     outoff,  6,      inoff + 7,  outoff + 3, 0},
 472             {inoff, 10,     outoff,  6,      inoff + 7,  outoff + 3, 0},
 473             {inoff, 11,     outoff,  6,      inoff +11,  outoff + 4, 0},
 474             {inoff, 12,     outoff,  6,      inoff +11,  outoff + 4, 0},
 475             {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
 476             // 2-byte under/overflow
 477             {inoff,  2,     outoff,  1,      inoff + 1,  outoff + 1, 0},
 478             {inoff,  3,     outoff,  1,      inoff + 1,  outoff + 1, 1},
 479             {inoff,  3,     outoff,  2,      inoff + 3,  outoff + 2, 0},
 480         };
 481         for (boolean direct: new boolean[] {false, true}) {
 482             for (int[] flow: Flows) {
 483                 if (!check(dec, bytes, direct, flow))
 484                     failed = true;
 485             }
 486         }}}
 487         if (failed)
 488             throw new RuntimeException("Check under/overflow failed " + csn);
 489     }
 490 
 491     static String[] csnames = new String[] {
 492         "IBM930",
 493         "IBM933",
 494         "IBM935",
 495         "IBM937",
 496         "IBM939",
 497         "IBM942",
 498         "IBM943",
 499         "IBM948",
 500         "IBM949",
 501         "IBM950",
 502         "IBM970",
 503         "IBM942C",
 504         "IBM943C",
 505         "IBM949C",
 506         "IBM1381",
 507         "IBM1383",
 508 
 509         "EUC_CN",
 510         "EUC_KR",
 511         "GBK",
 512         "Johab",
 513         "MS932",
 514         "MS936",
 515         "MS949",
 516         "MS950",
 517     };
 518 
 519     public static void main(String[] args) throws Exception {
 520         for (String csname: csnames) {
 521             System.out.printf("-----------------------------------%n");
 522             String oldname = csname + "_OLD";
 523             checkInit(csname);
 524             Charset csOld = (Charset)Class.forName(oldname).newInstance();
 525             Charset csNew = Charset.forName(csname);
 526             char[] cc = checkEncoding(csOld, csNew);
 527             checkDecoding(csOld, csNew);
 528             compare(csNew, csOld, cc);
 529 
 530             if (csname.startsWith("x-IBM93")) {
 531                 //ecdbic
 532                 checkMalformed(csNew, new byte[][] {
 533                     {1, 0x26, 0x0f, 0x27},         // in SBSC, no SI
 534                     {1, 0x0e, 0x41, 0x41, 0xe},    // in DBSC, no SO
 535                     {2, 0x0e, 0x40, 0x41, 0xe},    // illegal DB
 536                 });
 537             } else if (csname.equals("x-IBM970") ||
 538                        csname.equals("x-IBM1383")) {
 539                 //euc_simple
 540                 checkMalformed(csNew, new byte[][] {
 541                     {1, 0x26, (byte)0x8f, 0x27},                   // SS2
 542                     {1, (byte)0xa1, (byte)0xa1, (byte)0x8e, 0x51}, // SS3
 543                 });
 544             }
 545         }
 546     }
 547 }