1 /*
   2  * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /*
  25  * @test
  26  * @bug 6843578
  27  * @summary Test IBM DB charsets
  28  * @build IBM930_OLD IBM933_OLD IBM935_OLD IBM937_OLD IBM939_OLD IBM942_OLD IBM943_OLD IBM948_OLD IBM949_OLD IBM950_OLD IBM970_OLD IBM942C_OLD IBM943C_OLD IBM949C_OLD IBM1381_OLD IBM1383_OLD EUC_CN_OLD EUC_KR_OLD GBK_OLD Johab_OLD MS932_OLD MS936_OLD MS949_OLD MS950_OLD
  29  * @run main TestIBMDB
  30  */
  31 
  32 import java.nio.charset.*;
  33 import java.nio.*;
  34 import java.util.*;
  35 
  36 public class TestIBMDB {
  37     static class Time {
  38         long t;
  39     }
  40     static int iteration = 200;
  41 
  42     static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
  43         throws Exception {
  44         String csn = cs.name();
  45         CharsetDecoder dec = cs.newDecoder();
  46         ByteBuffer bbf;
  47         CharBuffer cbf;
  48         if (testDirect) {
  49             bbf = ByteBuffer.allocateDirect(bb.length);
  50             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  51             bbf.put(bb);
  52         } else {
  53             bbf = ByteBuffer.wrap(bb);
  54             cbf = CharBuffer.allocate(bb.length);
  55         }
  56         CoderResult cr = null;
  57         long t1 = System.nanoTime()/1000;
  58         for (int i = 0; i < iteration; i++) {
  59             bbf.rewind();
  60             cbf.clear();
  61             dec.reset();
  62             cr = dec.decode(bbf, cbf, true);
  63         }
  64         long t2 = System.nanoTime()/1000;
  65         t.t = (t2 - t1)/iteration;
  66         if (cr != CoderResult.UNDERFLOW) {
  67             System.out.println("DEC-----------------");
  68             int pos = bbf.position();
  69             System.out.printf("  cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n",
  70                               cr.toString(), pos,
  71                               bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff);
  72             throw new RuntimeException("Decoding err: " + csn);
  73         }
  74         char[] cc = new char[cbf.position()];
  75         cbf.flip(); cbf.get(cc);
  76         return cc;
  77 
  78     }
  79 
  80     static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
  81         throws Exception {
  82         CharsetDecoder dec = cs.newDecoder();
  83         ByteBuffer bbf;
  84         CharBuffer cbf;
  85         if (testDirect) {
  86             bbf = ByteBuffer.allocateDirect(bb.length);
  87             cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
  88             bbf.put(bb).flip();
  89         } else {
  90             bbf = ByteBuffer.wrap(bb);
  91             cbf = CharBuffer.allocate(bb.length);
  92         }
  93         CoderResult cr = null;
  94         for (int i = 0; i < iteration; i++) {
  95             bbf.rewind();
  96             cbf.clear();
  97             dec.reset();
  98             cr = dec.decode(bbf, cbf, true);
  99         }
 100         return cr;
 101     }
 102 
 103     static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
 104         throws Exception {
 105         ByteBuffer bbf;
 106         CharBuffer cbf;
 107         CharsetEncoder enc = cs.newEncoder();
 108         String csn = cs.name();
 109         if (testDirect) {
 110             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 111             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 112             cbf.put(cc).flip();
 113         } else {
 114             bbf = ByteBuffer.allocate(cc.length * 4);
 115             cbf = CharBuffer.wrap(cc);
 116         }
 117         CoderResult cr = null;
 118         long t1 = System.nanoTime()/1000;
 119         for (int i = 0; i < iteration; i++) {
 120             cbf.rewind();
 121             bbf.clear();
 122             enc.reset();
 123             cr = enc.encode(cbf, bbf, true);
 124         }
 125         long t2 = System.nanoTime()/1000;
 126         t.t = (t2 - t1)/iteration;
 127         if (cr != CoderResult.UNDERFLOW) {
 128             System.out.println("ENC-----------------");
 129             int pos = cbf.position();
 130             System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n",
 131                               cr.toString(), pos, cc[pos]&0xffff);
 132             throw new RuntimeException("Encoding err: " + csn);
 133         }
 134         byte[] bb = new byte[bbf.position()];
 135         bbf.flip(); bbf.get(bb);
 136         return bb;
 137     }
 138 
 139     static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
 140         throws Exception {
 141         ByteBuffer bbf;
 142         CharBuffer cbf;
 143         CharsetEncoder enc = cs.newEncoder();
 144         if (testDirect) {
 145             bbf = ByteBuffer.allocateDirect(cc.length * 4);
 146             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
 147             cbf.put(cc).flip();
 148         } else {
 149             bbf = ByteBuffer.allocate(cc.length * 4);
 150             cbf = CharBuffer.wrap(cc);
 151         }
 152         CoderResult cr = null;
 153         for (int i = 0; i < iteration; i++) {
 154             cbf.rewind();
 155             bbf.clear();
 156             enc.reset();
 157             cr = enc.encode(cbf, bbf, true);
 158         }
 159         return cr;
 160     }
 161 
 162     static void printEntry(char c, Charset cs) {
 163         byte[] bb = new String(new char[] {c}).getBytes(cs);
 164         for (byte b:bb)
 165             System.out.printf("%x", b&0xff);
 166         System.out.printf("    %x", c & 0xffff);
 167         String s2 = new String(bb, cs);
 168         System.out.printf("    %x%n", s2.charAt(0) & 0xffff);
 169     }
 170 
 171     // check and compare canEncoding/Encoding
 172     static char[] checkEncoding(Charset oldCS, Charset newCS)
 173         throws Exception {
 174         System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name());
 175         CharsetEncoder encOLD = oldCS.newEncoder();
 176         CharsetEncoder encNew = newCS.newEncoder();
 177         char[] cc = new char[0x10000];
 178         int pos = 0;
 179         boolean is970 = "x-IBM970-Old".equals(oldCS.name());
 180 
 181         for (char c = 0; c < 0xffff; c++) {
 182             boolean canOld = encOLD.canEncode(c);
 183             boolean canNew = encNew.canEncode(c);
 184 
 185             if (is970 && c == 0x2299)
 186                 continue;
 187 
 188             if (canOld != canNew) {
 189                 if (canNew) {
 190                     System.out.printf("      NEW(only): ");
 191                     printEntry(c, newCS);
 192                 } else {
 193                     if (is970) {
 194                         byte[] bb = new String(new char[] {c}).getBytes(oldCS);
 195                         if (bb.length == 2 && bb[0] == (byte)0xa2 && bb[1] == (byte)0xc1) {
 196                         // we know 970 has bogus nnnn -> a2c1 -> 2299
 197                             continue;
 198                         }
 199                     }
 200                     System.out.printf("      OLD(only): ");
 201                     printEntry(c, oldCS);
 202                 }
 203             } else if (canNew) {
 204                 byte[] bbNew = new String(new char[] {c}).getBytes(newCS);
 205                 byte[] bbOld = new String(new char[] {c}).getBytes(oldCS);
 206                 if (!Arrays.equals(bbNew, bbOld)) {
 207                     System.out.printf("      c->b NEW: ");
 208                     printEntry(c, newCS);
 209                     System.out.printf("      c->b OLD: ");
 210                     printEntry(c, oldCS);
 211                 } else {
 212                     String sNew = new String(bbNew, newCS);
 213                     String sOld = new String(bbOld, oldCS);
 214                     if (!sNew.equals(sOld)) {
 215                         System.out.printf("      b2c NEW (c=%x):", c&0xffff);
 216                         printEntry(sNew.charAt(0), newCS);
 217                         System.out.printf("      b2c OLD:");
 218                         printEntry(sOld.charAt(0), oldCS);
 219                     }
 220                 }
 221             }
 222             if (canNew & canOld) {  // added only both for now
 223                 cc[pos++] = c;
 224             }
 225         }
 226         return Arrays.copyOf(cc, pos);
 227     }
 228 
 229 
 230     // check and compare canEncoding/Encoding
 231     static void checkDecoding(Charset oldCS, Charset newCS)
 232         throws Exception
 233     {
 234         System.out.printf("Decoding <%s> <%s>...%n", oldCS.name(), newCS.name());
 235         boolean isEBCDIC = oldCS.name().startsWith("x-IBM93");
 236 
 237         //Try singlebyte first
 238         byte[] bb = new byte[1];
 239         System.out.printf("       trying SB...%n");
 240         for (int b = 0; b < 0x100; b++) {
 241             bb[0] = (byte)b;
 242             String sOld = new String(bb, oldCS);
 243             String sNew = new String(bb, newCS);
 244             if (!sOld.equals(sNew)) {
 245                 System.out.printf("        b=%x:  %x/%d(old)  %x/%d(new)%n",
 246                                   b& 0xff,
 247                                   sOld.charAt(0) & 0xffff, sOld.length(),
 248                                   sNew.charAt(0) & 0xffff, sNew.length());
 249             }
 250         }
 251 
 252         System.out.printf("       trying DB...%n");
 253         bb = new byte[isEBCDIC?4:2];
 254         int b1Min = 0x40;
 255         int b1Max = 0xfe;
 256         for (int b1 = 0x40; b1 < 0xff; b1++) {
 257             if (!isEBCDIC) {
 258                 // decodable singlebyte b1
 259                 bb[0] = (byte)b1;
 260                 String sOld = new String(bb, oldCS);
 261                 String sNew = new String(bb, newCS);
 262                 if (!sOld.equals(sNew)) {
 263                     if (sOld.length() != 2 && sOld.charAt(0) != 0) {
 264                         // only prints we are NOT expected. above two are known issue
 265                         System.out.printf("        b1=%x:  %x/%d(old)  %x/%d(new)%n",
 266                                           b1 & 0xff,
 267                                           sOld.charAt(0) & 0xffff, sOld.length(),
 268                                           sNew.charAt(0) & 0xffff, sNew.length());
 269                         continue;
 270                     }
 271                 }
 272             }
 273             for (int b2 = 0x40; b2 < 0xff; b2++) {
 274                 if (isEBCDIC) {
 275                     bb[0] = 0x0e;
 276                     bb[1] = (byte)b1;
 277                     bb[2] = (byte)b2;
 278                     bb[3] = 0x0f;
 279                 } else {
 280                     bb[0] = (byte)b1;
 281                     bb[1] = (byte)b2;
 282                 }
 283                 String sOld = new String(bb, oldCS);
 284                 String sNew = new String(bb, newCS);
 285                 //if (!sOld.equals(sNew)) {
 286                 if (sOld.charAt(0) != sNew.charAt(0)) {
 287 
 288 if (sOld.charAt(0) == 0 && sNew.charAt(0) == 0xfffd)
 289     continue; // known issude in old implementation
 290 
 291                     System.out.printf("        bb=<%x,%x>  c(old)=%x,  c(new)=%x%n",
 292                         b1, b2, sOld.charAt(0) & 0xffff, sNew.charAt(0) & 0xffff);
 293                 }
 294             }
 295         }
 296     }
 297 
 298     static void checkInit(String csn) throws Exception {
 299         System.out.printf("Check init <%s>...%n", csn);
 300         Charset.forName("Big5");    // load in the ExtendedCharsets
 301         long t1 = System.nanoTime()/1000;
 302         Charset cs = Charset.forName(csn);
 303         long t2 = System.nanoTime()/1000;
 304         System.out.printf("    charset     :%d%n", t2 - t1);
 305         t1 = System.nanoTime()/1000;
 306             cs.newDecoder();
 307         t2 = System.nanoTime()/1000;
 308         System.out.printf("    new Decoder :%d%n", t2 - t1);
 309 
 310         t1 = System.nanoTime()/1000;
 311             cs.newEncoder();
 312         t2 = System.nanoTime()/1000;
 313         System.out.printf("    new Encoder :%d%n", t2 - t1);
 314     }
 315 
 316     static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception {
 317         System.gc();    // enqueue finalizable objects
 318         Thread.sleep(1000);
 319         System.gc();    // enqueue finalizable objects
 320 
 321         String csn1 = cs1.name();
 322         String csn2 = cs2.name();
 323         System.out.printf("Diff     <%s> <%s>...%n", csn1, csn2);
 324 
 325         Time t1 = new Time();
 326         Time t2 = new Time();
 327 
 328         byte[] bb1 = encode(cc, cs1, false, t1);
 329         byte[] bb2 = encode(cc, cs2, false, t2);
 330 
 331         System.out.printf("    Encoding TimeRatio %s/%s: %d,%d :%f%n",
 332                           csn2, csn1,
 333                           t2.t, t1.t,
 334                           (double)(t2.t)/(t1.t));
 335         if (!Arrays.equals(bb1, bb2)) {
 336             System.out.printf("        encoding failed%n");
 337         }
 338 
 339         char[] cc2 = decode(bb1, cs2, false, t2);
 340         char[] cc1 = decode(bb1, cs1, false, t1);
 341         System.out.printf("    Decoding TimeRatio %s/%s: %d,%d :%f%n",
 342                           csn2, csn1,
 343                           t2.t, t1.t,
 344                           (double)(t2.t)/(t1.t));
 345         if (!Arrays.equals(cc1, cc2)) {
 346             System.out.printf("        decoding failed%n");
 347         }
 348 
 349         bb1 = encode(cc, cs1, true, t1);
 350         bb2 = encode(cc, cs2, true, t2);
 351 
 352         System.out.printf("    Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
 353                           csn2, csn1,
 354                           t2.t, t1.t,
 355                           (double)(t2.t)/(t1.t));
 356 
 357         if (!Arrays.equals(bb1, bb2))
 358             System.out.printf("        encoding (direct) failed%n");
 359 
 360         cc1 = decode(bb1, cs1, true, t1);
 361         cc2 = decode(bb1, cs2, true, t2);
 362         System.out.printf("    Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
 363                           csn2, csn1,
 364                           t2.t, t1.t,
 365                           (double)(t2.t)/(t1.t));
 366         if (!Arrays.equals(cc1, cc2)) {
 367             System.out.printf("        decoding (direct) failed%n");
 368         }
 369     }
 370 
 371     /* The first byte is the length of malformed bytes
 372         byte[][] malformed = {
 373             {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
 374         };
 375     */
 376 
 377     static void checkMalformed(Charset cs, byte[][] malformed)
 378         throws Exception
 379     {
 380         boolean failed = false;
 381         String csn = cs.name();
 382         System.out.printf("Check malformed <%s>...%n", csn);
 383         for (boolean direct: new boolean[] {false, true}) {
 384             for (byte[] bins : malformed) {
 385                 int mlen = bins[0];
 386                 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
 387                 CoderResult cr = decodeCR(bin, cs, direct);
 388                 String ashex = "";
 389                 for (int i = 0; i < bin.length; i++) {
 390                     if (i > 0) ashex += " ";
 391                         ashex += Integer.toString((int)bin[i] & 0xff, 16);
 392                 }
 393                 if (!cr.isMalformed()) {
 394                     System.out.printf("        FAIL(direct=%b): [%s] not malformed. -->cr=%s\n", direct, ashex, cr.toString());
 395                     failed = true;
 396                 } else if (cr.length() != mlen) {
 397                     System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
 398                     failed = true;
 399                 }
 400             }
 401         }
 402         if (failed)
 403             throw new RuntimeException("Check malformed failed " + csn);
 404     }
 405 
 406     static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
 407         int inPos = flow[0];
 408         int inLen = flow[1];
 409         int outPos = flow[2];
 410         int outLen = flow[3];
 411         int expedInPos = flow[4];
 412         int expedOutPos = flow[5];
 413         CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
 414                                           :CoderResult.OVERFLOW;
 415         ByteBuffer bbf;
 416         CharBuffer cbf;
 417         if (direct) {
 418             bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
 419             cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
 420         } else {
 421             bbf = ByteBuffer.allocate(inPos + bytes.length);
 422             cbf = CharBuffer.allocate(outPos + outLen);
 423         }
 424         bbf.position(inPos);
 425         bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
 426         cbf.position(outPos);
 427         dec.reset();
 428         CoderResult cr = dec.decode(bbf, cbf, false);
 429         if (cr != expedCR ||
 430             bbf.position() != expedInPos ||
 431             cbf.position() != expedOutPos) {
 432             System.out.printf("Expected(direct=%5b): [", direct);
 433             for (int i:flow) System.out.print(" " + i);
 434             System.out.println("]  CR=" + cr +
 435                                ", inPos=" + bbf.position() +
 436                                ", outPos=" + cbf.position());
 437             return false;
 438         }
 439         return true;
 440     }
 441 
 442     static void checkUnderOverflow(Charset cs) throws Exception {
 443         String csn = cs.name();
 444         System.out.printf("Check under/overflow <%s>...%n", csn);
 445         CharsetDecoder dec = cs.newDecoder();
 446         boolean failed = false;
 447 
 448         //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
 449         //0   1 2   3         7         11
 450         byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW");
 451         int    inlen = bytes.length;
 452 
 453         int MAXOFF = 20;
 454         for (int inoff = 0; inoff < MAXOFF; inoff++) {
 455             for (int outoff = 0; outoff < MAXOFF; outoff++) {
 456         int[][] Flows = {
 457             //inpos, inLen, outPos,  outLen, inPosEP,    outposEP,   under(0)/over(1)
 458             //overflow
 459             {inoff,  inlen, outoff,  1,      inoff + 1,  outoff + 1, 1},
 460             {inoff,  inlen, outoff,  2,      inoff + 3,  outoff + 2, 1},
 461             {inoff,  inlen, outoff,  3,      inoff + 7,  outoff + 3, 1},
 462             {inoff,  inlen, outoff,  4,      inoff + 11, outoff + 4, 1},
 463             {inoff,  inlen, outoff,  5,      inoff + 11, outoff + 4, 1},
 464             {inoff,  inlen, outoff,  6,      inoff + 15, outoff + 6, 0},
 465             //underflow
 466             {inoff,  1,     outoff,  6,      inoff + 1,  outoff + 1, 0},
 467             {inoff,  2,     outoff,  6,      inoff + 1,  outoff + 1, 0},
 468             {inoff,  3,     outoff,  6,      inoff + 3,  outoff + 2, 0},
 469             {inoff,  4,     outoff,  6,      inoff + 3,  outoff + 2, 0},
 470             {inoff,  5,     outoff,  6,      inoff + 3,  outoff + 2, 0},
 471             {inoff,  8,     outoff,  6,      inoff + 7,  outoff + 3, 0},
 472             {inoff,  9,     outoff,  6,      inoff + 7,  outoff + 3, 0},
 473             {inoff, 10,     outoff,  6,      inoff + 7,  outoff + 3, 0},
 474             {inoff, 11,     outoff,  6,      inoff +11,  outoff + 4, 0},
 475             {inoff, 12,     outoff,  6,      inoff +11,  outoff + 4, 0},
 476             {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
 477             // 2-byte under/overflow
 478             {inoff,  2,     outoff,  1,      inoff + 1,  outoff + 1, 0},
 479             {inoff,  3,     outoff,  1,      inoff + 1,  outoff + 1, 1},
 480             {inoff,  3,     outoff,  2,      inoff + 3,  outoff + 2, 0},
 481         };
 482         for (boolean direct: new boolean[] {false, true}) {
 483             for (int[] flow: Flows) {
 484                 if (!check(dec, bytes, direct, flow))
 485                     failed = true;
 486             }
 487         }}}
 488         if (failed)
 489             throw new RuntimeException("Check under/overflow failed " + csn);
 490     }
 491 
 492     static String[] csnames = new String[] {
 493         "IBM930",
 494         "IBM933",
 495         "IBM935",
 496         "IBM937",
 497         "IBM939",
 498         "IBM942",
 499         "IBM943",
 500         "IBM948",
 501         "IBM949",
 502         "IBM950",
 503         "IBM970",
 504         "IBM942C",
 505         "IBM943C",
 506         "IBM949C",
 507         "IBM1381",
 508         "IBM1383",
 509 
 510         "EUC_CN",
 511         "EUC_KR",
 512         "GBK",
 513         "Johab",
 514         "MS932",
 515         "MS936",
 516         "MS949",
 517         "MS950",
 518     };
 519 
 520     public static void main(String[] args) throws Exception {
 521         for (String csname: csnames) {
 522             System.out.printf("-----------------------------------%n");
 523             String oldname = csname + "_OLD";
 524             checkInit(csname);
 525             Charset csOld = (Charset)Class.forName(oldname).newInstance();
 526             Charset csNew = Charset.forName(csname);
 527             char[] cc = checkEncoding(csOld, csNew);
 528             checkDecoding(csOld, csNew);
 529             compare(csNew, csOld, cc);
 530 
 531             if (csname.startsWith("x-IBM93")) {
 532                 //ecdbic
 533                 checkMalformed(csNew, new byte[][] {
 534                     {1, 0x26, 0x0f, 0x27},         // in SBSC, no SI
 535                     {1, 0x0e, 0x41, 0x41, 0xe},    // in DBSC, no SO
 536                     {2, 0x0e, 0x40, 0x41, 0xe},    // illegal DB
 537                 });
 538             } else if (csname.equals("x-IBM970") ||
 539                        csname.equals("x-IBM1383")) {
 540                 //euc_simple
 541                 checkMalformed(csNew, new byte[][] {
 542                     {1, 0x26, (byte)0x8f, 0x27},                   // SS2
 543                     {1, (byte)0xa1, (byte)0xa1, (byte)0x8e, 0x51}, // SS3
 544                 });
 545             }
 546         }
 547     }
 548 }