/*
 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 6843578
 * @summary Test IBM DB charsets
 * @build IBM930_OLD IBM933_OLD IBM935_OLD IBM937_OLD IBM939_OLD IBM942_OLD IBM943_OLD IBM948_OLD IBM949_OLD IBM950_OLD IBM970_OLD IBM942C_OLD IBM943C_OLD IBM949C_OLD IBM1381_OLD IBM1383_OLD EUC_CN_OLD EUC_KR_OLD GBK_OLD Johab_OLD MS932_OLD MS936_OLD MS949_OLD MS950_OLD
 */

import java.nio.charset.*;
import java.nio.*;
import java.util.*;

/**
 * Compares each charset listed in {@link #csnames} with a reference
 * implementation loaded reflectively from the class {@code <name>_OLD}.
 * Differences in encodability, encoded bytes and decoded chars are printed;
 * encode/decode time ratios are reported for heap and direct buffers.
 */
public class TestIBMDB {
    /** Out-parameter carrying the average time per iteration, in microseconds. */
    static class Time {
        long t;
    }

    /** Number of times every timed or repeated coding loop is executed. */
    static int iteration = 200;

    /**
     * Decodes {@code bb} with a fresh decoder of {@code cs}, {@link #iteration}
     * times, and returns the chars produced by the last pass.
     *
     * @param bb         bytes to decode
     * @param cs         charset supplying the decoder
     * @param testDirect use direct buffers when true, heap buffers otherwise
     * @param t          receives the average time per pass in microseconds
     * @return the decoded chars
     * @throws RuntimeException if the last pass did not end in UNDERFLOW
     */
    static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
        throws Exception {
        String csn = cs.name();
        CharsetDecoder dec = cs.newDecoder();
        ByteBuffer bbf;
        CharBuffer cbf;
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(bb.length);
            cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
            // No flip() needed: the buffer is filled to capacity, so the
            // rewind() in the loop leaves limit == capacity == bb.length.
            bbf.put(bb);
        } else {
            bbf = ByteBuffer.wrap(bb);
            cbf = CharBuffer.allocate(bb.length);
        }
        CoderResult cr = null;
        long t1 = System.nanoTime()/1000;
        for (int i = 0; i < iteration; i++) {
            bbf.rewind();
            cbf.clear();
            dec.reset();
            cr = dec.decode(bbf, cbf, true);
        }
        long t2 = System.nanoTime()/1000;
        t.t = (t2 - t1)/iteration;
        if (cr != CoderResult.UNDERFLOW) {
            System.out.println("DEC-----------------");
            int pos = bbf.position();
            // Dump at most four bytes starting at the failing position, but
            // never read past the end of the input: doing so would replace
            // the intended failure with an ArrayIndexOutOfBoundsException.
            int end = Math.min(bb.length, pos + 4);
            StringBuilder hex = new StringBuilder();
            for (int i = pos; i < end; i++) {
                if (i > pos) {
                    hex.append(',');
                }
                hex.append(Integer.toHexString(bb[i] & 0xff));
            }
            System.out.printf(" cr=%s, bbf.pos=%d, bb[pos]=%s%n",
                              cr.toString(), pos, hex);
            throw new RuntimeException("Decoding err: " + csn);
        }
        char[] cc = new char[cbf.position()];
        cbf.flip(); cbf.get(cc);
        return cc;
    }

    /**
     * Decodes {@code bb} {@link #iteration} times and returns the
     * {@link CoderResult} of the last pass without interpreting it.
     *
     * @param bb         bytes to decode
     * @param cs         charset supplying the decoder
     * @param testDirect use direct buffers when true, heap buffers otherwise
     * @return the result of the last decode call
     */
    static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
        throws Exception {
        CharsetDecoder dec = cs.newDecoder();
        ByteBuffer bbf;
        CharBuffer cbf;
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(bb.length);
            cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
            bbf.put(bb).flip();
        } else {
            bbf = ByteBuffer.wrap(bb);
            cbf = CharBuffer.allocate(bb.length);
        }
        CoderResult cr = null;
        for (int i = 0; i < iteration; i++) {
            bbf.rewind();
            cbf.clear();
            dec.reset();
            cr = dec.decode(bbf, cbf, true);
        }
        return cr;
    }

    /**
     * Encodes {@code cc} with a fresh encoder of {@code cs}, {@link #iteration}
     * times, and returns the bytes produced by the last pass. The output
     * buffer is sized at four bytes per input char.
     *
     * @param cc         chars to encode
     * @param cs         charset supplying the encoder
     * @param testDirect use direct buffers when true, heap buffers otherwise
     * @param t          receives the average time per pass in microseconds
     * @return the encoded bytes
     * @throws RuntimeException if the last pass did not end in UNDERFLOW
     */
    static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
        throws Exception {
        ByteBuffer bbf;
        CharBuffer cbf;
        CharsetEncoder enc = cs.newEncoder();
        String csn = cs.name();
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(cc.length * 4);
            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
            cbf.put(cc).flip();
        } else {
            bbf = ByteBuffer.allocate(cc.length * 4);
            cbf = CharBuffer.wrap(cc);
        }
        CoderResult cr = null;
        long t1 = System.nanoTime()/1000;
        for (int i = 0; i < iteration; i++) {
            cbf.rewind();
            bbf.clear();
            enc.reset();
            cr = enc.encode(cbf, bbf, true);
        }
        long t2 = System.nanoTime()/1000;
        t.t = (t2 - t1)/iteration;
        if (cr != CoderResult.UNDERFLOW) {
            System.out.println("ENC-----------------");
            int pos = cbf.position();
            System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n",
                              cr.toString(), pos, cc[pos]&0xffff);
            throw new RuntimeException("Encoding err: " + csn);
        }
        byte[] bb = new byte[bbf.position()];
        bbf.flip(); bbf.get(bb);
        return bb;
    }

    /**
     * Encodes {@code cc} {@link #iteration} times and returns the
     * {@link CoderResult} of the last pass without interpreting it.
     *
     * @param cc         chars to encode
     * @param cs         charset supplying the encoder
     * @param testDirect use direct buffers when true, heap buffers otherwise
     * @return the result of the last encode call
     */
    static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
        throws Exception {
        ByteBuffer bbf;
        CharBuffer cbf;
        CharsetEncoder enc = cs.newEncoder();
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(cc.length * 4);
            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
            cbf.put(cc).flip();
        } else {
            bbf = ByteBuffer.allocate(cc.length * 4);
            cbf = CharBuffer.wrap(cc);
        }
        CoderResult cr = null;
        for (int i = 0; i < iteration; i++) {
            cbf.rewind();
            bbf.clear();
            enc.reset();
            cr = enc.encode(cbf, bbf, true);
        }
        return cr;
    }

    /**
     * Prints one mapping line for {@code c} in {@code cs}: the encoded bytes
     * in hex, the char itself, and the first char obtained by decoding those
     * bytes again.
     */
    static void printEntry(char c, Charset cs) {
        byte[] bb = new String(new char[] {c}).getBytes(cs);
        for (byte b:bb)
            System.out.printf("%x", b&0xff);
        System.out.printf(" %x", c & 0xffff);
        String s2 = new String(bb, cs);
        System.out.printf(" %x%n", s2.charAt(0) & 0xffff);
    }

    /**
     * Compares canEncode() and the encoded form of every char below 0xffff
     * between the old and new charset, printing each difference found.
     *
     * @return the chars that both charsets report as encodable
     */
    static char[] checkEncoding(Charset oldCS, Charset newCS)
        throws Exception {
        System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name());
        CharsetEncoder encOLD = oldCS.newEncoder();
        CharsetEncoder encNew = newCS.newEncoder();
        char[] cc = new char[0x10000];
        int pos = 0;
        boolean is970 = "x-IBM970-Old".equals(oldCS.name());

        // The bound is < 0xffff: a char counter can never exceed 0xffff, so
        // "<=" would never terminate. 0xffff itself is therefore not checked.
        for (char c = 0; c < 0xffff; c++) {
            boolean canOld = encOLD.canEncode(c);
            boolean canNew = encNew.canEncode(c);

            if (is970 && c == 0x2299)
                continue;

            if (canOld != canNew) {
                if (canNew) {
                    System.out.printf(" NEW(only): ");
                    printEntry(c, newCS);
                } else {
                    if (is970) {
                        byte[] bb = new String(new char[] {c}).getBytes(oldCS);
                        if (bb.length == 2 && bb[0] == (byte)0xa2 && bb[1] == (byte)0xc1) {
                            // we know 970 has bogus nnnn -> a2c1 -> 2299
                            continue;
                        }
                    }
                    System.out.printf(" OLD(only): ");
                    printEntry(c, oldCS);
                }
            } else if (canNew) {
                byte[] bbNew = new String(new char[] {c}).getBytes(newCS);
                byte[] bbOld = new String(new char[] {c}).getBytes(oldCS);
                if (!Arrays.equals(bbNew, bbOld)) {
                    System.out.printf(" c->b NEW: ");
                    printEntry(c, newCS);
                    System.out.printf(" c->b OLD: ");
                    printEntry(c, oldCS);
                } else {
                    String sNew = new String(bbNew, newCS);
                    String sOld = new String(bbOld, oldCS);
                    if (!sNew.equals(sOld)) {
                        System.out.printf(" b2c NEW (c=%x):", c&0xffff);
                        printEntry(sNew.charAt(0), newCS);
                        System.out.printf(" b2c OLD:");
                        printEntry(sOld.charAt(0), oldCS);
                    }
                }
            }
            if (canNew & canOld) {  // added only both for now
                cc[pos++] = c;
            }
        }
        return Arrays.copyOf(cc, pos);
    }

    /**
     * Compares the decoding of every single byte, and of every double-byte
     * sequence with both bytes in 0x40..0xfe, between the old and new
     * charset, printing each difference found. For charsets whose old name
     * starts with "x-IBM93" the double-byte pair is wrapped in 0x0e ... 0x0f.
     */
    static void checkDecoding(Charset oldCS, Charset newCS)
        throws Exception
    {
        System.out.printf("Decoding <%s> <%s>...%n", oldCS.name(), newCS.name());
        boolean isEBCDIC = oldCS.name().startsWith("x-IBM93");

        //Try singlebyte first
        byte[] bb = new byte[1];
        System.out.printf(" trying SB...%n");
        for (int b = 0; b < 0x100; b++) {
            bb[0] = (byte)b;
            String sOld = new String(bb, oldCS);
            String sNew = new String(bb, newCS);
            if (!sOld.equals(sNew)) {
                System.out.printf(" b=%x: %x/%d(old) %x/%d(new)%n",
                                  b& 0xff,
                                  sOld.charAt(0) & 0xffff, sOld.length(),
                                  sNew.charAt(0) & 0xffff, sNew.length());
            }
        }

        System.out.printf(" trying DB...%n");
        bb = new byte[isEBCDIC?4:2];
        for (int b1 = 0x40; b1 < 0xff; b1++) {
            if (!isEBCDIC) {
                // decodable singlebyte b1
                // (bb[1] still holds whatever the previous pass left there)
                bb[0] = (byte)b1;
                String sOld = new String(bb, oldCS);
                String sNew = new String(bb, newCS);
                if (!sOld.equals(sNew)) {
                    if (sOld.length() != 2 && sOld.charAt(0) != 0) {
                        // only prints we are NOT expected. above two are known issue
                        System.out.printf(" b1=%x: %x/%d(old) %x/%d(new)%n",
                                          b1 & 0xff,
                                          sOld.charAt(0) & 0xffff, sOld.length(),
                                          sNew.charAt(0) & 0xffff, sNew.length());
                        continue;
                    }
                }
            }
            for (int b2 = 0x40; b2 < 0xff; b2++) {
                if (isEBCDIC) {
                    bb[0] = 0x0e;
                    bb[1] = (byte)b1;
                    bb[2] = (byte)b2;
                    bb[3] = 0x0f;
                } else {
                    bb[0] = (byte)b1;
                    bb[1] = (byte)b2;
                }
                String sOld = new String(bb, oldCS);
                String sNew = new String(bb, newCS);
                //if (!sOld.equals(sNew)) {
                if (sOld.charAt(0) != sNew.charAt(0)) {

                    if (sOld.charAt(0) == 0 && sNew.charAt(0) == 0xfffd)
                        continue; // known issue in old implementation

                    System.out.printf(" bb=<%x,%x> c(old)=%x, c(new)=%x%n",
                                      b1, b2, sOld.charAt(0) & 0xffff, sNew.charAt(0) & 0xffff);
                }
            }
        }
    }

    /**
     * Prints the time, in microseconds, taken to look up the charset and to
     * create its first decoder and encoder.
     */
    static void checkInit(String csn) throws Exception {
        System.out.printf("Check init <%s>...%n", csn);
        Charset.forName("Big5"); // load in the ExtendedCharsets
        long t1 = System.nanoTime()/1000;
        Charset cs = Charset.forName(csn);
        long t2 = System.nanoTime()/1000;
        System.out.printf(" charset :%d%n", t2 - t1);
        t1 = System.nanoTime()/1000;
        cs.newDecoder();
        t2 = System.nanoTime()/1000;
        System.out.printf(" new Decoder :%d%n", t2 - t1);

        t1 = System.nanoTime()/1000;
        cs.newEncoder();
        t2 = System.nanoTime()/1000;
        System.out.printf(" new Encoder :%d%n", t2 - t1);
    }

    /**
     * Encodes {@code cc} and decodes the result with both charsets, through
     * heap and then direct buffers, printing the cs2/cs1 time ratio for each
     * step and a message whenever the two charsets produce different output.
     * A mismatch is only reported, it does not fail the test.
     */
    static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception {
        System.gc(); // enqueue finalizable objects
        Thread.sleep(1000);
        System.gc(); // enqueue finalizable objects

        String csn1 = cs1.name();
        String csn2 = cs2.name();
        System.out.printf("Diff <%s> <%s>...%n", csn1, csn2);

        Time t1 = new Time();
        Time t2 = new Time();

        byte[] bb1 = encode(cc, cs1, false, t1);
        byte[] bb2 = encode(cc, cs2, false, t2);

        System.out.printf(" Encoding TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t)/(t1.t));
        if (!Arrays.equals(bb1, bb2)) {
            System.out.printf(" encoding failed%n");
        }

        char[] cc2 = decode(bb1, cs2, false, t2);
        char[] cc1 = decode(bb1, cs1, false, t1);
        System.out.printf(" Decoding TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t)/(t1.t));
        if (!Arrays.equals(cc1, cc2)) {
            System.out.printf(" decoding failed%n");
        }

        bb1 = encode(cc, cs1, true, t1);
        bb2 = encode(cc, cs2, true, t2);

        System.out.printf(" Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t)/(t1.t));

        if (!Arrays.equals(bb1, bb2))
            System.out.printf(" encoding (direct) failed%n");

        cc1 = decode(bb1, cs1, true, t1);
        cc2 = decode(bb1, cs2, true, t2);
        System.out.printf(" Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t)/(t1.t));
        if (!Arrays.equals(cc1, cc2)) {
            System.out.printf(" decoding (direct) failed%n");
        }
    }

    /* The first byte is the length of malformed bytes
    byte[][] malformed = {
        {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
    };
    */

    /**
     * Decodes each entry of {@code malformed} with heap and direct buffers
     * and verifies that the decoder reports malformed input of the expected
     * length.
     *
     * @param cs        charset under test
     * @param malformed test vectors; element 0 of each row is the expected
     *                  malformed length, the remaining elements are the input
     * @throws RuntimeException if any vector is not reported as expected
     */
    static void checkMalformed(Charset cs, byte[][] malformed)
        throws Exception
    {
        boolean failed = false;
        String csn = cs.name();
        System.out.printf("Check malformed <%s>...%n", csn);
        for (boolean direct: new boolean[] {false, true}) {
            for (byte[] bins : malformed) {
                int mlen = bins[0];
                byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
                CoderResult cr = decodeCR(bin, cs, direct);
                StringBuilder hex = new StringBuilder();
                for (int i = 0; i < bin.length; i++) {
                    if (i > 0) {
                        hex.append(' ');
                    }
                    hex.append(Integer.toString((int)bin[i] & 0xff, 16));
                }
                String ashex = hex.toString();
                if (!cr.isMalformed()) {
                    System.out.printf(" FAIL(direct=%b): [%s] not malformed. -->cr=%s\n", direct, ashex, cr.toString());
                    failed = true;
                } else if (cr.length() != mlen) {
                    System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
                    failed = true;
                }
            }
        }
        if (failed)
            throw new RuntimeException("Check malformed failed " + csn);
    }

    /**
     * Runs one decode call with endOfInput=false and checks the result and
     * both buffer positions against {@code flow}.
     *
     * @param dec    decoder to use; it is reset before decoding
     * @param bytes  input bytes, copied into the input buffer at flow[0]
     * @param direct use direct buffers when true, heap buffers otherwise
     * @param flow   {inPos, inLen, outPos, outLen, expected inPos,
     *               expected outPos, expected result (0 = UNDERFLOW,
     *               otherwise OVERFLOW)}
     * @return true if the result and both positions match the expectation
     */
    static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
        int inPos  = flow[0];
        int inLen  = flow[1];
        int outPos = flow[2];
        int outLen = flow[3];
        int expedInPos = flow[4];
        int expedOutPos = flow[5];
        CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
                                          :CoderResult.OVERFLOW;
        ByteBuffer bbf;
        CharBuffer cbf;
        if (direct) {
            bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
            cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
        } else {
            bbf = ByteBuffer.allocate(inPos + bytes.length);
            cbf = CharBuffer.allocate(outPos + outLen);
        }
        bbf.position(inPos);
        bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
        cbf.position(outPos);
        dec.reset();
        CoderResult cr = dec.decode(bbf, cbf, false);
        if (cr != expedCR ||
            bbf.position() != expedInPos ||
            cbf.position() != expedOutPos) {
            System.out.printf("Expected(direct=%5b): [", direct);
            for (int i:flow) System.out.print(" " + i);
            System.out.println("] CR=" + cr +
                               ", inPos=" + bbf.position() +
                               ", outPos=" + cbf.position());
            return false;
        }
        return true;
    }

    /**
     * Checks decoder under/overflow handling at input and output offsets
     * 0..19, for heap and direct buffers. The input bytes are always
     * produced with the "EUC_TW" charset, whatever {@code cs} is. This
     * method is not invoked from {@link #main}.
     *
     * @throws RuntimeException if any flow does not behave as expected
     */
    static void checkUnderOverflow(Charset cs) throws Exception {
        String csn = cs.name();
        System.out.printf("Check under/overflow <%s>...%n", csn);
        CharsetDecoder dec = cs.newDecoder();
        boolean failed = false;

        //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
        //0   1     2         3         7         11
        byte[] bytes = "\u007f\u3000\u4e42\u4e28\ud840\udc55".getBytes("EUC_TW");
        int inlen = bytes.length;

        int MAXOFF = 20;
        for (int inoff = 0; inoff < MAXOFF; inoff++) {
            for (int outoff = 0; outoff < MAXOFF; outoff++) {
                int[][] Flows = {
                    //inpos, inLen, outPos, outLen, inPosEP, outposEP, under(0)/over(1)
                    //overflow
                    {inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1},
                    {inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1},
                    {inoff, inlen, outoff, 3, inoff + 7, outoff + 3, 1},
                    {inoff, inlen, outoff, 4, inoff + 11, outoff + 4, 1},
                    {inoff, inlen, outoff, 5, inoff + 11, outoff + 4, 1},
                    {inoff, inlen, outoff, 6, inoff + 15, outoff + 6, 0},
                    //underflow
                    {inoff, 1, outoff, 6, inoff + 1, outoff + 1, 0},
                    {inoff, 2, outoff, 6, inoff + 1, outoff + 1, 0},
                    {inoff, 3, outoff, 6, inoff + 3, outoff + 2, 0},
                    {inoff, 4, outoff, 6, inoff + 3, outoff + 2, 0},
                    {inoff, 5, outoff, 6, inoff + 3, outoff + 2, 0},
                    {inoff, 8, outoff, 6, inoff + 7, outoff + 3, 0},
                    {inoff, 9, outoff, 6, inoff + 7, outoff + 3, 0},
                    {inoff, 10, outoff, 6, inoff + 7, outoff + 3, 0},
                    {inoff, 11, outoff, 6, inoff +11, outoff + 4, 0},
                    {inoff, 12, outoff, 6, inoff +11, outoff + 4, 0},
                    {inoff, 15, outoff, 6, inoff +15, outoff + 6, 0},
                    // 2-byte under/overflow
                    {inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0},
                    {inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1},
                    {inoff, 3, outoff, 2, inoff + 3, outoff + 2, 0},
                };
                for (boolean direct: new boolean[] {false, true}) {
                    for (int[] flow: Flows) {
                        if (!check(dec, bytes, direct, flow))
                            failed = true;
                    }
                }
            }
        }
        if (failed)
            throw new RuntimeException("Check under/overflow failed " + csn);
    }

    /**
     * Charsets under test. Each name is passed to Charset.forName() and,
     * with "_OLD" appended, is also the class name of the reference
     * implementation.
     */
    static String[] csnames = new String[] {
        "IBM930",
        "IBM933",
        "IBM935",
        "IBM937",
        "IBM939",
        "IBM942",
        "IBM943",
        "IBM948",
        "IBM949",
        "IBM950",
        "IBM970",
        "IBM942C",
        "IBM943C",
        "IBM949C",
        "IBM1381",
        "IBM1383",

        "EUC_CN",
        "EUC_KR",
        "GBK",
        "Johab",
        "MS932",
        "MS936",
        "MS949",
        "MS950",
    };

    /**
     * For every entry of {@link #csnames}: times initialization, compares
     * encoding and decoding against the reference implementation, and runs
     * the malformed-input checks that apply to the charset.
     */
    public static void main(String[] args) throws Exception {
        for (String csname: csnames) {
            System.out.printf("-----------------------------------%n");
            String oldname = csname + "_OLD";
            checkInit(csname);
            Charset csOld = (Charset)Class.forName(oldname)
                                          .getDeclaredConstructor()
                                          .newInstance();
            Charset csNew = Charset.forName(csname);
            char[] cc = checkEncoding(csOld, csNew);
            checkDecoding(csOld, csNew);
            compare(csNew, csOld, cc);

            // The entries in csnames carry no "x-" prefix, so matching them
            // against "x-IBM..." never succeeded and the malformed checks
            // were silently skipped. Match against the name reported by the
            // charset instead, as checkDecoding()/checkEncoding() do.
            String canonical = csNew.name();
            if (canonical.startsWith("x-IBM93")) {
                //ebcdic
                checkMalformed(csNew, new byte[][] {
                    {1, 0x26, 0x0f, 0x27},         // in SBSC, no SI
                    {1, 0x0e, 0x41, 0x41, 0xe},    // in DBSC, no SO
                    {2, 0x0e, 0x40, 0x41, 0xe},    // illegal DB
                });
            } else if (canonical.equals("x-IBM970") ||
                       canonical.equals("x-IBM1383")) {
                //euc_simple
                checkMalformed(csNew, new byte[][] {
                    {1, 0x26, (byte)0x8f, 0x27},   // SS2
                    {1, (byte)0xa1, (byte)0xa1, (byte)0x8e, 0x51},   // SS3
                });
            }
        }
    }
}