/*
 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 6843578
 * @summary Test IBM DB charsets
 * @build IBM930_OLD IBM933_OLD IBM935_OLD IBM937_OLD IBM939_OLD IBM942_OLD IBM943_OLD IBM948_OLD IBM949_OLD IBM950_OLD IBM970_OLD IBM942C_OLD IBM943C_OLD IBM949C_OLD IBM1381_OLD IBM1383_OLD EUC_CN_OLD EUC_KR_OLD GBK_OLD Johab_OLD MS932_OLD MS936_OLD MS949_OLD MS950_OLD
 * @run main TestIBMDB
 */

import java.nio.charset.*;
import java.nio.*;
import java.util.*;

/**
 * Compares the current implementation of a set of double-byte charsets with
 * the corresponding {@code <name>_OLD} reference classes: encodability,
 * encoded bytes, decoded chars, rough timing and malformed-input reporting.
 * Differences are printed to standard output; hard failures are reported by
 * throwing {@link RuntimeException}.
 */
public class TestIBMDB {

    /** Mutable holder used by {@code encode}/{@code decode} to report the average time per iteration (microseconds). */
    static class Time {
        long t;
    }

    /** Number of times each timed or repeated coder operation is executed. */
    static int iteration = 200;

    /**
     * Decodes {@code bb} with a fresh decoder of {@code cs}, {@link #iteration}
     * times, and returns the chars produced by the last pass.
     *
     * @param bb         bytes to decode
     * @param cs         charset supplying the decoder
     * @param testDirect {@code true} to use direct buffers, {@code false} for heap buffers
     * @param t          receives the average elapsed time per iteration in microseconds
     * @return the decoded chars
     * @throws RuntimeException if the final result is anything but {@code UNDERFLOW}
     */
    static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
        throws Exception {
        String csn = cs.name();
        CharsetDecoder dec = cs.newDecoder();
        ByteBuffer bbf;
        CharBuffer cbf;
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(bb.length);
            cbf = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer();
            bbf.put(bb).flip();
        } else {
            bbf = ByteBuffer.wrap(bb);
            cbf = CharBuffer.allocate(bb.length);
        }
        CoderResult cr = null;
        long t1 = System.nanoTime() / 1000;
        for (int i = 0; i < iteration; i++) {
            bbf.rewind();
            cbf.clear();
            dec.reset();
            cr = dec.decode(bbf, cbf, true);
        }
        long t2 = System.nanoTime() / 1000;
        t.t = (t2 - t1) / iteration;
        if (cr != CoderResult.UNDERFLOW) {
            System.out.println("DEC-----------------");
            int pos = bbf.position();
            // Dump at most four bytes from the failure position; the input may
            // end sooner, and an index error here would hide the real failure.
            System.out.printf(" cr=%s, bbf.pos=%d, bb[pos]=%s%n",
                              cr, pos, hexDump(bb, pos, 4));
            throw new RuntimeException("Decoding err: " + csn);
        }
        char[] cc = new char[cbf.position()];
        cbf.flip();
        cbf.get(cc);
        return cc;
    }

    /** Formats up to {@code max} bytes of {@code bb} starting at {@code from} as comma-separated lowercase hex. */
    private static String hexDump(byte[] bb, int from, int max) {
        StringBuilder sb = new StringBuilder();
        int end = Math.min(bb.length, from + max);
        for (int i = Math.max(from, 0); i < end; i++) {
            if (sb.length() > 0) {
                sb.append(',');
            }
            sb.append(Integer.toHexString(bb[i] & 0xff));
        }
        return sb.toString();
    }

    /**
     * Decodes {@code bb} with a fresh decoder of {@code cs}, {@link #iteration}
     * times, and returns the {@link CoderResult} of the last pass without
     * interpreting it.
     *
     * @param bb         bytes to decode
     * @param cs         charset supplying the decoder
     * @param testDirect {@code true} to use direct buffers, {@code false} for heap buffers
     * @return the result of the final {@code decode} call
     */
    static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
        throws Exception {
        CharsetDecoder dec = cs.newDecoder();
        ByteBuffer bbf;
        CharBuffer cbf;
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(bb.length);
            cbf = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer();
            bbf.put(bb).flip();
        } else {
            bbf = ByteBuffer.wrap(bb);
            cbf = CharBuffer.allocate(bb.length);
        }
        CoderResult cr = null;
        for (int i = 0; i < iteration; i++) {
            bbf.rewind();
            cbf.clear();
            dec.reset();
            cr = dec.decode(bbf, cbf, true);
        }
        return cr;
    }

    /**
     * Encodes {@code cc} with a fresh encoder of {@code cs}, {@link #iteration}
     * times, and returns the bytes produced by the last pass.
     *
     * @param cc         chars to encode
     * @param cs         charset supplying the encoder
     * @param testDirect {@code true} to use direct buffers, {@code false} for heap buffers
     * @param t          receives the average elapsed time per iteration in microseconds
     * @return the encoded bytes
     * @throws RuntimeException if the final result is anything but {@code UNDERFLOW}
     */
    static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
        throws Exception {
        ByteBuffer bbf;
        CharBuffer cbf;
        CharsetEncoder enc = cs.newEncoder();
        String csn = cs.name();
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(cc.length * 4);
            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
            cbf.put(cc).flip();
        } else {
            bbf = ByteBuffer.allocate(cc.length * 4);
            cbf = CharBuffer.wrap(cc);
        }
        CoderResult cr = null;
        long t1 = System.nanoTime() / 1000;
        for (int i = 0; i < iteration; i++) {
            cbf.rewind();
            bbf.clear();
            enc.reset();
            cr = enc.encode(cbf, bbf, true);
        }
        long t2 = System.nanoTime() / 1000;
        t.t = (t2 - t1) / iteration;
        if (cr != CoderResult.UNDERFLOW) {
            System.out.println("ENC-----------------");
            int pos = cbf.position();
            System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n",
                              cr.toString(), pos, cc[pos] & 0xffff);
            throw new RuntimeException("Encoding err: " + csn);
        }
        byte[] bb = new byte[bbf.position()];
        bbf.flip();
        bbf.get(bb);
        return bb;
    }

    /**
     * Encodes {@code cc} with a fresh encoder of {@code cs}, {@link #iteration}
     * times, and returns the {@link CoderResult} of the last pass without
     * interpreting it.
     *
     * @param cc         chars to encode
     * @param cs         charset supplying the encoder
     * @param testDirect {@code true} to use direct buffers, {@code false} for heap buffers
     * @return the result of the final {@code encode} call
     */
    static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
        throws Exception {
        ByteBuffer bbf;
        CharBuffer cbf;
        CharsetEncoder enc = cs.newEncoder();
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(cc.length * 4);
            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
            cbf.put(cc).flip();
        } else {
            bbf = ByteBuffer.allocate(cc.length * 4);
            cbf = CharBuffer.wrap(cc);
        }
        CoderResult cr = null;
        for (int i = 0; i < iteration; i++) {
            cbf.rewind();
            bbf.clear();
            enc.reset();
            cr = enc.encode(cbf, bbf, true);
        }
        return cr;
    }

    /**
     * Prints one mapping line for {@code c} in {@code cs}: the encoded bytes in
     * hex, the char itself, and the first char obtained by decoding those bytes
     * again.
     */
    static void printEntry(char c, Charset cs) {
        byte[] bb = new String(new char[] {c}).getBytes(cs);
        for (byte b : bb) {
            System.out.printf("%x", b & 0xff);
        }
        System.out.printf(" %x", c & 0xffff);
        String s2 = new String(bb, cs);
        System.out.printf(" %x%n", s2.charAt(0) & 0xffff);
    }

    /**
     * Checks and compares canEncode/encoding of the old and new charsets for
     * every char from 0 up to (but excluding) 0xffff, printing each difference.
     *
     * @param oldCS reference ("old") charset
     * @param newCS charset under test
     * @return the chars that both charsets report as encodable, in ascending order
     */
    static char[] checkEncoding(Charset oldCS, Charset newCS)
        throws Exception {
        System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name());
        CharsetEncoder encOLD = oldCS.newEncoder();
        CharsetEncoder encNew = newCS.newEncoder();
        char[] cc = new char[0x10000];
        int pos = 0;
        boolean is970 = "x-IBM970-Old".equals(oldCS.name());

        // The bound stays below 0xffff: a char counter can never exceed it,
        // so "<= 0xffff" would never terminate.
        for (char c = 0; c < 0xffff; c++) {
            boolean canOld = encOLD.canEncode(c);
            boolean canNew = encNew.canEncode(c);

            if (is970 && c == 0x2299) {
                continue;
            }

            if (canOld != canNew) {
                if (canNew) {
                    System.out.printf(" NEW(only): ");
                    printEntry(c, newCS);
                } else {
                    if (is970) {
                        byte[] bb = new String(new char[] {c}).getBytes(oldCS);
                        if (bb.length == 2 && bb[0] == (byte)0xa2 && bb[1] == (byte)0xc1) {
                            // we know 970 has bogus nnnn -> a2c1 -> 2299
                            continue;
                        }
                    }
                    System.out.printf(" OLD(only): ");
                    printEntry(c, oldCS);
                }
            } else if (canNew) {
                byte[] bbNew = new String(new char[] {c}).getBytes(newCS);
                byte[] bbOld = new String(new char[] {c}).getBytes(oldCS);
                if (!Arrays.equals(bbNew, bbOld)) {
                    System.out.printf(" c->b NEW: ");
                    printEntry(c, newCS);
                    System.out.printf(" c->b OLD: ");
                    printEntry(c, oldCS);
                } else {
                    String sNew = new String(bbNew, newCS);
                    String sOld = new String(bbOld, oldCS);
                    if (!sNew.equals(sOld)) {
                        System.out.printf(" b2c NEW (c=%x):", c & 0xffff);
                        printEntry(sNew.charAt(0), newCS);
                        System.out.printf(" b2c OLD:");
                        printEntry(sOld.charAt(0), oldCS);
                    }
                }
            }
            if (canNew & canOld) {  // added only both for now
                cc[pos++] = c;
            }
        }
        return Arrays.copyOf(cc, pos);
    }

    /**
     * Checks and compares decoding of the old and new charsets: first every
     * single byte value, then every byte pair with both bytes in 0x40..0xfe.
     * When the old charset's name starts with {@code "x-IBM93"} each pair is
     * wrapped in 0x0e ... 0x0f. Differences are printed; nothing is thrown
     * for a mismatch.
     *
     * @param oldCS reference ("old") charset
     * @param newCS charset under test
     */
    static void checkDecoding(Charset oldCS, Charset newCS)
        throws Exception
    {
        System.out.printf("Decoding <%s> <%s>...%n", oldCS.name(), newCS.name());
        boolean isEBCDIC = oldCS.name().startsWith("x-IBM93");

        //Try singlebyte first
        byte[] bb = new byte[1];
        System.out.printf(" trying SB...%n");
        for (int b = 0; b < 0x100; b++) {
            bb[0] = (byte)b;
            String sOld = new String(bb, oldCS);
            String sNew = new String(bb, newCS);
            if (!sOld.equals(sNew)) {
                System.out.printf(" b=%x: %x/%d(old) %x/%d(new)%n",
                                  b & 0xff,
                                  sOld.charAt(0) & 0xffff, sOld.length(),
                                  sNew.charAt(0) & 0xffff, sNew.length());
            }
        }

        System.out.printf(" trying DB...%n");
        bb = new byte[isEBCDIC ? 4 : 2];
        // Inclusive range scanned for both the lead and the trail byte.
        final int dbMin = 0x40;
        final int dbMax = 0xfe;
        for (int b1 = dbMin; b1 <= dbMax; b1++) {
            if (!isEBCDIC) {
                // decodable singlebyte b1
                // (bb[1] still holds whatever the previous pass left there)
                bb[0] = (byte)b1;
                String sOld = new String(bb, oldCS);
                String sNew = new String(bb, newCS);
                if (!sOld.equals(sNew)) {
                    if (sOld.length() != 2 && sOld.charAt(0) != 0) {
                        // only prints we are NOT expected. above two are known issue
                        System.out.printf(" b1=%x: %x/%d(old) %x/%d(new)%n",
                                          b1 & 0xff,
                                          sOld.charAt(0) & 0xffff, sOld.length(),
                                          sNew.charAt(0) & 0xffff, sNew.length());
                        continue;
                    }
                }
            }
            for (int b2 = dbMin; b2 <= dbMax; b2++) {
                if (isEBCDIC) {
                    bb[0] = 0x0e;
                    bb[1] = (byte)b1;
                    bb[2] = (byte)b2;
                    bb[3] = 0x0f;
                } else {
                    bb[0] = (byte)b1;
                    bb[1] = (byte)b2;
                }
                String sOld = new String(bb, oldCS);
                String sNew = new String(bb, newCS);
                //if (!sOld.equals(sNew)) {
                if (sOld.charAt(0) != sNew.charAt(0)) {

                    if (sOld.charAt(0) == 0 && sNew.charAt(0) == 0xfffd) {
                        continue;  // known issue in old implementation
                    }

                    System.out.printf(" bb=<%x,%x> c(old)=%x, c(new)=%x%n",
                                      b1, b2, sOld.charAt(0) & 0xffff, sNew.charAt(0) & 0xffff);
                }
            }
        }
    }

    /**
     * Prints how long (in microseconds) it takes to look up the charset named
     * {@code csn} and to create one decoder and one encoder from it.
     *
     * @param csn charset name passed to {@link Charset#forName(String)}
     */
    static void checkInit(String csn) throws Exception {
        System.out.printf("Check init <%s>...%n", csn);
        Charset.forName("Big5");    // load in the ExtendedCharsets
        long t1 = System.nanoTime() / 1000;
        Charset cs = Charset.forName(csn);
        long t2 = System.nanoTime() / 1000;
        System.out.printf(" charset :%d%n", t2 - t1);
        t1 = System.nanoTime() / 1000;
        cs.newDecoder();
        t2 = System.nanoTime() / 1000;
        System.out.printf(" new Decoder :%d%n", t2 - t1);

        t1 = System.nanoTime() / 1000;
        cs.newEncoder();
        t2 = System.nanoTime() / 1000;
        System.out.printf(" new Encoder :%d%n", t2 - t1);
    }

    /**
     * Encodes {@code cc} and decodes the result with both charsets, using heap
     * and then direct buffers, printing the timing ratio {@code cs2/cs1} and a
     * "failed" line whenever the two charsets disagree.
     *
     * @param cs1 first charset (denominator of the printed ratio)
     * @param cs2 second charset (numerator of the printed ratio)
     * @param cc  chars to round-trip
     */
    static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception {
        System.gc();    // enqueue finalizable objects
        Thread.sleep(1000);
        System.gc();    // enqueue finalizable objects

        String csn1 = cs1.name();
        String csn2 = cs2.name();
        System.out.printf("Diff <%s> <%s>...%n", csn1, csn2);

        Time t1 = new Time();
        Time t2 = new Time();

        byte[] bb1 = encode(cc, cs1, false, t1);
        byte[] bb2 = encode(cc, cs2, false, t2);

        System.out.printf(" Encoding TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t) / (t1.t));
        if (!Arrays.equals(bb1, bb2)) {
            System.out.printf(" encoding failed%n");
        }

        char[] cc2 = decode(bb1, cs2, false, t2);
        char[] cc1 = decode(bb1, cs1, false, t1);
        System.out.printf(" Decoding TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t) / (t1.t));
        if (!Arrays.equals(cc1, cc2)) {
            System.out.printf(" decoding failed%n");
        }

        bb1 = encode(cc, cs1, true, t1);
        bb2 = encode(cc, cs2, true, t2);

        System.out.printf(" Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t) / (t1.t));

        if (!Arrays.equals(bb1, bb2)) {
            System.out.printf(" encoding (direct) failed%n");
        }

        cc1 = decode(bb1, cs1, true, t1);
        cc2 = decode(bb1, cs2, true, t2);
        System.out.printf(" Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t) / (t1.t));
        if (!Arrays.equals(cc1, cc2)) {
            System.out.printf(" decoding (direct) failed%n");
        }
    }

    /* The first byte is the length of malformed bytes
        byte[][] malformed = {
            {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
        };
    */

    /**
     * Decodes each byte sequence with {@code cs}, using heap and direct
     * buffers, and verifies that the decoder reports malformed input of the
     * expected length.
     *
     * @param cs        charset supplying the decoder
     * @param malformed test vectors; element 0 of each row is the expected
     *                  malformed length, the remaining elements are the input
     * @throws RuntimeException if any vector is not reported as malformed or
     *                          is reported with a different length
     */
    static void checkMalformed(Charset cs, byte[][] malformed)
        throws Exception
    {
        boolean failed = false;
        String csn = cs.name();
        System.out.printf("Check malformed <%s>...%n", csn);
        for (boolean direct : new boolean[] {false, true}) {
            for (byte[] bins : malformed) {
                int mlen = bins[0];
                byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
                CoderResult cr = decodeCR(bin, cs, direct);
                StringBuilder ashex = new StringBuilder();
                for (int i = 0; i < bin.length; i++) {
                    if (i > 0) {
                        ashex.append(' ');
                    }
                    ashex.append(Integer.toString((int)bin[i] & 0xff, 16));
                }
                if (!cr.isMalformed()) {
                    System.out.printf(" FAIL(direct=%b): [%s] not malformed. -->cr=%s\n", direct, ashex, cr.toString());
                    failed = true;
                } else if (cr.length() != mlen) {
                    System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
                    failed = true;
                }
            }
        }
        if (failed) {
            throw new RuntimeException("Check malformed failed " + csn);
        }
    }

    /**
     * Runs a single non-final {@code decode} call on a window of {@code bytes}
     * and compares the result and the resulting buffer positions with the
     * expectation encoded in {@code flow}.
     *
     * @param dec    decoder to use (it is reset first)
     * @param bytes  input bytes, placed at offset {@code flow[0]} of the input buffer
     * @param direct {@code true} to use direct buffers, {@code false} for heap buffers
     * @param flow   {@code {inPos, inLen, outPos, outLen, expectedInPos,
     *               expectedOutPos, expectedResult}} where expectedResult is
     *               0 for UNDERFLOW and anything else for OVERFLOW
     * @return {@code true} if result and positions match; otherwise prints the
     *         mismatch and returns {@code false}
     */
    static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
        int inPos = flow[0];
        int inLen = flow[1];
        int outPos = flow[2];
        int outLen = flow[3];
        int expedInPos = flow[4];
        int expedOutPos = flow[5];
        CoderResult expedCR = (flow[6] == 0) ? CoderResult.UNDERFLOW
                                             : CoderResult.OVERFLOW;
        ByteBuffer bbf;
        CharBuffer cbf;
        if (direct) {
            bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
            cbf = ByteBuffer.allocateDirect((outPos + outLen) * 2).asCharBuffer();
        } else {
            bbf = ByteBuffer.allocate(inPos + bytes.length);
            cbf = CharBuffer.allocate(outPos + outLen);
        }
        bbf.position(inPos);
        bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
        cbf.position(outPos);
        dec.reset();
        CoderResult cr = dec.decode(bbf, cbf, false);
        if (cr != expedCR ||
            bbf.position() != expedInPos ||
            cbf.position() != expedOutPos) {
            System.out.printf("Expected(direct=%5b): [", direct);
            for (int i : flow) {
                System.out.print(" " + i);
            }
            System.out.println("] CR=" + cr +
                               ", inPos=" + bbf.position() +
                               ", outPos=" + cbf.position());
            return false;
        }
        return true;
    }

    /**
     * Exercises {@link #check} with a table of input/output window sizes at
     * every combination of input and output offsets below 20.
     *
     * <p>NOTE(review): nothing in this file calls this method, and the sample
     * bytes are produced with the "EUC_TW" charset rather than with
     * {@code cs}; it looks like a leftover from another test - confirm before
     * wiring it into {@code main}.
     *
     * @param cs charset supplying the decoder
     * @throws RuntimeException if any flow does not match its expectation
     */
    static void checkUnderOverflow(Charset cs) throws Exception {
        String csn = cs.name();
        System.out.printf("Check under/overflow <%s>...%n", csn);
        CharsetDecoder dec = cs.newDecoder();
        boolean failed = false;

        //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
        //0   1     2         3         7         11
        byte[] bytes = "\u007f\u3000\u4e42\u4e28\ud840\udc55".getBytes("EUC_TW");
        int inlen = bytes.length;

        int MAXOFF = 20;
        for (int inoff = 0; inoff < MAXOFF; inoff++) {
            for (int outoff = 0; outoff < MAXOFF; outoff++) {
                int[][] Flows = {
                    //inpos, inLen, outPos, outLen, inPosEP, outposEP, under(0)/over(1)
                    //overflow
                    {inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1},
                    {inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1},
                    {inoff, inlen, outoff, 3, inoff + 7, outoff + 3, 1},
                    {inoff, inlen, outoff, 4, inoff + 11, outoff + 4, 1},
                    {inoff, inlen, outoff, 5, inoff + 11, outoff + 4, 1},
                    {inoff, inlen, outoff, 6, inoff + 15, outoff + 6, 0},
                    //underflow
                    {inoff, 1, outoff, 6, inoff + 1, outoff + 1, 0},
                    {inoff, 2, outoff, 6, inoff + 1, outoff + 1, 0},
                    {inoff, 3, outoff, 6, inoff + 3, outoff + 2, 0},
                    {inoff, 4, outoff, 6, inoff + 3, outoff + 2, 0},
                    {inoff, 5, outoff, 6, inoff + 3, outoff + 2, 0},
                    {inoff, 8, outoff, 6, inoff + 7, outoff + 3, 0},
                    {inoff, 9, outoff, 6, inoff + 7, outoff + 3, 0},
                    {inoff, 10, outoff, 6, inoff + 7, outoff + 3, 0},
                    {inoff, 11, outoff, 6, inoff + 11, outoff + 4, 0},
                    {inoff, 12, outoff, 6, inoff + 11, outoff + 4, 0},
                    {inoff, 15, outoff, 6, inoff + 15, outoff + 6, 0},
                    // 2-byte under/overflow
                    {inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0},
                    {inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1},
                    {inoff, 3, outoff, 2, inoff + 3, outoff + 2, 0},
                };
                for (boolean direct : new boolean[] {false, true}) {
                    for (int[] flow : Flows) {
                        if (!check(dec, bytes, direct, flow)) {
                            failed = true;
                        }
                    }
                }
            }
        }
        if (failed) {
            throw new RuntimeException("Check under/overflow failed " + csn);
        }
    }

    /** Charset names under test; the reference class for each is {@code <name>_OLD}. */
    static String[] csnames = new String[] {
        "IBM930",
        "IBM933",
        "IBM935",
        "IBM937",
        "IBM939",
        "IBM942",
        "IBM943",
        "IBM948",
        "IBM949",
        "IBM950",
        "IBM970",
        "IBM942C",
        "IBM943C",
        "IBM949C",
        "IBM1381",
        "IBM1383",

        "EUC_CN",
        "EUC_KR",
        "GBK",
        "Johab",
        "MS932",
        "MS936",
        "MS949",
        "MS950",
    };

    /**
     * For every name in {@link #csnames}: times charset initialisation,
     * compares encoding and decoding against the {@code _OLD} class, compares
     * round-trip results and timing, and runs the malformed-input vectors for
     * the charset families that have them.
     */
    public static void main(String[] args) throws Exception {
        for (String csname : csnames) {
            System.out.printf("-----------------------------------%n");
            String oldname = csname + "_OLD";
            checkInit(csname);
            Charset csOld = (Charset)Class.forName(oldname)
                                          .getDeclaredConstructor()
                                          .newInstance();
            Charset csNew = Charset.forName(csname);
            char[] cc = checkEncoding(csOld, csNew);
            checkDecoding(csOld, csNew);
            compare(csNew, csOld, cc);

            // No entry in csnames starts with "x-", so matching the "x-..."
            // names against csname could never succeed and the malformed
            // checks below were dead code. Match against the charset's own
            // name instead, as checkDecoding/checkEncoding already do.
            // NOTE(review): presumes Charset.name() yields "x-IBM930" etc. -
            // confirm against the target JDK.
            String canonical = csNew.name();
            if (canonical.startsWith("x-IBM93")) {
                //ebcdic
                checkMalformed(csNew, new byte[][] {
                    {1, 0x26, 0x0f, 0x27},         // in SBSC, no SI
                    {1, 0x0e, 0x41, 0x41, 0xe},    // in DBSC, no SO
                    {2, 0x0e, 0x40, 0x41, 0xe},    // illegal DB
                });
            } else if (canonical.equals("x-IBM970") ||
                       canonical.equals("x-IBM1383")) {
                //euc_simple
                checkMalformed(csNew, new byte[][] {
                    {1, 0x26, (byte)0x8f, 0x27},                     // SS3
                    {1, (byte)0xa1, (byte)0xa1, (byte)0x8e, 0x51},   // SS2
                });
            }
        }
    }
}