/*
 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/* @test
   @bug 4691554 6221056 6380723 6404504 6419565 6529796
   @summary Test the supported New I/O coders
 */

import java.io.*;
import java.nio.*;
import java.nio.charset.*;
import java.util.regex.*;

/**
 * Checks charset encoders and decoders against byte/code-point mapping
 * tables.  For each encoding, the mappings are read from files named
 * {@code <encoding><extension>} in the directory given by the
 * {@code test.src} system property (default: the current directory),
 * grouped by the number of bytes per character, and then block-decoded
 * and/or block-encoded using both heap and direct buffers.
 */
public class CoderTest {
    private static final int BUFSIZ = 8192;     // Initial buffer size
    private static final int MAXERRS = 10;      // Errors reported per test

    private static final String testRootDir
        = System.getProperty("test.src", ".");
    private static final PrintStream log = System.out;

    // Set by -v on the command line
    private static boolean verbose = false;

    // Test modes
    private static final int ROUNDTRIP = 0;
    private static final int ENCODE = 1;
    private static final int DECODE = 2;

    // True when the most recently loaded mapping file carried the
    // #STATEFUL directive; consulted by Test.encode
    private static boolean shiftHackDBCS = false;

    // File extensions, indexed by test mode
    private static final String[] extension
        = new String[] { ".b2c",
                         ".c2b-irreversible",
                         ".b2c-irreversible" };


    // Utilities

    // Returns a new heap buffer of twice the capacity holding the bytes
    // written to bb so far.  Note that bb itself is flipped as a side effect.
    private static ByteBuffer expand(ByteBuffer bb) {
        ByteBuffer nbb = ByteBuffer.allocate(bb.capacity() * 2);
        bb.flip();
        nbb.put(bb);
        return nbb;
    }

    // Char-buffer counterpart of expand(ByteBuffer)
    private static CharBuffer expand(CharBuffer cb) {
        CharBuffer ncb = CharBuffer.allocate(cb.capacity() * 2);
        cb.flip();
        ncb.put(cb);
        return ncb;
    }

    // Converts a string of hex digit pairs ("8ea1") into the bytes it
    // denotes.  A string with an odd number of digits cannot be split into
    // whole bytes and is rejected instead of being silently truncated.
    private static byte[] parseBytes(String s) {
        if ((s.length() & 1) != 0)
            throw new RuntimeException("Malformed byte string: " + s);
        byte[] bs = new byte[s.length() / 2];
        for (int i = 0; i < bs.length; i++) {
            int j = i * 2;
            bs[i] = (byte)Integer.parseInt(s.substring(j, j + 2), 16);
        }
        return bs;
    }

    // Formats the given bytes as lower-case hex, two digits per byte
    private static String printBytes(byte[] bs) {
        StringBuilder sb = new StringBuilder(bs.length * 2);
        for (int i = 0; i < bs.length; i++) {
            sb.append(Integer.toHexString((bs[i] >> 4) & 0xf));
            sb.append(Integer.toHexString((bs[i] >> 0) & 0xf));
        }
        return sb.toString();
    }

    // Formats a code point as "U+" followed by its lower-case hex value,
    // zero-padded to at least four digits.  All digits are kept, so code
    // points in plane 16 (U+100000 and above) print correctly.
    private static String printCodePoint(int cp) {
        String hex = Integer.toHexString(cp);
        StringBuilder sb = new StringBuilder("U+");
        for (int i = hex.length(); i < 4; i++)
            sb.append('0');
        return sb.append(hex).toString();
    }

    // Reads the next code point from cb, consuming two chars when the
    // first one is a high surrogate
    private static int getCodePoint(CharBuffer cb) {
        char c = cb.get();
        if (Character.isHighSurrogate(c))
            return Character.toCodePoint(c, cb.get());
        else
            return c;
    }

    private static String plural(int n) {
        return (n == 1 ? "" : "s");
    }

    // One parsed mapping line
    static class Entry {
        byte[] bb;      // encoded bytes
        int cp;         // code point
        int cp2;        // second code point of a "cp+0xcp2" mapping, else 0
    }

    /**
     * Line-oriented reader for mapping files.  Lines starting with
     * {@code #} are directives/comments, lines that do not match the
     * mapping pattern are skipped, and every other line yields an
     * {@link Entry}.
     */
    public static class Parser {
        static final Pattern p = Pattern.compile("(0[xX])?(00)?([0-9a-fA-F]+)\\s+(0[xX])?([0-9a-fA-F]+)(\\+0x([0-9a-fA-F]+))?\\s*");

        // NOTE(review): these three values do not correspond to the capture
        // groups of the pattern above and are not used by this class; they
        // are kept unchanged only so existing references still compile.
        static final int gBS = 1;
        static final int gCP = 2;
        static final int gCP2 = 3;

        // Capture groups of p that actually carry the data
        private static final int GROUP_BYTES = 3;
        private static final int GROUP_CP = 5;
        private static final int GROUP_CP2 = 7;

        boolean isStateful = false;
        BufferedReader reader;
        boolean closed;
        Matcher matcher;

        /**
         * Creates a parser reading from the given stream.
         *
         * @param in the mapping data; it is closed once the end of the
         *           input has been reached by {@link #next(Entry)}
         * @throws IOException if the reader cannot be created
         */
        public Parser (InputStream in)
            throws IOException
        {
            // Decode with a fixed single-byte charset rather than the
            // platform default so parsing does not depend on the host.
            this.reader = new BufferedReader(
                new InputStreamReader(in, "ISO-8859-1"));
            this.closed = false;
            this.matcher = p.matcher("");
        }

        /**
         * Tells whether a {@code #STATEFUL} directive has been read so far.
         */
        public boolean isStateful() {
            return isStateful;
        }

        /**
         * Tells whether the line is a directive or comment, i.e. starts
         * with {@code #}.  A {@code #STATEFUL} line additionally marks the
         * parser as stateful.
         */
        protected boolean isDirective(String line) {
            // Stateful DBCS encodings need special treatment
            if (line.startsWith("#STATEFUL")) {
                isStateful = true;
                return true;
            }
            return line.startsWith("#");
        }

        /**
         * Fills {@code e} from a successful match of the mapping pattern
         * and returns it.
         */
        protected Entry parse(Matcher m, Entry e) {
            e.bb = parseBytes(m.group(GROUP_BYTES));
            e.cp = Integer.parseInt(m.group(GROUP_CP), 16);
            String cp2 = m.group(GROUP_CP2);
            e.cp2 = (cp2 != null) ? Integer.parseInt(cp2, 16) : 0;
            return e;
        }

        /**
         * Same as {@link #next(Entry)} with a freshly allocated entry.
         */
        public Entry next() throws Exception {
            return next(new Entry());
        }

        /**
         * Reads lines until one holds a mapping and stores it in
         * {@code mapping}.
         *
         * @return {@code mapping}, or {@code null} once the end of the
         *         input has been reached, in which case the input is closed
         */
        public Entry next(Entry mapping) throws Exception {
            if (closed)
                return null;
            String line;
            while ((line = reader.readLine()) != null) {
                if (isDirective(line))
                    continue;
                matcher.reset(line);
                if (!matcher.lookingAt())
                    continue;           // not a mapping line
                return parse(matcher, mapping);
            }
            reader.close();
            closed = true;
            return null;
        }
    }

    // CoderTest
    private String encoding;
    private Charset cs;
    private CharsetDecoder decoder = null;
    private CharsetEncoder encoder = null;

    private CoderTest(String enc) throws Exception {
        encoding = enc;
        cs = Charset.forName(enc);
        decoder = cs.newDecoder();
        encoder = cs.newEncoder();
    }

    private class Test {
        // An instance of this class tests all mappings for
        // a particular bytesPerChar value
        private int bytesPerChar;

        // Reference data from .b2c file
        private ByteBuffer refBytes = ByteBuffer.allocate(BUFSIZ);
        private CharBuffer refChars = CharBuffer.allocate(BUFSIZ);

        // Direct-buffer copies of the reference data, filled in by run()
        private ByteBuffer dRefBytes = ByteBuffer.allocateDirect(BUFSIZ);
        private CharBuffer dRefChars = ByteBuffer.allocateDirect(BUFSIZ*2).asCharBuffer();

        private Test(int bpc) {
            bytesPerChar = bpc;
        }

        // Appends one mapping to the reference data, growing the
        // buffers when they are full
        private void put(byte[] bs, char[] cc) {
            if (bs.length != bytesPerChar)
                throw new IllegalArgumentException(bs.length
                                                   + " != "
                                                   + bytesPerChar);
            if (refBytes.remaining() < bytesPerChar)
                refBytes = expand(refBytes);
            refBytes.put(bs);
            if (refChars.remaining() < cc.length)
                refChars = expand(refChars);
            refChars.put(cc);
        }

        // Block-decodes the given reference bytes and compares the result,
        // code point by code point, with the given reference chars.  The
        // buffers passed in are the ones used (the parameters deliberately
        // shadow the fields), so the direct pass really decodes the direct
        // buffer.  Both buffers are rewound before returning.
        // Returns true if no mismatch was found.
        private boolean decode(ByteBuffer refBytes, CharBuffer refChars)
            throws Exception {
            log.println(" decode" + (refBytes.isDirect()?" (direct)":""));
            int e = 0;
            try {
                CharBuffer out = decoder.decode(refBytes);

                refBytes.rewind();
                byte[] bs = new byte[bytesPerChar];

                while (refBytes.hasRemaining()) {
                    refBytes.get(bs);
                    int rcp = getCodePoint(refChars);
                    int ocp = getCodePoint(out);
                    if (rcp != ocp) {
                        log.println(" Error: "
                                    + printBytes(bs)
                                    + " --> "
                                    + printCodePoint(ocp)
                                    + ", expected "
                                    + printCodePoint(rcp));
                        if (++e >= MAXERRS) {
                            log.println(" Too many errors, giving up");
                            break;
                        }
                    }
                    if (verbose) {
                        log.println(" "
                                    + printBytes(bs)
                                    + " --> "
                                    + printCodePoint(rcp));
                    }
                }
                if (e == 0 && (refChars.hasRemaining() || out.hasRemaining())) {
                    // Paranoia: Didn't consume everything
                    throw new IllegalStateException(
                        "decode: reference or decoded chars left over");
                }
            } finally {
                // Leave the reference data ready for the next pass
                refBytes.rewind();
                refChars.rewind();
            }
            return (e == 0);
        }

        // Block-encodes the given reference chars and compares the result
        // with the given reference bytes.  Both buffers are rewound before
        // returning, including on the early "missing shift byte" exits.
        // Returns true if no mismatch was found.
        private boolean encode(ByteBuffer refBytes, CharBuffer refChars)
            throws Exception {
            log.println(" encode" + (refBytes.isDirect()?" (direct)":""));
            int e = 0;
            try {
                ByteBuffer out = encoder.encode(refChars);
                refChars.rewind();

                // Stateful b2c files have leading and trailing
                // shift bytes for each mapping. However when
                // block encoded the output will consist of a single
                // leadByte followed by the raw DBCS byte values and
                // a final trail byte. The state variable shiftHackDBCS
                // which is true for stateful DBCS encodings is used
                // to conditionally strip away per-mapping shift bytes
                // from the comparison of expected versus actual encoded
                // byte values. This hack can be eliminated in Mustang
                // when sun.io converters and their associated tests are
                // removed.

                boolean boundaryBytes = false;
                int bytesPC = bytesPerChar;

                if (shiftHackDBCS && bytesPerChar==4) {
                    bytesPC = 2;
                    boundaryBytes = true;
                    if ((out.get()) != (byte)0x0e) {
                        log.println("Missing lead byte");
                        return(false);
                    }
                }

                byte[] rbs = new byte[bytesPC];
                byte[] obs = new byte[bytesPC];
                while (refChars.hasRemaining()) {
                    int cp = getCodePoint(refChars);
                    // Skip lead shift ref byte for stateful encoding tests
                    if (shiftHackDBCS && bytesPC == 2)
                        refBytes.get();
                    refBytes.get(rbs);
                    out.get(obs);
                    boolean eq = true;
                    for (int i = 0; i < bytesPC; i++)
                        eq &= rbs[i] == obs[i];
                    if (!eq) {
                        log.println(" Error: "
                                    + printCodePoint(cp)
                                    + " --> "
                                    + printBytes(obs)
                                    + ", expected "
                                    + printBytes(rbs));
                        if (++e >= MAXERRS) {
                            log.println(" Too many errors, giving up");
                            break;
                        }
                    }
                    if (verbose) {
                        log.println(" "
                                    + printCodePoint(cp)
                                    + " --> "
                                    + printBytes(rbs));
                    }

                    // For stateful encodings ignore/exclude per-mapping
                    // trail bytes from byte comparison
                    if (shiftHackDBCS && bytesPC == 2)
                        refBytes.get();
                }

                if (shiftHackDBCS && boundaryBytes) {
                    if ((out.get()) != (byte)0x0f) {
                        log.println("Missing trail byte");
                        return(false);
                    }
                }

                if (e == 0 && (refBytes.hasRemaining() || out.hasRemaining())) {
                    // Paranoia: Didn't consume everything
                    throw new IllegalStateException(
                        "encode: reference or encoded bytes left over");
                }
            } finally {
                // Leave the reference data ready for the next pass
                refBytes.rewind();
                refChars.rewind();
            }
            return (e == 0);
        }

        // Runs the decode and/or encode passes selected by mode, each with
        // heap buffers and then with direct copies of the same data.
        // Returns true if every pass succeeded.
        private boolean run(int mode) throws Exception {
            log.println(" " + bytesPerChar
                        + " byte" + plural(bytesPerChar) + "/char");

            // The heap buffers may have grown past the direct ones
            if (dRefBytes.capacity() < refBytes.capacity()) {
                dRefBytes = ByteBuffer.allocateDirect(refBytes.capacity());
            }
            if (dRefChars.capacity() < refChars.capacity()) {
                dRefChars = ByteBuffer.allocateDirect(refChars.capacity()*2)
                                      .asCharBuffer();
            }
            refBytes.flip();
            refChars.flip();
            dRefBytes.clear();
            dRefChars.clear();

            dRefBytes.put(refBytes).flip();
            dRefChars.put(refChars).flip();
            // The copy consumed the heap buffers; flip them back for reading
            refBytes.flip();
            refChars.flip();

            boolean rv = true;
            if (mode != ENCODE) {
                rv &= decode(refBytes, refChars);
                rv &= decode(dRefBytes, dRefChars);
            }
            if (mode != DECODE) {
                rv &= encode(refBytes, refChars);
                rv &= encode(dRefBytes, dRefChars);
            }
            return rv;
        }

    }

    // Maximum bytes/char being tested
    private int maxBytesPerChar = 0;

    // Tests, indexed by bytesPerChar - 1
    private Test[] tests;

    private void clearTests() {
        maxBytesPerChar = 0;
        tests = new Test[0];
    }

    // Find the test for the given bytes/char value,
    // expanding the test array if needed
    //
    private Test testFor(int bpc) {
        if (bpc > maxBytesPerChar) {
            Test[] ts = new Test[bpc];
            System.arraycopy(tests, 0, ts, 0, maxBytesPerChar);
            for (int i = maxBytesPerChar; i < bpc; i++)
                ts[i] = new Test(i + 1);
            tests = ts;
            maxBytesPerChar = bpc;
        }
        return tests[bpc - 1];
    }

    // Compute the name of the test file for the given encoding and mode.  If
    // the file exists then return its name, otherwise return null.
    //
    private File testFile(String encoding, int mode) {
        File f = new File(testRootDir, encoding + extension[mode]);
        if (!f.exists())
            return null;
        return f;
    }

    // Parse the given b2c file and load up the required test objects
    //
    private void loadTests(File f)
        throws Exception
    {
        clearTests();
        FileInputStream in = new FileInputStream(f);
        try {
            Parser p = new Parser(in);
            Entry e = new Entry();

            // The same Entry is refilled for every line
            while ((e = p.next(e)) != null) {
                if (e.cp2 != 0)
                    continue;  // skip composite (base+cc) for now
                byte[] bs = e.bb;
                char[] cc = Character.toChars(e.cp);
                testFor(bs.length).put(bs, cc);
            }
            shiftHackDBCS = p.isStateful();
        } finally {
            in.close();
        }
    }

    // Runs every available mode for this encoding; returns true if all
    // passes succeeded
    private boolean run() throws Exception {
        encoder
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
            .onMalformedInput(CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE)
            .onMalformedInput(CodingErrorAction.REPLACE);
        boolean rv = true;

        log.println();
        log.println(cs.name() + " (" + encoding + ")");

        // Outer loop runs three passes: roundtrip, irreversible encodings,
        // and then irreversible decodings
        for (int mode = ROUNDTRIP; mode <= DECODE; mode++) {
            File f = testFile(encoding, mode);
            if (f == null)
                continue;
            loadTests(f);
            for (int i = 0; i < maxBytesPerChar; i++)
                rv &= tests[i].run(mode);
        }
        return rv;
    }

    // For debugging: java CoderTest [-v] foo.b2c bar.b2c ...
    //
    // Without arguments every *.b2c file in the test directory is run.
    // Throws an exception if any tested charset reported errors.
    //
    public static void main(String[] args)
        throws Exception
    {
        String[] av = args;
        if (av.length == 0) {
            File d = new File(testRootDir);
            av = d.list();
            // File.list() returns null when d is not a readable directory
            if (av == null)
                throw new FileNotFoundException(
                    "Cannot list test directory: " + d);
        }
        int errors = 0;
        int tested = 0;
        int skipped = 0;

        for (int i = 0; i < av.length; i++) {
            String a = av[i];
            if (a.equals("-v")) {
                verbose = true;
                continue;
            }
            if (a.endsWith(".b2c")) {
                String encoding = a.substring(0, a.length() - 4);

                if (!Charset.isSupported(encoding)) {
                    log.println();
                    log.println("Not supported: " + encoding);
                    skipped++;
                    continue;
                }
                tested++;
                if (!new CoderTest(encoding).run())
                    errors++;
            }
        }

        log.println();
        log.println(tested + " charset" + plural(tested) + " tested, "
                    + skipped + " not supported");
        log.println();
        if (errors > 0)
            throw new Exception("Errors detected in "
                                + errors + " charset" + plural(errors));

    }
}