1 /*
   2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /* @test
  25    @bug 4691554 6221056 6380723 6404504 6419565 6529796
  26    @summary Test the supported New I/O coders
  27  */
  28 
  29 import java.io.*;
  30 import java.nio.*;
  31 import java.nio.charset.*;
  32 import java.util.regex.*;
  33 
  34 public class CoderTest {
  35     private static final int BUFSIZ = 8192;     // Initial buffer size
  36     private static final int MAXERRS = 10;      // Errors reported per test
  37 
  38     private static final String testRootDir
  39         = System.getProperty("test.src", ".");
  40     private static final PrintStream log = System.out;
  41 
  42     // Set by -v on the command line
  43     private static boolean verbose = false;
  44 
  45     // Test modes
  46     private static final int ROUNDTRIP = 0;
  47     private static final int ENCODE = 1;
  48     private static final int DECODE = 2;
  49 
  50     private static boolean shiftHackDBCS = false;
  51 
  52     // File extensions, indexed by test mode
  53     private static final String[] extension
  54         = new String[] { ".b2c",
  55                          ".c2b-irreversible",
  56                          ".b2c-irreversible" };
  57 
  58 
  59     // Utilities
  60     private static ByteBuffer expand(ByteBuffer bb) {
  61         ByteBuffer nbb = ByteBuffer.allocate(bb.capacity() * 2);
  62         bb.flip();
  63         nbb.put(bb);
  64         return nbb;
  65     }
  66 
  67     private static CharBuffer expand(CharBuffer cb) {
  68         CharBuffer ncb = CharBuffer.allocate(cb.capacity() * 2);
  69         cb.flip();
  70         ncb.put(cb);
  71         return ncb;
  72     }
  73 
  74     private static byte[] parseBytes(String s) {
  75         int nb = s.length() / 2;
  76         byte[] bs = new byte[nb];
  77         for (int i = 0; i < nb; i++) {
  78             int j = i * 2;
  79             if (j + 2 > s.length())
  80                 throw new RuntimeException("Malformed byte string: " + s);
  81             bs[i] = (byte)Integer.parseInt(s.substring(j, j + 2), 16);
  82         }
  83         return bs;
  84     }
  85 
  86     private static String printBytes(byte[] bs) {
  87         StringBuffer sb = new StringBuffer();
  88         for (int i = 0; i < bs.length; i++) {
  89             sb.append(Integer.toHexString((bs[i] >> 4) & 0xf));
  90             sb.append(Integer.toHexString((bs[i] >> 0) & 0xf));
  91         }
  92         return sb.toString();
  93     }
  94 
  95     private static String printCodePoint(int cp) {
  96         StringBuffer sb = new StringBuffer();
  97         sb.append("U+");
  98         if (cp > 0xffff)
  99             sb.append(Integer.toHexString((cp >> 16) & 0xf));
 100         sb.append(Integer.toHexString((cp >> 12) & 0xf));
 101         sb.append(Integer.toHexString((cp >> 8) & 0xf));
 102         sb.append(Integer.toHexString((cp >> 4) & 0xf));
 103         sb.append(Integer.toHexString((cp >> 0) & 0xf));
 104         return sb.toString();
 105     }
 106 
 107     private static int getCodePoint(CharBuffer cb) {
 108         char c = cb.get();
 109         if (Character.isHighSurrogate(c))
 110             return Character.toCodePoint(c, cb.get());
 111         else
 112             return c;
 113     }
 114 
 115     private static String plural(int n) {
 116         return (n == 1 ? "" : "s");
 117     }
 118 
 119     static class Entry {
 120         byte[] bb;
 121         int cp;
 122         int cp2;
 123     }
 124 
 125     public static class Parser {
 126         static Pattern p = Pattern.compile("(0[xX])?(00)?([0-9a-fA-F]+)\\s+(0[xX])?([0-9a-fA-F]+)(\\+0x([0-9a-fA-F]+))?\\s*");
 127         static final int gBS = 1;
 128         static final int gCP = 2;
 129         static final int gCP2 = 3;
 130 
 131         boolean isStateful = false;
 132         BufferedReader reader;
 133         boolean closed;
 134         Matcher matcher;
 135 
 136         public Parser (InputStream in)
 137             throws IOException
 138         {
 139             this.reader = new BufferedReader(new InputStreamReader(in));
 140             this.closed = false;
 141             this.matcher = p.matcher("");
 142         }
 143 
 144         public boolean isStateful() {
 145             return isStateful;
 146         }
 147 
 148         protected boolean isDirective(String line) {
 149             // Stateful DBCS encodings need special treatment
 150             if (line.startsWith("#STATEFUL")) {
 151                 return isStateful = true;
 152             }
 153             return line.startsWith("#");
 154         }
 155 
 156         protected Entry parse(Matcher m, Entry e) {
 157             e.bb = parseBytes(m.group(3));
 158             e.cp = Integer.parseInt(m.group(5), 16);
 159             if (m.group(7) != null)
 160                 e.cp2 = Integer.parseInt(m.group(7), 16);
 161             else
 162                 e.cp2 = 0;
 163             return e;
 164         }
 165 
 166         public Entry next() throws Exception {
 167             return next(new Entry());
 168         }
 169 
 170         // returns null and closes the input stream if the eof has beenreached.
 171         public Entry next(Entry mapping) throws Exception {
 172             if (closed)
 173                 return null;
 174             String line;
 175             while ((line = reader.readLine()) != null) {
 176                 if (isDirective(line))
 177                     continue;
 178                 matcher.reset(line);
 179                 if (!matcher.lookingAt()) {
 180                     //System.out.println("Missed: " + line);
 181                     continue;
 182                 }
 183                 return parse(matcher, mapping);
 184             }
 185             reader.close();
 186             closed = true;
 187             return null;
 188         }
 189      }
 190 
 191     // CoderTest
 192     private String encoding;
 193     private Charset cs;
 194     private CharsetDecoder decoder = null;
 195     private CharsetEncoder encoder = null;
 196 
 197     private CoderTest(String enc) throws Exception {
 198         encoding = enc;
 199         cs = Charset.forName(enc);
 200         decoder = cs.newDecoder();
 201         encoder = cs.newEncoder();
 202     }
 203 
 204     private class Test {
 205         // An instance of this class tests all mappings for
 206         // a particular bytesPerChar value
 207         private int bytesPerChar;
 208 
 209         // Reference data from .b2c file
 210         private ByteBuffer refBytes = ByteBuffer.allocate(BUFSIZ);
 211         private CharBuffer refChars = CharBuffer.allocate(BUFSIZ);
 212 
 213         private ByteBuffer dRefBytes = ByteBuffer.allocateDirect(BUFSIZ);
 214         private CharBuffer dRefChars = ByteBuffer.allocateDirect(BUFSIZ*2).asCharBuffer();
 215 
 216         private Test(int bpc) {
 217             bytesPerChar = bpc;
 218         }
 219 
 220         private void put(byte[] bs, char[] cc) {
 221             if (bs.length != bytesPerChar)
 222                 throw new IllegalArgumentException(bs.length
 223                                                    + " != "
 224                                                    + bytesPerChar);
 225             if (refBytes.remaining() < bytesPerChar)
 226                 refBytes = expand(refBytes);
 227             refBytes.put(bs);
 228             if (refChars.remaining() < cc.length)
 229                 refChars = expand(refChars);
 230             refChars.put(cc);
 231         }
 232 
 233         private boolean decode(ByteBuffer refByte, CharBuffer refChars)
 234             throws Exception {
 235             log.println("    decode" + (refByte.isDirect()?" (direct)":""));
 236             CharBuffer out = decoder.decode(refBytes);
 237 
 238             refBytes.rewind();
 239             byte[] bs = new byte[bytesPerChar];
 240             int e = 0;
 241 
 242             while (refBytes.hasRemaining()) {
 243                 refBytes.get(bs);
 244                 int rcp = getCodePoint(refChars);
 245                 int ocp = getCodePoint(out);
 246                 if (rcp != ocp) {
 247                     log.println("      Error: "
 248                                 + printBytes(bs)
 249                                 + " --> "
 250                                 + printCodePoint(ocp)
 251                                 + ", expected "
 252                                 + printCodePoint(rcp));
 253                     if (++e >= MAXERRS) {
 254                         log.println("      Too many errors, giving up");
 255                         break;
 256                     }
 257                 }
 258                 if (verbose) {
 259                     log.println("      "
 260                                 + printBytes(bs)
 261                                 + " --> "
 262                                 + printCodePoint(rcp));
 263                 }
 264             }
 265             if (e == 0 && (refChars.hasRemaining() || out.hasRemaining())) {
 266                 // Paranoia: Didn't consume everything
 267                 throw new IllegalStateException();
 268             }
 269             refBytes.rewind();
 270             refChars.rewind();
 271             return (e == 0);
 272         }
 273 
 274         private boolean encode(ByteBuffer refByte, CharBuffer refChars)
 275             throws Exception {
 276             log.println("    encode" + (refByte.isDirect()?" (direct)":""));
 277             ByteBuffer out = encoder.encode(refChars);
 278             refChars.rewind();
 279 
 280             // Stateful b2c files have leading and trailing
 281             // shift bytes for each mapping. However when
 282             // block encoded the output will consist of a single
 283             // leadByte followed by the raw DBCS byte values and
 284             // a final trail byte. The state variable shiftHackDBCS
 285             // which is true for stateful DBCS encodings is used
 286             // to conditionally strip away per-mapping shift bytes
 287             // from the comparison of expected versus actual encoded
 288             // byte values. This hack can be eliminated in Mustang
 289             // when sun.io converters and their associated tests are
 290             // removed.
 291 
 292             boolean boundaryBytes = false;
 293             int bytesPC = bytesPerChar;
 294 
 295             if (shiftHackDBCS && bytesPerChar==4) {
 296                 bytesPC = 2;
 297                 boundaryBytes = true;
 298                 if ((out.get()) != (byte)0x0e) {
 299                     log.println("Missing lead byte");
 300                     return(false);
 301                 }
 302             }
 303 
 304             byte[] rbs = new byte[bytesPC];
 305             byte[] obs = new byte[bytesPC];
 306             int e = 0;
 307             while (refChars.hasRemaining()) {
 308                 int cp = getCodePoint(refChars);
 309                 // Skip lead shift ref byte for stateful encoding tests
 310                 if (shiftHackDBCS && bytesPC == 2)
 311                    refBytes.get();
 312                 refBytes.get(rbs);
 313                 out.get(obs);
 314                 boolean eq = true;
 315                 for (int i = 0; i < bytesPC; i++)
 316                     eq &= rbs[i] == obs[i];
 317                 if (!eq) {
 318                     log.println("      Error: "
 319                                 + printCodePoint(cp)
 320                                 + " --> "
 321                                 + printBytes(obs)
 322                                 + ", expected "
 323                                 + printBytes(rbs));
 324                     if (++e >= MAXERRS) {
 325                         log.println("      Too many errors, giving up");
 326                         break;
 327                     }
 328                 }
 329                 if (verbose) {
 330                     log.println("      "
 331                                 + printCodePoint(cp)
 332                                 + " --> "
 333                                 + printBytes(rbs));
 334                 }
 335 
 336                 // For stateful encodings ignore/exclude per-mapping
 337                 // trail bytes from byte comparison
 338                 if (shiftHackDBCS && bytesPC == 2)
 339                    refBytes.get();
 340             }
 341 
 342             if (shiftHackDBCS && boundaryBytes) {
 343                 if ((out.get()) != (byte)0x0f) {
 344                     log.println("Missing trail byte");
 345                     return(false);
 346                 }
 347             }
 348 
 349             if (e == 0 && (refBytes.hasRemaining() || out.hasRemaining())) {
 350                 // Paranoia: Didn't consume everything
 351                 throw new IllegalStateException();
 352             }
 353 
 354             refBytes.rewind();
 355             refChars.rewind();
 356             return (e == 0);
 357         }
 358 
 359         private boolean run(int mode) throws Exception {
 360             log.println("  " + bytesPerChar
 361                         + " byte" + plural(bytesPerChar) + "/char");
 362 
 363             if (dRefBytes.capacity() < refBytes.capacity()) {
 364                 dRefBytes = ByteBuffer.allocateDirect(refBytes.capacity());
 365             }
 366             if (dRefChars.capacity() < refChars.capacity()) {
 367                 dRefChars = ByteBuffer.allocateDirect(refChars.capacity()*2)
 368                                       .asCharBuffer();
 369             }
 370             refBytes.flip();
 371             refChars.flip();
 372             dRefBytes.clear();
 373             dRefChars.clear();
 374 
 375             dRefBytes.put(refBytes).flip();
 376             dRefChars.put(refChars).flip();
 377             refBytes.flip();
 378             refChars.flip();
 379 
 380             boolean rv = true;
 381             if (mode != ENCODE) {
 382                 rv &= decode(refBytes, refChars);
 383                 rv &= decode(dRefBytes, dRefChars);
 384             }
 385             if (mode != DECODE) {
 386                 rv &= encode(refBytes, refChars);
 387                 rv &= encode(dRefBytes, dRefChars);
 388             }
 389             return rv;
 390         }
 391 
 392     }
 393 
 394     // Maximum bytes/char being tested
 395     private int maxBytesPerChar = 0;
 396 
 397     // Tests, indexed by bytesPerChar - 1
 398     private Test[] tests;
 399 
 400     private void clearTests() {
 401         maxBytesPerChar = 0;
 402         tests = new Test[0];
 403     }
 404 
 405     // Find the test for the given bytes/char value,
 406     // expanding the test array if needed
 407     //
 408     private Test testFor(int bpc) {
 409         if (bpc > maxBytesPerChar) {
 410             Test[] ts = new Test[bpc];
 411             System.arraycopy(tests, 0, ts, 0, maxBytesPerChar);
 412             for (int i = maxBytesPerChar; i < bpc; i++)
 413                 ts[i] = new Test(i + 1);
 414             tests = ts;
 415             maxBytesPerChar = bpc;
 416         }
 417         return tests[bpc - 1];
 418     }
 419 
 420     // Compute the name of the test file for the given encoding and mode.  If
 421     // the file exists then return its name, otherwise return null.
 422     //
 423     private File testFile(String encoding, int mode) {
 424         File f = new File(testRootDir, encoding + extension[mode]);
 425         if (!f.exists())
 426             return null;
 427         return f;
 428     }
 429 
 430     // Parse the given b2c file and load up the required test objects
 431     //
 432     private void loadTests(File f)
 433         throws Exception
 434     {
 435         clearTests();
 436         FileInputStream in = new FileInputStream(f);
 437         try {
 438             Parser p = new Parser(in);
 439             Entry e = new Entry();
 440 
 441             while ((e = (Entry)p.next(e)) != null) {
 442                 if (e.cp2 != 0)
 443                     continue;  // skip composite (base+cc) for now
 444                 byte[] bs = e.bb;
 445                 char[] cc = Character.toChars(e.cp);
 446                 testFor(bs.length).put(bs, cc);
 447             }
 448             shiftHackDBCS = p.isStateful();
 449         } finally {
 450             in.close();
 451         }
 452     }
 453 
 454     private boolean run() throws Exception {
 455         encoder
 456             .onUnmappableCharacter(CodingErrorAction.REPLACE)
 457             .onMalformedInput(CodingErrorAction.REPLACE);
 458         decoder.onUnmappableCharacter(CodingErrorAction.REPLACE)
 459             .onMalformedInput(CodingErrorAction.REPLACE);
 460         boolean rv = true;
 461 
 462         log.println();
 463         log.println(cs.name() + " (" + encoding + ")");
 464 
 465         // Outer loop runs three passes: roundtrip, irreversible encodings,
 466         // and then irreversible decodings
 467         for (int mode = ROUNDTRIP; mode <= DECODE; mode++) {
 468             File f = testFile(encoding, mode);
 469             if (f == null)
 470                 continue;
 471             loadTests(f);
 472             for (int i = 0; i < maxBytesPerChar; i++)
 473                 rv &= tests[i].run(mode);
 474         }
 475         return rv;
 476     }
 477 
 478     // For debugging: java CoderTest [-v] foo.b2c bar.b2c ...
 479     //
 480     public static void main(String args[])
 481         throws Exception
 482     {
 483         File d = new File(System.getProperty("test.src", "."));
 484         String[] av = (args.length != 0) ? args : d.list();
 485         int errors = 0;
 486         int tested = 0;
 487         int skipped = 0;
 488 
 489         for (int i = 0; i < av.length; i++) {
 490             String a = av[i];
 491             if (a.equals("-v")) {
 492                 verbose = true;
 493                 continue;
 494             }
 495             if (a.endsWith(".b2c")) {
 496                 String encoding = a.substring(0, a.length() - 4);
 497 
 498                 if (!Charset.isSupported(encoding)) {
 499                     log.println();
 500                     log.println("Not supported: " + encoding);
 501                     skipped++;
 502                     continue;
 503                 }
 504                 tested++;
 505                 if (!new CoderTest(encoding).run())
 506                     errors++;
 507             }
 508         }
 509 
 510         log.println();
 511         log.println(tested + " charset" + plural(tested) + " tested, "
 512                     + skipped + " not supported");
 513         log.println();
 514         if (errors > 0)
 515             throw new Exception("Errors detected in "
 516                                 + errors + " charset" + plural(errors));
 517 
 518     }
 519 }