1 /*
   2  * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 /*
  24  * @test
  25  * @bug  4221795 6565620 6959267 7070436 7198195 8032446 8221431
  26  * @summary Confirm Normalizer's fundamental behavior
  27  * @library /lib/testlibrary/java/lang
  28  * @modules java.base/sun.text java.base/sun.text.normalizer
  29  * @compile -XDignore.symbol.file ConformanceTest.java
  30  * @run main/timeout=3000 ConformanceTest
  31  */
  32 
  33 import java.io.BufferedReader;
  34 import java.io.File;
  35 import java.io.FileInputStream;
  36 import java.io.InputStreamReader;
  37 import java.lang.reflect.Method;
  38 import java.nio.charset.Charset;
  39 import java.nio.charset.CharsetDecoder;
  40 import java.util.BitSet;
  41 import java.util.StringTokenizer;
  42 
  43 import sun.text.normalizer.NormalizerBase;
  44 import sun.text.normalizer.NormalizerImpl;
  45 
  46 /*
  47  * Conformance test for java.text.Normalizer and sun.text.Normalizer.
  48  */
  49 public class ConformanceTest {
  50 
  51     //
  52     // Options to be used with sun.text.Normalizer
  53     //
  54 
  55     /*
  56      * Default Unicode 3.2.0 normalization. (Provided for IDNA/StringPrep)
  57      *
  58      *   - Without Corrigendum 4 fix
  59      *     (Different from ICU4J 3.2's Normalizer.)
  60      *   - Without Public Review Issue #29 fix
  61      *     (Different from ICU4J 3.2's Normalizer.)
  62      */
  63     private static final int UNICODE_3_2_0 = sun.text.Normalizer.UNICODE_3_2;
  64 
  65     /*
  66      * Original Unicode 3.2.0 normalization. (Provided for testing only)
  67      *
  68      *   - With Corrigendum 4 fix
  69      *   - With Public Revilew Issue #29 fix
  70      */
  71     private static final int UNICODE_3_2_0_ORIGINAL =
  72                                  NormalizerBase.UNICODE_3_2;
  73 
  74     /*
  75      * Default normalization. In JDK 6,
  76      *   - Unicode 4.0.0
  77      *   - With Corrigendum 4 fix
  78      *   - Without Public Review Issue #29 fix
  79      *
  80      * In JDK 7,
  81      *   - Unicode 5.1.0
  82      *     (Different from ICU4J 3.2's Normalizer.)
  83      *   - With Corrigendum 4 fix
  84      *   - With Public Review Issue #29 fix
  85      *
  86      * In JDK 8,
  87      *   - Unicode 6.1.0
  88      *   - With Corrigendum 4 fix
  89      *   - With Public Review Issue #29 fix
  90      *
  91      *  When we support Unicode 4.1.0 or later, we need to do normalization
  92      *  with Public Review Issue #29 fix. For more details of PRI #29, see
  93      *  http://unicode.org/review/pr-29.html .
  94      */
  95     private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST;
  96 
  97     //
  98     // Conformance test datafiles
  99     //
 100 
 101     /*
 102      * Conformance test datafile for Unicode 3.2.0 with Corrigendum4
 103      * corrections.
 104      * This testdata is for sun.text.Normalize(UNICODE_3_2)
 105      *
 106      * This is NOT an original Conformace test data. Some inconvenient test
 107      * cases are commented out. About corrigendum 4, please refer
 108      *   http://www.unicode.org/review/resolved-pri.html#pri29
 109      *
 110      */
 111     static final String DATA_3_2_0_CORRIGENDUM =
 112                             "NormalizationTest-3.2.0.Corrigendum4.txt";
 113 
 114     /*
 115      * Conformance test datafile for Unicode 3.2.0 without Corrigendum4
 116      * corrections. This is the original Conformace test data.
 117      *
 118      * This testdata is for sun.text.Normalize(UNICODE_3_2_IDNA)
 119      */
 120     static final String DATA_3_2_0 = "NormalizationTest-3.2.0.txt";
 121 
 122     /*
 123      * Conformance test datafile for the latest Unicode which is supported
 124      * by J2SE.
 125      * Unicode 4.0.0 is the latest version in JDK 5.0 and JDK 6. Unicode 5.1.0
 126      * in JDK 7, and 6.1.0 in JDK 8. This Unicode can be used via both
 127      * java.text.Normalizer and sun.text.Normalizer.
 128      *
 129      * This testdata is for sun.text.Normalize(UNICODE_LATEST)
 130      */
 131     static final String DATA_LATEST = "NormalizationTest-Latest.txt";
 132 
 133     /*
 134      * Conformance test datafile in ICU4J 3.2.
 135      */
 136     static final String DATA_ICU = "ICUNormalizationTest.txt";
 137 
 138     /*
 139      * Decorder
 140      */
 141     static final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
 142 
 143     /*
 144      * List to pick up characters which are not listed in Part1
 145      */
 146     static BitSet charList = new BitSet(Character.MAX_CODE_POINT+1);
 147 
 148     /*
 149      * Shortcuts
 150      */
 151     private static final java.text.Normalizer.Form NFC  =
 152         java.text.Normalizer.Form.NFC;
 153     private static final java.text.Normalizer.Form NFD  =
 154         java.text.Normalizer.Form.NFD;
 155     private static final java.text.Normalizer.Form NFKC =
 156         java.text.Normalizer.Form.NFKC;
 157     private static final java.text.Normalizer.Form NFKD =
 158         java.text.Normalizer.Form.NFKD;
 159     static final java.text.Normalizer.Form[] forms = {NFC, NFD, NFKC, NFKD};
 160 
 161 
 162     static TestNormalizer normalizer;
 163 
 164     public static void main(String[] args) throws Exception {
 165         ConformanceTest ct = new ConformanceTest();
 166         ct.test();
 167     }
 168 
 169     void test() throws Exception {
 170         normalizer = new testJavaNormalizer();
 171         test(DATA_LATEST, UNICODE_LATEST);
 172 
 173         normalizer = new testSunNormalizer();
 174         test(DATA_3_2_0_CORRIGENDUM, UNICODE_3_2_0);
 175         test(DATA_LATEST, UNICODE_LATEST);
 176         test(DATA_ICU, UNICODE_LATEST);
 177 
 178         /* Unconformity test */
 179 //      test(DATA_3_2_0, UNICODE_LATEST);
 180 //      test(DATA_LATEST, UNICODE_3_2_0);
 181     }
 182 
 183     /*
 184      * Main routine of conformance test
 185      */
 186     private static void test(String filename, int unicodeVer) throws Exception {
 187 
 188         File  f = filename.equals(DATA_LATEST) ?
 189             UCDFiles.NORMALIZATION_TEST.toFile() :
 190             new File(System.getProperty("test.src", "."), filename);
 191         FileInputStream fis = new FileInputStream(f);
 192         BufferedReader in =
 193             new BufferedReader(new InputStreamReader(fis, decoder));
 194 
 195         System.out.println("\nStart testing for " + normalizer.name +
 196             " with " + filename + " for options: " +
 197             (((unicodeVer & NormalizerBase.UNICODE_3_2) != 0) ?
 198                 "Unicode 3.2.0" : "the latest Unicode"));
 199 
 200         int lineNo = 0;
 201         String text;
 202         boolean part1test = false;
 203         boolean part1testExists = false;
 204         String[] columns = new String[6];
 205 
 206         while ((text = in.readLine()) != null) {
 207             lineNo ++;
 208 
 209             char c = text.charAt(0);
 210             if (c == '#') {
 211                 continue;
 212             } else if (c == '@') {
 213                 if (text.startsWith("@Part")) {
 214                     System.out.println("# Testing data in " + text);
 215 
 216                     if (text.startsWith("@Part1 ")) {
 217                         part1test = true;
 218                         part1testExists = true;
 219                     } else {
 220                         part1test = false;
 221                     }
 222 
 223                     continue;
 224                 }
 225             }
 226 
 227             prepareColumns(columns, text, filename, lineNo, part1test);
 228 
 229             testNFC(columns, unicodeVer, filename, lineNo);
 230             testNFD(columns, unicodeVer, filename, lineNo);
 231             testNFKC(columns, unicodeVer, filename, lineNo);
 232             testNFKD(columns, unicodeVer, filename, lineNo);
 233         }
 234 
 235         in.close();
 236         fis.close();
 237 
 238         if (part1testExists) {
 239             System.out.println("# Testing characters which are not listed in Part1");
 240             testRemainingChars(filename, unicodeVer);
 241             part1testExists = false;
 242         }
 243     }
 244 
 245     /*
 246      * Test for NFC
 247      *
 248      *   c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
 249      *   c4 ==  NFC(c4) ==  NFC(c5)
 250      */
 251     private static void testNFC(String[] c, int unicodeVer,
 252                                 String file, int line) throws Exception {
 253         test(2, c, 1, 3, NFC, unicodeVer, file, line);
 254         test(4, c, 4, 5, NFC, unicodeVer, file, line);
 255     }
 256 
 257     /*
 258      * Test for NFD
 259      *
 260      *   c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
 261      *   c5 ==  NFD(c4) ==  NFD(c5)
 262      */
 263     private static void testNFD(String[] c, int unicodeVer,
 264                                 String file, int line) throws Exception {
 265         test(3, c, 1, 3, NFD, unicodeVer, file, line);
 266         test(5, c, 4, 5, NFD, unicodeVer, file, line);
 267     }
 268 
 269     /*
 270      * Test for NFKC
 271      *
 272      *   c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
 273      */
 274     private static void testNFKC(String[] c, int unicodeVer,
 275                                  String file, int line) throws Exception {
 276         test(4, c, 1, 5, NFKC, unicodeVer, file, line);
 277     }
 278 
 279     /*
 280      * Test for NFKD
 281      *
 282      *   c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
 283      */
 284     private static void testNFKD(String[] c, int unicodeVer,
 285                                  String file, int line) throws Exception {
 286         test(5, c, 1, 5, NFKD, unicodeVer, file, line);
 287     }
 288 
 289     /*
 290      * Test for characters which aren't listed in Part1
 291      *
 292      *   X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
 293      */
 294     private static void testRemainingChars(String file,
 295                                            int unicodeVer) throws Exception {
 296         for (int i = Character.MIN_CODE_POINT;
 297              i <= Character.MAX_CODE_POINT;
 298              i++) {
 299             if (!charList.get(i)) {
 300                 String from = String.valueOf(Character.toChars(i));
 301                 String to;
 302 
 303                 for (int j = 0; j < forms.length; j++) {
 304                     java.text.Normalizer.Form form = forms[j];
 305 
 306                     to = normalizer.normalize(from, form, unicodeVer);
 307                     if (!from.equals(to)) {
 308                         error(form, from, from, to, file, -1);
 309 //                  } else {
 310 //                      okay(form, from, from, to, file, -1);
 311                     }
 312 
 313                     if (!normalizer.isNormalized(from, form, unicodeVer)) {
 314                         error(form, from, file, -1);
 315 //                  } else {
 316 //                      okay(form, from, file, -1);
 317                     }
 318                 }
 319             }
 320         }
 321     }
 322 
 323     /*
 324      * Test normalize() and isNormalized()
 325      */
 326     private static void test(int col, String[] c,
 327                              int FROM, int TO,
 328                              java.text.Normalizer.Form form, int unicodeVer,
 329                              String file, int line) throws Exception {
 330         for (int i = FROM; i <= TO; i++) {
 331             String got = normalizer.normalize(c[i], form, unicodeVer);
 332             if (!c[col].equals(got)) {
 333                 error(form, c[i], c[col], got, file, line);
 334 //          } else {
 335 //              okay(form, c[i], c[col], got, file, line);
 336             }
 337 
 338             /*
 339              * If the original String equals its normalized String, it means
 340              * that the original String is normalizerd. Thus, isNormalized()
 341              * should return true. And, vice versa!
 342              */
 343             if (c[col].equals(c[i])) {
 344                 if (!normalizer.isNormalized(c[i], form, unicodeVer)) {
 345                     error(form, c[i], file, line);
 346 //              } else {
 347 //                  okay(form, c[i], file, line);
 348                 }
 349             } else {
 350                 if (normalizer.isNormalized(c[i], form, unicodeVer)) {
 351                     error(form, c[i], file, line);
 352 //              } else {
 353 //                  okay(form, c[i], file, line);
 354                 }
 355             }
 356         }
 357     }
 358 
 359     /*
 360      * Generate an array of String from a line of conformance datafile.
 361      */
 362     private static void prepareColumns(String[] cols, String text,
 363                                            String file, int line,
 364                                            boolean part1test) throws Exception {
 365         int index = text.indexOf('#');
 366         if (index != -1) {
 367             text = text.substring(0, index);
 368         }
 369 
 370         StringTokenizer st = new StringTokenizer(text, ";");
 371         int tokenCount = st.countTokens();
 372         if (tokenCount < 5) {
 373              throw new RuntimeException("# of tokens in datafile should be 6, but got: " + tokenCount + " at line " + line + " in " + file);
 374         }
 375 
 376         StringBuffer sb = new StringBuffer();
 377         for (int i = 1; i <= 5; i++) {
 378             StringTokenizer tst = new StringTokenizer(st.nextToken(), " ");
 379 
 380             while (tst.hasMoreTokens()) {
 381                 int code = Integer.parseInt(tst.nextToken(), 16);
 382                 sb.append(Character.toChars(code));
 383             }
 384 
 385             cols[i] = sb.toString();
 386             sb.setLength(0);
 387         }
 388 
 389         if (part1test) {
 390             charList.set(cols[1].codePointAt(0));
 391         }
 392     }
 393 
 394     /*
 395      * Show an error message when normalize() didn't return the expected value.
 396      * (An exception is sometimes convenient. Therefore, it is commented out
 397      * for the moment.)
 398      */
 399     private static void error(java.text.Normalizer.Form form,
 400                               String from, String to, String got,
 401                               String file, int line) throws Exception {
 402         System.err.println("-\t" + form.toString() + ": normalize(" +
 403             toHexString(from) + ") doesn't equal <" + toHexString(to) +
 404             "> at line " + line + " in " + file + ". Got [" +
 405             toHexString(got) + "]");
 406         throw new RuntimeException("Normalization(" + form.toString() + ") failed");
 407     }
 408 
 409     /*
 410      * Show an error message when isNormalize() didn't return the expected
 411      * value.
 412      * (An exception is sometimes convenient. Therefore, it is commented out
 413      * for the moment.)
 414      */
 415     private static void error(java.text.Normalizer.Form form, String s,
 416                               String file, int line) throws Exception {
 417         System.err.println("\t" + form.toString() + ": isNormalized(" +
 418             toHexString(s) + ") returned the wrong value at line " + line +
 419             " in " + file);
 420         throw new RuntimeException("Normalization(" + form.toString() +") failed");
 421     }
 422 
 423     /*
 424      * (For debugging)
 425      * Shows a message when normalize() returned the expected value.
 426      */
 427     private static void okay(java.text.Normalizer.Form form,
 428                              String from, String to, String got,
 429                              String file, int line) {
 430         System.out.println("\t" + form.toString() + ": normalize(" +
 431             toHexString(from) + ") equals <" + toHexString(to) +
 432             "> at line " + line + " in " + file + ". Got [" +
 433             toHexString(got) + "]");
 434     }
 435 
 436     /*
 437      * (For debugging)
 438      * Shows a message when isNormalized() returned the expected value.
 439      */
 440     private static void okay(java.text.Normalizer.Form form, String s,
 441                              String file, int line) {
 442         System.out.println("\t" + form.toString() + ": isNormalized(" +
 443             toHexString(s) + ") returned the correct value at line " +
 444             line + " in " + file);
 445     }
 446 
 447     /*
 448      * Returns a spece-delimited hex String
 449      */
 450     private static String toHexString(String s) {
 451         StringBuffer sb = new StringBuffer(" ");
 452 
 453         for (int i = 0; i < s.length(); i++) {
 454             sb.append(Integer.toHexString(s.charAt(i)));
 455             sb.append(' ');
 456         }
 457 
 458         return sb.toString();
 459     }
 460 
 461     /*
 462      * Abstract class to call each Normalizer in java.text or sun.text.
 463      */
 464     private abstract class TestNormalizer {
 465         String name;
 466 
 467         TestNormalizer(String str) {
 468             name = str;
 469         }
 470 
 471         String getNormalizerName() {
 472             return name;
 473         }
 474 
 475         abstract String normalize(CharSequence cs,
 476                                   java.text.Normalizer.Form form,
 477                                   int option);
 478 
 479         abstract boolean isNormalized(CharSequence cs,
 480                                      java.text.Normalizer.Form form,
 481                                      int option);
 482     }
 483 
 484     /*
 485      * For java.text.Normalizer
 486      *   - normalize(CharSequence, Normalizer.Form)
 487      *   - isNormalized(CharSequence, Normalizer.Form)
 488      */
 489     private class testJavaNormalizer extends TestNormalizer {
 490         testJavaNormalizer() {
 491             super("java.text.Normalizer");
 492         }
 493 
 494         String normalize(CharSequence cs,
 495                          java.text.Normalizer.Form form,
 496                          int option) {
 497             return java.text.Normalizer.normalize(cs, form);
 498         }
 499 
 500         boolean isNormalized(CharSequence cs,
 501                              java.text.Normalizer.Form form,
 502                              int option) {
 503             return java.text.Normalizer.isNormalized(cs, form);
 504         }
 505     }
 506 
 507     /*
 508      * For sun.text.Normalizer
 509      *   - normalize(CharSequence, Normalizer.Form, int)
 510      *   - isNormalized(CharSequence, Normalizer.Form, int)
 511      */
 512     private class testSunNormalizer extends TestNormalizer {
 513         testSunNormalizer() {
 514             super("sun.text.Normalizer");
 515         }
 516 
 517         String normalize(CharSequence cs,
 518                          java.text.Normalizer.Form form,
 519                          int option) {
 520             return sun.text.Normalizer.normalize(cs, form, option);
 521         }
 522 
 523         boolean isNormalized(CharSequence cs,
 524                              java.text.Normalizer.Form form,
 525                              int option) {
 526             return sun.text.Normalizer.isNormalized(cs, form, option);
 527         }
 528     }
 529 }