1 /* 2 * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 /* 24 * @test 25 * @bug 4221795 6565620 6959267 7070436 7198195 8032446 8174270 8221431 8239383 26 * @summary Confirm Normalizer's fundamental behavior 27 * @library /lib/testlibrary/java/lang 28 * @modules java.base/sun.text java.base/jdk.internal.icu.text 29 * @compile -XDignore.symbol.file ConformanceTest.java 30 * @run main/timeout=3000 ConformanceTest 31 */ 32 33 import java.io.BufferedReader; 34 import java.io.File; 35 import java.io.FileInputStream; 36 import java.io.InputStreamReader; 37 import java.nio.charset.Charset; 38 import java.nio.charset.CharsetDecoder; 39 import java.util.BitSet; 40 import java.util.StringTokenizer; 41 42 import jdk.internal.icu.text.NormalizerBase; 43 44 /* 45 * Conformance test for java.text.Normalizer and sun.text.Normalizer. 46 */ 47 public class ConformanceTest { 48 49 // 50 // Options to be used with sun.text.Normalizer 51 // 52 53 /* 54 * Default Unicode 3.2.0 normalization. (Provided for IDNA/StringPrep) 55 * 56 * - Without Corrigendum 4 fix 57 * (Different from ICU4J 3.2's Normalizer.) 58 * - Without Public Review Issue #29 fix 59 * (Different from ICU4J 3.2's Normalizer.) 60 */ 61 private static final int UNICODE_3_2_0 = sun.text.Normalizer.UNICODE_3_2; 62 63 /* 64 * Original Unicode 3.2.0 normalization. (Provided for testing only) 65 * 66 * - With Corrigendum 4 fix 67 * - With Public Revilew Issue #29 fix 68 */ 69 private static final int UNICODE_3_2_0_ORIGINAL = 70 NormalizerBase.UNICODE_3_2; 71 72 /* 73 * Default normalization. In JDK 6, 74 * - Unicode 4.0.0 75 * - With Corrigendum 4 fix 76 * - Without Public Review Issue #29 fix 77 * 78 * In JDK 7, 79 * - Unicode 5.1.0 80 * (Different from ICU4J 3.2's Normalizer.) 81 * - With Corrigendum 4 fix 82 * - With Public Review Issue #29 fix 83 * 84 * In JDK 8, 85 * - Unicode 6.1.0 86 * - With Corrigendum 4 fix 87 * - With Public Review Issue #29 fix 88 * 89 * When we support Unicode 4.1.0 or later, we need to do normalization 90 * with Public Review Issue #29 fix. For more details of PRI #29, see 91 * http://unicode.org/review/pr-29.html . 92 */ 93 private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST; 94 95 // 96 // Conformance test datafiles 97 // 98 99 /* 100 * Conformance test datafile for Unicode 3.2.0 with Corrigendum4 101 * corrections. 102 * This testdata is for sun.text.Normalize(UNICODE_3_2) 103 * 104 * This is NOT an original Conformace test data. Some inconvenient test 105 * cases are commented out. About corrigendum 4, please refer 106 * http://www.unicode.org/review/resolved-pri.html#pri29 107 * 108 */ 109 static final String DATA_3_2_0_CORRIGENDUM = 110 "NormalizationTest-3.2.0.Corrigendum4.txt"; 111 112 /* 113 * Conformance test datafile for Unicode 3.2.0 without Corrigendum4 114 * corrections. This is the original Conformace test data. 115 * 116 * This testdata is for sun.text.Normalize(UNICODE_3_2_IDNA) 117 */ 118 static final String DATA_3_2_0 = "NormalizationTest-3.2.0.txt"; 119 120 /* 121 * Conformance test datafile for the latest Unicode which is supported 122 * by J2SE. 123 * Unicode 4.0.0 is the latest version in JDK 5.0 and JDK 6. Unicode 5.1.0 124 * in JDK 7, and 6.1.0 in JDK 8. This Unicode can be used via both 125 * java.text.Normalizer and sun.text.Normalizer. 126 * 127 * This testdata is for sun.text.Normalize(UNICODE_LATEST) 128 */ 129 static final String DATA_LATEST = "NormalizationTest.txt"; 130 131 /* 132 * Conformance test datafile in ICU4J 3.2. 133 */ 134 static final String DATA_ICU = "ICUNormalizationTest.txt"; 135 136 /* 137 * Decorder 138 */ 139 static final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder(); 140 141 /* 142 * List to pick up characters which are not listed in Part1 143 */ 144 static BitSet charList = new BitSet(Character.MAX_CODE_POINT+1); 145 146 /* 147 * Shortcuts 148 */ 149 private static final java.text.Normalizer.Form NFC = 150 java.text.Normalizer.Form.NFC; 151 private static final java.text.Normalizer.Form NFD = 152 java.text.Normalizer.Form.NFD; 153 private static final java.text.Normalizer.Form NFKC = 154 java.text.Normalizer.Form.NFKC; 155 private static final java.text.Normalizer.Form NFKD = 156 java.text.Normalizer.Form.NFKD; 157 static final java.text.Normalizer.Form[] forms = {NFC, NFD, NFKC, NFKD}; 158 159 160 static TestNormalizer normalizer; 161 162 public static void main(String[] args) throws Exception { 163 ConformanceTest ct = new ConformanceTest(); 164 ct.test(); 165 } 166 167 void test() throws Exception { 168 normalizer = new testJavaNormalizer(); 169 test(DATA_LATEST, UNICODE_LATEST); 170 171 normalizer = new testSunNormalizer(); 172 test(DATA_3_2_0_CORRIGENDUM, UNICODE_3_2_0); 173 test(DATA_LATEST, UNICODE_LATEST); 174 test(DATA_ICU, UNICODE_LATEST); 175 176 /* Unconformity test */ 177 // test(DATA_3_2_0, UNICODE_LATEST); 178 // test(DATA_LATEST, UNICODE_3_2_0); 179 } 180 181 /* 182 * Main routine of conformance test 183 */ 184 private static void test(String filename, int unicodeVer) throws Exception { 185 186 File f = filename.equals(DATA_LATEST) ? 187 UCDFiles.NORMALIZATION_TEST.toFile() : 188 new File(System.getProperty("test.src", "."), filename); 189 FileInputStream fis = new FileInputStream(f); 190 BufferedReader in = 191 new BufferedReader(new InputStreamReader(fis, decoder)); 192 193 System.out.println("\nStart testing for " + normalizer.name + 194 " with " + filename + " for options: " + 195 (((unicodeVer & NormalizerBase.UNICODE_3_2) != 0) ? 196 "Unicode 3.2.0" : "the latest Unicode")); 197 198 int lineNo = 0; 199 String text; 200 boolean part1test = false; 201 boolean part1testExists = false; 202 String[] columns = new String[6]; 203 204 while ((text = in.readLine()) != null) { 205 lineNo ++; 206 207 char c = text.charAt(0); 208 if (c == '#') { 209 continue; 210 } else if (c == '@') { 211 if (text.startsWith("@Part")) { 212 System.out.println("# Testing data in " + text); 213 214 if (text.startsWith("@Part1 ")) { 215 part1test = true; 216 part1testExists = true; 217 } else { 218 part1test = false; 219 } 220 221 continue; 222 } 223 } 224 225 prepareColumns(columns, text, filename, lineNo, part1test); 226 227 testNFC(columns, unicodeVer, filename, lineNo); 228 testNFD(columns, unicodeVer, filename, lineNo); 229 testNFKC(columns, unicodeVer, filename, lineNo); 230 testNFKD(columns, unicodeVer, filename, lineNo); 231 } 232 233 in.close(); 234 fis.close(); 235 236 if (part1testExists) { 237 System.out.println("# Testing characters which are not listed in Part1"); 238 testRemainingChars(filename, unicodeVer); 239 part1testExists = false; 240 } 241 } 242 243 /* 244 * Test for NFC 245 * 246 * c2 == NFC(c1) == NFC(c2) == NFC(c3) 247 * c4 == NFC(c4) == NFC(c5) 248 */ 249 private static void testNFC(String[] c, int unicodeVer, 250 String file, int line) throws Exception { 251 test(2, c, 1, 3, NFC, unicodeVer, file, line); 252 test(4, c, 4, 5, NFC, unicodeVer, file, line); 253 } 254 255 /* 256 * Test for NFD 257 * 258 * c3 == NFD(c1) == NFD(c2) == NFD(c3) 259 * c5 == NFD(c4) == NFD(c5) 260 */ 261 private static void testNFD(String[] c, int unicodeVer, 262 String file, int line) throws Exception { 263 test(3, c, 1, 3, NFD, unicodeVer, file, line); 264 test(5, c, 4, 5, NFD, unicodeVer, file, line); 265 } 266 267 /* 268 * Test for NFKC 269 * 270 * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) 271 */ 272 private static void testNFKC(String[] c, int unicodeVer, 273 String file, int line) throws Exception { 274 test(4, c, 1, 5, NFKC, unicodeVer, file, line); 275 } 276 277 /* 278 * Test for NFKD 279 * 280 * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) 281 */ 282 private static void testNFKD(String[] c, int unicodeVer, 283 String file, int line) throws Exception { 284 test(5, c, 1, 5, NFKD, unicodeVer, file, line); 285 } 286 287 /* 288 * Test for characters which aren't listed in Part1 289 * 290 * X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X) 291 */ 292 private static void testRemainingChars(String file, 293 int unicodeVer) throws Exception { 294 for (int i = Character.MIN_CODE_POINT; 295 i <= Character.MAX_CODE_POINT; 296 i++) { 297 if (!charList.get(i)) { 298 String from = String.valueOf(Character.toChars(i)); 299 String to; 300 301 for (int j = 0; j < forms.length; j++) { 302 java.text.Normalizer.Form form = forms[j]; 303 304 to = normalizer.normalize(from, form, unicodeVer); 305 if (!from.equals(to)) { 306 error(form, from, from, to, file, -1); 307 // } else { 308 // okay(form, from, from, to, file, -1); 309 } 310 311 if (!normalizer.isNormalized(from, form, unicodeVer)) { 312 error(form, from, file, -1); 313 // } else { 314 // okay(form, from, file, -1); 315 } 316 } 317 } 318 } 319 } 320 321 /* 322 * Test normalize() and isNormalized() 323 */ 324 private static void test(int col, String[] c, 325 int FROM, int TO, 326 java.text.Normalizer.Form form, int unicodeVer, 327 String file, int line) throws Exception { 328 for (int i = FROM; i <= TO; i++) { 329 String got = normalizer.normalize(c[i], form, unicodeVer); 330 if (!c[col].equals(got)) { 331 error(form, c[i], c[col], got, file, line); 332 // } else { 333 // okay(form, c[i], c[col], got, file, line); 334 } 335 336 /* 337 * If the original String equals its normalized String, it means 338 * that the original String is normalizerd. Thus, isNormalized() 339 * should return true. And, vice versa! 340 */ 341 if (c[col].equals(c[i])) { 342 if (!normalizer.isNormalized(c[i], form, unicodeVer)) { 343 error(form, c[i], file, line); 344 // } else { 345 // okay(form, c[i], file, line); 346 } 347 } else { 348 if (normalizer.isNormalized(c[i], form, unicodeVer)) { 349 error(form, c[i], file, line); 350 // } else { 351 // okay(form, c[i], file, line); 352 } 353 } 354 } 355 } 356 357 /* 358 * Generate an array of String from a line of conformance datafile. 359 */ 360 private static void prepareColumns(String[] cols, String text, 361 String file, int line, 362 boolean part1test) throws Exception { 363 int index = text.indexOf('#'); 364 if (index != -1) { 365 text = text.substring(0, index); 366 } 367 368 StringTokenizer st = new StringTokenizer(text, ";"); 369 int tokenCount = st.countTokens(); 370 if (tokenCount < 5) { 371 throw new RuntimeException("# of tokens in datafile should be 6, but got: " + tokenCount + " at line " + line + " in " + file); 372 } 373 374 StringBuffer sb = new StringBuffer(); 375 for (int i = 1; i <= 5; i++) { 376 StringTokenizer tst = new StringTokenizer(st.nextToken(), " "); 377 378 while (tst.hasMoreTokens()) { 379 int code = Integer.parseInt(tst.nextToken(), 16); 380 sb.append(Character.toChars(code)); 381 } 382 383 cols[i] = sb.toString(); 384 sb.setLength(0); 385 } 386 387 if (part1test) { 388 charList.set(cols[1].codePointAt(0)); 389 } 390 } 391 392 /* 393 * Show an error message when normalize() didn't return the expected value. 394 * (An exception is sometimes convenient. Therefore, it is commented out 395 * for the moment.) 396 */ 397 private static void error(java.text.Normalizer.Form form, 398 String from, String to, String got, 399 String file, int line) throws Exception { 400 System.err.println("-\t" + form.toString() + ": normalize(" + 401 toHexString(from) + ") doesn't equal <" + toHexString(to) + 402 "> at line " + line + " in " + file + ". Got [" + 403 toHexString(got) + "]"); 404 throw new RuntimeException("Normalization(" + form.toString() + ") failed"); 405 } 406 407 /* 408 * Show an error message when isNormalize() didn't return the expected 409 * value. 410 * (An exception is sometimes convenient. Therefore, it is commented out 411 * for the moment.) 412 */ 413 private static void error(java.text.Normalizer.Form form, String s, 414 String file, int line) throws Exception { 415 System.err.println("\t" + form.toString() + ": isNormalized(" + 416 toHexString(s) + ") returned the wrong value at line " + line + 417 " in " + file); 418 throw new RuntimeException("Normalization(" + form.toString() +") failed"); 419 } 420 421 /* 422 * (For debugging) 423 * Shows a message when normalize() returned the expected value. 424 */ 425 private static void okay(java.text.Normalizer.Form form, 426 String from, String to, String got, 427 String file, int line) { 428 System.out.println("\t" + form.toString() + ": normalize(" + 429 toHexString(from) + ") equals <" + toHexString(to) + 430 "> at line " + line + " in " + file + ". Got [" + 431 toHexString(got) + "]"); 432 } 433 434 /* 435 * (For debugging) 436 * Shows a message when isNormalized() returned the expected value. 437 */ 438 private static void okay(java.text.Normalizer.Form form, String s, 439 String file, int line) { 440 System.out.println("\t" + form.toString() + ": isNormalized(" + 441 toHexString(s) + ") returned the correct value at line " + 442 line + " in " + file); 443 } 444 445 /* 446 * Returns a spece-delimited hex String 447 */ 448 private static String toHexString(String s) { 449 StringBuffer sb = new StringBuffer(" "); 450 451 for (int i = 0; i < s.length(); i++) { 452 sb.append(Integer.toHexString(s.charAt(i))); 453 sb.append(' '); 454 } 455 456 return sb.toString(); 457 } 458 459 /* 460 * Abstract class to call each Normalizer in java.text or sun.text. 461 */ 462 private abstract class TestNormalizer { 463 String name; 464 465 TestNormalizer(String str) { 466 name = str; 467 } 468 469 String getNormalizerName() { 470 return name; 471 } 472 473 abstract String normalize(CharSequence cs, 474 java.text.Normalizer.Form form, 475 int option); 476 477 abstract boolean isNormalized(CharSequence cs, 478 java.text.Normalizer.Form form, 479 int option); 480 } 481 482 /* 483 * For java.text.Normalizer 484 * - normalize(CharSequence, Normalizer.Form) 485 * - isNormalized(CharSequence, Normalizer.Form) 486 */ 487 private class testJavaNormalizer extends TestNormalizer { 488 testJavaNormalizer() { 489 super("java.text.Normalizer"); 490 } 491 492 String normalize(CharSequence cs, 493 java.text.Normalizer.Form form, 494 int option) { 495 return java.text.Normalizer.normalize(cs, form); 496 } 497 498 boolean isNormalized(CharSequence cs, 499 java.text.Normalizer.Form form, 500 int option) { 501 return java.text.Normalizer.isNormalized(cs, form); 502 } 503 } 504 505 /* 506 * For sun.text.Normalizer 507 * - normalize(CharSequence, Normalizer.Form, int) 508 * - isNormalized(CharSequence, Normalizer.Form, int) 509 */ 510 private class testSunNormalizer extends TestNormalizer { 511 testSunNormalizer() { 512 super("sun.text.Normalizer"); 513 } 514 515 String normalize(CharSequence cs, 516 java.text.Normalizer.Form form, 517 int option) { 518 return sun.text.Normalizer.normalize(cs, form, option); 519 } 520 521 boolean isNormalized(CharSequence cs, 522 java.text.Normalizer.Form form, 523 int option) { 524 return sun.text.Normalizer.isNormalized(cs, form, option); 525 } 526 } 527 }