1 /* 2 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 /* 24 * @test 25 * @bug 4221795 6565620 6959267 7070436 7198195 8032446 8221431 26 * @summary Confirm Normalizer's fundamental behavior 27 * @library /lib/testlibrary/java/lang 28 * @modules java.base/sun.text java.base/sun.text.normalizer 29 * @compile -XDignore.symbol.file ConformanceTest.java 30 * @run main/timeout=3000 ConformanceTest 31 */ 32 33 import java.io.BufferedReader; 34 import java.io.File; 35 import java.io.FileInputStream; 36 import java.io.InputStreamReader; 37 import java.lang.reflect.Method; 38 import java.nio.charset.Charset; 39 import java.nio.charset.CharsetDecoder; 40 import java.util.BitSet; 41 import java.util.StringTokenizer; 42 43 import sun.text.normalizer.NormalizerBase; 44 import sun.text.normalizer.NormalizerImpl; 45 46 /* 47 * Conformance test for java.text.Normalizer and sun.text.Normalizer. 48 */ 49 public class ConformanceTest { 50 51 // 52 // Options to be used with sun.text.Normalizer 53 // 54 55 /* 56 * Default Unicode 3.2.0 normalization. (Provided for IDNA/StringPrep) 57 * 58 * - Without Corrigendum 4 fix 59 * (Different from ICU4J 3.2's Normalizer.) 60 * - Without Public Review Issue #29 fix 61 * (Different from ICU4J 3.2's Normalizer.) 62 */ 63 private static final int UNICODE_3_2_0 = sun.text.Normalizer.UNICODE_3_2; 64 65 /* 66 * Original Unicode 3.2.0 normalization. (Provided for testing only) 67 * 68 * - With Corrigendum 4 fix 69 * - With Public Revilew Issue #29 fix 70 */ 71 private static final int UNICODE_3_2_0_ORIGINAL = 72 NormalizerBase.UNICODE_3_2; 73 74 /* 75 * Default normalization. In JDK 6, 76 * - Unicode 4.0.0 77 * - With Corrigendum 4 fix 78 * - Without Public Review Issue #29 fix 79 * 80 * In JDK 7, 81 * - Unicode 5.1.0 82 * (Different from ICU4J 3.2's Normalizer.) 83 * - With Corrigendum 4 fix 84 * - With Public Review Issue #29 fix 85 * 86 * In JDK 8, 87 * - Unicode 6.1.0 88 * - With Corrigendum 4 fix 89 * - With Public Review Issue #29 fix 90 * 91 * When we support Unicode 4.1.0 or later, we need to do normalization 92 * with Public Review Issue #29 fix. For more details of PRI #29, see 93 * http://unicode.org/review/pr-29.html . 94 */ 95 private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST; 96 97 // 98 // Conformance test datafiles 99 // 100 101 /* 102 * Conformance test datafile for Unicode 3.2.0 with Corrigendum4 103 * corrections. 104 * This testdata is for sun.text.Normalize(UNICODE_3_2) 105 * 106 * This is NOT an original Conformace test data. Some inconvenient test 107 * cases are commented out. About corrigendum 4, please refer 108 * http://www.unicode.org/review/resolved-pri.html#pri29 109 * 110 */ 111 static final String DATA_3_2_0_CORRIGENDUM = 112 "NormalizationTest-3.2.0.Corrigendum4.txt"; 113 114 /* 115 * Conformance test datafile for Unicode 3.2.0 without Corrigendum4 116 * corrections. This is the original Conformace test data. 117 * 118 * This testdata is for sun.text.Normalize(UNICODE_3_2_IDNA) 119 */ 120 static final String DATA_3_2_0 = "NormalizationTest-3.2.0.txt"; 121 122 /* 123 * Conformance test datafile for the latest Unicode which is supported 124 * by J2SE. 125 * Unicode 4.0.0 is the latest version in JDK 5.0 and JDK 6. Unicode 5.1.0 126 * in JDK 7, and 6.1.0 in JDK 8. This Unicode can be used via both 127 * java.text.Normalizer and sun.text.Normalizer. 128 * 129 * This testdata is for sun.text.Normalize(UNICODE_LATEST) 130 */ 131 static final String DATA_LATEST = "NormalizationTest-Latest.txt"; 132 133 /* 134 * Conformance test datafile in ICU4J 3.2. 135 */ 136 static final String DATA_ICU = "ICUNormalizationTest.txt"; 137 138 /* 139 * Decorder 140 */ 141 static final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder(); 142 143 /* 144 * List to pick up characters which are not listed in Part1 145 */ 146 static BitSet charList = new BitSet(Character.MAX_CODE_POINT+1); 147 148 /* 149 * Shortcuts 150 */ 151 private static final java.text.Normalizer.Form NFC = 152 java.text.Normalizer.Form.NFC; 153 private static final java.text.Normalizer.Form NFD = 154 java.text.Normalizer.Form.NFD; 155 private static final java.text.Normalizer.Form NFKC = 156 java.text.Normalizer.Form.NFKC; 157 private static final java.text.Normalizer.Form NFKD = 158 java.text.Normalizer.Form.NFKD; 159 static final java.text.Normalizer.Form[] forms = {NFC, NFD, NFKC, NFKD}; 160 161 162 static TestNormalizer normalizer; 163 164 public static void main(String[] args) throws Exception { 165 ConformanceTest ct = new ConformanceTest(); 166 ct.test(); 167 } 168 169 void test() throws Exception { 170 normalizer = new testJavaNormalizer(); 171 test(DATA_LATEST, UNICODE_LATEST); 172 173 normalizer = new testSunNormalizer(); 174 test(DATA_3_2_0_CORRIGENDUM, UNICODE_3_2_0); 175 test(DATA_LATEST, UNICODE_LATEST); 176 test(DATA_ICU, UNICODE_LATEST); 177 178 /* Unconformity test */ 179 // test(DATA_3_2_0, UNICODE_LATEST); 180 // test(DATA_LATEST, UNICODE_3_2_0); 181 } 182 183 /* 184 * Main routine of conformance test 185 */ 186 private static void test(String filename, int unicodeVer) throws Exception { 187 188 File f = filename.equals(DATA_LATEST) ? 189 UCDFiles.NORMALIZATION_TEST.toFile() : 190 new File(System.getProperty("test.src", "."), filename); 191 FileInputStream fis = new FileInputStream(f); 192 BufferedReader in = 193 new BufferedReader(new InputStreamReader(fis, decoder)); 194 195 System.out.println("\nStart testing for " + normalizer.name + 196 " with " + filename + " for options: " + 197 (((unicodeVer & NormalizerBase.UNICODE_3_2) != 0) ? 198 "Unicode 3.2.0" : "the latest Unicode")); 199 200 int lineNo = 0; 201 String text; 202 boolean part1test = false; 203 boolean part1testExists = false; 204 String[] columns = new String[6]; 205 206 while ((text = in.readLine()) != null) { 207 lineNo ++; 208 209 char c = text.charAt(0); 210 if (c == '#') { 211 continue; 212 } else if (c == '@') { 213 if (text.startsWith("@Part")) { 214 System.out.println("# Testing data in " + text); 215 216 if (text.startsWith("@Part1 ")) { 217 part1test = true; 218 part1testExists = true; 219 } else { 220 part1test = false; 221 } 222 223 continue; 224 } 225 } 226 227 prepareColumns(columns, text, filename, lineNo, part1test); 228 229 testNFC(columns, unicodeVer, filename, lineNo); 230 testNFD(columns, unicodeVer, filename, lineNo); 231 testNFKC(columns, unicodeVer, filename, lineNo); 232 testNFKD(columns, unicodeVer, filename, lineNo); 233 } 234 235 in.close(); 236 fis.close(); 237 238 if (part1testExists) { 239 System.out.println("# Testing characters which are not listed in Part1"); 240 testRemainingChars(filename, unicodeVer); 241 part1testExists = false; 242 } 243 } 244 245 /* 246 * Test for NFC 247 * 248 * c2 == NFC(c1) == NFC(c2) == NFC(c3) 249 * c4 == NFC(c4) == NFC(c5) 250 */ 251 private static void testNFC(String[] c, int unicodeVer, 252 String file, int line) throws Exception { 253 test(2, c, 1, 3, NFC, unicodeVer, file, line); 254 test(4, c, 4, 5, NFC, unicodeVer, file, line); 255 } 256 257 /* 258 * Test for NFD 259 * 260 * c3 == NFD(c1) == NFD(c2) == NFD(c3) 261 * c5 == NFD(c4) == NFD(c5) 262 */ 263 private static void testNFD(String[] c, int unicodeVer, 264 String file, int line) throws Exception { 265 test(3, c, 1, 3, NFD, unicodeVer, file, line); 266 test(5, c, 4, 5, NFD, unicodeVer, file, line); 267 } 268 269 /* 270 * Test for NFKC 271 * 272 * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) 273 */ 274 private static void testNFKC(String[] c, int unicodeVer, 275 String file, int line) throws Exception { 276 test(4, c, 1, 5, NFKC, unicodeVer, file, line); 277 } 278 279 /* 280 * Test for NFKD 281 * 282 * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) 283 */ 284 private static void testNFKD(String[] c, int unicodeVer, 285 String file, int line) throws Exception { 286 test(5, c, 1, 5, NFKD, unicodeVer, file, line); 287 } 288 289 /* 290 * Test for characters which aren't listed in Part1 291 * 292 * X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X) 293 */ 294 private static void testRemainingChars(String file, 295 int unicodeVer) throws Exception { 296 for (int i = Character.MIN_CODE_POINT; 297 i <= Character.MAX_CODE_POINT; 298 i++) { 299 if (!charList.get(i)) { 300 String from = String.valueOf(Character.toChars(i)); 301 String to; 302 303 for (int j = 0; j < forms.length; j++) { 304 java.text.Normalizer.Form form = forms[j]; 305 306 to = normalizer.normalize(from, form, unicodeVer); 307 if (!from.equals(to)) { 308 error(form, from, from, to, file, -1); 309 // } else { 310 // okay(form, from, from, to, file, -1); 311 } 312 313 if (!normalizer.isNormalized(from, form, unicodeVer)) { 314 error(form, from, file, -1); 315 // } else { 316 // okay(form, from, file, -1); 317 } 318 } 319 } 320 } 321 } 322 323 /* 324 * Test normalize() and isNormalized() 325 */ 326 private static void test(int col, String[] c, 327 int FROM, int TO, 328 java.text.Normalizer.Form form, int unicodeVer, 329 String file, int line) throws Exception { 330 for (int i = FROM; i <= TO; i++) { 331 String got = normalizer.normalize(c[i], form, unicodeVer); 332 if (!c[col].equals(got)) { 333 error(form, c[i], c[col], got, file, line); 334 // } else { 335 // okay(form, c[i], c[col], got, file, line); 336 } 337 338 /* 339 * If the original String equals its normalized String, it means 340 * that the original String is normalizerd. Thus, isNormalized() 341 * should return true. And, vice versa! 342 */ 343 if (c[col].equals(c[i])) { 344 if (!normalizer.isNormalized(c[i], form, unicodeVer)) { 345 error(form, c[i], file, line); 346 // } else { 347 // okay(form, c[i], file, line); 348 } 349 } else { 350 if (normalizer.isNormalized(c[i], form, unicodeVer)) { 351 error(form, c[i], file, line); 352 // } else { 353 // okay(form, c[i], file, line); 354 } 355 } 356 } 357 } 358 359 /* 360 * Generate an array of String from a line of conformance datafile. 361 */ 362 private static void prepareColumns(String[] cols, String text, 363 String file, int line, 364 boolean part1test) throws Exception { 365 int index = text.indexOf('#'); 366 if (index != -1) { 367 text = text.substring(0, index); 368 } 369 370 StringTokenizer st = new StringTokenizer(text, ";"); 371 int tokenCount = st.countTokens(); 372 if (tokenCount < 5) { 373 throw new RuntimeException("# of tokens in datafile should be 6, but got: " + tokenCount + " at line " + line + " in " + file); 374 } 375 376 StringBuffer sb = new StringBuffer(); 377 for (int i = 1; i <= 5; i++) { 378 StringTokenizer tst = new StringTokenizer(st.nextToken(), " "); 379 380 while (tst.hasMoreTokens()) { 381 int code = Integer.parseInt(tst.nextToken(), 16); 382 sb.append(Character.toChars(code)); 383 } 384 385 cols[i] = sb.toString(); 386 sb.setLength(0); 387 } 388 389 if (part1test) { 390 charList.set(cols[1].codePointAt(0)); 391 } 392 } 393 394 /* 395 * Show an error message when normalize() didn't return the expected value. 396 * (An exception is sometimes convenient. Therefore, it is commented out 397 * for the moment.) 398 */ 399 private static void error(java.text.Normalizer.Form form, 400 String from, String to, String got, 401 String file, int line) throws Exception { 402 System.err.println("-\t" + form.toString() + ": normalize(" + 403 toHexString(from) + ") doesn't equal <" + toHexString(to) + 404 "> at line " + line + " in " + file + ". Got [" + 405 toHexString(got) + "]"); 406 throw new RuntimeException("Normalization(" + form.toString() + ") failed"); 407 } 408 409 /* 410 * Show an error message when isNormalize() didn't return the expected 411 * value. 412 * (An exception is sometimes convenient. Therefore, it is commented out 413 * for the moment.) 414 */ 415 private static void error(java.text.Normalizer.Form form, String s, 416 String file, int line) throws Exception { 417 System.err.println("\t" + form.toString() + ": isNormalized(" + 418 toHexString(s) + ") returned the wrong value at line " + line + 419 " in " + file); 420 throw new RuntimeException("Normalization(" + form.toString() +") failed"); 421 } 422 423 /* 424 * (For debugging) 425 * Shows a message when normalize() returned the expected value. 426 */ 427 private static void okay(java.text.Normalizer.Form form, 428 String from, String to, String got, 429 String file, int line) { 430 System.out.println("\t" + form.toString() + ": normalize(" + 431 toHexString(from) + ") equals <" + toHexString(to) + 432 "> at line " + line + " in " + file + ". Got [" + 433 toHexString(got) + "]"); 434 } 435 436 /* 437 * (For debugging) 438 * Shows a message when isNormalized() returned the expected value. 439 */ 440 private static void okay(java.text.Normalizer.Form form, String s, 441 String file, int line) { 442 System.out.println("\t" + form.toString() + ": isNormalized(" + 443 toHexString(s) + ") returned the correct value at line " + 444 line + " in " + file); 445 } 446 447 /* 448 * Returns a spece-delimited hex String 449 */ 450 private static String toHexString(String s) { 451 StringBuffer sb = new StringBuffer(" "); 452 453 for (int i = 0; i < s.length(); i++) { 454 sb.append(Integer.toHexString(s.charAt(i))); 455 sb.append(' '); 456 } 457 458 return sb.toString(); 459 } 460 461 /* 462 * Abstract class to call each Normalizer in java.text or sun.text. 463 */ 464 private abstract class TestNormalizer { 465 String name; 466 467 TestNormalizer(String str) { 468 name = str; 469 } 470 471 String getNormalizerName() { 472 return name; 473 } 474 475 abstract String normalize(CharSequence cs, 476 java.text.Normalizer.Form form, 477 int option); 478 479 abstract boolean isNormalized(CharSequence cs, 480 java.text.Normalizer.Form form, 481 int option); 482 } 483 484 /* 485 * For java.text.Normalizer 486 * - normalize(CharSequence, Normalizer.Form) 487 * - isNormalized(CharSequence, Normalizer.Form) 488 */ 489 private class testJavaNormalizer extends TestNormalizer { 490 testJavaNormalizer() { 491 super("java.text.Normalizer"); 492 } 493 494 String normalize(CharSequence cs, 495 java.text.Normalizer.Form form, 496 int option) { 497 return java.text.Normalizer.normalize(cs, form); 498 } 499 500 boolean isNormalized(CharSequence cs, 501 java.text.Normalizer.Form form, 502 int option) { 503 return java.text.Normalizer.isNormalized(cs, form); 504 } 505 } 506 507 /* 508 * For sun.text.Normalizer 509 * - normalize(CharSequence, Normalizer.Form, int) 510 * - isNormalized(CharSequence, Normalizer.Form, int) 511 */ 512 private class testSunNormalizer extends TestNormalizer { 513 testSunNormalizer() { 514 super("sun.text.Normalizer"); 515 } 516 517 String normalize(CharSequence cs, 518 java.text.Normalizer.Form form, 519 int option) { 520 return sun.text.Normalizer.normalize(cs, form, option); 521 } 522 523 boolean isNormalized(CharSequence cs, 524 java.text.Normalizer.Form form, 525 int option) { 526 return sun.text.Normalizer.isNormalized(cs, form, option); 527 } 528 } 529 }