1 /* 2 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 /* 24 * @test 25 * @bug 4221795 8032446 26 * @summary Confirm Normalizer's fundamental behavior. Imported from ICU4J 3.2's 27 * src/com/ibm/icu/dev/test and modified. 28 * @modules java.base/sun.text java.base/sun.text.normalizer 29 * @library /java/text/testlib 30 * @compile -XDignore.symbol.file ICUBasicTest.java 31 * @run main/timeout=30 ICUBasicTest 32 */ 33 34 /* 35 ******************************************************************************* 36 * Copyright (C) 1996-2004, International Business Machines Corporation and * 37 * others. All Rights Reserved. 
 *******************************************************************************
 */

import sun.text.Normalizer;
import sun.text.normalizer.NormalizerBase;
import sun.text.normalizer.NormalizerImpl;

import static java.text.Normalizer.Form.*;
import static sun.text.normalizer.NormalizerBase.Mode.*;

/*
 * Basic sanity tests for the JDK-internal Unicode normalizer, ported from
 * ICU4J's test suite. Each TestXxx method is discovered and invoked by the
 * IntlTest harness (from /java/text/testlib); errln() records a failure,
 * logln() records verbose-only output.
 * NOTE(review): IntlTest and the 'verbose' flag are defined outside this
 * file — behavior of those helpers is assumed, not verified here.
 */
public class ICUBasicTest extends IntlTest {

    public static void main(String[] args) throws Exception {
        new ICUBasicTest().run(args);
    }

    /*
     * Normalization modes
     */
    private static final NormalizerBase.Mode NFCmode  = NormalizerBase.NFC;
    private static final NormalizerBase.Mode NFDmode  = NormalizerBase.NFD;
    private static final NormalizerBase.Mode NFKCmode = NormalizerBase.NFKC;
    private static final NormalizerBase.Mode NFKDmode = NormalizerBase.NFKD;
    private static final NormalizerBase.Mode NONEmode = NormalizerBase.NONE;

    /*
     * Normalization options
     */

    /* Normal Unicode versions */
    private static final int UNICODE_3_2_0  = Normalizer.UNICODE_3_2;
    private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST;

    /*
     * Special cases for UAX #15 bug
     * see Unicode Public Review Issue #29
     * at http://www.unicode.org/review/resolved-pri.html#pri29
     *
     * Note:
     * PRI #29 is supported in Unicode 4.1.0. Therefore, expected results are
     * different for earlier Unicode versions.
     */
    public void TestComposition() {

        // Each case: (form, Unicode version option, input, expected output).
        // The same input is checked under both Unicode 3.2.0 and the latest
        // supported version; for these PRI #29 sequences the expected results
        // happen to coincide.
        final TestCompositionCase cases[] = new TestCompositionCase[] {
            // L, V/T with intervening combining marks: must NOT compose
            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\u1100\u0300\u1161\u0327",
                "\u1100\u0300\u1161\u0327"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\u1100\u0300\u1161\u0327",
                "\u1100\u0300\u1161\u0327"),

            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\u1100\u0300\u1161\u0327\u11a8",
                "\u1100\u0300\u1161\u0327\u11a8"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\u1100\u0300\u1161\u0327\u11a8",
                "\u1100\u0300\u1161\u0327\u11a8"),

            // Combining marks after a precomposed Hangul syllable get
            // canonically reordered but do not block each other
            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\uac00\u0300\u0327\u11a8",
                "\uac00\u0327\u0300\u11a8"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\uac00\u0300\u0327\u11a8",
                "\uac00\u0327\u0300\u11a8"),

            // Oriya vowel signs separated by a combining mark: no composition
            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\u0b47\u0300\u0b3e",
                "\u0b47\u0300\u0b3e"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\u0b47\u0300\u0b3e",
                "\u0b47\u0300\u0b3e"),
        };

        String output;
        int i, length;

        for (i=0; i<cases.length; ++i) {
            output = Normalizer.normalize(cases[i].input,
                                          cases[i].form, cases[i].options);
            if (!output.equals(cases[i].expect)) {
                errln("unexpected result for case " + i + ". Expected="
                      + cases[i].expect + ", Actual=" + output);
            } else if (verbose) {
                logln("expected result for case " + i + ". Expected="
                      + cases[i].expect + ", Actual=" + output);
            }
        }
    }

    // Simple value holder for one TestComposition case: a normalization
    // form, a Unicode-version option, an input string and its expectation.
    private final static class TestCompositionCase {
        public java.text.Normalizer.Form form;
        public int options;
        public String input, expect;

        TestCompositionCase(java.text.Normalizer.Form form,
                            int options,
                            String input,
                            String expect) {
            this.form    = form;
            this.options = options;
            this.input   = input;
            this.expect  = expect;
        }
    }

    /*
     * Added in order to detect a regression.
     * NFD of Tibetan vowel signs: U+0F73 and U+0F75 decompose, and the
     * resulting combining marks must end up in canonical order.
     */
    public void TestCombiningMarks() {
        String src      = "\u0f71\u0f72\u0f73\u0f74\u0f75";
        String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";
        String result   = NormalizerBase.normalize(src, NFD);

        if (!expected.equals(result)) {
            errln("Reordering of combining marks failed. Expected: " +
                  toHexString(expected) + " Got: "+ toHexString(result));
        }
    }

    /*
     * Added in order to detect a regression.
     * This Bengali sequence is already in NFC; normalizing must be a no-op.
     */
    public void TestBengali() throws Exception {
        String input  = "\u09bc\u09be\u09cd\u09be";
        String output = NormalizerBase.normalize(input, NFC);

        if (!input.equals(output)) {
            errln("ERROR in NFC of string");
        }
        return;
    }


    /*
     * Added in order to detect a regression.
     */
    /**
     * Test for a problem found by Verisign.  Problem is that
     * characters at the start of a string are not put in canonical
     * order correctly by compose() if there is no starter.
     * (The inputs are all-combining-mark Hebrew point sequences; both NFD
     * and NFC should produce the same canonically reordered output.)
     */
    public void TestVerisign() throws Exception {
        String[] inputs = {
            "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
            "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad"
        };
        String[] outputs = {
            "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f",
            "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4"
        };

        for (int i = 0; i < inputs.length; ++i) {
            String input  = inputs[i];
            String output = outputs[i];

            // With no starter present, decomposition reduces to reordering.
            String result = NormalizerBase.normalize(input, NFD);
            if (!result.equals(output)) {
                errln("FAIL input: " + toHexString(input) + "\n" +
                      " decompose: " + toHexString(result) + "\n" +
                      " expected: " + toHexString(output));
            }

            // Composition cannot compose anything here either; it must still
            // produce the canonically ordered sequence.
            result = NormalizerBase.normalize(input, NFC);
            if (!result.equals(output)) {
                errln("FAIL input: " + toHexString(input) + "\n" +
                      " compose: " + toHexString(result) + "\n" +
                      " expected: " + toHexString(output));
            }
        }
    }

    /**
     * Test for a problem that showed up just before ICU 1.6 release
     * having to do with combining characters with an index of zero.
     * Such characters do not participate in any canonical
     * decompositions.  However, having an index of zero means that
     * they all share one typeMask[] entry, that is, they all have to
     * map to the same canonical class, which is not the case, in
     * reality.
     */
    public void TestZeroIndex() throws Exception {
        String[] DATA = {
            // Expect col1 x COMPOSE_COMPAT => col2
            // Expect col2 x DECOMP => col3
            "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300",
            "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300",
            "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300",
            "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327",
            "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321",
        };

        for (int i=0; i<DATA.length; i+=3) {
            String a = DATA[i];
            String b = NormalizerBase.normalize(a, NFKC);
            String exp = DATA[i+1];

            if (b.equals(exp)) {
                logln("Ok: " + toHexString(a) + " x COMPOSE_COMPAT => " +
                      toHexString(b));
            } else {
                errln("FAIL: " + toHexString(a) + " x COMPOSE_COMPAT => " +
                      toHexString(b) + ", expect " + toHexString(exp));
            }

            // Round-trip the composed result back through NFD.
            a = NormalizerBase.normalize(b, NFD);
            exp = DATA[i+2];
            if (a.equals(exp)) {
                logln("Ok: " + toHexString(b) + " x DECOMP => " +
                      toHexString(a));
            } else {
                errln("FAIL: " + toHexString(b) + " x DECOMP => " +
                      toHexString(a) + ", expect " + toHexString(exp));
            }
        }
    }

    /**
     * Make sure characters in the CompositionExclusion.txt list do not get
     * composed to.
     */
    public void TestCompositionExclusion() throws Exception {
        // This list is generated from CompositionExclusion.txt.
        // Update whenever the normalizer tables are updated.  Note
        // that we test all characters listed, even those that can be
        // derived from the Unicode DB and are therefore commented
        // out.

        /*
         * kyuka's note:
         * Original data seemed to be based on Unicode 3.0.0(the initial
         * Composition Exclusions list) and seemed to have some mistakes.
         * Updated in order to correct mistakes and to support Unicode 4.0.0.
         * And, this table can be used also for Unicode 3.2.0.
         */
        // Each entry is either {single character} or {rangeStart, rangeEnd}
        // (inclusive); supplementary characters appear as surrogate pairs.
        String[][] EXCLUDED_UNICODE_3_2_0 = {
            {"\u0340"},
            {"\u0341"},
            {"\u0343"},
            {"\u0344"},
            {"\u0374"},
            {"\u037E"},
            {"\u0387"},
            {"\u0958"},
            {"\u0959", "\u095F"},
            {"\u09DC"},
            {"\u09DD"},
            {"\u09DF"},
            {"\u0A33"},
            {"\u0A36"},
            {"\u0A59", "\u0A5B"},
            {"\u0A5E"},
            {"\u0B5C"},
            {"\u0B5D"},
            {"\u0F43"},
            {"\u0F4D"},
            {"\u0F52"},
            {"\u0F57"},
            {"\u0F5C"},
            {"\u0F69"},
            {"\u0F73"},
            {"\u0F75"},
            {"\u0F76"},
            {"\u0F78"},
            {"\u0F81"},
            {"\u0F93"},
            {"\u0F9D"},
            {"\u0FA2"},
            {"\u0FA7"},
            {"\u0FAC"},
            {"\u0FB9"},
            {"\u1F71"},
            {"\u1F73"},
            {"\u1F75"},
            {"\u1F77"},
            {"\u1F79"},
            {"\u1F7B"},
            {"\u1F7D"},
            {"\u1FBB"},
            {"\u1FBE"},
            {"\u1FC9"},
            {"\u1FCB"},
            {"\u1FD3"},
            {"\u1FDB"},
            {"\u1FE3"},
            {"\u1FEB"},
            {"\u1FEE"},
            {"\u1FEF"},
            {"\u1FF9"},
            {"\u1FFB"},
            {"\u1FFD"},
            {"\u2000"},
            {"\u2001"},
            {"\u2126"},
            {"\u212A"},
            {"\u212B"},
            {"\u2329"},
            {"\u232A"},
            {"\u2ADC"},
            {"\uF900", "\uFA0D"},
            {"\uFA10"},
            {"\uFA12"},
            {"\uFA15", "\uFA1E"},
            {"\uFA20"},
            {"\uFA22"},
            {"\uFA25"},
            {"\uFA26"},
            {"\uFA2A", "\uFA2D"},
            {"\uFA30", "\uFA6A"},
            {"\uFB1D"},
            {"\uFB1F"},
            {"\uFB2A", "\uFB36"},
            {"\uFB38", "\uFB3C"},
            {"\uFB3E"},
            {"\uFB40"},
            {"\uFB41"},
            {"\uFB43"},
            {"\uFB44"},
            {"\uFB46", "\uFB4E"},
            {"\uD834\uDD5E", "\uD834\uDD64"},
            {"\uD834\uDDBB", "\uD834\uDDC0"},
            {"\uD87E\uDC00", "\uD87E\uDE1D"}
        };

        // NOTE(review): declared but currently unused — presumably a
        // placeholder for exclusions added after Unicode 3.2.0.
        String[][] EXCLUDED_LATEST = {

        };

        for (int i = 0; i < EXCLUDED_UNICODE_3_2_0.length; ++i) {
            if (EXCLUDED_UNICODE_3_2_0[i].length == 1) {
                checkCompositionExclusion_320(EXCLUDED_UNICODE_3_2_0[i][0]);
            } else {
                // Expand an inclusive code-point range and test each member.
                int from, to;
                from = Character.codePointAt(EXCLUDED_UNICODE_3_2_0[i][0], 0);
                to   = Character.codePointAt(EXCLUDED_UNICODE_3_2_0[i][1], 0);

                for (int j = from; j <= to; j++) {
                    checkCompositionExclusion_320(String.valueOf(Character.toChars(j)));
                }
            }
        }
    }

    /*
     * Verifies that a composition-excluded character does NOT come back
     * after a compatibility decomposition followed by composition
     * (NFKD then NFC must not reproduce the original character),
     * both for the latest Unicode version and for Unicode 3.2.0.
     */
    private void checkCompositionExclusion_320(String s) throws Exception {
        String a = String.valueOf(s);
        String b = NormalizerBase.normalize(a, NFKD);
        String c = NormalizerBase.normalize(b, NFC);

        // c.equals(a) means the excluded character was composed to — a bug.
        if (c.equals(a)) {
            errln("FAIL: " + toHexString(a) + " x DECOMP_COMPAT => " +
                  toHexString(b) + " x COMPOSE => " +
                  toHexString(c) + " for the latest Unicode");
        } else if (verbose) {
            logln("Ok: " + toHexString(a) + " x DECOMP_COMPAT => " +
                  toHexString(b) + " x COMPOSE => " +
                  toHexString(c) + " for the latest Unicode");
        }

        // Same check against the Unicode 3.2.0 normalization tables.
        b = NormalizerBase.normalize(a, NFKD, Normalizer.UNICODE_3_2);
        c = NormalizerBase.normalize(b, NFC, Normalizer.UNICODE_3_2);
        if (c.equals(a)) {
            errln("FAIL: " + toHexString(a) + " x DECOMP_COMPAT => " +
                  toHexString(b) + " x COMPOSE => " +
                  toHexString(c) + " for Unicode 3.2.0");
        } else if (verbose) {
            logln("Ok: " + toHexString(a) + " x DECOMP_COMPAT => " +
                  toHexString(b) + " x COMPOSE => " +
                  toHexString(c) + " for Unicode 3.2.0");
        }
    }

    // U+0F77 decomposes (compatibly) but must never be re-composed;
    // the decomposed Tibetan sequence is stable under all four forms.
    public void TestTibetan() throws Exception {
        String[][] decomp = {
            { "\u0f77", "\u0f77", "\u0fb2\u0f71\u0f80" }
        };
        String[][] compose = {
            { "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80" }
        };

        staticTest(NFD, decomp, 1);
        staticTest(NFKD,decomp, 2);
        staticTest(NFC, compose, 1);
        staticTest(NFKC,compose, 2);
    }

    public void TestExplodingBase() throws Exception{
        // \u017f - Latin small letter long s
        // \u0307 - combining dot above
        // \u1e61 - Latin small letter s with dot above
        // \u1e9b - Latin small letter long s with dot above
        String[][] canon = {
            // Input               Decomposed           Composed
            { "Tschu\u017f",       "Tschu\u017f",       "Tschu\u017f"    },
            { "Tschu\u1e9b",       "Tschu\u017f\u0307", "Tschu\u1e9b"    },
        };
        String[][] compat = {
            // Input               Decomposed           Composed
            { "\u017f",            "s",                 "s"              },
            { "\u1e9b",            "s\u0307",           "\u1e61"         },
        };

        staticTest(NFD, canon,  1);
        staticTest(NFC, canon,  2);
        staticTest(NFKD, compat, 1);
        staticTest(NFKC, compat, 2);
    }

    // Canonical test data: { input, NFD expectation, NFC expectation }.
    private String[][] canonTests = {
        // Input                Decomposed              Composed

        { "cat",                "cat",                  "cat"               },
        { "\u00e0ardvark",      "a\u0300ardvark",       "\u00e0ardvark",    },

        // D-dot_above
        { "\u1e0a",             "D\u0307",              "\u1e0a"            },

        // D dot_above
        { "D\u0307",            "D\u0307",              "\u1e0a"            },

        // D-dot_below dot_above
        { "\u1e0c\u0307",       "D\u0323\u0307",        "\u1e0c\u0307"      },

        // D-dot_above dot_below
        { "\u1e0a\u0323",       "D\u0323\u0307",        "\u1e0c\u0307"      },

        // D dot_below dot_above
        { "D\u0307\u0323",      "D\u0323\u0307",        "\u1e0c\u0307"      },

        // D dot_below cedilla dot_above
        { "\u1e10\u0307\u0323", "D\u0327\u0323\u0307",  "\u1e10\u0323\u0307"},

        // D dot_above ogonek dot_below
        { "D\u0307\u0328\u0323","D\u0328\u0323\u0307",  "\u1e0c\u0328\u0307"},

        // E-macron-grave
        { "\u1E14",             "E\u0304\u0300",        "\u1E14"            },

        // E-macron + grave
        { "\u0112\u0300",       "E\u0304\u0300",        "\u1E14"            },

        // E-grave + macron
        { "\u00c8\u0304",       "E\u0300\u0304",        "\u00c8\u0304"      },

        // angstrom_sign
        { "\u212b",             "A\u030a",              "\u00c5"            },

        // A-ring
        { "\u00c5",             "A\u030a",              "\u00c5"            },
        { "\u00c4ffin",         "A\u0308ffin",          "\u00c4ffin"        },
        { "\u00c4\uFB03n",      "A\u0308\uFB03n",       "\u00c4\uFB03n"     },

        //updated with 3.0
        { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        },
        { "\u00fd\uFB03n",      "y\u0301\uFB03n",       "\u00fd\uFB03n"     },

        { "Henry IV",           "Henry IV",             "Henry IV"          },
        { "Henry \u2163",       "Henry \u2163",         "Henry \u2163"      },

        // ga(Zenkaku-Katakana)
        { "\u30AC",             "\u30AB\u3099",         "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Zenkaku)
        { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            },

        // ka(Hankaku-Katakana) + ten(Hankaku-Katakana)
        { "\uFF76\uFF9E",       "\uFF76\uFF9E",         "\uFF76\uFF9E"      },

        // ka(Zenkaku-Katakana) + ten(Hankaku)
        { "\u30AB\uFF9E",       "\u30AB\uFF9E",         "\u30AB\uFF9E"      },
        // ka(Hankaku-Katakana) + ten(Zenkaku)
        { "\uFF76\u3099",       "\uFF76\u3099",         "\uFF76\u3099"      },

        { "A\u0300\u0316",      "A\u0316\u0300",        "\u00C0\u0316"      },

        // Supplementary (musical symbol) characters as surrogate pairs
        { "\ud834\udd5e\ud834\udd57\ud834\udd65\ud834\udd5e",
          "\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65",
          "\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65" },
    };

    // Compatibility test data: { input, NFKD expectation, NFKC expectation }.
    private String[][] compatTests = {
        // Input                Decomposed              Composed

        { "cat",                "cat",                  "cat"               },

        // Alef-Lamed vs. Alef, Lamed
        { "\uFB4f",             "\u05D0\u05DC",         "\u05D0\u05DC",     },

        { "\u00C4ffin",         "A\u0308ffin",          "\u00C4ffin"        },

        // ffi ligature -> f + f + i
        { "\u00C4\uFB03n",      "A\u0308ffin",          "\u00C4ffin"        },

        //updated for 3.0
        { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        },

        // ffi ligature -> f + f + i
        { "\u00fd\uFB03n",      "y\u0301ffin",          "\u00fdffin"        },

        { "Henry IV",           "Henry IV",             "Henry IV"          },
        { "Henry \u2163",       "Henry IV",             "Henry IV"          },

        // ga(Zenkaku-Katakana)
        { "\u30AC",             "\u30AB\u3099",         "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Zenkaku)
        { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            },

        // ka(Hankaku-Katakana) + ten(Zenkaku)
        { "\uFF76\u3099",       "\u30AB\u3099",         "\u30AC"            },

        /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
        // ka(Hankaku-Katakana) + ten(Hankaku)
        { "\uFF76\uFF9E",       "\u30AB\u3099",         "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Hankaku)
        { "\u30AB\uFF9E",       "\u30AB\u3099",         "\u30AC"            },
    };

    public void TestNFD() throws Exception{
        staticTest(NFD, canonTests, 1);
    }

    public void TestNFC() throws Exception{
        staticTest(NFC, canonTests, 2);
    }

    public void TestNFKD() throws Exception{
        staticTest(NFKD, compatTests, 1);
    }

    public void TestNFKC() throws Exception{
        staticTest(NFKC, compatTests, 2);
    }

    /*
     * Runs every row of a test table through the public
     * java.text.Normalizer API with the given form, comparing against
     * column outCol (1 = decomposed expectation, 2 = composed expectation).
     */
    private void staticTest(java.text.Normalizer.Form form,
                            String[][] tests,
                            int outCol) throws Exception {
        for (int i = 0; i < tests.length; i++) {
            String input = tests[i][0];
            logln("Normalizing '" + input + "' (" + toHexString(input) + ")" );

            String expect =tests[i][outCol];
            String output = java.text.Normalizer.normalize(input, form);

            if (!output.equals(expect)) {
                errln("FAIL: case " + i
                    + " expected '" + expect + "' (" + toHexString(expect) + ")"
                    + " but got '" + output + "' (" + toHexString(output) + ")"
                );
            }
        }
    }

    // With Canonical decomposition, Hangul syllables should get decomposed
    // into Jamo, but Jamo characters should not be decomposed into
    // conjoining Jamo
    private String[][] hangulCanon = {
        // Input                Decomposed              Composed
        { "\ud4db",             "\u1111\u1171\u11b6",   "\ud4db"        },
        { "\u1111\u1171\u11b6", "\u1111\u1171\u11b6",   "\ud4db"        },
    };

    public void TestHangulCompose() throws Exception{
        logln("Canonical composition...");
        staticTest(NFC, hangulCanon,  2);
    }

    public void TestHangulDecomp() throws Exception{
        logln("Canonical decomposition...");
        staticTest(NFD, hangulCanon, 1);
    }

}