/*
 * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 * @test
 * @bug 4221795 8032446 8174270
 * @summary Confirm Normalizer's fundamental behavior. Imported from ICU4J 3.2's
 * src/com/ibm/icu/dev/test and modified.
 * @modules java.base/sun.text java.base/jdk.internal.icu.text
 * @library /java/text/testlib
 * @compile -XDignore.symbol.file ICUBasicTest.java
 * @run main/timeout=30 ICUBasicTest
 */

/*
 *******************************************************************************
 * Copyright (C) 1996-2004, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

import sun.text.Normalizer;
import jdk.internal.icu.text.NormalizerBase;

import static java.text.Normalizer.Form.*;

/*
 * Basic sanity tests for the JDK-internal Unicode normalizer. Each public
 * TestXxx method is discovered and run reflectively by the IntlTest harness
 * (from /java/text/testlib); failures are reported via errln().
 */
public class ICUBasicTest extends IntlTest {

    public static void main(String[] args) throws Exception {
        new ICUBasicTest().run(args);
    }

    /*
     * Normalization modes (NormalizerBase API).
     * Note: these constants are kept for reference; the tests below use the
     * java.text.Normalizer.Form values (statically imported above) instead.
     */
    private static final NormalizerBase.Mode NFCmode  = NormalizerBase.NFC;
    private static final NormalizerBase.Mode NFDmode  = NormalizerBase.NFD;
    private static final NormalizerBase.Mode NFKCmode = NormalizerBase.NFKC;
    private static final NormalizerBase.Mode NFKDmode = NormalizerBase.NFKD;
    private static final NormalizerBase.Mode NONEmode = NormalizerBase.NONE;

    /*
     * Normalization options
     */

    /* Normal Unicode versions: normalization may be pinned to Unicode 3.2.0
     * data, or use the latest Unicode version supported by the JDK. */
    private static final int UNICODE_3_2_0  = Normalizer.UNICODE_3_2;
    private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST;

    /*
     * Special cases for UAX #15 bug
     * see Unicode Public Review Issue #29
     * at http://www.unicode.org/review/resolved-pri.html#pri29
     *
     * Note:
     * PRI #29 is supported in Unicode 4.1.0. Therefore, expected results are
     * different for earlier Unicode versions.
     */
    public void TestComposition() {

        // Each case: (form, Unicode version option, input, expected output).
        // The inputs are the PRI #29 discontiguous-composition cases (Hangul
        // jamo / Oriya vowel sequences interrupted by a combining mark);
        // under the PRI #29 fix they must NOT compose, so input == expected
        // in most rows below.
        final TestCompositionCase cases[] = new TestCompositionCase[] {
            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\u1100\u0300\u1161\u0327",
                "\u1100\u0300\u1161\u0327"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\u1100\u0300\u1161\u0327",
                "\u1100\u0300\u1161\u0327"),

            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\u1100\u0300\u1161\u0327\u11a8",
                "\u1100\u0300\u1161\u0327\u11a8"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\u1100\u0300\u1161\u0327\u11a8",
                "\u1100\u0300\u1161\u0327\u11a8"),

            // Here only canonical reordering of the combining marks
            // (U+0300/U+0327) is expected, not composition.
            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\uac00\u0300\u0327\u11a8",
                "\uac00\u0327\u0300\u11a8"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\uac00\u0300\u0327\u11a8",
                "\uac00\u0327\u0300\u11a8"),

            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\u0b47\u0300\u0b3e",
                "\u0b47\u0300\u0b3e"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\u0b47\u0300\u0b3e",
                "\u0b47\u0300\u0b3e"),
        };

        String output;
        int i, length;

        for (i=0; i<cases.length; ++i) {
            // sun.text.Normalizer.normalize takes the extra "options" int,
            // which carries the Unicode-version selector.
            output = Normalizer.normalize(cases[i].input,
                                          cases[i].form, cases[i].options);
            if (!output.equals(cases[i].expect)) {
                errln("unexpected result for case " + i + ". Expected="
                      + cases[i].expect + ", Actual=" + output);
            } else if (verbose) {
                logln("expected result for case " + i + ". Expected="
                      + cases[i].expect + ", Actual=" + output);
            }
        }
    }

    /*
     * Simple value holder for one TestComposition row:
     * normalize(input, form, options) is expected to yield "expect".
     */
    private final static class TestCompositionCase {
        public java.text.Normalizer.Form form;
        public int options;
        public String input, expect;

        TestCompositionCase(java.text.Normalizer.Form form,
                            int options,
                            String input,
                            String expect) {
            this.form    = form;
            this.options = options;
            this.input   = input;
            this.expect  = expect;
        }
    }

    /*
     * Added in order to detect a regression.
     *
     * Checks NFD of Tibetan vowel signs: U+0F73 and U+0F75 decompose
     * (U+0F73 -> U+0F71 U+0F72, U+0F75 -> U+0F71 U+0F74 -- see expected
     * string below), and the resulting marks must be put in canonical order.
     */
    public void TestCombiningMarks() {
        String src      = "\u0f71\u0f72\u0f73\u0f74\u0f75";
        String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";
        String result   = NormalizerBase.normalize(src, NFD);

        if (!expected.equals(result)) {
            errln("Reordering of combining marks failed. Expected: " +
                  toHexString(expected) + " Got: "+ toHexString(result));
        }
    }

    /*
     * Added in order to detect a regression.
     *
     * The Bengali sequence below is expected to be unchanged by NFC
     * (it is already in composed form).
     */
    public void TestBengali() throws Exception {
        String input = "\u09bc\u09be\u09cd\u09be";
        String output=NormalizerBase.normalize(input, NFC);

        if (!input.equals(output)) {
            errln("ERROR in NFC of string");
        }
        return;
    }


    /*
     * Added in order to detect a regression.
     */
    /**
     * Test for a problem found by Verisign.  Problem is that
     * characters at the start of a string are not put in canonical
     * order correctly by compose() if there is no starter.
     */
    public void TestVerisign() throws Exception {
        // Hebrew combining marks only -- no starter anywhere in the string.
        String[] inputs = {
            "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
            "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad"
        };
        // With no starter, both NFD and NFC reduce to canonical reordering,
        // so the same expected string is used for both checks below.
        String[] outputs = {
            "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f",
            "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4"
        };

        for (int i = 0; i < inputs.length; ++i) {
            String input  = inputs[i];
            String output = outputs[i];

            String result = NormalizerBase.normalize(input, NFD);
            if (!result.equals(output)) {
                errln("FAIL input: " + toHexString(input) + "\n" +
                      " decompose: " + toHexString(result) + "\n" +
                      "  expected: " + toHexString(output));
            }

            result = NormalizerBase.normalize(input, NFC);
            if (!result.equals(output)) {
                errln("FAIL input: " + toHexString(input) + "\n" +
                      "   compose: " + toHexString(result) + "\n" +
                      "  expected: " + toHexString(output));
            }
        }
    }

    /**
     * Test for a problem that showed up just before ICU 1.6 release
     * having to do with combining characters with an index of zero.
     * Such characters do not participate in any canonical
     * decompositions.  However, having an index of zero means that
     * they all share one typeMask[] entry, that is, they all have to
     * map to the same canonical class, which is not the case, in
     * reality.
     */
    public void TestZeroIndex() throws Exception {
        String[] DATA = {
            // Expect col1 x COMPOSE_COMPAT => col2
            // Expect col2 x DECOMP => col3
            "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300",
            "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300",
            "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300",
            "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327",
            "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321",
        };

        // Rows are triples; step by 3.
        for (int i=0; i<DATA.length; i+=3) {
            String a = DATA[i];
            String b = NormalizerBase.normalize(a, NFKC);
            String exp = DATA[i+1];

            if (b.equals(exp)) {
                logln("Ok: " + toHexString(a) + " x COMPOSE_COMPAT => " +
                      toHexString(b));
            } else {
                errln("FAIL: " + toHexString(a) + " x COMPOSE_COMPAT => " +
                      toHexString(b) + ", expect " + toHexString(exp));
            }

            a = NormalizerBase.normalize(b, NFD);
            exp = DATA[i+2];
            if (a.equals(exp)) {
                logln("Ok: " + toHexString(b) + " x DECOMP => " +
                      toHexString(a));
            } else {
                errln("FAIL: " + toHexString(b) + " x DECOMP => " +
                      toHexString(a) + ", expect " + toHexString(exp));
            }
        }
    }

    /**
     * Make sure characters in the CompositionExclusion.txt list do not get
     * composed to.
     */
    public void TestCompositionExclusion() throws Exception {
        // This list is generated from CompositionExclusion.txt.
        // Update whenever the normalizer tables are updated.  Note
        // that we test all characters listed, even those that can be
        // derived from the Unicode DB and are therefore commented
        // out.

        /*
         * kyuka's note:
         * Original data seemed to be based on Unicode 3.0.0(the initial
         * Composition Exclusions list) and seemed to have some mistakes.
         * Updated in order to correct mistakes and to support Unicode 4.0.0.
         * And, this table can be used also for Unicode 3.2.0.
         */
        // Each entry is either a single code point {cp} or an inclusive
        // range {from, to}; supplementary code points are written as
        // surrogate pairs.
        String[][] EXCLUDED_UNICODE_3_2_0 = {
            {"\u0340"},
            {"\u0341"},
            {"\u0343"},
            {"\u0344"},
            {"\u0374"},
            {"\u037E"},
            {"\u0387"},
            {"\u0958"},
            {"\u0959", "\u095F"},
            {"\u09DC"},
            {"\u09DD"},
            {"\u09DF"},
            {"\u0A33"},
            {"\u0A36"},
            {"\u0A59", "\u0A5B"},
            {"\u0A5E"},
            {"\u0B5C"},
            {"\u0B5D"},
            {"\u0F43"},
            {"\u0F4D"},
            {"\u0F52"},
            {"\u0F57"},
            {"\u0F5C"},
            {"\u0F69"},
            {"\u0F73"},
            {"\u0F75"},
            {"\u0F76"},
            {"\u0F78"},
            {"\u0F81"},
            {"\u0F93"},
            {"\u0F9D"},
            {"\u0FA2"},
            {"\u0FA7"},
            {"\u0FAC"},
            {"\u0FB9"},
            {"\u1F71"},
            {"\u1F73"},
            {"\u1F75"},
            {"\u1F77"},
            {"\u1F79"},
            {"\u1F7B"},
            {"\u1F7D"},
            {"\u1FBB"},
            {"\u1FBE"},
            {"\u1FC9"},
            {"\u1FCB"},
            {"\u1FD3"},
            {"\u1FDB"},
            {"\u1FE3"},
            {"\u1FEB"},
            {"\u1FEE"},
            {"\u1FEF"},
            {"\u1FF9"},
            {"\u1FFB"},
            {"\u1FFD"},
            {"\u2000"},
            {"\u2001"},
            {"\u2126"},
            {"\u212A"},
            {"\u212B"},
            {"\u2329"},
            {"\u232A"},
            {"\u2ADC"},
            {"\uF900", "\uFA0D"},
            {"\uFA10"},
            {"\uFA12"},
            {"\uFA15", "\uFA1E"},
            {"\uFA20"},
            {"\uFA22"},
            {"\uFA25"},
            {"\uFA26"},
            {"\uFA2A", "\uFA2D"},
            {"\uFA30", "\uFA6A"},
            {"\uFB1D"},
            {"\uFB1F"},
            {"\uFB2A", "\uFB36"},
            {"\uFB38", "\uFB3C"},
            {"\uFB3E"},
            {"\uFB40"},
            {"\uFB41"},
            {"\uFB43"},
            {"\uFB44"},
            {"\uFB46", "\uFB4E"},
            {"\uD834\uDD5E", "\uD834\uDD64"},
            {"\uD834\uDDBB", "\uD834\uDDC0"},
            {"\uD87E\uDC00", "\uD87E\uDE1D"}
        };

        // NOTE(review): declared but currently empty and never iterated --
        // additions for post-3.2.0 Unicode versions would go here.
        String[][] EXCLUDED_LATEST = {

        };

        for (int i = 0; i < EXCLUDED_UNICODE_3_2_0.length; ++i) {
            if (EXCLUDED_UNICODE_3_2_0[i].length == 1) {
                // Single code point entry.
                checkCompositionExclusion_320(EXCLUDED_UNICODE_3_2_0[i][0]);
            } else {
                // Range entry: expand {from, to} and check every code point.
                int from, to;
                from = Character.codePointAt(EXCLUDED_UNICODE_3_2_0[i][0], 0);
                to   = Character.codePointAt(EXCLUDED_UNICODE_3_2_0[i][1], 0);

                for (int j = from; j <= to; j++) {
                    checkCompositionExclusion_320(String.valueOf(Character.toChars(j)));
                }
            }
        }
    }

    /*
     * Verifies that the given excluded character does NOT survive a
     * NFKD -> NFC round trip: if c equals the original, the character was
     * (re)composed to, which violates the composition exclusion.
     * Despite the "_320" name, this checks both the latest Unicode version
     * (default normalize overloads) and Unicode 3.2.0 (explicit option).
     */
    private void checkCompositionExclusion_320(String s) throws Exception {
        String a = String.valueOf(s);
        String b = NormalizerBase.normalize(a, NFKD);
        String c = NormalizerBase.normalize(b, NFC);

        if (c.equals(a)) {
            errln("FAIL: " + toHexString(a) + " x DECOMP_COMPAT => " +
                  toHexString(b) + " x COMPOSE => " +
                  toHexString(c) + " for the latest Unicode");
        } else if (verbose) {
            logln("Ok: " + toHexString(a) + " x DECOMP_COMPAT => " +
                  toHexString(b) + " x COMPOSE => " +
                  toHexString(c) + " for the latest Unicode");
        }

        b = NormalizerBase.normalize(a, NFKD, Normalizer.UNICODE_3_2);
        c = NormalizerBase.normalize(b, NFC, Normalizer.UNICODE_3_2);
        if (c.equals(a)) {
            errln("FAIL: " + toHexString(a) + " x DECOMP_COMPAT => " +
                  toHexString(b) + " x COMPOSE => " +
                  toHexString(c) + " for Unicode 3.2.0");
        } else if (verbose) {
            logln("Ok: " + toHexString(a) + " x DECOMP_COMPAT => " +
                  toHexString(b) + " x COMPOSE => " +
                  toHexString(c) + " for Unicode 3.2.0");
        }
    }

    /*
     * Tibetan vowel sign U+0F77: excluded from composition; NFD keeps it,
     * while NFKD decomposes it (column 3 is the NFKD expectation).
     */
    public void TestTibetan() throws Exception {
        String[][] decomp = {
            { "\u0f77", "\u0f77", "\u0fb2\u0f71\u0f80" }
        };
        String[][] compose = {
            { "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80" }
        };

        staticTest(NFD, decomp,  1);
        staticTest(NFKD,decomp,  2);
        staticTest(NFC, compose, 1);
        staticTest(NFKC,compose, 2);
    }

    public void TestExplodingBase() throws Exception{
        // \u017f - Latin small letter long s
        // \u0307 - combining dot above
        // \u1e61 - Latin small letter s with dot above
        // \u1e9b - Latin small letter long s with dot above
        String[][] canon = {
            // Input                Decomposed              Composed
            { "Tschu\u017f",        "Tschu\u017f",          "Tschu\u017f"    },
            { "Tschu\u1e9b",        "Tschu\u017f\u0307",    "Tschu\u1e9b"    },
        };
        String[][] compat = {
            // Input                Decomposed              Composed
            { "\u017f",             "s",                    "s"              },
            { "\u1e9b",             "s\u0307",              "\u1e61"         },
        };

        staticTest(NFD,  canon,  1);
        staticTest(NFC,  canon,  2);
        staticTest(NFKD, compat, 1);
        staticTest(NFKC, compat, 2);
    }

    // Shared canonical (NFD/NFC) test data: column 1 = input,
    // column 2 = expected NFD, column 3 = expected NFC.
    private String[][] canonTests = {
        // Input                Decomposed              Composed

        { "cat",                "cat",                  "cat"               },
        { "\u00e0ardvark",      "a\u0300ardvark",       "\u00e0ardvark",    },

        // D-dot_above
        { "\u1e0a",             "D\u0307",              "\u1e0a"            },

        // D dot_above
        { "D\u0307",            "D\u0307",              "\u1e0a"            },

        // D-dot_below dot_above
        { "\u1e0c\u0307",       "D\u0323\u0307",        "\u1e0c\u0307"      },

        // D-dot_above dot_below
        { "\u1e0a\u0323",       "D\u0323\u0307",        "\u1e0c\u0307"      },

        // D dot_below dot_above
        { "D\u0307\u0323",      "D\u0323\u0307",        "\u1e0c\u0307"      },

        // D dot_below cedilla dot_above
        { "\u1e10\u0307\u0323", "D\u0327\u0323\u0307",  "\u1e10\u0323\u0307"},

        // D dot_above ogonek dot_below
        { "D\u0307\u0328\u0323","D\u0328\u0323\u0307",  "\u1e0c\u0328\u0307"},

        // E-macron-grave
        { "\u1E14",             "E\u0304\u0300",        "\u1E14"            },

        // E-macron + grave
        { "\u0112\u0300",       "E\u0304\u0300",        "\u1E14"            },

        // E-grave + macron
        { "\u00c8\u0304",       "E\u0300\u0304",        "\u00c8\u0304"      },

        // angstrom_sign
        { "\u212b",             "A\u030a",              "\u00c5"            },

        // A-ring
        { "\u00c5",             "A\u030a",              "\u00c5"            },
        { "\u00c4ffin",         "A\u0308ffin",          "\u00c4ffin"        },
        { "\u00c4\uFB03n",      "A\u0308\uFB03n",       "\u00c4\uFB03n"     },

        //updated with 3.0
        { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        },
        { "\u00fd\uFB03n",      "y\u0301\uFB03n",       "\u00fd\uFB03n"     },

        { "Henry IV",           "Henry IV",             "Henry IV"          },
        { "Henry \u2163",       "Henry \u2163",         "Henry \u2163"      },

        // ga(Zenkaku-Katakana)
        { "\u30AC",             "\u30AB\u3099",         "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Zenkaku)
        { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            },

        // ka(Hankaku-Katakana) + ten(Hankaku-Katakana)
        { "\uFF76\uFF9E",       "\uFF76\uFF9E",         "\uFF76\uFF9E"      },

        // ka(Zenkaku-Katakana) + ten(Hankaku)
        { "\u30AB\uFF9E",       "\u30AB\uFF9E",         "\u30AB\uFF9E"      },
        // ka(Hankaku-Katakana) + ten(Zenkaku)
        { "\uFF76\u3099",       "\uFF76\u3099",         "\uFF76\u3099"      },

        { "A\u0300\u0316",      "A\u0316\u0300",        "\u00C0\u0316"      },

        // Supplementary characters (surrogate pairs): musical symbols.
        { "\ud834\udd5e\ud834\udd57\ud834\udd65\ud834\udd5e",
          "\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65",
          "\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65" },
    };

    // Shared compatibility (NFKD/NFKC) test data: column 1 = input,
    // column 2 = expected NFKD, column 3 = expected NFKC.
    private String[][] compatTests = {
        // Input                Decomposed              Composed

        { "cat",                "cat",                  "cat"               },

        // Alef-Lamed vs. Alef, Lamed
        { "\uFB4f",             "\u05D0\u05DC",         "\u05D0\u05DC",     },

        { "\u00C4ffin",         "A\u0308ffin",          "\u00C4ffin"        },

        // ffi ligature -> f + f + i
        { "\u00C4\uFB03n",      "A\u0308ffin",          "\u00C4ffin"        },

        //updated for 3.0
        { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        },

        // ffi ligature -> f + f + i
        { "\u00fd\uFB03n",      "y\u0301ffin",          "\u00fdffin"        },

        { "Henry IV",           "Henry IV",             "Henry IV"          },
        { "Henry \u2163",       "Henry IV",             "Henry IV"          },

        // ga(Zenkaku-Katakana)
        { "\u30AC",             "\u30AB\u3099",         "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Zenkaku)
        { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            },

        // ka(Hankaku-Katakana) + ten(Zenkaku)
        { "\uFF76\u3099",       "\u30AB\u3099",         "\u30AC"            },

        /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
        // ka(Hankaku-Katakana) + ten(Hankaku)
        { "\uFF76\uFF9E",       "\u30AB\u3099",         "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Hankaku)
        { "\u30AB\uFF9E",       "\u30AB\u3099",         "\u30AC"            },
    };

    public void TestNFD() throws Exception{
        staticTest(NFD, canonTests, 1);
    }

    public void TestNFC() throws Exception{
        staticTest(NFC, canonTests, 2);
    }

    public void TestNFKD() throws Exception{
        staticTest(NFKD, compatTests, 1);
    }

    public void TestNFKC() throws Exception{
        staticTest(NFKC, compatTests, 2);
    }

    /*
     * Runs java.text.Normalizer.normalize(input, form) over every row of
     * "tests", comparing against the expected column "outCol"
     * (1 = decomposed, 2 = composed in the tables above).
     */
    private void staticTest(java.text.Normalizer.Form form,
                            String[][] tests,
                            int outCol) throws Exception {
        for (int i = 0; i < tests.length; i++) {
            String input = tests[i][0];
            logln("Normalizing '" + input + "' (" + toHexString(input) + ")" );

            String expect =tests[i][outCol];
            String output = java.text.Normalizer.normalize(input, form);

            if (!output.equals(expect)) {
                errln("FAIL: case " + i
                    + " expected '" + expect + "' (" + toHexString(expect) + ")"
                    + " but got '" + output + "' (" + toHexString(output) + ")"
                );
            }
        }
    }

    // With Canonical decomposition, Hangul syllables should get decomposed
    // into Jamo, but Jamo characters should not be decomposed into
    // conjoining Jamo
    private String[][] hangulCanon = {
        // Input                Decomposed              Composed
        { "\ud4db",             "\u1111\u1171\u11b6",   "\ud4db"            },
        { "\u1111\u1171\u11b6", "\u1111\u1171\u11b6",   "\ud4db"            },
    };

    public void TestHangulCompose() throws Exception{
        logln("Canonical composition...");
        staticTest(NFC, hangulCanon, 2);
    }

    public void TestHangulDecomp() throws Exception{
        logln("Canonical decomposition...");
        staticTest(NFD, hangulCanon, 1);
    }

}