New test/java/text/Collator/Regression.java

   1 /*
   2  * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154
  27  *      4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241
  28  *      4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736
  29  *      4133509 4139572 4141640 4179126 4179686 4244884 4663220
  30  * @library /java/text/testlib
  31  * @summary Regression tests for Collation and associated classes
  32  */
  33 /*
  34 (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
  35 (C) Copyright IBM Corp. 1996 - All Rights Reserved
  36 
  37   The original version of this source code and documentation is copyrighted and
  38 owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are
  39 provided under terms of a License Agreement between Taligent and Sun. This
  40 technology is protected by multiple US and International patents. This notice and
  41 attribution to Taligent may not be removed.
  42   Taligent is a registered trademark of Taligent, Inc.
  43 */
  44 
  45 import java.text.*;
  46 import java.util.Locale;
  47 import java.util.Vector;
  48 
  49 
  50 public class Regression extends CollatorTest {
  51 
  52     public static void main(String[] args) throws Exception {
  53         new Regression().run(args);
  54     }
  55 
  56     // CollationElementIterator.reset() doesn't work
  57     //
  58     public void Test4048446() {
  59         CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
  60         CollationElementIterator i2 = en_us.getCollationElementIterator(test1);
  61 
  62         while ( i1.next() != CollationElementIterator.NULLORDER ) {
  63         }
  64         i1.reset();
  65 
  66         assertEqual(i1, i2);
  67     }
  68 
  69 
  70     // Collator -> rules -> Collator round-trip broken for expanding characters
  71     //
  72     public void Test4051866() throws ParseException {
  73         // Build a collator containing expanding characters
  74         RuleBasedCollator c1 = new RuleBasedCollator("< o "
  75                                                     +"& oe ,o\u3080"
  76                                                     +"& oe ,\u1530 ,O"
  77                                                     +"& OE ,O\u3080"
  78                                                     +"& OE ,\u1520"
  79                                                     +"< p ,P");
  80 
  81         // Build another using the rules from  the first
  82         RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules());
  83 
  84         // Make sure they're the same
  85         if (!c1.getRules().equals(c2.getRules())) {
  86             errln("Rules are not equal");
  87         }
  88     }
  89 
  90     // Collator thinks "black-bird" == "black"
  91     //
  92     public void Test4053636() {
  93         if (en_us.equals("black-bird","black")) {
  94             errln("black-bird == black");
  95         }
  96     }
  97 
  98 
  99     // CollationElementIterator will not work correctly if the associated
 100     // Collator object's mode is changed
 101     //
 102     public void Test4054238() {
 103         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
 104 
 105         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
 106         CollationElementIterator i1 = en_us.getCollationElementIterator(test3);
 107 
 108         c.setDecomposition(Collator.NO_DECOMPOSITION);
 109         CollationElementIterator i2 = en_us.getCollationElementIterator(test3);
 110 
 111         // At this point, BOTH iterators should use NO_DECOMPOSITION, since the
 112         // collator itself is in that mode
 113         assertEqual(i1, i2);
 114     }
 115 
 116     // Collator.IDENTICAL documented but not implemented
 117     //
 118     public void Test4054734() {
 119         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
 120         try {
 121             c.setStrength(Collator.IDENTICAL);
 122         }
 123         catch (Exception e) {
 124             errln("Caught " + e.toString() + " setting Collator.IDENTICAL");
 125         }
 126 
 127         String[] decomp = {
 128             "\u0001",   "<",    "\u0002",
 129             "\u0001",   "=",    "\u0001",
 130             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
 131             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
 132         };
 133         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
 134         compareArray(c, decomp);
 135 
 136         String[] nodecomp = {
 137             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
 138         };
 139         c.setDecomposition(Collator.NO_DECOMPOSITION);
 140         compareArray(c, nodecomp);
 141     }
 142 
 143     // Full Decomposition mode not implemented
 144     //
 145     public void Test4054736() {
 146         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
 147         c.setDecomposition(Collator.FULL_DECOMPOSITION);
 148 
 149         String[] tests = {
 150             "\uFB4f", "=", "\u05D0\u05DC",  // Alef-Lamed vs. Alef, Lamed
 151         };
 152 
 153         compareArray(c, tests);
 154     }
 155 
 156     // Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean
 157     //
 158     public void Test4058613() {
 159         // Creating a default collator doesn't work when Korean is the default
 160         // locale
 161 
 162         Locale oldDefault = Locale.getDefault();
 163 
 164         Locale.setDefault( Locale.KOREAN );
 165         try {
 166             Collator c = Collator.getInstance();
 167 
 168             // Since the fix to this bug was to turn of decomposition for Korean collators,
 169             // ensure that's what we got
 170             if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
 171               errln("Decomposition is not set to NO_DECOMPOSITION");
 172             }
 173         }
 174         finally {
 175             Locale.setDefault(oldDefault);
 176         }
 177     }
 178 
 179     // RuleBasedCollator.getRules does not return the exact pattern as input
 180     // for expanding character sequences
 181     //
 182     public void Test4059820() {
 183         RuleBasedCollator c = null;
 184         try {
 185             c = new RuleBasedCollator("< a < b , c/a < d < z");
 186         } catch (ParseException e) {
 187             errln("Exception building collator: " + e.toString());
 188             return;
 189         }
 190         if ( c.getRules().indexOf("c/a") == -1) {
 191             errln("returned rules do not contain 'c/a'");
 192         }
 193     }
 194 
 195     // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
 196     //
 197     public void Test4060154() {
 198         RuleBasedCollator c = null;
 199         try {
 200             c = new RuleBasedCollator("< g, G < h, H < i, I < j, J"
 201                                       + " & H < \u0131, \u0130, i, I" );
 202         } catch (ParseException e) {
 203             errln("Exception building collator: " + e.toString());
 204             return;
 205         }
 206         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
 207 
 208         String[] tertiary = {
 209             "A",        "<",    "B",
 210             "H",        "<",    "\u0131",
 211             "H",        "<",    "I",
 212             "\u0131",   "<",    "\u0130",
 213             "\u0130",   "<",    "i",
 214             "\u0130",   ">",    "H",
 215         };
 216         c.setStrength(Collator.TERTIARY);
 217         compareArray(c, tertiary);
 218 
 219         String[] secondary = {
 220             "H",        "<",    "I",
 221             "\u0131",   "=",    "\u0130",
 222         };
 223         c.setStrength(Collator.PRIMARY);
 224         compareArray(c, secondary);
 225     };
 226 
 227     // Secondary/Tertiary comparison incorrect in French Secondary
 228     //
 229     public void Test4062418() throws ParseException {
 230         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
 231         c.setStrength(Collator.SECONDARY);
 232 
 233         String[] tests = {
 234                 "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
 235         };
 236 
 237         compareArray(c, tests);
 238     }
 239 
 240     // Collator.compare() method broken if either string contains spaces
 241     //
 242     public void Test4065540() {
 243         if (en_us.compare("abcd e", "abcd f") == 0) {
 244             errln("'abcd e' == 'abcd f'");
 245         }
 246     }
 247 
 248     // Unicode characters need to be recursively decomposed to get the
 249     // correct result. For example,
 250     // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
 251     //
 252     public void Test4066189() {
 253         String test1 = "\u1EB1";
 254         String test2 = "a\u0306\u0300";
 255 
 256         RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone();
 257         c1.setDecomposition(Collator.FULL_DECOMPOSITION);
 258         CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
 259 
 260         RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone();
 261         c2.setDecomposition(Collator.NO_DECOMPOSITION);
 262         CollationElementIterator i2 = en_us.getCollationElementIterator(test2);
 263 
 264         assertEqual(i1, i2);
 265     }
 266 
 267     // French secondary collation checking at the end of compare iteration fails
 268     //
 269     public void Test4066696() {
 270         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
 271         c.setStrength(Collator.SECONDARY);
 272 
 273         String[] tests = {
 274             "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
 275         };
 276 
 277         compareArray(c, tests);
 278     }
 279 
 280 
 281     // Bad canonicalization of same-class combining characters
 282     //
 283     public void Test4076676() {
 284         // These combining characters are all in the same class, so they should not
 285         // be reordered, and they should compare as unequal.
 286         String s1 = "A\u0301\u0302\u0300";
 287         String s2 = "A\u0302\u0300\u0301";
 288 
 289         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
 290         c.setStrength(Collator.TERTIARY);
 291 
 292         if (c.compare(s1,s2) == 0) {
 293             errln("Same-class combining chars were reordered");
 294         }
 295     }
 296 
 297 
 298     // RuleBasedCollator.equals(null) throws NullPointerException
 299     //
 300     public void Test4079231() {
 301         try {
 302             if (en_us.equals(null)) {
 303                 errln("en_us.equals(null) returned true");
 304             }
 305         }
 306         catch (Exception e) {
 307             errln("en_us.equals(null) threw " + e.toString());
 308         }
 309     }
 310 
 311     // RuleBasedCollator breaks on "< a < bb" rule
 312     //
 313     public void Test4078588() throws ParseException {
 314         RuleBasedCollator rbc=new RuleBasedCollator("< a < bb");
 315 
 316         int result = rbc.compare("a","bb");
 317 
 318         if (result != -1) {
 319             errln("Compare(a,bb) returned " + result + "; expected -1");
 320         }
 321     }
 322 
 323     // Combining characters in different classes not reordered properly.
 324     //
 325     public void Test4081866() throws ParseException {
 326         // These combining characters are all in different classes,
 327         // so they should be reordered and the strings should compare as equal.
 328         String s1 = "A\u0300\u0316\u0327\u0315";
 329         String s2 = "A\u0327\u0316\u0315\u0300";
 330 
 331         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
 332         c.setStrength(Collator.TERTIARY);
 333 
 334         // Now that the default collators are set to NO_DECOMPOSITION
 335         // (as a result of fixing bug 4114077), we must set it explicitly
 336         // when we're testing reordering behavior.  -- lwerner, 5/5/98
 337         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
 338 
 339         if (c.compare(s1,s2) != 0) {
 340             errln("Combining chars were not reordered");
 341         }
 342     }
 343 
 344     // string comparison errors in Scandinavian collators
 345     //
 346     public void Test4087241() {
 347         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(
 348                                                         new Locale("da", "DK"));
 349         c.setStrength(Collator.SECONDARY);
 350 
 351         String[] tests = {
 352             "\u007a",   "<",    "\u00e6",       // z        < ae
 353             "a\u0308",  "<",    "a\u030a",      // a-unlaut < a-ring
 354             "Y",        "<",    "u\u0308",      // Y        < u-umlaut
 355         };
 356 
 357         compareArray(c, tests);
 358     }
 359 
 360     // CollationKey takes ignorable strings into account when it shouldn't
 361     //
 362     public void Test4087243() {
 363         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
 364         c.setStrength(Collator.TERTIARY);
 365 
 366         String[] tests = {
 367             "123",      "=",    "123\u0001",        // 1 2 3  =  1 2 3 ctrl-A
 368         };
 369 
 370         compareArray(c, tests);
 371     }
 372 
 373     // Mu/micro conflict
 374     // Micro symbol and greek lowercase letter Mu should sort identically
 375     //
 376     public void Test4092260() {
 377         Collator c = Collator.getInstance(new Locale("el", ""));
 378 
 379         // will only be equal when FULL_DECOMPOSITION is used
 380         c.setDecomposition(Collator.FULL_DECOMPOSITION);
 381 
 382         String[] tests = {
 383             "\u00B5",      "=",    "\u03BC",
 384         };
 385 
 386         compareArray(c, tests);
 387     }
 388 
 389     void Test4095316() {
 390         Collator c = Collator.getInstance(new Locale("el", "GR"));
 391         c.setStrength(Collator.TERTIARY);
 392         // javadocs for RuleBasedCollator clearly specify that characters containing compatability
 393         // chars MUST use FULL_DECOMPOSITION to get accurate comparisons.
 394         c.setDecomposition(Collator.FULL_DECOMPOSITION);
 395 
 396         String[] tests = {
 397             "\u03D4",      "=",    "\u03AB",
 398         };
 399 
 400         compareArray(c, tests);
 401     }
 402 
 403     public void Test4101940() {
 404         try {
 405             RuleBasedCollator c = new RuleBasedCollator("< a < b");
 406             CollationElementIterator i = c.getCollationElementIterator("");
 407             i.reset();
 408 
 409             if (i.next() != i.NULLORDER) {
 410                 errln("next did not return NULLORDER");
 411             }
 412         }
 413         catch (Exception e) {
 414             errln("Caught " + e );
 415         }
 416     }
 417 
 418     // Collator.compare not handling spaces properly
 419     //
 420     public void Test4103436() {
 421         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
 422         c.setStrength(Collator.TERTIARY);
 423 
 424         String[] tests = {
 425             "file",      "<",    "file access",
 426             "file",      "<",    "fileaccess",
 427         };
 428 
 429         compareArray(c, tests);
 430     }
 431 
 432     // Collation not Unicode conformant with Hangul syllables
 433     //
 434     public void Test4114076() {
 435         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
 436         c.setStrength(Collator.TERTIARY);
 437 
 438         //
 439         // With Canonical decomposition, Hangul syllables should get decomposed
 440         // into Jamo, but Jamo characters should not be decomposed into
 441         // conjoining Jamo
 442         //
 443         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
 444         String[] test1 = {
 445             "\ud4db",   "=",    "\u1111\u1171\u11b6",
 446         };
 447         compareArray(c, test1);
 448 
 449         // Full decomposition result should be the same as canonical decomposition
 450         // for all hangul.
 451         c.setDecomposition(Collator.FULL_DECOMPOSITION);
 452         compareArray(c, test1);
 453 
 454     }
 455 
 456 
 457     // Collator.getCollationKey was hanging on certain character sequences
 458     //
 459     public void Test4124632() throws Exception {
 460         Collator coll = Collator.getInstance(Locale.JAPAN);
 461 
 462         try {
 463             coll.getCollationKey("A\u0308bc");
 464         } catch (OutOfMemoryError e) {
 465             errln("Ran out of memory -- probably an infinite loop");
 466         }
 467     }
 468 
 469     // sort order of french words with multiple accents has errors
 470     //
 471     public void Test4132736() {
 472         Collator c = Collator.getInstance(Locale.FRANCE);
 473 
 474         String[] test1 = {
 475             "e\u0300e\u0301",   "<",    "e\u0301e\u0300",
 476             "e\u0300\u0301",    ">",    "e\u0301\u0300",
 477         };
 478         compareArray(c, test1);
 479     }
 480 
 481     // The sorting using java.text.CollationKey is not in the exact order
 482     //
 483     public void Test4133509() {
 484         String[] test1 = {
 485             "Exception",    "<",    "ExceptionInInitializerError",
 486             "Graphics",     "<",    "GraphicsEnvironment",
 487             "String",       "<",    "StringBuffer",
 488         };
 489         compareArray(en_us, test1);
 490     }
 491 
 492     // Collation with decomposition off doesn't work for Europe
 493     //
 494     public void Test4114077() {
 495         // Ensure that we get the same results with decomposition off
 496         // as we do with it on....
 497 
 498         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
 499         c.setStrength(Collator.TERTIARY);
 500 
 501         String[] test1 = {
 502             "\u00C0",        "=", "A\u0300",        // Should be equivalent
 503             "p\u00eache",         ">", "p\u00e9ch\u00e9",
 504             "\u0204",        "=", "E\u030F",
 505             "\u01fa",        "=", "A\u030a\u0301",  // a-ring-acute -> a-ring, acute
 506                                                     //   -> a, ring, acute
 507             "A\u0300\u0316", "<", "A\u0316\u0300",  // No reordering --> unequal
 508         };
 509         c.setDecomposition(Collator.NO_DECOMPOSITION);
 510         compareArray(c, test1);
 511 
 512         String[] test2 = {
 513             "A\u0300\u0316", "=", "A\u0316\u0300",      // Reordering --> equal
 514         };
 515         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
 516         compareArray(c, test2);
 517     }
 518 
 519     // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
 520     //
 521     public void Test4141640() {
 522         //
 523         // Rather than just creating a Swedish collator, we might as well
 524         // try to instantiate one for every locale available on the system
 525         // in order to prevent this sort of bug from cropping up in the future
 526         //
 527         Locale[] locales = Collator.getAvailableLocales();
 528 
 529         for (int i = 0; i < locales.length; i++) {
 530             try {
 531                 Collator c = Collator.getInstance(locales[i]);
 532             } catch (Exception e) {
 533                 errln("Caught " + e + " creating collator for " + locales[i]);
 534             }
 535         }
 536     }
 537 
 538     // getCollationKey throws exception for spanish text
 539     // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
 540     //
 541     public void Test4139572() {
 542         //
 543         // Code pasted straight from the bug report
 544         //
 545         // create spanish locale and collator
 546         Locale l = new Locale("es", "es");
 547         Collator col = Collator.getInstance(l);
 548 
 549         // this spanish phrase kills it!
 550         col.getCollationKey("Nombre De Objeto");
 551     }
 552 
 553     // RuleBasedCollator doesn't use getCollationElementIterator internally
 554     //
 555     public void Test4146160() throws ParseException {
 556         //
 557         // Use a custom collator class whose getCollationElementIterator
 558         // methods increment a count....
 559         //
 560         My4146160Collator.count = 0;
 561         new My4146160Collator().getCollationKey("1");
 562         if (My4146160Collator.count < 1) {
 563             errln("getCollationElementIterator not called");
 564         }
 565 
 566         My4146160Collator.count = 0;
 567         new My4146160Collator().compare("1", "2");
 568         if (My4146160Collator.count < 1) {
 569             errln("getCollationElementIterator not called");
 570         }
 571     }
 572 
 573     static class My4146160Collator extends RuleBasedCollator {
 574         public My4146160Collator() throws ParseException {
 575             super(Regression.en_us.getRules());
 576         }
 577 
 578         public CollationElementIterator getCollationElementIterator(
 579                                             String text) {
 580             count++;
 581             return super.getCollationElementIterator(text);
 582         }
 583         public CollationElementIterator getCollationElementIterator(
 584                                             CharacterIterator text) {
 585             count++;
 586             return super.getCollationElementIterator(text);
 587         }
 588 
 589         public static int count = 0;
 590     };
 591 
 592     // CollationElementIterator.previous broken for expanding char sequences
 593     //
 594     public void Test4179686() throws ParseException {
 595 
 596         // Create a collator with a few expanding character sequences in it....
 597         RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
 598                                                     + " & ae ; \u00e4 & AE ; \u00c4"
 599                                                     + " & oe ; \u00f6 & OE ; \u00d6"
 600                                                     + " & ue ; \u00fc & UE ; \u00dc");
 601 
 602         String text = "T\u00f6ne"; // o-umlaut
 603 
 604         CollationElementIterator iter = coll.getCollationElementIterator(text);
 605         Vector elements = new Vector();
 606         int elem;
 607 
 608         // Iterate forward and collect all of the elements into a Vector
 609         while ((elem = iter.next()) != iter.NULLORDER) {
 610             elements.addElement(new Integer(elem));
 611         }
 612 
 613         // Now iterate backward and make sure they're the same
 614         int index = elements.size() - 1;
 615         while ((elem = iter.previous()) != iter.NULLORDER) {
 616             int expect = ((Integer)elements.elementAt(index)).intValue();
 617 
 618             if (elem != expect) {
 619                 errln("Mismatch at index " + index
 620                       + ": got " + Integer.toString(elem,16)
 621                       + ", expected " + Integer.toString(expect,16));
 622             }
 623             index--;
 624         }
 625     }
 626 
 627     public void Test4244884() throws ParseException {
 628         RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
 629         coll = new RuleBasedCollator(coll.getRules()
 630                 + " & C < ch , cH , Ch , CH < cat < crunchy");
 631 
 632         String[] testStrings = new String[] {
 633             "car",
 634             "cave",
 635             "clamp",
 636             "cramp",
 637             "czar",
 638             "church",
 639             "catalogue",
 640             "crunchy",
 641             "dog"
 642         };
 643 
 644         for (int i = 1; i < testStrings.length; i++) {
 645             if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
 646                 errln("error: \"" + testStrings[i - 1]
 647                     + "\" is greater than or equal to \"" + testStrings[i]
 648                     + "\".");
 649             }
 650         }
 651     }
 652 
 653     public void Test4179216() throws ParseException {
 654         // you can position a CollationElementIterator in the middle of
 655         // a contracting character sequence, yielding a bogus collation
 656         // element
 657         RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
 658         coll = new RuleBasedCollator(coll.getRules()
 659                 + " & C < ch , cH , Ch , CH < cat < crunchy");
 660         String testText = "church church catcatcher runcrunchynchy";
 661         CollationElementIterator iter = coll.getCollationElementIterator(
 662                 testText);
 663 
 664         // test that the "ch" combination works properly
 665         iter.setOffset(4);
 666         int elt4 = CollationElementIterator.primaryOrder(iter.next());
 667 
 668         iter.reset();
 669         int elt0 = CollationElementIterator.primaryOrder(iter.next());
 670 
 671         iter.setOffset(5);
 672         int elt5 = CollationElementIterator.primaryOrder(iter.next());
 673 
 674         if (elt4 != elt0 || elt5 != elt0)
 675             errln("The collation elements at positions 0 (" + elt0 + "), 4 ("
 676                     + elt4 + "), and 5 (" + elt5 + ") don't match.");
 677 
 678         // test that the "cat" combination works properly
 679         iter.setOffset(14);
 680         int elt14 = CollationElementIterator.primaryOrder(iter.next());
 681 
 682         iter.setOffset(15);
 683         int elt15 = CollationElementIterator.primaryOrder(iter.next());
 684 
 685         iter.setOffset(16);
 686         int elt16 = CollationElementIterator.primaryOrder(iter.next());
 687 
 688         iter.setOffset(17);
 689         int elt17 = CollationElementIterator.primaryOrder(iter.next());
 690 
 691         iter.setOffset(18);
 692         int elt18 = CollationElementIterator.primaryOrder(iter.next());
 693 
 694         iter.setOffset(19);
 695         int elt19 = CollationElementIterator.primaryOrder(iter.next());
 696 
 697         if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
 698                 || elt14 != elt18 || elt14 != elt19)
 699             errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
 700             + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
 701             + ", elt18 = " + elt18 + ", elt19 = " + elt19);
 702 
 703         // now generate a complete list of the collation elements,
 704         // first using next() and then using setOffset(), and
 705         // make sure both interfaces return the same set of elements
 706         iter.reset();
 707 
 708         int elt = iter.next();
 709         int count = 0;
 710         while (elt != CollationElementIterator.NULLORDER) {
 711             ++count;
 712             elt = iter.next();
 713         }
 714 
 715         String[] nextElements = new String[count];
 716         String[] setOffsetElements = new String[count];
 717         int lastPos = 0;
 718 
 719         iter.reset();
 720         elt = iter.next();
 721         count = 0;
 722         while (elt != CollationElementIterator.NULLORDER) {
 723             nextElements[count++] = testText.substring(lastPos, iter.getOffset());
 724             lastPos = iter.getOffset();
 725             elt = iter.next();
 726         }
 727         count = 0;
 728         for (int i = 0; i < testText.length(); ) {
 729             iter.setOffset(i);
 730             lastPos = iter.getOffset();
 731             elt = iter.next();
 732             setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
 733             i = iter.getOffset();
 734         }
 735         for (int i = 0; i < nextElements.length; i++) {
 736             if (nextElements[i].equals(setOffsetElements[i])) {
 737                 logln(nextElements[i]);
 738             } else {
 739                 errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
 740                     + setOffsetElements[i]);
 741             }
 742         }
 743     }
 744 
 745     public void Test4216006() throws Exception {
 746         // rule parser barfs on "<\u00e0=a\u0300", and on other cases
 747         // where the same token (after normalization) appears twice in a row
 748         boolean caughtException = false;
 749         try {
 750             RuleBasedCollator dummy = new RuleBasedCollator("\u00e0<a\u0300");
 751         }
 752         catch (ParseException e) {
 753             caughtException = true;
 754         }
 755         if (!caughtException) {
 756             throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
 757         }
 758 
 759         RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
 760         collator.setDecomposition(Collator.FULL_DECOMPOSITION);
 761         collator.setStrength(Collator.IDENTICAL);
 762 
 763         String[] tests = {
 764             "a\u0300", "=", "\u00e0",
 765             "\u00e0",  "=", "a\u0300"
 766         };
 767 
 768         compareArray(collator, tests);
 769     }
 770 
 771     public void Test4171974() {
 772         // test French accent ordering more thoroughly
 773         String[] frenchList = {
 774             "\u0075\u0075",     // u u
 775             "\u00fc\u0075",     // u-umlaut u
 776             "\u01d6\u0075",     // u-umlaut-macron u
 777             "\u016b\u0075",     // u-macron u
 778             "\u1e7b\u0075",     // u-macron-umlaut u
 779             "\u0075\u00fc",     // u u-umlaut
 780             "\u00fc\u00fc",     // u-umlaut u-umlaut
 781             "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
 782             "\u016b\u00fc",     // u-macron u-umlaut
 783             "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
 784             "\u0075\u01d6",     // u u-umlaut-macron
 785             "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
 786             "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
 787             "\u016b\u01d6",     // u-macron u-umlaut-macron
 788             "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
 789             "\u0075\u016b",     // u u-macron
 790             "\u00fc\u016b",     // u-umlaut u-macron
 791             "\u01d6\u016b",     // u-umlaut-macron u-macron
 792             "\u016b\u016b",     // u-macron u-macron
 793             "\u1e7b\u016b",     // u-macron-umlaut u-macron
 794             "\u0075\u1e7b",     // u u-macron-umlaut
 795             "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
 796             "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
 797             "\u016b\u1e7b",     // u-macron u-macron-umlaut
 798             "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
 799         };
 800         Collator french = Collator.getInstance(Locale.FRENCH);
 801 
 802         logln("Testing French order...");
 803         checkListOrder(frenchList, french);
 804 
 805         logln("Testing French order without decomposition...");
 806         french.setDecomposition(Collator.NO_DECOMPOSITION);
 807         checkListOrder(frenchList, french);
 808 
 809         String[] englishList = {
 810             "\u0075\u0075",     // u u
 811             "\u0075\u00fc",     // u u-umlaut
 812             "\u0075\u01d6",     // u u-umlaut-macron
 813             "\u0075\u016b",     // u u-macron
 814             "\u0075\u1e7b",     // u u-macron-umlaut
 815             "\u00fc\u0075",     // u-umlaut u
 816             "\u00fc\u00fc",     // u-umlaut u-umlaut
 817             "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
 818             "\u00fc\u016b",     // u-umlaut u-macron
 819             "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
 820             "\u01d6\u0075",     // u-umlaut-macron u
 821             "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
 822             "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
 823             "\u01d6\u016b",     // u-umlaut-macron u-macron
 824             "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
 825             "\u016b\u0075",     // u-macron u
 826             "\u016b\u00fc",     // u-macron u-umlaut
 827             "\u016b\u01d6",     // u-macron u-umlaut-macron
 828             "\u016b\u016b",     // u-macron u-macron
 829             "\u016b\u1e7b",     // u-macron u-macron-umlaut
 830             "\u1e7b\u0075",     // u-macron-umlaut u
 831             "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
 832             "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
 833             "\u1e7b\u016b",     // u-macron-umlaut u-macron
 834             "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
 835         };
 836         Collator english = Collator.getInstance(Locale.ENGLISH);
 837 
 838         logln("Testing English order...");
 839         checkListOrder(englishList, english);
 840 
 841         logln("Testing English order without decomposition...");
 842         english.setDecomposition(Collator.NO_DECOMPOSITION);
 843         checkListOrder(englishList, english);
 844     }
 845 
 846     private void checkListOrder(String[] sortedList, Collator c) {
 847         // this function uses the specified Collator to make sure the
 848         // passed-in list is already sorted into ascending order
 849         for (int i = 0; i < sortedList.length - 1; i++) {
 850             if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) {
 851                 errln("List out of order at element #" + i + ": "
 852                         + prettify(sortedList[i]) + " >= "
 853                         + prettify(sortedList[i + 1]));
 854             }
 855         }
 856     }
 857 
 858     // CollationElementIterator set doesn't work propertly with next/prev
 859     public void Test4663220() {
 860         RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US);
 861         CharacterIterator stringIter = new StringCharacterIterator("fox");
 862         CollationElementIterator iter = collator.getCollationElementIterator(stringIter);
 863 
 864         int[] elements_next = new int[3];
 865         logln("calling next:");
 866         for (int i = 0; i < 3; ++i) {
 867             logln("[" + i + "] " + (elements_next[i] = iter.next()));
 868         }
 869 
 870         int[] elements_fwd = new int[3];
 871         logln("calling set/next:");
 872         for (int i = 0; i < 3; ++i) {
 873             iter.setOffset(i);
 874             logln("[" + i + "] " + (elements_fwd[i] = iter.next()));
 875         }
 876 
 877         for (int i = 0; i < 3; ++i) {
 878             if (elements_next[i] != elements_fwd[i]) {
 879                 errln("mismatch at position " + i +
 880                       ": " + elements_next[i] +
 881                       " != " + elements_fwd[i]);
 882             }
 883         }
 884     }
 885 
 886     //------------------------------------------------------------------------
 887     // Internal utilities
 888     //
 889     private void compareArray(Collator c, String[] tests) {
 890         for (int i = 0; i < tests.length; i += 3) {
 891 
 892             int expect = 0;
 893             if (tests[i+1].equals("<")) {
 894                 expect = -1;
 895             } else if (tests[i+1].equals(">")) {
 896                 expect = 1;
 897             } else if (tests[i+1].equals("=")) {
 898                 expect = 0;
 899             } else {
 900                 expect = Integer.decode(tests[i+1]).intValue();
 901             }
 902 
 903             int result = c.compare(tests[i], tests[i+2]);
 904             if (sign(result) != sign(expect))
 905             {
 906                 errln( i/3 + ": compare(" + prettify(tests[i])
 907                                     + " , " + prettify(tests[i+2])
 908                                     + ") got " + result + "; expected " + expect);
 909             }
 910             else
 911             {
 912                 // Collator.compare worked OK; now try the collation keys
 913                 CollationKey k1 = c.getCollationKey(tests[i]);
 914                 CollationKey k2 = c.getCollationKey(tests[i+2]);
 915 
 916                 result = k1.compareTo(k2);
 917                 if (sign(result) != sign(expect)) {
 918                     errln( i/3 + ": key(" + prettify(tests[i])
 919                                         + ").compareTo(key(" + prettify(tests[i+2])
 920                                         + ")) got " + result + "; expected " + expect);
 921 
 922                     errln("  " + prettify(k1) + " vs. " + prettify(k2));
 923                 }
 924             }
 925         }
 926     }
 927 
 928     private static final int sign(int i) {
 929         if (i < 0) return -1;
 930         if (i > 0) return 1;
 931         return 0;
 932     }
 933 
 934 
         // Collator obtained for Locale.US and cast to RuleBasedCollator; being
         // static, the one instance is shared by every instance of this class.
  935     static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
  936 
         // Sample strings held as instance fields.  test1 and test2 are two
         // near-identical sentences that differ in the case and spacing of the
         // first word ("XFILE" vs. "Xf ile") and in a few words later on.
         // NOTE(review): their uses are outside this part of the file -- confirm
         // which tests read them before changing the text.
  937     String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
  938     String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
         // Contains non-ASCII letters written as Unicode escapes: U+00FC (u-umlaut),
         // U+00F6 (o-umlaut) and U+00DF (sharp s).
  939     String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck";
 940 }