1 /* 2 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154 27 * 4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241 28 * 4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736 29 * 4133509 4139572 4141640 4179126 4179686 4244884 4663220 30 * @library /java/text/testlib 31 * @summary Regression tests for Collation and associated classes 32 */ 33 /* 34 (C) Copyright Taligent, Inc. 1996 - All Rights Reserved 35 (C) Copyright IBM Corp. 1996 - All Rights Reserved 36 37 The original version of this source code and documentation is copyrighted and 38 owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are 39 provided under terms of a License Agreement between Taligent and Sun. This 40 technology is protected by multiple US and International patents. This notice and 41 attribution to Taligent may not be removed. 42 Taligent is a registered trademark of Taligent, Inc. 43 */ 44 45 import java.text.*; 46 import java.util.Locale; 47 import java.util.Vector; 48 49 50 public class Regression extends CollatorTest { 51 52 public static void main(String[] args) throws Exception { 53 new Regression().run(args); 54 } 55 56 // CollationElementIterator.reset() doesn't work 57 // 58 public void Test4048446() { 59 CollationElementIterator i1 = en_us.getCollationElementIterator(test1); 60 CollationElementIterator i2 = en_us.getCollationElementIterator(test1); 61 62 while ( i1.next() != CollationElementIterator.NULLORDER ) { 63 } 64 i1.reset(); 65 66 assertEqual(i1, i2); 67 } 68 69 70 // Collator -> rules -> Collator round-trip broken for expanding characters 71 // 72 public void Test4051866() throws ParseException { 73 // Build a collator containing expanding characters 74 RuleBasedCollator c1 = new RuleBasedCollator("< o " 75 +"& oe ,o\u3080" 76 +"& oe ,\u1530 ,O" 77 +"& OE ,O\u3080" 78 +"& OE ,\u1520" 79 +"< p ,P"); 80 81 // Build another using the rules from the first 82 RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules()); 83 84 // Make sure they're the same 85 if (!c1.getRules().equals(c2.getRules())) { 86 errln("Rules are not equal"); 87 } 88 } 89 90 // Collator thinks "black-bird" == "black" 91 // 92 public void Test4053636() { 93 if (en_us.equals("black-bird","black")) { 94 errln("black-bird == black"); 95 } 96 } 97 98 99 // CollationElementIterator will not work correctly if the associated 100 // Collator object's mode is changed 101 // 102 public void Test4054238() { 103 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 104 105 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 106 CollationElementIterator i1 = en_us.getCollationElementIterator(test3); 107 108 c.setDecomposition(Collator.NO_DECOMPOSITION); 109 CollationElementIterator i2 = en_us.getCollationElementIterator(test3); 110 111 // At this point, BOTH iterators should use NO_DECOMPOSITION, since the 112 // collator itself is in that mode 113 assertEqual(i1, i2); 114 } 115 116 // Collator.IDENTICAL documented but not implemented 117 // 118 public void Test4054734() { 119 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 120 try { 121 c.setStrength(Collator.IDENTICAL); 122 } 123 catch (Exception e) { 124 errln("Caught " + e.toString() + " setting Collator.IDENTICAL"); 125 } 126 127 String[] decomp = { 128 "\u0001", "<", "\u0002", 129 "\u0001", "=", "\u0001", 130 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise 131 "\u00C0", "=", "A\u0300" // Decomp should make these equal 132 }; 133 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 134 compareArray(c, decomp); 135 136 String[] nodecomp = { 137 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave 138 }; 139 c.setDecomposition(Collator.NO_DECOMPOSITION); 140 compareArray(c, nodecomp); 141 } 142 143 // Full Decomposition mode not implemented 144 // 145 public void Test4054736() { 146 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 147 c.setDecomposition(Collator.FULL_DECOMPOSITION); 148 149 String[] tests = { 150 "\uFB4f", "=", "\u05D0\u05DC", // Alef-Lamed vs. Alef, Lamed 151 }; 152 153 compareArray(c, tests); 154 } 155 156 // Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean 157 // 158 public void Test4058613() { 159 // Creating a default collator doesn't work when Korean is the default 160 // locale 161 162 Locale oldDefault = Locale.getDefault(); 163 164 Locale.setDefault( Locale.KOREAN ); 165 try { 166 Collator c = Collator.getInstance(); 167 168 // Since the fix to this bug was to turn of decomposition for Korean collators, 169 // ensure that's what we got 170 if (c.getDecomposition() != Collator.NO_DECOMPOSITION) { 171 errln("Decomposition is not set to NO_DECOMPOSITION"); 172 } 173 } 174 finally { 175 Locale.setDefault(oldDefault); 176 } 177 } 178 179 // RuleBasedCollator.getRules does not return the exact pattern as input 180 // for expanding character sequences 181 // 182 public void Test4059820() { 183 RuleBasedCollator c = null; 184 try { 185 c = new RuleBasedCollator("< a < b , c/a < d < z"); 186 } catch (ParseException e) { 187 errln("Exception building collator: " + e.toString()); 188 return; 189 } 190 if ( c.getRules().indexOf("c/a") == -1) { 191 errln("returned rules do not contain 'c/a'"); 192 } 193 } 194 195 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" 196 // 197 public void Test4060154() { 198 RuleBasedCollator c = null; 199 try { 200 c = new RuleBasedCollator("< g, G < h, H < i, I < j, J" 201 + " & H < \u0131, \u0130, i, I" ); 202 } catch (ParseException e) { 203 errln("Exception building collator: " + e.toString()); 204 return; 205 } 206 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 207 208 String[] tertiary = { 209 "A", "<", "B", 210 "H", "<", "\u0131", 211 "H", "<", "I", 212 "\u0131", "<", "\u0130", 213 "\u0130", "<", "i", 214 "\u0130", ">", "H", 215 }; 216 c.setStrength(Collator.TERTIARY); 217 compareArray(c, tertiary); 218 219 String[] secondary = { 220 "H", "<", "I", 221 "\u0131", "=", "\u0130", 222 }; 223 c.setStrength(Collator.PRIMARY); 224 compareArray(c, secondary); 225 }; 226 227 // Secondary/Tertiary comparison incorrect in French Secondary 228 // 229 public void Test4062418() throws ParseException { 230 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE); 231 c.setStrength(Collator.SECONDARY); 232 233 String[] tests = { 234 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater 235 }; 236 237 compareArray(c, tests); 238 } 239 240 // Collator.compare() method broken if either string contains spaces 241 // 242 public void Test4065540() { 243 if (en_us.compare("abcd e", "abcd f") == 0) { 244 errln("'abcd e' == 'abcd f'"); 245 } 246 } 247 248 // Unicode characters need to be recursively decomposed to get the 249 // correct result. For example, 250 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. 251 // 252 public void Test4066189() { 253 String test1 = "\u1EB1"; 254 String test2 = "a\u0306\u0300"; 255 256 RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone(); 257 c1.setDecomposition(Collator.FULL_DECOMPOSITION); 258 CollationElementIterator i1 = en_us.getCollationElementIterator(test1); 259 260 RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone(); 261 c2.setDecomposition(Collator.NO_DECOMPOSITION); 262 CollationElementIterator i2 = en_us.getCollationElementIterator(test2); 263 264 assertEqual(i1, i2); 265 } 266 267 // French secondary collation checking at the end of compare iteration fails 268 // 269 public void Test4066696() { 270 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE); 271 c.setStrength(Collator.SECONDARY); 272 273 String[] tests = { 274 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute 275 }; 276 277 compareArray(c, tests); 278 } 279 280 281 // Bad canonicalization of same-class combining characters 282 // 283 public void Test4076676() { 284 // These combining characters are all in the same class, so they should not 285 // be reordered, and they should compare as unequal. 286 String s1 = "A\u0301\u0302\u0300"; 287 String s2 = "A\u0302\u0300\u0301"; 288 289 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 290 c.setStrength(Collator.TERTIARY); 291 292 if (c.compare(s1,s2) == 0) { 293 errln("Same-class combining chars were reordered"); 294 } 295 } 296 297 298 // RuleBasedCollator.equals(null) throws NullPointerException 299 // 300 public void Test4079231() { 301 try { 302 if (en_us.equals(null)) { 303 errln("en_us.equals(null) returned true"); 304 } 305 } 306 catch (Exception e) { 307 errln("en_us.equals(null) threw " + e.toString()); 308 } 309 } 310 311 // RuleBasedCollator breaks on "< a < bb" rule 312 // 313 public void Test4078588() throws ParseException { 314 RuleBasedCollator rbc=new RuleBasedCollator("< a < bb"); 315 316 int result = rbc.compare("a","bb"); 317 318 if (result != -1) { 319 errln("Compare(a,bb) returned " + result + "; expected -1"); 320 } 321 } 322 323 // Combining characters in different classes not reordered properly. 324 // 325 public void Test4081866() throws ParseException { 326 // These combining characters are all in different classes, 327 // so they should be reordered and the strings should compare as equal. 328 String s1 = "A\u0300\u0316\u0327\u0315"; 329 String s2 = "A\u0327\u0316\u0315\u0300"; 330 331 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 332 c.setStrength(Collator.TERTIARY); 333 334 // Now that the default collators are set to NO_DECOMPOSITION 335 // (as a result of fixing bug 4114077), we must set it explicitly 336 // when we're testing reordering behavior. -- lwerner, 5/5/98 337 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 338 339 if (c.compare(s1,s2) != 0) { 340 errln("Combining chars were not reordered"); 341 } 342 } 343 344 // string comparison errors in Scandinavian collators 345 // 346 public void Test4087241() { 347 RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance( 348 new Locale("da", "DK")); 349 c.setStrength(Collator.SECONDARY); 350 351 String[] tests = { 352 "\u007a", "<", "\u00e6", // z < ae 353 "a\u0308", "<", "a\u030a", // a-unlaut < a-ring 354 "Y", "<", "u\u0308", // Y < u-umlaut 355 }; 356 357 compareArray(c, tests); 358 } 359 360 // CollationKey takes ignorable strings into account when it shouldn't 361 // 362 public void Test4087243() { 363 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 364 c.setStrength(Collator.TERTIARY); 365 366 String[] tests = { 367 "123", "=", "123\u0001", // 1 2 3 = 1 2 3 ctrl-A 368 }; 369 370 compareArray(c, tests); 371 } 372 373 // Mu/micro conflict 374 // Micro symbol and greek lowercase letter Mu should sort identically 375 // 376 public void Test4092260() { 377 Collator c = Collator.getInstance(new Locale("el", "")); 378 379 // will only be equal when FULL_DECOMPOSITION is used 380 c.setDecomposition(Collator.FULL_DECOMPOSITION); 381 382 String[] tests = { 383 "\u00B5", "=", "\u03BC", 384 }; 385 386 compareArray(c, tests); 387 } 388 389 void Test4095316() { 390 Collator c = Collator.getInstance(new Locale("el", "GR")); 391 c.setStrength(Collator.TERTIARY); 392 // javadocs for RuleBasedCollator clearly specify that characters containing compatability 393 // chars MUST use FULL_DECOMPOSITION to get accurate comparisons. 394 c.setDecomposition(Collator.FULL_DECOMPOSITION); 395 396 String[] tests = { 397 "\u03D4", "=", "\u03AB", 398 }; 399 400 compareArray(c, tests); 401 } 402 403 public void Test4101940() { 404 try { 405 RuleBasedCollator c = new RuleBasedCollator("< a < b"); 406 CollationElementIterator i = c.getCollationElementIterator(""); 407 i.reset(); 408 409 if (i.next() != i.NULLORDER) { 410 errln("next did not return NULLORDER"); 411 } 412 } 413 catch (Exception e) { 414 errln("Caught " + e ); 415 } 416 } 417 418 // Collator.compare not handling spaces properly 419 // 420 public void Test4103436() { 421 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 422 c.setStrength(Collator.TERTIARY); 423 424 String[] tests = { 425 "file", "<", "file access", 426 "file", "<", "fileaccess", 427 }; 428 429 compareArray(c, tests); 430 } 431 432 // Collation not Unicode conformant with Hangul syllables 433 // 434 public void Test4114076() { 435 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 436 c.setStrength(Collator.TERTIARY); 437 438 // 439 // With Canonical decomposition, Hangul syllables should get decomposed 440 // into Jamo, but Jamo characters should not be decomposed into 441 // conjoining Jamo 442 // 443 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 444 String[] test1 = { 445 "\ud4db", "=", "\u1111\u1171\u11b6", 446 }; 447 compareArray(c, test1); 448 449 // Full decomposition result should be the same as canonical decomposition 450 // for all hangul. 451 c.setDecomposition(Collator.FULL_DECOMPOSITION); 452 compareArray(c, test1); 453 454 } 455 456 457 // Collator.getCollationKey was hanging on certain character sequences 458 // 459 public void Test4124632() throws Exception { 460 Collator coll = Collator.getInstance(Locale.JAPAN); 461 462 try { 463 coll.getCollationKey("A\u0308bc"); 464 } catch (OutOfMemoryError e) { 465 errln("Ran out of memory -- probably an infinite loop"); 466 } 467 } 468 469 // sort order of french words with multiple accents has errors 470 // 471 public void Test4132736() { 472 Collator c = Collator.getInstance(Locale.FRANCE); 473 474 String[] test1 = { 475 "e\u0300e\u0301", "<", "e\u0301e\u0300", 476 "e\u0300\u0301", ">", "e\u0301\u0300", 477 }; 478 compareArray(c, test1); 479 } 480 481 // The sorting using java.text.CollationKey is not in the exact order 482 // 483 public void Test4133509() { 484 String[] test1 = { 485 "Exception", "<", "ExceptionInInitializerError", 486 "Graphics", "<", "GraphicsEnvironment", 487 "String", "<", "StringBuffer", 488 }; 489 compareArray(en_us, test1); 490 } 491 492 // Collation with decomposition off doesn't work for Europe 493 // 494 public void Test4114077() { 495 // Ensure that we get the same results with decomposition off 496 // as we do with it on.... 497 498 RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); 499 c.setStrength(Collator.TERTIARY); 500 501 String[] test1 = { 502 "\u00C0", "=", "A\u0300", // Should be equivalent 503 "p\u00eache", ">", "p\u00e9ch\u00e9", 504 "\u0204", "=", "E\u030F", 505 "\u01fa", "=", "A\u030a\u0301", // a-ring-acute -> a-ring, acute 506 // -> a, ring, acute 507 "A\u0300\u0316", "<", "A\u0316\u0300", // No reordering --> unequal 508 }; 509 c.setDecomposition(Collator.NO_DECOMPOSITION); 510 compareArray(c, test1); 511 512 String[] test2 = { 513 "A\u0300\u0316", "=", "A\u0316\u0300", // Reordering --> equal 514 }; 515 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 516 compareArray(c, test2); 517 } 518 519 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) 520 // 521 public void Test4141640() { 522 // 523 // Rather than just creating a Swedish collator, we might as well 524 // try to instantiate one for every locale available on the system 525 // in order to prevent this sort of bug from cropping up in the future 526 // 527 Locale[] locales = Collator.getAvailableLocales(); 528 529 for (int i = 0; i < locales.length; i++) { 530 try { 531 Collator c = Collator.getInstance(locales[i]); 532 } catch (Exception e) { 533 errln("Caught " + e + " creating collator for " + locales[i]); 534 } 535 } 536 } 537 538 // getCollationKey throws exception for spanish text 539 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 540 // 541 public void Test4139572() { 542 // 543 // Code pasted straight from the bug report 544 // 545 // create spanish locale and collator 546 Locale l = new Locale("es", "es"); 547 Collator col = Collator.getInstance(l); 548 549 // this spanish phrase kills it! 550 col.getCollationKey("Nombre De Objeto"); 551 } 552 553 // RuleBasedCollator doesn't use getCollationElementIterator internally 554 // 555 public void Test4146160() throws ParseException { 556 // 557 // Use a custom collator class whose getCollationElementIterator 558 // methods increment a count.... 559 // 560 My4146160Collator.count = 0; 561 new My4146160Collator().getCollationKey("1"); 562 if (My4146160Collator.count < 1) { 563 errln("getCollationElementIterator not called"); 564 } 565 566 My4146160Collator.count = 0; 567 new My4146160Collator().compare("1", "2"); 568 if (My4146160Collator.count < 1) { 569 errln("getCollationElementIterator not called"); 570 } 571 } 572 573 static class My4146160Collator extends RuleBasedCollator { 574 public My4146160Collator() throws ParseException { 575 super(Regression.en_us.getRules()); 576 } 577 578 public CollationElementIterator getCollationElementIterator( 579 String text) { 580 count++; 581 return super.getCollationElementIterator(text); 582 } 583 public CollationElementIterator getCollationElementIterator( 584 CharacterIterator text) { 585 count++; 586 return super.getCollationElementIterator(text); 587 } 588 589 public static int count = 0; 590 }; 591 592 // CollationElementIterator.previous broken for expanding char sequences 593 // 594 public void Test4179686() throws ParseException { 595 596 // Create a collator with a few expanding character sequences in it.... 597 RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules() 598 + " & ae ; \u00e4 & AE ; \u00c4" 599 + " & oe ; \u00f6 & OE ; \u00d6" 600 + " & ue ; \u00fc & UE ; \u00dc"); 601 602 String text = "T\u00f6ne"; // o-umlaut 603 604 CollationElementIterator iter = coll.getCollationElementIterator(text); 605 Vector elements = new Vector(); 606 int elem; 607 608 // Iterate forward and collect all of the elements into a Vector 609 while ((elem = iter.next()) != iter.NULLORDER) { 610 elements.addElement(new Integer(elem)); 611 } 612 613 // Now iterate backward and make sure they're the same 614 int index = elements.size() - 1; 615 while ((elem = iter.previous()) != iter.NULLORDER) { 616 int expect = ((Integer)elements.elementAt(index)).intValue(); 617 618 if (elem != expect) { 619 errln("Mismatch at index " + index 620 + ": got " + Integer.toString(elem,16) 621 + ", expected " + Integer.toString(expect,16)); 622 } 623 index--; 624 } 625 } 626 627 public void Test4244884() throws ParseException { 628 RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US); 629 coll = new RuleBasedCollator(coll.getRules() 630 + " & C < ch , cH , Ch , CH < cat < crunchy"); 631 632 String[] testStrings = new String[] { 633 "car", 634 "cave", 635 "clamp", 636 "cramp", 637 "czar", 638 "church", 639 "catalogue", 640 "crunchy", 641 "dog" 642 }; 643 644 for (int i = 1; i < testStrings.length; i++) { 645 if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) { 646 errln("error: \"" + testStrings[i - 1] 647 + "\" is greater than or equal to \"" + testStrings[i] 648 + "\"."); 649 } 650 } 651 } 652 653 public void Test4179216() throws ParseException { 654 // you can position a CollationElementIterator in the middle of 655 // a contracting character sequence, yielding a bogus collation 656 // element 657 RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US); 658 coll = new RuleBasedCollator(coll.getRules() 659 + " & C < ch , cH , Ch , CH < cat < crunchy"); 660 String testText = "church church catcatcher runcrunchynchy"; 661 CollationElementIterator iter = coll.getCollationElementIterator( 662 testText); 663 664 // test that the "ch" combination works properly 665 iter.setOffset(4); 666 int elt4 = CollationElementIterator.primaryOrder(iter.next()); 667 668 iter.reset(); 669 int elt0 = CollationElementIterator.primaryOrder(iter.next()); 670 671 iter.setOffset(5); 672 int elt5 = CollationElementIterator.primaryOrder(iter.next()); 673 674 if (elt4 != elt0 || elt5 != elt0) 675 errln("The collation elements at positions 0 (" + elt0 + "), 4 (" 676 + elt4 + "), and 5 (" + elt5 + ") don't match."); 677 678 // test that the "cat" combination works properly 679 iter.setOffset(14); 680 int elt14 = CollationElementIterator.primaryOrder(iter.next()); 681 682 iter.setOffset(15); 683 int elt15 = CollationElementIterator.primaryOrder(iter.next()); 684 685 iter.setOffset(16); 686 int elt16 = CollationElementIterator.primaryOrder(iter.next()); 687 688 iter.setOffset(17); 689 int elt17 = CollationElementIterator.primaryOrder(iter.next()); 690 691 iter.setOffset(18); 692 int elt18 = CollationElementIterator.primaryOrder(iter.next()); 693 694 iter.setOffset(19); 695 int elt19 = CollationElementIterator.primaryOrder(iter.next()); 696 697 if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17 698 || elt14 != elt18 || elt14 != elt19) 699 errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = " 700 + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17 701 + ", elt18 = " + elt18 + ", elt19 = " + elt19); 702 703 // now generate a complete list of the collation elements, 704 // first using next() and then using setOffset(), and 705 // make sure both interfaces return the same set of elements 706 iter.reset(); 707 708 int elt = iter.next(); 709 int count = 0; 710 while (elt != CollationElementIterator.NULLORDER) { 711 ++count; 712 elt = iter.next(); 713 } 714 715 String[] nextElements = new String[count]; 716 String[] setOffsetElements = new String[count]; 717 int lastPos = 0; 718 719 iter.reset(); 720 elt = iter.next(); 721 count = 0; 722 while (elt != CollationElementIterator.NULLORDER) { 723 nextElements[count++] = testText.substring(lastPos, iter.getOffset()); 724 lastPos = iter.getOffset(); 725 elt = iter.next(); 726 } 727 count = 0; 728 for (int i = 0; i < testText.length(); ) { 729 iter.setOffset(i); 730 lastPos = iter.getOffset(); 731 elt = iter.next(); 732 setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset()); 733 i = iter.getOffset(); 734 } 735 for (int i = 0; i < nextElements.length; i++) { 736 if (nextElements[i].equals(setOffsetElements[i])) { 737 logln(nextElements[i]); 738 } else { 739 errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded " 740 + setOffsetElements[i]); 741 } 742 } 743 } 744 745 public void Test4216006() throws Exception { 746 // rule parser barfs on "<\u00e0=a\u0300", and on other cases 747 // where the same token (after normalization) appears twice in a row 748 boolean caughtException = false; 749 try { 750 RuleBasedCollator dummy = new RuleBasedCollator("\u00e0<a\u0300"); 751 } 752 catch (ParseException e) { 753 caughtException = true; 754 } 755 if (!caughtException) { 756 throw new Exception("\"a<a\" collation sequence didn't cause parse error!"); 757 } 758 759 RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300"); 760 collator.setDecomposition(Collator.FULL_DECOMPOSITION); 761 collator.setStrength(Collator.IDENTICAL); 762 763 String[] tests = { 764 "a\u0300", "=", "\u00e0", 765 "\u00e0", "=", "a\u0300" 766 }; 767 768 compareArray(collator, tests); 769 } 770 771 public void Test4171974() { 772 // test French accent ordering more thoroughly 773 String[] frenchList = { 774 "\u0075\u0075", // u u 775 "\u00fc\u0075", // u-umlaut u 776 "\u01d6\u0075", // u-umlaut-macron u 777 "\u016b\u0075", // u-macron u 778 "\u1e7b\u0075", // u-macron-umlaut u 779 "\u0075\u00fc", // u u-umlaut 780 "\u00fc\u00fc", // u-umlaut u-umlaut 781 "\u01d6\u00fc", // u-umlaut-macron u-umlaut 782 "\u016b\u00fc", // u-macron u-umlaut 783 "\u1e7b\u00fc", // u-macron-umlaut u-umlaut 784 "\u0075\u01d6", // u u-umlaut-macron 785 "\u00fc\u01d6", // u-umlaut u-umlaut-macron 786 "\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron 787 "\u016b\u01d6", // u-macron u-umlaut-macron 788 "\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron 789 "\u0075\u016b", // u u-macron 790 "\u00fc\u016b", // u-umlaut u-macron 791 "\u01d6\u016b", // u-umlaut-macron u-macron 792 "\u016b\u016b", // u-macron u-macron 793 "\u1e7b\u016b", // u-macron-umlaut u-macron 794 "\u0075\u1e7b", // u u-macron-umlaut 795 "\u00fc\u1e7b", // u-umlaut u-macron-umlaut 796 "\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut 797 "\u016b\u1e7b", // u-macron u-macron-umlaut 798 "\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut 799 }; 800 Collator french = Collator.getInstance(Locale.FRENCH); 801 802 logln("Testing French order..."); 803 checkListOrder(frenchList, french); 804 805 logln("Testing French order without decomposition..."); 806 french.setDecomposition(Collator.NO_DECOMPOSITION); 807 checkListOrder(frenchList, french); 808 809 String[] englishList = { 810 "\u0075\u0075", // u u 811 "\u0075\u00fc", // u u-umlaut 812 "\u0075\u01d6", // u u-umlaut-macron 813 "\u0075\u016b", // u u-macron 814 "\u0075\u1e7b", // u u-macron-umlaut 815 "\u00fc\u0075", // u-umlaut u 816 "\u00fc\u00fc", // u-umlaut u-umlaut 817 "\u00fc\u01d6", // u-umlaut u-umlaut-macron 818 "\u00fc\u016b", // u-umlaut u-macron 819 "\u00fc\u1e7b", // u-umlaut u-macron-umlaut 820 "\u01d6\u0075", // u-umlaut-macron u 821 "\u01d6\u00fc", // u-umlaut-macron u-umlaut 822 "\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron 823 "\u01d6\u016b", // u-umlaut-macron u-macron 824 "\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut 825 "\u016b\u0075", // u-macron u 826 "\u016b\u00fc", // u-macron u-umlaut 827 "\u016b\u01d6", // u-macron u-umlaut-macron 828 "\u016b\u016b", // u-macron u-macron 829 "\u016b\u1e7b", // u-macron u-macron-umlaut 830 "\u1e7b\u0075", // u-macron-umlaut u 831 "\u1e7b\u00fc", // u-macron-umlaut u-umlaut 832 "\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron 833 "\u1e7b\u016b", // u-macron-umlaut u-macron 834 "\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut 835 }; 836 Collator english = Collator.getInstance(Locale.ENGLISH); 837 838 logln("Testing English order..."); 839 checkListOrder(englishList, english); 840 841 logln("Testing English order without decomposition..."); 842 english.setDecomposition(Collator.NO_DECOMPOSITION); 843 checkListOrder(englishList, english); 844 } 845 846 private void checkListOrder(String[] sortedList, Collator c) { 847 // this function uses the specified Collator to make sure the 848 // passed-in list is already sorted into ascending order 849 for (int i = 0; i < sortedList.length - 1; i++) { 850 if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) { 851 errln("List out of order at element #" + i + ": " 852 + prettify(sortedList[i]) + " >= " 853 + prettify(sortedList[i + 1])); 854 } 855 } 856 } 857 858 // CollationElementIterator set doesn't work propertly with next/prev 859 public void Test4663220() { 860 RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US); 861 CharacterIterator stringIter = new StringCharacterIterator("fox"); 862 CollationElementIterator iter = collator.getCollationElementIterator(stringIter); 863 864 int[] elements_next = new int[3]; 865 logln("calling next:"); 866 for (int i = 0; i < 3; ++i) { 867 logln("[" + i + "] " + (elements_next[i] = iter.next())); 868 } 869 870 int[] elements_fwd = new int[3]; 871 logln("calling set/next:"); 872 for (int i = 0; i < 3; ++i) { 873 iter.setOffset(i); 874 logln("[" + i + "] " + (elements_fwd[i] = iter.next())); 875 } 876 877 for (int i = 0; i < 3; ++i) { 878 if (elements_next[i] != elements_fwd[i]) { 879 errln("mismatch at position " + i + 880 ": " + elements_next[i] + 881 " != " + elements_fwd[i]); 882 } 883 } 884 } 885 886 //------------------------------------------------------------------------ 887 // Internal utilities 888 // 889 private void compareArray(Collator c, String[] tests) { 890 for (int i = 0; i < tests.length; i += 3) { 891 892 int expect = 0; 893 if (tests[i+1].equals("<")) { 894 expect = -1; 895 } else if (tests[i+1].equals(">")) { 896 expect = 1; 897 } else if (tests[i+1].equals("=")) { 898 expect = 0; 899 } else { 900 expect = Integer.decode(tests[i+1]).intValue(); 901 } 902 903 int result = c.compare(tests[i], tests[i+2]); 904 if (sign(result) != sign(expect)) 905 { 906 errln( i/3 + ": compare(" + prettify(tests[i]) 907 + " , " + prettify(tests[i+2]) 908 + ") got " + result + "; expected " + expect); 909 } 910 else 911 { 912 // Collator.compare worked OK; now try the collation keys 913 CollationKey k1 = c.getCollationKey(tests[i]); 914 CollationKey k2 = c.getCollationKey(tests[i+2]); 915 916 result = k1.compareTo(k2); 917 if (sign(result) != sign(expect)) { 918 errln( i/3 + ": key(" + prettify(tests[i]) 919 + ").compareTo(key(" + prettify(tests[i+2]) 920 + ")) got " + result + "; expected " + expect); 921 922 errln(" " + prettify(k1) + " vs. " + prettify(k2)); 923 } 924 } 925 } 926 } 927 928 private static final int sign(int i) { 929 if (i < 0) return -1; 930 if (i > 0) return 1; 931 return 0; 932 } 933 934 935 static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US); 936 937 String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"; 938 String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?"; 939 String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck"; 940 }