1 /* 2 * Copyright (c) 1997, 2006, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 * (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved 28 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved 29 * 30 * The original version of this source code and documentation is copyrighted 31 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These 32 * materials are provided under terms of a License Agreement between Taligent 33 * and Sun. This technology is protected by multiple US and International 34 * patents. This notice and attribution to Taligent may not be removed. 35 * Taligent is a registered trademark of Taligent, Inc. 36 * 37 */ 38 39 package java.text; 40 41 import java.text.spi.CollatorProvider; 42 import java.util.Locale; 43 import java.util.MissingResourceException; 44 import java.util.ResourceBundle; 45 import java.util.spi.LocaleServiceProvider; 46 import sun.misc.SoftCache; 47 import sun.util.resources.LocaleData; 48 import sun.util.LocaleServiceProviderPool; 49 50 51 /** 52 * The <code>Collator</code> class performs locale-sensitive 53 * <code>String</code> comparison. You use this class to build 54 * searching and sorting routines for natural language text. 55 * 56 * <p> 57 * <code>Collator</code> is an abstract base class. Subclasses 58 * implement specific collation strategies. One subclass, 59 * <code>RuleBasedCollator</code>, is currently provided with 60 * the Java Platform and is applicable to a wide set of languages. Other 61 * subclasses may be created to handle more specialized needs. 62 * 63 * <p> 64 * Like other locale-sensitive classes, you can use the static 65 * factory method, <code>getInstance</code>, to obtain the appropriate 66 * <code>Collator</code> object for a given locale. You will only need 67 * to look at the subclasses of <code>Collator</code> if you need 68 * to understand the details of a particular collation strategy or 69 * if you need to modify that strategy. 70 * 71 * <p> 72 * The following example shows how to compare two strings using 73 * the <code>Collator</code> for the default locale. 74 * <blockquote> 75 * <pre> 76 * // Compare two strings in the default locale 77 * Collator myCollator = Collator.getInstance(); 78 * if( myCollator.compare("abc", "ABC") < 0 ) 79 * System.out.println("abc is less than ABC"); 80 * else 81 * System.out.println("abc is greater than or equal to ABC"); 82 * </pre> 83 * </blockquote> 84 * 85 * <p> 86 * You can set a <code>Collator</code>'s <em>strength</em> property 87 * to determine the level of difference considered significant in 88 * comparisons. Four strengths are provided: <code>PRIMARY</code>, 89 * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>. 90 * The exact assignment of strengths to language features is 91 * locale dependant. For example, in Czech, "e" and "f" are considered 92 * primary differences, while "e" and "ě" are secondary differences, 93 * "e" and "E" are tertiary differences and "e" and "e" are identical. 94 * The following shows how both case and accents could be ignored for 95 * US English. 96 * <blockquote> 97 * <pre> 98 * //Get the Collator for US English and set its strength to PRIMARY 99 * Collator usCollator = Collator.getInstance(Locale.US); 100 * usCollator.setStrength(Collator.PRIMARY); 101 * if( usCollator.compare("abc", "ABC") == 0 ) { 102 * System.out.println("Strings are equivalent"); 103 * } 104 * </pre> 105 * </blockquote> 106 * <p> 107 * For comparing <code>String</code>s exactly once, the <code>compare</code> 108 * method provides the best performance. When sorting a list of 109 * <code>String</code>s however, it is generally necessary to compare each 110 * <code>String</code> multiple times. In this case, <code>CollationKey</code>s 111 * provide better performance. The <code>CollationKey</code> class converts 112 * a <code>String</code> to a series of bits that can be compared bitwise 113 * against other <code>CollationKey</code>s. A <code>CollationKey</code> is 114 * created by a <code>Collator</code> object for a given <code>String</code>. 115 * <br> 116 * <strong>Note:</strong> <code>CollationKey</code>s from different 117 * <code>Collator</code>s can not be compared. See the class description 118 * for {@link CollationKey} 119 * for an example using <code>CollationKey</code>s. 120 * 121 * @see RuleBasedCollator 122 * @see CollationKey 123 * @see CollationElementIterator 124 * @see Locale 125 * @author Helena Shih, Laura Werner, Richard Gillam 126 */ 127 128 public abstract class Collator 129 implements java.util.Comparator<Object>, Cloneable 130 { 131 /** 132 * Collator strength value. When set, only PRIMARY differences are 133 * considered significant during comparison. The assignment of strengths 134 * to language features is locale dependant. A common example is for 135 * different base letters ("a" vs "b") to be considered a PRIMARY difference. 136 * @see java.text.Collator#setStrength 137 * @see java.text.Collator#getStrength 138 */ 139 public final static int PRIMARY = 0; 140 /** 141 * Collator strength value. When set, only SECONDARY and above differences are 142 * considered significant during comparison. The assignment of strengths 143 * to language features is locale dependant. A common example is for 144 * different accented forms of the same base letter ("a" vs "\u00E4") to be 145 * considered a SECONDARY difference. 146 * @see java.text.Collator#setStrength 147 * @see java.text.Collator#getStrength 148 */ 149 public final static int SECONDARY = 1; 150 /** 151 * Collator strength value. When set, only TERTIARY and above differences are 152 * considered significant during comparison. The assignment of strengths 153 * to language features is locale dependant. A common example is for 154 * case differences ("a" vs "A") to be considered a TERTIARY difference. 155 * @see java.text.Collator#setStrength 156 * @see java.text.Collator#getStrength 157 */ 158 public final static int TERTIARY = 2; 159 160 /** 161 * Collator strength value. When set, all differences are 162 * considered significant during comparison. The assignment of strengths 163 * to language features is locale dependant. A common example is for control 164 * characters ("\u0001" vs "\u0002") to be considered equal at the 165 * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL 166 * level. Additionally, differences between pre-composed accents such as 167 * "\u00C0" (A-grave) and combining accents such as "A\u0300" 168 * (A, combining-grave) will be considered significant at the IDENTICAL 169 * level if decomposition is set to NO_DECOMPOSITION. 170 */ 171 public final static int IDENTICAL = 3; 172 173 /** 174 * Decomposition mode value. With NO_DECOMPOSITION 175 * set, accented characters will not be decomposed for collation. This 176 * is the default setting and provides the fastest collation but 177 * will only produce correct results for languages that do not use accents. 178 * @see java.text.Collator#getDecomposition 179 * @see java.text.Collator#setDecomposition 180 */ 181 public final static int NO_DECOMPOSITION = 0; 182 183 /** 184 * Decomposition mode value. With CANONICAL_DECOMPOSITION 185 * set, characters that are canonical variants according to Unicode 186 * standard will be decomposed for collation. This should be used to get 187 * correct collation of accented characters. 188 * <p> 189 * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as 190 * described in 191 * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode 192 * Technical Report #15</a>. 193 * @see java.text.Collator#getDecomposition 194 * @see java.text.Collator#setDecomposition 195 */ 196 public final static int CANONICAL_DECOMPOSITION = 1; 197 198 /** 199 * Decomposition mode value. With FULL_DECOMPOSITION 200 * set, both Unicode canonical variants and Unicode compatibility variants 201 * will be decomposed for collation. This causes not only accented 202 * characters to be collated, but also characters that have special formats 203 * to be collated with their norminal form. For example, the half-width and 204 * full-width ASCII and Katakana characters are then collated together. 205 * FULL_DECOMPOSITION is the most complete and therefore the slowest 206 * decomposition mode. 207 * <p> 208 * FULL_DECOMPOSITION corresponds to Normalization Form KD as 209 * described in 210 * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode 211 * Technical Report #15</a>. 212 * @see java.text.Collator#getDecomposition 213 * @see java.text.Collator#setDecomposition 214 */ 215 public final static int FULL_DECOMPOSITION = 2; 216 217 /** 218 * Gets the Collator for the current default locale. 219 * The default locale is determined by java.util.Locale.getDefault. 220 * @return the Collator for the default locale.(for example, en_US) 221 * @see java.util.Locale#getDefault 222 */ 223 public static synchronized Collator getInstance() { 224 return getInstance(Locale.getDefault()); 225 } 226 227 /** 228 * Gets the Collator for the desired locale. 229 * @param desiredLocale the desired locale. 230 * @return the Collator for the desired locale. 231 * @see java.util.Locale 232 * @see java.util.ResourceBundle 233 */ 234 public static synchronized 235 Collator getInstance(Locale desiredLocale) 236 { 237 Collator result = (Collator) cache.get(desiredLocale); 238 if (result != null) { 239 return (Collator)result.clone(); // make the world safe 240 } 241 242 // Check whether a provider can provide an implementation that's closer 243 // to the requested locale than what the Java runtime itself can provide. 244 LocaleServiceProviderPool pool = 245 LocaleServiceProviderPool.getPool(CollatorProvider.class); 246 if (pool.hasProviders()) { 247 Collator providersInstance = pool.getLocalizedObject( 248 CollatorGetter.INSTANCE, 249 desiredLocale, 250 desiredLocale); 251 if (providersInstance != null) { 252 return providersInstance; 253 } 254 } 255 256 // Load the resource of the desired locale from resource 257 // manager. 258 String colString = ""; 259 try { 260 ResourceBundle resource = LocaleData.getCollationData(desiredLocale); 261 262 colString = resource.getString("Rule"); 263 } catch (MissingResourceException e) { 264 // Use default values 265 } 266 try 267 { 268 result = new RuleBasedCollator( CollationRules.DEFAULTRULES + 269 colString, 270 CANONICAL_DECOMPOSITION ); 271 } 272 catch(ParseException foo) 273 { 274 // predefined tables should contain correct grammar 275 try { 276 result = new RuleBasedCollator( CollationRules.DEFAULTRULES ); 277 } catch (ParseException bar) { 278 // do nothing 279 } 280 } 281 // Now that RuleBasedCollator adds expansions for pre-composed characters 282 // into their decomposed equivalents, the default collators don't need 283 // to have decomposition turned on. Laura, 5/5/98, bug 4114077 284 result.setDecomposition(NO_DECOMPOSITION); 285 286 cache.put(desiredLocale,result); 287 return (Collator)result.clone(); 288 } 289 290 /** 291 * Compares the source string to the target string according to the 292 * collation rules for this Collator. Returns an integer less than, 293 * equal to or greater than zero depending on whether the source String is 294 * less than, equal to or greater than the target string. See the Collator 295 * class description for an example of use. 296 * <p> 297 * For a one time comparison, this method has the best performance. If a 298 * given String will be involved in multiple comparisons, CollationKey.compareTo 299 * has the best performance. See the Collator class description for an example 300 * using CollationKeys. 301 * @param source the source string. 302 * @param target the target string. 303 * @return Returns an integer value. Value is less than zero if source is less than 304 * target, value is zero if source and target are equal, value is greater than zero 305 * if source is greater than target. 306 * @see java.text.CollationKey 307 * @see java.text.Collator#getCollationKey 308 */ 309 public abstract int compare(String source, String target); 310 311 /** 312 * Compares its two arguments for order. Returns a negative integer, 313 * zero, or a positive integer as the first argument is less than, equal 314 * to, or greater than the second. 315 * <p> 316 * This implementation merely returns 317 * <code> compare((String)o1, (String)o2) </code>. 318 * 319 * @return a negative integer, zero, or a positive integer as the 320 * first argument is less than, equal to, or greater than the 321 * second. 322 * @exception ClassCastException the arguments cannot be cast to Strings. 323 * @see java.util.Comparator 324 * @since 1.2 325 */ 326 public int compare(Object o1, Object o2) { 327 return compare((String)o1, (String)o2); 328 } 329 330 /** 331 * Transforms the String into a series of bits that can be compared bitwise 332 * to other CollationKeys. CollationKeys provide better performance than 333 * Collator.compare when Strings are involved in multiple comparisons. 334 * See the Collator class description for an example using CollationKeys. 335 * @param source the string to be transformed into a collation key. 336 * @return the CollationKey for the given String based on this Collator's collation 337 * rules. If the source String is null, a null CollationKey is returned. 338 * @see java.text.CollationKey 339 * @see java.text.Collator#compare 340 */ 341 public abstract CollationKey getCollationKey(String source); 342 343 /** 344 * Convenience method for comparing the equality of two strings based on 345 * this Collator's collation rules. 346 * @param source the source string to be compared with. 347 * @param target the target string to be compared with. 348 * @return true if the strings are equal according to the collation 349 * rules. false, otherwise. 350 * @see java.text.Collator#compare 351 */ 352 public boolean equals(String source, String target) 353 { 354 return (compare(source, target) == Collator.EQUAL); 355 } 356 357 /** 358 * Returns this Collator's strength property. The strength property determines 359 * the minimum level of difference considered significant during comparison. 360 * See the Collator class description for an example of use. 361 * @return this Collator's current strength property. 362 * @see java.text.Collator#setStrength 363 * @see java.text.Collator#PRIMARY 364 * @see java.text.Collator#SECONDARY 365 * @see java.text.Collator#TERTIARY 366 * @see java.text.Collator#IDENTICAL 367 */ 368 public synchronized int getStrength() 369 { 370 return strength; 371 } 372 373 /** 374 * Sets this Collator's strength property. The strength property determines 375 * the minimum level of difference considered significant during comparison. 376 * See the Collator class description for an example of use. 377 * @param newStrength the new strength value. 378 * @see java.text.Collator#getStrength 379 * @see java.text.Collator#PRIMARY 380 * @see java.text.Collator#SECONDARY 381 * @see java.text.Collator#TERTIARY 382 * @see java.text.Collator#IDENTICAL 383 * @exception IllegalArgumentException If the new strength value is not one of 384 * PRIMARY, SECONDARY, TERTIARY or IDENTICAL. 385 */ 386 public synchronized void setStrength(int newStrength) { 387 if ((newStrength != PRIMARY) && 388 (newStrength != SECONDARY) && 389 (newStrength != TERTIARY) && 390 (newStrength != IDENTICAL)) 391 throw new IllegalArgumentException("Incorrect comparison level."); 392 strength = newStrength; 393 } 394 395 /** 396 * Get the decomposition mode of this Collator. Decomposition mode 397 * determines how Unicode composed characters are handled. Adjusting 398 * decomposition mode allows the user to select between faster and more 399 * complete collation behavior. 400 * <p>The three values for decomposition mode are: 401 * <UL> 402 * <LI>NO_DECOMPOSITION, 403 * <LI>CANONICAL_DECOMPOSITION 404 * <LI>FULL_DECOMPOSITION. 405 * </UL> 406 * See the documentation for these three constants for a description 407 * of their meaning. 408 * @return the decomposition mode 409 * @see java.text.Collator#setDecomposition 410 * @see java.text.Collator#NO_DECOMPOSITION 411 * @see java.text.Collator#CANONICAL_DECOMPOSITION 412 * @see java.text.Collator#FULL_DECOMPOSITION 413 */ 414 public synchronized int getDecomposition() 415 { 416 return decmp; 417 } 418 /** 419 * Set the decomposition mode of this Collator. See getDecomposition 420 * for a description of decomposition mode. 421 * @param decompositionMode the new decomposition mode. 422 * @see java.text.Collator#getDecomposition 423 * @see java.text.Collator#NO_DECOMPOSITION 424 * @see java.text.Collator#CANONICAL_DECOMPOSITION 425 * @see java.text.Collator#FULL_DECOMPOSITION 426 * @exception IllegalArgumentException If the given value is not a valid decomposition 427 * mode. 428 */ 429 public synchronized void setDecomposition(int decompositionMode) { 430 if ((decompositionMode != NO_DECOMPOSITION) && 431 (decompositionMode != CANONICAL_DECOMPOSITION) && 432 (decompositionMode != FULL_DECOMPOSITION)) 433 throw new IllegalArgumentException("Wrong decomposition mode."); 434 decmp = decompositionMode; 435 } 436 437 /** 438 * Returns an array of all locales for which the 439 * <code>getInstance</code> methods of this class can return 440 * localized instances. 441 * The returned array represents the union of locales supported 442 * by the Java runtime and by installed 443 * {@link java.text.spi.CollatorProvider CollatorProvider} implementations. 444 * It must contain at least a Locale instance equal to 445 * {@link java.util.Locale#US Locale.US}. 446 * 447 * @return An array of locales for which localized 448 * <code>Collator</code> instances are available. 449 */ 450 public static synchronized Locale[] getAvailableLocales() { 451 LocaleServiceProviderPool pool = 452 LocaleServiceProviderPool.getPool(CollatorProvider.class); 453 return pool.getAvailableLocales(); 454 } 455 456 /** 457 * Overrides Cloneable 458 */ 459 public Object clone() 460 { 461 try { 462 return (Collator)super.clone(); 463 } catch (CloneNotSupportedException e) { 464 throw new InternalError(e); 465 } 466 } 467 468 /** 469 * Compares the equality of two Collators. 470 * @param that the Collator to be compared with this. 471 * @return true if this Collator is the same as that Collator; 472 * false otherwise. 473 */ 474 public boolean equals(Object that) 475 { 476 if (this == that) return true; 477 if (that == null) return false; 478 if (getClass() != that.getClass()) return false; 479 Collator other = (Collator) that; 480 return ((strength == other.strength) && 481 (decmp == other.decmp)); 482 } 483 484 /** 485 * Generates the hash code for this Collator. 486 */ 487 abstract public int hashCode(); 488 489 /** 490 * Default constructor. This constructor is 491 * protected so subclasses can get access to it. Users typically create 492 * a Collator sub-class by calling the factory method getInstance. 493 * @see java.text.Collator#getInstance 494 */ 495 protected Collator() 496 { 497 strength = TERTIARY; 498 decmp = CANONICAL_DECOMPOSITION; 499 } 500 501 private int strength = 0; 502 private int decmp = 0; 503 private static SoftCache cache = new SoftCache(); 504 505 // 506 // FIXME: These three constants should be removed. 507 // 508 /** 509 * LESS is returned if source string is compared to be less than target 510 * string in the compare() method. 511 * @see java.text.Collator#compare 512 */ 513 final static int LESS = -1; 514 /** 515 * EQUAL is returned if source string is compared to be equal to target 516 * string in the compare() method. 517 * @see java.text.Collator#compare 518 */ 519 final static int EQUAL = 0; 520 /** 521 * GREATER is returned if source string is compared to be greater than 522 * target string in the compare() method. 523 * @see java.text.Collator#compare 524 */ 525 final static int GREATER = 1; 526 527 /** 528 * Obtains a Collator instance from a CollatorProvider 529 * implementation. 530 */ 531 private static class CollatorGetter 532 implements LocaleServiceProviderPool.LocalizedObjectGetter<CollatorProvider, Collator> { 533 private static final CollatorGetter INSTANCE = new CollatorGetter(); 534 535 public Collator getObject(CollatorProvider collatorProvider, 536 Locale locale, 537 String key, 538 Object... params) { 539 assert params.length == 1; 540 Collator result = collatorProvider.getInstance(locale); 541 if (result != null) { 542 // put this Collator instance in the cache for two locales, one 543 // is for the desired locale, and the other is for the actual 544 // locale where the provider is found, which may be a fall back locale. 545 cache.put((Locale)params[0], result); 546 cache.put(locale, result); 547 return (Collator)result.clone(); 548 } 549 550 return null; 551 } 552 } 553 }