1 /*
   2  * Copyright (c) 1997, 2006, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  * (C) Copyright Taligent, Inc. 1996-1998 -  All Rights Reserved
  28  * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  29  *
  30  *   The original version of this source code and documentation is copyrighted
  31  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  32  * materials are provided under terms of a License Agreement between Taligent
  33  * and Sun. This technology is protected by multiple US and International
  34  * patents. This notice and attribution to Taligent may not be removed.
  35  *   Taligent is a registered trademark of Taligent, Inc.
  36  *
  37  */
  38 
  39 package java.text;
  40 
  41 import java.text.spi.CollatorProvider;
  42 import java.util.Locale;
  43 import java.util.MissingResourceException;
  44 import java.util.ResourceBundle;
  45 import java.util.spi.LocaleServiceProvider;
  46 import sun.misc.SoftCache;
  47 import sun.util.resources.LocaleData;
  48 import sun.util.LocaleServiceProviderPool;
  49 
  50 
  51 /**
  52  * The <code>Collator</code> class performs locale-sensitive
  53  * <code>String</code> comparison. You use this class to build
  54  * searching and sorting routines for natural language text.
  55  *
  56  * <p>
  57  * <code>Collator</code> is an abstract base class. Subclasses
  58  * implement specific collation strategies. One subclass,
  59  * <code>RuleBasedCollator</code>, is currently provided with
  60  * the Java Platform and is applicable to a wide set of languages. Other
  61  * subclasses may be created to handle more specialized needs.
  62  *
  63  * <p>
  64  * Like other locale-sensitive classes, you can use the static
  65  * factory method, <code>getInstance</code>, to obtain the appropriate
  66  * <code>Collator</code> object for a given locale. You will only need
  67  * to look at the subclasses of <code>Collator</code> if you need
  68  * to understand the details of a particular collation strategy or
  69  * if you need to modify that strategy.
  70  *
  71  * <p>
  72  * The following example shows how to compare two strings using
  73  * the <code>Collator</code> for the default locale.
  74  * <blockquote>
  75  * <pre>
  76  * // Compare two strings in the default locale
  77  * Collator myCollator = Collator.getInstance();
 * if( myCollator.compare("abc", "ABC") &lt; 0 )
  79  *     System.out.println("abc is less than ABC");
  80  * else
  81  *     System.out.println("abc is greater than or equal to ABC");
  82  * </pre>
  83  * </blockquote>
  84  *
  85  * <p>
  86  * You can set a <code>Collator</code>'s <em>strength</em> property
  87  * to determine the level of difference considered significant in
  88  * comparisons. Four strengths are provided: <code>PRIMARY</code>,
  89  * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
  90  * The exact assignment of strengths to language features is
 * locale dependent.  For example, in Czech, "e" and "f" are considered
  92  * primary differences, while "e" and "&#283;" are secondary differences,
  93  * "e" and "E" are tertiary differences and "e" and "e" are identical.
  94  * The following shows how both case and accents could be ignored for
  95  * US English.
  96  * <blockquote>
  97  * <pre>
  98  * //Get the Collator for US English and set its strength to PRIMARY
  99  * Collator usCollator = Collator.getInstance(Locale.US);
 100  * usCollator.setStrength(Collator.PRIMARY);
 101  * if( usCollator.compare("abc", "ABC") == 0 ) {
 102  *     System.out.println("Strings are equivalent");
 103  * }
 104  * </pre>
 105  * </blockquote>
 106  * <p>
 107  * For comparing <code>String</code>s exactly once, the <code>compare</code>
 108  * method provides the best performance. When sorting a list of
 109  * <code>String</code>s however, it is generally necessary to compare each
 110  * <code>String</code> multiple times. In this case, <code>CollationKey</code>s
 111  * provide better performance. The <code>CollationKey</code> class converts
 112  * a <code>String</code> to a series of bits that can be compared bitwise
 113  * against other <code>CollationKey</code>s. A <code>CollationKey</code> is
 114  * created by a <code>Collator</code> object for a given <code>String</code>.
 115  * <br>
 116  * <strong>Note:</strong> <code>CollationKey</code>s from different
 117  * <code>Collator</code>s can not be compared. See the class description
 118  * for {@link CollationKey}
 119  * for an example using <code>CollationKey</code>s.
 120  *
 121  * @see         RuleBasedCollator
 122  * @see         CollationKey
 123  * @see         CollationElementIterator
 124  * @see         Locale
 125  * @author      Helena Shih, Laura Werner, Richard Gillam
 126  */
 127 
 128 public abstract class Collator
 129     implements java.util.Comparator<Object>, Cloneable
 130 {
 131     /**
 132      * Collator strength value.  When set, only PRIMARY differences are
 133      * considered significant during comparison. The assignment of strengths
 134      * to language features is locale dependant. A common example is for
 135      * different base letters ("a" vs "b") to be considered a PRIMARY difference.
 136      * @see java.text.Collator#setStrength
 137      * @see java.text.Collator#getStrength
 138      */
 139     public final static int PRIMARY = 0;
 140     /**
 141      * Collator strength value.  When set, only SECONDARY and above differences are
 142      * considered significant during comparison. The assignment of strengths
 143      * to language features is locale dependant. A common example is for
 144      * different accented forms of the same base letter ("a" vs "\u00E4") to be
 145      * considered a SECONDARY difference.
 146      * @see java.text.Collator#setStrength
 147      * @see java.text.Collator#getStrength
 148      */
 149     public final static int SECONDARY = 1;
 150     /**
 151      * Collator strength value.  When set, only TERTIARY and above differences are
 152      * considered significant during comparison. The assignment of strengths
 153      * to language features is locale dependant. A common example is for
 154      * case differences ("a" vs "A") to be considered a TERTIARY difference.
 155      * @see java.text.Collator#setStrength
 156      * @see java.text.Collator#getStrength
 157      */
 158     public final static int TERTIARY = 2;
 159 
 160     /**
 161      * Collator strength value.  When set, all differences are
 162      * considered significant during comparison. The assignment of strengths
 163      * to language features is locale dependant. A common example is for control
 164      * characters ("&#092;u0001" vs "&#092;u0002") to be considered equal at the
 165      * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
 166      * level.  Additionally, differences between pre-composed accents such as
 167      * "&#092;u00C0" (A-grave) and combining accents such as "A&#092;u0300"
 168      * (A, combining-grave) will be considered significant at the IDENTICAL
 169      * level if decomposition is set to NO_DECOMPOSITION.
 170      */
 171     public final static int IDENTICAL = 3;
 172 
 173     /**
 174      * Decomposition mode value. With NO_DECOMPOSITION
 175      * set, accented characters will not be decomposed for collation. This
 176      * is the default setting and provides the fastest collation but
 177      * will only produce correct results for languages that do not use accents.
 178      * @see java.text.Collator#getDecomposition
 179      * @see java.text.Collator#setDecomposition
 180      */
 181     public final static int NO_DECOMPOSITION = 0;
 182 
 183     /**
 184      * Decomposition mode value. With CANONICAL_DECOMPOSITION
 185      * set, characters that are canonical variants according to Unicode
 186      * standard will be decomposed for collation. This should be used to get
 187      * correct collation of accented characters.
 188      * <p>
 189      * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
 190      * described in
 191      * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
 192      * Technical Report #15</a>.
 193      * @see java.text.Collator#getDecomposition
 194      * @see java.text.Collator#setDecomposition
 195      */
 196     public final static int CANONICAL_DECOMPOSITION = 1;
 197 
 198     /**
 199      * Decomposition mode value. With FULL_DECOMPOSITION
 200      * set, both Unicode canonical variants and Unicode compatibility variants
 201      * will be decomposed for collation.  This causes not only accented
 202      * characters to be collated, but also characters that have special formats
 203      * to be collated with their norminal form. For example, the half-width and
 204      * full-width ASCII and Katakana characters are then collated together.
 205      * FULL_DECOMPOSITION is the most complete and therefore the slowest
 206      * decomposition mode.
 207      * <p>
 208      * FULL_DECOMPOSITION corresponds to Normalization Form KD as
 209      * described in
 210      * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
 211      * Technical Report #15</a>.
 212      * @see java.text.Collator#getDecomposition
 213      * @see java.text.Collator#setDecomposition
 214      */
 215     public final static int FULL_DECOMPOSITION = 2;
 216 
 217     /**
 218      * Gets the Collator for the current default locale.
 219      * The default locale is determined by java.util.Locale.getDefault.
 220      * @return the Collator for the default locale.(for example, en_US)
 221      * @see java.util.Locale#getDefault
 222      */
 223     public static synchronized Collator getInstance() {
 224         return getInstance(Locale.getDefault());
 225     }
 226 
 227     /**
 228      * Gets the Collator for the desired locale.
 229      * @param desiredLocale the desired locale.
 230      * @return the Collator for the desired locale.
 231      * @see java.util.Locale
 232      * @see java.util.ResourceBundle
 233      */
 234     public static synchronized
 235     Collator getInstance(Locale desiredLocale)
 236     {
 237         Collator result = (Collator) cache.get(desiredLocale);
 238         if (result != null) {
 239                  return (Collator)result.clone();  // make the world safe
 240         }
 241 
 242         // Check whether a provider can provide an implementation that's closer
 243         // to the requested locale than what the Java runtime itself can provide.
 244         LocaleServiceProviderPool pool =
 245             LocaleServiceProviderPool.getPool(CollatorProvider.class);
 246         if (pool.hasProviders()) {
 247             Collator providersInstance = pool.getLocalizedObject(
 248                                             CollatorGetter.INSTANCE,
 249                                             desiredLocale,
 250                                             desiredLocale);
 251             if (providersInstance != null) {
 252                 return providersInstance;
 253             }
 254         }
 255 
 256         // Load the resource of the desired locale from resource
 257         // manager.
 258         String colString = "";
 259         try {
 260             ResourceBundle resource = LocaleData.getCollationData(desiredLocale);
 261 
 262             colString = resource.getString("Rule");
 263         } catch (MissingResourceException e) {
 264             // Use default values
 265         }
 266         try
 267         {
 268             result = new RuleBasedCollator( CollationRules.DEFAULTRULES +
 269                                             colString,
 270                                             CANONICAL_DECOMPOSITION );
 271         }
 272         catch(ParseException foo)
 273         {
 274             // predefined tables should contain correct grammar
 275             try {
 276                 result = new RuleBasedCollator( CollationRules.DEFAULTRULES );
 277             } catch (ParseException bar) {
 278                 // do nothing
 279             }
 280         }
 281         // Now that RuleBasedCollator adds expansions for pre-composed characters
 282         // into their decomposed equivalents, the default collators don't need
 283         // to have decomposition turned on.  Laura, 5/5/98, bug 4114077
 284         result.setDecomposition(NO_DECOMPOSITION);
 285 
 286         cache.put(desiredLocale,result);
 287         return (Collator)result.clone();
 288     }
 289 
 290     /**
 291      * Compares the source string to the target string according to the
 292      * collation rules for this Collator.  Returns an integer less than,
 293      * equal to or greater than zero depending on whether the source String is
 294      * less than, equal to or greater than the target string.  See the Collator
 295      * class description for an example of use.
 296      * <p>
 297      * For a one time comparison, this method has the best performance. If a
 298      * given String will be involved in multiple comparisons, CollationKey.compareTo
 299      * has the best performance. See the Collator class description for an example
 300      * using CollationKeys.
 301      * @param source the source string.
 302      * @param target the target string.
 303      * @return Returns an integer value. Value is less than zero if source is less than
 304      * target, value is zero if source and target are equal, value is greater than zero
 305      * if source is greater than target.
 306      * @see java.text.CollationKey
 307      * @see java.text.Collator#getCollationKey
 308      */
 309     public abstract int compare(String source, String target);
 310 
 311     /**
 312      * Compares its two arguments for order.  Returns a negative integer,
 313      * zero, or a positive integer as the first argument is less than, equal
 314      * to, or greater than the second.
 315      * <p>
 316      * This implementation merely returns
 317      *  <code> compare((String)o1, (String)o2) </code>.
 318      *
 319      * @return a negative integer, zero, or a positive integer as the
 320      *         first argument is less than, equal to, or greater than the
 321      *         second.
 322      * @exception ClassCastException the arguments cannot be cast to Strings.
 323      * @see java.util.Comparator
 324      * @since   1.2
 325      */
 326     public int compare(Object o1, Object o2) {
 327     return compare((String)o1, (String)o2);
 328     }
 329 
 330     /**
 331      * Transforms the String into a series of bits that can be compared bitwise
 332      * to other CollationKeys. CollationKeys provide better performance than
 333      * Collator.compare when Strings are involved in multiple comparisons.
 334      * See the Collator class description for an example using CollationKeys.
 335      * @param source the string to be transformed into a collation key.
 336      * @return the CollationKey for the given String based on this Collator's collation
 337      * rules. If the source String is null, a null CollationKey is returned.
 338      * @see java.text.CollationKey
 339      * @see java.text.Collator#compare
 340      */
 341     public abstract CollationKey getCollationKey(String source);
 342 
 343     /**
 344      * Convenience method for comparing the equality of two strings based on
 345      * this Collator's collation rules.
 346      * @param source the source string to be compared with.
 347      * @param target the target string to be compared with.
 348      * @return true if the strings are equal according to the collation
 349      * rules.  false, otherwise.
 350      * @see java.text.Collator#compare
 351      */
 352     public boolean equals(String source, String target)
 353     {
 354         return (compare(source, target) == Collator.EQUAL);
 355     }
 356 
 357     /**
 358      * Returns this Collator's strength property.  The strength property determines
 359      * the minimum level of difference considered significant during comparison.
 360      * See the Collator class description for an example of use.
 361      * @return this Collator's current strength property.
 362      * @see java.text.Collator#setStrength
 363      * @see java.text.Collator#PRIMARY
 364      * @see java.text.Collator#SECONDARY
 365      * @see java.text.Collator#TERTIARY
 366      * @see java.text.Collator#IDENTICAL
 367      */
 368     public synchronized int getStrength()
 369     {
 370         return strength;
 371     }
 372 
 373     /**
 374      * Sets this Collator's strength property.  The strength property determines
 375      * the minimum level of difference considered significant during comparison.
 376      * See the Collator class description for an example of use.
 377      * @param newStrength  the new strength value.
 378      * @see java.text.Collator#getStrength
 379      * @see java.text.Collator#PRIMARY
 380      * @see java.text.Collator#SECONDARY
 381      * @see java.text.Collator#TERTIARY
 382      * @see java.text.Collator#IDENTICAL
 383      * @exception  IllegalArgumentException If the new strength value is not one of
 384      * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
 385      */
 386     public synchronized void setStrength(int newStrength) {
 387         if ((newStrength != PRIMARY) &&
 388             (newStrength != SECONDARY) &&
 389             (newStrength != TERTIARY) &&
 390             (newStrength != IDENTICAL))
 391             throw new IllegalArgumentException("Incorrect comparison level.");
 392         strength = newStrength;
 393     }
 394 
 395     /**
 396      * Get the decomposition mode of this Collator. Decomposition mode
 397      * determines how Unicode composed characters are handled. Adjusting
 398      * decomposition mode allows the user to select between faster and more
 399      * complete collation behavior.
 400      * <p>The three values for decomposition mode are:
 401      * <UL>
 402      * <LI>NO_DECOMPOSITION,
 403      * <LI>CANONICAL_DECOMPOSITION
 404      * <LI>FULL_DECOMPOSITION.
 405      * </UL>
 406      * See the documentation for these three constants for a description
 407      * of their meaning.
 408      * @return the decomposition mode
 409      * @see java.text.Collator#setDecomposition
 410      * @see java.text.Collator#NO_DECOMPOSITION
 411      * @see java.text.Collator#CANONICAL_DECOMPOSITION
 412      * @see java.text.Collator#FULL_DECOMPOSITION
 413      */
 414     public synchronized int getDecomposition()
 415     {
 416         return decmp;
 417     }
 418     /**
 419      * Set the decomposition mode of this Collator. See getDecomposition
 420      * for a description of decomposition mode.
 421      * @param decompositionMode  the new decomposition mode.
 422      * @see java.text.Collator#getDecomposition
 423      * @see java.text.Collator#NO_DECOMPOSITION
 424      * @see java.text.Collator#CANONICAL_DECOMPOSITION
 425      * @see java.text.Collator#FULL_DECOMPOSITION
 426      * @exception IllegalArgumentException If the given value is not a valid decomposition
 427      * mode.
 428      */
 429     public synchronized void setDecomposition(int decompositionMode) {
 430         if ((decompositionMode != NO_DECOMPOSITION) &&
 431             (decompositionMode != CANONICAL_DECOMPOSITION) &&
 432             (decompositionMode != FULL_DECOMPOSITION))
 433             throw new IllegalArgumentException("Wrong decomposition mode.");
 434         decmp = decompositionMode;
 435     }
 436 
 437     /**
 438      * Returns an array of all locales for which the
 439      * <code>getInstance</code> methods of this class can return
 440      * localized instances.
 441      * The returned array represents the union of locales supported
 442      * by the Java runtime and by installed
 443      * {@link java.text.spi.CollatorProvider CollatorProvider} implementations.
 444      * It must contain at least a Locale instance equal to
 445      * {@link java.util.Locale#US Locale.US}.
 446      *
 447      * @return An array of locales for which localized
 448      *         <code>Collator</code> instances are available.
 449      */
 450     public static synchronized Locale[] getAvailableLocales() {
 451         LocaleServiceProviderPool pool =
 452             LocaleServiceProviderPool.getPool(CollatorProvider.class);
 453         return pool.getAvailableLocales();
 454     }
 455 
 456     /**
 457      * Overrides Cloneable
 458      */
 459     public Object clone()
 460     {
 461         try {
 462             return (Collator)super.clone();
 463         } catch (CloneNotSupportedException e) {
 464             throw new InternalError(e);
 465         }
 466     }
 467 
 468     /**
 469      * Compares the equality of two Collators.
 470      * @param that the Collator to be compared with this.
 471      * @return true if this Collator is the same as that Collator;
 472      * false otherwise.
 473      */
 474     public boolean equals(Object that)
 475     {
 476         if (this == that) return true;
 477         if (that == null) return false;
 478         if (getClass() != that.getClass()) return false;
 479         Collator other = (Collator) that;
 480         return ((strength == other.strength) &&
 481                 (decmp == other.decmp));
 482     }
 483 
 484     /**
 485      * Generates the hash code for this Collator.
 486      */
 487     abstract public int hashCode();
 488 
 489     /**
 490      * Default constructor.  This constructor is
 491      * protected so subclasses can get access to it. Users typically create
 492      * a Collator sub-class by calling the factory method getInstance.
 493      * @see java.text.Collator#getInstance
 494      */
 495     protected Collator()
 496     {
 497         strength = TERTIARY;
 498         decmp = CANONICAL_DECOMPOSITION;
 499     }
 500 
 501     private int strength = 0;
 502     private int decmp = 0;
 503     private static SoftCache cache = new SoftCache();
 504 
 505     //
 506     // FIXME: These three constants should be removed.
 507     //
 508     /**
 509      * LESS is returned if source string is compared to be less than target
 510      * string in the compare() method.
 511      * @see java.text.Collator#compare
 512      */
 513     final static int LESS = -1;
 514     /**
 515      * EQUAL is returned if source string is compared to be equal to target
 516      * string in the compare() method.
 517      * @see java.text.Collator#compare
 518      */
 519     final static int EQUAL = 0;
 520     /**
 521      * GREATER is returned if source string is compared to be greater than
 522      * target string in the compare() method.
 523      * @see java.text.Collator#compare
 524      */
 525     final static int GREATER = 1;
 526 
 527     /**
 528      * Obtains a Collator instance from a CollatorProvider
 529      * implementation.
 530      */
 531     private static class CollatorGetter
 532         implements LocaleServiceProviderPool.LocalizedObjectGetter<CollatorProvider, Collator> {
 533         private static final CollatorGetter INSTANCE = new CollatorGetter();
 534 
 535         public Collator getObject(CollatorProvider collatorProvider,
 536                                 Locale locale,
 537                                 String key,
 538                                 Object... params) {
 539             assert params.length == 1;
 540             Collator result = collatorProvider.getInstance(locale);
 541             if (result != null) {
 542                 // put this Collator instance in the cache for two locales, one
 543                 // is for the desired locale, and the other is for the actual
 544                 // locale where the provider is found, which may be a fall back locale.
 545                 cache.put((Locale)params[0], result);
 546                 cache.put(locale, result);
 547                 return (Collator)result.clone();
 548             }
 549 
 550             return null;
 551         }
 552     }
 553  }