Old src/share/classes/java/text/BreakIterator.java

   1 /*
   2  * Copyright (c) 1996, 2006, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  28  * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
  29  *
  30  * The original version of this source code and documentation
  31  * is copyrighted and owned by Taligent, Inc., a wholly-owned
  32  * subsidiary of IBM. These materials are provided under terms
  33  * of a License Agreement between Taligent and Sun. This technology
  34  * is protected by multiple US and International patents.
  35  *
  36  * This notice and attribution to Taligent may not be removed.
  37  * Taligent is a registered trademark of Taligent, Inc.
  38  *
  39  */
  40 
  41 package java.text;
  42 
  43 import java.lang.ref.SoftReference;
  44 import java.net.URL;
  45 import java.io.InputStream;
  46 import java.io.IOException;
  47 import java.security.AccessController;
  48 import java.security.PrivilegedAction;
  49 import java.text.CharacterIterator;
  50 import java.text.StringCharacterIterator;
  51 import java.text.spi.BreakIteratorProvider;
  52 import java.util.Locale;
  53 import java.util.MissingResourceException;
  54 import java.util.ResourceBundle;
  55 import java.util.spi.LocaleServiceProvider;
  56 import sun.util.LocaleServiceProviderPool;
  57 import sun.util.resources.LocaleData;
  58 
  59 
  60 /**
  61  * The <code>BreakIterator</code> class implements methods for finding
  62  * the location of boundaries in text. Instances of <code>BreakIterator</code>
  63  * maintain a current position and scan over text
  64  * returning the index of characters where boundaries occur.
  65  * Internally, <code>BreakIterator</code> scans text using a
  66  * <code>CharacterIterator</code>, and is thus able to scan text held
  67  * by any object implementing that protocol. A <code>StringCharacterIterator</code>
  68  * is used to scan <code>String</code> objects passed to <code>setText</code>.
  69  *
  70  * <p>
  71  * You use the factory methods provided by this class to create
  72  * instances of various types of break iterators. In particular,
  73  * use <code>getWordInstance</code>, <code>getLineInstance</code>,
  74  * <code>getSentenceInstance</code>, and <code>getCharacterInstance</code>
  75  * to create <code>BreakIterator</code>s that perform
  76  * word, line, sentence, and character boundary analysis respectively.
  77  * A single <code>BreakIterator</code> can work only on one unit
  78  * (word, line, sentence, and so on). You must use a different iterator
  79  * for each unit boundary analysis you wish to perform.
  80  *
  81  * <p><a name="line"></a>
  82  * Line boundary analysis determines where a text string can be
  83  * broken when line-wrapping. The mechanism correctly handles
  84  * punctuation and hyphenated words. Actual line breaking needs
  85  * to also consider the available line width and is handled by
  86  * higher-level software.
  87  *
  88  * <p><a name="sentence"></a>
  89  * Sentence boundary analysis allows selection with correct interpretation
  90  * of periods within numbers and abbreviations, and trailing punctuation
  91  * marks such as quotation marks and parentheses.
  92  *
  93  * <p><a name="word"></a>
  94  * Word boundary analysis is used by search and replace functions, as
  95  * well as within text editing applications that allow the user to
  96  * select words with a double click. Word selection provides correct
  97  * interpretation of punctuation marks within and following
  98  * words. Characters that are not part of a word, such as symbols
  99  * or punctuation marks, have word-breaks on both sides.
 100  *
 101  * <p><a name="character"></a>
 102  * Character boundary analysis allows users to interact with characters
 103  * as they expect to, for example, when moving the cursor through a text
 104  * string. Character boundary analysis provides correct navigation
 105  * through character strings, regardless of how the character is stored.
 106  * The boundaries returned may be those of supplementary characters,
 107  * combining character sequences, or ligature clusters.
 108  * For example, an accented character might be stored as a base character
 109  * and a diacritical mark. What users consider to be a character can
 110  * differ between languages.
 111  *
 112  * <p>
 113  * The <code>BreakIterator</code> instances returned by the factory methods
 114  * of this class are intended for use with natural languages only, not for
 115  * programming language text. It is however possible to define subclasses
 116  * that tokenize a programming language.
 117  *
 118  * <P>
 119  * <strong>Examples</strong>:<P>
 120  * Creating and using text boundaries:
 121  * <blockquote>
 122  * <pre>
 123  * public static void main(String args[]) {
 124  *      if (args.length == 1) {
 125  *          String stringToExamine = args[0];
 126  *          //print each word in order
 127  *          BreakIterator boundary = BreakIterator.getWordInstance();
 128  *          boundary.setText(stringToExamine);
 129  *          printEachForward(boundary, stringToExamine);
 130  *          //print each sentence in reverse order
 131  *          boundary = BreakIterator.getSentenceInstance(Locale.US);
 132  *          boundary.setText(stringToExamine);
 133  *          printEachBackward(boundary, stringToExamine);
 134  *          printFirst(boundary, stringToExamine);
 135  *          printLast(boundary, stringToExamine);
 136  *      }
 137  * }
 138  * </pre>
 139  * </blockquote>
 140  *
 141  * Print each element in order:
 142  * <blockquote>
 143  * <pre>
 144  * public static void printEachForward(BreakIterator boundary, String source) {
 145  *     int start = boundary.first();
 146  *     for (int end = boundary.next();
 147  *          end != BreakIterator.DONE;
 148  *          start = end, end = boundary.next()) {
 149  *          System.out.println(source.substring(start,end));
 150  *     }
 151  * }
 152  * </pre>
 153  * </blockquote>
 154  *
 155  * Print each element in reverse order:
 156  * <blockquote>
 157  * <pre>
 158  * public static void printEachBackward(BreakIterator boundary, String source) {
 159  *     int end = boundary.last();
 160  *     for (int start = boundary.previous();
 161  *          start != BreakIterator.DONE;
 162  *          end = start, start = boundary.previous()) {
 163  *         System.out.println(source.substring(start,end));
 164  *     }
 165  * }
 166  * </pre>
 167  * </blockquote>
 168  *
 169  * Print first element:
 170  * <blockquote>
 171  * <pre>
 172  * public static void printFirst(BreakIterator boundary, String source) {
 173  *     int start = boundary.first();
 174  *     int end = boundary.next();
 175  *     System.out.println(source.substring(start,end));
 176  * }
 177  * </pre>
 178  * </blockquote>
 179  *
 180  * Print last element:
 181  * <blockquote>
 182  * <pre>
 183  * public static void printLast(BreakIterator boundary, String source) {
 184  *     int end = boundary.last();
 185  *     int start = boundary.previous();
 186  *     System.out.println(source.substring(start,end));
 187  * }
 188  * </pre>
 189  * </blockquote>
 190  *
 191  * Print the element at a specified position:
 192  * <blockquote>
 193  * <pre>
 194  * public static void printAt(BreakIterator boundary, int pos, String source) {
 195  *     int end = boundary.following(pos);
 196  *     int start = boundary.previous();
 197  *     System.out.println(source.substring(start,end));
 198  * }
 199  * </pre>
 200  * </blockquote>
 201  *
 202  * Find the next word:
 203  * <blockquote>
 204  * <pre>
 205  * public static int nextWordStartAfter(int pos, String text) {
 206  *     BreakIterator wb = BreakIterator.getWordInstance();
 207  *     wb.setText(text);
 208  *     int last = wb.following(pos);
 209  *     int current = wb.next();
 210  *     while (current != BreakIterator.DONE) {
 211  *         for (int p = last; p < current; p++) {
 212  *             if (Character.isLetter(text.codePointAt(p)))
 213  *                 return last;
 214  *         }
 215  *         last = current;
 216  *         current = wb.next();
 217  *     }
 218  *     return BreakIterator.DONE;
 219  * }
 220  * </pre>
 221  * (The iterator returned by BreakIterator.getWordInstance() is unique in that
 222  * the break positions it returns don't represent both the start and end of the
 223  * thing being iterated over.  That is, a sentence-break iterator returns breaks
 224  * that each represent the end of one sentence and the beginning of the next.
 225  * With the word-break iterator, the characters between two boundaries might be a
 226  * word, or they might be the punctuation or whitespace between two words.  The
 227  * above code uses a simple heuristic to determine which boundary is the beginning
 228  * of a word: If the characters between this boundary and the next boundary
 229  * include at least one letter (this can be an alphabetical letter, a CJK ideograph,
 230  * a Hangul syllable, a Kana character, etc.), then the text between this boundary
 231  * and the next is a word; otherwise, it's the material between words.)
 232  * </blockquote>
 233  *
 234  * @see CharacterIterator
 235  *
 236  */
 237 
 238 public abstract class BreakIterator implements Cloneable
 239 {
 240     /**
 241      * Constructor. BreakIterator is stateless and has no default behavior.
 242      */
 243     protected BreakIterator()
 244     {
 245     }
 246 
 247     /**
 248      * Create a copy of this iterator
 249      * @return A copy of this
 250      */
 251     public Object clone()
 252     {
 253         try {
 254             return super.clone();
 255         }
 256         catch (CloneNotSupportedException e) {
 257             throw new InternalError(e);
 258         }
 259     }
 260 
    /**
     * Sentinel value returned by previous(), next(), next(int), preceding(int)
     * and following(int) when either the first or last text boundary has been
     * reached, i.e. when there is no further boundary in the requested
     * direction.
     */
    public static final int DONE = -1;
 267 
    /**
     * Returns the first boundary. The iterator's current position is set
     * to the first text boundary.
     *
     * @return the character index of the first text boundary
     */
    public abstract int first();
 274 
    /**
     * Returns the last boundary. The iterator's current position is set
     * to the last text boundary.
     *
     * @return the character index of the last text boundary
     */
    public abstract int last();
 281 
    /**
     * Returns the nth boundary from the current boundary. If either
     * the first or last text boundary has been reached, it returns
     * <code>BreakIterator.DONE</code> and the current position is set to either
     * the first or last text boundary depending on which one is reached. Otherwise,
     * the iterator's current position is set to the new boundary.
     * For example, if the iterator's current position is the mth text boundary
     * and three more boundaries exist from the current boundary to the last text
     * boundary, the next(2) call will return the character index of the
     * (m + 2)th text boundary, and that boundary becomes the new text position.
     * A next(4) call would return <code>BreakIterator.DONE</code> and the last
     * text boundary would become the new text position.
     *
     * @param n which boundary to return.  A value of 0
     * does nothing.  Negative values move to previous boundaries
     * and positive values move to later boundaries.
     * @return The character index of the nth boundary from the current position
     * or <code>BreakIterator.DONE</code> if either first or last text boundary
     * has been reached.
     */
    public abstract int next(int n);
 302 
    /**
     * Returns the boundary following the current boundary. If the current boundary
     * is the last text boundary, it returns <code>BreakIterator.DONE</code> and
     * the iterator's current position is unchanged. Otherwise, the iterator's
     * current position is set to the boundary following the current boundary.
     * Equivalent to next(1).
     *
     * @return The character index of the next text boundary or
     * <code>BreakIterator.DONE</code> if the current boundary is the last text
     * boundary.
     * @see #next(int)
     */
    public abstract int next();
 315 
    /**
     * Returns the boundary preceding the current boundary. If the current boundary
     * is the first text boundary, it returns <code>BreakIterator.DONE</code> and
     * the iterator's current position is unchanged. Otherwise, the iterator's
     * current position is set to the boundary preceding the current boundary.
     *
     * @return The character index of the previous text boundary or
     * <code>BreakIterator.DONE</code> if the current boundary is the first text
     * boundary.
     */
    public abstract int previous();
 326 
    /**
     * Returns the first boundary following the specified character offset. If the
     * specified offset is equal to the last text boundary, it returns
     * <code>BreakIterator.DONE</code> and the iterator's current position is unchanged.
     * Otherwise, the iterator's current position is set to the returned boundary.
     * The value returned is always greater than the offset or the value
     * <code>BreakIterator.DONE</code>.
     *
     * @param offset the character offset to begin scanning.
     * @return The first boundary after the specified offset or
     * <code>BreakIterator.DONE</code> if the last text boundary is passed in
     * as the offset.
     * @exception  IllegalArgumentException if the specified offset is less than
     * the first text boundary or greater than the last text boundary.
     */
    public abstract int following(int offset);
 342 
 343     /**
 344      * Returns the last boundary preceding the specified character offset. If the
 345      * specified offset equals to the first text boundary, it returns
 346      * <code>BreakIterator.DONE</code> and the iterator's current position is unchanged.
 347      * Otherwise, the iterator's current position is set to the returned boundary.
 348      * The value returned is always less than the offset or the value
 349      * <code>BreakIterator.DONE</code>.
 350      * @param offset the characater offset to begin scanning.
 351      * @return The last boundary before the specified offset or
 352      * <code>BreakIterator.DONE</code> if the first text boundary is passed in
 353      * as the offset.
 354      * @exception   IllegalArgumentException if the specified offset is less than
 355      * the first text boundary or greater than the last text boundary.
 356      * @since 1.2
 357      */
 358     public int preceding(int offset) {
 359         // NOTE:  This implementation is here solely because we can't add new
 360         // abstract methods to an existing class.  There is almost ALWAYS a
 361         // better, faster way to do this.
 362         int pos = following(offset);
 363         while (pos >= offset && pos != DONE)
 364             pos = previous();
 365         return pos;
 366     }
 367 
 368     /**
 369      * Returns true if the specified character offset is a text boundary.
 370      * @param offset the character offset to check.
 371      * @return <code>true</code> if "offset" is a boundary position,
 372      * <code>false</code> otherwise.
 373      * @exception   IllegalArgumentException if the specified offset is less than
 374      * the first text boundary or greater than the last text boundary.
 375      * @since 1.2
 376      */
 377     public boolean isBoundary(int offset) {
 378         // NOTE: This implementation probably is wrong for most situations
 379         // because it fails to take into account the possibility that a
 380         // CharacterIterator passed to setText() may not have a begin offset
 381         // of 0.  But since the abstract BreakIterator doesn't have that
 382         // knowledge, it assumes the begin offset is 0.  If you subclass
 383         // BreakIterator, copy the SimpleTextBoundary implementation of this
 384         // function into your subclass.  [This should have been abstract at
 385         // this level, but it's too late to fix that now.]
 386         if (offset == 0) {
 387             return true;
 388         }
 389         int boundary = following(offset - 1);
 390         if (boundary == DONE) {
 391             throw new IllegalArgumentException();
 392         }
 393         return boundary == offset;
 394     }
 395 
    /**
     * Returns the character index of the text boundary that was most
     * recently returned by next(), next(int), previous(), first(), last(),
     * following(int) or preceding(int). If any of these methods returned
     * <code>BreakIterator.DONE</code> because either the first or last text
     * boundary had been reached, it returns the first or last text boundary
     * depending on which one was reached.
     *
     * @return The text boundary returned from the above methods, first or last
     * text boundary.
     * @see #next()
     * @see #next(int)
     * @see #previous()
     * @see #first()
     * @see #last()
     * @see #following(int)
     * @see #preceding(int)
     */
    public abstract int current();
 414 
    /**
     * Gets the text being scanned.
     *
     * @return the text being scanned
     */
    public abstract CharacterIterator getText();
 420 
 421     /**
 422      * Set a new text string to be scanned.  The current scan
 423      * position is reset to first().
 424      * @param newText new text to scan.
 425      */
 426     public void setText(String newText)
 427     {
 428         setText(new StringCharacterIterator(newText));
 429     }
 430 
    /**
     * Sets a new text for scanning.  The current scan
     * position is reset to first().
     *
     * @param newText new text to scan.
     */
    public abstract void setText(CharacterIterator newText);
 437 
    // Iterator-type codes. Each code selects a slot in iterCache, an entry in
    // the "BreakIteratorClasses" resource array read by createBreakInstance,
    // and a case in BreakIteratorGetter.getObject.
    private static final int CHARACTER_INDEX = 0;
    private static final int WORD_INDEX = 1;
    private static final int LINE_INDEX = 2;
    private static final int SENTENCE_INDEX = 3;

    // One cache slot per iterator type, hence the length of 4. A slot stays
    // null until getBreakInstance first fills it and is overwritten on every
    // cache miss. The referent of a slot may be gone by the time it is read;
    // getBreakInstance treats that as a miss.
    // A generic array cannot be created directly, so a wildcard-typed array is
    // created and cast, which is what the unchecked suppression is for.
    @SuppressWarnings("unchecked")
    private static final SoftReference<BreakIteratorCache>[] iterCache = (SoftReference<BreakIteratorCache>[]) new SoftReference<?>[4];
 445 
 446     /**
 447      * Returns a new <code>BreakIterator</code> instance
 448      * for <a href="BreakIterator.html#word">word breaks</a>
 449      * for the {@linkplain Locale#getDefault() default locale}.
 450      * @return A break iterator for word breaks
 451      */
 452     public static BreakIterator getWordInstance()
 453     {
 454         return getWordInstance(Locale.getDefault());
 455     }
 456 
 457     /**
 458      * Returns a new <code>BreakIterator</code> instance
 459      * for <a href="BreakIterator.html#word">word breaks</a>
 460      * for the given locale.
 461      * @param locale the desired locale
 462      * @return A break iterator for word breaks
 463      * @exception NullPointerException if <code>locale</code> is null
 464      */
 465     public static BreakIterator getWordInstance(Locale locale)
 466     {
 467         return getBreakInstance(locale,
 468                                 WORD_INDEX,
 469                                 "WordData",
 470                                 "WordDictionary");
 471     }
 472 
 473     /**
 474      * Returns a new <code>BreakIterator</code> instance
 475      * for <a href="BreakIterator.html#line">line breaks</a>
 476      * for the {@linkplain Locale#getDefault() default locale}.
 477      * @return A break iterator for line breaks
 478      */
 479     public static BreakIterator getLineInstance()
 480     {
 481         return getLineInstance(Locale.getDefault());
 482     }
 483 
 484     /**
 485      * Returns a new <code>BreakIterator</code> instance
 486      * for <a href="BreakIterator.html#line">line breaks</a>
 487      * for the given locale.
 488      * @param locale the desired locale
 489      * @return A break iterator for line breaks
 490      * @exception NullPointerException if <code>locale</code> is null
 491      */
 492     public static BreakIterator getLineInstance(Locale locale)
 493     {
 494         return getBreakInstance(locale,
 495                                 LINE_INDEX,
 496                                 "LineData",
 497                                 "LineDictionary");
 498     }
 499 
 500     /**
 501      * Returns a new <code>BreakIterator</code> instance
 502      * for <a href="BreakIterator.html#character">character breaks</a>
 503      * for the {@linkplain Locale#getDefault() default locale}.
 504      * @return A break iterator for character breaks
 505      */
 506     public static BreakIterator getCharacterInstance()
 507     {
 508         return getCharacterInstance(Locale.getDefault());
 509     }
 510 
 511     /**
 512      * Returns a new <code>BreakIterator</code> instance
 513      * for <a href="BreakIterator.html#character">character breaks</a>
 514      * for the given locale.
 515      * @param locale the desired locale
 516      * @return A break iterator for character breaks
 517      * @exception NullPointerException if <code>locale</code> is null
 518      */
 519     public static BreakIterator getCharacterInstance(Locale locale)
 520     {
 521         return getBreakInstance(locale,
 522                                 CHARACTER_INDEX,
 523                                 "CharacterData",
 524                                 "CharacterDictionary");
 525     }
 526 
 527     /**
 528      * Returns a new <code>BreakIterator</code> instance
 529      * for <a href="BreakIterator.html#sentence">sentence breaks</a>
 530      * for the {@linkplain Locale#getDefault() default locale}.
 531      * @return A break iterator for sentence breaks
 532      */
 533     public static BreakIterator getSentenceInstance()
 534     {
 535         return getSentenceInstance(Locale.getDefault());
 536     }
 537 
 538     /**
 539      * Returns a new <code>BreakIterator</code> instance
 540      * for <a href="BreakIterator.html#sentence">sentence breaks</a>
 541      * for the given locale.
 542      * @param locale the desired locale
 543      * @return A break iterator for sentence breaks
 544      * @exception NullPointerException if <code>locale</code> is null
 545      */
 546     public static BreakIterator getSentenceInstance(Locale locale)
 547     {
 548         return getBreakInstance(locale,
 549                                 SENTENCE_INDEX,
 550                                 "SentenceData",
 551                                 "SentenceDictionary");
 552     }
 553 
 554     private static BreakIterator getBreakInstance(Locale locale,
 555                                                   int type,
 556                                                   String dataName,
 557                                                   String dictionaryName) {
 558         if (iterCache[type] != null) {
 559             BreakIteratorCache cache = iterCache[type].get();
 560             if (cache != null) {
 561                 if (cache.getLocale().equals(locale)) {
 562                     return cache.createBreakInstance();
 563                 }
 564             }
 565         }
 566 
 567         BreakIterator result = createBreakInstance(locale,
 568                                                    type,
 569                                                    dataName,
 570                                                    dictionaryName);
 571         BreakIteratorCache cache = new BreakIteratorCache(locale, result);
 572         iterCache[type] = new SoftReference<>(cache);
 573         return result;
 574     }
 575 
 576     private static ResourceBundle getBundle(final String baseName, final Locale locale) {
 577          return AccessController.doPrivileged(new PrivilegedAction<ResourceBundle>() {
 578             public ResourceBundle run() {
 579                 return ResourceBundle.getBundle(baseName, locale);
 580             }
 581         });
 582     }
 583 
 584     private static BreakIterator createBreakInstance(Locale locale,
 585                                                      int type,
 586                                                      String dataName,
 587                                                      String dictionaryName) {
 588 
 589         // Check whether a provider can provide an implementation that's closer
 590         // to the requested locale than what the Java runtime itself can provide.
 591         LocaleServiceProviderPool pool =
 592             LocaleServiceProviderPool.getPool(BreakIteratorProvider.class);
 593         if (pool.hasProviders()) {
 594             BreakIterator providersInstance = pool.getLocalizedObject(
 595                                                     BreakIteratorGetter.INSTANCE,
 596                                                     locale, type);
 597             if (providersInstance != null) {
 598                 return providersInstance;
 599             }
 600         }
 601 
 602         ResourceBundle bundle = getBundle(
 603                         "sun.text.resources.BreakIteratorInfo", locale);
 604         String[] classNames = bundle.getStringArray("BreakIteratorClasses");
 605 
 606         String dataFile = bundle.getString(dataName);
 607 
 608         try {
 609             if (classNames[type].equals("RuleBasedBreakIterator")) {
 610                 return new RuleBasedBreakIterator(dataFile);
 611             }
 612             else if (classNames[type].equals("DictionaryBasedBreakIterator")) {
 613                 String dictionaryFile = bundle.getString(dictionaryName);
 614                 return new DictionaryBasedBreakIterator(dataFile, dictionaryFile);
 615             }
 616             else {
 617                 throw new IllegalArgumentException("Invalid break iterator class \"" +
 618                                 classNames[type] + "\"");
 619             }
 620         }
 621         catch (Exception e) {
 622             throw new InternalError(e.toString(), e);
 623         }
 624     }
 625 
 626     /**
 627      * Returns an array of all locales for which the
 628      * <code>get*Instance</code> methods of this class can return
 629      * localized instances.
 630      * The returned array represents the union of locales supported by the Java
 631      * runtime and by installed
 632      * {@link java.text.spi.BreakIteratorProvider BreakIteratorProvider} implementations.
 633      * It must contain at least a <code>Locale</code>
 634      * instance equal to {@link java.util.Locale#US Locale.US}.
 635      *
 636      * @return An array of locales for which localized
 637      *         <code>BreakIterator</code> instances are available.
 638      */
 639     public static synchronized Locale[] getAvailableLocales()
 640     {
 641         LocaleServiceProviderPool pool =
 642             LocaleServiceProviderPool.getPool(BreakIteratorProvider.class);
 643         return pool.getAvailableLocales();
 644     }
 645 
 646     private static final class BreakIteratorCache {
 647 
 648         private BreakIterator iter;
 649         private Locale locale;
 650 
 651         BreakIteratorCache(Locale locale, BreakIterator iter) {
 652             this.locale = locale;
 653             this.iter = (BreakIterator) iter.clone();
 654         }
 655 
 656         Locale getLocale() {
 657             return locale;
 658         }
 659 
 660         BreakIterator createBreakInstance() {
 661             return (BreakIterator) iter.clone();
 662         }
 663     }
 664 
 665     static long getLong(byte[] buf, int offset) {
 666         long num = buf[offset]&0xFF;
 667         for (int i = 1; i < 8; i++) {
 668             num = num<<8 | (buf[offset+i]&0xFF);
 669         }
 670         return num;
 671     }
 672 
 673     static int getInt(byte[] buf, int offset) {
 674         int num = buf[offset]&0xFF;
 675         for (int i = 1; i < 4; i++) {
 676             num = num<<8 | (buf[offset+i]&0xFF);
 677         }
 678         return num;
 679     }
 680 
 681     static short getShort(byte[] buf, int offset) {
 682         short num = (short)(buf[offset]&0xFF);
 683         num = (short)(num<<8 | (buf[offset+1]&0xFF));
 684         return num;
 685     }
 686 
 687     /**
 688      * Obtains a BreakIterator instance from a BreakIteratorProvider
 689      * implementation.
 690      */
 691     private static class BreakIteratorGetter
 692         implements LocaleServiceProviderPool.LocalizedObjectGetter<BreakIteratorProvider, BreakIterator> {
 693         private static final BreakIteratorGetter INSTANCE =
 694             new BreakIteratorGetter();
 695 
 696         public BreakIterator getObject(BreakIteratorProvider breakIteratorProvider,
 697                                 Locale locale,
 698                                 String key,
 699                                 Object... params) {
 700             assert params.length == 1;
 701 
 702             switch ((Integer)params[0]) {
 703             case CHARACTER_INDEX:
 704                 return breakIteratorProvider.getCharacterInstance(locale);
 705             case WORD_INDEX:
 706                 return breakIteratorProvider.getWordInstance(locale);
 707             case LINE_INDEX:
 708                 return breakIteratorProvider.getLineInstance(locale);
 709             case SENTENCE_INDEX:
 710                 return breakIteratorProvider.getSentenceInstance(locale);
 711             default:
 712                 assert false : "should not happen";
 713             }
 714             return null;
 715         }
 716     }
 717 }