24 */
25
26 /*
27 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28 * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
29 *
30 * The original version of this source code and documentation
31 * is copyrighted and owned by Taligent, Inc., a wholly-owned
32 * subsidiary of IBM. These materials are provided under terms
33 * of a License Agreement between Taligent and Sun. This technology
34 * is protected by multiple US and International patents.
35 *
36 * This notice and attribution to Taligent may not be removed.
37 * Taligent is a registered trademark of Taligent, Inc.
38 *
39 */
40
41 package java.text;
42
43 import java.lang.ref.SoftReference;
44 import java.net.URL;
45 import java.io.InputStream;
46 import java.io.IOException;
47 import java.security.AccessController;
48 import java.security.PrivilegedAction;
49 import java.text.CharacterIterator;
50 import java.text.StringCharacterIterator;
51 import java.text.spi.BreakIteratorProvider;
52 import java.util.Locale;
53 import java.util.MissingResourceException;
54 import java.util.ResourceBundle;
55 import java.util.spi.LocaleServiceProvider;
56 import sun.util.LocaleServiceProviderPool;
57 import sun.util.resources.LocaleData;
58
59
60 /**
61 * The <code>BreakIterator</code> class implements methods for finding
62 * the location of boundaries in text. Instances of <code>BreakIterator</code>
63 * maintain a current position and scan over text
64 * returning the index of characters where boundaries occur.
65 * Internally, <code>BreakIterator</code> scans text using a
66 * <code>CharacterIterator</code>, and is thus able to scan text held
67 * by any object implementing that protocol. A <code>StringCharacterIterator</code>
68 * is used to scan <code>String</code> objects passed to <code>setText</code>.
69 *
70 * <p>
71 * You use the factory methods provided by this class to create
72 * instances of various types of break iterators. In particular,
73 * use <code>getWordInstance</code>, <code>getLineInstance</code>,
74 * <code>getSentenceInstance</code>, and <code>getCharacterInstance</code>
75 * to create <code>BreakIterator</code>s that perform
76 * word, line, sentence, and character boundary analysis respectively.
77 * A single <code>BreakIterator</code> can work only on one unit
231 * and the next is a word; otherwise, it's the material between words.)
232 * </blockquote>
233 *
234 * @see CharacterIterator
235 *
236 */
237
238 public abstract class BreakIterator implements Cloneable
239 {
240 /**
241 * Constructor. BreakIterator is stateless and has no default behavior.
242 */
protected BreakIterator() {
    // Nothing to initialize at this level; the class is abstract and
    // only subclasses can reach this constructor.
}
246
247 /**
248 * Create a copy of this iterator
249 * @return A copy of this
250 */
251 public Object clone()
252 {
253 try {
254 return super.clone();
255 }
256 catch (CloneNotSupportedException e) {
257 throw new InternalError(e);
258 }
259 }
260
261 /**
262 * DONE is returned by previous(), next(), next(int), preceding(int)
263 * and following(int) when either the first or last text boundary has been
264 * reached.
265 */
266 public static final int DONE = -1;
267
268 /**
269 * Returns the first boundary. The iterator's current position is set
270 * to the first text boundary.
343 /**
344 * Returns the last boundary preceding the specified character offset. If the
345 * specified offset equals to the first text boundary, it returns
346 * <code>BreakIterator.DONE</code> and the iterator's current position is unchanged.
347 * Otherwise, the iterator's current position is set to the returned boundary.
348 * The value returned is always less than the offset or the value
349 * <code>BreakIterator.DONE</code>.
350 * @param offset the character offset to begin scanning.
351 * @return The last boundary before the specified offset or
352 * <code>BreakIterator.DONE</code> if the first text boundary is passed in
353 * as the offset.
354 * @exception IllegalArgumentException if the specified offset is less than
355 * the first text boundary or greater than the last text boundary.
356 * @since 1.2
357 */
358 public int preceding(int offset) {
359 // NOTE: This implementation is here solely because we can't add new
360 // abstract methods to an existing class. There is almost ALWAYS a
361 // better, faster way to do this.
362 int pos = following(offset);
363 while (pos >= offset && pos != DONE)
364 pos = previous();
365 return pos;
366 }
367
368 /**
369 * Returns true if the specified character offset is a text boundary.
370 * @param offset the character offset to check.
371 * @return <code>true</code> if "offset" is a boundary position,
372 * <code>false</code> otherwise.
373 * @exception IllegalArgumentException if the specified offset is less than
374 * the first text boundary or greater than the last text boundary.
375 * @since 1.2
376 */
377 public boolean isBoundary(int offset) {
378 // NOTE: This implementation probably is wrong for most situations
379 // because it fails to take into account the possibility that a
380 // CharacterIterator passed to setText() may not have a begin offset
381 // of 0. But since the abstract BreakIterator doesn't have that
382 // knowledge, it assumes the begin offset is 0. If you subclass
383 // BreakIterator, copy the SimpleTextBoundary implementation of this
384 // function into your subclass. [This should have been abstract at
447 * Returns a new <code>BreakIterator</code> instance
448 * for <a href="BreakIterator.html#word">word breaks</a>
449 * for the {@linkplain Locale#getDefault() default locale}.
450 * @return A break iterator for word breaks
451 */
452 public static BreakIterator getWordInstance()
453 {
454 return getWordInstance(Locale.getDefault());
455 }
456
457 /**
458 * Returns a new <code>BreakIterator</code> instance
459 * for <a href="BreakIterator.html#word">word breaks</a>
460 * for the given locale.
461 * @param locale the desired locale
462 * @return A break iterator for word breaks
463 * @exception NullPointerException if <code>locale</code> is null
464 */
465 public static BreakIterator getWordInstance(Locale locale)
466 {
467 return getBreakInstance(locale,
468 WORD_INDEX,
469 "WordData",
470 "WordDictionary");
471 }
472
473 /**
474 * Returns a new <code>BreakIterator</code> instance
475 * for <a href="BreakIterator.html#line">line breaks</a>
476 * for the {@linkplain Locale#getDefault() default locale}.
477 * @return A break iterator for line breaks
478 */
479 public static BreakIterator getLineInstance()
480 {
481 return getLineInstance(Locale.getDefault());
482 }
483
484 /**
485 * Returns a new <code>BreakIterator</code> instance
486 * for <a href="BreakIterator.html#line">line breaks</a>
487 * for the given locale.
488 * @param locale the desired locale
489 * @return A break iterator for line breaks
490 * @exception NullPointerException if <code>locale</code> is null
491 */
492 public static BreakIterator getLineInstance(Locale locale)
493 {
494 return getBreakInstance(locale,
495 LINE_INDEX,
496 "LineData",
497 "LineDictionary");
498 }
499
500 /**
501 * Returns a new <code>BreakIterator</code> instance
502 * for <a href="BreakIterator.html#character">character breaks</a>
503 * for the {@linkplain Locale#getDefault() default locale}.
504 * @return A break iterator for character breaks
505 */
506 public static BreakIterator getCharacterInstance()
507 {
508 return getCharacterInstance(Locale.getDefault());
509 }
510
511 /**
512 * Returns a new <code>BreakIterator</code> instance
513 * for <a href="BreakIterator.html#character">character breaks</a>
514 * for the given locale.
515 * @param locale the desired locale
516 * @return A break iterator for character breaks
517 * @exception NullPointerException if <code>locale</code> is null
518 */
519 public static BreakIterator getCharacterInstance(Locale locale)
520 {
521 return getBreakInstance(locale,
522 CHARACTER_INDEX,
523 "CharacterData",
524 "CharacterDictionary");
525 }
526
527 /**
528 * Returns a new <code>BreakIterator</code> instance
529 * for <a href="BreakIterator.html#sentence">sentence breaks</a>
530 * for the {@linkplain Locale#getDefault() default locale}.
531 * @return A break iterator for sentence breaks
532 */
533 public static BreakIterator getSentenceInstance()
534 {
535 return getSentenceInstance(Locale.getDefault());
536 }
537
538 /**
539 * Returns a new <code>BreakIterator</code> instance
540 * for <a href="BreakIterator.html#sentence">sentence breaks</a>
541 * for the given locale.
542 * @param locale the desired locale
543 * @return A break iterator for sentence breaks
544 * @exception NullPointerException if <code>locale</code> is null
545 */
546 public static BreakIterator getSentenceInstance(Locale locale)
547 {
548 return getBreakInstance(locale,
549 SENTENCE_INDEX,
550 "SentenceData",
551 "SentenceDictionary");
552 }
553
554 private static BreakIterator getBreakInstance(Locale locale,
555 int type,
556 String dataName,
557 String dictionaryName) {
558 if (iterCache[type] != null) {
559 BreakIteratorCache cache = iterCache[type].get();
560 if (cache != null) {
561 if (cache.getLocale().equals(locale)) {
562 return cache.createBreakInstance();
563 }
564 }
565 }
566
567 BreakIterator result = createBreakInstance(locale,
568 type,
569 dataName,
570 dictionaryName);
571 BreakIteratorCache cache = new BreakIteratorCache(locale, result);
572 iterCache[type] = new SoftReference<>(cache);
573 return result;
574 }
575
576 private static ResourceBundle getBundle(final String baseName, final Locale locale) {
577 return AccessController.doPrivileged(new PrivilegedAction<ResourceBundle>() {
578 public ResourceBundle run() {
579 return ResourceBundle.getBundle(baseName, locale);
580 }
581 });
582 }
583
584 private static BreakIterator createBreakInstance(Locale locale,
585 int type,
586 String dataName,
587 String dictionaryName) {
588
589 // Check whether a provider can provide an implementation that's closer
590 // to the requested locale than what the Java runtime itself can provide.
591 LocaleServiceProviderPool pool =
592 LocaleServiceProviderPool.getPool(BreakIteratorProvider.class);
593 if (pool.hasProviders()) {
594 BreakIterator providersInstance = pool.getLocalizedObject(
595 BreakIteratorGetter.INSTANCE,
596 locale, type);
597 if (providersInstance != null) {
598 return providersInstance;
599 }
600 }
601
602 ResourceBundle bundle = getBundle(
603 "sun.text.resources.BreakIteratorInfo", locale);
604 String[] classNames = bundle.getStringArray("BreakIteratorClasses");
605
606 String dataFile = bundle.getString(dataName);
607
608 try {
609 if (classNames[type].equals("RuleBasedBreakIterator")) {
610 return new RuleBasedBreakIterator(dataFile);
611 }
612 else if (classNames[type].equals("DictionaryBasedBreakIterator")) {
613 String dictionaryFile = bundle.getString(dictionaryName);
614 return new DictionaryBasedBreakIterator(dataFile, dictionaryFile);
615 }
616 else {
617 throw new IllegalArgumentException("Invalid break iterator class \"" +
618 classNames[type] + "\"");
619 }
620 }
621 catch (Exception e) {
622 throw new InternalError(e.toString(), e);
623 }
624 }
625
626 /**
627 * Returns an array of all locales for which the
628 * <code>get*Instance</code> methods of this class can return
629 * localized instances.
630 * The returned array represents the union of locales supported by the Java
631 * runtime and by installed
632 * {@link java.text.spi.BreakIteratorProvider BreakIteratorProvider} implementations.
633 * It must contain at least a <code>Locale</code>
634 * instance equal to {@link java.util.Locale#US Locale.US}.
635 *
636 * @return An array of locales for which localized
637 * <code>BreakIterator</code> instances are available.
638 */
639 public static synchronized Locale[] getAvailableLocales()
640 {
641 LocaleServiceProviderPool pool =
642 LocaleServiceProviderPool.getPool(BreakIteratorProvider.class);
643 return pool.getAvailableLocales();
644 }
645
646 private static final class BreakIteratorCache {
647
648 private BreakIterator iter;
649 private Locale locale;
650
651 BreakIteratorCache(Locale locale, BreakIterator iter) {
652 this.locale = locale;
653 this.iter = (BreakIterator) iter.clone();
654 }
655
656 Locale getLocale() {
657 return locale;
658 }
659
660 BreakIterator createBreakInstance() {
661 return (BreakIterator) iter.clone();
662 }
663 }
664
665 static long getLong(byte[] buf, int offset) {
666 long num = buf[offset]&0xFF;
667 for (int i = 1; i < 8; i++) {
668 num = num<<8 | (buf[offset+i]&0xFF);
669 }
670 return num;
671 }
672
673 static int getInt(byte[] buf, int offset) {
674 int num = buf[offset]&0xFF;
675 for (int i = 1; i < 4; i++) {
676 num = num<<8 | (buf[offset+i]&0xFF);
677 }
678 return num;
679 }
680
681 static short getShort(byte[] buf, int offset) {
682 short num = (short)(buf[offset]&0xFF);
683 num = (short)(num<<8 | (buf[offset+1]&0xFF));
684 return num;
685 }
686
687 /**
688 * Obtains a BreakIterator instance from a BreakIteratorProvider
689 * implementation.
690 */
691 private static class BreakIteratorGetter
692 implements LocaleServiceProviderPool.LocalizedObjectGetter<BreakIteratorProvider, BreakIterator> {
693 private static final BreakIteratorGetter INSTANCE =
694 new BreakIteratorGetter();
695
696 public BreakIterator getObject(BreakIteratorProvider breakIteratorProvider,
697 Locale locale,
698 String key,
699 Object... params) {
700 assert params.length == 1;
701
702 switch ((Integer)params[0]) {
703 case CHARACTER_INDEX:
704 return breakIteratorProvider.getCharacterInstance(locale);
705 case WORD_INDEX:
706 return breakIteratorProvider.getWordInstance(locale);
707 case LINE_INDEX:
708 return breakIteratorProvider.getLineInstance(locale);
709 case SENTENCE_INDEX:
710 return breakIteratorProvider.getSentenceInstance(locale);
711 default:
712 assert false : "should not happen";
713 }
714 return null;
715 }
716 }
717 }
|
24 */
25
26 /*
27 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28 * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
29 *
30 * The original version of this source code and documentation
31 * is copyrighted and owned by Taligent, Inc., a wholly-owned
32 * subsidiary of IBM. These materials are provided under terms
33 * of a License Agreement between Taligent and Sun. This technology
34 * is protected by multiple US and International patents.
35 *
36 * This notice and attribution to Taligent may not be removed.
37 * Taligent is a registered trademark of Taligent, Inc.
38 *
39 */
40
41 package java.text;
42
43 import java.lang.ref.SoftReference;
44 import java.text.spi.BreakIteratorProvider;
45 import java.util.Locale;
46 import sun.util.locale.provider.LocaleProviderAdapter;
47 import sun.util.locale.provider.LocaleServiceProviderPool;
48
49
50 /**
51 * The <code>BreakIterator</code> class implements methods for finding
52 * the location of boundaries in text. Instances of <code>BreakIterator</code>
53 * maintain a current position and scan over text
54 * returning the index of characters where boundaries occur.
55 * Internally, <code>BreakIterator</code> scans text using a
56 * <code>CharacterIterator</code>, and is thus able to scan text held
57 * by any object implementing that protocol. A <code>StringCharacterIterator</code>
58 * is used to scan <code>String</code> objects passed to <code>setText</code>.
59 *
60 * <p>
61 * You use the factory methods provided by this class to create
62 * instances of various types of break iterators. In particular,
63 * use <code>getWordInstance</code>, <code>getLineInstance</code>,
64 * <code>getSentenceInstance</code>, and <code>getCharacterInstance</code>
65 * to create <code>BreakIterator</code>s that perform
66 * word, line, sentence, and character boundary analysis respectively.
67 * A single <code>BreakIterator</code> can work only on one unit
221 * and the next is a word; otherwise, it's the material between words.)
222 * </blockquote>
223 *
224 * @see CharacterIterator
225 *
226 */
227
228 public abstract class BreakIterator implements Cloneable
229 {
230 /**
231 * Constructor. BreakIterator is stateless and has no default behavior.
232 */
protected BreakIterator() {
    // Nothing to initialize at this level; the class is abstract and
    // only subclasses can reach this constructor.
}
236
237 /**
238 * Create a copy of this iterator
239 * @return A copy of this
240 */
241 @Override
242 public Object clone()
243 {
244 try {
245 return super.clone();
246 }
247 catch (CloneNotSupportedException e) {
248 throw new InternalError(e);
249 }
250 }
251
252 /**
253 * DONE is returned by previous(), next(), next(int), preceding(int)
254 * and following(int) when either the first or last text boundary has been
255 * reached.
256 */
257 public static final int DONE = -1;
258
259 /**
260 * Returns the first boundary. The iterator's current position is set
261 * to the first text boundary.
334 /**
335 * Returns the last boundary preceding the specified character offset. If the
336 * specified offset equals to the first text boundary, it returns
337 * <code>BreakIterator.DONE</code> and the iterator's current position is unchanged.
338 * Otherwise, the iterator's current position is set to the returned boundary.
339 * The value returned is always less than the offset or the value
340 * <code>BreakIterator.DONE</code>.
341 * @param offset the character offset to begin scanning.
342 * @return The last boundary before the specified offset or
343 * <code>BreakIterator.DONE</code> if the first text boundary is passed in
344 * as the offset.
345 * @exception IllegalArgumentException if the specified offset is less than
346 * the first text boundary or greater than the last text boundary.
347 * @since 1.2
348 */
349 public int preceding(int offset) {
350 // NOTE: This implementation is here solely because we can't add new
351 // abstract methods to an existing class. There is almost ALWAYS a
352 // better, faster way to do this.
353 int pos = following(offset);
354 while (pos >= offset && pos != DONE) {
355 pos = previous();
356 }
357 return pos;
358 }
359
360 /**
361 * Returns true if the specified character offset is a text boundary.
362 * @param offset the character offset to check.
363 * @return <code>true</code> if "offset" is a boundary position,
364 * <code>false</code> otherwise.
365 * @exception IllegalArgumentException if the specified offset is less than
366 * the first text boundary or greater than the last text boundary.
367 * @since 1.2
368 */
369 public boolean isBoundary(int offset) {
370 // NOTE: This implementation probably is wrong for most situations
371 // because it fails to take into account the possibility that a
372 // CharacterIterator passed to setText() may not have a begin offset
373 // of 0. But since the abstract BreakIterator doesn't have that
374 // knowledge, it assumes the begin offset is 0. If you subclass
375 // BreakIterator, copy the SimpleTextBoundary implementation of this
376 // function into your subclass. [This should have been abstract at
439 * Returns a new <code>BreakIterator</code> instance
440 * for <a href="BreakIterator.html#word">word breaks</a>
441 * for the {@linkplain Locale#getDefault() default locale}.
442 * @return A break iterator for word breaks
443 */
444 public static BreakIterator getWordInstance()
445 {
446 return getWordInstance(Locale.getDefault());
447 }
448
449 /**
450 * Returns a new <code>BreakIterator</code> instance
451 * for <a href="BreakIterator.html#word">word breaks</a>
452 * for the given locale.
453 * @param locale the desired locale
454 * @return A break iterator for word breaks
455 * @exception NullPointerException if <code>locale</code> is null
456 */
457 public static BreakIterator getWordInstance(Locale locale)
458 {
459 return getBreakInstance(locale, WORD_INDEX);
460 }
461
462 /**
463 * Returns a new <code>BreakIterator</code> instance
464 * for <a href="BreakIterator.html#line">line breaks</a>
465 * for the {@linkplain Locale#getDefault() default locale}.
466 * @return A break iterator for line breaks
467 */
468 public static BreakIterator getLineInstance()
469 {
470 return getLineInstance(Locale.getDefault());
471 }
472
473 /**
474 * Returns a new <code>BreakIterator</code> instance
475 * for <a href="BreakIterator.html#line">line breaks</a>
476 * for the given locale.
477 * @param locale the desired locale
478 * @return A break iterator for line breaks
479 * @exception NullPointerException if <code>locale</code> is null
480 */
481 public static BreakIterator getLineInstance(Locale locale)
482 {
483 return getBreakInstance(locale, LINE_INDEX);
484 }
485
486 /**
487 * Returns a new <code>BreakIterator</code> instance
488 * for <a href="BreakIterator.html#character">character breaks</a>
489 * for the {@linkplain Locale#getDefault() default locale}.
490 * @return A break iterator for character breaks
491 */
492 public static BreakIterator getCharacterInstance()
493 {
494 return getCharacterInstance(Locale.getDefault());
495 }
496
497 /**
498 * Returns a new <code>BreakIterator</code> instance
499 * for <a href="BreakIterator.html#character">character breaks</a>
500 * for the given locale.
501 * @param locale the desired locale
502 * @return A break iterator for character breaks
503 * @exception NullPointerException if <code>locale</code> is null
504 */
505 public static BreakIterator getCharacterInstance(Locale locale)
506 {
507 return getBreakInstance(locale, CHARACTER_INDEX);
508 }
509
510 /**
511 * Returns a new <code>BreakIterator</code> instance
512 * for <a href="BreakIterator.html#sentence">sentence breaks</a>
513 * for the {@linkplain Locale#getDefault() default locale}.
514 * @return A break iterator for sentence breaks
515 */
516 public static BreakIterator getSentenceInstance()
517 {
518 return getSentenceInstance(Locale.getDefault());
519 }
520
521 /**
522 * Returns a new <code>BreakIterator</code> instance
523 * for <a href="BreakIterator.html#sentence">sentence breaks</a>
524 * for the given locale.
525 * @param locale the desired locale
526 * @return A break iterator for sentence breaks
527 * @exception NullPointerException if <code>locale</code> is null
528 */
529 public static BreakIterator getSentenceInstance(Locale locale)
530 {
531 return getBreakInstance(locale, SENTENCE_INDEX);
532 }
533
534 private static BreakIterator getBreakInstance(Locale locale, int type) {
535 if (iterCache[type] != null) {
536 BreakIteratorCache cache = iterCache[type].get();
537 if (cache != null) {
538 if (cache.getLocale().equals(locale)) {
539 return cache.createBreakInstance();
540 }
541 }
542 }
543
544 BreakIterator result = createBreakInstance(locale, type);
545 BreakIteratorCache cache = new BreakIteratorCache(locale, result);
546 iterCache[type] = new SoftReference<>(cache);
547 return result;
548 }
549
550 private static BreakIterator createBreakInstance(Locale locale,
551 int type) {
552 LocaleProviderAdapter adapter = LocaleProviderAdapter.getAdapter(BreakIteratorProvider.class, locale);
553 BreakIterator iterator = createBreakInstance(adapter, locale, type);
554 if (iterator == null) {
555 iterator = createBreakInstance(LocaleProviderAdapter.forJRE(), locale, type);
556 }
557 return iterator;
558 }
559
560 private static BreakIterator createBreakInstance(LocaleProviderAdapter adapter, Locale locale, int type) {
561 BreakIteratorProvider breakIteratorProvider = adapter.getBreakIteratorProvider();
562 BreakIterator iterator = null;
563 switch (type) {
564 case CHARACTER_INDEX:
565 iterator = breakIteratorProvider.getCharacterInstance(locale);
566 break;
567 case WORD_INDEX:
568 iterator = breakIteratorProvider.getWordInstance(locale);
569 break;
570 case LINE_INDEX:
571 iterator = breakIteratorProvider.getLineInstance(locale);
572 break;
573 case SENTENCE_INDEX:
574 iterator = breakIteratorProvider.getSentenceInstance(locale);
575 break;
576 }
577 return iterator;
578 }
579
580 /**
581 * Returns an array of all locales for which the
582 * <code>get*Instance</code> methods of this class can return
583 * localized instances.
584 * The returned array represents the union of locales supported by the Java
585 * runtime and by installed
586 * {@link java.text.spi.BreakIteratorProvider BreakIteratorProvider} implementations.
587 * It must contain at least a <code>Locale</code>
588 * instance equal to {@link java.util.Locale#US Locale.US}.
589 *
590 * @return An array of locales for which localized
591 * <code>BreakIterator</code> instances are available.
592 */
593 public static synchronized Locale[] getAvailableLocales()
594 {
595 LocaleServiceProviderPool pool =
596 LocaleServiceProviderPool.getPool(BreakIteratorProvider.class);
597 return pool.getAvailableLocales();
598 }
599
600 private static final class BreakIteratorCache {
601
602 private BreakIterator iter;
603 private Locale locale;
604
605 BreakIteratorCache(Locale locale, BreakIterator iter) {
606 this.locale = locale;
607 this.iter = (BreakIterator) iter.clone();
608 }
609
610 Locale getLocale() {
611 return locale;
612 }
613
614 BreakIterator createBreakInstance() {
615 return (BreakIterator) iter.clone();
616 }
617 }
618 }
|