src/share/classes/sun/util/locale/provider/DictionaryBasedBreakIterator.java

Print this page
rev 5696 : 6336885: RFE: Locale Data Deployment Enhancements
4609153: Provide locale data for Indic locales
5104387: Support for gl_ES locale (galician language)
6337471: desktop/system locale preferences support
7056139: (cal) SPI support for locale-dependent Calendar parameters
7058206: Provide CalendarData SPI for week params and display field value names
7073852: Support multiple scripts for digits and decimal symbols per locale
7079560: [Fmt-Da] Context dependent month names support in SimpleDateFormat
7171324: getAvailableLocales() of locale sensitive services should return the actual availability of locales
7151414: (cal) Support calendar type identification
7168528: LocaleServiceProvider needs to be aware of Locale extensions
7171372: (cal) locale's default Calendar should be created if unknown calendar is specified
Summary: JEP 127: Improve Locale Data Packaging and Adopt Unicode CLDR Data (part 1 w/o packaging changes. by Naoto Sato and Masayoshi Okutsu)

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this

@@ -36,18 +36,17 @@
  *
  * This notice and attribution to Taligent may not be removed.
  * Taligent is a registered trademark of Taligent, Inc.
  */
 
-package java.text;
+package sun.util.locale.provider;
 
-import java.util.Vector;
-import java.util.Stack;
-import java.util.Hashtable;
-import java.text.CharacterIterator;
-import java.io.InputStream;
 import java.io.IOException;
+import java.text.CharacterIterator;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Stack;
 
 /**
  * A subclass of RuleBasedBreakIterator that adds the ability to use a dictionary
  * to further subdivide ranges of text beyond what is possible using just the
  * state-table-based algorithm.  This is necessary, for example, to handle

@@ -112,11 +111,11 @@
      * @param description Same as the description parameter on RuleBasedBreakIterator,
      * except for the special meaning of "<dictionary>".  This parameter is just
      * passed through to RuleBasedBreakIterator's constructor.
      * @param dictionaryFilename The filename of the dictionary file to use
      */
-    public DictionaryBasedBreakIterator(String dataFile, String dictionaryFile)
+    DictionaryBasedBreakIterator(String dataFile, String dictionaryFile)
                                         throws IOException {
         super(dataFile);
         byte[] tmp = super.getAdditionalData();
         if (tmp != null) {
             prepareCategoryFlags(tmp);

@@ -130,10 +129,11 @@
         for (int i = 0; i < data.length; i++) {
             categoryFlags[i] = (data[i] == (byte)1) ? true : false;
         }
     }
 
+    @Override
     public void setText(CharacterIterator newText) {
         super.setText(newText);
         cachedBreakPositions = null;
         dictionaryCharCount = 0;
         positionInCache = 0;

@@ -142,10 +142,11 @@
     /**
      * Sets the current iteration position to the beginning of the text.
      * (i.e., the CharacterIterator's starting offset).
      * @return The offset of the beginning of the text.
      */
+    @Override
     public int first() {
         cachedBreakPositions = null;
         dictionaryCharCount = 0;
         positionInCache = 0;
         return super.first();

@@ -154,10 +155,11 @@
     /**
      * Sets the current iteration position to the end of the text.
      * (i.e., the CharacterIterator's ending offset).
      * @return The text's past-the-end offset.
      */
+    @Override
     public int last() {
         cachedBreakPositions = null;
         dictionaryCharCount = 0;
         positionInCache = 0;
         return super.last();

@@ -166,10 +168,11 @@
     /**
      * Advances the iterator one step backwards.
      * @return The position of the last boundary position before the
      * current iteration position
      */
+    @Override
     public int previous() {
         CharacterIterator text = getText();
 
         // if we have cached break positions and we're still in the range
         // covered by them, just move one step backward in the cache

@@ -196,10 +199,11 @@
      * Sets the current iteration position to the last boundary position
      * before the specified position.
      * @param offset The position to begin searching from
      * @return The position of the last boundary before "offset"
      */
+    @Override
     public int preceding(int offset) {
         CharacterIterator text = getText();
         checkOffset(offset, text);
 
         // if we have no cached break positions, or "offset" is outside the

@@ -231,10 +235,11 @@
      * Sets the current iteration position to the first boundary position after
      * the specified position.
      * @param offset The position to begin searching forward from
      * @return The position of the first boundary after "offset"
      */
+    @Override
     public int following(int offset) {
         CharacterIterator text = getText();
         checkOffset(offset, text);
 
         // if we have no cached break positions, or if "offset" is outside the

@@ -262,10 +267,11 @@
     }
 
     /**
      * This is the implementation function for next().
      */
+    @Override
     protected int handleNext() {
         CharacterIterator text = getText();
 
         // if there are no cached break positions, or if we've just moved
         // off the end of the range covered by the cache, we have to dump

@@ -307,10 +313,11 @@
     }
 
     /**
      * Looks up a character category for a character.
      */
+    @Override
     protected int lookupCategory(int c) {
         // this override of lookupCategory() exists only to keep track of whether we've
         // passed over any dictionary characters.  It calls the inherited lookupCategory()
         // to do the real work, and then checks whether its return value is one of the
         // categories represented in the dictionary.  If it is, bump the dictionary-

@@ -328,10 +335,11 @@
      * dictionary to determine the positions of any boundaries in this
      * range.  It stores all the boundary positions it discovers in
      * cachedBreakPositions so that we only have to do this work once
      * for each time we enter the range.
      */
+    @SuppressWarnings("unchecked")
     private void divideUpDictionaryRange(int startPos, int endPos) {
         CharacterIterator text = getText();
 
         // the range we're dividing may begin or end with non-dictionary characters
         // (i.e., for line breaking, we may have leading or trailing punctuation

@@ -356,11 +364,11 @@
         // continues in this way until we either successfully make it all the way
         // across the range, or exhaust all of our combinations of break
         // positions.)
         Stack<Integer> currentBreakPositions = new Stack<>();
         Stack<Integer> possibleBreakPositions = new Stack<>();
-        Vector<Integer> wrongBreakPositions = new Vector<>();
+        List<Integer> wrongBreakPositions = new ArrayList<>();
 
         // the dictionary is implemented as a trie, which is treated as a state
         // machine.  -1 represents the end of a legal word.  Every word in the
         // dictionary is represented by a path from the root node to -1.  A path
         // that ends in state 0 is an illegal combination of characters.

@@ -382,11 +390,11 @@
 
             // if we can transition to state "-1" from our current state, we're
             // on the last character of a legal word.  Push that position onto
             // the possible-break-positions stack
             if (dictionary.getNextState(state, 0) == -1) {
-                possibleBreakPositions.push(Integer.valueOf(text.getIndex()));
+                possibleBreakPositions.push(text.getIndex());
             }
 
             // look up the new state to transition to in the dictionary
             state = dictionary.getNextStateFromCharacter(state, c);
 

@@ -393,11 +401,11 @@
             // if the character we're sitting on causes us to transition to
             // the "end of word" state, then it was a non-dictionary character
             // and we've successfully traversed the whole range.  Drop out
             // of the loop.
             if (state == -1) {
-                currentBreakPositions.push(Integer.valueOf(text.getIndex()));
+                currentBreakPositions.push(text.getIndex());
                 break;
             }
 
             // if the character we're sitting on causes us to transition to
             // the error state, or if we've gone off the end of the range

@@ -417,23 +425,22 @@
                 }
 
                 // wrongBreakPositions is a list of all break positions
                 // we've tried starting that didn't allow us to traverse
                 // all the way through the text.  Every time we pop a
-                //break position off of currentBreakPositions, we put it
+                // break position off of currentBreakPositions, we put it
                 // into wrongBreakPositions to avoid trying it again later.
                 // If we make it to this spot, we're either going to back
                 // up to a break in possibleBreakPositions and try starting
                 // over from there, or we've exhausted all possible break
                 // positions and are going to do the fallback procedure.
                 // This loop prevents us from messing with anything in
                 // possibleBreakPositions that didn't work as a starting
                 // point the last time we tried it (this is to prevent a bunch of
                 // repetitive checks from slowing down some extreme cases)
-                Integer newStartingSpot = null;
-                while (!possibleBreakPositions.isEmpty() && wrongBreakPositions.contains(
-                            possibleBreakPositions.peek())) {
+                while (!possibleBreakPositions.isEmpty()
+                        && wrongBreakPositions.contains(possibleBreakPositions.peek())) {
                     possibleBreakPositions.pop();
                 }
 
                 // if we've used up all possible break-position combinations, there's
                 // an error or an unknown word in the text.  In this case, we start

@@ -470,11 +477,11 @@
                     Integer temp = possibleBreakPositions.pop();
                     Integer temp2 = null;
                     while (!currentBreakPositions.isEmpty() && temp.intValue() <
                            currentBreakPositions.peek().intValue()) {
                         temp2 = currentBreakPositions.pop();
-                        wrongBreakPositions.addElement(temp2);
+                        wrongBreakPositions.add(temp2);
                     }
                     currentBreakPositions.push(temp);
                     text.setIndex(currentBreakPositions.peek().intValue());
                 }
 

@@ -498,11 +505,11 @@
         // because the range actually ended with non-dictionary characters we want to
         // keep with the word)
         if (!currentBreakPositions.isEmpty()) {
             currentBreakPositions.pop();
         }
-        currentBreakPositions.push(Integer.valueOf(endPos));
+        currentBreakPositions.push(endPos);
 
         // create a regular array to hold the break positions and copy
         // the break positions from the stack to the array (in addition,
         // our starting position goes into this array as a break position).
         // This array becomes the cache of break positions used by next()