jdk Cdiff src/share/classes/sun/util/locale/provider/DictionaryBasedBreakIterator.java

src/share/classes/sun/util/locale/provider/DictionaryBasedBreakIterator.java

rev 5615 : 6336885: RFE: Locale Data Deployment Enhancements
4609153: Provide locale data for Indic locales
5104387: Support for gl_ES locale (galician language)
6337471: desktop/system locale preferences support
7056139: (cal) SPI support for locale-dependent Calendar parameters
7058206: Provide CalendarData SPI for week params and display field value names
7073852: Support multiple scripts for digits and decimal symbols per locale
7079560: [Fmt-Da] Context dependent month names support in SimpleDateFormat
7171324: getAvailableLocales() of locale sensitive services should return the actual availability of locales
7151414: (cal) Support calendar type identification
7168528: LocaleServiceProvider needs to be aware of Locale extensions
7171372: (cal) locale's default Calendar should be created if unknown calendar is specified
Summary: JEP 127: Improve Locale Data Packaging and Adopt Unicode CLDR Data (part 1, without Jigsaw; by Naoto Sato and Masayoshi Okutsu)


*** 1,7 ****
  /*
!  * Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.  Oracle designates this
--- 1,7 ----
  /*
!  * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.  Oracle designates this
*** 36,53 ****
   *
   * This notice and attribution to Taligent may not be removed.
   * Taligent is a registered trademark of Taligent, Inc.
   */
  
! package java.text;
  
- import java.util.Vector;
- import java.util.Stack;
- import java.util.Hashtable;
- import java.text.CharacterIterator;
- import java.io.InputStream;
  import java.io.IOException;
  
  /**
   * A subclass of RuleBasedBreakIterator that adds the ability to use a dictionary
   * to further subdivide ranges of text beyond what is possible using just the
   * state-table-based algorithm.  This is necessary, for example, to handle
--- 36,52 ----
   *
   * This notice and attribution to Taligent may not be removed.
   * Taligent is a registered trademark of Taligent, Inc.
   */
  
! package sun.util.locale.provider;
  
  import java.io.IOException;
+ import java.text.CharacterIterator;
+ import java.util.ArrayList;
+ import java.util.List;
+ import java.util.Stack;
  
  /**
   * A subclass of RuleBasedBreakIterator that adds the ability to use a dictionary
   * to further subdivide ranges of text beyond what is possible using just the
   * state-table-based algorithm.  This is necessary, for example, to handle
*** 112,122 ****
       * @param description Same as the description parameter on RuleBasedBreakIterator,
       * except for the special meaning of "<dictionary>".  This parameter is just
       * passed through to RuleBasedBreakIterator's constructor.
       * @param dictionaryFilename The filename of the dictionary file to use
       */
!     public DictionaryBasedBreakIterator(String dataFile, String dictionaryFile)
                                          throws IOException {
          super(dataFile);
          byte[] tmp = super.getAdditionalData();
          if (tmp != null) {
              prepareCategoryFlags(tmp);
--- 111,121 ----
       * @param description Same as the description parameter on RuleBasedBreakIterator,
       * except for the special meaning of "<dictionary>".  This parameter is just
       * passed through to RuleBasedBreakIterator's constructor.
       * @param dictionaryFilename The filename of the dictionary file to use
       */
!     DictionaryBasedBreakIterator(String dataFile, String dictionaryFile)
                                          throws IOException {
          super(dataFile);
          byte[] tmp = super.getAdditionalData();
          if (tmp != null) {
              prepareCategoryFlags(tmp);
*** 130,139 ****
--- 129,139 ----
          for (int i = 0; i < data.length; i++) {
              categoryFlags[i] = (data[i] == (byte)1) ? true : false;
          }
      }
  
+     @Override
      public void setText(CharacterIterator newText) {
          super.setText(newText);
          cachedBreakPositions = null;
          dictionaryCharCount = 0;
          positionInCache = 0;
*** 142,151 ****
--- 142,152 ----
      /**
       * Sets the current iteration position to the beginning of the text.
       * (i.e., the CharacterIterator's starting offset).
       * @return The offset of the beginning of the text.
       */
+     @Override
      public int first() {
          cachedBreakPositions = null;
          dictionaryCharCount = 0;
          positionInCache = 0;
          return super.first();
*** 154,163 ****
--- 155,165 ----
      /**
       * Sets the current iteration position to the end of the text.
       * (i.e., the CharacterIterator's ending offset).
       * @return The text's past-the-end offset.
       */
+     @Override
      public int last() {
          cachedBreakPositions = null;
          dictionaryCharCount = 0;
          positionInCache = 0;
          return super.last();
*** 166,175 ****
--- 168,178 ----
      /**
       * Advances the iterator one step backwards.
       * @return The position of the last boundary position before the
       * current iteration position
       */
+     @Override
      public int previous() {
          CharacterIterator text = getText();
  
          // if we have cached break positions and we're still in the range
          // covered by them, just move one step backward in the cache
*** 196,205 ****
--- 199,209 ----
       * Sets the current iteration position to the last boundary position
       * before the specified position.
       * @param offset The position to begin searching from
       * @return The position of the last boundary before "offset"
       */
+     @Override
      public int preceding(int offset) {
          CharacterIterator text = getText();
          checkOffset(offset, text);
  
          // if we have no cached break positions, or "offset" is outside the
*** 231,240 ****
--- 235,245 ----
       * Sets the current iteration position to the first boundary position after
       * the specified position.
       * @param offset The position to begin searching forward from
       * @return The position of the first boundary after "offset"
       */
+     @Override
      public int following(int offset) {
          CharacterIterator text = getText();
          checkOffset(offset, text);
  
          // if we have no cached break positions, or if "offset" is outside the
*** 262,271 ****
--- 267,277 ----
      }
  
      /**
       * This is the implementation function for next().
       */
+     @Override
      protected int handleNext() {
          CharacterIterator text = getText();
  
          // if there are no cached break positions, or if we've just moved
          // off the end of the range covered by the cache, we have to dump
*** 307,316 ****
--- 313,323 ----
      }
  
      /**
       * Looks up a character category for a character.
       */
+     @Override
      protected int lookupCategory(int c) {
          // this override of lookupCategory() exists only to keep track of whether we've
          // passed over any dictionary characters.  It calls the inherited lookupCategory()
          // to do the real work, and then checks whether its return value is one of the
          // categories represented in the dictionary.  If it is, bump the dictionary-
*** 328,337 ****
--- 335,345 ----
       * dictionary to determine the positions of any boundaries in this
       * range.  It stores all the boundary positions it discovers in
       * cachedBreakPositions so that we only have to do this work once
       * for each time we enter the range.
       */
+     @SuppressWarnings("unchecked")
      private void divideUpDictionaryRange(int startPos, int endPos) {
          CharacterIterator text = getText();
  
          // the range we're dividing may begin or end with non-dictionary characters
          // (i.e., for line breaking, we may have leading or trailing punctuation
*** 356,366 ****
          // continues in this way until we either successfully make it all the way
          // across the range, or exhaust all of our combinations of break
          // positions.)
          Stack<Integer> currentBreakPositions = new Stack<>();
          Stack<Integer> possibleBreakPositions = new Stack<>();
!         Vector<Integer> wrongBreakPositions = new Vector<>();
  
          // the dictionary is implemented as a trie, which is treated as a state
          // machine.  -1 represents the end of a legal word.  Every word in the
          // dictionary is represented by a path from the root node to -1.  A path
          // that ends in state 0 is an illegal combination of characters.
--- 364,374 ----
          // continues in this way until we either successfully make it all the way
          // across the range, or exhaust all of our combinations of break
          // positions.)
          Stack<Integer> currentBreakPositions = new Stack<>();
          Stack<Integer> possibleBreakPositions = new Stack<>();
!         List<Integer> wrongBreakPositions = new ArrayList<>();
  
          // the dictionary is implemented as a trie, which is treated as a state
          // machine.  -1 represents the end of a legal word.  Every word in the
          // dictionary is represented by a path from the root node to -1.  A path
          // that ends in state 0 is an illegal combination of characters.
*** 382,392 ****
  
              // if we can transition to state "-1" from our current state, we're
              // on the last character of a legal word.  Push that position onto
              // the possible-break-positions stack
              if (dictionary.getNextState(state, 0) == -1) {
!                 possibleBreakPositions.push(Integer.valueOf(text.getIndex()));
              }
  
              // look up the new state to transition to in the dictionary
              state = dictionary.getNextStateFromCharacter(state, c);
  
--- 390,400 ----
  
              // if we can transition to state "-1" from our current state, we're
              // on the last character of a legal word.  Push that position onto
              // the possible-break-positions stack
              if (dictionary.getNextState(state, 0) == -1) {
!                 possibleBreakPositions.push(text.getIndex());
              }
  
              // look up the new state to transition to in the dictionary
              state = dictionary.getNextStateFromCharacter(state, c);
  
*** 393,403 ****
              // if the character we're sitting on causes us to transition to
              // the "end of word" state, then it was a non-dictionary character
              // and we've successfully traversed the whole range.  Drop out
              // of the loop.
              if (state == -1) {
!                 currentBreakPositions.push(Integer.valueOf(text.getIndex()));
                  break;
              }
  
              // if the character we're sitting on causes us to transition to
              // the error state, or if we've gone off the end of the range
--- 401,411 ----
              // if the character we're sitting on causes us to transition to
              // the "end of word" state, then it was a non-dictionary character
              // and we've successfully traversed the whole range.  Drop out
              // of the loop.
              if (state == -1) {
!                 currentBreakPositions.push(text.getIndex());
                  break;
              }
  
              // if the character we're sitting on causes us to transition to
              // the error state, or if we've gone off the end of the range
*** 417,439 ****
                  }
  
                  // wrongBreakPositions is a list of all break positions
                  // we've tried starting that didn't allow us to traverse
                  // all the way through the text.  Every time we pop a
!                 //break position off of currentBreakPositions, we put it
                  // into wrongBreakPositions to avoid trying it again later.
                  // If we make it to this spot, we're either going to back
                  // up to a break in possibleBreakPositions and try starting
                  // over from there, or we've exhausted all possible break
                  // positions and are going to do the fallback procedure.
                  // This loop prevents us from messing with anything in
                  // possibleBreakPositions that didn't work as a starting
                  // point the last time we tried it (this is to prevent a bunch of
                  // repetitive checks from slowing down some extreme cases)
!                 Integer newStartingSpot = null;
!                 while (!possibleBreakPositions.isEmpty() && wrongBreakPositions.contains(
!                             possibleBreakPositions.peek())) {
                      possibleBreakPositions.pop();
                  }
  
                  // if we've used up all possible break-position combinations, there's
                  // an error or an unknown word in the text.  In this case, we start
--- 425,446 ----
                  }
  
                  // wrongBreakPositions is a list of all break positions
                  // we've tried starting that didn't allow us to traverse
                  // all the way through the text.  Every time we pop a
!                 // break position off of currentBreakPositions, we put it
                  // into wrongBreakPositions to avoid trying it again later.
                  // If we make it to this spot, we're either going to back
                  // up to a break in possibleBreakPositions and try starting
                  // over from there, or we've exhausted all possible break
                  // positions and are going to do the fallback procedure.
                  // This loop prevents us from messing with anything in
                  // possibleBreakPositions that didn't work as a starting
                  // point the last time we tried it (this is to prevent a bunch of
                  // repetitive checks from slowing down some extreme cases)
!                 while (!possibleBreakPositions.isEmpty()
!                         && wrongBreakPositions.contains(possibleBreakPositions.peek())) {
                      possibleBreakPositions.pop();
                  }
  
                  // if we've used up all possible break-position combinations, there's
                  // an error or an unknown word in the text.  In this case, we start
*** 470,480 ****
                      Integer temp = possibleBreakPositions.pop();
                      Integer temp2 = null;
                      while (!currentBreakPositions.isEmpty() && temp.intValue() <
                             currentBreakPositions.peek().intValue()) {
                          temp2 = currentBreakPositions.pop();
!                         wrongBreakPositions.addElement(temp2);
                      }
                      currentBreakPositions.push(temp);
                      text.setIndex(currentBreakPositions.peek().intValue());
                  }
  
--- 477,487 ----
                      Integer temp = possibleBreakPositions.pop();
                      Integer temp2 = null;
                      while (!currentBreakPositions.isEmpty() && temp.intValue() <
                             currentBreakPositions.peek().intValue()) {
                          temp2 = currentBreakPositions.pop();
!                         wrongBreakPositions.add(temp2);
                      }
                      currentBreakPositions.push(temp);
                      text.setIndex(currentBreakPositions.peek().intValue());
                  }
  
*** 498,508 ****
          // because the range actually ended with non-dictionary characters we want to
          // keep with the word)
          if (!currentBreakPositions.isEmpty()) {
              currentBreakPositions.pop();
          }
!         currentBreakPositions.push(Integer.valueOf(endPos));
  
          // create a regular array to hold the break positions and copy
          // the break positions from the stack to the array (in addition,
          // our starting position goes into this array as a break position).
          // This array becomes the cache of break positions used by next()
--- 505,515 ----
          // because the range actually ended with non-dictionary characters we want to
          // keep with the word)
          if (!currentBreakPositions.isEmpty()) {
              currentBreakPositions.pop();
          }
!         currentBreakPositions.push(endPos);
  
          // create a regular array to hold the break positions and copy
          // the break positions from the stack to the array (in addition,
          // our starting position goes into this array as a break position).
          // This array becomes the cache of break positions used by next()