jdk Cdiff src/java.base/share/classes/sun/text/normalizer/NormalizerBase.java

src/java.base/share/classes/sun/text/normalizer/NormalizerBase.java


*** 53,72 ****
   * Characters with accents or other adornments can be encoded in
   * several different ways in Unicode.  For example, take the character A-acute.
   * In Unicode, this can be encoded as a single character (the
   * "composed" form):
   *
!  * <p>
   *      00C1    LATIN CAPITAL LETTER A WITH ACUTE
!  * </p>
   *
   * or as two separate characters (the "decomposed" form):
   *
!  * <p>
   *      0041    LATIN CAPITAL LETTER A
   *      0301    COMBINING ACUTE ACCENT
!  * </p>
   *
   * To a user of your program, however, both of these sequences should be
   * treated as the same "user-level" character "A with acute accent".  When you
   * are searching or comparing text, you must ensure that these two sequences are
   * treated equivalently.  In addition, you must handle characters with more than
--- 53,72 ----
   * Characters with accents or other adornments can be encoded in
   * several different ways in Unicode.  For example, take the character A-acute.
   * In Unicode, this can be encoded as a single character (the
   * "composed" form):
   *
!  * <pre>
   *      00C1    LATIN CAPITAL LETTER A WITH ACUTE
!  * </pre>
   *
   * or as two separate characters (the "decomposed" form):
   *
!  * <pre>
   *      0041    LATIN CAPITAL LETTER A
   *      0301    COMBINING ACUTE ACCENT
!  * </pre>
   *
   * To a user of your program, however, both of these sequences should be
   * treated as the same "user-level" character "A with acute accent".  When you
   * are searching or comparing text, you must ensure that these two sequences are
   * treated equivalently.  In addition, you must handle characters with more than
*** 74,94 ****
   * significant, while in other cases accent sequences in different orders are
   * really equivalent.
   *
   * Similarly, the string "ffi" can be encoded as three separate letters:
   *
!  * <p>
   *      0066    LATIN SMALL LETTER F
   *      0066    LATIN SMALL LETTER F
   *      0069    LATIN SMALL LETTER I
!  * </p>
   *
   * or as the single character
   *
!  * <p>
   *      FB03    LATIN SMALL LIGATURE FFI
!  * </p>
   *
   * The ffi ligature is not a distinct semantic character, and strictly speaking
   * it shouldn't be in Unicode at all, but it was included for compatibility
   * with existing character sets that already provided it.  The Unicode standard
   * identifies such characters by giving them "compatibility" decompositions
--- 74,94 ----
   * significant, while in other cases accent sequences in different orders are
   * really equivalent.
   *
   * Similarly, the string "ffi" can be encoded as three separate letters:
   *
!  * <pre>
   *      0066    LATIN SMALL LETTER F
   *      0066    LATIN SMALL LETTER F
   *      0069    LATIN SMALL LETTER I
!  * </pre>
   *
   * or as the single character
   *
!  * <pre>
   *      FB03    LATIN SMALL LIGATURE FFI
!  * </pre>
   *
   * The ffi ligature is not a distinct semantic character, and strictly speaking
   * it shouldn't be in Unicode at all, but it was included for compatibility
   * with existing character sets that already provided it.  The Unicode standard
   * identifies such characters by giving them "compatibility" decompositions
*** 553,568 ****
      //-------------------------------------------------------------------------
      // Constructors
      //-------------------------------------------------------------------------
  
      /**
!      * Creates a new <tt>Normalizer</tt> object for iterating over the
       * normalized form of a given string.
       * <p>
!      * The <tt>options</tt> parameter specifies which optional
!      * <tt>Normalizer</tt> features are to be enabled for this object.
!      * <p>
       * @param str  The string to be normalized.  The normalization
       *              will start at the beginning of the string.
       *
       * @param mode The normalization mode.
       *
--- 553,568 ----
      //-------------------------------------------------------------------------
      // Constructors
      //-------------------------------------------------------------------------
  
      /**
!      * Creates a new {@code Normalizer} object for iterating over the
       * normalized form of a given string.
       * <p>
!      * The {@code options} parameter specifies which optional
!      * {@code Normalizer} features are to be enabled for this object.
!      *
       * @param str  The string to be normalized.  The normalization
       *              will start at the beginning of the string.
       *
       * @param mode The normalization mode.
       *
*** 577,602 ****
          this.mode = mode;
          this.options=opt;
      }
  
      /**
!      * Creates a new <tt>Normalizer</tt> object for iterating over the
       * normalized form of the given text.
!      * <p>
       * @param iter  The input text to be normalized.  The normalization
       *              will start at the beginning of the string.
       *
       * @param mode  The normalization mode.
       */
      public NormalizerBase(CharacterIterator iter, Mode mode) {
            this(iter, mode, UNICODE_LATEST);
      }
  
      /**
!      * Creates a new <tt>Normalizer</tt> object for iterating over the
       * normalized form of the given text.
!      * <p>
       * @param iter  The input text to be normalized.  The normalization
       *              will start at the beginning of the string.
       *
       * @param mode  The normalization mode.
       *
--- 577,602 ----
          this.mode = mode;
          this.options=opt;
      }
  
      /**
!      * Creates a new {@code Normalizer} object for iterating over the
       * normalized form of the given text.
!      *
       * @param iter  The input text to be normalized.  The normalization
       *              will start at the beginning of the string.
       *
       * @param mode  The normalization mode.
       */
      public NormalizerBase(CharacterIterator iter, Mode mode) {
            this(iter, mode, UNICODE_LATEST);
      }
  
      /**
!      * Creates a new {@code Normalizer} object for iterating over the
       * normalized form of the given text.
!      *
       * @param iter  The input text to be normalized.  The normalization
       *              will start at the beginning of the string.
       *
       * @param mode  The normalization mode.
       *
*** 613,629 ****
          this.mode = mode;
          this.options = opt;
      }
  
      /**
!      * Clones this <tt>Normalizer</tt> object.  All properties of this
       * object are duplicated in the new object, including the cloning of any
       * {@link CharacterIterator} that was passed in to the constructor
       * or to {@link #setText(CharacterIterator) setText}.
       * However, the text storage underlying
!      * the <tt>CharacterIterator</tt> is not duplicated unless the
!      * iterator's <tt>clone</tt> method does so.
       * @stable ICU 2.8
       */
      public Object clone() {
          try {
              NormalizerBase copy = (NormalizerBase) super.clone();
--- 613,629 ----
          this.mode = mode;
          this.options = opt;
      }
  
      /**
!      * Clones this {@code Normalizer} object.  All properties of this
       * object are duplicated in the new object, including the cloning of any
       * {@link CharacterIterator} that was passed in to the constructor
       * or to {@link #setText(CharacterIterator) setText}.
       * However, the text storage underlying
!      * the {@code CharacterIterator} is not duplicated unless the
!      * iterator's {@code clone} method does so.
       * @stable ICU 2.8
       */
      public Object clone() {
          try {
              NormalizerBase copy = (NormalizerBase) super.clone();
*** 789,799 ****
      //-------------------------------------------------------------------------
      // Iteration API
      //-------------------------------------------------------------------------
  
      /**
!      * Return the current character in the normalized text->
       * @return The codepoint as an int
       * @stable ICU 2.8
       */
      public int current() {
          if(bufferPos<bufferLimit || nextNormalize()) {
--- 789,799 ----
      //-------------------------------------------------------------------------
      // Iteration API
      //-------------------------------------------------------------------------
  
      /**
!      * Return the current character in the normalized text.
       * @return The codepoint as an int
       * @stable ICU 2.8
       */
      public int current() {
          if(bufferPos<bufferLimit || nextNormalize()) {
*** 870,883 ****
       * <p>
       * <b>Note:</b> This method sets the position in the <em>input</em> text,
       * while {@link #next} and {@link #previous} iterate through characters
       * in the normalized <em>output</em>.  This means that there is not
       * necessarily a one-to-one correspondence between characters returned
!      * by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
!      * returned from <tt>setIndex</tt> and {@link #getIndex}.
!      * <p>
!      * @param index the desired index in the input text->
       *
       * @return   the first normalized character that is the result of iterating
       *            forward starting at the given index.
       *
       * @throws IllegalArgumentException if the given index is less than
--- 870,883 ----
       * <p>
       * <b>Note:</b> This method sets the position in the <em>input</em> text,
       * while {@link #next} and {@link #previous} iterate through characters
       * in the normalized <em>output</em>.  This means that there is not
       * necessarily a one-to-one correspondence between characters returned
!      * by {@code next} and {@code previous} and the indices passed to and
!      * returned from {@code setIndex} and {@link #getIndex}.
!      *
!      * @param index the desired index in the input text.
       *
       * @return   the first normalized character that is the result of iterating
       *            forward starting at the given index.
       *
       * @throws IllegalArgumentException if the given index is less than
*** 892,903 ****
           return current();
       }
  
      /**
       * Retrieve the index of the start of the input text. This is the begin
!      * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
!      * <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
       * @deprecated ICU 2.2. Use startIndex() instead.
       * @return The codepoint as an int
       * @see #startIndex
       */
      @Deprecated
--- 892,903 ----
           return current();
       }
  
      /**
       * Retrieve the index of the start of the input text. This is the begin
!      * index of the {@code CharacterIterator} or the start (i.e. 0) of the
!      * {@code String} over which this {@code Normalizer} is iterating.
       * @deprecated ICU 2.2. Use startIndex() instead.
       * @return The codepoint as an int
       * @see #startIndex
       */
      @Deprecated
*** 905,916 ****
          return 0;
      }
  
      /**
       * Retrieve the index of the end of the input text.  This is the end index
!      * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
!      * over which this <tt>Normalizer</tt> is iterating
       * @deprecated ICU 2.2. Use endIndex() instead.
       * @return The codepoint as an int
       * @see #endIndex
       */
      @Deprecated
--- 905,916 ----
          return 0;
      }
  
      /**
       * Retrieve the index of the end of the input text.  This is the end index
!      * of the {@code CharacterIterator} or the length of the {@code String}
!      * over which this {@code Normalizer} is iterating.
       * @deprecated ICU 2.2. Use endIndex() instead.
       * @return The codepoint as an int
       * @see #endIndex
       */
      @Deprecated
*** 925,937 ****
       * the input text that corresponds to a given normalized output character.
       * <p>
       * <b>Note:</b> This method sets the position in the <em>input</em>, while
       * {@link #next} and {@link #previous} iterate through characters in the
       * <em>output</em>.  This means that there is not necessarily a one-to-one
!      * correspondence between characters returned by <tt>next</tt> and
!      * <tt>previous</tt> and the indices passed to and returned from
!      * <tt>setIndex</tt> and {@link #getIndex}.
       * @return The current iteration position
       * @stable ICU 2.8
       */
      public int getIndex() {
          if(bufferPos<bufferLimit) {
--- 925,937 ----
       * the input text that corresponds to a given normalized output character.
       * <p>
       * <b>Note:</b> This method sets the position in the <em>input</em>, while
       * {@link #next} and {@link #previous} iterate through characters in the
       * <em>output</em>.  This means that there is not necessarily a one-to-one
!      * correspondence between characters returned by {@code next} and
!      * {@code previous} and the indices passed to and returned from
!      * {@code setIndex} and {@link #getIndex}.
       * @return The current iteration position
       * @stable ICU 2.8
       */
      public int getIndex() {
          if(bufferPos<bufferLimit) {
*** 940,952 ****
              return nextIndex;
          }
      }
  
      /**
!      * Retrieve the index of the end of the input text->  This is the end index
!      * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
!      * over which this <tt>Normalizer</tt> is iterating
       * @return The current iteration position
       * @stable ICU 2.8
       */
      public int endIndex() {
          return text.getLength();
--- 940,952 ----
              return nextIndex;
          }
      }
  
      /**
!      * Retrieve the index of the end of the input text. This is the end index
!      * of the {@code CharacterIterator} or the length of the {@code String}
!      * over which this {@code Normalizer} is iterating.
       * @return The current iteration position
       * @stable ICU 2.8
       */
      public int endIndex() {
          return text.getLength();
*** 961,973 ****
       * <b>Note:</b>If the normalization mode is changed while iterating
       * over a string, calls to {@link #next} and {@link #previous} may
       * return previously buffers characters in the old normalization mode
       * until the iteration is able to re-sync at the next base character.
       * It is safest to call {@link #setText setText()}, {@link #first},
!      * {@link #last}, etc. after calling <tt>setMode</tt>.
!      * <p>
!      * @param newMode the new mode for this <tt>Normalizer</tt>.
       * The supported modes are:
       * <ul>
       *  <li>{@link #COMPOSE}        - Unicode canonical decompositiion
       *                                  followed by canonical composition.
       *  <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decompositiion
--- 961,973 ----
       * <b>Note:</b>If the normalization mode is changed while iterating
       * over a string, calls to {@link #next} and {@link #previous} may
       * return previously buffers characters in the old normalization mode
       * until the iteration is able to re-sync at the next base character.
       * It is safest to call {@link #setText setText()}, {@link #first},
!      * {@link #last}, etc. after calling {@code setMode}.
!      *
!      * @param newMode the new mode for this {@code Normalizer}.
       * The supported modes are:
       * <ul>
       *  <li>{@link #COMPOSE}        - Unicode canonical decompositiion
       *                                  followed by canonical composition.
       *  <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decompositiion
*** 983,1004 ****
       */
      public void setMode(Mode newMode) {
          mode = newMode;
      }
      /**
!      * Return the basic operation performed by this <tt>Normalizer</tt>
       *
       * @see #setMode
       * @stable ICU 2.8
       */
      public Mode getMode() {
          return mode;
      }
  
      /**
!      * Set the input text over which this <tt>Normalizer</tt> will iterate.
!      * The iteration position is set to the beginning of the input text->
       * @param newText   The new string to be normalized.
       * @stable ICU 2.8
       */
      public void setText(String newText) {
  
--- 983,1004 ----
       */
      public void setMode(Mode newMode) {
          mode = newMode;
      }
      /**
!      * Return the basic operation performed by this {@code Normalizer}.
       *
       * @see #setMode
       * @stable ICU 2.8
       */
      public Mode getMode() {
          return mode;
      }
  
      /**
!      * Set the input text over which this {@code Normalizer} will iterate.
!      * The iteration position is set to the beginning of the input text.
       * @param newText   The new string to be normalized.
       * @stable ICU 2.8
       */
      public void setText(String newText) {
  
*** 1009,1020 ****
          text = newIter;
          reset();
      }
  
      /**
!      * Set the input text over which this <tt>Normalizer</tt> will iterate.
!      * The iteration position is set to the beginning of the input text->
       * @param newText   The new string to be normalized.
       * @stable ICU 2.8
       */
      public void setText(CharacterIterator newText) {
  
--- 1009,1020 ----
          text = newIter;
          reset();
      }
  
      /**
!      * Set the input text over which this {@code Normalizer} will iterate.
!      * The iteration position is set to the beginning of the input text.
       * @param newText   The new string to be normalized.
       * @stable ICU 2.8
       */
      public void setText(CharacterIterator newText) {
  
*** 1569,1579 ****
      // public constructor and methods for java.text.Normalizer and
      // sun.text.Normalizer
      //
  
      /**
!      * Creates a new <tt>Normalizer</tt> object for iterating over the
       * normalized form of a given string.
       *
       * @param str  The string to be normalized.  The normalization
       *              will start at the beginning of the string.
       *
--- 1569,1579 ----
      // public constructor and methods for java.text.Normalizer and
      // sun.text.Normalizer
      //
  
      /**
!      * Creates a new {@code Normalizer} object for iterating over the
       * normalized form of a given string.
       *
       * @param str  The string to be normalized.  The normalization
       *              will start at the beginning of the string.
       *
*** 1644,1654 ****
       * For NFD, NFKD, and FCD, both functions work exactly the same.
       * For NFC and NFKC where quickCheck may return "maybe", this function will
       * perform further tests to arrive at a true/false result.
       * @param str       the input string to be checked to see if it is normalized
       * @param form      the normalization form
-      * @param options   the optional features to be enabled.
       */
      public static boolean isNormalized(String str, Normalizer.Form form) {
          return isNormalized(str, form, UNICODE_LATEST);
      }
  
--- 1644,1653 ----
< prev index next >