jdk9-Dev-Unicode7-8032446 Cdiff jdk/src/java.base/share/classes/sun/text/normalizer/NormalizerBase.java

jdk/src/java.base/share/classes/sun/text/normalizer/NormalizerBase.java


*** 1,7 ****
  /*
!  * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.  Oracle designates this
--- 1,7 ----
  /*
!  * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.  Oracle designates this
*** 20,41 ****
   *
   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   * or visit www.oracle.com if you need additional information or have any
   * questions.
   */
  /*
   *******************************************************************************
!  * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
!  *                                                                             *
!  * The original version of this source code and documentation is copyrighted   *
!  * and owned by IBM, These materials are provided under terms of a License     *
!  * Agreement between IBM and Sun. This technology is protected by multiple     *
!  * US and International patents. This notice and attribution to IBM may not    *
!  * to removed.                                                                 *
   *******************************************************************************
   */
- 
  package sun.text.normalizer;
  
  import java.text.CharacterIterator;
  import java.text.Normalizer;
  
--- 20,36 ----
   *
   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   * or visit www.oracle.com if you need additional information or have any
   * questions.
   */
+ 
  /*
   *******************************************************************************
!  * Copyright (C) 2000-2014, International Business Machines Corporation and
!  * others. All Rights Reserved.
   *******************************************************************************
   */
  package sun.text.normalizer;
  
  import java.text.CharacterIterator;
  import java.text.Normalizer;
  
*** 123,134 ****
   * normalized (NFD) for such a process. The FCD quick check will return YES for
   * most strings in practice.
   *
   * normalize(FCD) may be implemented with NFD.
   *
!  * For more details on FCD see the collation design document:
!  * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
   *
   * ICU collation performs either NFD or FCD normalization automatically if
   * normalization is turned on for the collator object. Beyond collation and
   * string search, normalized strings may be useful for string equivalence
   * comparisons, transliteration/transcription, unique representations, etc.
--- 118,129 ----
   * normalized (NFD) for such a process. The FCD quick check will return YES for
   * most strings in practice.
   *
   * normalize(FCD) may be implemented with NFD.
   *
!  * For more details on FCD see Unicode Technical Note #5 (Canonical Equivalence in Applications):
!  * http://www.unicode.org/notes/tn5/#FCD
   *
   * ICU collation performs either NFD or FCD normalization automatically if
   * normalization is turned on for the collator object. Beyond collation and
   * string search, normalized strings may be useful for string equivalence
   * comparisons, transliteration/transcription, unique representations, etc.
*** 136,568 ****
   * The W3C generally recommends to exchange texts in NFC.
   * Note also that most legacy character encodings use only precomposed forms and
   * often do not encode any combining marks by themselves. For conversion to such
   * character encodings the Unicode text needs to be normalized to NFC.
   * For more usage examples, see the Unicode Standard Annex.
   * @stable ICU 2.8
   */
! 
  public final class NormalizerBase implements Cloneable {
  
-     //-------------------------------------------------------------------------
-     // Private data
-     //-------------------------------------------------------------------------
-     private char[] buffer = new char[100];
-     private int bufferStart = 0;
-     private int bufferPos   = 0;
-     private int bufferLimit = 0;
- 
      // The input text and our position in it
      private UCharacterIterator  text;
!     private Mode                mode = NFC;
!     private int                 options = 0;
      private int                 currentIndex;
      private int                 nextIndex;
  
!     /**
!      * Options bit set value to select Unicode 3.2 normalization
!      * (except NormalizationCorrections).
!      * At most one Unicode version can be selected at a time.
!      * @stable ICU 2.6
!      */
!     public static final int UNICODE_3_2=0x20;
  
!     /**
!      * Constant indicating that the end of the iteration has been reached.
!      * This is guaranteed to have the same value as {@link UCharacterIterator#DONE}.
!      * @stable ICU 2.8
!      */
!     public static final int DONE = UCharacterIterator.DONE;
  
!     /**
!      * Constants for normalization modes.
!      * @stable ICU 2.8
!      */
!     public static class Mode {
!         private int modeValue;
!         private Mode(int value) {
!             modeValue = value;
          }
  
!         /**
!          * This method is used for method dispatch
!          * @stable ICU 2.6
!          */
!         protected int normalize(char[] src, int srcStart, int srcLimit,
!                                 char[] dest,int destStart,int destLimit,
!                                 UnicodeSet nx) {
!             int srcLen = (srcLimit - srcStart);
!             int destLen = (destLimit - destStart);
!             if( srcLen > destLen ) {
!                 return srcLen;
              }
!             System.arraycopy(src,srcStart,dest,destStart,srcLen);
!             return srcLen;
          }
  
!         /**
!          * This method is used for method dispatch
!          * @stable ICU 2.6
!          */
!         protected int normalize(char[] src, int srcStart, int srcLimit,
!                                 char[] dest,int destStart,int destLimit,
!                                 int options) {
!             return normalize(   src, srcStart, srcLimit,
!                                 dest,destStart,destLimit,
!                                 NormalizerImpl.getNX(options)
!                                 );
          }
  
!         /**
!          * This method is used for method dispatch
!          * @stable ICU 2.6
!          */
!         protected String normalize(String src, int options) {
!             return src;
          }
  
!         /**
!          * This method is used for method dispatch
!          * @stable ICU 2.8
!          */
!         protected int getMinC() {
!             return -1;
          }
  
!         /**
!          * This method is used for method dispatch
!          * @stable ICU 2.8
!          */
!         protected int getMask() {
!             return -1;
          }
  
!         /**
!          * This method is used for method dispatch
!          * @stable ICU 2.8
!          */
!         protected IsPrevBoundary getPrevBoundary() {
!             return null;
          }
  
!         /**
!          * This method is used for method dispatch
!          * @stable ICU 2.8
!          */
!         protected IsNextBoundary getNextBoundary() {
!             return null;
          }
  
          /**
!          * This method is used for method dispatch
           * @stable ICU 2.6
           */
!         protected QuickCheckResult quickCheck(char[] src,int start, int limit,
!                                               boolean allowMaybe,UnicodeSet nx) {
!             if(allowMaybe) {
!                 return MAYBE;
!             }
!             return NO;
!         }
  
!         /**
!          * This method is used for method dispatch
!          * @stable ICU 2.8
           */
!         protected boolean isNFSkippable(int c) {
!             return true;
!         }
!     }
  
      /**
!      * No decomposition/composition.
       * @stable ICU 2.8
       */
!     public static final Mode NONE = new Mode(1);
  
      /**
!      * Canonical decomposition.
       * @stable ICU 2.8
       */
!     public static final Mode NFD = new NFDMode(2);
! 
!     private static final class NFDMode extends Mode {
!         private NFDMode(int value) {
!             super(value);
!         }
! 
!         protected int normalize(char[] src, int srcStart, int srcLimit,
!                                 char[] dest,int destStart,int destLimit,
!                                 UnicodeSet nx) {
!             int[] trailCC = new int[1];
!             return NormalizerImpl.decompose(src,  srcStart,srcLimit,
!                                             dest, destStart,destLimit,
!                                             false, trailCC,nx);
!         }
  
!         protected String normalize( String src, int options) {
!             return decompose(src,false,options);
!         }
! 
!         protected int getMinC() {
!             return NormalizerImpl.MIN_WITH_LEAD_CC;
          }
  
!         protected IsPrevBoundary getPrevBoundary() {
!             return new IsPrevNFDSafe();
          }
  
!         protected IsNextBoundary getNextBoundary() {
!             return new IsNextNFDSafe();
          }
  
!         protected int getMask() {
!             return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD);
          }
  
!         protected QuickCheckResult quickCheck(char[] src,int start,
!                                               int limit,boolean allowMaybe,
!                                               UnicodeSet nx) {
!             return NormalizerImpl.quickCheck(
!                                              src, start,limit,
!                                              NormalizerImpl.getFromIndexesArr(
!                                                                               NormalizerImpl.INDEX_MIN_NFD_NO_MAYBE
!                                                                               ),
!                                              NormalizerImpl.QC_NFD,
!                                              0,
!                                              allowMaybe,
!                                              nx
!                                              );
          }
  
!         protected boolean isNFSkippable(int c) {
!             return NormalizerImpl.isNFSkippable(c,this,
!                                                 (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD)
!                                                 );
          }
      }
  
-     /**
-      * Compatibility decomposition.
-      * @stable ICU 2.8
-      */
-     public static final Mode NFKD = new NFKDMode(3);
- 
      private static final class NFKDMode extends Mode {
!         private NFKDMode(int value) {
!             super(value);
!         }
! 
!         protected int normalize(char[] src, int srcStart, int srcLimit,
!                                 char[] dest,int destStart,int destLimit,
!                                 UnicodeSet nx) {
!             int[] trailCC = new int[1];
!             return NormalizerImpl.decompose(src,  srcStart,srcLimit,
!                                             dest, destStart,destLimit,
!                                             true, trailCC, nx);
!         }
! 
!         protected String normalize( String src, int options) {
!             return decompose(src,true,options);
!         }
! 
!         protected int getMinC() {
!             return NormalizerImpl.MIN_WITH_LEAD_CC;
!         }
! 
!         protected IsPrevBoundary getPrevBoundary() {
!             return new IsPrevNFDSafe();
          }
- 
-         protected IsNextBoundary getNextBoundary() {
-             return new IsNextNFDSafe();
          }
  
!         protected int getMask() {
!             return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKD);
          }
- 
-         protected QuickCheckResult quickCheck(char[] src,int start,
-                                               int limit,boolean allowMaybe,
-                                               UnicodeSet nx) {
-             return NormalizerImpl.quickCheck(
-                                              src,start,limit,
-                                              NormalizerImpl.getFromIndexesArr(
-                                                                               NormalizerImpl.INDEX_MIN_NFKD_NO_MAYBE
-                                                                               ),
-                                              NormalizerImpl.QC_NFKD,
-                                              NormalizerImpl.OPTIONS_COMPAT,
-                                              allowMaybe,
-                                              nx
-                                              );
          }
  
!         protected boolean isNFSkippable(int c) {
!             return NormalizerImpl.isNFSkippable(c, this,
!                                                 (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKD)
!                                                 );
          }
      }
  
      /**
!      * Canonical decomposition followed by canonical composition.
       * @stable ICU 2.8
       */
!     public static final Mode NFC = new NFCMode(4);
! 
!     private static final class NFCMode extends Mode{
!         private NFCMode(int value) {
!             super(value);
!         }
!         protected int normalize(char[] src, int srcStart, int srcLimit,
!                                 char[] dest,int destStart,int destLimit,
!                                 UnicodeSet nx) {
!             return NormalizerImpl.compose( src, srcStart, srcLimit,
!                                            dest,destStart,destLimit,
!                                            0, nx);
!         }
! 
!         protected String normalize( String src, int options) {
!             return compose(src, false, options);
!         }
! 
!         protected int getMinC() {
!             return NormalizerImpl.getFromIndexesArr(
!                                                     NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
!                                                     );
!         }
!         protected IsPrevBoundary getPrevBoundary() {
!             return new IsPrevTrueStarter();
!         }
!         protected IsNextBoundary getNextBoundary() {
!             return new IsNextTrueStarter();
!         }
!         protected int getMask() {
!             return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFC);
!         }
!         protected QuickCheckResult quickCheck(char[] src,int start,
!                                               int limit,boolean allowMaybe,
!                                               UnicodeSet nx) {
!             return NormalizerImpl.quickCheck(
!                                              src,start,limit,
!                                              NormalizerImpl.getFromIndexesArr(
!                                                                               NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
!                                                                               ),
!                                              NormalizerImpl.QC_NFC,
!                                              0,
!                                              allowMaybe,
!                                              nx
!                                              );
!         }
!         protected boolean isNFSkippable(int c) {
!             return NormalizerImpl.isNFSkippable(c,this,
!                                                 ( NormalizerImpl.CC_MASK|NormalizerImpl.COMBINES_ANY|
!                                                   (NormalizerImpl.QC_NFC & NormalizerImpl.QC_ANY_NO)
!                                                   )
!                                                 );
!         }
!     };
  
      /**
!      * Compatibility decomposition followed by canonical composition.
       * @stable ICU 2.8
       */
!     public static final Mode NFKC =new NFKCMode(5);
! 
!     private static final class NFKCMode extends Mode{
!         private NFKCMode(int value) {
!             super(value);
!         }
!         protected int normalize(char[] src, int srcStart, int srcLimit,
!                                 char[] dest,int destStart,int destLimit,
!                                 UnicodeSet nx) {
!             return NormalizerImpl.compose(src,  srcStart,srcLimit,
!                                           dest, destStart,destLimit,
!                                           NormalizerImpl.OPTIONS_COMPAT, nx);
!         }
! 
!         protected String normalize( String src, int options) {
!             return compose(src, true, options);
!         }
!         protected int getMinC() {
!             return NormalizerImpl.getFromIndexesArr(
!                                                     NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
!                                                     );
!         }
!         protected IsPrevBoundary getPrevBoundary() {
!             return new IsPrevTrueStarter();
!         }
!         protected IsNextBoundary getNextBoundary() {
!             return new IsNextTrueStarter();
!         }
!         protected int getMask() {
!             return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKC);
!         }
!         protected QuickCheckResult quickCheck(char[] src,int start,
!                                               int limit,boolean allowMaybe,
!                                               UnicodeSet nx) {
!             return NormalizerImpl.quickCheck(
!                                              src,start,limit,
!                                              NormalizerImpl.getFromIndexesArr(
!                                                                               NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
!                                                                               ),
!                                              NormalizerImpl.QC_NFKC,
!                                              NormalizerImpl.OPTIONS_COMPAT,
!                                              allowMaybe,
!                                              nx
!                                              );
!         }
!         protected boolean isNFSkippable(int c) {
!             return NormalizerImpl.isNFSkippable(c, this,
!                                                 ( NormalizerImpl.CC_MASK|NormalizerImpl.COMBINES_ANY|
!                                                   (NormalizerImpl.QC_NFKC & NormalizerImpl.QC_ANY_NO)
!                                                   )
!                                                 );
!         }
!     };
  
      /**
!      * Result values for quickCheck().
!      * For details see Unicode Technical Report 15.
!      * @stable ICU 2.8
!      */
!     public static final class QuickCheckResult{
!         private int resultValue;
!         private QuickCheckResult(int value) {
!             resultValue=value;
!         }
!     }
!     /**
!      * Indicates that string is not in the normalized format
       * @stable ICU 2.8
       */
!     public static final QuickCheckResult NO = new QuickCheckResult(0);
  
      /**
!      * Indicates that string is in the normalized format
       * @stable ICU 2.8
       */
!     public static final QuickCheckResult YES = new QuickCheckResult(1);
  
!     /**
!      * Indicates it cannot be determined if string is in the normalized
!      * format without further thorough checks.
!      * @stable ICU 2.8
!      */
!     public static final QuickCheckResult MAYBE = new QuickCheckResult(2);
  
      //-------------------------------------------------------------------------
!     // Constructors
      //-------------------------------------------------------------------------
  
      /**
!      * Creates a new {@code Normalizer} object for iterating over the
       * normalized form of a given string.
       * <p>
       * The {@code options} parameter specifies which optional
!      * {@code Normalizer} features are to be enabled for this object.
!      *
       * @param str  The string to be normalized.  The normalization
       *              will start at the beginning of the string.
       *
       * @param mode The normalization mode.
       *
--- 131,364 ----
   * The W3C generally recommends to exchange texts in NFC.
   * Note also that most legacy character encodings use only precomposed forms and
   * often do not encode any combining marks by themselves. For conversion to such
   * character encodings the Unicode text needs to be normalized to NFC.
   * For more usage examples, see the Unicode Standard Annex.
+  *
+  * Note: The Normalizer class also provides an API for iterative normalization.
+  * While setIndex() and getIndex() refer to indices in the
+  * underlying Unicode input text, the next() and previous() methods
+  * iterate through characters in the normalized output.
+  * This means that there is not necessarily a one-to-one correspondence
+  * between characters returned by next() and previous() and the indices
+  * passed to and returned from setIndex() and getIndex().
+  * It is for this reason that Normalizer does not implement the CharacterIterator interface.
+  *
   * @stable ICU 2.8
   */
! // Original filename in ICU4J: Normalizer.java
  public final class NormalizerBase implements Cloneable {
  
      // The input text and our position in it
      private UCharacterIterator  text;
!     private Normalizer2         norm2;
!     private Mode                mode;
!     private int                 options;
! 
!     // The normalization buffer holds the result of normalizing
!     // the source text in the range [currentIndex..nextIndex].
      private int                 currentIndex;
      private int                 nextIndex;
  
!     // A buffer for holding intermediate results
!     private StringBuilder       buffer;
!     private int                 bufferPos;
  
!     // Helper classes to defer loading of normalization data.
!     private static final class ModeImpl {
!         private ModeImpl(Normalizer2 n2) {
!             normalizer2 = n2;
!         }
!         private final Normalizer2 normalizer2;
!     }
  
!     private static final class NFDModeImpl {
!         private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFDInstance());
      }
  
!     private static final class NFKDModeImpl {
!         private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFKDInstance());
      }
! 
!     private static final class NFCModeImpl {
!         private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFCInstance());
      }
  
!     private static final class NFKCModeImpl {
!         private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFKCInstance());
      }
  
!     private static final class Unicode32 {
!         private static final UnicodeSet INSTANCE = new UnicodeSet("[:age=3.2:]").freeze();
      }
  
!     private static final class NFD32ModeImpl {
!         private static final ModeImpl INSTANCE =
!             new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFDInstance(),
!                                                  Unicode32.INSTANCE));
      }
  
!     private static final class NFKD32ModeImpl {
!         private static final ModeImpl INSTANCE =
!             new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFKDInstance(),
!                                                  Unicode32.INSTANCE));
      }
  
!     private static final class NFC32ModeImpl {
!         private static final ModeImpl INSTANCE =
!             new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFCInstance(),
!                                                  Unicode32.INSTANCE));
      }
  
!     private static final class NFKC32ModeImpl {
!         private static final ModeImpl INSTANCE =
!             new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFKCInstance(),
!                                                  Unicode32.INSTANCE));
      }
  
      /**
!      * Options bit set value to select Unicode 3.2 normalization
!      * (except NormalizationCorrections).
!      * At most one Unicode version can be selected at a time.
       * @stable ICU 2.6
       */
!     public static final int UNICODE_3_2=0x20;
  
!     public static final int UNICODE_3_2_0_ORIGINAL=UNICODE_3_2;
! 
!     /*
!      * Default option for the latest Unicode normalization. This option is
!      * provided mainly for testing.
!      * The value zero means that normalization is done with the fixes for
!      *   - Corrigendum 4 (Five CJK Canonical Mapping Errors)
!      *   - Corrigendum 5 (Normalization Idempotency)
       */
!     public static final int UNICODE_LATEST = 0x00;
  
      /**
!      * Constant indicating that the end of the iteration has been reached.
!      * This is guaranteed to have the same value as {@link UCharacterIterator#DONE}.
       * @stable ICU 2.8
       */
!     public static final int DONE = UCharacterIterator.DONE;
  
      /**
!      * Constants for normalization modes.
!      * <p>
!      * The Mode class is not intended for public subclassing.
!      * Only the Mode constants provided by the Normalizer class should be used;
!      * its fields and methods should not be called or overridden by users.
       * @stable ICU 2.8
       */
!     public static abstract class Mode {
  
!         /**
!          * Sole constructor
!          * @internal
!          * @deprecated This API is ICU internal only.
!          */
!         @Deprecated
!         protected Mode() {
          }
  
!         /**
!          * @internal
!          * @deprecated This API is ICU internal only.
!          */
!         @Deprecated
!         protected abstract Normalizer2 getNormalizer2(int options);
      }
  
!     private static Mode toMode(Normalizer.Form form) {
!         switch (form) {
!         case NFC :
!             return NFC;
!         case NFD :
!             return NFD;
!         case NFKC :
!             return NFKC;
!         case NFKD :
!             return NFKD;
          }
  
!         throw new IllegalArgumentException("Unexpected normalization form: " +
!                                            form);
      }
  
!     private static final class NONEMode extends Mode {
!         protected Normalizer2 getNormalizer2(int options) { return Norm2AllModes.NOOP_NORMALIZER2; }
      }
  
!     private static final class NFDMode extends Mode {
!         protected Normalizer2 getNormalizer2(int options) {
!             return (options&UNICODE_3_2) != 0 ?
!                     NFD32ModeImpl.INSTANCE.normalizer2 :
!                     NFDModeImpl.INSTANCE.normalizer2;
          }
      }
  
      private static final class NFKDMode extends Mode {
!         protected Normalizer2 getNormalizer2(int options) {
!             return (options&UNICODE_3_2) != 0 ?
!                     NFKD32ModeImpl.INSTANCE.normalizer2 :
!                     NFKDModeImpl.INSTANCE.normalizer2;
          }
      }
  
!     private static final class NFCMode extends Mode {
!         protected Normalizer2 getNormalizer2(int options) {
!             return (options&UNICODE_3_2) != 0 ?
!                     NFC32ModeImpl.INSTANCE.normalizer2 :
!                     NFCModeImpl.INSTANCE.normalizer2;
          }
      }
  
!     private static final class NFKCMode extends Mode {
!         protected Normalizer2 getNormalizer2(int options) {
!             return (options&UNICODE_3_2) != 0 ?
!                     NFKC32ModeImpl.INSTANCE.normalizer2 :
!                     NFKCModeImpl.INSTANCE.normalizer2;
          }
      }
  
      /**
!      * No decomposition/composition.
       * @stable ICU 2.8
       */
!     public static final Mode NONE = new NONEMode();
  
      /**
!      * Canonical decomposition.
       * @stable ICU 2.8
       */
!     public static final Mode NFD = new NFDMode();
  
      /**
!      * Compatibility decomposition.
       * @stable ICU 2.8
       */
!     public static final Mode NFKD = new NFKDMode();
  
      /**
!      * Canonical decomposition followed by canonical composition.
       * @stable ICU 2.8
       */
!     public static final Mode NFC = new NFCMode();
  
!     public static final Mode NFKC =new NFKCMode();
  
      //-------------------------------------------------------------------------
!     // Iterator constructors
      //-------------------------------------------------------------------------
  
      /**
!      * Creates a new {@code NormalizerBase} object for iterating over the
       * normalized form of a given string.
       * <p>
       * The {@code options} parameter specifies which optional
!      * {@code NormalizerBase} features are to be enabled for this object.
!      *
       * @param str  The string to be normalized.  The normalization
       *              will start at the beginning of the string.
       *
       * @param mode The normalization mode.
       *
*** 574,602 ****
       */
      public NormalizerBase(String str, Mode mode, int opt) {
          this.text = UCharacterIterator.getInstance(str);
          this.mode = mode;
          this.options=opt;
      }
  
!     /**
!      * Creates a new {@code Normalizer} object for iterating over the
!      * normalized form of the given text.
!      *
!      * @param iter  The input text to be normalized.  The normalization
!      *              will start at the beginning of the string.
!      *
!      * @param mode  The normalization mode.
!      */
!     public NormalizerBase(CharacterIterator iter, Mode mode) {
!           this(iter, mode, UNICODE_LATEST);
      }
  
      /**
!      * Creates a new {@code Normalizer} object for iterating over the
       * normalized form of the given text.
!      *
       * @param iter  The input text to be normalized.  The normalization
       *              will start at the beginning of the string.
       *
       * @param mode  The normalization mode.
       *
--- 370,392 ----
       */
      public NormalizerBase(String str, Mode mode, int opt) {
          this.text = UCharacterIterator.getInstance(str);
          this.mode = mode;
          this.options=opt;
+         norm2 = mode.getNormalizer2(opt);
+         buffer = new StringBuilder();
      }
  
!     public NormalizerBase(String str, Mode mode) {
!        this(str, mode, 0);
      }
  
+ 
      /**
!      * Creates a new {@code NormalizerBase} object for iterating over the
       * normalized form of the given text.
!      *
       * @param iter  The input text to be normalized.  The normalization
       *              will start at the beginning of the string.
       *
       * @param mode  The normalization mode.
       *
*** 605,623 ****
       *            If you want the default behavior corresponding to one of the
       *            standard Unicode Normalization Forms, use 0 for this argument.
       * @stable ICU 2.6
       */
      public NormalizerBase(CharacterIterator iter, Mode mode, int opt) {
!         this.text = UCharacterIterator.getInstance(
!                                                    (CharacterIterator)iter.clone()
!                                                    );
          this.mode = mode;
          this.options = opt;
      }
  
      /**
!      * Clones this {@code Normalizer} object.  All properties of this
       * object are duplicated in the new object, including the cloning of any
       * {@link CharacterIterator} that was passed in to the constructor
       * or to {@link #setText(CharacterIterator) setText}.
       * However, the text storage underlying
       * the {@code CharacterIterator} is not duplicated unless the
--- 395,417 ----
       *            If you want the default behavior corresponding to one of the
       *            standard Unicode Normalization Forms, use 0 for this argument.
       * @stable ICU 2.6
       */
      public NormalizerBase(CharacterIterator iter, Mode mode, int opt) {
!         this.text = UCharacterIterator.getInstance((CharacterIterator)iter.clone());
          this.mode = mode;
          this.options = opt;
+         norm2 = mode.getNormalizer2(opt);
+         buffer = new StringBuilder();
+     }
+ 
+     public NormalizerBase(CharacterIterator iter, Mode mode) {
+        this(iter, mode, 0);
      }
  
      /**
!      * Clones this {@code NormalizerBase} object.  All properties of this
       * object are duplicated in the new object, including the cloning of any
       * {@link CharacterIterator} that was passed in to the constructor
       * or to {@link #setText(CharacterIterator) setText}.
       * However, the text storage underlying
       * the {@code CharacterIterator} is not duplicated unless the
*** 626,791 ****
       */
      public Object clone() {
          try {
              NormalizerBase copy = (NormalizerBase) super.clone();
              copy.text = (UCharacterIterator) text.clone();
!             //clone the internal buffer
!             if (buffer != null) {
!                 copy.buffer = new char[buffer.length];
!                 System.arraycopy(buffer,0,copy.buffer,0,buffer.length);
!             }
              return copy;
          }
          catch (CloneNotSupportedException e) {
              throw new InternalError(e.toString(), e);
          }
      }
  
-     //--------------------------------------------------------------------------
-     // Static Utility methods
-     //--------------------------------------------------------------------------
- 
      /**
!      * Compose a string.
!      * The string will be composed according to the specified mode.
!      * @param str        The string to compose.
!      * @param compat     If true the string will be composed according to
!      *                    NFKC rules and if false will be composed according to
!      *                    NFC rules.
!      * @param options    The only recognized option is UNICODE_3_2
!      * @return String    The composed string
       * @stable ICU 2.6
       */
!     public static String compose(String str, boolean compat, int options) {
! 
!         char[] dest, src;
!         if (options == UNICODE_3_2_0_ORIGINAL) {
!             String mappedStr = NormalizerImpl.convert(str);
!             dest = new char[mappedStr.length()*MAX_BUF_SIZE_COMPOSE];
!             src = mappedStr.toCharArray();
!         } else {
!             dest = new char[str.length()*MAX_BUF_SIZE_COMPOSE];
!             src = str.toCharArray();
          }
-         int destSize=0;
- 
-         UnicodeSet nx = NormalizerImpl.getNX(options);
  
!         /* reset options bits that should only be set here or inside compose() */
!         options&=~(NormalizerImpl.OPTIONS_SETS_MASK|NormalizerImpl.OPTIONS_COMPAT|NormalizerImpl.OPTIONS_COMPOSE_CONTIGUOUS);
! 
!         if(compat) {
!             options|=NormalizerImpl.OPTIONS_COMPAT;
!         }
! 
!         for(;;) {
!             destSize=NormalizerImpl.compose(src,0,src.length,
!                                             dest,0,dest.length,options,
!                                             nx);
!             if(destSize<=dest.length) {
!                 return new String(dest,0,destSize);
!             } else {
!                 dest = new char[destSize];
!             }
!         }
      }
  
!     private static final int MAX_BUF_SIZE_COMPOSE = 2;
!     private static final int MAX_BUF_SIZE_DECOMPOSE = 3;
! 
!     /**
!      * Decompose a string.
!      * The string will be decomposed according to the specified mode.
!      * @param str       The string to decompose.
!      * @param compat    If true the string will be decomposed according to NFKD
!      *                   rules and if false will be decomposed according to NFD
!      *                   rules.
!      * @return String   The decomposed string
!      * @stable ICU 2.8
!      */
!     public static String decompose(String str, boolean compat) {
!         return decompose(str,compat,UNICODE_LATEST);
      }
  
      /**
!      * Decompose a string.
!      * The string will be decomposed according to the specified mode.
!      * @param str     The string to decompose.
!      * @param compat  If true the string will be decomposed according to NFKD
!      *                 rules and if false will be decomposed according to NFD
!      *                 rules.
!      * @param options The normalization options, ORed together (0 for no options).
!      * @return String The decomposed string
       * @stable ICU 2.6
       */
!     public static String decompose(String str, boolean compat, int options) {
! 
!         int[] trailCC = new int[1];
!         int destSize=0;
!         UnicodeSet nx = NormalizerImpl.getNX(options);
!         char[] dest;
! 
!         if (options == UNICODE_3_2_0_ORIGINAL) {
!             String mappedStr = NormalizerImpl.convert(str);
!             dest = new char[mappedStr.length()*MAX_BUF_SIZE_DECOMPOSE];
! 
!             for(;;) {
!                 destSize=NormalizerImpl.decompose(mappedStr.toCharArray(),0,mappedStr.length(),
!                                                   dest,0,dest.length,
!                                                   compat,trailCC, nx);
!                 if(destSize<=dest.length) {
!                     return new String(dest,0,destSize);
!                 } else {
!                     dest = new char[destSize];
                  }
-             }
-         } else {
-             dest = new char[str.length()*MAX_BUF_SIZE_DECOMPOSE];
  
!             for(;;) {
!                 destSize=NormalizerImpl.decompose(str.toCharArray(),0,str.length(),
!                                                   dest,0,dest.length,
!                                                   compat,trailCC, nx);
!                 if(destSize<=dest.length) {
!                     return new String(dest,0,destSize);
!                 } else {
!                     dest = new char[destSize];
!                 }
!             }
!         }
      }
  
!     /**
!      * Normalize a string.
!      * The string will be normalized according to the specified normalization
!      * mode and options.
!      * @param src       The char array to compose.
!      * @param srcStart  Start index of the source
!      * @param srcLimit  Limit index of the source
!      * @param dest      The char buffer to fill in
!      * @param destStart Start index of the destination buffer
!      * @param destLimit End index of the destination buffer
!      * @param mode      The normalization mode; one of Normalizer.NONE,
!      *                   Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
!      *                   Normalizer.NFKD, Normalizer.DEFAULT
!      * @param options The normalization options, ORed together (0 for no options).
!      * @return int      The total buffer size needed;if greater than length of
!      *                   result, the output was truncated.
!      * @exception       IndexOutOfBoundsException if the target capacity is
!      *                   less than the required length
!      * @stable ICU 2.6
!      */
!     public static int normalize(char[] src,int srcStart, int srcLimit,
!                                 char[] dest,int destStart, int destLimit,
!                                 Mode  mode, int options) {
!         int length = mode.normalize(src,srcStart,srcLimit,dest,destStart,destLimit, options);
! 
!         if(length<=(destLimit-destStart)) {
!             return length;
!         } else {
!             throw new IndexOutOfBoundsException(Integer.toString(length));
!         }
      }
  
      //-------------------------------------------------------------------------
      // Iteration API
      //-------------------------------------------------------------------------
--- 420,497 ----
       */
      public Object clone() {
          try {
              NormalizerBase copy = (NormalizerBase) super.clone();
              copy.text = (UCharacterIterator) text.clone();
!             copy.mode = mode;
!             copy.options = options;
!             copy.norm2 = norm2;
!             copy.buffer = new StringBuilder(buffer);
!             copy.bufferPos = bufferPos;
!             copy.currentIndex = currentIndex;
!             copy.nextIndex = nextIndex;
              return copy;
          }
          catch (CloneNotSupportedException e) {
              throw new InternalError(e.toString(), e);
          }
      }
  
      /**
!      * Normalizes a {@code String} using the given normalization operation.
!      * <p>
!      * The {@code options} parameter specifies which optional
!      * {@code NormalizerBase} features are to be enabled for this operation.
!      * Currently the only available option is {@link #UNICODE_3_2}.
!      * If you want the default behavior corresponding to one of the standard
!      * Unicode Normalization Forms, use 0 for this argument.
!      * <p>
!      * @param str       the input string to be normalized.
!      * @param mode      the normalization mode
!      * @param options   the optional features to be enabled.
!      * @return String   the normalized string
       * @stable ICU 2.6
       */
!     public static String normalize(String str, Mode mode, int options) {
!         return mode.getNormalizer2(options).normalize(str);
      }
  
!     public static String normalize(String str, Normalizer.Form form) {
!         return NormalizerBase.normalize(str, toMode(form), UNICODE_LATEST);
      }
  
!     public static String normalize(String str, Normalizer.Form form, int options) {
!         return NormalizerBase.normalize(str, toMode(form), options);
      }
  
      /**
!      * Test if a string is in a given normalization form.
!      * This is semantically equivalent to str.equals(normalize(str, mode, options)).
!      *
!      * Unlike quickCheck(), this function returns a definitive result,
!      * never a "maybe".
!      * For NFD, NFKD, and FCD, both functions work exactly the same.
!      * For NFC and NFKC where quickCheck may return "maybe", this function will
!      * perform further tests to arrive at a true/false result.
!      * @param str       the input string to be checked to see if it is
!      *                   normalized
!      * @param mode      the normalization mode
!      * @param options   Options for use with exclusion set and tailored Normalization
!      *                  The only option that is currently recognized is UNICODE_3_2
!      * @see #isNormalized
       * @stable ICU 2.6
       */
!     public static boolean isNormalized(String str, Mode mode, int options) {
!         return mode.getNormalizer2(options).isNormalized(str);
      }
  
!     public static boolean isNormalized(String str, Normalizer.Form form) {
!         return NormalizerBase.isNormalized(str, toMode(form), UNICODE_LATEST);
      }
  
!     public static boolean isNormalized(String str, Normalizer.Form form, int options) {
!         return NormalizerBase.isNormalized(str, toMode(form), options);
      }
  
      //-------------------------------------------------------------------------
      // Iteration API
      //-------------------------------------------------------------------------
*** 794,805 ****
       * Return the current character in the normalized text.
       * @return The codepoint as an int
       * @stable ICU 2.8
       */
      public int current() {
!         if(bufferPos<bufferLimit || nextNormalize()) {
!             return getCodePointAt(bufferPos);
          } else {
              return DONE;
          }
      }
  
--- 500,511 ----
       * Return the current character in the normalized text.
       * @return The codepoint as an int
       * @stable ICU 2.8
       */
      public int current() {
!         if(bufferPos<buffer.length() || nextNormalize()) {
!             return buffer.codePointAt(bufferPos);
          } else {
              return DONE;
          }
      }
  
*** 809,839 ****
       * of the text has already been reached, {@link #DONE} is returned.
       * @return The codepoint as an int
       * @stable ICU 2.8
       */
      public int next() {
!         if(bufferPos<bufferLimit ||  nextNormalize()) {
!             int c=getCodePointAt(bufferPos);
!             bufferPos+=(c>0xFFFF) ? 2 : 1;
              return c;
          } else {
              return DONE;
          }
      }
  
- 
      /**
       * Return the previous character in the normalized text and decrement
       * the iteration position by one.  If the beginning
       * of the text has already been reached, {@link #DONE} is returned.
       * @return The codepoint as an int
       * @stable ICU 2.8
       */
      public int previous() {
          if(bufferPos>0 || previousNormalize()) {
!             int c=getCodePointAt(bufferPos-1);
!             bufferPos-=(c>0xFFFF) ? 2 : 1;
              return c;
          } else {
              return DONE;
          }
      }
--- 515,544 ----
       * of the text has already been reached, {@link #DONE} is returned.
       * @return The codepoint as an int
       * @stable ICU 2.8
       */
      public int next() {
!         if(bufferPos<buffer.length() ||  nextNormalize()) {
!             int c=buffer.codePointAt(bufferPos);
!             bufferPos+=Character.charCount(c);
              return c;
          } else {
              return DONE;
          }
      }
  
      /**
       * Return the previous character in the normalized text and decrement
       * the iteration position by one.  If the beginning
       * of the text has already been reached, {@link #DONE} is returned.
       * @return The codepoint as an int
       * @stable ICU 2.8
       */
      public int previous() {
          if(bufferPos>0 || previousNormalize()) {
!             int c=buffer.codePointBefore(bufferPos);
!             bufferPos-=Character.charCount(c);
              return c;
          } else {
              return DONE;
          }
      }
*** 857,868 ****
       *
       * @param index the desired index in the input text.
       * @stable ICU 2.8
       */
      public void setIndexOnly(int index) {
!         text.setIndex(index);
!         currentIndex=nextIndex=index; // validates index
          clearBuffer();
      }
  
      /**
       * Set the iteration position in the input text that is being normalized
--- 562,573 ----
       *
       * @param index the desired index in the input text.
       * @stable ICU 2.8
       */
      public void setIndexOnly(int index) {
!         text.setIndex(index);  // validates index
!         currentIndex=nextIndex=index;
          clearBuffer();
      }
  
      /**
       * Set the iteration position in the input text that is being normalized
*** 872,903 ****
       * while {@link #next} and {@link #previous} iterate through characters
       * in the normalized <em>output</em>.  This means that there is not
       * necessarily a one-to-one correspondence between characters returned
       * by {@code next} and {@code previous} and the indices passed to and
       * returned from {@code setIndex} and {@link #getIndex}.
!      *
       * @param index the desired index in the input text.
       *
       * @return   the first normalized character that is the result of iterating
       *            forward starting at the given index.
       *
       * @throws IllegalArgumentException if the given index is less than
       *          {@link #getBeginIndex} or greater than {@link #getEndIndex}.
!      * @return The codepoint as an int
!      * @deprecated ICU 3.2
       * @obsolete ICU 3.2
       */
-      @Deprecated
       public int setIndex(int index) {
           setIndexOnly(index);
           return current();
       }
  
      /**
       * Retrieve the index of the start of the input text. This is the begin
       * index of the {@code CharacterIterator} or the start (i.e. 0) of the
!      * {@code String} over which this {@code Normalizer} is iterating
       * @deprecated ICU 2.2. Use startIndex() instead.
       * @return The index of the start of the input text
       * @see #startIndex
       */
      @Deprecated
--- 577,606 ----
       * while {@link #next} and {@link #previous} iterate through characters
       * in the normalized <em>output</em>.  This means that there is not
       * necessarily a one-to-one correspondence between characters returned
       * by {@code next} and {@code previous} and the indices passed to and
       * returned from {@code setIndex} and {@link #getIndex}.
!      * <p>
       * @param index the desired index in the input text.
       *
       * @return   the first normalized character that is the result of iterating
       *            forward starting at the given index.
       *
       * @throws IllegalArgumentException if the given index is less than
       *          {@link #getBeginIndex} or greater than {@link #getEndIndex}.
!      * deprecated ICU 3.2
       * @obsolete ICU 3.2
       */
       public int setIndex(int index) {
           setIndexOnly(index);
           return current();
       }
  
      /**
       * Retrieve the index of the start of the input text. This is the begin
       * index of the {@code CharacterIterator} or the start (i.e. 0) of the
!      * {@code String} over which this {@code NormalizerBase} is iterating
       * @deprecated ICU 2.2. Use startIndex() instead.
       * @return The index of the start of the input text
       * @see #startIndex
       */
      @Deprecated
*** 906,916 ****
      }
  
      /**
       * Retrieve the index of the end of the input text.  This is the end index
       * of the {@code CharacterIterator} or the length of the {@code String}
!      * over which this {@code Normalizer} is iterating
       * @deprecated ICU 2.2. Use endIndex() instead.
       * @return The index of the end of the input text
       * @see #endIndex
       */
      @Deprecated
--- 609,619 ----
      }
  
      /**
       * Retrieve the index of the end of the input text.  This is the end index
       * of the {@code CharacterIterator} or the length of the {@code String}
!      * over which this {@code NormalizerBase} is iterating
       * @deprecated ICU 2.2. Use endIndex() instead.
       * @return The index of the end of the input text
       * @see #endIndex
       */
      @Deprecated
*** 932,1682 ****
       * {@code setIndex} and {@link #getIndex}.
       * @return The current iteration position
       * @stable ICU 2.8
       */
      public int getIndex() {
!         if(bufferPos<bufferLimit) {
              return currentIndex;
          } else {
              return nextIndex;
          }
      }
  
      /**
       * Retrieve the index of the end of the input text. This is the end index
       * of the {@code CharacterIterator} or the length of the {@code String}
!      * over which this {@code Normalizer} is iterating
       * @return The end index of the input text
       * @stable ICU 2.8
       */
      public int endIndex() {
          return text.getLength();
      }
  
      //-------------------------------------------------------------------------
!     // Property access methods
      //-------------------------------------------------------------------------
      /**
       * Set the normalization mode for this object.
       * <p>
       * <b>Note:</b>If the normalization mode is changed while iterating
       * over a string, calls to {@link #next} and {@link #previous} may
       * return previously buffered characters in the old normalization mode
       * until the iteration is able to re-sync at the next base character.
       * It is safest to call {@link #setText setText()}, {@link #first},
       * {@link #last}, etc. after calling {@code setMode}.
!      *
!      * @param newMode the new mode for this {@code Normalizer}.
       * The supported modes are:
       * <ul>
!      *  <li>{@link #COMPOSE}        - Unicode canonical decomposition
       *                                  followed by canonical composition.
!      *  <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decomposition
       *                                  followed by canonical composition.
!      *  <li>{@link #DECOMP}         - Unicode canonical decomposition
!      *  <li>{@link #DECOMP_COMPAT}  - Unicode compatibility decomposition.
!      *  <li>{@link #NO_OP}          - Do nothing but return characters
       *                                  from the underlying input text.
       * </ul>
       *
       * @see #getMode
       * @stable ICU 2.8
       */
      public void setMode(Mode newMode) {
          mode = newMode;
      }
      /**
!      * Return the basic operation performed by this {@code Normalizer}
       *
       * @see #setMode
       * @stable ICU 2.8
       */
      public Mode getMode() {
          return mode;
      }
  
      /**
!      * Set the input text over which this {@code Normalizer} will iterate.
       * The iteration position is set to the beginning of the input text.
       * @param newText   The new string to be normalized.
       * @stable ICU 2.8
       */
      public void setText(String newText) {
- 
          UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
          if (newIter == null) {
!             throw new InternalError("Could not create a new UCharacterIterator");
          }
          text = newIter;
          reset();
      }
  
      /**
!      * Set the input text over which this {@code Normalizer} will iterate.
       * The iteration position is set to the beginning of the input text.
       * @param newText   The new string to be normalized.
       * @stable ICU 2.8
       */
      public void setText(CharacterIterator newText) {
- 
          UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
          if (newIter == null) {
!             throw new InternalError("Could not create a new UCharacterIterator");
          }
          text = newIter;
          currentIndex=nextIndex=0;
          clearBuffer();
      }
  
-     //-------------------------------------------------------------------------
-     // Private utility methods
-     //-------------------------------------------------------------------------
- 
- 
-     /* backward iteration --------------------------------------------------- */
- 
-     /*
-      * read backwards and get norm32
-      * return 0 if the character is <minC
-      * if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
-      * surrogate but read second!)
-      */
- 
-     private static  long getPrevNorm32(UCharacterIterator src,
-                                        int/*unsigned*/ minC,
-                                        int/*unsigned*/ mask,
-                                        char[] chars) {
-         long norm32;
-         int ch=0;
-         /* need src.hasPrevious() */
-         if((ch=src.previous()) == UCharacterIterator.DONE) {
-             return 0;
-         }
-         chars[0]=(char)ch;
-         chars[1]=0;
- 
-         /* check for a surrogate before getting norm32 to see if we need to
-          * predecrement further */
-         if(chars[0]<minC) {
-             return 0;
-         } else if(!UTF16.isSurrogate(chars[0])) {
-             return NormalizerImpl.getNorm32(chars[0]);
-         } else if(UTF16.isLeadSurrogate(chars[0]) || (src.getIndex()==0)) {
-             /* unpaired surrogate */
-             chars[1]=(char)src.current();
-             return 0;
-         } else if(UTF16.isLeadSurrogate(chars[1]=(char)src.previous())) {
-             norm32=NormalizerImpl.getNorm32(chars[1]);
-             if((norm32&mask)==0) {
-                 /* all surrogate pairs with this lead surrogate have irrelevant
-                  * data */
-                 return 0;
-             } else {
-                 /* norm32 must be a surrogate special */
-                 return NormalizerImpl.getNorm32FromSurrogatePair(norm32,chars[0]);
-             }
-         } else {
-             /* unpaired second surrogate, undo the c2=src.previous() movement */
-             src.moveIndex( 1);
-             return 0;
-         }
-     }
- 
-     private interface IsPrevBoundary{
-         public boolean isPrevBoundary(UCharacterIterator src,
-                                       int/*unsigned*/ minC,
-                                       int/*unsigned*/ mask,
-                                       char[] chars);
-     }
-     private static final class IsPrevNFDSafe implements IsPrevBoundary{
-         /*
-          * for NF*D:
-          * read backwards and check if the lead combining class is 0
-          * if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
-          * surrogate but read second!)
-          */
-         public boolean isPrevBoundary(UCharacterIterator src,
-                                       int/*unsigned*/ minC,
-                                       int/*unsigned*/ ccOrQCMask,
-                                       char[] chars) {
- 
-             return NormalizerImpl.isNFDSafe(getPrevNorm32(src, minC,
-                                                           ccOrQCMask, chars),
-                                             ccOrQCMask,
-                                             ccOrQCMask& NormalizerImpl.QC_MASK);
-         }
-     }
- 
-     private static final class IsPrevTrueStarter implements IsPrevBoundary{
-         /*
-          * read backwards and check if the character is (or its decomposition
-          * begins with) a "true starter" (cc==0 and NF*C_YES)
-          * if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
-          * surrogate but read second!)
-          */
-         public boolean isPrevBoundary(UCharacterIterator src,
-                                       int/*unsigned*/ minC,
-                                       int/*unsigned*/ ccOrQCMask,
-                                       char[] chars) {
-             long norm32;
-             int/*unsigned*/ decompQCMask;
- 
-             decompQCMask=(ccOrQCMask<<2)&0xf; /*decomposition quick check mask*/
-             norm32=getPrevNorm32(src, minC, ccOrQCMask|decompQCMask, chars);
-             return NormalizerImpl.isTrueStarter(norm32,ccOrQCMask,decompQCMask);
-         }
-     }
- 
-     private static int findPreviousIterationBoundary(UCharacterIterator src,
-                                                      IsPrevBoundary obj,
-                                                      int/*unsigned*/ minC,
-                                                      int/*mask*/ mask,
-                                                      char[] buffer,
-                                                      int[] startIndex) {
-         char[] chars=new char[2];
-         boolean isBoundary;
- 
-         /* fill the buffer from the end backwards */
-         startIndex[0] = buffer.length;
-         chars[0]=0;
-         while(src.getIndex()>0 && chars[0]!=UCharacterIterator.DONE) {
-             isBoundary=obj.isPrevBoundary(src, minC, mask, chars);
- 
-             /* always write this character to the front of the buffer */
-             /* make sure there is enough space in the buffer */
-             if(startIndex[0] < (chars[1]==0 ? 1 : 2)) {
- 
-                 // grow the buffer
-                 char[] newBuf = new char[buffer.length*2];
-                 /* move the current buffer contents up */
-                 System.arraycopy(buffer,startIndex[0],newBuf,
-                                  newBuf.length-(buffer.length-startIndex[0]),
-                                  buffer.length-startIndex[0]);
-                 //adjust the startIndex
-                 startIndex[0]+=newBuf.length-buffer.length;
- 
-                 buffer=newBuf;
-                 newBuf=null;
- 
-             }
- 
-             buffer[--startIndex[0]]=chars[0];
-             if(chars[1]!=0) {
-                 buffer[--startIndex[0]]=chars[1];
-             }
- 
-             /* stop if this just-copied character is a boundary */
-             if(isBoundary) {
-                 break;
-             }
-         }
- 
-         /* return the length of the buffer contents */
-         return buffer.length-startIndex[0];
-     }
- 
-     private static int previous(UCharacterIterator src,
-                                 char[] dest, int destStart, int destLimit,
-                                 Mode mode,
-                                 boolean doNormalize,
-                                 boolean[] pNeededToNormalize,
-                                 int options) {
- 
-         IsPrevBoundary isPreviousBoundary;
-         int destLength, bufferLength;
-         int/*unsigned*/ mask;
-         int c,c2;
- 
-         char minC;
-         int destCapacity = destLimit-destStart;
-         destLength=0;
- 
-         if(pNeededToNormalize!=null) {
-             pNeededToNormalize[0]=false;
-         }
-         minC = (char)mode.getMinC();
-         mask = mode.getMask();
-         isPreviousBoundary = mode.getPrevBoundary();
- 
-         if(isPreviousBoundary==null) {
-             destLength=0;
-             if((c=src.previous())>=0) {
-                 destLength=1;
-                 if(UTF16.isTrailSurrogate((char)c)) {
-                     c2= src.previous();
-                     if(c2!= UCharacterIterator.DONE) {
-                         if(UTF16.isLeadSurrogate((char)c2)) {
-                             if(destCapacity>=2) {
-                                 dest[1]=(char)c; // trail surrogate
-                                 destLength=2;
-                             }
-                             // lead surrogate to be written below
-                             c=c2;
-                         } else {
-                             src.moveIndex(1);
-                         }
-                     }
-                 }
- 
-                 if(destCapacity>0) {
-                     dest[0]=(char)c;
-                 }
-             }
-             return destLength;
-         }
- 
-         char[] buffer = new char[100];
-         int[] startIndex= new int[1];
-         bufferLength=findPreviousIterationBoundary(src,
-                                                    isPreviousBoundary,
-                                                    minC, mask,buffer,
-                                                    startIndex);
-         if(bufferLength>0) {
-             if(doNormalize) {
-                 destLength=NormalizerBase.normalize(buffer,startIndex[0],
-                                                 startIndex[0]+bufferLength,
-                                                 dest, destStart,destLimit,
-                                                 mode, options);
- 
-                 if(pNeededToNormalize!=null) {
-                     pNeededToNormalize[0]=destLength!=bufferLength ||
-                                           Utility.arrayRegionMatches(
-                                             buffer,0,dest,
-                                             destStart,destLimit
-                                           );
-                 }
-             } else {
-                 /* just copy the source characters */
-                 if(destCapacity>0) {
-                     System.arraycopy(buffer,startIndex[0],dest,0,
-                                      (bufferLength<destCapacity) ?
-                                      bufferLength : destCapacity
-                                      );
-                 }
-             }
-         }
- 
- 
-         return destLength;
-     }
- 
- 
- 
-     /* forward iteration ---------------------------------------------------- */
-     /*
-      * read forward and check if the character is a next-iteration boundary
-      * if c2!=0 then (c, c2) is a surrogate pair
-      *
-      * Callback used by findNextIterationBoundary(): an implementation
-      * consumes the next character from src and returns true if that
-      * character starts a new segment. The implementations in this file
-      * report the code unit(s) they read through chars (chars[0] = first
-      * unit, chars[1] = second unit or 0); the caller relies on chars[1]
-      * to decide whether to step back by one or two units.
-      */
-     private interface IsNextBoundary{
-         boolean isNextBoundary(UCharacterIterator src,
-                                int/*unsigned*/ minC,
-                                int/*unsigned*/ mask,
-                                int[] chars);
-     }
-     /*
-      * read forward and get norm32
-      * return 0 if the character is <minC
-      * if chars[1]!=0 then (chars[0], chars[1]) is a surrogate pair
-      * always reads complete characters
-      *
-      * src   - iterator to read from; the caller must make sure a next
-      *         character exists
-      * minC  - units below this value yield 0 without a data lookup
-      * mask  - norm32 bits of interest; a surrogate pair whose lead unit
-      *         has none of these bits set yields 0
-      * chars - output: chars[0] receives the unit read, chars[1] is reset
-      *         to 0 and then receives the following unit when chars[0] is
-      *         a lead surrogate and the iterator is not at its end
-      * An unmatched lead surrogate yields 0.
-      *
-      * NOTE(review): on the unmatched-lead path chars[1] has already been
-      * assigned the following (non-trail) unit although that unit was not
-      * consumed; callers that test chars[1]!=0 should be checked for this.
-      */
-     private static long /*unsigned*/ getNextNorm32(UCharacterIterator src,
-                                                    int/*unsigned*/ minC,
-                                                    int/*unsigned*/ mask,
-                                                    int[] chars) {
-         long norm32;
- 
-         /* need src.hasNext() to be true */
-         chars[0]=src.next();
-         chars[1]=0;
- 
-         if(chars[0]<minC) {
-             return 0;
-         }
- 
-         norm32=NormalizerImpl.getNorm32((char)chars[0]);
-         if(UTF16.isLeadSurrogate((char)chars[0])) {
-             if(src.current()!=UCharacterIterator.DONE &&
-                UTF16.isTrailSurrogate((char)(chars[1]=src.current()))) {
-                 src.moveIndex(1); /* skip the c2 surrogate */
-                 if((norm32&mask)==0) {
-                     /* irrelevant data */
-                     return 0;
-                 } else {
-                     /* norm32 must be a surrogate special */
-                     return NormalizerImpl.getNorm32FromSurrogatePair(norm32,(char)chars[1]);
-                 }
-             } else {
-                 /* unmatched surrogate */
-                 return 0;
-             }
-         }
-         return norm32;
-     }
- 
- 
-     /*
-      * for NF*D:
-      * read forward and check if the lead combining class is 0
-      * if c2!=0 then (c, c2) is a surrogate pair
-      *
-      * Reads the next character's norm32 with getNextNorm32() and hands it
-      * to NormalizerImpl.isNFDSafe() together with ccOrQCMask and
-      * ccOrQCMask&NormalizerImpl.QC_MASK; that result is returned as is.
-      */
-     private static final class IsNextNFDSafe implements IsNextBoundary{
-         public boolean isNextBoundary(UCharacterIterator src,
-                                       int/*unsigned*/ minC,
-                                       int/*unsigned*/ ccOrQCMask,
-                                       int[] chars) {
-             return NormalizerImpl.isNFDSafe(getNextNorm32(src,minC,ccOrQCMask,chars),
-                                             ccOrQCMask, ccOrQCMask&NormalizerImpl.QC_MASK);
-         }
-     }
- 
-     /*
-      * for NF*C:
-      * read forward and check if the character is (or its decomposition begins
-      * with) a "true starter" (cc==0 and NF*C_YES)
-      * if c2!=0 then (c, c2) is a surrogate pair
-      *
-      * The decomposition quick-check mask is derived from ccOrQCMask by
-      * shifting it left two bits and keeping the low four bits. The norm32
-      * lookup is done with both masks combined, and the verdict comes from
-      * NormalizerImpl.isTrueStarter().
-      */
-     private static final class IsNextTrueStarter implements IsNextBoundary{
-         public boolean isNextBoundary(UCharacterIterator src,
-                                       int/*unsigned*/ minC,
-                                       int/*unsigned*/ ccOrQCMask,
-                                       int[] chars) {
-             long norm32;
-             int/*unsigned*/ decompQCMask;
- 
-             decompQCMask=(ccOrQCMask<<2)&0xf; /*decomposition quick check mask*/
-             norm32=getNextNorm32(src, minC, ccOrQCMask|decompQCMask, chars);
-             return NormalizerImpl.isTrueStarter(norm32, ccOrQCMask, decompQCMask);
-         }
-     }
- 
-     private static int findNextIterationBoundary(UCharacterIterator src,
-                                                  IsNextBoundary obj,
-                                                  int/*unsigned*/ minC,
-                                                  int/*unsigned*/ mask,
-                                                  char[] buffer) {
          /*
           * Collects one segment of code units from src into buffer: the
           * first character unconditionally, then every following character
           * until obj reports a boundary or src is exhausted. When a boundary
           * is reported the iterator is moved back by the one or two units
           * just read. Returns the number of units collected, or 0 if src is
           * already at its end.
           *
           * NOTE(review): when the segment outgrows the passed-in array, the
           * larger replacement array is assigned to the local parameter only;
           * the caller's array reference is not updated, so units beyond the
           * original capacity are not visible through the caller's array.
           */
-         if(src.current()==UCharacterIterator.DONE) {
-             return 0;
-         }
- 
-         /* get one character and ignore its properties */
-         int[] chars = new int[2];
-         chars[0]=src.next();
-         buffer[0]=(char)chars[0];
-         int bufferIndex = 1;
- 
-         if(UTF16.isLeadSurrogate((char)chars[0])&&
-            src.current()!=UCharacterIterator.DONE) {
-             if(UTF16.isTrailSurrogate((char)(chars[1]=src.next()))) {
-                 buffer[bufferIndex++]=(char)chars[1];
-             } else {
-                 src.moveIndex(-1); /* back out the non-trail-surrogate */
-             }
-         }
- 
-         /* get all following characters until we see a boundary */
-         /* the loop condition compares src.current() with DONE; the original
-          * remark here spoke of checking hasNext() "on the off-chance that
-          * U+ffff is part of the string", which is not what the code does */
-         while( src.current()!=UCharacterIterator.DONE) {
-             if(obj.isNextBoundary(src, minC, mask, chars)) {
-                 /* back out the latest movement to stop at the boundary */
-                 src.moveIndex(chars[1]==0 ? -1 : -2);
-                 break;
-             } else {
-                 if(bufferIndex+(chars[1]==0 ? 1 : 2)<=buffer.length) {
-                     buffer[bufferIndex++]=(char)chars[0];
-                     if(chars[1]!=0) {
-                         buffer[bufferIndex++]=(char)chars[1];
-                     }
-                 } else {
-                     char[] newBuf = new char[buffer.length*2];
-                     System.arraycopy(buffer,0,newBuf,0,bufferIndex);
-                     buffer = newBuf;
-                     buffer[bufferIndex++]=(char)chars[0];
-                     if(chars[1]!=0) {
-                         buffer[bufferIndex++]=(char)chars[1];
-                     }
-                 }
-             }
-         }
- 
-         /* return the length of the buffer contents */
-         return bufferIndex;
-     }
- 
-     private static int next(UCharacterIterator src,
-                             char[] dest, int destStart, int destLimit,
-                             NormalizerBase.Mode mode,
-                             boolean doNormalize,
-                             boolean[] pNeededToNormalize,
-                             int options) {
          /*
           * Produces the next chunk of output from src into dest.
           *
           * - If the mode supplies no boundary test (getNextBoundary() returns
           *   null), a single character of one or two code units is taken
           *   from src and stored, capacity permitting.
           * - Otherwise a segment is gathered with findNextIterationBoundary()
           *   and is either passed to mode.normalize() (doNormalize==true) or
           *   copied unchanged into dest starting at destStart.
           *
           * pNeededToNormalize, when non-null, is reset to false on entry and
           * is only assigned again on the normalizing path.
           * The return value is destLength; it is left at 0 on the copy-only
           * path.
           *
           * NOTE(review): the no-boundary path writes to dest[0]/dest[1]
           * rather than to positions relative to destStart - confirm callers
           * always pass destStart==0.
           * NOTE(review): startIndex is never written here, so startIndex[0]
           * is always 0.
           */
- 
-         IsNextBoundary isNextBoundary;
-         int /*unsigned*/ mask;
-         int /*unsigned*/ bufferLength;
-         int c,c2;
-         char minC;
-         int destCapacity = destLimit - destStart;
-         int destLength = 0;
-         if(pNeededToNormalize!=null) {
-             pNeededToNormalize[0]=false;
-         }
- 
-         minC = (char)mode.getMinC();
-         mask = mode.getMask();
-         isNextBoundary = mode.getNextBoundary();
- 
-         if(isNextBoundary==null) {
-             destLength=0;
-             c=src.next();
-             if(c!=UCharacterIterator.DONE) {
-                 destLength=1;
-                 if(UTF16.isLeadSurrogate((char)c)) {
-                     c2= src.next();
-                     if(c2!= UCharacterIterator.DONE) {
-                         if(UTF16.isTrailSurrogate((char)c2)) {
-                             if(destCapacity>=2) {
-                                 dest[1]=(char)c2; // trail surrogate
-                                 destLength=2;
-                             }
-                             // lead surrogate to be written below
-                         } else {
-                             src.moveIndex(-1);
-                         }
-                     }
-                 }
- 
-                 if(destCapacity>0) {
-                     dest[0]=(char)c;
-                 }
-             }
-             return destLength;
-         }
- 
-         char[] buffer=new char[100];
-         int[] startIndex = new int[1];
-         bufferLength=findNextIterationBoundary(src,isNextBoundary, minC, mask,
-                                                buffer);
-         if(bufferLength>0) {
-             if(doNormalize) {
-                 destLength=mode.normalize(buffer,startIndex[0],bufferLength,
-                                           dest,destStart,destLimit, options);
- 
-                 if(pNeededToNormalize!=null) {
-                     pNeededToNormalize[0]=destLength!=bufferLength ||
-                                           Utility.arrayRegionMatches(buffer,startIndex[0],
-                                             dest,destStart,
-                                             destLength);
-                 }
-             } else {
-                 /* just copy the source characters */
-                 if(destCapacity>0) {
-                     System.arraycopy(buffer,0,dest,destStart,
-                                      Math.min(bufferLength,destCapacity)
-                                      );
-                 }
- 
- 
-             }
-         }
-         return destLength;
-     }
- 
      private void clearBuffer() {
          // Zero all three buffer cursors: limit, start and position.
!         bufferLimit=bufferStart=bufferPos=0;
      }
  
      private boolean nextNormalize() {
          // Advance one chunk: empty the buffer, make nextIndex the new
          // currentIndex, position the text there, and let next() fill the
          // buffer with normalized output. nextIndex then becomes the text
          // position after the chunk. Returns true if any output was produced.
- 
          clearBuffer();
          currentIndex=nextIndex;
          text.setIndex(nextIndex);
! 
!         bufferLimit=next(text,buffer,bufferStart,buffer.length,mode,true,null,options);
! 
          nextIndex=text.getIndex();
!         return (bufferLimit>0);
      }
  
      private boolean previousNormalize() {
          // Step back one chunk: empty the buffer, make currentIndex the new
          // nextIndex, position the text there, and let previous() fill the
          // buffer. currentIndex then becomes the text position before the
          // chunk and bufferPos is placed at the end of the filled region.
          // Returns true if any output was produced.
- 
          clearBuffer();
          nextIndex=currentIndex;
          text.setIndex(currentIndex);
!         bufferLimit=previous(text,buffer,bufferStart,buffer.length,mode,true,null,options);
! 
!         currentIndex=text.getIndex();
!         bufferPos = bufferLimit;
!         return bufferLimit>0;
!     }
! 
!     private int getCodePointAt(int index) {
          // Returns the code point that the unit at buffer[index] belongs to.
          // A lead surrogate followed (before bufferLimit) by a trail
          // surrogate, or a trail surrogate preceded by a lead surrogate, is
          // combined via UCharacterProperty.getRawSupplementary(); any other
          // unit, including an unpaired surrogate, is returned as is.
!         if( UTF16.isSurrogate(buffer[index])) {
!             if(UTF16.isLeadSurrogate(buffer[index])) {
!                 if((index+1)<bufferLimit &&
!                    UTF16.isTrailSurrogate(buffer[index+1])) {
!                     return UCharacterProperty.getRawSupplementary(
!                                                                   buffer[index],
!                                                                   buffer[index+1]
!                                                                   );
!                 }
!             }else if(UTF16.isTrailSurrogate(buffer[index])) {
!                 if(index>0 && UTF16.isLeadSurrogate(buffer[index-1])) {
!                     return UCharacterProperty.getRawSupplementary(
!                                                                   buffer[index-1],
!                                                                   buffer[index]
!                                                                   );
!                 }
!             }
!         }
!         return buffer[index];
! 
!     }
! 
!     /**
!      * Internal API
!      * Static convenience wrapper that forwards to
!      * {@code mode.isNFSkippable(c)} and returns its result.
!      *
!      * @param c     the code point to test
!      * @param mode  the normalization mode whose rule is applied
!      * @internal
!      */
!     public static boolean isNFSkippable(int c, Mode mode) {
!         return mode.isNFSkippable(c);
!     }
! 
!     //
!     // Options
!     //
! 
!     /*
!      * Default option for Unicode 3.2.0 normalization.
!      * Corrigendum 4 was fixed in Unicode 3.2.0 but isn't supported in
!      * IDNA/StringPrep.
!      * The public review issue #29 was fixed in Unicode 4.1.0. Corrigendum 5
!      * allowed Unicode 3.2 to 4.0.1 to apply the fix for PRI #29, but it isn't
!      * supported by IDNA/StringPrep as well as Corrigendum 4.
!      */
!     public static final int UNICODE_3_2_0_ORIGINAL =
!                                UNICODE_3_2 |
!                                NormalizerImpl.WITHOUT_CORRIGENDUM4_CORRECTIONS |
!                                NormalizerImpl.BEFORE_PRI_29;
! 
!     /*
!      * Default option for the latest Unicode normalization. This option is
!      * provided mainly for testing.
!      * The value zero means that normalization is done with the fixes for
!      *   - Corrigendum 4 (Five CJK Canonical Mapping Errors)
!      *   - Corrigendum 5 (Normalization Idempotency)
!      */
!     public static final int UNICODE_LATEST = 0x00;
! 
!     //
!     // public constructor and methods for java.text.Normalizer and
!     // sun.text.Normalizer
!     //
! 
!     /**
!      * Creates a new {@code Normalizer} object for iterating over the
!      * normalized form of a given string.
!      * Delegates to the three-argument constructor, passing
!      * {@link #UNICODE_LATEST} as the options value.
!      *
!      * @param str  The string to be normalized.  The normalization
!      *              will start at the beginning of the string.
!      *
!      * @param mode The normalization mode.
!      */
!     public NormalizerBase(String str, Mode mode) {
!           this(str, mode, UNICODE_LATEST);
!     }
! 
!     /**
!      * Normalizes a <code>String</code> using the given normalization form.
!      * Delegates to {@code normalize(str, form, UNICODE_LATEST)}.
!      *
!      * @param str      the input string to be normalized.
!      * @param form     the normalization form
!      * @return the result of the three-argument overload
!      */
!     public static String normalize(String str, Normalizer.Form form) {
!         return normalize(str, form, UNICODE_LATEST);
!     }
! 
!     /**
!      * Normalizes a <code>String</code> using the given normalization form.
!      * <p>
!      * The string is first scanned for any char above 127. If none is found
!      * and the form is one of the four handled cases, the input string
!      * itself is returned and no normalizer is invoked. Strings shorter
!      * than 80 chars are scanned with {@code charAt}; longer ones are
!      * scanned through a {@code toCharArray()} copy.
!      *
!      * @param str      the input string to be normalized.
!      * @param form     the normalization form
!      * @param options   the optional features to be enabled.
!      * @return {@code str} itself when it contains no char above 127,
!      *         otherwise the result of the selected mode's
!      *         {@code normalize(str, options)}
!      * @throws IllegalArgumentException if {@code form} matches none of the
!      *         handled cases (NFC, NFD, NFKC, NFKD)
!      */
!     public static String normalize(String str, Normalizer.Form form, int options) {
!         int len = str.length();
!         boolean asciiOnly = true;
!         if (len < 80) {
!             for (int i = 0; i < len; i++) {
!                 if (str.charAt(i) > 127) {
!                     asciiOnly = false;
!                     break;
!                 }
!             }
          } else {
!             char[] a = str.toCharArray();
!             for (int i = 0; i < len; i++) {
!                 if (a[i] > 127) {
!                     asciiOnly = false;
!                     break;
                  }
              }
          }
! 
!         switch (form) {
!         case NFC :
!             return asciiOnly ? str : NFC.normalize(str, options);
!         case NFD :
!             return asciiOnly ? str : NFD.normalize(str, options);
!         case NFKC :
!             return asciiOnly ? str : NFKC.normalize(str, options);
!         case NFKD :
!             return asciiOnly ? str : NFKD.normalize(str, options);
!         }
! 
!         throw new IllegalArgumentException("Unexpected normalization form: " +
!                                            form);
!     }
! 
!     /**
!      * Test if a string is in a given normalization form.
!      * This is semantically equivalent to source.equals(normalize(source, mode)).
!      *
!      * Unlike quickCheck(), this function returns a definitive result,
!      * never a "maybe".
!      * For NFD, NFKD, and FCD, both functions work exactly the same.
!      * For NFC and NFKC where quickCheck may return "maybe", this function will
!      * perform further tests to arrive at a true/false result.
!      * Delegates to {@code isNormalized(str, form, UNICODE_LATEST)}.
!      * @param str       the input string to be checked to see if it is normalized
!      * @param form      the normalization form
!      * @return the result of the three-argument overload
!      */
!     public static boolean isNormalized(String str, Normalizer.Form form) {
!         return isNormalized(str, form, UNICODE_LATEST);
!     }
! 
!     /**
!      * Test if a string is in a given normalization form.
!      * This is semantically equivalent to source.equals(normalize(source, mode)).
!      *
!      * Unlike quickCheck(), this function returns a definitive result,
!      * never a "maybe".
!      * For NFD, NFKD, and FCD, both functions work exactly the same.
!      * For NFC and NFKC where quickCheck may return "maybe", this function will
!      * perform further tests to arrive at a true/false result.
!      * <p>
!      * Each case runs the selected mode's {@code quickCheck} over a
!      * {@code toCharArray()} copy of the whole string, passing
!      * {@code false} as the fourth argument and
!      * {@code NormalizerImpl.getNX(options)} as the fifth.
!      * @param str       the input string to be checked to see if it is normalized
!      * @param form      the normalization form
!      * @param options   the optional features to be enabled.
!      * @return {@code true} only when the quick check result equals
!      *         {@code YES}
!      * @throws IllegalArgumentException if {@code form} matches none of the
!      *         handled cases (NFC, NFD, NFKC, NFKD)
!      */
!     public static boolean isNormalized(String str, Normalizer.Form form, int options) {
!         switch (form) {
!         case NFC:
!             return (NFC.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
!         case NFD:
!             return (NFD.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
!         case NFKC:
!             return (NFKC.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
!         case NFKD:
!             return (NFKD.quickCheck(str.toCharArray(),0,str.length(),false,NormalizerImpl.getNX(options))==YES);
          }
  
-         throw new IllegalArgumentException("Unexpected normalization form: " +
-                                            form);
-     }
  }
--- 635,782 ----
       * {@code setIndex} and {@link #getIndex}.
       * @return The current iteration position
       * @stable ICU 2.8
       */
      public int getIndex() {
          // While bufferPos is still inside the normalized buffer, report
          // currentIndex; once bufferPos has reached the end of the buffer
          // (or the buffer is empty), report nextIndex.
!         if(bufferPos<buffer.length()) {
              return currentIndex;
          } else {
              return nextIndex;
          }
      }
  
      /**
       * Retrieve the index of the end of the input text.  This is the end index
       * of the {@code CharacterIterator} or the length of the {@code String}
!      * over which this {@code NormalizerBase} is iterating
       * @return The end index of the input text, as reported by
       *         {@code text.getLength()}
       * @stable ICU 2.8
       */
      public int endIndex() {
          return text.getLength();
      }
  
      //-------------------------------------------------------------------------
!     // Iterator attributes
      //-------------------------------------------------------------------------
      /**
       * Set the normalization mode for this object.
       * <p>
       * <b>Note:</b>If the normalization mode is changed while iterating
       * over a string, calls to {@link #next} and {@link #previous} may
       * return previously buffered characters in the old normalization mode
       * until the iteration is able to re-sync at the next base character.
       * It is safest to call {@link #setText setText()}, {@link #first},
       * {@link #last}, etc. after calling {@code setMode}.
!      * <p>
       * Besides storing the mode, this also re-derives {@code norm2} from
       * the new mode and the current options.
!      * @param newMode the new mode for this {@code NormalizerBase}.
       * The supported modes are:
       * <ul>
!      *  <li>{@link #NFC}    - Unicode canonical decomposition
       *                        followed by canonical composition.
!      *  <li>{@link #NFKC}   - Unicode compatibility decomposition
       *                        followed by canonical composition.
!      *  <li>{@link #NFD}    - Unicode canonical decomposition
!      *  <li>{@link #NFKD}   - Unicode compatibility decomposition.
!      *  <li>{@link #NONE}   - Do nothing but return characters
       *                        from the underlying input text.
       * </ul>
       *
       * @see #getMode
       * @stable ICU 2.8
       */
      public void setMode(Mode newMode) {
          mode = newMode;
+         norm2 = mode.getNormalizer2(options);
      }
+ 
      /**
!      * Return the basic operation performed by this {@code NormalizerBase}
       *
       * @return the mode currently stored in this object
       * @see #setMode
       * @stable ICU 2.8
       */
      public Mode getMode() {
          return mode;
      }
  
      /**
!      * Set the input text over which this {@code NormalizerBase} will iterate.
       * The iteration position is set to the beginning of the input text.
       * The text is wrapped in a new {@code UCharacterIterator} and
       * {@code reset()} is then called on this object.
       * @param newText   The new string to be normalized.
       * @throws IllegalStateException if no {@code UCharacterIterator} could
       *         be created for {@code newText}
       * @stable ICU 2.8
       */
      public void setText(String newText) {
          UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
          if (newIter == null) {
!             throw new IllegalStateException("Could not create a new UCharacterIterator");
          }
          text = newIter;
          reset();
      }
  
      /**
!      * Set the input text over which this {@code NormalizerBase} will iterate.
       * The iteration position is set to the beginning of the input text.
       * The text is wrapped in a new {@code UCharacterIterator}; both
       * {@code currentIndex} and {@code nextIndex} are set to 0 and the
       * normalized buffer is cleared.
       * @param newText   The new string to be normalized.
       * @throws IllegalStateException if no {@code UCharacterIterator} could
       *         be created for {@code newText}
       * @stable ICU 2.8
       */
      public void setText(CharacterIterator newText) {
          UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
          if (newIter == null) {
!             throw new IllegalStateException("Could not create a new UCharacterIterator");
          }
          text = newIter;
          currentIndex=nextIndex=0;
          clearBuffer();
      }
  
      private void clearBuffer() {
          // Empty the normalized-output buffer and move the read position
          // back to its start.
!         buffer.setLength(0);
!         bufferPos=0;
      }
  
      private boolean nextNormalize() {
          // Advance one segment. Starting at nextIndex, code points are
          // gathered up to (not including) the next code point for which
          // norm2.hasBoundaryBefore() is true, or to the end of the text. The
          // segment is normalized into buffer, and nextIndex is set to the
          // text position after the segment. Returns false when no code point
          // could be read (nextCodePoint() < 0) or when the normalized output
          // is empty.
          clearBuffer();
          currentIndex=nextIndex;
          text.setIndex(nextIndex);
!         // Skip at least one character so we make progress.
!         int c=text.nextCodePoint();
!         if(c<0) {
!             return false;
!         }
!         StringBuilder segment=new StringBuilder().appendCodePoint(c);
!         while((c=text.nextCodePoint())>=0) {
!             if(norm2.hasBoundaryBefore(c)) {
                  // Un-read the boundary code point; it starts the next segment.
!                 text.moveCodePointIndex(-1);
!                 break;
!             }
!             segment.appendCodePoint(c);
!         }
          nextIndex=text.getIndex();
!         norm2.normalize(segment, buffer);
!         return buffer.length()!=0;
      }
  
      private boolean previousNormalize() {
          // Step back one segment. Starting at currentIndex, code points are
          // read backwards and prepended to the segment until one with a
          // normalization boundary before it has been included, or the start
          // of the text is reached (previousCodePoint() < 0). The segment is
          // normalized into buffer, currentIndex is set to the text position
          // before the segment, and bufferPos is placed at the end of the
          // buffer. Returns true if the normalized output is non-empty.
          clearBuffer();
          nextIndex=currentIndex;
          text.setIndex(currentIndex);
!         StringBuilder segment=new StringBuilder();
!         int c;
!         while((c=text.previousCodePoint())>=0) {
              // Prepend c: a single char for values up to 0xffff, otherwise the
              // char sequence produced by Character.toChars(c).
!             if(c<=0xffff) {
!                 segment.insert(0, (char)c);
              } else {
!                 segment.insert(0, Character.toChars(c));
              }
+             if(norm2.hasBoundaryBefore(c)) {
+                 break;
              }
          }
!         currentIndex=text.getIndex();
!         norm2.normalize(segment, buffer);
!         bufferPos=buffer.length();
!         return buffer.length()!=0;
      }
  
  }
< prev index next >