open Cdiff src/java.base/share/classes/sun/text/normalizer/NormalizerImpl.java

src/java.base/share/classes/sun/text/normalizer/NormalizerImpl.java


*** 1,7 ****
  /*
!  * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.  Oracle designates this
--- 1,7 ----
  /*
!  * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.  Oracle designates this
*** 27,46 ****
   *******************************************************************************
   *   Copyright (C) 2009-2014, International Business Machines
   *   Corporation and others.  All Rights Reserved.
   *******************************************************************************
   */
- 
  package sun.text.normalizer;
  
  import java.io.IOException;
  import java.nio.ByteBuffer;
  import java.text.Normalizer;
  
  // Original filename in ICU4J: Normalizer2Impl.java
  public final class NormalizerImpl {
- 
      public static final class Hangul {
          /* Korean Hangul and Jamo constants */
          public static final int JAMO_L_BASE=0x1100;     /* "lead" jamo */
          public static final int JAMO_V_BASE=0x1161;     /* "vowel" jamo */
          public static final int JAMO_T_BASE=0x11a7;     /* "trail" jamo */
--- 27,44 ----
*** 56,69 ****
          public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT;
  
          public static boolean isHangul(int c) {
              return HANGUL_BASE<=c && c<HANGUL_LIMIT;
          }
! 
!         public static boolean isHangulWithoutJamoT(char c) {
              c-=HANGUL_BASE;
!             return c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
          }
  
          /**
           * Decomposes c, which must be a Hangul syllable, into buffer
           * and returns the length of the decomposition (2 or 3).
--- 54,66 ----
          public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT;
  
          public static boolean isHangul(int c) {
              return HANGUL_BASE<=c && c<HANGUL_LIMIT;
          }
!         public static boolean isHangulLV(int c) {
              c-=HANGUL_BASE;
!             return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
          }
          
                  /**
           * Decomposes c, which must be a Hangul syllable, into buffer
           * and returns the length of the decomposition (2 or 3).
*** 100,113 ****
       */
      public static final class ReorderingBuffer implements Appendable {
          public ReorderingBuffer(NormalizerImpl ni, Appendable dest, int destCapacity) {
              impl=ni;
              app=dest;
!             if (app instanceof StringBuilder) {
                  appIsStringBuilder=true;
                  str=(StringBuilder)dest;
!                 // In Java, the constructor subsumes public void init(int destCapacity)
                  str.ensureCapacity(destCapacity);
                  reorderStart=0;
                  if(str.length()==0) {
                      lastCC=0;
                  } else {
--- 97,110 ----
       */
      public static final class ReorderingBuffer implements Appendable {
          public ReorderingBuffer(NormalizerImpl ni, Appendable dest, int destCapacity) {
              impl=ni;
              app=dest;
!             if(app instanceof StringBuilder) {
                  appIsStringBuilder=true;
                  str=(StringBuilder)dest;
!                 // In Java, the constructor subsumes public void init(int destCapacity) {
                  str.ensureCapacity(destCapacity);
                  reorderStart=0;
                  if(str.length()==0) {
                      lastCC=0;
                  } else {
*** 135,149 ****
  
          public boolean equals(CharSequence s, int start, int limit) {
              return UTF16Plus.equal(str, 0, str.length(), s, start, limit);
          }
  
-         // For Hangul composition, replacing the Leading consonant Jamo with the syllable.
-         public void setLastChar(char c) {
-             str.setCharAt(str.length()-1, c);
-         }
- 
          public void append(int c, int cc) {
              if(lastCC<=cc || cc==0) {
                  str.appendCodePoint(c);
                  lastCC=cc;
                  if(cc<=1) {
--- 132,141 ----
*** 151,161 ****
                  }
              } else {
                  insert(c, cc);
              }
          }
- 
          // s must be in NFD, otherwise change the implementation.
          public void append(CharSequence s, int start, int limit,
                             int leadCC, int trailCC) {
              if(start==limit) {
                  return;
--- 143,152 ----
*** 183,230 ****
                      }
                      append(c, leadCC);
                  }
              }
          }
- 
          // The following append() methods work like C++ appendZeroCC().
          // They assume that the cc or trailCC of their input is 0.
          // Most of them implement Appendable interface methods.
!         // @Override when we switch to Java 6
          public ReorderingBuffer append(char c) {
              str.append(c);
              lastCC=0;
              reorderStart=str.length();
              return this;
          }
- 
          public void appendZeroCC(int c) {
              str.appendCodePoint(c);
              lastCC=0;
              reorderStart=str.length();
          }
! 
!         // @Override when we switch to Java 6
          public ReorderingBuffer append(CharSequence s) {
              if(s.length()!=0) {
                  str.append(s);
                  lastCC=0;
                  reorderStart=str.length();
              }
              return this;
          }
! 
!         // @Override when we switch to Java 6
          public ReorderingBuffer append(CharSequence s, int start, int limit) {
              if(start!=limit) {
                  str.append(s, start, limit);
                  lastCC=0;
                  reorderStart=str.length();
              }
              return this;
          }
- 
          /**
           * Flushes from the intermediate StringBuilder to the Appendable,
           * if they are different objects.
           * Used after recomposition.
           * Must be called at the end when writing to a non-StringBuilder Appendable.
--- 174,216 ----
                      }
                      append(c, leadCC);
                  }
              }
          }
          // The following append() methods work like C++ appendZeroCC().
          // They assume that the cc or trailCC of their input is 0.
          // Most of them implement Appendable interface methods.
!         @Override
          public ReorderingBuffer append(char c) {
              str.append(c);
              lastCC=0;
              reorderStart=str.length();
              return this;
          }
          public void appendZeroCC(int c) {
              str.appendCodePoint(c);
              lastCC=0;
              reorderStart=str.length();
          }
!         @Override
          public ReorderingBuffer append(CharSequence s) {
              if(s.length()!=0) {
                  str.append(s);
                  lastCC=0;
                  reorderStart=str.length();
              }
              return this;
          }
!         @Override
          public ReorderingBuffer append(CharSequence s, int start, int limit) {
              if(start!=limit) {
                  str.append(s, start, limit);
                  lastCC=0;
                  reorderStart=str.length();
              }
              return this;
          }
          /**
           * Flushes from the intermediate StringBuilder to the Appendable,
           * if they are different objects.
           * Used after recomposition.
           * Must be called at the end when writing to a non-StringBuilder Appendable.
*** 241,251 ****
                      throw new InternalError(e);  // Avoid declaring "throws IOException".
                  }
              }
              lastCC=0;
          }
- 
          /**
           * Flushes from the intermediate StringBuilder to the Appendable,
           * if they are different objects.
           * Then appends the new text to the Appendable or StringBuilder.
           * Normally used after quick check loops find a non-empty sequence.
--- 227,236 ----
*** 264,280 ****
                  }
              }
              lastCC=0;
              return this;
          }
- 
          public void remove() {
              str.setLength(0);
              lastCC=0;
              reorderStart=0;
          }
- 
          public void removeSuffix(int suffixLength) {
              int oldLength=str.length();
              str.delete(oldLength-suffixLength, oldLength);
              lastCC=0;
              reorderStart=str.length();
--- 249,263 ----
*** 316,331 ****
              if(reorderStart>=codePointStart) {
                  return 0;
              }
              int c=str.codePointBefore(codePointStart);
              codePointStart-=Character.charCount(c);
!             if(c<MIN_CCC_LCCC_CP) {
!                 return 0;
!             }
!             return getCCFromYesOrMaybe(impl.getNorm16(c));
          }
- 
          private int codePointStart, codePointLimit;
      }
  
      // TODO: Propose as public API on the UTF16 class.
      // TODO: Propose widening UTF16 methods that take char to take int.
--- 299,310 ----
              if(reorderStart>=codePointStart) {
                  return 0;
              }
              int c=str.codePointBefore(codePointStart);
              codePointStart-=Character.charCount(c);
!             return impl.getCCFromYesOrMaybeCP(c);
          }
          private int codePointStart, codePointLimit;
      }
  
      // TODO: Propose as public API on the UTF16 class.
      // TODO: Propose widening UTF16 methods that take char to take int.
*** 368,415 ****
      }
  
      public NormalizerImpl() {}
  
      private static final class IsAcceptable implements ICUBinary.Authenticate {
-         // @Override when we switch to Java 6
          public boolean isDataVersionAcceptable(byte version[]) {
!             return version[0]==2;
          }
      }
- 
      private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
      private static final int DATA_FORMAT = 0x4e726d32;  // "Nrm2"
  
      public NormalizerImpl load(ByteBuffer bytes) {
          try {
              dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
              int indexesLength=bytes.getInt()/4;  // inIndexes[IX_NORM_TRIE_OFFSET]/4
!             if(indexesLength<=IX_MIN_MAYBE_YES) {
!                 throw new IOException("Normalizer2 data: not enough indexes");
              }
              int[] inIndexes=new int[indexesLength];
              inIndexes[0]=indexesLength*4;
              for(int i=1; i<indexesLength; ++i) {
                  inIndexes[i]=bytes.getInt();
              }
  
              minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
              minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
  
              minYesNo=inIndexes[IX_MIN_YES_NO];
              minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
              minNoNo=inIndexes[IX_MIN_NO_NO];
              limitNoNo=inIndexes[IX_LIMIT_NO_NO];
              minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
  
              // Read the normTrie.
              int offset=inIndexes[IX_NORM_TRIE_OFFSET];
              int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
              normTrie=Trie2_16.createFromSerialized(bytes);
              int trieLength=normTrie.getSerializedLength();
              if(trieLength>(nextOffset-offset)) {
!                 throw new IOException("Normalizer2 data: not enough bytes for normTrie");
              }
              ICUBinary.skipBytes(bytes, (nextOffset-offset)-trieLength);  // skip padding after trie bytes
  
              // Read the composition and mapping data.
              offset=nextOffset;
--- 347,398 ----
      }
  
      public NormalizerImpl() {}
  
      private static final class IsAcceptable implements ICUBinary.Authenticate {
          public boolean isDataVersionAcceptable(byte version[]) {
!             return version[0]==3;
          }
      }
      private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
      private static final int DATA_FORMAT = 0x4e726d32;  // "Nrm2"
  
      public NormalizerImpl load(ByteBuffer bytes) {
          try {
              dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
              int indexesLength=bytes.getInt()/4;  // inIndexes[IX_NORM_TRIE_OFFSET]/4
!             if(indexesLength<=IX_MIN_LCCC_CP) {
!                 throw new InternalError("Normalizer2 data: not enough indexes");
              }
              int[] inIndexes=new int[indexesLength];
              inIndexes[0]=indexesLength*4;
              for(int i=1; i<indexesLength; ++i) {
                  inIndexes[i]=bytes.getInt();
              }
  
              minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
              minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
+             minLcccCP=inIndexes[IX_MIN_LCCC_CP];
  
              minYesNo=inIndexes[IX_MIN_YES_NO];
              minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
              minNoNo=inIndexes[IX_MIN_NO_NO];
+             minNoNoCompBoundaryBefore=inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
+             minNoNoCompNoMaybeCC=inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
+             minNoNoEmpty=inIndexes[IX_MIN_NO_NO_EMPTY];
              limitNoNo=inIndexes[IX_LIMIT_NO_NO];
              minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
+             assert((minMaybeYes&7)==0);  // 8-aligned for noNoDelta bit fields
+             centerNoNoDelta=(minMaybeYes>>DELTA_SHIFT)-MAX_DELTA-1;
  
              // Read the normTrie.
              int offset=inIndexes[IX_NORM_TRIE_OFFSET];
              int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
              normTrie=Trie2_16.createFromSerialized(bytes);
              int trieLength=normTrie.getSerializedLength();
              if(trieLength>(nextOffset-offset)) {
!                 throw new InternalError("Normalizer2 data: not enough bytes for normTrie");
              }
              ICUBinary.skipBytes(bytes, (nextOffset-offset)-trieLength);  // skip padding after trie bytes
  
              // Read the composition and mapping data.
              offset=nextOffset;
*** 420,606 ****
                  chars=new char[numChars];
                  for(int i=0; i<numChars; ++i) {
                      chars[i]=bytes.getChar();
                  }
                  maybeYesCompositions=new String(chars);
!                 extraData=maybeYesCompositions.substring(MIN_NORMAL_MAYBE_YES-minMaybeYes);
              }
  
              // smallFCD: new in formatVersion 2
              offset=nextOffset;
              smallFCD=new byte[0x100];
!             for(int i=0; i<0x100; ++i) {
!                 smallFCD[i]=bytes.get();
!             }
! 
!             // Build tccc180[].
!             // gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.
!             tccc180=new int[0x180];
!             int bits=0;
!             for(int c=0; c<0x180; bits>>=1) {
!                 if((c&0xff)==0) {
!                     bits=smallFCD[c>>8];  // one byte per 0x100 code points
!                 }
!                 if((bits&1)!=0) {
!                     for(int i=0; i<0x20; ++i, ++c) {
!                         tccc180[c]=getFCD16FromNormData(c)&0xff;
!                     }
!                 } else {
!                     c+=0x20;
!                 }
!             }
  
              return this;
          } catch(IOException e) {
              throw new InternalError(e);
          }
      }
- 
      public NormalizerImpl load(String name) {
          return load(ICUBinary.getRequiredData(name));
      }
  
-     public int getNorm16(int c) {
-         return normTrie.get(c);
-     }
  
      public boolean isDecompYes(int norm16) { return norm16<minYesNo || minMaybeYes<=norm16; }
  
      public int getCC(int norm16) {
          if(norm16>=MIN_NORMAL_MAYBE_YES) {
!             return norm16&0xff;
          }
          if(norm16<minNoNo || limitNoNo<=norm16) {
              return 0;
          }
          return getCCFromNoNo(norm16);
      }
! 
      public static int getCCFromYesOrMaybe(int norm16) {
!         return norm16>=MIN_NORMAL_MAYBE_YES ? norm16&0xff : 0;
      }
  
      /**
       * Returns the FCD data for code point c.
       * @param c A Unicode code point.
       * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
       */
      public int getFCD16(int c) {
!         if(c<0) {
              return 0;
-         } else if(c<0x180) {
-             return tccc180[c];
          } else if(c<=0xffff) {
              if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
          }
          return getFCD16FromNormData(c);
      }
- 
-     /** Returns the FCD data for U+0000<=c<U+0180. */
-     public int getFCD16FromBelow180(int c) { return tccc180[c]; }
      /** Returns true if the single-or-lead code unit c might have non-zero FCD data. */
      public boolean singleLeadMightHaveNonZeroFCD16(int lead) {
          // 0<=lead<=0xffff
          byte bits=smallFCD[lead>>8];
          if(bits==0) { return false; }
          return ((bits>>((lead>>5)&7))&1)!=0;
      }
  
      /** Gets the FCD value from the regular normalization data. */
      public int getFCD16FromNormData(int c) {
-         // Only loops for 1:1 algorithmic mappings.
-         for(;;) {
              int norm16=getNorm16(c);
!             if(norm16<=minYesNo) {
!                 // no decomposition or Hangul syllable, all zeros
!                 return 0;
!             } else if(norm16>=MIN_NORMAL_MAYBE_YES) {
                  // combining mark
!                 norm16&=0xff;
                  return norm16|(norm16<<8);
              } else if(norm16>=minMaybeYes) {
                  return 0;
!             } else if(isDecompNoAlgorithmic(norm16)) {
                  c=mapAlgorithmic(c, norm16);
!             } else {
                  // c decomposes, get everything from the variable-length extra data
!                 int firstUnit=extraData.charAt(norm16);
!                 if((firstUnit&MAPPING_LENGTH_MASK)==0) {
!                     // A character that is deleted (maps to an empty string) must
!                     // get the worst-case lccc and tccc values because arbitrary
!                     // characters on both sides will become adjacent.
!                     return 0x1ff;
!                 } else {
                      int fcd16=firstUnit>>8;  // tccc
                      if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
!                         fcd16|=extraData.charAt(norm16-1)&0xff00;  // lccc
                      }
                      return fcd16;
                  }
-             }
-         }
-     }
  
      /**
       * Gets the decomposition for one code point.
       * @param c code point
       * @return c's decomposition, if it has one; returns null if it does not have a decomposition
       */
      public String getDecomposition(int c) {
-         int decomp=-1;
          int norm16;
!         for(;;) {
!             if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
                  // c does not decompose
!             } else if(isHangul(norm16)) {
!                 // Hangul syllable: decompose algorithmically
!                 StringBuilder buffer=new StringBuilder();
!                 Hangul.decompose(c, buffer);
!                 return buffer.toString();
!             } else if(isDecompNoAlgorithmic(norm16)) {
                  decomp=c=mapAlgorithmic(c, norm16);
!                 continue;
!             } else {
!                 // c decomposes, get everything from the variable-length extra data
!                 int length=extraData.charAt(norm16++)&MAPPING_LENGTH_MASK;
!                 return extraData.substring(norm16, norm16+length);
              }
              if(decomp<0) {
                  return null;
              } else {
                  return UTF16.valueOf(decomp);
              }
          }
!     }
! 
!     public static final int MIN_CCC_LCCC_CP=0x300;
  
-     public static final int MIN_YES_YES_WITH_CC=0xff01;
-     public static final int JAMO_VT=0xff00;
-     public static final int MIN_NORMAL_MAYBE_YES=0xfe00;
      public static final int MAX_DELTA=0x40;
  
      // Byte offsets from the start of the data, after the generic header.
      public static final int IX_NORM_TRIE_OFFSET=0;
      public static final int IX_EXTRA_DATA_OFFSET=1;
      public static final int IX_SMALL_FCD_OFFSET=2;
! 
      // Code point thresholds for quick check codes.
      public static final int IX_MIN_DECOMP_NO_CP=8;
      public static final int IX_MIN_COMP_NO_MAYBE_CP=9;
  
      // Norm16 value thresholds for quick check combinations and types of extra data.
!     // Mappings & compositions in [minYesNo..minYesNoMappingsOnly[.
      public static final int IX_MIN_YES_NO=10;
      public static final int IX_MIN_NO_NO=11;
      public static final int IX_LIMIT_NO_NO=12;
      public static final int IX_MIN_MAYBE_YES=13;
  
!     // Mappings only in [minYesNoMappingsOnly..minNoNo[.
      public static final int IX_MIN_YES_NO_MAPPINGS_ONLY=14;
  
      public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80;
      public static final int MAPPING_LENGTH_MASK=0x1f;
  
      public static final int COMP_1_LAST_TUPLE=0x8000;
      public static final int COMP_1_TRIPLE=1;
      public static final int COMP_1_TRAIL_LIMIT=0x3400;
--- 403,602 ----
                  chars=new char[numChars];
                  for(int i=0; i<numChars; ++i) {
                      chars[i]=bytes.getChar();
                  }
                  maybeYesCompositions=new String(chars);
!                 extraData=maybeYesCompositions.substring((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT);
              }
  
              // smallFCD: new in formatVersion 2
              offset=nextOffset;
              smallFCD=new byte[0x100];
!             bytes.get(smallFCD);
  
              return this;
          } catch(IOException e) {
              throw new InternalError(e);
          }
      }
      public NormalizerImpl load(String name) {
          return load(ICUBinary.getRequiredData(name));
      }
  
         
+     public int getNorm16(int c) { return normTrie.get(c); }
+     public boolean isAlgorithmicNoNo(int norm16) { return limitNoNo<=norm16 && norm16<minMaybeYes; }
+     public boolean isCompNo(int norm16) { return minNoNo<=norm16 && norm16<minMaybeYes; }
      public boolean isDecompYes(int norm16) { return norm16<minYesNo || minMaybeYes<=norm16; }
  
      public int getCC(int norm16) {
          if(norm16>=MIN_NORMAL_MAYBE_YES) {
!             return getCCFromNormalYesOrMaybe(norm16);
          }
          if(norm16<minNoNo || limitNoNo<=norm16) {
              return 0;
          }
          return getCCFromNoNo(norm16);
      }
!     public static int getCCFromNormalYesOrMaybe(int norm16) {
!         return (norm16 >> OFFSET_SHIFT) & 0xff;
!     }
      public static int getCCFromYesOrMaybe(int norm16) {
!         return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0;
!     }
!     public int getCCFromYesOrMaybeCP(int c) {
!         if (c < minCompNoMaybeCP) { return 0; }
!         return getCCFromYesOrMaybe(getNorm16(c));
      }
  
      /**
       * Returns the FCD data for code point c.
       * @param c A Unicode code point.
       * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
       */
      public int getFCD16(int c) {
!         if(c<minDecompNoCP) {
              return 0;
          } else if(c<=0xffff) {
              if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
          }
          return getFCD16FromNormData(c);
      }
      /** Returns true if the single-or-lead code unit c might have non-zero FCD data. */
      public boolean singleLeadMightHaveNonZeroFCD16(int lead) {
          // 0<=lead<=0xffff
          byte bits=smallFCD[lead>>8];
          if(bits==0) { return false; }
          return ((bits>>((lead>>5)&7))&1)!=0;
      }
  
      /** Gets the FCD value from the regular normalization data. */
      public int getFCD16FromNormData(int c) {
          int norm16=getNorm16(c);
!         if (norm16 >= limitNoNo) {
!             if(norm16>=MIN_NORMAL_MAYBE_YES) {
                  // combining mark
!                 norm16=getCCFromNormalYesOrMaybe(norm16);
                  return norm16|(norm16<<8);
              } else if(norm16>=minMaybeYes) {
                  return 0;
!             } else {  // isDecompNoAlgorithmic(norm16)
!                 int deltaTrailCC = norm16 & DELTA_TCCC_MASK;
!                 if (deltaTrailCC <= DELTA_TCCC_1) {
!                     return deltaTrailCC >> OFFSET_SHIFT;
!                 }
!                 // Maps to an isCompYesAndZeroCC.
                  c=mapAlgorithmic(c, norm16);
!                 norm16=getNorm16(c);
!             }
!         }
!         if(norm16<=minYesNo || isHangulLVT(norm16)) {
!             // no decomposition or Hangul syllable, all zeros
!             return 0;
!         }
          // c decomposes, get everything from the variable-length extra data
!         int mapping=norm16>>OFFSET_SHIFT;
!         int firstUnit=extraData.charAt(mapping);
          int fcd16=firstUnit>>8;  // tccc
          if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
!             fcd16|=extraData.charAt(mapping-1)&0xff00;  // lccc
          }
          return fcd16;
      }
  
      /**
       * Gets the decomposition for one code point.
       * @param c code point
       * @return c's decomposition, if it has one; returns null if it does not have a decomposition
       */
      public String getDecomposition(int c) {
          int norm16;
!         if(c<minDecompNoCP || isMaybeOrNonZeroCC(norm16=getNorm16(c))) {
              // c does not decompose
!             return null;
!         }
!         int decomp = -1;
!         if(isDecompNoAlgorithmic(norm16)) {
!             // Maps to an isCompYesAndZeroCC.
              decomp=c=mapAlgorithmic(c, norm16);
!             // The mapping might decompose further.
!             norm16 = getNorm16(c);
          }
+         if (norm16 < minYesNo) {
              if(decomp<0) {
                  return null;
              } else {
                  return UTF16.valueOf(decomp);
              }
+         } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
+             // Hangul syllable: decompose algorithmically
+             StringBuilder buffer=new StringBuilder();
+             Hangul.decompose(c, buffer);
+             return buffer.toString();
          }
!         // c decomposes, get everything from the variable-length extra data
!         int mapping=norm16>>OFFSET_SHIFT;
!         int length=extraData.charAt(mapping++)&MAPPING_LENGTH_MASK;
!         return extraData.substring(mapping, mapping+length);
!     }
!         
!     // Fixed norm16 values.
!     public static final int MIN_YES_YES_WITH_CC=0xfe02;
!     public static final int JAMO_VT=0xfe00;
!     public static final int MIN_NORMAL_MAYBE_YES=0xfc00;
!     public static final int JAMO_L=2;  // offset=1 hasCompBoundaryAfter=FALSE
!     public static final int INERT=1;  // offset=0 hasCompBoundaryAfter=TRUE
! 
!     // norm16 bit 0 is comp-boundary-after.
!     public static final int HAS_COMP_BOUNDARY_AFTER=1;
!     public static final int OFFSET_SHIFT=1;
! 
!     // For algorithmic one-way mappings, norm16 bits 2..1 indicate the
!     // tccc (0, 1, >1) for quick FCC boundary-after tests.
!     public static final int DELTA_TCCC_0=0;
!     public static final int DELTA_TCCC_1=2;
!     public static final int DELTA_TCCC_GT_1=4;
!     public static final int DELTA_TCCC_MASK=6;
!     public static final int DELTA_SHIFT=3;
  
      public static final int MAX_DELTA=0x40;
  
      // Byte offsets from the start of the data, after the generic header.
      public static final int IX_NORM_TRIE_OFFSET=0;
      public static final int IX_EXTRA_DATA_OFFSET=1;
      public static final int IX_SMALL_FCD_OFFSET=2;
!     public static final int IX_RESERVED3_OFFSET=3;
!     public static final int IX_TOTAL_SIZE=7;
!     public static final int MIN_CCC_LCCC_CP=0x300;
      // Code point thresholds for quick check codes.
      public static final int IX_MIN_DECOMP_NO_CP=8;
      public static final int IX_MIN_COMP_NO_MAYBE_CP=9;
  
      // Norm16 value thresholds for quick check combinations and types of extra data.
! 
!     /** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */
      public static final int IX_MIN_YES_NO=10;
+     /** Mappings are comp-normalized. */
      public static final int IX_MIN_NO_NO=11;
      public static final int IX_LIMIT_NO_NO=12;
      public static final int IX_MIN_MAYBE_YES=13;
  
!     /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */
      public static final int IX_MIN_YES_NO_MAPPINGS_ONLY=14;
+     /** Mappings are not comp-normalized but have a comp boundary before. */
+     public static final int IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE=15;
+     /** Mappings do not have a comp boundary before. */
+     public static final int IX_MIN_NO_NO_COMP_NO_MAYBE_CC=16;
+     /** Mappings to the empty string. */
+     public static final int IX_MIN_NO_NO_EMPTY=17;
+ 
+     public static final int IX_MIN_LCCC_CP=18;
+     public static final int IX_COUNT=20;
  
      public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80;
+     public static final int MAPPING_HAS_RAW_MAPPING=0x40;
+     // unused bit 0x20;
      public static final int MAPPING_LENGTH_MASK=0x1f;
  
      public static final int COMP_1_LAST_TUPLE=0x8000;
      public static final int COMP_1_TRIPLE=1;
      public static final int COMP_1_TRAIL_LIMIT=0x3400;
*** 700,710 ****
                  return prevBoundary;  // "no" or cc out of order
              }
          }
          return src;
      }
- 
      public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) {
          int limit=s.length();
          if(limit==0) {
              return;
          }
--- 696,705 ----
*** 735,978 ****
      // !doCompose: isNormalized (buffer must be empty and initialized)
      public boolean compose(CharSequence s, int src, int limit,
                             boolean onlyContiguous,
                             boolean doCompose,
                             ReorderingBuffer buffer) {
          int minNoMaybeCP=minCompNoMaybeCP;
  
!         /*
!          * prevBoundary points to the last character before the current one
!          * that has a composition boundary before it with ccc==0 and quick check "yes".
!          * Keeping track of prevBoundary saves us looking for a composition boundary
!          * when we find a "no" or "maybe".
!          *
!          * When we back out from prevSrc back to prevBoundary,
!          * then we also remove those same characters (which had been simply copied
!          * or canonically-order-inserted) from the ReorderingBuffer.
!          * Therefore, at all times, the [prevBoundary..prevSrc[ source units
!          * must correspond 1:1 to destination units at the end of the destination buffer.
!          */
!         int prevBoundary=src;
          int prevSrc;
!         int c=0;
!         int norm16=0;
! 
!         // only for isNormalized
!         int prevCC=0;
! 
!         for(;;) {
!             // count code units below the minimum or with irrelevant data for the quick check
!             for(prevSrc=src; src!=limit;) {
                  if( (c=s.charAt(src))<minNoMaybeCP ||
                      isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
                  ) {
                      ++src;
!                 } else if(!UTF16.isSurrogate((char)c)) {
                      break;
                  } else {
                      char c2;
                      if(UTF16Plus.isSurrogateLead(c)) {
!                         if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
                              c=Character.toCodePoint((char)c, c2);
                          }
                      } else /* trail surrogate */ {
!                         if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
!                             --src;
                              c=Character.toCodePoint(c2, (char)c);
                          }
                      }
!                     if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
!                         src+=Character.charCount(c);
!                     } else {
                          break;
                      }
                  }
              }
-             // copy these code units all at once
-             if(src!=prevSrc) {
-                 if(src==limit) {
-                     if(doCompose) {
-                         buffer.flushAndAppendZeroCC(s, prevSrc, src);
                      }
!                     break;
                  }
!                 // Set prevBoundary to the last character in the quick check loop.
!                 prevBoundary=src-1;
!                 if( Character.isLowSurrogate(s.charAt(prevBoundary)) && prevSrc<prevBoundary &&
!                     Character.isHighSurrogate(s.charAt(prevBoundary-1))
!                 ) {
!                     --prevBoundary;
                  }
!                 if(doCompose) {
!                     // The last "quick check yes" character is excluded from the
!                     // flush-and-append call in case it needs to be modified.
!                     buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
!                     buffer.append(s, prevBoundary, src);
!                 } else {
!                     prevCC=0;
                  }
!                 // The start of the current character (c).
!                 prevSrc=src;
!             } else if(src==limit) {
!                 break;
              }
! 
!             src+=Character.charCount(c);
!             /*
!              * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
!              * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
!              * or has ccc!=0.
!              * Check for Jamo V/T, then for regular characters.
!              * c is not a Hangul syllable or Jamo L because those have "yes" properties.
!              */
!             if(isJamoVT(norm16) && prevBoundary!=prevSrc) {
                  char prev=s.charAt(prevSrc-1);
-                 boolean needToDecompose=false;
                  if(c<Hangul.JAMO_T_BASE) {
!                     // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
!                     prev-=Hangul.JAMO_L_BASE;
!                     if(prev<Hangul.JAMO_L_COUNT) {
!                         if(!doCompose) {
                              return false;
                          }
!                         char syllable=(char)
!                             (Hangul.HANGUL_BASE+
!                              (prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))*
!                              Hangul.JAMO_T_COUNT);
!                         char t;
!                         if(src!=limit && (t=(char)(s.charAt(src)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) {
                              ++src;
!                             syllable+=t;  // The next character was a Jamo T.
!                             prevBoundary=src;
!                             buffer.setLastChar(syllable);
                              continue;
                          }
                          // If we see L+V+x where x!=T then we drop to the slow path,
                          // decompose and recompose.
                          // This is to deal with NFKC finding normal L and V but a
!                         // compatibility variant of a T. We need to either fully compose that
!                         // combination here (which would complicate the code and may not work
!                         // with strange custom data) or use the slow path -- or else our replacing
!                         // two input characters (L+V) with one output character (LV syllable)
!                         // would violate the invariant that [prevBoundary..prevSrc[ has the same
!                         // length as what we appended to the buffer since prevBoundary.
!                         needToDecompose=true;
                      }
!                 } else if(Hangul.isHangulWithoutJamoT(prev)) {
!                     // c is a Jamo Trailing consonant,
                      // compose with previous Hangul LV that does not contain a Jamo T.
!                     if(!doCompose) {
                          return false;
                      }
!                     buffer.setLastChar((char)(prev+c-Hangul.JAMO_T_BASE));
!                     prevBoundary=src;
!                     continue;
!                 }
!                 if(!needToDecompose) {
!                     // The Jamo V/T did not compose into a Hangul syllable.
!                     if(doCompose) {
!                         buffer.append((char)c);
!                     } else {
!                         prevCC=0;
                      }
                      continue;
                  }
!             }
!             /*
!              * Source buffer pointers:
!              *
!              *  all done      quick check   current char  not yet
!              *                "yes" but     (c)           processed
!              *                may combine
!              *                forward
!              * [-------------[-------------[-------------[-------------[
!              * |             |             |             |             |
!              * orig. src     prevBoundary  prevSrc       src           limit
!              *
!              *
!              * Destination buffer pointers inside the ReorderingBuffer:
!              *
!              *  all done      might take    not filled yet
!              *                characters for
!              *                reordering
!              * [-------------[-------------[-------------[
!              * |             |             |             |
!              * start         reorderStart  limit         |
!              *                             +remainingCap.+
!              */
!             if(norm16>=MIN_YES_YES_WITH_CC) {
!                 int cc=norm16&0xff;  // cc!=0
!                 if( onlyContiguous &&  // FCC
!                     (doCompose ? buffer.getLastCC() : prevCC)==0 &&
!                     prevBoundary<prevSrc &&
!                     // buffer.getLastCC()==0 && prevBoundary<prevSrc tell us that
!                     // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
!                     // passed the quick check "yes && ccc==0" test.
!                     // Check whether the last character was a "yesYes" or a "yesNo".
!                     // If a "yesNo", then we get its trailing ccc from its
!                     // mapping and check for canonical order.
!                     // All other cases are ok.
!                     getTrailCCFromCompYesAndZeroCC(s, prevBoundary, prevSrc)>cc
!                 ) {
                      // Fails FCD test, need to decompose and contiguously recompose.
!                     if(!doCompose) {
                          return false;
                      }
-                 } else if(doCompose) {
-                     buffer.append(c, cc);
-                     continue;
-                 } else if(prevCC<=cc) {
-                     prevCC=cc;
-                     continue;
                  } else {
!                     return false;
                  }
!             } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) {
                  return false;
              }
! 
!             /*
!              * Find appropriate boundaries around this character,
!              * decompose the source text from between the boundaries,
!              * and recompose it.
!              *
!              * We may need to remove the last few characters from the ReorderingBuffer
!              * to account for source text that was copied or appended
!              * but needs to take part in the recomposition.
!              */
! 
!             /*
!              * Find the last composition boundary in [prevBoundary..src[.
!              * It is either the decomposition of the current character (at prevSrc),
!              * or prevBoundary.
!              */
!             if(hasCompBoundaryBefore(c, norm16)) {
!                 prevBoundary=prevSrc;
!             } else if(doCompose) {
!                 buffer.removeSuffix(prevSrc-prevBoundary);
              }
  
!             // Find the next composition boundary in [src..limit[ -
!             // modifies src to point to the next starter.
!             src=findNextCompBoundary(s, src, limit);
! 
!             // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it.
              int recomposeStartIndex=buffer.length();
!             decomposeShort(s, prevBoundary, src, buffer);
              recompose(buffer, recomposeStartIndex, onlyContiguous);
              if(!doCompose) {
!                 if(!buffer.equals(s, prevBoundary, src)) {
                      return false;
                  }
                  buffer.remove();
-                 prevCC=0;
              }
- 
-             // Move to the next starter. We never need to look back before this point again.
              prevBoundary=src;
          }
-         return true;
      }
  
      /**
       * Very similar to compose(): Make the same changes in both places if relevant.
       * doSpan: spanQuickCheckYes (ignore bit 0 of the return value)
--- 730,971 ----
      // !doCompose: isNormalized (buffer must be empty and initialized)
      public boolean compose(CharSequence s, int src, int limit,
                             boolean onlyContiguous,
                             boolean doCompose,
                             ReorderingBuffer buffer) {
+         int prevBoundary=src;
          int minNoMaybeCP=minCompNoMaybeCP;
  
!         for (;;) {
!             // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
!             // or with (compYes && ccc==0) properties.
              int prevSrc;
!             int c = 0;
!             int norm16 = 0;
!             for (;;) {
!                 if (src == limit) {
!                     if (prevBoundary != limit && doCompose) {
!                         buffer.append(s, prevBoundary, limit);
!                     }
!                     return true;
!                 }
                  if( (c=s.charAt(src))<minNoMaybeCP ||
                      isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
                  ) {
                      ++src;
!                 } else {
!                     prevSrc = src++;
!                     if(!UTF16.isSurrogate((char)c)) {
                          break;
                      } else {
                          char c2;
                          if(UTF16Plus.isSurrogateLead(c)) {
!                             if(src!=limit && Character.isLowSurrogate(c2=s.charAt(src))) {
!                                 ++src;
                                  c=Character.toCodePoint((char)c, c2);
                              }
                          } else /* trail surrogate */ {
!                             if(prevBoundary<prevSrc && Character.isHighSurrogate(c2=s.charAt(prevSrc-1))) {
!                                 --prevSrc;
                                  c=Character.toCodePoint(c2, (char)c);
                              }
                          }
!                         if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
                              break;
                          }
                      }
                  }
              }
!             // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
!             // The current character is either a "noNo" (has a mapping)
!             // or a "maybeYes" (combines backward)
!             // or a "yesYes" with ccc!=0.
!             // It is not a Hangul syllable or Jamo L because those have "yes" properties.
! 
!             // Medium-fast path: Handle cases that do not require full decomposition and recomposition.
!             if (!isMaybeOrNonZeroCC(norm16)) {  // minNoNo <= norm16 < minMaybeYes
!                 if (!doCompose) {
!                     return false;
                  }
!                 // Fast path for mapping a character that is immediately surrounded by boundaries.
!                 // In this case, we need not decompose around the current character.
!                 if (isDecompNoAlgorithmic(norm16)) {
!                     // Maps to a single isCompYesAndZeroCC character
!                     // which also implies hasCompBoundaryBefore.
!                     if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
!                             hasCompBoundaryBefore(s, src, limit)) {
!                         if (prevBoundary != prevSrc) {
!                             buffer.append(s, prevBoundary, prevSrc);
                          }
!                         buffer.append(mapAlgorithmic(c, norm16), 0);
!                         prevBoundary = src;
!                         continue;
                      }
!                 } else if (norm16 < minNoNoCompBoundaryBefore) {
!                     // The mapping is comp-normalized which also implies hasCompBoundaryBefore.
!                     if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
!                             hasCompBoundaryBefore(s, src, limit)) {
!                         if (prevBoundary != prevSrc) {
!                             buffer.append(s, prevBoundary, prevSrc);
!                         }
!                         int mapping = norm16 >> OFFSET_SHIFT;
!                         int length = extraData.charAt(mapping++) & MAPPING_LENGTH_MASK;
!                         buffer.append(extraData, mapping, mapping + length);
!                         prevBoundary = src;
!                         continue;
                      }
!                 } else if (norm16 >= minNoNoEmpty) {
!                     // The current character maps to nothing.
!                     // Simply omit it from the output if there is a boundary before _or_ after it.
!                     // The character itself implies no boundaries.
!                     if (hasCompBoundaryBefore(s, src, limit) ||
!                             hasCompBoundaryAfter(s, prevBoundary, prevSrc, onlyContiguous)) {
!                         if (prevBoundary != prevSrc) {
!                             buffer.append(s, prevBoundary, prevSrc);
!                         }
!                         prevBoundary = src;
!                         continue;
!                     }
!                 }
!                 // Other "noNo" type, or need to examine more text around this character:
!                 // Fall through to the slow path.
!             } else if (isJamoVT(norm16) && prevBoundary != prevSrc) {
                  char prev=s.charAt(prevSrc-1);
                  if(c<Hangul.JAMO_T_BASE) {
!                     // The current character is a Jamo Vowel,
!                     // compose with previous Jamo L and following Jamo T.
!                     char l = (char)(prev-Hangul.JAMO_L_BASE);
!                     if(l<Hangul.JAMO_L_COUNT) {
!                         if (!doCompose) {
                              return false;
                          }
!                         int t;
!                         if (src != limit &&
!                                 0 < (t = (s.charAt(src) - Hangul.JAMO_T_BASE)) &&
!                                 t < Hangul.JAMO_T_COUNT) {
!                             // The next character is a Jamo T.
                              ++src;
!                         } else if (hasCompBoundaryBefore(s, src, limit)) {
!                             // No Jamo T follows, not even via decomposition.
!                             t = 0;
!                         } else {
!                             t = -1;
!                         }
!                         if (t >= 0) {
!                             int syllable = Hangul.HANGUL_BASE +
!                                 (l*Hangul.JAMO_V_COUNT + (c-Hangul.JAMO_V_BASE)) *
!                                 Hangul.JAMO_T_COUNT + t;
!                             --prevSrc;  // Replace the Jamo L as well.
!                             if (prevBoundary != prevSrc) {
!                                 buffer.append(s, prevBoundary, prevSrc);
!                             }
!                             buffer.append((char)syllable);
!                             prevBoundary = src;
                              continue;
                          }
                          // If we see L+V+x where x!=T then we drop to the slow path,
                          // decompose and recompose.
                          // This is to deal with NFKC finding normal L and V but a
!                         // compatibility variant of a T.
!                         // We need to either fully compose that combination here
!                         // (which would complicate the code and may not work with strange custom data)
!                         // or use the slow path.
                      }
!                 } else if (Hangul.isHangulLV(prev)) {
!                     // The current character is a Jamo Trailing consonant,
                      // compose with previous Hangul LV that does not contain a Jamo T.
!                     if (!doCompose) {
                          return false;
                      }
!                     int syllable = prev + c - Hangul.JAMO_T_BASE;
!                     --prevSrc;  // Replace the Hangul LV as well.
!                     if (prevBoundary != prevSrc) {
!                         buffer.append(s, prevBoundary, prevSrc);
                      }
+                     buffer.append((char)syllable);
+                     prevBoundary = src;
                      continue;
                  }
!                 // No matching context, or may need to decompose surrounding text first:
!                 // Fall through to the slow path.
!             } else if (norm16 > JAMO_VT) {  // norm16 >= MIN_YES_YES_WITH_CC
!                 // One or more combining marks that do not combine-back:
!                 // Check for canonical order, copy unchanged if ok and
!                 // if followed by a character with a boundary-before.
!                 int cc = getCCFromNormalYesOrMaybe(norm16);  // cc!=0
!                 if (onlyContiguous /* FCC */ && getPreviousTrailCC(s, prevBoundary, prevSrc) > cc) {
                      // Fails FCD test, need to decompose and contiguously recompose.
!                     if (!doCompose) {
                          return false;
                      }
                  } else {
!                     // If !onlyContiguous (not FCC), then we ignore the tccc of
!                     // the previous character which passed the quick check "yes && ccc==0" test.
!                     int n16;
!                     for (;;) {
!                         if (src == limit) {
!                             if (doCompose) {
!                                 buffer.append(s, prevBoundary, limit);
                              }
!                             return true;
!                         }
!                         int prevCC = cc;
!                         c = Character.codePointAt(s, src);
!                         n16 = normTrie.get(c);
!                         if (n16 >= MIN_YES_YES_WITH_CC) {
!                             cc = getCCFromNormalYesOrMaybe(n16);
!                             if (prevCC > cc) {
!                                 if (!doCompose) {
                                      return false;
                                  }
!                                 break;
!                             }
!                         } else {
!                             break;
!                         }
!                         src += Character.charCount(c);
!                     }
!                     // src is after the last in-order combining mark.
!                     // If there is a boundary here, then we continue with no change.
!                     if (norm16HasCompBoundaryBefore(n16)) {
!                         if (isCompYesAndZeroCC(n16)) {
!                             src += Character.charCount(c);
!                         }
!                         continue;
!                     }
!                     // Use the slow path. There is no boundary in [prevSrc, src[.
!                 }
              }
  
!             // Slow path: Find the nearest boundaries around the current character,
!             // decompose and recompose.
!             if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
!                 c = Character.codePointBefore(s, prevSrc);
!                 norm16 = normTrie.get(c);
!                 if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
!                     prevSrc -= Character.charCount(c);
!                 }
!             }
!             if (doCompose && prevBoundary != prevSrc) {
!                 buffer.append(s, prevBoundary, prevSrc);
!             }
              int recomposeStartIndex=buffer.length();
!             // We know there is not a boundary here.
!             decomposeShort(s, prevSrc, src, false /* !stopAtCompBoundary */, onlyContiguous,
!                            buffer);
!             // Decompose until the next boundary.
!             src = decomposeShort(s, src, limit, true /* stopAtCompBoundary */, onlyContiguous,
!                                  buffer);
              recompose(buffer, recomposeStartIndex, onlyContiguous);
              if(!doCompose) {
!                 if(!buffer.equals(s, prevSrc, src)) {
                      return false;
                  }
                  buffer.remove();
              }
              prevBoundary=src;
          }
      }
  
      /**
       * Very similar to compose(): Make the same changes in both places if relevant.
       * doSpan: spanQuickCheckYes (ignore bit 0 of the return value)
*** 982,1095 ****
       *         then the quick check result is "no"
       */
      public int composeQuickCheck(CharSequence s, int src, int limit,
                                   boolean onlyContiguous, boolean doSpan) {
          int qcResult=0;
-         int minNoMaybeCP=minCompNoMaybeCP;
- 
-         /*
-          * prevBoundary points to the last character before the current one
-          * that has a composition boundary before it with ccc==0 and quick check "yes".
-          */
          int prevBoundary=src;
!         int prevSrc;
!         int c=0;
!         int norm16=0;
!         int prevCC=0;
  
          for(;;) {
!             // count code units below the minimum or with irrelevant data for the quick check
!             for(prevSrc=src;;) {
                  if(src==limit) {
                      return (src<<1)|qcResult;  // "yes" or "maybe"
                  }
                  if( (c=s.charAt(src))<minNoMaybeCP ||
                      isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
                  ) {
                      ++src;
!                 } else if(!UTF16.isSurrogate((char)c)) {
                      break;
                  } else {
                      char c2;
                      if(UTF16Plus.isSurrogateLead(c)) {
!                         if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
                              c=Character.toCodePoint((char)c, c2);
                          }
                      } else /* trail surrogate */ {
!                         if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
!                             --src;
                              c=Character.toCodePoint(c2, (char)c);
                          }
                      }
!                     if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
!                         src+=Character.charCount(c);
!                     } else {
                          break;
                      }
                  }
              }
-             if(src!=prevSrc) {
-                 // Set prevBoundary to the last character in the quick check loop.
-                 prevBoundary=src-1;
-                 if( Character.isLowSurrogate(s.charAt(prevBoundary)) && prevSrc<prevBoundary &&
-                         Character.isHighSurrogate(s.charAt(prevBoundary-1))
-                 ) {
-                     --prevBoundary;
                  }
!                 prevCC=0;
!                 // The start of the current character (c).
!                 prevSrc=src;
              }
  
-             src+=Character.charCount(c);
-             /*
-              * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
-              * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
-              * or has ccc!=0.
-              */
              if(isMaybeOrNonZeroCC(norm16)) {
                  int cc=getCCFromYesOrMaybe(norm16);
!                 if( onlyContiguous &&  // FCC
!                     cc!=0 &&
!                     prevCC==0 &&
!                     prevBoundary<prevSrc &&
!                     // prevCC==0 && prevBoundary<prevSrc tell us that
!                     // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
!                     // passed the quick check "yes && ccc==0" test.
!                     // Check whether the last character was a "yesYes" or a "yesNo".
!                     // If a "yesNo", then we get its trailing ccc from its
!                     // mapping and check for canonical order.
!                     // All other cases are ok.
!                     getTrailCCFromCompYesAndZeroCC(s, prevBoundary, prevSrc)>cc
!                 ) {
!                     // Fails FCD test.
!                 } else if(prevCC<=cc || cc==0) {
!                     prevCC=cc;
!                     if(norm16<MIN_YES_YES_WITH_CC) {
!                         if(!doSpan) {
!                             qcResult=1;
                          } else {
!                             return prevBoundary<<1;  // spanYes does not care to know it's "maybe"
                          }
                      }
                      continue;
                  }
              }
              return prevBoundary<<1;  // "no"
          }
      }
- 
      public void composeAndAppend(CharSequence s,
                                   boolean doCompose,
                                   boolean onlyContiguous,
                                   ReorderingBuffer buffer) {
          int src=0, limit=s.length();
          if(!buffer.isEmpty()) {
!             int firstStarterInSrc=findNextCompBoundary(s, 0, limit);
              if(0!=firstStarterInSrc) {
                  int lastStarterInDest=findPreviousCompBoundary(buffer.getStringBuilder(),
!                                                                buffer.length());
                  StringBuilder middle=new StringBuilder((buffer.length()-lastStarterInDest)+
                                                         firstStarterInSrc+16);
                  middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length());
                  buffer.removeSuffix(buffer.length()-lastStarterInDest);
                  middle.append(s, 0, firstStarterInSrc);
--- 975,1098 ----
       *         then the quick check result is "no"
       */
      public int composeQuickCheck(CharSequence s, int src, int limit,
                                   boolean onlyContiguous, boolean doSpan) {
          int qcResult=0;
          int prevBoundary=src;
!         int minNoMaybeCP=minCompNoMaybeCP;
  
          for(;;) {
!             // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
!             // or with (compYes && ccc==0) properties.
!             int prevSrc;
!             int c = 0;
!             int norm16 = 0;
!             for (;;) {
                  if(src==limit) {
                      return (src<<1)|qcResult;  // "yes" or "maybe"
                  }
                  if( (c=s.charAt(src))<minNoMaybeCP ||
                      isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
                  ) {
                      ++src;
!                 } else {
!                     prevSrc = src++;
!                     if(!UTF16.isSurrogate((char)c)) {
                          break;
                      } else {
                          char c2;
                          if(UTF16Plus.isSurrogateLead(c)) {
!                             if(src!=limit && Character.isLowSurrogate(c2=s.charAt(src))) {
!                                 ++src;
                                  c=Character.toCodePoint((char)c, c2);
                              }
                          } else /* trail surrogate */ {
!                             if(prevBoundary<prevSrc && Character.isHighSurrogate(c2=s.charAt(prevSrc-1))) {
!                                 --prevSrc;
                                  c=Character.toCodePoint(c2, (char)c);
                              }
                          }
!                         if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
                              break;
                          }
                      }
                  }
              }
!             // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
!             // The current character is either a "noNo" (has a mapping)
!             // or a "maybeYes" (combines backward)
!             // or a "yesYes" with ccc!=0.
!             // It is not a Hangul syllable or Jamo L because those have "yes" properties.
! 
!             int prevNorm16 = INERT;
!             if (prevBoundary != prevSrc) {
!                 prevBoundary = prevSrc;
!                 if (!norm16HasCompBoundaryBefore(norm16)) {
!                     c = Character.codePointBefore(s, prevSrc);
!                     int n16 = getNorm16(c);
!                     if (!norm16HasCompBoundaryAfter(n16, onlyContiguous)) {
!                         prevBoundary -= Character.charCount(c);
!                         prevNorm16 = n16;
!                     }
!                 }
              }
  
              if(isMaybeOrNonZeroCC(norm16)) {
                  int cc=getCCFromYesOrMaybe(norm16);
!                 if (onlyContiguous /* FCC */ && cc != 0 &&
!                         getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) {
!                     // The [prevBoundary..prevSrc[ character
!                     // passed the quick check "yes && ccc==0" test
!                     // but is out of canonical order with the current combining mark.
!                 } else {
!                     // If !onlyContiguous (not FCC), then we ignore the tccc of
!                     // the previous character which passed the quick check "yes && ccc==0" test.
!                     for (;;) {
!                         if (norm16 < MIN_YES_YES_WITH_CC) {
!                             if (!doSpan) {
!                                 qcResult = 1;
!                             } else {
!                                 return prevBoundary << 1;  // spanYes does not care to know it's "maybe"
!                             }
!                         }
!                         if (src == limit) {
!                             return (src<<1) | qcResult;  // "yes" or "maybe"
!                         }
!                         int prevCC = cc;
!                         c = Character.codePointAt(s, src);
!                         norm16 = getNorm16(c);
!                         if (isMaybeOrNonZeroCC(norm16)) {
!                             cc = getCCFromYesOrMaybe(norm16);
!                             if (!(prevCC <= cc || cc == 0)) {
!                                 break;
!                             }
                          } else {
!                             break;
                          }
+                         src += Character.charCount(c);
                      }
+                     // src is after the last in-order combining mark.
+                     if (isCompYesAndZeroCC(norm16)) {
+                         prevBoundary = src;
+                         src += Character.charCount(c);
                          continue;
                      }
                  }
+             }
              return prevBoundary<<1;  // "no"
          }
      }
      public void composeAndAppend(CharSequence s,
                                   boolean doCompose,
                                   boolean onlyContiguous,
                                   ReorderingBuffer buffer) {
          int src=0, limit=s.length();
          if(!buffer.isEmpty()) {
!             int firstStarterInSrc=findNextCompBoundary(s, 0, limit, onlyContiguous);
              if(0!=firstStarterInSrc) {
                  int lastStarterInDest=findPreviousCompBoundary(buffer.getStringBuilder(),
!                                                                buffer.length(), onlyContiguous);
                  StringBuilder middle=new StringBuilder((buffer.length()-lastStarterInDest)+
                                                         firstStarterInSrc+16);
                  middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length());
                  buffer.removeSuffix(buffer.length()-lastStarterInDest);
                  middle.append(s, 0, firstStarterInSrc);
*** 1101,1111 ****
              compose(s, src, limit, onlyContiguous, true, buffer);
          } else {
              buffer.append(s, src, limit);
          }
      }
- 
      // Dual functionality:
      // buffer!=NULL: normalize
      // buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
      public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) {
          // Note: In this function we use buffer->appendZeroCC() because we track
--- 1104,1113 ----
*** 1123,1133 ****
          int fcd16=0;
  
          for(;;) {
              // count code units with lccc==0
              for(prevSrc=src; src!=limit;) {
!                 if((c=s.charAt(src))<MIN_CCC_LCCC_CP) {
                      prevFCD16=~c;
                      ++src;
                  } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
                      prevFCD16=0;
                      ++src;
--- 1125,1135 ----
          int fcd16=0;
  
          for(;;) {
              // count code units with lccc==0
              for(prevSrc=src; src!=limit;) {
!                 if((c=s.charAt(src))<minLcccCP) {
                      prevFCD16=~c;
                      ++src;
                  } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
                      prevFCD16=0;
                      ++src;
*** 1162,1177 ****
                      break;
                  }
                  prevBoundary=src;
                  // We know that the previous character's lccc==0.
                  if(prevFCD16<0) {
!                     // Fetching the fcd16 value was deferred for this below-U+0300 code point.
                      int prev=~prevFCD16;
!                     prevFCD16= prev<0x180 ? tccc180[prev] : getFCD16FromNormData(prev);
                      if(prevFCD16>1) {
                          --prevBoundary;
                      }
                  } else {
                      int p=src-1;
                      if( Character.isLowSurrogate(s.charAt(p)) && prevSrc<p &&
                          Character.isHighSurrogate(s.charAt(p-1))
                      ) {
--- 1164,1183 ----
                      break;
                  }
                  prevBoundary=src;
                  // We know that the previous character's lccc==0.
                  if(prevFCD16<0) {
!                     // Fetching the fcd16 value was deferred for this below-minLcccCP code point.
                      int prev=~prevFCD16;
!                     if(prev<minDecompNoCP) {
!                         prevFCD16=0;
!                     } else {
!                         prevFCD16=getFCD16FromNormData(prev);
                          if(prevFCD16>1) {
                              --prevBoundary;
                          }
+                     }
                  } else {
                      int p=src-1;
                      if( Character.isLowSurrogate(s.charAt(p)) && prevSrc<p &&
                          Character.isHighSurrogate(s.charAt(p-1))
                      ) {
*** 1226,1291 ****
                  src=findNextFCDBoundary(s, src, limit);
                  /*
                   * The source text does not fulfill the conditions for FCD.
                   * Decompose and reorder a limited piece of the text.
                   */
!                 decomposeShort(s, prevBoundary, src, buffer);
                  prevBoundary=src;
                  prevFCD16=0;
              }
          }
          return src;
      }
  
!     // Note: hasDecompBoundary() could be implemented as aliases to
!     // hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
!     // at the cost of building the FCD trie for a decomposition normalizer.
!     public boolean hasDecompBoundary(int c, boolean before) {
!         for(;;) {
!             if(c<minDecompNoCP) {
!                 return true;
              }
!             int norm16=getNorm16(c);
!             if(isHangul(norm16) || isDecompYesAndZeroCC(norm16)) {
                  return true;
!             } else if(norm16>MIN_NORMAL_MAYBE_YES) {
!                 return false;  // ccc!=0
!             } else if(isDecompNoAlgorithmic(norm16)) {
!                 c=mapAlgorithmic(c, norm16);
!             } else {
                  // c decomposes, get everything from the variable-length extra data
!                 int firstUnit=extraData.charAt(norm16);
!                 if((firstUnit&MAPPING_LENGTH_MASK)==0) {
!                     return false;
                  }
!                 if(!before) {
                      // decomp after-boundary: same as hasFCDBoundaryAfter(),
                      // fcd16<=1 || trailCC==0
                      if(firstUnit>0x1ff) {
                          return false;  // trailCC>1
                      }
                      if(firstUnit<=0xff) {
                          return true;  // trailCC==0
                      }
                      // if(trailCC==1) test leadCC==0, same as checking for before-boundary
-                 }
                  // true if leadCC==0 (hasFCDBoundaryBefore())
!                 return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(norm16-1)&0xff00)==0;
!             }
!         }
      }
  
      public boolean hasCompBoundaryBefore(int c) {
!         return c<minCompNoMaybeCP || hasCompBoundaryBefore(c, getNorm16(c));
      }
  
      private boolean isMaybe(int norm16) { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
      private boolean isMaybeOrNonZeroCC(int norm16) { return norm16>=minMaybeYes; }
      private static boolean isJamoVT(int norm16) { return norm16==JAMO_VT; }
!     private boolean isHangul(int norm16) { return norm16==minYesNo; }
      private boolean isCompYesAndZeroCC(int norm16) { return norm16<minNoNo; }
- 
      // UBool isCompYes(uint16_t norm16) const {
      //     return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
      // }
      // UBool isCompYesOrMaybe(uint16_t norm16) const {
      //     return norm16<minNoNo || minMaybeYes<=norm16;
--- 1232,1320 ----
                  src=findNextFCDBoundary(s, src, limit);
                  /*
                   * The source text does not fulfill the conditions for FCD.
                   * Decompose and reorder a limited piece of the text.
                   */
!                 decomposeShort(s, prevBoundary, src, false, false, buffer);
                  prevBoundary=src;
                  prevFCD16=0;
              }
          }
          return src;
      }
      
!     public boolean hasDecompBoundaryBefore(int c) {
!         return c < minLcccCP || (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) ||
!             norm16HasDecompBoundaryBefore(getNorm16(c));
      }
!     public boolean norm16HasDecompBoundaryBefore(int norm16) {
!         if (norm16 < minNoNoCompNoMaybeCC) {
              return true;
!         }
!         if (norm16 >= limitNoNo) {
!             return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
!         }
          // c decomposes, get everything from the variable-length extra data
!         int mapping=norm16>>OFFSET_SHIFT;
!         int firstUnit=extraData.charAt(mapping);
!         // true if leadCC==0 (hasFCDBoundaryBefore())
!         return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(mapping-1)&0xff00)==0;
!     }
!     public boolean hasDecompBoundaryAfter(int c) {
!         if (c < minDecompNoCP) {
!             return true;
!         }
!         if (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) {
!             return true;
          }
!         return norm16HasDecompBoundaryAfter(getNorm16(c));
!     }
!     public boolean norm16HasDecompBoundaryAfter(int norm16) {
!         if(norm16 <= minYesNo || isHangulLVT(norm16)) {
!             return true;
!         }
!         if (norm16 >= limitNoNo) {
!             if (isMaybeOrNonZeroCC(norm16)) {
!                 return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
!             }
!             // Maps to an isCompYesAndZeroCC.
!             return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
!         }
!         // c decomposes, get everything from the variable-length extra data
!         int mapping=norm16>>OFFSET_SHIFT;
!         int firstUnit=extraData.charAt(mapping);
          // decomp after-boundary: same as hasFCDBoundaryAfter(),
          // fcd16<=1 || trailCC==0
          if(firstUnit>0x1ff) {
              return false;  // trailCC>1
          }
          if(firstUnit<=0xff) {
              return true;  // trailCC==0
          }
          // if(trailCC==1) test leadCC==0, same as checking for before-boundary
          // true if leadCC==0 (hasFCDBoundaryBefore())
!         return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(mapping-1)&0xff00)==0;
      }
+     public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); }
  
      public boolean hasCompBoundaryBefore(int c) {
!         return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c));
!     }
!     public boolean hasCompBoundaryAfter(int c, boolean onlyContiguous) {
!         return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous);
      }
      
      private boolean isMaybe(int norm16) { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
      private boolean isMaybeOrNonZeroCC(int norm16) { return norm16>=minMaybeYes; }
+     private static boolean isInert(int norm16) { return norm16==INERT; }
      private static boolean isJamoVT(int norm16) { return norm16==JAMO_VT; }
!     private int hangulLVT() { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; }
!     private boolean isHangulLV(int norm16) { return norm16==minYesNo; }
!     private boolean isHangulLVT(int norm16) {
!         return norm16==hangulLVT();
!     }
      private boolean isCompYesAndZeroCC(int norm16) { return norm16<minNoNo; }
      // UBool isCompYes(uint16_t norm16) const {
      //     return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
      // }
      // UBool isCompYesOrMaybe(uint16_t norm16) const {
      //     return norm16<minNoNo || minMaybeYes<=norm16;
*** 1296,1426 ****
      private boolean isDecompYesAndZeroCC(int norm16) {
          return norm16<minYesNo ||
                 norm16==JAMO_VT ||
                 (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES);
      }
- 
      /**
       * A little faster and simpler than isDecompYesAndZeroCC() but does not include
       * the MaybeYes which combine-forward and have ccc=0.
!      * (Standard Unicode 5.2 normalization does not have such characters.)
       */
      private boolean isMostDecompYesAndZeroCC(int norm16) {
          return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
      }
- 
      private boolean isDecompNoAlgorithmic(int norm16) { return norm16>=limitNoNo; }
  
      // For use with isCompYes().
      // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
      // static uint8_t getCCFromYes(uint16_t norm16) {
!     //     return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0;
      // }
      private int getCCFromNoNo(int norm16) {
!         if((extraData.charAt(norm16)&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
!             return extraData.charAt(norm16-1)&0xff;
          } else {
              return 0;
          }
      }
! 
!     // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC()
!     int getTrailCCFromCompYesAndZeroCC(CharSequence s, int cpStart, int cpLimit) {
!         int c;
!         if(cpStart==(cpLimit-1)) {
!             c=s.charAt(cpStart);
!         } else {
!             c=Character.codePointAt(s, cpStart);
!         }
!         int prevNorm16=getNorm16(c);
!         if(prevNorm16<=minYesNo) {
!             return 0;  // yesYes and Hangul LV/LVT have ccc=tccc=0
          } else {
!             return extraData.charAt(prevNorm16)>>8;  // tccc from yesNo
          }
      }
  
      // Requires algorithmic-NoNo.
      private int mapAlgorithmic(int c, int norm16) {
!         return c+norm16-(minMaybeYes-MAX_DELTA-1);
      }
  
      // Requires minYesNo<norm16<limitNoNo.
!     // private int getMapping(int norm16) { return /*extraData+*/norm16; }
  
      /**
       * @return index into maybeYesCompositions, or -1
       */
      private int getCompositionsListForDecompYes(int norm16) {
!         if(norm16==0 || MIN_NORMAL_MAYBE_YES<=norm16) {
              return -1;
          } else {
              if((norm16-=minMaybeYes)<0) {
                  // norm16<minMaybeYes: index into extraData which is a substring at
                  //     maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes]
                  // same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16
                  norm16+=MIN_NORMAL_MAYBE_YES;  // for yesYes; if Jamo L: harmless empty list
              }
!             return norm16;
          }
      }
- 
      /**
       * @return index into maybeYesCompositions
       */
      private int getCompositionsListForComposite(int norm16) {
!         // composite has both mapping & compositions list
!         int firstUnit=extraData.charAt(norm16);
!         return (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16+  // mapping in maybeYesCompositions
!             1+  // +1 to skip the first unit with the mapping lenth
              (firstUnit&MAPPING_LENGTH_MASK);  // + mapping length
      }
  
      // Decompose a short piece of text which is likely to contain characters that
      // fail the quick check loop and/or where the quick check loop's overhead
      // is unlikely to be amortized.
      // Called by the compose() and makeFCD() implementations.
      // Public in Java for collation implementation code.
!     public void decomposeShort(CharSequence s, int src, int limit,
                                 ReorderingBuffer buffer) {
          while(src<limit) {
              int c=Character.codePointAt(s, src);
              src+=Character.charCount(c);
!             decompose(c, getNorm16(c), buffer);
          }
      }
! 
!     private void decompose(int c, int norm16,
!                            ReorderingBuffer buffer) {
!         // Only loops for 1:1 algorithmic mappings.
!         for(;;) {
              // get the decomposition and the lead and trail cc's
!             if(isDecompYes(norm16)) {
!                 // c does not decompose
                  buffer.append(c, getCCFromYesOrMaybe(norm16));
!             } else if(isHangul(norm16)) {
!                 // Hangul syllable: decompose algorithmically
!                 Hangul.decompose(c, buffer);
!             } else if(isDecompNoAlgorithmic(norm16)) {
                  c=mapAlgorithmic(c, norm16);
                  norm16=getNorm16(c);
!                 continue;
              } else {
                  // c decomposes, get everything from the variable-length extra data
!                 int firstUnit=extraData.charAt(norm16);
                  int length=firstUnit&MAPPING_LENGTH_MASK;
                  int leadCC, trailCC;
                  trailCC=firstUnit>>8;
                  if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
!                     leadCC=extraData.charAt(norm16-1)>>8;
                  } else {
                      leadCC=0;
                  }
!                 ++norm16;  // skip over the firstUnit
!                 buffer.append(extraData, norm16, norm16+length, leadCC, trailCC);
!             }
!             return;
          }
      }
  
      /**
       * Finds the recomposition result for
--- 1325,1459 ----
      private boolean isDecompYesAndZeroCC(int norm16) {
          return norm16<minYesNo ||
                 norm16==JAMO_VT ||
                 (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES);
      }
      /**
       * A little faster and simpler than isDecompYesAndZeroCC() but does not include
       * the MaybeYes which combine-forward and have ccc=0.
!      * (Standard Unicode 10 normalization does not have such characters.)
       */
      private boolean isMostDecompYesAndZeroCC(int norm16) {
          return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
      }
      private boolean isDecompNoAlgorithmic(int norm16) { return norm16>=limitNoNo; }
  
      // For use with isCompYes().
      // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
      // static uint8_t getCCFromYes(uint16_t norm16) {
!     //     return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0;
      // }
      private int getCCFromNoNo(int norm16) {
!         int mapping=norm16>>OFFSET_SHIFT;
!         if((extraData.charAt(mapping)&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
!             return extraData.charAt(mapping-1)&0xff;
          } else {
              return 0;
          }
      }
!     int getTrailCCFromCompYesAndZeroCC(int norm16) {
!         if(norm16<=minYesNo) {
!             return 0;  // yesYes and Hangul LV have ccc=tccc=0
          } else {
!             // For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here.
!             return extraData.charAt(norm16>>OFFSET_SHIFT)>>8;  // tccc from yesNo
          }
      }
  
      // Requires algorithmic-NoNo.
      private int mapAlgorithmic(int c, int norm16) {
!         return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta;
      }
  
      // Requires minYesNo<norm16<limitNoNo.
!     // private int getMapping(int norm16) { return extraData+(norm16>>OFFSET_SHIFT); }
  
      /**
       * @return index into maybeYesCompositions, or -1
       */
      private int getCompositionsListForDecompYes(int norm16) {
!         if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) {
              return -1;
          } else {
              if((norm16-=minMaybeYes)<0) {
                  // norm16<minMaybeYes: index into extraData which is a substring at
                  //     maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes]
                  // same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16
                  norm16+=MIN_NORMAL_MAYBE_YES;  // for yesYes; if Jamo L: harmless empty list
              }
!             return norm16>>OFFSET_SHIFT;
          }
      }
      /**
       * @return index into maybeYesCompositions
       */
      private int getCompositionsListForComposite(int norm16) {
!         // A composite has both mapping & compositions list.
!         int list=((MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16)>>OFFSET_SHIFT;
!         int firstUnit=maybeYesCompositions.charAt(list);
!         return list+  // mapping in maybeYesCompositions
!             1+  // +1 to skip the first unit with the mapping length
              (firstUnit&MAPPING_LENGTH_MASK);  // + mapping length
      }
      
      // Decompose a short piece of text which is likely to contain characters that
      // fail the quick check loop and/or where the quick check loop's overhead
      // is unlikely to be amortized.
      // Called by the compose() and makeFCD() implementations.
      // Public in Java for collation implementation code.
!     private int decomposeShort(
!             CharSequence s, int src, int limit,
!             boolean stopAtCompBoundary, boolean onlyContiguous,
              ReorderingBuffer buffer) {
          while(src<limit) {
              int c=Character.codePointAt(s, src);
+             if (stopAtCompBoundary && c < minCompNoMaybeCP) {
+                 return src;
+             }
+             int norm16 = getNorm16(c);
+             if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) {
+                 return src;
+             }
              src+=Character.charCount(c);
!             decompose(c, norm16, buffer);
!             if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
!                 return src;
              }
          }
!         return src;
!     }
!     private void decompose(int c, int norm16, ReorderingBuffer buffer) {
          // get the decomposition and the lead and trail cc's
!         if (norm16 >= limitNoNo) {
!             if (isMaybeOrNonZeroCC(norm16)) {
                  buffer.append(c, getCCFromYesOrMaybe(norm16));
!                 return;
!             }
!             // Maps to an isCompYesAndZeroCC.
              c=mapAlgorithmic(c, norm16);
              norm16=getNorm16(c);
!         }
!         if (norm16 < minYesNo) {
!             // c does not decompose
!             buffer.append(c, 0);
!         } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
!             // Hangul syllable: decompose algorithmically
!             Hangul.decompose(c, buffer);
          } else {
              // c decomposes, get everything from the variable-length extra data
!             int mapping=norm16>>OFFSET_SHIFT;
!             int firstUnit=extraData.charAt(mapping);
              int length=firstUnit&MAPPING_LENGTH_MASK;
              int leadCC, trailCC;
              trailCC=firstUnit>>8;
              if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
!                 leadCC=extraData.charAt(mapping-1)>>8;
              } else {
                  leadCC=0;
              }
!             ++mapping;  // skip over the firstUnit
!             buffer.append(extraData, mapping, mapping+length, leadCC, trailCC);
          }
      }
  
      /**
       * Finds the recomposition result for
*** 1455,1465 ****
              while(key1>(firstUnit=compositions.charAt(list))) {
                  list+=2+(firstUnit&COMP_1_TRIPLE);
              }
              if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
                  if((firstUnit&COMP_1_TRIPLE)!=0) {
!                     return ((int)compositions.charAt(list+1)<<16)|compositions.charAt(list+2);
                  } else {
                      return compositions.charAt(list+1);
                  }
              }
          } else {
--- 1488,1498 ----
              while(key1>(firstUnit=compositions.charAt(list))) {
                  list+=2+(firstUnit&COMP_1_TRIPLE);
              }
              if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
                  if((firstUnit&COMP_1_TRIPLE)!=0) {
!                     return (compositions.charAt(list+1)<<16)|compositions.charAt(list+2);
                  } else {
                      return compositions.charAt(list+1);
                  }
              }
          } else {
*** 1531,1541 ****
              if( // this character combines backward and
                  isMaybe(norm16) &&
                  // we have seen a starter that combines forward and
                  compositionsList>=0 &&
                  // the backward-combining character is not blocked
!                 (prevCC<cc || prevCC==0)) {
                  if(isJamoVT(norm16)) {
                      // c is a Jamo V/T, see if we can compose it with the previous character.
                      if(c<Hangul.JAMO_T_BASE) {
                          // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
                          char prev=(char)(sb.charAt(starter)-Hangul.JAMO_L_BASE);
--- 1564,1575 ----
              if( // this character combines backward and
                  isMaybe(norm16) &&
                  // we have seen a starter that combines forward and
                  compositionsList>=0 &&
                  // the backward-combining character is not blocked
!                 (prevCC<cc || prevCC==0)
!             ) {
                  if(isJamoVT(norm16)) {
                      // c is a Jamo V/T, see if we can compose it with the previous character.
                      if(c<Hangul.JAMO_T_BASE) {
                          // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
                          char prev=(char)(sb.charAt(starter)-Hangul.JAMO_L_BASE);
*** 1652,1715 ****
       * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()).
       * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes
       * (isCompYesAndZeroCC()) so we need not decompose.
       */
      private boolean hasCompBoundaryBefore(int c, int norm16) {
!         for(;;) {
!             if(isCompYesAndZeroCC(norm16)) {
!                 return true;
!             } else if(isMaybeOrNonZeroCC(norm16)) {
!                 return false;
!             } else if(isDecompNoAlgorithmic(norm16)) {
!                 c=mapAlgorithmic(c, norm16);
!                 norm16=getNorm16(c);
!             } else {
!                 // c decomposes, get everything from the variable-length extra data
!                 int firstUnit=extraData.charAt(norm16);
!                 if((firstUnit&MAPPING_LENGTH_MASK)==0) {
!                     return false;
                  }
!                 if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0 && (extraData.charAt(norm16-1)&0xff00)!=0) {
!                     return false;  // non-zero leadCC
                  }
!                 return isCompYesAndZeroCC(getNorm16(Character.codePointAt(extraData, norm16+1)));
              }
          }
      }
  
!     private int findPreviousCompBoundary(CharSequence s, int p) {
          while(p>0) {
              int c=Character.codePointBefore(s, p);
              p-=Character.charCount(c);
!             if(hasCompBoundaryBefore(c)) {
                  break;
              }
-             // We could also test hasCompBoundaryAfter() and return iter.codePointLimit,
-             // but that's probably not worth the extra cost.
          }
          return p;
      }
! 
!     private int findNextCompBoundary(CharSequence s, int p, int limit) {
          while(p<limit) {
              int c=Character.codePointAt(s, p);
              int norm16=normTrie.get(c);
              if(hasCompBoundaryBefore(c, norm16)) {
                  break;
              }
              p+=Character.charCount(c);
          }
          return p;
      }
  
      private int findNextFCDBoundary(CharSequence s, int p, int limit) {
          while(p<limit) {
              int c=Character.codePointAt(s, p);
!             if(c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff) {
                  break;
              }
              p+=Character.charCount(c);
          }
          return p;
      }
  
      /**
--- 1686,1757 ----
       * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()).
       * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes
       * (isCompYesAndZeroCC()) so we need not decompose.
       */
      private boolean hasCompBoundaryBefore(int c, int norm16) {
!         return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(norm16);
      }
!     private boolean norm16HasCompBoundaryBefore(int norm16) {
!         return norm16 < minNoNoCompNoMaybeCC || isAlgorithmicNoNo(norm16);
      }
!     private boolean hasCompBoundaryBefore(CharSequence s, int src, int limit) {
!         return src == limit || hasCompBoundaryBefore(Character.codePointAt(s, src));
      }
+     private boolean norm16HasCompBoundaryAfter(int norm16, boolean onlyContiguous) {
+         return (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
+             (!onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16));
      }
+     private boolean hasCompBoundaryAfter(CharSequence s, int start, int p, boolean onlyContiguous) {
+         return start == p || hasCompBoundaryAfter(Character.codePointBefore(s, p), onlyContiguous);
+     }
+     /** For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1? */
+     private boolean isTrailCC01ForCompBoundaryAfter(int norm16) {
+         return isInert(norm16) || (isDecompNoAlgorithmic(norm16) ?
+             (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1 : extraData.charAt(norm16 >> OFFSET_SHIFT) <= 0x1ff);
      }
  
!     private int findPreviousCompBoundary(CharSequence s, int p, boolean onlyContiguous) {
          while(p>0) {
              int c=Character.codePointBefore(s, p);
+             int norm16 = getNorm16(c);
+             if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+                 break;
+             }
              p-=Character.charCount(c);
!             if(hasCompBoundaryBefore(c, norm16)) {
                  break;
              }
          }
          return p;
      }
!     private int findNextCompBoundary(CharSequence s, int p, int limit, boolean onlyContiguous) {
          while(p<limit) {
              int c=Character.codePointAt(s, p);
              int norm16=normTrie.get(c);
              if(hasCompBoundaryBefore(c, norm16)) {
                  break;
              }
              p+=Character.charCount(c);
+             if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+                 break;
+             }
          }
          return p;
      }
  
+     
      private int findNextFCDBoundary(CharSequence s, int p, int limit) {
          while(p<limit) {
              int c=Character.codePointAt(s, p);
!             int norm16;
!             if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16 = getNorm16(c))) {
                  break;
              }
              p+=Character.charCount(c);
+             if (norm16HasDecompBoundaryAfter(norm16)) {
+                 break;
+             }
          }
          return p;
      }
      
      /**
*** 1988,1998 ****
          }
  
          // we know the cc of the last code point
          return trailCC;
      }
- 
      /**
       * merge two UTF-16 string parts together
       * to canonically order (order by combining classes) their concatenation
       *
       * the two strings may already be adjacent, so that the merging is done
--- 2030,2039 ----
*** 2072,2082 ****
                  prevArgs.current =  ncArgs.limit;
                  return getPrevCC(prevArgs);
              }
  
      }
- 
      private static final class PrevArgs{
          char[] src;
          int start;
          int current;
          char c1;
--- 2113,2122 ----
*** 2088,2098 ****
--- 2128,2156 ----
          int next;
          int limit;
          char c1;
          char c2;
      }
+     private static int /*unsigned byte*/ getNextCC(NextCCArgs args) {
+         args.c1=args.source[args.next++];
+         args.c2=0;
  
+         if (UTF16.isTrailSurrogate(args.c1)) {
+             /* unpaired second surrogate */
+             return 0;
+         } else if (!UTF16.isLeadSurrogate(args.c1)) {
+             return UCharacter.getCombiningClass(args.c1);
+         } else if (args.next!=args.limit &&
+                         UTF16.isTrailSurrogate(args.c2=args.source[args.next])){
+             ++args.next;
+             return UCharacter.getCombiningClass(Character.toCodePoint(args.c1, args.c2));
+         } else {
+             /* unpaired first surrogate */
+             args.c2=0;
+             return 0;
+         }
+     }
      private static int /*unsigned*/ getPrevCC(PrevArgs args) {
          args.c1=args.src[--args.current];
          args.c2=0;
  
          if (args.c1 < MIN_CCC_LCCC_CP) {
*** 2111,2155 ****
              args.c2=0;
              return 0;
          }
      }
  
!     private static int /*unsigned byte*/ getNextCC(NextCCArgs args) {
!         args.c1=args.source[args.next++];
!         args.c2=0;
! 
!         if (UTF16.isTrailSurrogate(args.c1)) {
!             /* unpaired second surrogate */
!             return 0;
!         } else if (!UTF16.isLeadSurrogate(args.c1)) {
!             return UCharacter.getCombiningClass(args.c1);
!         } else if (args.next!=args.limit &&
!                         UTF16.isTrailSurrogate(args.c2=args.source[args.next])){
!             ++args.next;
!             return UCharacter.getCombiningClass(Character.toCodePoint(args.c1, args.c2));
!         } else {
!             /* unpaired first surrogate */
!             args.c2=0;
              return 0;
          }
      }
  
      private VersionInfo dataVersion;
  
!     // Code point thresholds for quick check codes.
      private int minDecompNoCP;
      private int minCompNoMaybeCP;
  
      // Norm16 value thresholds for quick check combinations and types of extra data.
      private int minYesNo;
      private int minYesNoMappingsOnly;
      private int minNoNo;
      private int limitNoNo;
      private int minMaybeYes;
  
      private Trie2_16 normTrie;
      private String maybeYesCompositions;
      private String extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters
      private byte[] smallFCD;  // [0x100] one bit per 32 BMP code points, set if any FCD!=0
-     private int[] tccc180;  // [0x180] tccc values for U+0000..U+017F
  
! }
--- 2169,2204 ----
              args.c2=0;
              return 0;
          }
      }
  
!     private int getPreviousTrailCC(CharSequence s, int start, int p) {
!         if (start == p) {
              return 0;
          }
+         return getFCD16(Character.codePointBefore(s, p));
      }
  
      private VersionInfo dataVersion;
  
!     // BMP code point thresholds for quick check loops looking at single UTF-16 code units.
      private int minDecompNoCP;
      private int minCompNoMaybeCP;
+     private int minLcccCP;
  
      // Norm16 value thresholds for quick check combinations and types of extra data.
      private int minYesNo;
      private int minYesNoMappingsOnly;
      private int minNoNo;
+     private int minNoNoCompBoundaryBefore;
+     private int minNoNoCompNoMaybeCC;
+     private int minNoNoEmpty;
      private int limitNoNo;
+     private int centerNoNoDelta;
      private int minMaybeYes;
  
      private Trie2_16 normTrie;
      private String maybeYesCompositions;
      private String extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters
      private byte[] smallFCD;  // [0x100] one bit per 32 BMP code points, set if any FCD!=0
  
!    }
< prev index next >