jdk9-Dev-Unicode7-8032446 Cdiff jdk/src/java.base/share/classes/sun/text/normalizer/UnicodeSet.java

jdk/src/java.base/share/classes/sun/text/normalizer/UnicodeSet.java


*** 1,7 ****
  /*
!  * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.  Oracle designates this
--- 1,7 ----
  /*
!  * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.  Oracle designates this
*** 20,52 ****
   *
   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   * or visit www.oracle.com if you need additional information or have any
   * questions.
   */
  /*
   *******************************************************************************
!  * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
!  *                                                                             *
!  * The original version of this source code and documentation is copyrighted   *
!  * and owned by IBM, These materials are provided under terms of a License     *
!  * Agreement between IBM and Sun. This technology is protected by multiple     *
!  * US and International patents. This notice and attribution to IBM may not    *
!  * to removed.                                                                 *
   *******************************************************************************
   */
- 
  package sun.text.normalizer;
  
  import java.text.ParsePosition;
! import java.util.Iterator;
  import java.util.TreeSet;
  
  /**
!  * A mutable set of Unicode characters and multicharacter strings.  Objects of this class
!  * represent <em>character classes</em> used in regular expressions.
!  * A character specifies a subset of Unicode code points.  Legal
!  * code points are U+0000 to U+10FFFF, inclusive.
   *
   * <p>The UnicodeSet class is not designed to be subclassed.
   *
   * <p><code>UnicodeSet</code> supports two APIs. The first is the
   * <em>operand</em> API that allows the caller to modify the value of
--- 20,54 ----
   *
   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   * or visit www.oracle.com if you need additional information or have any
   * questions.
   */
+ 
  /*
   *******************************************************************************
!  * Copyright (C) 1996-2015, International Business Machines Corporation and
!  * others. All Rights Reserved.
   *******************************************************************************
   */
  package sun.text.normalizer;
  
+ import java.io.IOException;
  import java.text.ParsePosition;
! import java.util.ArrayList;
  import java.util.TreeSet;
  
  /**
!  * A mutable set of Unicode characters and multicharacter strings.
!  * Objects of this class represent <em>character classes</em> used
!  * in regular expressions. A character specifies a subset of Unicode
!  * code points.  Legal code points are U+0000 to U+10FFFF, inclusive.
!  *
!  * Note: method freeze() not only makes the set immutable, but
!  * also makes important methods perform much faster:
!  * contains(c), containsNone(...), span(...), spanBack(...) etc.
!  * After the object is frozen, any subsequent call that wants to change
!  * the object will throw UnsupportedOperationException.
   *
   * <p>The UnicodeSet class is not designed to be subclassed.
   *
   * <p><code>UnicodeSet</code> supports two APIs. The first is the
   * <em>operand</em> API that allows the caller to modify the value of
*** 116,126 ****
   *     </tr>
   *   </table>
   * </blockquote>
   *
   * Any character may be preceded by a backslash in order to remove any special
!  * meaning.  White space characters, as defined by UCharacterProperty.isRuleWhiteSpace(), are
   * ignored, unless they are escaped.
   *
   * <p>Property patterns specify a set of characters having a certain
   * property as defined by the Unicode standard.  Both the POSIX-like
   * "[:Lu:]" and the Perl-like syntax "\p{Lu}" are recognized.  For a
--- 118,128 ----
   *     </tr>
   *   </table>
   * </blockquote>
   *
   * Any character may be preceded by a backslash in order to remove any special
!  * meaning.  White space characters, as defined by the Unicode Pattern_White_Space property, are
   * ignored, unless they are escaped.
   *
   * <p>Property patterns specify a set of characters having a certain
   * property as defined by the Unicode standard.  Both the POSIX-like
   * "[:Lu:]" and the Perl-like syntax "\p{Lu}" are recognized.  For a
*** 265,282 ****
   *       </table>
   *       </td>
   *     </tr>
   *   </table>
   * </blockquote>
!  * <p>To iterate over contents of UnicodeSet, use UnicodeSetIterator class.
   *
   * @author Alan Liu
   * @stable ICU 2.0
-  * @see UnicodeSetIterator
   */
! @SuppressWarnings("deprecation")
! public class UnicodeSet implements UnicodeMatcher {
  
      private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
      private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
                                               // 110000 for codepoints
  
--- 267,290 ----
   *       </table>
   *       </td>
   *     </tr>
   *   </table>
   * </blockquote>
!  * <p>To iterate over contents of UnicodeSet, the following are available:
!  * <ul><li>{@link #ranges()} to iterate through the ranges</li>
!  * <li>{@link #strings()} to iterate through the strings</li>
!  * <li>{@link #iterator()} to iterate through the entire contents in a single loop.
!  * That method is, however, not particularly efficient, since it "boxes" each code point into a String.
!  * </ul>
!  * All of the above can be used in <b>for</b> loops.
!  * The {@link com.ibm.icu.text.UnicodeSetIterator UnicodeSetIterator} can also be used, but not in <b>for</b> loops.
!  * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
   *
   * @author Alan Liu
   * @stable ICU 2.0
   */
! class UnicodeSet {
  
      private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
      private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
      // 110000 for codepoints
  
*** 297,346 ****
      private int[] rangeList; // internal buffer
      private int[] buffer; // internal buffer
  
      // NOTE: normally the field should be of type SortedSet; but that is missing a public clone!!
      // is not private so that UnicodeSetIterator can get access
!     TreeSet<String> strings = new TreeSet<>();
  
      /**
       * The pattern representation of this set.  This may not be the
       * most economical pattern.  It is the pattern supplied to
       * applyPattern(), with variables substituted and whitespace
       * removed.  For sets constructed without applyPattern(), or
       * modified using the non-pattern API, this string will be null,
       * indicating that toPattern() must generate a pattern
       * representation from the inversion list.
       */
-     private String pat = null;
  
      private static final int START_EXTRA = 16;         // initial storage. Must be >= 0
      private static final int GROW_EXTRA = START_EXTRA; // extra amount for growth. Must be >= 0
  
!     /**
!      * A set of all characters _except_ the second through last characters of
!      * certain ranges.  These ranges are ranges of characters whose
!      * properties are all exactly alike, e.g. CJK Ideographs from
!      * U+4E00 to U+9FA5.
!      */
!     private static UnicodeSet INCLUSIONS[] = null;
  
      //----------------------------------------------------------------
      // Public API
      //----------------------------------------------------------------
  
      /**
       * Constructs an empty set.
       * @stable ICU 2.0
       */
!     public UnicodeSet() {
          list = new int[1 + START_EXTRA];
          list[len++] = HIGH;
      }
  
      /**
!      * Constructs a set containing the given range.
!      * If {@code end > start} then an empty set is created.
       *
       * @param start first character, inclusive, of range
       * @param end last character, inclusive, of range
       * @stable ICU 2.0
       */
--- 305,358 ----
      private int[] rangeList; // internal buffer
      private int[] buffer; // internal buffer
  
      // NOTE: normally the field should be of type SortedSet; but that is missing a public clone!!
      // is not private so that UnicodeSetIterator can get access
!     TreeSet<String> strings = new TreeSet<String>();
  
      /**
       * The pattern representation of this set.  This may not be the
       * most economical pattern.  It is the pattern supplied to
       * applyPattern(), with variables substituted and whitespace
       * removed.  For sets constructed without applyPattern(), or
       * modified using the non-pattern API, this string will be null,
       * indicating that toPattern() must generate a pattern
       * representation from the inversion list.
       */
  
      private static final int START_EXTRA = 16;         // initial storage. Must be >= 0
      private static final int GROW_EXTRA = START_EXTRA; // extra amount for growth. Must be >= 0
  
!     private static UnicodeSet INCLUSION = null;
! 
!     private volatile BMPSet bmpSet; // The set is frozen if bmpSet or stringSpan is not null.
!     private volatile UnicodeSetStringSpan stringSpan;
  
      //----------------------------------------------------------------
      // Public API
      //----------------------------------------------------------------
  
      /**
       * Constructs an empty set.
       * @stable ICU 2.0
       */
!     private UnicodeSet() {
          list = new int[1 + START_EXTRA];
          list[len++] = HIGH;
      }
  
      /**
!      * Constructs a copy of an existing set.
!      * @stable ICU 2.0
!      */
!     private UnicodeSet(UnicodeSet other) {
!         set(other);
!     }
! 
!     /**
!      * Constructs a set containing the given range.
!      * If {@code start > end} then an empty set is created.
       *
       * @param start first character, inclusive, of range
       * @param end last character, inclusive, of range
       * @stable ICU 2.0
       */
*** 357,541 ****
       * a syntax error.
       * @stable ICU 2.0
       */
      public UnicodeSet(String pattern) {
          this();
!         applyPattern(pattern, null, null, IGNORE_SPACE);
      }
  
      /**
       * Make this object represent the same set as <code>other</code>.
       * @param other a <code>UnicodeSet</code> whose value will be
       * copied to this object
       * @stable ICU 2.0
       */
-     @SuppressWarnings("unchecked") // Casting result of clone of a collection
      public UnicodeSet set(UnicodeSet other) {
          list = other.list.clone();
          len = other.len;
!         pat = other.pat;
!         strings = (TreeSet)other.strings.clone();
          return this;
      }
  
      /**
!      * Modifies this set to represent the set specified by the given pattern.
!      * See the class description for the syntax of the pattern language.
!      * Whitespace is ignored.
!      * @param pattern a string specifying what characters are in the set
!      * @exception java.lang.IllegalArgumentException if the pattern
!      * contains a syntax error.
       * @stable ICU 2.0
       */
!     public final UnicodeSet applyPattern(String pattern) {
!         return applyPattern(pattern, null, null, IGNORE_SPACE);
!     }
! 
!     /**
!      * Append the <code>toPattern()</code> representation of a
!      * string to the given <code>StringBuffer</code>.
!      */
!     private static void _appendToPat(StringBuffer buf, String s, boolean escapeUnprintable) {
!         for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) {
!             _appendToPat(buf, UTF16.charAt(s, i), escapeUnprintable);
!         }
!     }
! 
!     /**
!      * Append the <code>toPattern()</code> representation of a
!      * character to the given <code>StringBuffer</code>.
!      */
!     private static void _appendToPat(StringBuffer buf, int c, boolean escapeUnprintable) {
!         if (escapeUnprintable && Utility.isUnprintable(c)) {
!             // Use hex escape notation (<backslash>uxxxx or <backslash>Uxxxxxxxx) for anything
!             // unprintable
!             if (Utility.escapeUnprintable(buf, c)) {
!                 return;
!             }
!         }
!         // Okay to let ':' pass through
!         switch (c) {
!         case '[': // SET_OPEN:
!         case ']': // SET_CLOSE:
!         case '-': // HYPHEN:
!         case '^': // COMPLEMENT:
!         case '&': // INTERSECTION:
!         case '\\': //BACKSLASH:
!         case '{':
!         case '}':
!         case '$':
!         case ':':
!             buf.append('\\');
!             break;
!         default:
!             // Escape whitespace
!             if (UCharacterProperty.isRuleWhiteSpace(c)) {
!                 buf.append('\\');
!             }
!             break;
!         }
!         UTF16.append(buf, c);
!     }
! 
!     /**
!      * Append a string representation of this set to result.  This will be
!      * a cleaned version of the string passed to applyPattern(), if there
!      * is one.  Otherwise it will be generated.
!      */
!     private StringBuffer _toPattern(StringBuffer result,
!                                     boolean escapeUnprintable) {
!         if (pat != null) {
!             int i;
!             int backslashCount = 0;
!             for (i=0; i<pat.length(); ) {
!                 int c = UTF16.charAt(pat, i);
!                 i += UTF16.getCharCount(c);
!                 if (escapeUnprintable && Utility.isUnprintable(c)) {
!                     // If the unprintable character is preceded by an odd
!                     // number of backslashes, then it has been escaped.
!                     // Before unescaping it, we delete the final
!                     // backslash.
!                     if ((backslashCount % 2) == 1) {
!                         result.setLength(result.length() - 1);
!                     }
!                     Utility.escapeUnprintable(result, c);
!                     backslashCount = 0;
!                 } else {
!                     UTF16.append(result, c);
!                     if (c == '\\') {
!                         ++backslashCount;
!                     } else {
!                         backslashCount = 0;
!                     }
!                 }
!             }
!             return result;
!         }
! 
!         return _generatePattern(result, escapeUnprintable, true);
!     }
! 
!     /**
!      * Generate and append a string representation of this set to result.
!      * This does not use this.pat, the cleaned up copy of the string
!      * passed to applyPattern().
!      * @param includeStrings if false, doesn't include the strings.
!      * @stable ICU 3.8
!      */
!     public StringBuffer _generatePattern(StringBuffer result,
!                                          boolean escapeUnprintable, boolean includeStrings) {
!         result.append('[');
! 
          int count = getRangeCount();
- 
-         // If the set contains at least 2 intervals and includes both
-         // MIN_VALUE and MAX_VALUE, then the inverse representation will
-         // be more economical.
-         if (count > 1 &&
-             getRangeStart(0) == MIN_VALUE &&
-             getRangeEnd(count-1) == MAX_VALUE) {
- 
-             // Emit the inverse
-             result.append('^');
- 
-             for (int i = 1; i < count; ++i) {
-                 int start = getRangeEnd(i-1)+1;
-                 int end = getRangeStart(i)-1;
-                 _appendToPat(result, start, escapeUnprintable);
-                 if (start != end) {
-                     if ((start+1) != end) {
-                         result.append('-');
-                     }
-                     _appendToPat(result, end, escapeUnprintable);
-                 }
-             }
-         }
- 
-         // Default; emit the ranges as pairs
-         else {
              for (int i = 0; i < count; ++i) {
!                 int start = getRangeStart(i);
!                 int end = getRangeEnd(i);
!                 _appendToPat(result, start, escapeUnprintable);
!                 if (start != end) {
!                     if ((start+1) != end) {
!                         result.append('-');
!                     }
!                     _appendToPat(result, end, escapeUnprintable);
!                 }
              }
!         }
! 
!         if (includeStrings && strings.size() > 0) {
!             Iterator<String> it = strings.iterator();
!             while (it.hasNext()) {
!                 result.append('{');
!                 _appendToPat(result, it.next(), escapeUnprintable);
!                 result.append('}');
!             }
!         }
!         return result.append(']');
      }
  
      // for internal use, after checkFrozen has been called
      private UnicodeSet add_unchecked(int start, int end) {
          if (start < MIN_VALUE || start > MAX_VALUE) {
--- 369,410 ----
       * a syntax error.
       * @stable ICU 2.0
       */
      public UnicodeSet(String pattern) {
          this();
!         applyPattern(pattern, null);
      }
  
      /**
       * Make this object represent the same set as <code>other</code>.
       * @param other a <code>UnicodeSet</code> whose value will be
       * copied to this object
       * @stable ICU 2.0
       */
      public UnicodeSet set(UnicodeSet other) {
+         checkFrozen();
          list = other.list.clone();
          len = other.len;
!         strings = new TreeSet<String>(other.strings);
          return this;
      }
  
      /**
!      * Returns the number of elements in this set (its cardinality).
!      * Note that the elements of a set may include both individual
!      * codepoints and strings.
!      *
!      * @return the number of elements in this set (its cardinality).
       * @stable ICU 2.0
       */
!     public int size() {
!         int n = 0;
          int count = getRangeCount();
          for (int i = 0; i < count; ++i) {
!             n += getRangeEnd(i) - getRangeStart(i) + 1;
          }
!         return n + strings.size();
      }
  
      // for internal use, after checkFrozen has been called
      private UnicodeSet add_unchecked(int start, int end) {
          if (start < MIN_VALUE || start > MAX_VALUE) {
*** 557,566 ****
--- 426,436 ----
       * present.  If this set already contains the specified character,
       * the call leaves this set unchanged.
       * @stable ICU 2.0
       */
      public final UnicodeSet add(int c) {
+         checkFrozen();
          return add_unchecked(c);
      }
  
      // for internal use only, after checkFrozen has been called
      private final UnicodeSet add_unchecked(int c) {
*** 641,651 ****
              list[i] = c;
              list[i+1] = c+1;
              len += 2;
          }
  
-         pat = null;
          return this;
      }
  
      /**
       * Adds the specified multicharacter to this set if it is not already
--- 511,520 ----
*** 655,681 ****
       * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
       * @param s the source string
       * @return this object, for chaining
       * @stable ICU 2.0
       */
!     public final UnicodeSet add(String s) {
          int cp = getSingleCP(s);
          if (cp < 0) {
!             strings.add(s);
!             pat = null;
          } else {
              add_unchecked(cp, cp);
          }
          return this;
      }
  
      /**
       * @return a code point IF the string consists of a single one.
       * otherwise returns -1.
!      * @param string to test
       */
!     private static int getSingleCP(String s) {
          if (s.length() < 1) {
              throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
          }
          if (s.length() > 2) return -1;
          if (s.length() == 1) return s.charAt(0);
--- 524,552 ----
       * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
       * @param s the source string
       * @return this object, for chaining
       * @stable ICU 2.0
       */
!     public final UnicodeSet add(CharSequence s) {
!         checkFrozen();
          int cp = getSingleCP(s);
          if (cp < 0) {
!             strings.add(s.toString());
          } else {
              add_unchecked(cp, cp);
          }
          return this;
      }
  
      /**
+      * Utility for getting code point from single code point CharSequence.
+      * See the public UTF16.getSingleCodePoint()
       * @return a code point IF the string consists of a single one.
       * otherwise returns -1.
!      * @param s to test
       */
!     private static int getSingleCP(CharSequence s) {
          if (s.length() < 1) {
              throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
          }
          if (s.length() > 2) return -1;
          if (s.length() == 1) return s.charAt(0);
*** 699,737 ****
       * @param end last character, inclusive, of range to be removed
       * from this set.
       * @stable ICU 2.0
       */
      public UnicodeSet complement(int start, int end) {
          if (start < MIN_VALUE || start > MAX_VALUE) {
              throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
          }
          if (end < MIN_VALUE || end > MAX_VALUE) {
              throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
          }
          if (start <= end) {
              xor(range(start, end), 2, 0);
          }
-         pat = null;
-         return this;
-     }
- 
-     /**
-      * This is equivalent to
-      * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
-      * @stable ICU 2.0
-      */
-     public UnicodeSet complement() {
-         if (list[0] == LOW) {
-             System.arraycopy(list, 1, list, 0, len-1);
-             --len;
-         } else {
-             ensureCapacity(len+1);
-             System.arraycopy(list, 0, list, 1, len);
-             list[0] = LOW;
-             ++len;
-         }
-         pat = null;
          return this;
      }
  
      /**
       * Returns true if this set contains the given character.
--- 570,589 ----
       * @param end last character, inclusive, of range to be removed
       * from this set.
       * @stable ICU 2.0
       */
      public UnicodeSet complement(int start, int end) {
+         checkFrozen();
          if (start < MIN_VALUE || start > MAX_VALUE) {
              throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
          }
          if (end < MIN_VALUE || end > MAX_VALUE) {
              throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
          }
          if (start <= end) {
              xor(range(start, end), 2, 0);
          }
          return this;
      }
  
      /**
       * Returns true if this set contains the given character.
*** 741,750 ****
--- 593,608 ----
       */
      public boolean contains(int c) {
          if (c < MIN_VALUE || c > MAX_VALUE) {
              throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6));
          }
+         if (bmpSet != null) {
+             return bmpSet.contains(c);
+         }
+         if (stringSpan != null) {
+             return stringSpan.contains(c);
+         }
  
          /*
          // Set i to the index of the start item greater than ch
          // We know we will terminate without length test!
          int i = -1;
*** 798,863 ****
              }
          }
      }
  
      /**
-      * Adds all of the elements in the specified set to this set if
-      * they're not already present.  This operation effectively
-      * modifies this set so that its value is the <i>union</i> of the two
-      * sets.  The behavior of this operation is unspecified if the specified
-      * collection is modified while the operation is in progress.
-      *
-      * @param c set whose elements are to be added to this set.
-      * @stable ICU 2.0
-      */
-     public UnicodeSet addAll(UnicodeSet c) {
-         add(c.list, c.len, 0);
-         strings.addAll(c.strings);
-         return this;
-     }
- 
-     /**
       * Retains only the elements in this set that are contained in the
       * specified set.  In other words, removes from this set all of
       * its elements that are not contained in the specified set.  This
       * operation effectively modifies this set so that its value is
       * the <i>intersection</i> of the two sets.
       *
       * @param c set that defines which elements this set will retain.
       * @stable ICU 2.0
       */
      public UnicodeSet retainAll(UnicodeSet c) {
          retain(c.list, c.len, 0);
          strings.retainAll(c.strings);
          return this;
      }
  
      /**
-      * Removes from this set all of its elements that are contained in the
-      * specified set.  This operation effectively modifies this
-      * set so that its value is the <i>asymmetric set difference</i> of
-      * the two sets.
-      *
-      * @param c set that defines which elements will be removed from
-      *          this set.
-      * @stable ICU 2.0
-      */
-     public UnicodeSet removeAll(UnicodeSet c) {
-         retain(c.list, c.len, 2);
-         strings.removeAll(c.strings);
-         return this;
-     }
- 
-     /**
       * Removes all of the elements from this set.  This set will be
       * empty after this call returns.
       * @stable ICU 2.0
       */
      public UnicodeSet clear() {
          list[0] = HIGH;
          len = 1;
-         pat = null;
          strings.clear();
          return this;
      }
  
      /**
--- 656,690 ----
              }
          }
      }
  
      /**
       * Retains only the elements in this set that are contained in the
       * specified set.  In other words, removes from this set all of
       * its elements that are not contained in the specified set.  This
       * operation effectively modifies this set so that its value is
       * the <i>intersection</i> of the two sets.
       *
       * @param c set that defines which elements this set will retain.
       * @stable ICU 2.0
       */
      public UnicodeSet retainAll(UnicodeSet c) {
+         checkFrozen();
          retain(c.list, c.len, 0);
          strings.retainAll(c.strings);
          return this;
      }
  
      /**
       * Removes all of the elements from this set.  This set will be
       * empty after this call returns.
       * @stable ICU 2.0
       */
      public UnicodeSet clear() {
+         checkFrozen();
          list[0] = HIGH;
          len = 1;
          strings.clear();
          return this;
      }
  
      /**
*** 921,1329 ****
       * is the last character of the pattern string.
       * @return an inversion list for the parsed substring
       * of <code>pattern</code>
       * @exception java.lang.IllegalArgumentException if the parse fails.
       */
!     UnicodeSet applyPattern(String pattern,
!                       ParsePosition pos,
!                       SymbolTable symbols,
!                       int options) {
! 
!         // Need to build the pattern in a temporary string because
!         // _applyPattern calls add() etc., which set pat to empty.
!         boolean parsePositionWasNull = pos == null;
!         if (parsePositionWasNull) {
!             pos = new ParsePosition(0);
!         }
! 
!         StringBuffer rebuiltPat = new StringBuffer();
!         RuleCharacterIterator chars =
!             new RuleCharacterIterator(pattern, symbols, pos);
!         applyPattern(chars, symbols, rebuiltPat, options);
!         if (chars.inVariable()) {
!             syntaxError(chars, "Extra chars in variable value");
!         }
!         pat = rebuiltPat.toString();
!         if (parsePositionWasNull) {
!             int i = pos.getIndex();
! 
!             // Skip over trailing whitespace
!             if ((options & IGNORE_SPACE) != 0) {
!                 i = Utility.skipWhitespace(pattern, i);
!             }
! 
!             if (i != pattern.length()) {
!                 throw new IllegalArgumentException("Parse of \"" + pattern +
!                                                    "\" failed at " + i);
!             }
!         }
!         return this;
!     }
! 
!     /**
!      * Parse the pattern from the given RuleCharacterIterator.  The
!      * iterator is advanced over the parsed pattern.
!      * @param chars iterator over the pattern characters.  Upon return
!      * it will be advanced to the first character after the parsed
!      * pattern, or the end of the iteration if all characters are
!      * parsed.
!      * @param symbols symbol table to use to parse and dereference
!      * variables, or null if none.
!      * @param rebuiltPat the pattern that was parsed, rebuilt or
!      * copied from the input pattern, as appropriate.
!      * @param options a bit mask of zero or more of the following:
!      * IGNORE_SPACE, CASE.
!      */
!     void applyPattern(RuleCharacterIterator chars, SymbolTable symbols,
!                       StringBuffer rebuiltPat, int options) {
!         // Syntax characters: [ ] ^ - & { }
! 
!         // Recognized special forms for chars, sets: c-c s-s s&s
! 
!         int opts = RuleCharacterIterator.PARSE_VARIABLES |
!                    RuleCharacterIterator.PARSE_ESCAPES;
!         if ((options & IGNORE_SPACE) != 0) {
!             opts |= RuleCharacterIterator.SKIP_WHITESPACE;
!         }
! 
!         StringBuffer patBuf = new StringBuffer(), buf = null;
!         boolean usePat = false;
!         UnicodeSet scratch = null;
!         Object backup = null;
! 
!         // mode: 0=before [, 1=between [...], 2=after ]
!         // lastItem: 0=none, 1=char, 2=set
!         int lastItem = 0, lastChar = 0, mode = 0;
!         char op = 0;
! 
!         boolean invert = false;
! 
!         clear();
! 
!         while (mode != 2 && !chars.atEnd()) {
!             if (false) {
!                 // Debugging assertion
!                 if (!((lastItem == 0 && op == 0) ||
!                       (lastItem == 1 && (op == 0 || op == '-')) ||
!                       (lastItem == 2 && (op == 0 || op == '-' || op == '&')))) {
!                     throw new IllegalArgumentException();
!                 }
!             }
! 
!             int c = 0;
!             boolean literal = false;
!             UnicodeSet nested = null;
! 
!             // -------- Check for property pattern
! 
!             // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
!             int setMode = 0;
!             if (resemblesPropertyPattern(chars, opts)) {
!                 setMode = 2;
!             }
! 
!             // -------- Parse '[' of opening delimiter OR nested set.
!             // If there is a nested set, use `setMode' to define how
!             // the set should be parsed.  If the '[' is part of the
!             // opening delimiter for this pattern, parse special
!             // strings "[", "[^", "[-", and "[^-".  Check for stand-in
!             // characters representing a nested set in the symbol
!             // table.
! 
!             else {
!                 // Prepare to backup if necessary
!                 backup = chars.getPos(backup);
!                 c = chars.next(opts);
!                 literal = chars.isEscaped();
! 
!                 if (c == '[' && !literal) {
!                     if (mode == 1) {
!                         chars.setPos(backup); // backup
!                         setMode = 1;
!                     } else {
!                         // Handle opening '[' delimiter
!                         mode = 1;
!                         patBuf.append('[');
!                         backup = chars.getPos(backup); // prepare to backup
!                         c = chars.next(opts);
!                         literal = chars.isEscaped();
!                         if (c == '^' && !literal) {
!                             invert = true;
!                             patBuf.append('^');
!                             backup = chars.getPos(backup); // prepare to backup
!                             c = chars.next(opts);
!                             literal = chars.isEscaped();
!                         }
!                         // Fall through to handle special leading '-';
!                         // otherwise restart loop for nested [], \p{}, etc.
!                         if (c == '-') {
!                             literal = true;
!                             // Fall through to handle literal '-' below
!                         } else {
!                             chars.setPos(backup); // backup
!                             continue;
!                         }
!                     }
!                 } else if (symbols != null) {
!                      UnicodeMatcher m = symbols.lookupMatcher(c); // may be null
!                      if (m != null) {
!                          try {
!                              nested = (UnicodeSet) m;
!                              setMode = 3;
!                          } catch (ClassCastException e) {
!                              syntaxError(chars, "Syntax error");
!                          }
!                      }
!                 }
!             }
! 
!             // -------- Handle a nested set.  This either is inline in
!             // the pattern or represented by a stand-in that has
!             // previously been parsed and was looked up in the symbol
!             // table.
! 
!             if (setMode != 0) {
!                 if (lastItem == 1) {
!                     if (op != 0) {
!                         syntaxError(chars, "Char expected after operator");
!                     }
!                     add_unchecked(lastChar, lastChar);
!                     _appendToPat(patBuf, lastChar, false);
!                     lastItem = op = 0;
!                 }
! 
!                 if (op == '-' || op == '&') {
!                     patBuf.append(op);
!                 }
! 
!                 if (nested == null) {
!                     if (scratch == null) scratch = new UnicodeSet();
!                     nested = scratch;
!                 }
!                 switch (setMode) {
!                 case 1:
!                     nested.applyPattern(chars, symbols, patBuf, options);
!                     break;
!                 case 2:
!                     chars.skipIgnored(opts);
!                     nested.applyPropertyPattern(chars, patBuf, symbols);
!                     break;
!                 case 3: // `nested' already parsed
!                     nested._toPattern(patBuf, false);
!                     break;
!                 }
! 
!                 usePat = true;
! 
!                 if (mode == 0) {
!                     // Entire pattern is a category; leave parse loop
!                     set(nested);
!                     mode = 2;
!                     break;
!                 }
! 
!                 switch (op) {
!                 case '-':
!                     removeAll(nested);
!                     break;
!                 case '&':
!                     retainAll(nested);
!                     break;
!                 case 0:
!                     addAll(nested);
!                     break;
!                 }
! 
!                 op = 0;
!                 lastItem = 2;
! 
!                 continue;
!             }
! 
!             if (mode == 0) {
!                 syntaxError(chars, "Missing '['");
!             }
! 
!             // -------- Parse special (syntax) characters.  If the
!             // current character is not special, or if it is escaped,
!             // then fall through and handle it below.
! 
!             if (!literal) {
!                 switch (c) {
!                 case ']':
!                     if (lastItem == 1) {
!                         add_unchecked(lastChar, lastChar);
!                         _appendToPat(patBuf, lastChar, false);
!                     }
!                     // Treat final trailing '-' as a literal
!                     if (op == '-') {
!                         add_unchecked(op, op);
!                         patBuf.append(op);
!                     } else if (op == '&') {
!                         syntaxError(chars, "Trailing '&'");
!                     }
!                     patBuf.append(']');
!                     mode = 2;
!                     continue;
!                 case '-':
!                     if (op == 0) {
!                         if (lastItem != 0) {
!                             op = (char) c;
!                             continue;
!                         } else {
!                             // Treat final trailing '-' as a literal
!                             add_unchecked(c, c);
!                             c = chars.next(opts);
!                             literal = chars.isEscaped();
!                             if (c == ']' && !literal) {
!                                 patBuf.append("-]");
!                                 mode = 2;
!                                 continue;
!                             }
!                         }
!                     }
!                     syntaxError(chars, "'-' not after char or set");
!                     break;
!                 case '&':
!                     if (lastItem == 2 && op == 0) {
!                         op = (char) c;
!                         continue;
!                     }
!                     syntaxError(chars, "'&' not after set");
!                     break;
!                 case '^':
!                     syntaxError(chars, "'^' not after '['");
!                     break;
!                 case '{':
!                     if (op != 0) {
!                         syntaxError(chars, "Missing operand after operator");
!                     }
!                     if (lastItem == 1) {
!                         add_unchecked(lastChar, lastChar);
!                         _appendToPat(patBuf, lastChar, false);
!                     }
!                     lastItem = 0;
!                     if (buf == null) {
!                         buf = new StringBuffer();
!                     } else {
!                         buf.setLength(0);
!                     }
!                     boolean ok = false;
!                     while (!chars.atEnd()) {
!                         c = chars.next(opts);
!                         literal = chars.isEscaped();
!                         if (c == '}' && !literal) {
!                             ok = true;
!                             break;
!                         }
!                         UTF16.append(buf, c);
!                     }
!                     if (buf.length() < 1 || !ok) {
!                         syntaxError(chars, "Invalid multicharacter string");
!                     }
!                     // We have new string. Add it to set and continue;
!                     // we don't need to drop through to the further
!                     // processing
!                     add(buf.toString());
!                     patBuf.append('{');
!                     _appendToPat(patBuf, buf.toString(), false);
!                     patBuf.append('}');
!                     continue;
!                 case SymbolTable.SYMBOL_REF:
!                     //         symbols  nosymbols
!                     // [a-$]   error    error (ambiguous)
!                     // [a$]    anchor   anchor
!                     // [a-$x]  var "x"* literal '$'
!                     // [a-$.]  error    literal '$'
!                     // *We won't get here in the case of var "x"
!                     backup = chars.getPos(backup);
!                     c = chars.next(opts);
!                     literal = chars.isEscaped();
!                     boolean anchor = (c == ']' && !literal);
!                     if (symbols == null && !anchor) {
!                         c = SymbolTable.SYMBOL_REF;
!                         chars.setPos(backup);
!                         break; // literal '$'
!                     }
!                     if (anchor && op == 0) {
!                         if (lastItem == 1) {
!                             add_unchecked(lastChar, lastChar);
!                             _appendToPat(patBuf, lastChar, false);
!                         }
!                         add_unchecked(UnicodeMatcher.ETHER);
!                         usePat = true;
!                         patBuf.append(SymbolTable.SYMBOL_REF).append(']');
!                         mode = 2;
!                         continue;
!                     }
!                     syntaxError(chars, "Unquoted '$'");
!                     break;
!                 default:
!                     break;
!                 }
!             }
! 
!             // -------- Parse literal characters.  This includes both
!             // escaped chars ("\u4E01") and non-syntax characters
!             // ("a").
! 
!             switch (lastItem) {
!             case 0:
!                 lastItem = 1;
!                 lastChar = c;
!                 break;
!             case 1:
!                 if (op == '-') {
!                     if (lastChar >= c) {
!                         // Don't allow redundant (a-a) or empty (b-a) ranges;
!                         // these are most likely typos.
!                         syntaxError(chars, "Invalid range");
!                     }
!                     add_unchecked(lastChar, c);
!                     _appendToPat(patBuf, lastChar, false);
!                     patBuf.append(op);
!                     _appendToPat(patBuf, c, false);
!                     lastItem = op = 0;
!                 } else {
!                     add_unchecked(lastChar, lastChar);
!                     _appendToPat(patBuf, lastChar, false);
!                     lastChar = c;
!                 }
!                 break;
!             case 2:
!                 if (op != 0) {
!                     syntaxError(chars, "Set expected after operator");
!                 }
!                 lastChar = c;
!                 lastItem = 1;
!                 break;
!             }
!         }
! 
!         if (mode != 2) {
!             syntaxError(chars, "Missing ']'");
!         }
! 
!         chars.skipIgnored(opts);
! 
!         if (invert) {
!             complement();
!         }
! 
!         // Use the rebuilt pattern (pat) only if necessary.  Prefer the
!         // generated pattern.
!         if (usePat) {
!             rebuiltPat.append(patBuf.toString());
          } else {
!             _generatePattern(rebuiltPat, false, true);
!         }
      }
  
!     private static void syntaxError(RuleCharacterIterator chars, String msg) {
          // Always throws: reports a pattern syntax error as an
          // IllegalArgumentException whose message combines msg with the
          // escaped string form of the iterator (chars.toString()).
!         throw new IllegalArgumentException("Error: " + msg + " at \"" +
!                                            Utility.escape(chars.toString()) +
!                                            '"');
      }
  
      //----------------------------------------------------------------
      // Implementation: Utility methods
      //----------------------------------------------------------------
--- 748,769 ----
       * is the last character of the pattern string.
       * @return an inversion list for the parsed substring
       * of <code>pattern</code>
       * @exception java.lang.IllegalArgumentException if the parse fails.
       */
!     private UnicodeSet applyPattern(String pattern,
!             ParsePosition pos) {
          // Only the literal pattern [:age=3.2:] is accepted; it is turned into
          // a VersionFilter for version 3.2 and applied via applyFilter().
          // The pos argument is never read in this body.
!         if ("[:age=3.2:]".equals(pattern)) {
!             checkFrozen();
!             VersionInfo version = VersionInfo.getInstance("3.2");
!             applyFilter(new VersionFilter(version), UCharacterProperty.SRC_PROPSVEC);
          } else {
              // NOTE(review): this throws IllegalStateException, whereas the
              // Javadoc on this method names IllegalArgumentException -- confirm
              // which one callers are meant to see.
!             throw new IllegalStateException("UnicodeSet.applyPattern(unexpected pattern "
!                           + pattern + ")");
          }
  
!         return this;
      }
  
      //----------------------------------------------------------------
      // Implementation: Utility methods
      //----------------------------------------------------------------
*** 1395,1405 ****
          }
          // swap list and buffer
          int[] temp = list;
          list = buffer;
          buffer = temp;
-         pat = null;
          return this;
      }
  
      // polarity = 0 is normal: x union y
      // polarity = 2: x union ~y
--- 835,844 ----
*** 1493,1503 ****
          len = k;
          // swap list and buffer
          int[] temp = list;
          list = buffer;
          buffer = temp;
-         pat = null;
          return this;
      }
  
      // polarity = 0 is normal: x intersect y
      // polarity = 2: x intersect ~y == set-minus
--- 932,941 ----
*** 1564,1574 ****
          len = k;
          // swap list and buffer
          int[] temp = list;
          list = buffer;
          buffer = temp;
-         pat = null;
          return this;
      }
  
      private static final int max(int a, int b) {
          return (a > b) ? a : b;
--- 1002,1011 ----
*** 1580,1641 ****
  
      /**
       * Predicate over code points.  applyFilter() takes one of these, and
       * VersionFilter is the implementation visible in this part of the file.
       */
      private static interface Filter {
          boolean contains(int codePoint);
      }
  
!     // VersionInfo for unassigned characters
!     static final VersionInfo NO_VERSION = VersionInfo.getInstance(0, 0, 0, 0);
  
      /**
       * Filter that accepts a code point when UCharacter.getAge() returns
       * something other than NO_VERSION and that age compares less than or
       * equal to the version given at construction.
       */
      private static class VersionFilter implements Filter {
          VersionInfo version;
- 
          VersionFilter(VersionInfo version) { this.version = version; }
- 
          public boolean contains(int ch) {
              VersionInfo v = UCharacter.getAge(ch);
              // Reference comparison ok; VersionInfo caches and reuses
              // unique objects.
              return v != NO_VERSION &&
                     v.compareTo(version) <= 0;
          }
      }
  
      /**
       * Returns the inclusions set cached for the given property source,
       * creating the cache array and the entry on the first request.
       * The method is static synchronized, so this lazy initialization
       * runs under the class lock.  Only UCharacterProperty.SRC_PROPSVEC
       * has a case in the switch; any other value that reaches the switch
       * throws IllegalStateException.
       */
      private static synchronized UnicodeSet getInclusions(int src) {
!         if (INCLUSIONS == null) {
!             INCLUSIONS = new UnicodeSet[UCharacterProperty.SRC_COUNT];
!         }
!         if(INCLUSIONS[src] == null) {
!             UnicodeSet incl = new UnicodeSet();
!             switch(src) {
!             case UCharacterProperty.SRC_PROPSVEC:
!                 UCharacterProperty.getInstance().upropsvec_addPropertyStarts(incl);
!                 break;
!             default:
                  throw new IllegalStateException("UnicodeSet.getInclusions(unknown src "+src+")");
              }
!             INCLUSIONS[src] = incl;
          }
!         return INCLUSIONS[src];
      }
  
      /**
       * Generic filter-based scanning code for UCD property UnicodeSets.
       */
      private UnicodeSet applyFilter(Filter filter, int src) {
!         // Walk through all Unicode characters, noting the start
          // and end of each range for which filter.contain(c) is
          // true.  Add each range to a set.
          //
!         // To improve performance, use the INCLUSIONS set, which
          // encodes information about character ranges that are known
!         // to have identical properties, such as the CJK Ideographs
!         // from U+4E00 to U+9FA5.  INCLUSIONS contains all characters
!         // except the first characters of such ranges.
!         //
!         // TODO Where possible, instead of scanning over code points,
!         // use internal property data to initialize UnicodeSets for
!         // those properties.  Scanning code points is slow.
  
          clear();
  
          int startHasProperty = -1;
          UnicodeSet inclusions = getInclusions(src);
--- 1017,1066 ----
  
      /**
       * Predicate over code points.  applyFilter() takes one of these, and
       * VersionFilter is the implementation visible in this part of the file.
       */
      private static interface Filter {
          boolean contains(int codePoint);
      }
  
!     private static final VersionInfo NO_VERSION = VersionInfo.getInstance(0, 0, 0, 0);
  
      /**
       * Filter that accepts a code point when UCharacter.getAge() returns
       * something other than NO_VERSION and that age compares less than or
       * equal to the version given at construction.
       */
      private static class VersionFilter implements Filter {
          VersionInfo version;
          VersionFilter(VersionInfo version) { this.version = version; }
          public boolean contains(int ch) {
              VersionInfo v = UCharacter.getAge(ch);
              // Reference comparison ok; VersionInfo caches and reuses
              // unique objects.
              return v != NO_VERSION &&
                      v.compareTo(version) <= 0;
          }
      }
  
      /**
       * Returns the single cached inclusions set, building it through
       * upropsvec_addPropertyStarts() on the first request.  The method is
       * static synchronized, so this lazy initialization runs under the
       * class lock.
       *
       * @param src the property source; must be UCharacterProperty.SRC_PROPSVEC
       * @return the cached inclusions set
       * @throws IllegalStateException if src is any other value
       */
      private static synchronized UnicodeSet getInclusions(int src) {
!         if (src != UCharacterProperty.SRC_PROPSVEC) {
              throw new IllegalStateException("UnicodeSet.getInclusions(unknown src "+src+")");
          }
! 
!         if (INCLUSION == null) {
!             UnicodeSet incl = new UnicodeSet();
!             UCharacterProperty.INSTANCE.upropsvec_addPropertyStarts(incl);
!             INCLUSION = incl;
          }
!         return INCLUSION;
      }
  
      /**
       * Generic filter-based scanning code for UCD property UnicodeSets.
       */
      private UnicodeSet applyFilter(Filter filter, int src) {
!         // Logically, walk through all Unicode characters, noting the start
          // and end of each range for which filter.contain(c) is
          // true.  Add each range to a set.
          //
!         // To improve performance, use an inclusions set which
          // encodes information about character ranges that are known
!         // to have identical properties.
!         // getInclusions(src) contains exactly the first characters of
!         // same-value ranges for the given properties "source".
  
          clear();
  
          int startHasProperty = -1;
          UnicodeSet inclusions = getInclusions(src);
*** 1666,1871 ****
  
          return this;
      }
  
      /**
!      * Remove leading and trailing rule white space and compress
!      * internal rule white space to a single space character.
!      * The input is walked one code point at a time, so supplementary
!      * characters are copied whole.
       *
!      * @param source the text to normalize
!      * @return a new string with the white space trimmed and collapsed
!      * @see UCharacterProperty#isRuleWhiteSpace
       */
!     private static String mungeCharName(String source) {
!         StringBuffer buf = new StringBuffer();
!         for (int i=0; i<source.length(); ) {
!             int ch = UTF16.charAt(source, i);
!             i += UTF16.getCharCount(ch);
!             if (UCharacterProperty.isRuleWhiteSpace(ch)) {
                  // Skip white space at the very start or directly after a
                  // space that was already emitted.
!                 if (buf.length() == 0 ||
!                     buf.charAt(buf.length() - 1) == ' ') {
!                     continue;
!                 }
!                 ch = ' '; // convert to ' '
!             }
!             UTF16.append(buf, ch);
!         }
          // The loop never emits two spaces in a row, so at most one
          // trailing space remains to be removed.
!         if (buf.length() != 0 &&
!             buf.charAt(buf.length() - 1) == ' ') {
!             buf.setLength(buf.length() - 1);
!         }
!         return buf.toString();
      }
  
      /**
!      * Modifies this set to contain those code points which have the
!      * given value for the given property.  Prior contents of this
!      * set are lost.
!      * In this implementation only the Age property with a non-empty
!      * value is handled; the value is normalized with mungeCharName()
!      * and applied as a VersionFilter.
!      * @param propertyAlias the property alias
!      * @param valueAlias the value alias
!      * @param symbols if not null, then symbols are first called to see if a property
!      * is available. If true, then everything else is skipped.
!      * NOTE(review): the body below never reads this argument.
!      * @return this set
!      * @throws IllegalArgumentException if valueAlias is empty or
!      * propertyAlias is anything other than Age
!      * @stable ICU 3.2
!      */
!     public UnicodeSet applyPropertyAlias(String propertyAlias,
!                                          String valueAlias, SymbolTable symbols) {
!         if (valueAlias.length() > 0) {
!             if (propertyAlias.equals("Age")) {
!                 // Must munge name, since
!                 // VersionInfo.getInstance() does not do
!                 // 'loose' matching.
!                 VersionInfo version = VersionInfo.getInstance(mungeCharName(valueAlias));
!                 applyFilter(new VersionFilter(version), UCharacterProperty.SRC_PROPSVEC);
!                 return this;
!             }
          }
-         throw new IllegalArgumentException("Unsupported property: " + propertyAlias);
      }
  
!     /**
!      * Return true if the given iterator appears to point at a
!      * property pattern.  Regardless of the result, return with the
!      * iterator unchanged.
!      * @param chars iterator over the pattern characters.  Upon return
!      * it will be unchanged.
!      * @param iterOpts RuleCharacterIterator options
!      * @return true if the next two characters are [: or a backslash
!      * followed by N, p or P
!      */
!     private static boolean resemblesPropertyPattern(RuleCharacterIterator chars,
!                                                     int iterOpts) {
!         boolean result = false;
          // Escape parsing is switched off for the look-ahead so that a raw
          // backslash can be compared below.
!         iterOpts &= ~RuleCharacterIterator.PARSE_ESCAPES;
          // Remember the position; it is restored before returning.
!         Object pos = chars.getPos(null);
!         int c = chars.next(iterOpts);
!         if (c == '[' || c == '\\') {
              // The second character is read with white-space skipping turned off.
!             int d = chars.next(iterOpts & ~RuleCharacterIterator.SKIP_WHITESPACE);
!             result = (c == '[') ? (d == ':') :
!                      (d == 'N' || d == 'p' || d == 'P');
          }
!         chars.setPos(pos);
!         return result;
      }
  
      /**
!      * Parse the given property pattern at the given parse position.
!      * Recognized openers are [: (optionally followed by ^), \p, \P and \N;
!      * the latter three must be followed by an opening brace.  The text
!      * between the delimiters is split at the first equals sign, if any,
!      * into a property name and a value and handed to applyPropertyAlias().
!      * @param pattern the text to parse
!      * @param ppos on entry the index at which to start; it is moved just
!      * past the closing delimiter only when parsing succeeds
!      * @param symbols passed through to applyPropertyAlias()
!      * @return this set, or null if fewer than 5 characters remain or an
!      * opening or closing delimiter is missing
       */
!     private UnicodeSet applyPropertyPattern(String pattern, ParsePosition ppos, SymbolTable symbols) {
!         int pos = ppos.getIndex();
! 
!         // On entry, ppos should point to one of the following locations:
  
!         // Minimum length is 5 characters, e.g. \p{L}
!         if ((pos+5) > pattern.length()) {
!             return null;
!         }
! 
!         boolean posix = false; // true for [:pat:], false for \p{pat} \P{pat} \N{pat}
!         boolean isName = false; // true for \N{pat}, o/w false
!         boolean invert = false;
! 
!         // Look for an opening [:, [:^, \p, or \P
!         if (pattern.regionMatches(pos, "[:", 0, 2)) {
!             posix = true;
!             pos = Utility.skipWhitespace(pattern, pos+2);
!             if (pos < pattern.length() && pattern.charAt(pos) == '^') {
!                 ++pos;
!                 invert = true;
!             }
!         } else if (pattern.regionMatches(true, pos, "\\p", 0, 2) ||
!                    pattern.regionMatches(pos, "\\N", 0, 2)) {
              // The first regionMatches ignores case, so it covers both the
              // lower-case and the upper-case p form; the character itself
              // decides whether the result is inverted.
!             char c = pattern.charAt(pos+1);
!             invert = (c == 'P');
!             isName = (c == 'N');
!             pos = Utility.skipWhitespace(pattern, pos+2);
!             if (pos == pattern.length() || pattern.charAt(pos++) != '{') {
!                 // Syntax error; "\p" or "\P" not followed by "{"
!                 return null;
              }
!         } else {
!             // Open delimiter not seen
!             return null;
          }
- 
-         // Look for the matching close delimiter, either :] or }
-         int close = pattern.indexOf(posix ? ":]" : "}", pos);
-         if (close < 0) {
-             // Syntax error; close delimiter missing
-             return null;
-         }
- 
-         // Look for an '=' sign.  If this is present, we will parse a
-         // medium \p{gc=Cf} or long \p{GeneralCategory=Format}
-         // pattern.
-         int equals = pattern.indexOf('=', pos);
-         String propName, valueName;
-         if (equals >= 0 && equals < close && !isName) {
-             // Equals seen; parse medium/long pattern
-             propName = pattern.substring(pos, equals);
-             valueName = pattern.substring(equals+1, close);
          }
  
!         else {
!             // Handle case where no '=' is seen, and \N{}
!             propName = pattern.substring(pos, close);
!             valueName = "";
  
!             // Handle \N{name}
!             if (isName) {
!                 // This is a little inefficient since it means we have to
!                 // parse "na" back to UProperty.NAME even though we already
!                 // know it's UProperty.NAME.  If we refactor the API to
!                 // support args of (int, String) then we can remove
!                 // "na" and make this a little more efficient.
!                 valueName = propName;
!                 propName = "na";
              }
          }
  
!         applyPropertyAlias(propName, valueName, symbols);
! 
!         if (invert) {
!             complement();
          }
  
!         // Move to the limit position after the close delimiter
!         ppos.setIndex(close + (posix ? 2 : 1));
  
!         return this;
      }
  
      /**
!      * Parse a property pattern.
!      * The remaining text is taken from chars.lookahead() and parsed by the
!      * String-based applyPropertyPattern overload starting at index 0.
!      * @param chars iterator over the pattern characters.  Upon return
!      * it will be advanced to the first character after the parsed
!      * pattern, or the end of the iteration if all characters are
!      * parsed.
!      * @param rebuiltPat the pattern that was parsed, rebuilt or
!      * copied from the input pattern, as appropriate.
!      * @param symbols passed through to the String-based overload
!      * @throws IllegalArgumentException raised through syntaxError() when
!      * the parse position is still 0 after parsing
!      */
!     private void applyPropertyPattern(RuleCharacterIterator chars,
!                                       StringBuffer rebuiltPat, SymbolTable symbols) {
!         String patStr = chars.lookahead();
!         ParsePosition pos = new ParsePosition(0);
!         applyPropertyPattern(patStr, pos, symbols);
!         if (pos.getIndex() == 0) {
!             syntaxError(chars, "Invalid property pattern");
          }
          // Consume exactly the characters that were parsed and copy them
          // verbatim into the rebuilt pattern.
-         chars.jumpahead(pos.getIndex());
-         rebuiltPat.append(patStr, 0, pos.getIndex());
      }
  
!     //----------------------------------------------------------------
!     // Case folding API
!     //----------------------------------------------------------------
  
      /**
!      * Bitmask for constructor and applyPattern() indicating that
!      * white space should be ignored.  If set, ignore characters for
!      * which UCharacterProperty.isRuleWhiteSpace() returns true,
!      * unless they are quoted or escaped.  This may be ORed together
!      * with other selectors.
!      * @stable ICU 3.8
       */
!     public static final int IGNORE_SPACE = 1;
  
! }
  
--- 1091,1407 ----
  
          return this;
      }
  
      /**
!      * Is this frozen, according to the Freezable interface?
!      * The set counts as frozen once either of the bmpSet or stringSpan
!      * helper objects exists; freeze() creates at least one of them.
       *
!      * @return true if bmpSet or stringSpan is non-null
!      * @stable ICU 3.8
       */
!     public boolean isFrozen() {
!         return (bmpSet != null || stringSpan != null);
      }
  
      /**
!      * Freeze this class, according to the Freezable interface.
!      * If the set is not frozen yet, this releases the buffer field,
!      * shrinks the backing list when more than GROW_EXTRA slots would be
!      * saved, and then builds the stringSpan and/or bmpSet helper objects.
!      * Calling it on an already frozen set changes nothing.
!      *
!      * @return this
!      * @stable ICU 4.4
!      */
!     public UnicodeSet freeze() {
!         if (!isFrozen()) {
!             // Do most of what compact() does before freezing because
!             // compact() will not work when the set is frozen.
!             // Small modification: Don't shrink if the savings would be tiny (<=GROW_EXTRA).
! 
!             // Delete buffer first to defragment memory less.
!             buffer = null;
!             if (list.length > (len + GROW_EXTRA)) {
!                 // Make the capacity equal to len or 1.
!                 // We don't want to realloc of 0 size.
!                 int capacity = (len == 0) ? 1 : len;
!                 int[] oldList = list;
!                 list = new int[capacity];
                  // Copy the first capacity entries, walking from the
                  // highest index down to 0.
!                 for (int i = capacity; i-- > 0;) {
!                     list[i] = oldList[i];
                  }
              }
  
!             // Optimize contains() and span() and similar functions.
!             if (!strings.isEmpty()) {
!                 stringSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), UnicodeSetStringSpan.ALL);
!             }
!             if (stringSpan == null || !stringSpan.needsStringSpanUTF16()) {
!                 // Optimize for code point spans.
!                 // There are no strings, or
!                 // all strings are irrelevant for span() etc. because
!                 // all of each string's code points are contained in this set.
!                 // However, fully contained strings are relevant for spanAndCount(),
!                 // so we create both objects.
!                 bmpSet = new BMPSet(list, len);
              }
!         }
!         return this;
      }
  
      /**
!      * Span a string using this UnicodeSet.
!      * This is a convenience overload that delegates to
!      * span(s, 0, spanCondition), i.e. it always starts at index 0.
!      * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
!      * @param s The string to be spanned
!      * @param spanCondition The span condition
!      * @return the length of the span
!      * @stable ICU 4.4
       */
!     public int span(CharSequence s, SpanCondition spanCondition) {
!         return span(s, 0, spanCondition);
!     }
  
!     /**
!      * Span a string using this UnicodeSet.
!      *   If the start index is less than 0, span will start from 0.
!      *   If the start index is greater than or equal to the string length, span returns the string length.
!      * The work is delegated, in this order of preference, to bmpSet, to
!      * stringSpan, to a temporary UnicodeSetStringSpan built for this call,
!      * or to the plain code point loop in spanCodePointsAndCount().
!      * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
!      * @param s The string to be spanned
!      * @param start The start index that the span begins
!      * @param spanCondition The span condition
!      * @return the string index which ends the span (i.e. exclusive)
!      * @stable ICU 4.4
!      */
!     public int span(CharSequence s, int start, SpanCondition spanCondition) {
!         int end = s.length();
          // NOTE(review): a negative start is clamped to 0 without re-checking
          // it against end, so an empty s with a negative start continues past
          // this guard -- confirm the delegates below tolerate that.
!         if (start < 0) {
!             start = 0;
!         } else if (start >= end) {
!             return end;
          }
!         if (bmpSet != null) {
!             // Frozen set without strings, or no string is relevant for span().
!             return bmpSet.span(s, start, spanCondition, null);
!         }
!         if (stringSpan != null) {
!             return stringSpan.span(s, start, spanCondition);
!         } else if (!strings.isEmpty()) {
              // Neither helper exists, so the set is not frozen: build a
              // one-off string span object restricted to the forward
              // direction that matches spanCondition.
!             int which = spanCondition == SpanCondition.NOT_CONTAINED ? UnicodeSetStringSpan.FWD_UTF16_NOT_CONTAINED
!                     : UnicodeSetStringSpan.FWD_UTF16_CONTAINED;
!             UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), which);
!             if (strSpan.needsStringSpanUTF16()) {
!                 return strSpan.span(s, start, spanCondition);
              }
          }
  
!         return spanCodePointsAndCount(s, start, spanCondition, null);
!     }
  
!     /**
!      * Same as span() but also counts the smallest number of set elements on any path across the span.
!      * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
!      * @param s The string to be spanned
!      * @param start The start index; values below 0 are treated as 0, and
!      * values at or beyond the string length make the method return the
!      * string length immediately
!      * @param spanCondition The span condition
!      * @param outCount An output-only object (must not be null) for returning the count.
!      * @return the limit (exclusive end) of the span
!      * @throws IllegalArgumentException if outCount is null
!      */
!     public int spanAndCount(CharSequence s, int start, SpanCondition spanCondition, OutputInt outCount) {
!         if (outCount == null) {
!             throw new IllegalArgumentException("outCount must not be null");
          }
+         int end = s.length();
          // NOTE(review): as in span(), an empty s combined with a negative
          // start is not caught by this guard -- confirm the delegates cope.
+         if (start < 0) {
+             start = 0;
+         } else if (start >= end) {
+             return end;
+         }
+         if (stringSpan != null) {
+             // We might also have bmpSet != null,
+             // but fully-contained strings are relevant for counting elements.
+             return stringSpan.spanAndCount(s, start, spanCondition, outCount);
+         } else if (bmpSet != null) {
+             return bmpSet.span(s, start, spanCondition, outCount);
+         } else if (!strings.isEmpty()) {
              // Not frozen but strings are present: use a temporary string
              // span object with the WITH_COUNT flag added.
+             int which = spanCondition == SpanCondition.NOT_CONTAINED ? UnicodeSetStringSpan.FWD_UTF16_NOT_CONTAINED
+                     : UnicodeSetStringSpan.FWD_UTF16_CONTAINED;
+             which |= UnicodeSetStringSpan.WITH_COUNT;
+             UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), which);
+             return strSpan.spanAndCount(s, start, spanCondition, outCount);
          }
  
!         return spanCodePointsAndCount(s, start, spanCondition, outCount);
      }
  
!     private int spanCodePointsAndCount(CharSequence s, int start,
!             SpanCondition spanCondition, OutputInt outCount) {
!         /*
!          * Code-point-only span: walks s forward from start one code point at a time and
!          * stops at the first code point whose contains() result differs from what the
!          * condition asks for. Returns the index where the walk stopped and, when outCount
!          * is non-null, stores the number of code points stepped over in outCount.value.
!          *
!          * Every condition other than NOT_CONTAINED is treated as "contained".
!          */
!         boolean spanContained = (spanCondition != SpanCondition.NOT_CONTAINED);
  
!         int c;
!         int next = start;
!         int length = s.length();
!         int count = 0;
!         do { // the first code point is read before any bounds test, so start must be a valid index into s
!             c = Character.codePointAt(s, next);
!             if (spanContained != contains(c)) {
!                 break;
!             }
!             ++count;
!             next += Character.charCount(c); // advance by 1 or 2 chars depending on the code point
!         } while (next < length);
!         if (outCount != null) { outCount.value = count; } // outCount may be null when no count is wanted
!         return next;
      }
  
      /**
!      * Span a string backwards (from the fromIndex) using this UnicodeSet.
!      * If the fromIndex is less than or equal to 0, spanBack will return 0.
!      * If fromIndex is greater than the string length, spanBack will start from the string length.
!      * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
!      * <p>The work is delegated to bmpSet if it is non-null, else to stringSpan if it is non-null,
!      * else to a temporary UnicodeSetStringSpan when strings is non-empty and that helper reports
!      * needsStringSpanUTF16(); otherwise a code-point-only loop using contains() is run.
!      * @param s The string to be spanned
!      * @param fromIndex The index (exclusive) from which the string is spanned backwards
!      * @param spanCondition The span condition
!      * @return The string index which starts the span (i.e. inclusive).
!      * @stable ICU 4.4
!      */
!     public int spanBack(CharSequence s, int fromIndex, SpanCondition spanCondition) {
!         if (fromIndex <= 0) {
!             return 0;
!         }
!         if (fromIndex > s.length()) {
!             fromIndex = s.length(); // NOTE(review): for an empty s this pins fromIndex to 0, which is not re-checked below
!         }
!         if (bmpSet != null) {
!             // Frozen set without strings, or no string is relevant for spanBack().
!             return bmpSet.spanBack(s, fromIndex, spanCondition);
!         }
!         if (stringSpan != null) {
!             return stringSpan.spanBack(s, fromIndex, spanCondition);
!         } else if (!strings.isEmpty()) {
!             int which = (spanCondition == SpanCondition.NOT_CONTAINED)
!                     ? UnicodeSetStringSpan.BACK_UTF16_NOT_CONTAINED
!                             : UnicodeSetStringSpan.BACK_UTF16_CONTAINED;
!             UnicodeSetStringSpan strSpan = new UnicodeSetStringSpan(this, new ArrayList<String>(strings), which);
!             if (strSpan.needsStringSpanUTF16()) {
!                 return strSpan.spanBack(s, fromIndex, spanCondition);
              }
          }
  
!         // Code-point-only fallback: every condition other than NOT_CONTAINED is treated as "contained".
!         boolean spanContained = (spanCondition != SpanCondition.NOT_CONTAINED);
! 
!         int c;
!         int prev = fromIndex;
!         do { // the code point before prev is read before any bounds test
!             c = Character.codePointBefore(s, prev);
!             if (spanContained != contains(c)) {
!                 break;
!             }
!             prev -= Character.charCount(c); // step back by 1 or 2 chars depending on the code point
!         } while (prev > 0);
!         return prev;
!     }
  
      /**
!      * Clone a thawed version of this class, according to the Freezable interface.
!      * <p>The copy is made by passing this set to a UnicodeSet constructor; an assertion
!      * (active only when assertions are enabled) checks that the copy is not frozen.
!      * @return the clone, not frozen
!      * @stable ICU 4.4
       */
!     public UnicodeSet cloneAsThawed() {
!         UnicodeSet result = new UnicodeSet(this);
!         assert !result.isFrozen();
!         return result;
!     }
  
!     // internal function: throws UnsupportedOperationException when isFrozen() returns true
!     private void checkFrozen() {
!         if (isFrozen()) {
!             throw new UnsupportedOperationException("Attempt to modify frozen object");
!         }
!     }
! 
!     /**
!      * Argument values for whether span() and similar functions continue while the current character is contained vs.
!      * not contained in the set.
!      * <p>
!      * The functionality is straightforward for sets with only single code points, without strings (which is the common
!      * case):
!      * <ul>
!      * <li>CONTAINED and SIMPLE work the same (the code-point-only loops in this class only test whether the
!      * condition is NOT_CONTAINED).
!      * <li>CONTAINED and SIMPLE are inverses of NOT_CONTAINED.
!      * <li>span() and spanBack() partition any string the
!      * same way when alternating between span(NOT_CONTAINED) and span(either "contained" condition).
!      * <li>Using a
!      * complemented (inverted) set and the opposite span conditions yields the same results.
!      * </ul>
!      * When a set contains multi-code point strings, then these statements may not be true, depending on the strings in
!      * the set (for example, whether they overlap with each other) and the string that is processed. For a set with
!      * strings:
!      * <ul>
!      * <li>The complement of the set contains the opposite set of code points, but the same set of strings.
!      * Therefore, complementing both the set and the span conditions may yield different results.
!      * <li>When starting spans
!      * at different positions in a string (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
!      * because a set string may start before the later position.
!      * <li>span(SIMPLE) may be shorter than
!      * span(CONTAINED) because it will not recursively try all possible paths. For example, with a set which
!      * contains the three strings "xy", "xya" and "ax", span("xyax", CONTAINED) will return 4 but span("xyax",
!      * SIMPLE) will return 3. span(SIMPLE) will never be longer than span(CONTAINED).
!      * <li>With either "contained" condition, span() and spanBack() may partition a string in different ways. For example,
!      * with a set which contains the two strings "ab" and "ba", and when processing the string "aba", span() will yield
!      * contained/not-contained boundaries of { 0, 2, 3 } while spanBack() will yield boundaries of { 0, 1, 3 }.
!      * </ul>
!      * Note: If it is important to get the same boundaries whether iterating forward or backward through a string, then
!      * either only span() should be used and the boundaries cached for backward operation, or an ICU BreakIterator could
!      * be used.
!      * <p>
!      * Note: Unpaired surrogates are treated like surrogate code points. Similarly, set strings match only on code point
!      * boundaries, never in the middle of a surrogate pair.
!      *
!      * @stable ICU 4.4
!      */
!     public enum SpanCondition {
!         /**
!          * Continues a span() while there is no set element at the current position.
!          * Increments by one code point at a time.
!          * Stops before the first set element (character or string).
!          * (For code points only, this is like {@code while contains(current)==false}).
!          * <p>
!          * When span() returns, the substring between where it started and the position it returned consists only of
!          * characters that are not in the set, and none of its strings overlap with the span.
!          *
!          * @stable ICU 4.4
!          */
!         NOT_CONTAINED,
! 
!         /**
!          * Spans the longest substring that is a concatenation of set elements (characters or strings).
!          * (For characters only, this is like {@code while contains(current)==true}).
!          * <p>
!          * When span() returns, the substring between where it started and the position it returned consists only of set
!          * elements (characters or strings) that are in the set.
!          * <p>
!          * If a set contains strings, then the span will be the longest substring for which there
!          * exists at least one non-overlapping concatenation of set elements (characters or strings).
!          * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.
!          * (Java/ICU/Perl regex stops at the first match of an OR.)
!          *
!          * @stable ICU 4.4
!          */
!         CONTAINED,
! 
!         /**
!          * Continues a span() while there is a set element at the current position.
!          * Increments by the longest matching element at each position.
!          * (For characters only, this is like {@code while contains(current)==true}).
!          * <p>
!          * When span() returns, the substring between where it started and the position it returned consists only of set
!          * elements (characters or strings) that are in the set.
!          * <p>
!          * If a set only contains single characters, then this is the same as CONTAINED.
!          * <p>
!          * If a set contains strings, then the span will be the longest substring with a match at each position with the
!          * longest single set element (character or string).
!          * <p>
!          * Use this span condition together with other longest-match algorithms, such as ICU converters
!          * (ucnv_getUnicodeSet()).
!          *
!          * @stable ICU 4.4
!          */
!         SIMPLE,
!     }
  
+ }
< prev index next >