< prev index next >

src/java.base/share/classes/jdk/internal/icu/text/UnicodeSet.java

Print this page
rev 57619 : [mq]: 8174270
   1 /*
   2  * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  *******************************************************************************
  28  * Copyright (C) 1996-2015, International Business Machines Corporation and
  29  * others. All Rights Reserved.
  30  *******************************************************************************
  31  */
  32 package sun.text.normalizer;
  33 
  34 import java.io.IOException;
  35 import java.text.ParsePosition;
  36 import java.util.ArrayList;
  37 import java.util.TreeSet;
  38 








  39 /**
  40  * A mutable set of Unicode characters and multicharacter strings.
  41  * Objects of this class represent <em>character classes</em> used
  42  * in regular expressions. A character class specifies a subset of Unicode
  43  * code points.  Legal code points are U+0000 to U+10FFFF, inclusive.
  44  *
  45  * Note: method freeze() will not only make the set immutable, but
  46  * also make important methods perform much faster:
  47  * contains(c), containsNone(...), span(...), spanBack(...) etc.
  48  * After the object is frozen, any subsequent call that wants to change
  49  * the object will throw UnsupportedOperationException.
  50  *
  51  * <p>The UnicodeSet class is not designed to be subclassed.
  52  *
  53  * <p><code>UnicodeSet</code> supports two APIs. The first is the
  54  * <em>operand</em> API that allows the caller to modify the value of
  55  * a <code>UnicodeSet</code> object. It conforms to Java 2's
  56  * <code>java.util.Set</code> interface, although
  57  * <code>UnicodeSet</code> does not actually implement that
  58  * interface. All methods of <code>Set</code> are supported, with the


 265  *           <td valign="top">the literal string between the quotes </td>
 266  *         </tr>
 267  *       </table>
 268  *       </td>
 269  *     </tr>
 270  *   </table>
 271  * </blockquote>
 272  * <p>To iterate over contents of UnicodeSet, the following are available:
 273  * <ul><li>{@link #ranges()} to iterate through the ranges</li>
 274  * <li>{@link #strings()} to iterate through the strings</li>
 275  * <li>{@link #iterator()} to iterate through the entire contents in a single loop.
 276  * That method is, however, not particularly efficient, since it "boxes" each code point into a String.</li>
 277  * </ul>
 278  * All of the above can be used in <b>for</b> loops.
 279  * The {@link com.ibm.icu.text.UnicodeSetIterator UnicodeSetIterator} can also be used, but not in <b>for</b> loops.
 280  * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
 281  *
 282  * @author Alan Liu
 283  * @stable ICU 2.0
 284  */
 285 class UnicodeSet {
 286 
 287     private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
 288     private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
 289     // 110000 for codepoints
 290 
 291     /**
 292      * Minimum value that can be stored in a UnicodeSet.
 293      * @stable ICU 2.0
 294      */
 295     public static final int MIN_VALUE = LOW;
 296 
 297     /**
 298      * Maximum value that can be stored in a UnicodeSet.
 299      * @stable ICU 2.0
 300      */
 301     public static final int MAX_VALUE = HIGH - 1;
 302 
 303     private int len;      // length used; list may be longer to minimize reallocs
 304     private int[] list;   // MUST be terminated with HIGH
 305     private int[] rangeList; // internal buffer


   1 /*
   2  * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  *******************************************************************************
  28  * Copyright (C) 1996-2015, International Business Machines Corporation and
  29  * others. All Rights Reserved.
  30  *******************************************************************************
  31  */
  32 package jdk.internal.icu.text;
  33 

  34 import java.text.ParsePosition;
  35 import java.util.ArrayList;
  36 import java.util.TreeSet;
  37 
  38 import jdk.internal.icu.impl.BMPSet;
  39 import jdk.internal.icu.impl.UCharacterProperty;
  40 import jdk.internal.icu.impl.UnicodeSetStringSpan;
  41 import jdk.internal.icu.impl.Utility;
  42 import jdk.internal.icu.lang.UCharacter;
  43 import jdk.internal.icu.util.OutputInt;
  44 import jdk.internal.icu.util.VersionInfo;
  45 
  46 /**
  47  * A mutable set of Unicode characters and multicharacter strings.
  48  * Objects of this class represent <em>character classes</em> used
  49  * in regular expressions. A character class specifies a subset of Unicode
  50  * code points.  Legal code points are U+0000 to U+10FFFF, inclusive.
  51  *
  52  * Note: method freeze() will not only make the set immutable, but
  53  * also make important methods perform much faster:
  54  * contains(c), containsNone(...), span(...), spanBack(...) etc.
  55  * After the object is frozen, any subsequent call that wants to change
  56  * the object will throw UnsupportedOperationException.
  57  *
  58  * <p>The UnicodeSet class is not designed to be subclassed.
  59  *
  60  * <p><code>UnicodeSet</code> supports two APIs. The first is the
  61  * <em>operand</em> API that allows the caller to modify the value of
  62  * a <code>UnicodeSet</code> object. It conforms to Java 2's
  63  * <code>java.util.Set</code> interface, although
  64  * <code>UnicodeSet</code> does not actually implement that
  65  * interface. All methods of <code>Set</code> are supported, with the


 272  *           <td valign="top">the literal string between the quotes </td>
 273  *         </tr>
 274  *       </table>
 275  *       </td>
 276  *     </tr>
 277  *   </table>
 278  * </blockquote>
 279  * <p>To iterate over contents of UnicodeSet, the following are available:
 280  * <ul><li>{@link #ranges()} to iterate through the ranges</li>
 281  * <li>{@link #strings()} to iterate through the strings</li>
 282  * <li>{@link #iterator()} to iterate through the entire contents in a single loop.
 283  * That method is, however, not particularly efficient, since it "boxes" each code point into a String.</li>
 284  * </ul>
 285  * All of the above can be used in <b>for</b> loops.
 286  * The {@link com.ibm.icu.text.UnicodeSetIterator UnicodeSetIterator} can also be used, but not in <b>for</b> loops.
 287  * <p>To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}.
 288  *
 289  * @author Alan Liu
 290  * @stable ICU 2.0
 291  */
 292 public class UnicodeSet {
 293 
 294     private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
 295     private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
 296     // 110000 for codepoints
 297 
 298     /**
 299      * Minimum value that can be stored in a UnicodeSet.
 300      * @stable ICU 2.0
 301      */
 302     public static final int MIN_VALUE = LOW;
 303 
 304     /**
 305      * Maximum value that can be stored in a UnicodeSet.
 306      * @stable ICU 2.0
 307      */
 308     public static final int MAX_VALUE = HIGH - 1;
 309 
 310     private int len;      // length used; list may be longer to minimize reallocs
 311     private int[] list;   // MUST be terminated with HIGH
 312     private int[] rangeList; // internal buffer


< prev index next >