1 /*
   2  * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 /*
  26  *******************************************************************************
  27  * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
  28  *                                                                             *
  29  * The original version of this source code and documentation is copyrighted   *
  30  * and owned by IBM, These materials are provided under terms of a License     *
  31  * Agreement between IBM and Sun. This technology is protected by multiple     *
  32  * US and International patents. This notice and attribution to IBM may not    *
  33  * to removed.                                                                 *
  34  *******************************************************************************
  35  */
  36 
  37 package sun.text.normalizer;
  38 
  39 import java.util.Iterator;
  40 
  41 /**
  42  * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
  43  * iterates over either code points or code point ranges.  After all
  44  * code points or ranges have been returned, it returns the
 * multicharacter strings of the UnicodeSet, if any.
  46  *
  47  * <p>To iterate over code points, use a loop like this:
  48  * <pre>
 * UnicodeSetIterator it = new UnicodeSetIterator(set);
 * while (it.next()) {
 *   if (it.codepoint != UnicodeSetIterator.IS_STRING) {
 *     processCodepoint(it.codepoint);
 *   } else {
 *     processString(it.string);
 *   }
 * }
  57  * </pre>
  58  *
  59  * <p>To iterate over code point ranges, use a loop like this:
  60  * <pre>
 * UnicodeSetIterator it = new UnicodeSetIterator(set);
 * while (it.nextRange()) {
 *   if (it.codepoint != UnicodeSetIterator.IS_STRING) {
 *     processCodepointRange(it.codepoint, it.codepointEnd);
 *   } else {
 *     processString(it.string);
 *   }
 * }
  69  * </pre>
  70  * @author M. Davis
  71  * @stable ICU 2.0
  72  */
  73 public class UnicodeSetIterator {
  74 
  75     /**
  76      * Value of {@code codepoint} if the iterator points to a string.
  77      * If {@code codepoint == IS_STRING}, then examine
  78      * {@code string} for the current iteration result.
  79      * @stable ICU 2.0
  80      */
  81     public static int IS_STRING = -1;
  82 
  83     /**
  84      * Current code point, or the special value {@code IS_STRING}, if
  85      * the iterator points to a string.
  86      * @stable ICU 2.0
  87      */
  88     public int codepoint;
  89 
  90     /**
  91      * When iterating over ranges using {@code nextRange()},
  92      * {@code codepointEnd} contains the inclusive end of the
  93      * iteration range, if {@code codepoint != IS_STRING}.  If
  94      * iterating over code points using {@code next()}, or if
  95      * {@code codepoint == IS_STRING}, then the value of
  96      * {@code codepointEnd} is undefined.
  97      * @stable ICU 2.0
  98      */
  99     public int codepointEnd;
 100 
 101     /**
 102      * If {@code codepoint == IS_STRING}, then {@code string} points
 103      * to the current string.  If {@code codepoint != IS_STRING}, the
 104      * value of {@code string} is undefined.
 105      * @stable ICU 2.0
 106      */
 107     public String string;
 108 
 109     /**
 110      * Create an iterator over the given set.
 111      * @param set set to iterate over
 112      * @stable ICU 2.0
 113      */
 114     public UnicodeSetIterator(UnicodeSet set) {
 115         reset(set);
 116     }
 117 
 118     /**
 119      * Returns the next element in the set, either a code point range
 120      * or a string.  If there are no more elements in the set, return
 121      * false.  If {@code codepoint == IS_STRING}, the value is a
 122      * string in the {@code string} field.  Otherwise the value is a
 123      * range of one or more code points from {@code codepoint} to
 124      * {@code codepointeEnd} inclusive.
 125      *
 126      * <p>The order of iteration is all code points ranges in sorted
 127      * order, followed by all strings sorted order.  Ranges are
 128      * disjoint and non-contiguous.  {@code string} is undefined
 129      * unless {@code codepoint == IS_STRING}.  Do not mix calls to
 130      * {@code next()} and {@code nextRange()} without calling
 131      * {@code reset()} between them.  The results of doing so are
 132      * undefined.
 133      *
 134      * @return true if there was another element in the set and this
 135      * object contains the element.
 136      * @stable ICU 2.0
 137      */
 138     public boolean nextRange() {
 139         if (nextElement <= endElement) {
 140             codepointEnd = endElement;
 141             codepoint = nextElement;
 142             nextElement = endElement+1;
 143             return true;
 144         }
 145         if (range < endRange) {
 146             loadRange(++range);
 147             codepointEnd = endElement;
 148             codepoint = nextElement;
 149             nextElement = endElement+1;
 150             return true;
 151         }
 152 
 153         // stringIterator == null iff there are no string elements remaining
 154 
 155         if (stringIterator == null) return false;
 156         codepoint = IS_STRING; // signal that value is actually a string
 157         string = stringIterator.next();
 158         if (!stringIterator.hasNext()) stringIterator = null;
 159         return true;
 160     }
 161 
 162     /**
 163      * Sets this iterator to visit the elements of the given set and
 164      * resets it to the start of that set.  The iterator is valid only
 165      * so long as {@code set} is valid.
 166      * @param uset the set to iterate over.
 167      * @stable ICU 2.0
 168      */
 169     public void reset(UnicodeSet uset) {
 170         set = uset;
 171         reset();
 172     }
 173 
 174     /**
 175      * Resets this iterator to the start of the set.
 176      * @stable ICU 2.0
 177      */
 178     public void reset() {
 179         endRange = set.getRangeCount() - 1;
 180         range = 0;
 181         endElement = -1;
 182         nextElement = 0;
 183         if (endRange >= 0) {
 184             loadRange(range);
 185         }
 186         stringIterator = null;
 187         if (set.strings != null) {
 188             stringIterator = set.strings.iterator();
 189             if (!stringIterator.hasNext()) stringIterator = null;
 190         }
 191     }
 192 
 193     // ======================= PRIVATES ===========================
 194 
 195     private UnicodeSet set;
 196     private int endRange = 0;
 197     private int range = 0;
 198     /**
 199      * @internal
 200      */
 201     protected int endElement;
 202     /**
 203      * @internal
 204      */
 205     protected int nextElement;
 206     private Iterator<String> stringIterator = null;
 207 
 208     /**
 209      * Invariant: stringIterator is null when there are no (more) strings remaining
 210      */
 211 
 212     /**
 213      * @internal
 214      */
 215     protected void loadRange(int aRange) {
 216         nextElement = set.getRangeStart(aRange);
 217         endElement = set.getRangeEnd(aRange);
 218     }
 219 }