/*
 * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 *******************************************************************************
 * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
 * Agreement between IBM and Sun. This technology is protected by multiple     *
 * US and International patents. This notice and attribution to IBM may not    *
 * to removed.                                                                 *
 *******************************************************************************
 */

package sun.text.normalizer;

import java.util.Iterator;

/**
 * UnicodeSetIterator iterates over the contents of a UnicodeSet as
 * code point ranges.  After all ranges have been returned, it returns
 * the multicharacter strings of the UnicodeSet, if any.
 *
 * <p>To iterate over code point ranges, use a loop like this:
 * <pre>
 * UnicodeSetIterator it = new UnicodeSetIterator(set);
 * while (it.nextRange()) {
 *   if (it.codepoint != UnicodeSetIterator.IS_STRING) {
 *     processCodepointRange(it.codepoint, it.codepointEnd);
 *   } else {
 *     processString(it.string);
 *   }
 * }
 * </pre>
 * @author M. Davis
 * @stable ICU 2.0
 */
public class UnicodeSetIterator {

    /**
     * Value of <tt>codepoint</tt> if the iterator points to a string.
     * If <tt>codepoint == IS_STRING</tt>, then examine
     * <tt>string</tt> for the current iteration result.
     *
     * <p>Declared <tt>final</tt> so that the sentinel cannot be
     * reassigned at runtime, which would silently break the
     * string/range discrimination for every iterator.
     * @stable ICU 2.0
     */
    public static final int IS_STRING = -1;

    /**
     * Current code point (the inclusive start of the current range), or
     * the special value <tt>IS_STRING</tt>, if the iterator points to a
     * string.
     * @stable ICU 2.0
     */
    public int codepoint;

    /**
     * After <tt>nextRange()</tt> has produced a range
     * (<tt>codepoint != IS_STRING</tt>), <tt>codepointEnd</tt> contains
     * the inclusive end of that range.  If
     * <tt>codepoint == IS_STRING</tt>, then the value of
     * <tt>codepointEnd</tt> is undefined.
     * @stable ICU 2.0
     */
    public int codepointEnd;

    /**
     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
     * value of <tt>string</tt> is undefined.
     * @stable ICU 2.0
     */
    public String string;

    /**
     * Create an iterator over the given set.
     *
     * <p>NOTE(review): this delegates to the overridable
     * {@link #reset(UnicodeSet)}; a subclass overriding it will be
     * invoked before the subclass's own field initializers have run.
     * @param set set to iterate over
     * @stable ICU 2.0
     */
    public UnicodeSetIterator(UnicodeSet set) {
        reset(set);
    }

    /**
     * Returns the next element in the set, either a code point range
     * or a string.  If there are no more elements in the set, return
     * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
     * string in the <tt>string</tt> field.  Otherwise the value is a
     * range of one or more code points from <tt>codepoint</tt> to
     * <tt>codepointEnd</tt> inclusive.
     *
     * <p>The order of iteration is all code point ranges in sorted
     * order, followed by all strings in sorted order.  Ranges are
     * disjoint and non-contiguous.  <tt>string</tt> is undefined
     * unless <tt>codepoint == IS_STRING</tt>.
     *
     * @return true if there was another element in the set and this
     * object contains the element.
     * @stable ICU 2.0
     */
    public boolean nextRange() {
        // A range has been loaded (by reset() or an earlier call) and
        // has not been handed out yet.
        if (nextElement <= endElement) {
            publishLoadedRange();
            return true;
        }

        // The current range is used up; advance to the following one.
        if (range < endRange) {
            loadRange(++range);
            publishLoadedRange();
            return true;
        }

        // All ranges are exhausted; fall through to the strings.
        // stringIterator == null iff there are no string elements remaining.
        if (stringIterator == null) {
            return false;
        }
        codepoint = IS_STRING; // signal that value is actually a string
        string = stringIterator.next();
        if (!stringIterator.hasNext()) {
            stringIterator = null; // maintain the invariant for the next call
        }
        return true;
    }

    /**
     * Sets this iterator to visit the elements of the given set and
     * resets it to the start of that set.  The iterator is valid only
     * so long as <tt>uset</tt> is valid.
     * @param uset the set to iterate over.
     * @stable ICU 2.0
     */
    public void reset(UnicodeSet uset) {
        set = uset;
        reset();
    }

    /**
     * Resets this iterator to the start of the set.
     * @stable ICU 2.0
     */
    public void reset() {
        endRange = set.getRangeCount() - 1;
        range = 0;
        // nextElement > endElement marks "no range loaded"; this is what
        // nextRange() sees when the set has no ranges at all.
        endElement = -1;
        nextElement = 0;
        if (endRange >= 0) {
            loadRange(range);
        }
        stringIterator = null;
        if (set.strings != null) {
            stringIterator = set.strings.iterator();
            if (!stringIterator.hasNext()) {
                stringIterator = null;
            }
        }
    }

    // ======================= PRIVATES ===========================

    private UnicodeSet set;
    private int endRange = 0;
    private int range = 0;
    /**
     * @internal
     */
    protected int endElement;
    /**
     * @internal
     */
    protected int nextElement;

    /**
     * Invariant: stringIterator is null when there are no (more) strings remaining
     */
    private Iterator<String> stringIterator = null;

    /**
     * @internal
     */
    protected void loadRange(int aRange) {
        nextElement = set.getRangeStart(aRange);
        endElement = set.getRangeEnd(aRange);
    }

    /**
     * Copies the currently loaded range into the public
     * <tt>codepoint</tt>/<tt>codepointEnd</tt> fields and marks it as
     * consumed by moving <tt>nextElement</tt> past <tt>endElement</tt>.
     */
    private void publishLoadedRange() {
        codepointEnd = endElement;
        codepoint = nextElement;
        nextElement = endElement + 1;
    }
}