1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 ******************************************************************************* 28 * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved * 29 * * 30 * The original version of this source code and documentation is copyrighted * 31 * and owned by IBM, These materials are provided under terms of a License * 32 * Agreement between IBM and Sun. This technology is protected by multiple * 33 * US and International patents. This notice and attribution to IBM may not * 34 * to removed. 
*
 *******************************************************************************
 */

package sun.text.normalizer;

import java.text.CharacterIterator;

/**
 * Abstract class that defines an API for iteration on text objects. This is an
 * interface for forward and backward iteration and random access into a text
 * object. Forward iteration is done with post-increment and backward iteration
 * is done with pre-decrement semantics, while the
 * <code>java.text.CharacterIterator</code> interface methods provided forward
 * iteration with "pre-increment" and backward iteration with pre-decrement
 * semantics. This API is more efficient for forward iteration over code points.
 * The other major difference is that this API can do both code unit and code point
 * iteration, <code>java.text.CharacterIterator</code> can only iterate over
 * code units and is limited to BMP (0 - 0xFFFF)
 * @author Ram
 * @stable ICU 2.4
 */
public abstract class UCharacterIterator
                      implements Cloneable {

    /**
     * Protected default constructor for the subclasses
     * @stable ICU 2.4
     */
    protected UCharacterIterator() {
    }

    /**
     * Indicator that we have reached the ends of the UTF16 text.
     * Moved from UForwardCharacterIterator.java
     * @stable ICU 2.4
     */
    public static final int DONE = -1;

    // static final methods ----------------------------------------------------

    /**
     * Returns a <code>UCharacterIterator</code> object given a
     * source string.
     * @param source a string
     * @return UCharacterIterator object
     * @exception IllegalArgumentException if the argument is null
     * @stable ICU 2.4
     */
    public static final UCharacterIterator getInstance(String source) {
        return new ReplaceableUCharacterIterator(source);
    }

    //// for StringPrep
    /**
     * Returns a <code>UCharacterIterator</code> object given a
     * source StringBuffer.
     * @param source a string buffer of UTF-16 code units
     * @return UCharacterIterator object
     * @exception IllegalArgumentException if the argument is null
     * @stable ICU 2.4
     */
    public static final UCharacterIterator getInstance(StringBuffer source) {
        return new ReplaceableUCharacterIterator(source);
    }

    /**
     * Returns a <code>UCharacterIterator</code> object given a
     * CharacterIterator.
     * @param source a valid CharacterIterator object.
     * @return UCharacterIterator object
     * @exception IllegalArgumentException if the argument is null
     * @stable ICU 2.4
     */
    public static final UCharacterIterator getInstance(CharacterIterator source) {
        return new CharacterIteratorWrapper(source);
    }

    // public methods ----------------------------------------------------------

    /**
     * Returns the code unit at the current index.  If index is out
     * of range, returns DONE.  Index is not changed.
     * @return current code unit
     * @stable ICU 2.4
     */
    public abstract int current();

    /**
     * Returns the length of the text
     * @return length of the text
     * @stable ICU 2.4
     */
    public abstract int getLength();


    /**
     * Gets the current index in text.
     * @return current index in text.
     * @stable ICU 2.4
     */
    public abstract int getIndex();


    /**
     * Returns the UTF16 code unit at index, and increments to the next
     * code unit (post-increment semantics).  If index is out of
     * range, DONE is returned, and the iterator is reset to the limit
     * of the text.
     * @return the next UTF16 code unit, or DONE if the index is at the limit
     *         of the text.
     * @stable ICU 2.4
     */
    public abstract int next();

    /**
     * Returns the code point at index, and increments to the next code
     * point (post-increment semantics).  If index does not point to a
     * valid surrogate pair, the behavior is the same as
     * <code>next()</code>.  Otherwise the iterator is incremented past
     * the surrogate pair, and the code point represented by the pair
     * is returned.
     * @return the next codepoint in text, or DONE if the index is at
     *         the limit of the text.
     * @stable ICU 2.4
     */
    public int nextCodePoint() {
        int ch1 = next();
        // When next() yields DONE (-1) the char cast produces U+FFFF, which
        // lies outside the surrogate range, so DONE is returned unchanged.
        if (UTF16.isLeadSurrogate((char)ch1)) {
            int ch2 = next();
            if (UTF16.isTrailSurrogate((char)ch2)) {
                return UCharacterProperty.getRawSupplementary((char)ch1,
                                                              (char)ch2);
            } else if (ch2 != DONE) {
                // unmatched surrogate so back out; nothing was consumed when
                // ch2 is DONE, hence no step back in that case
                previous();
            }
        }
        return ch1;
    }

    /**
     * Decrement to the position of the previous code unit in the
     * text, and return it (pre-decrement semantics).  If the
     * resulting index is less than 0, the index is reset to 0 and
     * DONE is returned.
     * @return the previous code unit in the text, or DONE if the new
     *         index is before the start of the text.
     * @stable ICU 2.4
     */
    public abstract int previous();

    /**
     * Sets the index to the specified index in the text.
     * @param index the index within the text.
     * @exception IndexOutOfBoundsException is thrown if an invalid index is
     *            supplied
     * @stable ICU 2.4
     */
    public abstract void setIndex(int index);

    //// for StringPrep
    /**
     * Fills the buffer with the underlying text storage of the iterator.
     * If the buffer capacity is not enough an exception is thrown. The capacity
     * of the fill in buffer should at least be equal to length of text in the
     * iterator obtained by calling <code>getLength()</code>.
     * <b>Usage:</b>
     *
     * <pre>{@code
     *         UCharacterIterator iter = UCharacterIterator.getInstance(text);
     *         char[] buf = new char[iter.getLength()];
     *         iter.getText(buf);
     *
     *   OR
     *         char[] buf= new char[1];
     *         int len = 0;
     *         for(;;){
     *             try{
     *                 len = iter.getText(buf);
     *                 break;
     *             }catch(IndexOutOfBoundsException e){
     *                 buf = new char[iter.getLength()];
     *             }
     *         }
     * }</pre>
     *
     * @param fillIn an array of chars to fill with the underlying UTF-16 code
     *         units.
     * @param offset the position within the array to start putting the data.
     * @return the number of code units added to fillIn, as a convenience
     * @exception IndexOutOfBoundsException if there is not enough
     *            room after offset in the array, or if offset {@literal <} 0.
     * @stable ICU 2.4
     */
    public abstract int getText(char[] fillIn, int offset);

    //// for StringPrep
    /**
     * Convenience override for <code>getText(char[], int)</code> that provides
     * an offset of 0.
     * @param fillIn an array of chars to fill with the underlying UTF-16 code
     *         units.
     * @return the number of code units added to fillIn, as a convenience
     * @exception IndexOutOfBoundsException if there is not enough
     *            room in the array.
     * @stable ICU 2.4
     */
    public final int getText(char[] fillIn) {
        return getText(fillIn, 0);
    }

    //// for StringPrep
    /**
     * Convenience method for returning the underlying text storage as a string
     * @return the underlying text storage in the iterator as a string
     * @stable ICU 2.4
     */
    public String getText() {
        // Size the buffer from getLength() so getText(char[]) has enough room.
        char[] text = new char[getLength()];
        getText(text);
        return new String(text);
    }

    /**
     * Moves the current position by the number of code units
     * specified, either forward or backward depending on the sign
     * of delta (positive or negative respectively).  If the resulting
     * index would be less than zero, the index is set to zero, and if
     * the resulting index would be greater than limit, the index is
     * set to limit.
     *
     * @param delta the number of code units to move the current
     *              index.
     * @return the new index.
     * @exception IndexOutOfBoundsException if <code>setIndex(int)</code>
     *            rejects the clamped index
     * @stable ICU 2.4
     *
     */
    public int moveIndex(int delta) {
        // Widen to long before adding: with a large positive delta the int
        // sum wraps to a negative value, which would then be clamped to 0
        // instead of to the limit of the text.
        long target = (long) getIndex() + delta;
        int x = (int) Math.max(0L, Math.min(target, (long) getLength()));
        setIndex(x);
        return x;
    }

    /**
     * Creates a copy of this iterator by delegating to
     * <code>Object.clone()</code>.
     * @return copy of this iterator
     * @exception CloneNotSupportedException if <code>Object.clone()</code>
     *            refuses to clone this iterator
     * @stable ICU 2.4
     */
    @Override
    public Object clone() throws CloneNotSupportedException {
        return super.clone();
    }

}