1 /* 2 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 ******************************************************************************* 28 * Copyright (C) 1996-2014, International Business Machines Corporation and * 29 * others. All Rights Reserved. * 30 ******************************************************************************* 31 */ 32 33 package sun.text.normalizer; 34 35 import java.text.CharacterIterator; 36 37 /** 38 * Abstract class that defines an API for iteration on text objects.This is an 39 * interface for forward and backward iteration and random access into a text 40 * object. Forward iteration is done with post-increment and backward iteration 41 * is done with pre-decrement semantics, while the 42 * <code>java.text.CharacterIterator</code> interface methods provided forward 43 * iteration with "pre-increment" and backward iteration with pre-decrement 44 * semantics. This API is more efficient for forward iteration over code points. 45 * The other major difference is that this API can do both code unit and code point 46 * iteration, <code>java.text.CharacterIterator</code> can only iterate over 47 * code units and is limited to BMP (0 - 0xFFFF) 48 * @author Ram 49 * @stable ICU 2.4 50 */ 51 public abstract class UCharacterIterator 52 implements Cloneable { 53 54 /** 55 * Protected default constructor for the subclasses 56 * @stable ICU 2.4 57 */ 58 protected UCharacterIterator(){ 59 } 60 61 /** 62 * Indicator that we have reached the ends of the UTF16 text. 63 * Moved from UForwardCharacterIterator.java 64 * @stable ICU 2.4 65 */ 66 public static final int DONE = -1; 67 68 // static final methods ---------------------------------------------------- 69 70 /** 71 * Returns a <code>UCharacterIterator</code> object given a 72 * source string. 73 * @param source a string 74 * @return UCharacterIterator object 75 * @exception IllegalArgumentException if the argument is null 76 * @stable ICU 2.4 77 */ 78 public static final UCharacterIterator getInstance(String source){ 79 return new ReplaceableUCharacterIterator(source); 80 } 81 82 /** 83 * Returns a <code>UCharacterIterator</code> object given a 84 * source StringBuffer. 85 * @param source an string buffer of UTF-16 code units 86 * @return UCharacterIterator object 87 * @exception IllegalArgumentException if the argument is null 88 * @stable ICU 2.4 89 */ 90 public static final UCharacterIterator getInstance(StringBuffer source){ 91 return new ReplaceableUCharacterIterator(source); 92 } 93 94 /** 95 * Returns a <code>UCharacterIterator</code> object given a 96 * CharacterIterator. 97 * @param source a valid CharacterIterator object. 98 * @return UCharacterIterator object 99 * @exception IllegalArgumentException if the argument is null 100 * @stable ICU 2.4 101 */ 102 public static final UCharacterIterator getInstance(CharacterIterator source){ 103 return new CharacterIteratorWrapper(source); 104 } 105 106 // public methods ---------------------------------------------------------- 107 108 /** 109 * Returns the length of the text 110 * @return length of the text 111 * @stable ICU 2.4 112 */ 113 public abstract int getLength(); 114 115 /** 116 * Gets the current index in text. 117 * @return current index in text. 118 * @stable ICU 2.4 119 */ 120 public abstract int getIndex(); 121 122 /** 123 * Returns the UTF16 code unit at index, and increments to the next 124 * code unit (post-increment semantics). If index is out of 125 * range, DONE is returned, and the iterator is reset to the limit 126 * of the text. 127 * @return the next UTF16 code unit, or DONE if the index is at the limit 128 * of the text. 129 * @stable ICU 2.4 130 */ 131 public abstract int next(); 132 133 /** 134 * Returns the code point at index, and increments to the next code 135 * point (post-increment semantics). If index does not point to a 136 * valid surrogate pair, the behavior is the same as 137 * <code>next()</code>. Otherwise the iterator is incremented past 138 * the surrogate pair, and the code point represented by the pair 139 * is returned. 140 * @return the next codepoint in text, or DONE if the index is at 141 * the limit of the text. 142 * @stable ICU 2.4 143 */ 144 public int nextCodePoint(){ 145 int ch1 = next(); 146 if(UTF16.isLeadSurrogate((char)ch1)){ 147 int ch2 = next(); 148 if(UTF16.isTrailSurrogate((char)ch2)){ 149 return UCharacterProperty.getRawSupplementary((char)ch1, 150 (char)ch2); 151 }else if (ch2 != DONE) { 152 // unmatched surrogate so back out 153 previous(); 154 } 155 } 156 return ch1; 157 } 158 159 /** 160 * Decrement to the position of the previous code unit in the 161 * text, and return it (pre-decrement semantics). If the 162 * resulting index is less than 0, the index is reset to 0 and 163 * DONE is returned. 164 * @return the previous code unit in the text, or DONE if the new 165 * index is before the start of the text. 166 * @stable ICU 2.4 167 */ 168 public abstract int previous(); 169 170 171 /** 172 * Retreat to the start of the previous code point in the text, 173 * and return it (pre-decrement semantics). If the index is not 174 * preceeded by a valid surrogate pair, the behavior is the same 175 * as <code>previous()</code>. Otherwise the iterator is 176 * decremented to the start of the surrogate pair, and the code 177 * point represented by the pair is returned. 178 * @return the previous code point in the text, or DONE if the new 179 * index is before the start of the text. 180 * @stable ICU 2.4 181 */ 182 public int previousCodePoint(){ 183 int ch1 = previous(); 184 if(UTF16.isTrailSurrogate((char)ch1)){ 185 int ch2 = previous(); 186 if(UTF16.isLeadSurrogate((char)ch2)){ 187 return UCharacterProperty.getRawSupplementary((char)ch2, 188 (char)ch1); 189 }else if (ch2 != DONE) { 190 //unmatched trail surrogate so back out 191 next(); 192 } 193 } 194 return ch1; 195 } 196 197 /** 198 * Sets the index to the specified index in the text. 199 * @param index the index within the text. 200 * @exception IndexOutOfBoundsException is thrown if an invalid index is 201 * supplied 202 * @stable ICU 2.4 203 */ 204 public abstract void setIndex(int index); 205 206 /** 207 * Sets the current index to the start. 208 * @stable ICU 2.4 209 */ 210 public void setToStart() { 211 setIndex(0); 212 } 213 214 /** 215 * Fills the buffer with the underlying text storage of the iterator 216 * If the buffer capacity is not enough a exception is thrown. The capacity 217 * of the fill in buffer should at least be equal to length of text in the 218 * iterator obtained by calling <code>getLength()</code>. 219 * <b>Usage:</b> 220 * 221 * <pre>{@code 222 * UChacterIterator iter = new UCharacterIterator.getInstance(text); 223 * char[] buf = new char[iter.getLength()]; 224 * iter.getText(buf); 225 * 226 * OR 227 * char[] buf= new char[1]; 228 * int len = 0; 229 * for(;;){ 230 * try{ 231 * len = iter.getText(buf); 232 * break; 233 * }catch(IndexOutOfBoundsException e){ 234 * buf = new char[iter.getLength()]; 235 * } 236 * } 237 * }</pre> 238 * 239 * @param fillIn an array of chars to fill with the underlying UTF-16 code 240 * units. 241 * @param offset the position within the array to start putting the data. 242 * @return the number of code units added to fillIn, as a convenience 243 * @exception IndexOutOfBoundsException exception if there is not enough 244 * room after offset in the array, or if offset < 0. 245 * @stable ICU 2.4 246 */ 247 public abstract int getText(char[] fillIn, int offset); 248 249 /** 250 * Convenience override for <code>getText(char[], int)</code> that provides 251 * an offset of 0. 252 * @param fillIn an array of chars to fill with the underlying UTF-16 code 253 * units. 254 * @return the number of code units added to fillIn, as a convenience 255 * @exception IndexOutOfBoundsException exception if there is not enough 256 * room in the array. 257 * @stable ICU 2.4 258 */ 259 public final int getText(char[] fillIn) { 260 return getText(fillIn, 0); 261 } 262 263 /** 264 * Convenience method for returning the underlying text storage as a string 265 * @return the underlying text storage in the iterator as a string 266 * @stable ICU 2.4 267 */ 268 public String getText() { 269 char[] text = new char[getLength()]; 270 getText(text); 271 return new String(text); 272 } 273 274 /** 275 * Moves the current position by the number of code points 276 * specified, either forward or backward depending on the sign of 277 * delta (positive or negative respectively). If the current index 278 * is at a trail surrogate then the first adjustment is by code 279 * unit, and the remaining adjustments are by code points. If the 280 * resulting index would be less than zero, the index is set to 281 * zero, and if the resulting index would be greater than limit, 282 * the index is set to limit. 283 * @param delta the number of code units to move the current index. 284 * @return the new index 285 * @exception IndexOutOfBoundsException is thrown if an invalid delta is 286 * supplied 287 * @stable ICU 2.4 288 * 289 */ 290 public int moveCodePointIndex(int delta){ 291 if(delta>0){ 292 while(delta>0 && nextCodePoint() != DONE){delta--;} 293 }else{ 294 while(delta<0 && previousCodePoint() != DONE){delta++;} 295 } 296 if(delta!=0){ 297 throw new IndexOutOfBoundsException(); 298 } 299 300 return getIndex(); 301 } 302 303 /** 304 * Creates a copy of this iterator, independent from other iterators. 305 * If it is not possible to clone the iterator, returns null. 306 * @return copy of this iterator 307 * @stable ICU 2.4 308 */ 309 public Object clone() throws CloneNotSupportedException{ 310 return super.clone(); 311 } 312 313 }