1 /* 2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 ******************************************************************************* 28 * Copyright (C) 1996-2014, International Business Machines Corporation and * 29 * others. All Rights Reserved. * 30 ******************************************************************************* 31 */ 32 33 package jdk.internal.icu.text; 34 35 import jdk.internal.icu.impl.CharacterIteratorWrapper; 36 import jdk.internal.icu.impl.ReplaceableUCharacterIterator; 37 import jdk.internal.icu.impl.UCharacterProperty; 38 39 import java.text.CharacterIterator; 40 41 /** 42 * Abstract class that defines an API for iteration on text objects.This is an 43 * interface for forward and backward iteration and random access into a text 44 * object. Forward iteration is done with post-increment and backward iteration 45 * is done with pre-decrement semantics, while the 46 * <code>java.text.CharacterIterator</code> interface methods provided forward 47 * iteration with "pre-increment" and backward iteration with pre-decrement 48 * semantics. This API is more efficient for forward iteration over code points. 49 * The other major difference is that this API can do both code unit and code point 50 * iteration, <code>java.text.CharacterIterator</code> can only iterate over 51 * code units and is limited to BMP (0 - 0xFFFF) 52 * @author Ram 53 * @stable ICU 2.4 54 */ 55 public abstract class UCharacterIterator 56 implements Cloneable { 57 58 /** 59 * Protected default constructor for the subclasses 60 * @stable ICU 2.4 61 */ 62 protected UCharacterIterator(){ 63 } 64 65 /** 66 * Indicator that we have reached the ends of the UTF16 text. 67 * Moved from UForwardCharacterIterator.java 68 * @stable ICU 2.4 69 */ 70 public static final int DONE = -1; 71 72 // static final methods ---------------------------------------------------- 73 74 /** 75 * Returns a <code>UCharacterIterator</code> object given a 76 * source string. 77 * @param source a string 78 * @return UCharacterIterator object 79 * @exception IllegalArgumentException if the argument is null 80 * @stable ICU 2.4 81 */ 82 public static final UCharacterIterator getInstance(String source){ 83 return new ReplaceableUCharacterIterator(source); 84 } 85 86 /** 87 * Returns a <code>UCharacterIterator</code> object given a 88 * source StringBuffer. 89 * @param source an string buffer of UTF-16 code units 90 * @return UCharacterIterator object 91 * @exception IllegalArgumentException if the argument is null 92 * @stable ICU 2.4 93 */ 94 public static final UCharacterIterator getInstance(StringBuffer source){ 95 return new ReplaceableUCharacterIterator(source); 96 } 97 98 /** 99 * Returns a <code>UCharacterIterator</code> object given a 100 * CharacterIterator. 101 * @param source a valid CharacterIterator object. 102 * @return UCharacterIterator object 103 * @exception IllegalArgumentException if the argument is null 104 * @stable ICU 2.4 105 */ 106 public static final UCharacterIterator getInstance(CharacterIterator source){ 107 return new CharacterIteratorWrapper(source); 108 } 109 110 // public methods ---------------------------------------------------------- 111 112 /** 113 * Returns the length of the text 114 * @return length of the text 115 * @stable ICU 2.4 116 */ 117 public abstract int getLength(); 118 119 /** 120 * Gets the current index in text. 121 * @return current index in text. 122 * @stable ICU 2.4 123 */ 124 public abstract int getIndex(); 125 126 /** 127 * Returns the UTF16 code unit at index, and increments to the next 128 * code unit (post-increment semantics). If index is out of 129 * range, DONE is returned, and the iterator is reset to the limit 130 * of the text. 131 * @return the next UTF16 code unit, or DONE if the index is at the limit 132 * of the text. 133 * @stable ICU 2.4 134 */ 135 public abstract int next(); 136 137 /** 138 * Returns the code point at index, and increments to the next code 139 * point (post-increment semantics). If index does not point to a 140 * valid surrogate pair, the behavior is the same as 141 * <code>next()</code>. Otherwise the iterator is incremented past 142 * the surrogate pair, and the code point represented by the pair 143 * is returned. 144 * @return the next codepoint in text, or DONE if the index is at 145 * the limit of the text. 146 * @stable ICU 2.4 147 */ 148 public int nextCodePoint(){ 149 int ch1 = next(); 150 if(UTF16.isLeadSurrogate((char)ch1)){ 151 int ch2 = next(); 152 if(UTF16.isTrailSurrogate((char)ch2)){ 153 return UCharacterProperty.getRawSupplementary((char)ch1, 154 (char)ch2); 155 }else if (ch2 != DONE) { 156 // unmatched surrogate so back out 157 previous(); 158 } 159 } 160 return ch1; 161 } 162 163 /** 164 * Decrement to the position of the previous code unit in the 165 * text, and return it (pre-decrement semantics). If the 166 * resulting index is less than 0, the index is reset to 0 and 167 * DONE is returned. 168 * @return the previous code unit in the text, or DONE if the new 169 * index is before the start of the text. 170 * @stable ICU 2.4 171 */ 172 public abstract int previous(); 173 174 175 /** 176 * Retreat to the start of the previous code point in the text, 177 * and return it (pre-decrement semantics). If the index is not 178 * preceeded by a valid surrogate pair, the behavior is the same 179 * as <code>previous()</code>. Otherwise the iterator is 180 * decremented to the start of the surrogate pair, and the code 181 * point represented by the pair is returned. 182 * @return the previous code point in the text, or DONE if the new 183 * index is before the start of the text. 184 * @stable ICU 2.4 185 */ 186 public int previousCodePoint(){ 187 int ch1 = previous(); 188 if(UTF16.isTrailSurrogate((char)ch1)){ 189 int ch2 = previous(); 190 if(UTF16.isLeadSurrogate((char)ch2)){ 191 return UCharacterProperty.getRawSupplementary((char)ch2, 192 (char)ch1); 193 }else if (ch2 != DONE) { 194 //unmatched trail surrogate so back out 195 next(); 196 } 197 } 198 return ch1; 199 } 200 201 /** 202 * Sets the index to the specified index in the text. 203 * @param index the index within the text. 204 * @exception IndexOutOfBoundsException is thrown if an invalid index is 205 * supplied 206 * @stable ICU 2.4 207 */ 208 public abstract void setIndex(int index); 209 210 /** 211 * Sets the current index to the start. 212 * @stable ICU 2.4 213 */ 214 public void setToStart() { 215 setIndex(0); 216 } 217 218 /** 219 * Fills the buffer with the underlying text storage of the iterator 220 * If the buffer capacity is not enough a exception is thrown. The capacity 221 * of the fill in buffer should at least be equal to length of text in the 222 * iterator obtained by calling <code>getLength()</code>. 223 * <b>Usage:</b> 224 * 225 * <pre>{@code 226 * UChacterIterator iter = new UCharacterIterator.getInstance(text); 227 * char[] buf = new char[iter.getLength()]; 228 * iter.getText(buf); 229 * 230 * OR 231 * char[] buf= new char[1]; 232 * int len = 0; 233 * for(;;){ 234 * try{ 235 * len = iter.getText(buf); 236 * break; 237 * }catch(IndexOutOfBoundsException e){ 238 * buf = new char[iter.getLength()]; 239 * } 240 * } 241 * }</pre> 242 * 243 * @param fillIn an array of chars to fill with the underlying UTF-16 code 244 * units. 245 * @param offset the position within the array to start putting the data. 246 * @return the number of code units added to fillIn, as a convenience 247 * @exception IndexOutOfBoundsException exception if there is not enough 248 * room after offset in the array, or if offset < 0. 249 * @stable ICU 2.4 250 */ 251 public abstract int getText(char[] fillIn, int offset); 252 253 /** 254 * Convenience override for <code>getText(char[], int)</code> that provides 255 * an offset of 0. 256 * @param fillIn an array of chars to fill with the underlying UTF-16 code 257 * units. 258 * @return the number of code units added to fillIn, as a convenience 259 * @exception IndexOutOfBoundsException exception if there is not enough 260 * room in the array. 261 * @stable ICU 2.4 262 */ 263 public final int getText(char[] fillIn) { 264 return getText(fillIn, 0); 265 } 266 267 /** 268 * Convenience method for returning the underlying text storage as a string 269 * @return the underlying text storage in the iterator as a string 270 * @stable ICU 2.4 271 */ 272 public String getText() { 273 char[] text = new char[getLength()]; 274 getText(text); 275 return new String(text); 276 } 277 278 /** 279 * Moves the current position by the number of code points 280 * specified, either forward or backward depending on the sign of 281 * delta (positive or negative respectively). If the current index 282 * is at a trail surrogate then the first adjustment is by code 283 * unit, and the remaining adjustments are by code points. If the 284 * resulting index would be less than zero, the index is set to 285 * zero, and if the resulting index would be greater than limit, 286 * the index is set to limit. 287 * @param delta the number of code units to move the current index. 288 * @return the new index 289 * @exception IndexOutOfBoundsException is thrown if an invalid delta is 290 * supplied 291 * @stable ICU 2.4 292 * 293 */ 294 public int moveCodePointIndex(int delta){ 295 if(delta>0){ 296 while(delta>0 && nextCodePoint() != DONE){delta--;} 297 }else{ 298 while(delta<0 && previousCodePoint() != DONE){delta++;} 299 } 300 if(delta!=0){ 301 throw new IndexOutOfBoundsException(); 302 } 303 304 return getIndex(); 305 } 306 307 /** 308 * Creates a copy of this iterator, independent from other iterators. 309 * If it is not possible to clone the iterator, returns null. 310 * @return copy of this iterator 311 * @stable ICU 2.4 312 */ 313 public Object clone() throws CloneNotSupportedException{ 314 return super.clone(); 315 } 316 317 }