1 /* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 ******************************************************************************* 28 * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved * 29 * * 30 * The original version of this source code and documentation is copyrighted * 31 * and owned by IBM, These materials are provided under terms of a License * 32 * Agreement between IBM and Sun. This technology is protected by multiple * 33 * US and International patents. This notice and attribution to IBM may not * 34 * to removed. * 35 ******************************************************************************* 36 */ 37 38 package sun.text.normalizer; 39 40 import java.text.CharacterIterator; 41 42 /** 43 * Abstract class that defines an API for iteration on text objects.This is an 44 * interface for forward and backward iteration and random access into a text 45 * object. Forward iteration is done with post-increment and backward iteration 46 * is done with pre-decrement semantics, while the 47 * <code>java.text.CharacterIterator</code> interface methods provided forward 48 * iteration with "pre-increment" and backward iteration with pre-decrement 49 * semantics. This API is more efficient for forward iteration over code points. 50 * The other major difference is that this API can do both code unit and code point 51 * iteration, <code>java.text.CharacterIterator</code> can only iterate over 52 * code units and is limited to BMP (0 - 0xFFFF) 53 * @author Ram 54 * @stable ICU 2.4 67 * Indicator that we have reached the ends of the UTF16 text. 68 * Moved from UForwardCharacterIterator.java 69 * @stable ICU 2.4 70 */ 71 public static final int DONE = -1; 72 73 // static final methods ---------------------------------------------------- 74 75 /** 76 * Returns a <code>UCharacterIterator</code> object given a 77 * source string. 78 * @param source a string 79 * @return UCharacterIterator object 80 * @exception IllegalArgumentException if the argument is null 81 * @stable ICU 2.4 82 */ 83 public static final UCharacterIterator getInstance(String source){ 84 return new ReplaceableUCharacterIterator(source); 85 } 86 87 //// for StringPrep 88 /** 89 * Returns a <code>UCharacterIterator</code> object given a 90 * source StringBuffer. 91 * @param source an string buffer of UTF-16 code units 92 * @return UCharacterIterator object 93 * @exception IllegalArgumentException if the argument is null 94 * @stable ICU 2.4 95 */ 96 public static final UCharacterIterator getInstance(StringBuffer source){ 97 return new ReplaceableUCharacterIterator(source); 98 } 99 100 /** 101 * Returns a <code>UCharacterIterator</code> object given a 102 * CharacterIterator. 103 * @param source a valid CharacterIterator object. 104 * @return UCharacterIterator object 105 * @exception IllegalArgumentException if the argument is null 106 * @stable ICU 2.4 107 */ 108 public static final UCharacterIterator getInstance(CharacterIterator source){ 109 return new CharacterIteratorWrapper(source); 110 } 111 112 // public methods ---------------------------------------------------------- 113 114 /** 115 * Returns the code unit at the current index. If index is out 116 * of range, returns DONE. Index is not changed. 117 * @return current code unit 118 * @stable ICU 2.4 119 */ 120 public abstract int current(); 121 122 /** 123 * Returns the length of the text 124 * @return length of the text 125 * @stable ICU 2.4 126 */ 127 public abstract int getLength(); 128 129 130 /** 131 * Gets the current index in text. 132 * @return current index in text. 133 * @stable ICU 2.4 134 */ 135 public abstract int getIndex(); 136 137 138 /** 139 * Returns the UTF16 code unit at index, and increments to the next 140 * code unit (post-increment semantics). If index is out of 141 * range, DONE is returned, and the iterator is reset to the limit 142 * of the text. 143 * @return the next UTF16 code unit, or DONE if the index is at the limit 144 * of the text. 145 * @stable ICU 2.4 146 */ 147 public abstract int next(); 148 149 /** 150 * Returns the code point at index, and increments to the next code 151 * point (post-increment semantics). If index does not point to a 152 * valid surrogate pair, the behavior is the same as 153 * <code>next()</code>. Otherwise the iterator is incremented past 154 * the surrogate pair, and the code point represented by the pair 155 * is returned. 156 * @return the next codepoint in text, or DONE if the index is at 157 * the limit of the text. 166 (char)ch2); 167 }else if (ch2 != DONE) { 168 // unmatched surrogate so back out 169 previous(); 170 } 171 } 172 return ch1; 173 } 174 175 /** 176 * Decrement to the position of the previous code unit in the 177 * text, and return it (pre-decrement semantics). If the 178 * resulting index is less than 0, the index is reset to 0 and 179 * DONE is returned. 180 * @return the previous code unit in the text, or DONE if the new 181 * index is before the start of the text. 182 * @stable ICU 2.4 183 */ 184 public abstract int previous(); 185 186 /** 187 * Sets the index to the specified index in the text. 188 * @param index the index within the text. 189 * @exception IndexOutOfBoundsException is thrown if an invalid index is 190 * supplied 191 * @stable ICU 2.4 192 */ 193 public abstract void setIndex(int index); 194 195 //// for StringPrep 196 /** 197 * Fills the buffer with the underlying text storage of the iterator 198 * If the buffer capacity is not enough a exception is thrown. The capacity 199 * of the fill in buffer should at least be equal to length of text in the 200 * iterator obtained by calling <code>getLength()</code>. 201 * <b>Usage:</b> 202 * 203 * <pre>{@code 204 * UChacterIterator iter = new UCharacterIterator.getInstance(text); 205 * char[] buf = new char[iter.getLength()]; 206 * iter.getText(buf); 207 * 208 * OR 209 * char[] buf= new char[1]; 210 * int len = 0; 211 * for(;;){ 212 * try{ 213 * len = iter.getText(buf); 214 * break; 215 * }catch(IndexOutOfBoundsException e){ 216 * buf = new char[iter.getLength()]; 217 * } 218 * } 219 * }</pre> 220 * 221 * @param fillIn an array of chars to fill with the underlying UTF-16 code 222 * units. 223 * @param offset the position within the array to start putting the data. 224 * @return the number of code units added to fillIn, as a convenience 225 * @exception IndexOutOfBounds exception if there is not enough 226 * room after offset in the array, or if offset {@literal <} 0. 227 * @stable ICU 2.4 228 */ 229 public abstract int getText(char[] fillIn, int offset); 230 231 //// for StringPrep 232 /** 233 * Convenience override for <code>getText(char[], int)</code> that provides 234 * an offset of 0. 235 * @param fillIn an array of chars to fill with the underlying UTF-16 code 236 * units. 237 * @return the number of code units added to fillIn, as a convenience 238 * @exception IndexOutOfBounds exception if there is not enough 239 * room in the array. 240 * @stable ICU 2.4 241 */ 242 public final int getText(char[] fillIn) { 243 return getText(fillIn, 0); 244 } 245 246 //// for StringPrep 247 /** 248 * Convenience method for returning the underlying text storage as a string 249 * @return the underlying text storage in the iterator as a string 250 * @stable ICU 2.4 251 */ 252 public String getText() { 253 char[] text = new char[getLength()]; 254 getText(text); 255 return new String(text); 256 } 257 258 /** 259 * Moves the current position by the number of code units 260 * specified, either forward or backward depending on the sign 261 * of delta (positive or negative respectively). If the resulting 262 * index would be less than zero, the index is set to zero, and if 263 * the resulting index would be greater than limit, the index is 264 * set to limit. 265 * 266 * @param delta the number of code units to move the current 267 * index. 268 * @return the new index. 269 * @exception IndexOutOfBoundsException is thrown if an invalid index is 270 * supplied 271 * @stable ICU 2.4 272 * 273 */ 274 public int moveIndex(int delta) { 275 int x = Math.max(0, Math.min(getIndex() + delta, getLength())); 276 setIndex(x); 277 return x; 278 } 279 280 /** 281 * Creates a copy of this iterator, independent from other iterators. 282 * If it is not possible to clone the iterator, returns null. 283 * @return copy of this iterator 284 * @stable ICU 2.4 285 */ 286 public Object clone() throws CloneNotSupportedException{ 287 return super.clone(); 288 } 289 290 } | 1 /* 2 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 ******************************************************************************* 28 * Copyright (C) 1996-2014, International Business Machines Corporation and * 29 * others. All Rights Reserved. * 30 ******************************************************************************* 31 */ 32 33 package sun.text.normalizer; 34 35 import java.text.CharacterIterator; 36 37 /** 38 * Abstract class that defines an API for iteration on text objects.This is an 39 * interface for forward and backward iteration and random access into a text 40 * object. Forward iteration is done with post-increment and backward iteration 41 * is done with pre-decrement semantics, while the 42 * <code>java.text.CharacterIterator</code> interface methods provided forward 43 * iteration with "pre-increment" and backward iteration with pre-decrement 44 * semantics. This API is more efficient for forward iteration over code points. 45 * The other major difference is that this API can do both code unit and code point 46 * iteration, <code>java.text.CharacterIterator</code> can only iterate over 47 * code units and is limited to BMP (0 - 0xFFFF) 48 * @author Ram 49 * @stable ICU 2.4 62 * Indicator that we have reached the ends of the UTF16 text. 63 * Moved from UForwardCharacterIterator.java 64 * @stable ICU 2.4 65 */ 66 public static final int DONE = -1; 67 68 // static final methods ---------------------------------------------------- 69 70 /** 71 * Returns a <code>UCharacterIterator</code> object given a 72 * source string. 73 * @param source a string 74 * @return UCharacterIterator object 75 * @exception IllegalArgumentException if the argument is null 76 * @stable ICU 2.4 77 */ 78 public static final UCharacterIterator getInstance(String source){ 79 return new ReplaceableUCharacterIterator(source); 80 } 81 82 /** 83 * Returns a <code>UCharacterIterator</code> object given a 84 * source StringBuffer. 85 * @param source an string buffer of UTF-16 code units 86 * @return UCharacterIterator object 87 * @exception IllegalArgumentException if the argument is null 88 * @stable ICU 2.4 89 */ 90 public static final UCharacterIterator getInstance(StringBuffer source){ 91 return new ReplaceableUCharacterIterator(source); 92 } 93 94 /** 95 * Returns a <code>UCharacterIterator</code> object given a 96 * CharacterIterator. 97 * @param source a valid CharacterIterator object. 98 * @return UCharacterIterator object 99 * @exception IllegalArgumentException if the argument is null 100 * @stable ICU 2.4 101 */ 102 public static final UCharacterIterator getInstance(CharacterIterator source){ 103 return new CharacterIteratorWrapper(source); 104 } 105 106 // public methods ---------------------------------------------------------- 107 108 /** 109 * Returns the length of the text 110 * @return length of the text 111 * @stable ICU 2.4 112 */ 113 public abstract int getLength(); 114 115 /** 116 * Gets the current index in text. 117 * @return current index in text. 118 * @stable ICU 2.4 119 */ 120 public abstract int getIndex(); 121 122 /** 123 * Returns the UTF16 code unit at index, and increments to the next 124 * code unit (post-increment semantics). If index is out of 125 * range, DONE is returned, and the iterator is reset to the limit 126 * of the text. 127 * @return the next UTF16 code unit, or DONE if the index is at the limit 128 * of the text. 129 * @stable ICU 2.4 130 */ 131 public abstract int next(); 132 133 /** 134 * Returns the code point at index, and increments to the next code 135 * point (post-increment semantics). If index does not point to a 136 * valid surrogate pair, the behavior is the same as 137 * <code>next()</code>. Otherwise the iterator is incremented past 138 * the surrogate pair, and the code point represented by the pair 139 * is returned. 140 * @return the next codepoint in text, or DONE if the index is at 141 * the limit of the text. 150 (char)ch2); 151 }else if (ch2 != DONE) { 152 // unmatched surrogate so back out 153 previous(); 154 } 155 } 156 return ch1; 157 } 158 159 /** 160 * Decrement to the position of the previous code unit in the 161 * text, and return it (pre-decrement semantics). If the 162 * resulting index is less than 0, the index is reset to 0 and 163 * DONE is returned. 164 * @return the previous code unit in the text, or DONE if the new 165 * index is before the start of the text. 166 * @stable ICU 2.4 167 */ 168 public abstract int previous(); 169 170 171 /** 172 * Retreat to the start of the previous code point in the text, 173 * and return it (pre-decrement semantics). If the index is not 174 * preceeded by a valid surrogate pair, the behavior is the same 175 * as <code>previous()</code>. Otherwise the iterator is 176 * decremented to the start of the surrogate pair, and the code 177 * point represented by the pair is returned. 178 * @return the previous code point in the text, or DONE if the new 179 * index is before the start of the text. 180 * @stable ICU 2.4 181 */ 182 public int previousCodePoint(){ 183 int ch1 = previous(); 184 if(UTF16.isTrailSurrogate((char)ch1)){ 185 int ch2 = previous(); 186 if(UTF16.isLeadSurrogate((char)ch2)){ 187 return UCharacterProperty.getRawSupplementary((char)ch2, 188 (char)ch1); 189 }else if (ch2 != DONE) { 190 //unmatched trail surrogate so back out 191 next(); 192 } 193 } 194 return ch1; 195 } 196 197 /** 198 * Sets the index to the specified index in the text. 199 * @param index the index within the text. 200 * @exception IndexOutOfBoundsException is thrown if an invalid index is 201 * supplied 202 * @stable ICU 2.4 203 */ 204 public abstract void setIndex(int index); 205 206 /** 207 * Sets the current index to the start. 208 * @stable ICU 2.4 209 */ 210 public void setToStart() { 211 setIndex(0); 212 } 213 214 /** 215 * Fills the buffer with the underlying text storage of the iterator 216 * If the buffer capacity is not enough a exception is thrown. The capacity 217 * of the fill in buffer should at least be equal to length of text in the 218 * iterator obtained by calling <code>getLength()</code>. 219 * <b>Usage:</b> 220 * 221 * <pre>{@code 222 * UChacterIterator iter = new UCharacterIterator.getInstance(text); 223 * char[] buf = new char[iter.getLength()]; 224 * iter.getText(buf); 225 * 226 * OR 227 * char[] buf= new char[1]; 228 * int len = 0; 229 * for(;;){ 230 * try{ 231 * len = iter.getText(buf); 232 * break; 233 * }catch(IndexOutOfBoundsException e){ 234 * buf = new char[iter.getLength()]; 235 * } 236 * } 237 * }</pre> 238 * 239 * @param fillIn an array of chars to fill with the underlying UTF-16 code 240 * units. 241 * @param offset the position within the array to start putting the data. 242 * @return the number of code units added to fillIn, as a convenience 243 * @exception IndexOutOfBoundsException exception if there is not enough 244 * room after offset in the array, or if offset < 0. 245 * @stable ICU 2.4 246 */ 247 public abstract int getText(char[] fillIn, int offset); 248 249 /** 250 * Convenience override for <code>getText(char[], int)</code> that provides 251 * an offset of 0. 252 * @param fillIn an array of chars to fill with the underlying UTF-16 code 253 * units. 254 * @return the number of code units added to fillIn, as a convenience 255 * @exception IndexOutOfBoundsException exception if there is not enough 256 * room in the array. 257 * @stable ICU 2.4 258 */ 259 public final int getText(char[] fillIn) { 260 return getText(fillIn, 0); 261 } 262 263 /** 264 * Convenience method for returning the underlying text storage as a string 265 * @return the underlying text storage in the iterator as a string 266 * @stable ICU 2.4 267 */ 268 public String getText() { 269 char[] text = new char[getLength()]; 270 getText(text); 271 return new String(text); 272 } 273 274 /** 275 * Moves the current position by the number of code points 276 * specified, either forward or backward depending on the sign of 277 * delta (positive or negative respectively). If the current index 278 * is at a trail surrogate then the first adjustment is by code 279 * unit, and the remaining adjustments are by code points. If the 280 * resulting index would be less than zero, the index is set to 281 * zero, and if the resulting index would be greater than limit, 282 * the index is set to limit. 283 * @param delta the number of code units to move the current index. 284 * @return the new index 285 * @exception IndexOutOfBoundsException is thrown if an invalid delta is 286 * supplied 287 * @stable ICU 2.4 288 * 289 */ 290 public int moveCodePointIndex(int delta){ 291 if(delta>0){ 292 while(delta>0 && nextCodePoint() != DONE){delta--;} 293 }else{ 294 while(delta<0 && previousCodePoint() != DONE){delta++;} 295 } 296 if(delta!=0){ 297 throw new IndexOutOfBoundsException(); 298 } 299 300 return getIndex(); 301 } 302 303 /** 304 * Creates a copy of this iterator, independent from other iterators. 305 * If it is not possible to clone the iterator, returns null. 306 * @return copy of this iterator 307 * @stable ICU 2.4 308 */ 309 public Object clone() throws CloneNotSupportedException{ 310 return super.clone(); 311 } 312 313 } |