1 /*
   2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  *******************************************************************************
  28  * Copyright (C) 1996-2014, International Business Machines Corporation and    *
  29  * others. All Rights Reserved.                                                *
  30  *******************************************************************************
  31  */
  32 
  33 package jdk.internal.icu.text;
  34 
  35 import jdk.internal.icu.impl.CharacterIteratorWrapper;
  36 import jdk.internal.icu.impl.ReplaceableUCharacterIterator;
  37 import jdk.internal.icu.impl.UCharacterProperty;
  38 
  39 import java.text.CharacterIterator;
  40 
  41 /**
  42  * Abstract class that defines an API for iteration on text objects.This is an
  43  * interface for forward and backward iteration and random access into a text
  44  * object. Forward iteration is done with post-increment and backward iteration
  45  * is done with pre-decrement semantics, while the
  46  * <code>java.text.CharacterIterator</code> interface methods provided forward
  47  * iteration with "pre-increment" and backward iteration with pre-decrement
  48  * semantics. This API is more efficient for forward iteration over code points.
  49  * The other major difference is that this API can do both code unit and code point
  50  * iteration, <code>java.text.CharacterIterator</code> can only iterate over
  51  * code units and is limited to BMP (0 - 0xFFFF)
  52  * @author Ram
  53  * @stable ICU 2.4
  54  */
  55 public abstract class UCharacterIterator
  56                       implements Cloneable {
  57 
  58     /**
  59      * Protected default constructor for the subclasses
  60      * @stable ICU 2.4
  61      */
  62     protected UCharacterIterator(){
  63     }
  64 
  65     /**
  66      * Indicator that we have reached the ends of the UTF16 text.
  67      * Moved from UForwardCharacterIterator.java
  68      * @stable ICU 2.4
  69      */
  70     public static final int DONE = -1;
  71 
  72     // static final methods ----------------------------------------------------
  73 
  74     /**
  75      * Returns a <code>UCharacterIterator</code> object given a
  76      * source string.
  77      * @param source a string
  78      * @return UCharacterIterator object
  79      * @exception IllegalArgumentException if the argument is null
  80      * @stable ICU 2.4
  81      */
  82     public static final UCharacterIterator getInstance(String source){
  83         return new ReplaceableUCharacterIterator(source);
  84     }
  85 
  86     /**
  87      * Returns a <code>UCharacterIterator</code> object given a
  88      * source StringBuffer.
  89      * @param source an string buffer of UTF-16 code units
  90      * @return UCharacterIterator object
  91      * @exception IllegalArgumentException if the argument is null
  92      * @stable ICU 2.4
  93      */
  94     public static final UCharacterIterator getInstance(StringBuffer source){
  95         return new ReplaceableUCharacterIterator(source);
  96     }
  97 
  98    /**
  99      * Returns a <code>UCharacterIterator</code> object given a
 100      * CharacterIterator.
 101      * @param source a valid CharacterIterator object.
 102      * @return UCharacterIterator object
 103      * @exception IllegalArgumentException if the argument is null
 104      * @stable ICU 2.4
 105      */
 106     public static final UCharacterIterator getInstance(CharacterIterator source){
 107         return new CharacterIteratorWrapper(source);
 108     }
 109 
 110     // public methods ----------------------------------------------------------
 111 
 112     /**
 113      * Returns the length of the text
 114      * @return length of the text
 115      * @stable ICU 2.4
 116      */
 117     public abstract int getLength();
 118 
 119     /**
 120      * Gets the current index in text.
 121      * @return current index in text.
 122      * @stable ICU 2.4
 123      */
 124     public abstract int getIndex();
 125 
 126     /**
 127      * Returns the UTF16 code unit at index, and increments to the next
 128      * code unit (post-increment semantics).  If index is out of
 129      * range, DONE is returned, and the iterator is reset to the limit
 130      * of the text.
 131      * @return the next UTF16 code unit, or DONE if the index is at the limit
 132      *         of the text.
 133      * @stable ICU 2.4
 134      */
 135     public abstract int next();
 136 
 137     /**
 138      * Returns the code point at index, and increments to the next code
 139      * point (post-increment semantics).  If index does not point to a
 140      * valid surrogate pair, the behavior is the same as
 141      * <code>next()</code>.  Otherwise the iterator is incremented past
 142      * the surrogate pair, and the code point represented by the pair
 143      * is returned.
 144      * @return the next codepoint in text, or DONE if the index is at
 145      *         the limit of the text.
 146      * @stable ICU 2.4
 147      */
 148     public int nextCodePoint(){
 149         int ch1 = next();
 150         if(UTF16.isLeadSurrogate((char)ch1)){
 151             int ch2 = next();
 152             if(UTF16.isTrailSurrogate((char)ch2)){
 153                 return UCharacterProperty.getRawSupplementary((char)ch1,
 154                                                               (char)ch2);
 155             }else if (ch2 != DONE) {
 156                 // unmatched surrogate so back out
 157                 previous();
 158             }
 159         }
 160         return ch1;
 161     }
 162 
 163     /**
 164      * Decrement to the position of the previous code unit in the
 165      * text, and return it (pre-decrement semantics).  If the
 166      * resulting index is less than 0, the index is reset to 0 and
 167      * DONE is returned.
 168      * @return the previous code unit in the text, or DONE if the new
 169      *         index is before the start of the text.
 170      * @stable ICU 2.4
 171      */
 172     public abstract int previous();
 173 
 174 
 175     /**
 176      * Retreat to the start of the previous code point in the text,
 177      * and return it (pre-decrement semantics).  If the index is not
 178      * preceeded by a valid surrogate pair, the behavior is the same
 179      * as <code>previous()</code>.  Otherwise the iterator is
 180      * decremented to the start of the surrogate pair, and the code
 181      * point represented by the pair is returned.
 182      * @return the previous code point in the text, or DONE if the new
 183      *         index is before the start of the text.
 184      * @stable ICU 2.4
 185      */
 186     public int previousCodePoint(){
 187         int ch1 = previous();
 188         if(UTF16.isTrailSurrogate((char)ch1)){
 189             int ch2 = previous();
 190             if(UTF16.isLeadSurrogate((char)ch2)){
 191                 return UCharacterProperty.getRawSupplementary((char)ch2,
 192                                                               (char)ch1);
 193             }else if (ch2 != DONE) {
 194                 //unmatched trail surrogate so back out
 195                 next();
 196             }
 197         }
 198         return ch1;
 199     }
 200 
 201     /**
 202      * Sets the index to the specified index in the text.
 203      * @param index the index within the text.
 204      * @exception IndexOutOfBoundsException is thrown if an invalid index is
 205      *            supplied
 206      * @stable ICU 2.4
 207      */
 208     public abstract void setIndex(int index);
 209 
 210     /**
 211      * Sets the current index to the start.
 212      * @stable ICU 2.4
 213      */
 214     public void setToStart() {
 215         setIndex(0);
 216     }
 217 
 218     /**
 219      * Fills the buffer with the underlying text storage of the iterator
 220      * If the buffer capacity is not enough a exception is thrown. The capacity
 221      * of the fill in buffer should at least be equal to length of text in the
 222      * iterator obtained by calling <code>getLength()</code>.
 223      * <b>Usage:</b>
 224      *
 225      * <pre>{@code
 226      *         UChacterIterator iter = new UCharacterIterator.getInstance(text);
 227      *         char[] buf = new char[iter.getLength()];
 228      *         iter.getText(buf);
 229      *
 230      *         OR
 231      *         char[] buf= new char[1];
 232      *         int len = 0;
 233      *         for(;;){
 234      *             try{
 235      *                 len = iter.getText(buf);
 236      *                 break;
 237      *             }catch(IndexOutOfBoundsException e){
 238      *                 buf = new char[iter.getLength()];
 239      *             }
 240      *         }
 241      * }</pre>
 242      *
 243      * @param fillIn an array of chars to fill with the underlying UTF-16 code
 244      *         units.
 245      * @param offset the position within the array to start putting the data.
 246      * @return the number of code units added to fillIn, as a convenience
 247      * @exception IndexOutOfBoundsException exception if there is not enough
 248      *            room after offset in the array, or if offset < 0.
 249      * @stable ICU 2.4
 250      */
 251     public abstract int getText(char[] fillIn, int offset);
 252 
 253     /**
 254      * Convenience override for <code>getText(char[], int)</code> that provides
 255      * an offset of 0.
 256      * @param fillIn an array of chars to fill with the underlying UTF-16 code
 257      *         units.
 258      * @return the number of code units added to fillIn, as a convenience
 259      * @exception IndexOutOfBoundsException exception if there is not enough
 260      *            room in the array.
 261      * @stable ICU 2.4
 262      */
 263     public final int getText(char[] fillIn) {
 264         return getText(fillIn, 0);
 265     }
 266 
 267     /**
 268      * Convenience method for returning the underlying text storage as a string
 269      * @return the underlying text storage in the iterator as a string
 270      * @stable ICU 2.4
 271      */
 272     public String getText() {
 273         char[] text = new char[getLength()];
 274         getText(text);
 275         return new String(text);
 276     }
 277 
 278     /**
 279      * Moves the current position by the number of code points
 280      * specified, either forward or backward depending on the sign of
 281      * delta (positive or negative respectively). If the current index
 282      * is at a trail surrogate then the first adjustment is by code
 283      * unit, and the remaining adjustments are by code points.  If the
 284      * resulting index would be less than zero, the index is set to
 285      * zero, and if the resulting index would be greater than limit,
 286      * the index is set to limit.
 287      * @param delta the number of code units to move the current index.
 288      * @return the new index
 289      * @exception IndexOutOfBoundsException is thrown if an invalid delta is
 290      *            supplied
 291      * @stable ICU 2.4
 292      *
 293      */
 294     public int moveCodePointIndex(int delta){
 295         if(delta>0){
 296             while(delta>0 && nextCodePoint() != DONE){delta--;}
 297         }else{
 298             while(delta<0 && previousCodePoint() != DONE){delta++;}
 299         }
 300         if(delta!=0){
 301             throw new IndexOutOfBoundsException();
 302         }
 303 
 304         return getIndex();
 305     }
 306 
 307     /**
 308      * Creates a copy of this iterator, independent from other iterators.
 309      * If it is not possible to clone the iterator, returns null.
 310      * @return copy of this iterator
 311      * @stable ICU 2.4
 312      */
 313     public Object clone() throws CloneNotSupportedException{
 314         return super.clone();
 315     }
 316 
 317 }