1 /*
   2  * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.NoSuchElementException;
  29 import java.util.PrimitiveIterator;
  30 import java.util.Spliterator;
  31 import java.util.Spliterators;
  32 import java.util.function.IntConsumer;
  33 import java.util.stream.IntStream;
  34 import java.util.stream.StreamSupport;
  35 
  36 /**
  37  * A {@code CharSequence} is a readable sequence of {@code char} values. This
  38  * interface provides uniform, read-only access to many different kinds of
  39  * {@code char} sequences.
  40  * A {@code char} value represents a character in the <i>Basic
  41  * Multilingual Plane (BMP)</i> or a surrogate. Refer to <a
  42  * href="Character.html#unicode">Unicode Character Representation</a> for details.
  43  *
  44  * <p> This interface does not refine the general contracts of the {@link
  45  * java.lang.Object#equals(java.lang.Object) equals} and {@link
  46  * java.lang.Object#hashCode() hashCode} methods.  The result of comparing two
  47  * objects that implement {@code CharSequence} is therefore, in general,
  48  * undefined.  Each object may be implemented by a different class, and there
  49  * is no guarantee that each class will be capable of testing its instances
  50  * for equality with those of the other.  It is therefore inappropriate to use
  51  * arbitrary {@code CharSequence} instances as elements in a set or as keys in
  52  * a map. </p>
  53  *
  54  * @author Mike McCloskey
  55  * @since 1.4
  56  * @spec JSR-51
  57  */
  58 
  59 public interface CharSequence {
  60 
  61     /**
  62      * Returns the length of this character sequence.  The length is the number
  63      * of 16-bit {@code char}s in the sequence.
  64      *
  65      * @return  the number of {@code char}s in this sequence
  66      */
  67     int length();
  68 
  69     /**
  70      * Returns the {@code char} value at the specified index.  An index ranges from zero
  71      * to {@code length() - 1}.  The first {@code char} value of the sequence is at
  72      * index zero, the next at index one, and so on, as for array
  73      * indexing.
  74      *
  75      * <p>If the {@code char} value specified by the index is a
  76      * <a href="{@docRoot}/java/lang/Character.html#unicode">surrogate</a>, the surrogate
  77      * value is returned.
  78      *
  79      * @param   index   the index of the {@code char} value to be returned
  80      *
  81      * @return  the specified {@code char} value
  82      *
  83      * @throws  IndexOutOfBoundsException
  84      *          if the {@code index} argument is negative or not less than
  85      *          {@code length()}
  86      */
  87     char charAt(int index);
  88 
  89     /**
  90      * Returns a {@code CharSequence} that is a subsequence of this sequence.
  91      * The subsequence starts with the {@code char} value at the specified index and
  92      * ends with the {@code char} value at index {@code end - 1}.  The length
  93      * (in {@code char}s) of the
  94      * returned sequence is {@code end - start}, so if {@code start == end}
  95      * then an empty sequence is returned.
  96      *
  97      * @param   start   the start index, inclusive
  98      * @param   end     the end index, exclusive
  99      *
 100      * @return  the specified subsequence
 101      *
 102      * @throws  IndexOutOfBoundsException
 103      *          if {@code start} or {@code end} are negative,
 104      *          if {@code end} is greater than {@code length()},
 105      *          or if {@code start} is greater than {@code end}
 106      */
 107     CharSequence subSequence(int start, int end);
 108 
 109     /**
 110      * Returns a string containing the characters in this sequence in the same
 111      * order as this sequence.  The length of the string will be the length of
 112      * this sequence.
 113      *
 114      * @return  a string consisting of exactly this sequence of characters
 115      */
 116     public String toString();
 117 
 118     /**
 119      * Returns a stream of {@code int} zero-extending the {@code char} values
 120      * from this sequence.  Any char which maps to a <a
 121      * href="{@docRoot}/java/lang/Character.html#unicode">surrogate code
 122      * point</a> is passed through uninterpreted.
 123      *
 124      * <p>The stream binds to this sequence when the terminal stream operation
 125      * commences (specifically, for mutable sequences the spliterator for the
 126      * stream is <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>).
 127      * If the sequence is modified during that operation then the result is
 128      * undefined.
 129      *
 130      * @return an IntStream of char values from this sequence
 131      * @since 1.8
 132      */
 133     public default IntStream chars() {
 134         class CharIterator implements PrimitiveIterator.OfInt {
 135             int cur = 0;
 136 
 137             public boolean hasNext() {
 138                 return cur < length();
 139             }
 140 
 141             public int nextInt() {
 142                 if (hasNext()) {
 143                     return charAt(cur++);
 144                 } else {
 145                     throw new NoSuchElementException();
 146                 }
 147             }
 148 
 149             @Override
 150             public void forEachRemaining(IntConsumer block) {
 151                 for (; cur < length(); cur++) {
 152                     block.accept(charAt(cur));
 153                 }
 154             }
 155         }
 156 
 157         return StreamSupport.intStream(() ->
 158                 Spliterators.spliterator(
 159                         new CharIterator(),
 160                         length(),
 161                         Spliterator.ORDERED),
 162                 Spliterator.SUBSIZED | Spliterator.SIZED | Spliterator.ORDERED,
 163                 false);
 164     }
 165 
 166     /**
 167      * Returns a stream of code point values from this sequence.  Any surrogate
 168      * pairs encountered in the sequence are combined as if by {@linkplain
 169      * Character#toCodePoint Character.toCodePoint} and the result is passed
 170      * to the stream. Any other code units, including ordinary BMP characters,
 171      * unpaired surrogates, and undefined code units, are zero-extended to
 172      * {@code int} values which are then passed to the stream.
 173      *
 174      * <p>The stream binds to this sequence when the terminal stream operation
 175      * commences (specifically, for mutable sequences the spliterator for the
 176      * stream is <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>).
 177      * If the sequence is modified during that operation then the result is
 178      * undefined.
 179      *
 180      * @return an IntStream of Unicode code points from this sequence
 181      * @since 1.8
 182      */
 183     public default IntStream codePoints() {
 184         class CodePointIterator implements PrimitiveIterator.OfInt {
 185             int cur = 0;
 186 
 187             @Override
 188             public void forEachRemaining(IntConsumer block) {
 189                 final int length = length();
 190                 int i = cur;
 191                 try {
 192                     while (i < length) {
 193                         char c1 = charAt(i++);
 194                         if (!Character.isHighSurrogate(c1) || i >= length) {
 195                             block.accept(c1);
 196                         } else {
 197                             char c2 = charAt(i);
 198                             if (Character.isLowSurrogate(c2)) {
 199                                 i++;
 200                                 block.accept(Character.toCodePoint(c1, c2));
 201                             } else {
 202                                 block.accept(c1);
 203                             }
 204                         }
 205                     }
 206                 } finally {
 207                     cur = i;
 208                 }
 209             }
 210 
 211             public boolean hasNext() {
 212                 return cur < length();
 213             }
 214 
 215             public int nextInt() {
 216                 final int length = length();
 217 
 218                 if (cur >= length) {
 219                     throw new NoSuchElementException();
 220                 }
 221                 char c1 = charAt(cur++);
 222                 if (Character.isHighSurrogate(c1) && cur < length) {
 223                     char c2 = charAt(cur);
 224                     if (Character.isLowSurrogate(c2)) {
 225                         cur++;
 226                         return Character.toCodePoint(c1, c2);
 227                     }
 228                 }
 229                 return c1;
 230             }
 231         }
 232 
 233         return StreamSupport.intStream(() ->
 234                 Spliterators.spliteratorUnknownSize(
 235                         new CodePointIterator(),
 236                         Spliterator.ORDERED),
 237                 Spliterator.ORDERED,
 238                 false);
 239     }
 240 }