1 /*
   2  * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.NoSuchElementException;
  29 import java.util.Objects;
  30 import java.util.PrimitiveIterator;
  31 import java.util.Spliterator;
  32 import java.util.Spliterators;
  33 import java.util.function.IntConsumer;
  34 import java.util.stream.IntStream;
  35 import java.util.stream.StreamSupport;
  36 
  37 /**
  38  * A {@code CharSequence} is a readable sequence of {@code char} values. This
  39  * interface provides uniform, read-only access to many different kinds of
  40  * {@code char} sequences.
  41  * A {@code char} value represents a character in the <i>Basic
  42  * Multilingual Plane (BMP)</i> or a surrogate. Refer to <a
  43  * href="Character.html#unicode">Unicode Character Representation</a> for details.
  44  *
  45  * <p> This interface does not refine the general contracts of the {@link
  46  * java.lang.Object#equals(java.lang.Object) equals} and {@link
  47  * java.lang.Object#hashCode() hashCode} methods. The result of testing two objects
  48  * that implement {@code CharSequence} for equality is therefore, in general, undefined.
  49  * Each object may be implemented by a different class, and there
  50  * is no guarantee that each class will be capable of testing its instances
  51  * for equality with those of the other.  It is therefore inappropriate to use
  52  * arbitrary {@code CharSequence} instances as elements in a set or as keys in
  53  * a map. </p>
  54  *
  55  * @author Mike McCloskey
  56  * @since 1.4
  57  * @spec JSR-51
  58  */
  59 
  60 public interface CharSequence {
  61 
  62     /**
  63      * Returns the length of this character sequence.  The length is the number
  64      * of 16-bit {@code char}s in the sequence.
  65      *
  66      * @return  the number of {@code char}s in this sequence
  67      */
  68     int length();
  69 
  70     /**
  71      * Returns the {@code char} value at the specified index.  An index ranges from zero
  72      * to {@code length() - 1}.  The first {@code char} value of the sequence is at
  73      * index zero, the next at index one, and so on, as for array
  74      * indexing.
  75      *
  76      * <p>If the {@code char} value specified by the index is a
  77      * <a href="{@docRoot}/java/lang/Character.html#unicode">surrogate</a>, the surrogate
  78      * value is returned.
  79      *
  80      * @param   index   the index of the {@code char} value to be returned
  81      *
  82      * @return  the specified {@code char} value
  83      *
  84      * @throws  IndexOutOfBoundsException
  85      *          if the {@code index} argument is negative or not less than
  86      *          {@code length()}
  87      */
  88     char charAt(int index);
  89 
  90     /**
  91      * Returns a {@code CharSequence} that is a subsequence of this sequence.
  92      * The subsequence starts with the {@code char} value at the specified index and
  93      * ends with the {@code char} value at index {@code end - 1}.  The length
  94      * (in {@code char}s) of the
  95      * returned sequence is {@code end - start}, so if {@code start == end}
  96      * then an empty sequence is returned.
  97      *
  98      * @param   start   the start index, inclusive
  99      * @param   end     the end index, exclusive
 100      *
 101      * @return  the specified subsequence
 102      *
 103      * @throws  IndexOutOfBoundsException
 104      *          if {@code start} or {@code end} are negative,
 105      *          if {@code end} is greater than {@code length()},
 106      *          or if {@code start} is greater than {@code end}
 107      */
 108     CharSequence subSequence(int start, int end);
 109 
 110     /**
 111      * Returns a string containing the characters in this sequence in the same
 112      * order as this sequence.  The length of the string will be the length of
 113      * this sequence.
 114      *
 115      * @return  a string consisting of exactly this sequence of characters
 116      */
 117     public String toString();
 118 
 119     /**
 120      * Returns a stream of {@code int} zero-extending the {@code char} values
 121      * from this sequence.  Any char which maps to a <a
 122      * href="{@docRoot}/java/lang/Character.html#unicode">surrogate code
 123      * point</a> is passed through uninterpreted.
 124      *
 125      * <p>The stream binds to this sequence when the terminal stream operation
 126      * commences (specifically, for mutable sequences the spliterator for the
 127      * stream is <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>).
 128      * If the sequence is modified during that operation then the result is
 129      * undefined.
 130      *
 131      * @return an IntStream of char values from this sequence
 132      * @since 1.8
 133      */
 134     public default IntStream chars() {
 135         class CharIterator implements PrimitiveIterator.OfInt {
 136             int cur = 0;
 137 
 138             public boolean hasNext() {
 139                 return cur < length();
 140             }
 141 
 142             public int nextInt() {
 143                 if (hasNext()) {
 144                     return charAt(cur++);
 145                 } else {
 146                     throw new NoSuchElementException();
 147                 }
 148             }
 149 
 150             @Override
 151             public void forEachRemaining(IntConsumer block) {
 152                 for (; cur < length(); cur++) {
 153                     block.accept(charAt(cur));
 154                 }
 155             }
 156         }
 157 
 158         return StreamSupport.intStream(() ->
 159                 Spliterators.spliterator(
 160                         new CharIterator(),
 161                         length(),
 162                         Spliterator.ORDERED),
 163                 Spliterator.SUBSIZED | Spliterator.SIZED | Spliterator.ORDERED,
 164                 false);
 165     }
 166 
 167     /**
 168      * Returns a stream of code point values from this sequence.  Any surrogate
 169      * pairs encountered in the sequence are combined as if by {@linkplain
 170      * Character#toCodePoint Character.toCodePoint} and the result is passed
 171      * to the stream. Any other code units, including ordinary BMP characters,
 172      * unpaired surrogates, and undefined code units, are zero-extended to
 173      * {@code int} values which are then passed to the stream.
 174      *
 175      * <p>The stream binds to this sequence when the terminal stream operation
 176      * commences (specifically, for mutable sequences the spliterator for the
 177      * stream is <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>).
 178      * If the sequence is modified during that operation then the result is
 179      * undefined.
 180      *
 181      * @return an IntStream of Unicode code points from this sequence
 182      * @since 1.8
 183      */
 184     public default IntStream codePoints() {
 185         class CodePointIterator implements PrimitiveIterator.OfInt {
 186             int cur = 0;
 187 
 188             @Override
 189             public void forEachRemaining(IntConsumer block) {
 190                 final int length = length();
 191                 int i = cur;
 192                 try {
 193                     while (i < length) {
 194                         char c1 = charAt(i++);
 195                         if (!Character.isHighSurrogate(c1) || i >= length) {
 196                             block.accept(c1);
 197                         } else {
 198                             char c2 = charAt(i);
 199                             if (Character.isLowSurrogate(c2)) {
 200                                 i++;
 201                                 block.accept(Character.toCodePoint(c1, c2));
 202                             } else {
 203                                 block.accept(c1);
 204                             }
 205                         }
 206                     }
 207                 } finally {
 208                     cur = i;
 209                 }
 210             }
 211 
 212             public boolean hasNext() {
 213                 return cur < length();
 214             }
 215 
 216             public int nextInt() {
 217                 final int length = length();
 218 
 219                 if (cur >= length) {
 220                     throw new NoSuchElementException();
 221                 }
 222                 char c1 = charAt(cur++);
 223                 if (Character.isHighSurrogate(c1) && cur < length) {
 224                     char c2 = charAt(cur);
 225                     if (Character.isLowSurrogate(c2)) {
 226                         cur++;
 227                         return Character.toCodePoint(c1, c2);
 228                     }
 229                 }
 230                 return c1;
 231             }
 232         }
 233 
 234         return StreamSupport.intStream(() ->
 235                 Spliterators.spliteratorUnknownSize(
 236                         new CodePointIterator(),
 237                         Spliterator.ORDERED),
 238                 Spliterator.ORDERED,
 239                 false);
 240     }
 241 
 242     /**
 243      * Compares two {@code CharSequence} instances lexicographically. Returns a
 244      * negative value, zero, or a positive value if the first sequence is lexicographically
 245      * less than, equal to, or greater than the second, respectively.
 246      *
 247      * <p>
 248      * The lexicographical ordering of {@code CharSequence} is defined as follows.
 249      * Consider a {@code CharSequence} <i>cs</i> of length <i>len</i> to be a
 250      * sequence of char values, <i>cs[0]</i> to <i>cs[len-1]</i>. Suppose <i>k</i>
 251      * is the lowest index at which the corresponding char values from each sequence
 252      * differ. The lexicographic ordering of the sequences is determined by a numeric
 253      * comparison of the char values <i>cs1[k]</i> with <i>cs2[k]</i>. If there is
 254      * no such index <i>k</i>, the shorter sequence is considered lexicographically
 255      * less than the other. If the sequences have the same length, the sequences are
 256      * considered lexicographically equal.
 257      *
 258      *
 259      * @param cs1 the first {@code CharSequence}
 260      * @param cs2 the second {@code CharSequence}
 261      *
 262      * @return  the value {@code 0} if the two {@code CharSequence} are equal;
 263      *          a negative integer if the first {@code CharSequence}
 264      *          is lexicographically less than the second; or a
 265      *          positive integer if the first {@code CharSequence} is
 266      *          lexicographically greater than the second.
 267      *
 268      * @since 11
 269      */
 270     @SuppressWarnings("unchecked")
 271     public static int compare(CharSequence cs1, CharSequence cs2) {
 272         if (Objects.requireNonNull(cs1) == Objects.requireNonNull(cs2)) {
 273             return 0;
 274         }
 275 
 276         if (cs1.getClass() == cs2.getClass() && cs1 instanceof Comparable) {
 277             return ((Comparable<Object>) cs1).compareTo(cs2);
 278         }
 279 
 280         for (int i = 0, len = Math.min(cs1.length(), cs2.length()); i < len; i++) {
 281             char a = cs1.charAt(i);
 282             char b = cs2.charAt(i);
 283             if (a != b) {
 284                 return a - b;
 285             }
 286         }
 287 
 288         return cs1.length() - cs2.length();
 289     }
 290 
 291 }