1 /* 2 * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.NoSuchElementException; 29 import java.util.PrimitiveIterator; 30 import java.util.Spliterator; 31 import java.util.Spliterators; 32 import java.util.function.IntConsumer; 33 import java.util.stream.IntStream; 34 import java.util.stream.StreamSupport; 35 36 /** 37 * A {@code CharSequence} is a readable sequence of {@code char} values. This 38 * interface provides uniform, read-only access to many different kinds of 39 * {@code char} sequences. 40 * A {@code char} value represents a character in the <i>Basic 41 * Multilingual Plane (BMP)</i> or a surrogate. Refer to <a 42 * href="Character.html#unicode">Unicode Character Representation</a> for details. 43 * 44 * <p> This interface does not refine the general contracts of the {@link 45 * java.lang.Object#equals(java.lang.Object) equals} and {@link 46 * java.lang.Object#hashCode() hashCode} methods. The result of comparing two 47 * objects that implement {@code CharSequence} is therefore, in general, 48 * undefined. Each object may be implemented by a different class, and there 49 * is no guarantee that each class will be capable of testing its instances 50 * for equality with those of the other. It is therefore inappropriate to use 51 * arbitrary {@code CharSequence} instances as elements in a set or as keys in 52 * a map. </p> 53 * 54 * @author Mike McCloskey 55 * @since 1.4 56 * @spec JSR-51 57 */ 58 59 public interface CharSequence { 60 61 /** 62 * Returns the length of this character sequence. The length is the number 63 * of 16-bit {@code char}s in the sequence. 64 * 65 * @return the number of {@code char}s in this sequence 66 */ 67 int length(); 68 69 /** 70 * Returns the {@code char} value at the specified index. An index ranges from zero 71 * to {@code length() - 1}. The first {@code char} value of the sequence is at 72 * index zero, the next at index one, and so on, as for array 73 * indexing. 74 * 75 * <p>If the {@code char} value specified by the index is a 76 * <a href="{@docRoot}/java/lang/Character.html#unicode">surrogate</a>, the surrogate 77 * value is returned. 78 * 79 * @param index the index of the {@code char} value to be returned 80 * 81 * @return the specified {@code char} value 82 * 83 * @throws IndexOutOfBoundsException 84 * if the {@code index} argument is negative or not less than 85 * {@code length()} 86 */ 87 char charAt(int index); 88 89 /** 90 * Returns a {@code CharSequence} that is a subsequence of this sequence. 91 * The subsequence starts with the {@code char} value at the specified index and 92 * ends with the {@code char} value at index {@code end - 1}. The length 93 * (in {@code char}s) of the 94 * returned sequence is {@code end - start}, so if {@code start == end} 95 * then an empty sequence is returned. 96 * 97 * @param start the start index, inclusive 98 * @param end the end index, exclusive 99 * 100 * @return the specified subsequence 101 * 102 * @throws IndexOutOfBoundsException 103 * if {@code start} or {@code end} are negative, 104 * if {@code end} is greater than {@code length()}, 105 * or if {@code start} is greater than {@code end} 106 */ 107 CharSequence subSequence(int start, int end); 108 109 /** 110 * Returns a string containing the characters in this sequence in the same 111 * order as this sequence. The length of the string will be the length of 112 * this sequence. 113 * 114 * @return a string consisting of exactly this sequence of characters 115 */ 116 public String toString(); 117 118 /** 119 * Returns a stream of {@code int} zero-extending the {@code char} values 120 * from this sequence. Any char which maps to a <a 121 * href="{@docRoot}/java/lang/Character.html#unicode">surrogate code 122 * point</a> is passed through uninterpreted. 123 * 124 * <p>The stream binds to this sequence when the terminal stream operation 125 * commences (specifically, for mutable sequences the spliterator for the 126 * stream is <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>). 127 * If the sequence is modified during that operation then the result is 128 * undefined. 129 * 130 * @return an IntStream of char values from this sequence 131 * @since 1.8 132 */ 133 public default IntStream chars() { 134 class CharIterator implements PrimitiveIterator.OfInt { 135 int cur = 0; 136 137 public boolean hasNext() { 138 return cur < length(); 139 } 140 141 public int nextInt() { 142 if (hasNext()) { 143 return charAt(cur++); 144 } else { 145 throw new NoSuchElementException(); 146 } 147 } 148 149 @Override 150 public void forEachRemaining(IntConsumer block) { 151 for (; cur < length(); cur++) { 152 block.accept(charAt(cur)); 153 } 154 } 155 } 156 157 return StreamSupport.intStream(() -> 158 Spliterators.spliterator( 159 new CharIterator(), 160 length(), 161 Spliterator.ORDERED), 162 Spliterator.SUBSIZED | Spliterator.SIZED | Spliterator.ORDERED, 163 false); 164 } 165 166 /** 167 * Returns a stream of code point values from this sequence. Any surrogate 168 * pairs encountered in the sequence are combined as if by {@linkplain 169 * Character#toCodePoint Character.toCodePoint} and the result is passed 170 * to the stream. Any other code units, including ordinary BMP characters, 171 * unpaired surrogates, and undefined code units, are zero-extended to 172 * {@code int} values which are then passed to the stream. 173 * 174 * <p>The stream binds to this sequence when the terminal stream operation 175 * commences (specifically, for mutable sequences the spliterator for the 176 * stream is <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>). 177 * If the sequence is modified during that operation then the result is 178 * undefined. 179 * 180 * @return an IntStream of Unicode code points from this sequence 181 * @since 1.8 182 */ 183 public default IntStream codePoints() { 184 class CodePointIterator implements PrimitiveIterator.OfInt { 185 int cur = 0; 186 187 @Override 188 public void forEachRemaining(IntConsumer block) { 189 final int length = length(); 190 int i = cur; 191 try { 192 while (i < length) { 193 char c1 = charAt(i++); 194 if (!Character.isHighSurrogate(c1) || i >= length) { 195 block.accept(c1); 196 } else { 197 char c2 = charAt(i); 198 if (Character.isLowSurrogate(c2)) { 199 i++; 200 block.accept(Character.toCodePoint(c1, c2)); 201 } else { 202 block.accept(c1); 203 } 204 } 205 } 206 } finally { 207 cur = i; 208 } 209 } 210 211 public boolean hasNext() { 212 return cur < length(); 213 } 214 215 public int nextInt() { 216 final int length = length(); 217 218 if (cur >= length) { 219 throw new NoSuchElementException(); 220 } 221 char c1 = charAt(cur++); 222 if (Character.isHighSurrogate(c1) && cur < length) { 223 char c2 = charAt(cur); 224 if (Character.isLowSurrogate(c2)) { 225 cur++; 226 return Character.toCodePoint(c1, c2); 227 } 228 } 229 return c1; 230 } 231 } 232 233 return StreamSupport.intStream(() -> 234 Spliterators.spliteratorUnknownSize( 235 new CodePointIterator(), 236 Spliterator.ORDERED), 237 Spliterator.ORDERED, 238 false); 239 } 240 }