19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.lang;
27
28 import java.io.ObjectStreamField;
29 import java.io.UnsupportedEncodingException;
30 import java.nio.charset.Charset;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Comparator;
34 import java.util.Formatter;
35 import java.util.Locale;
36 import java.util.Objects;
37 import java.util.Spliterator;
38 import java.util.StringJoiner;
39 import java.util.function.IntConsumer;
40 import java.util.regex.Matcher;
41 import java.util.regex.Pattern;
42 import java.util.regex.PatternSyntaxException;
43 import java.util.stream.IntStream;
44 import java.util.stream.StreamSupport;
45 import jdk.internal.HotSpotIntrinsicCandidate;
46
47 /**
48 * The {@code String} class represents character strings. All
49 * string literals in Java programs, such as {@code "abc"}, are
50 * implemented as instances of this class.
51 * <p>
52 * Strings are constant; their values cannot be changed after they
53 * are created. String buffers support mutable strings.
54 * Because String objects are immutable they can be shared. For example:
55 * <blockquote><pre>
56 * String str = "abc";
57 * </pre></blockquote><p>
58 * is equivalent to:
59 * <blockquote><pre>
102 * Unicode code points (i.e., characters), in addition to those for
103 * dealing with Unicode code units (i.e., {@code char} values).
104 *
105 * <p>Unless otherwise noted, methods for comparing Strings do not take locale
106 * into account. The {@link java.text.Collator} class provides methods for
107 * finer-grain, locale-sensitive String comparison.
108 *
109 * @author Lee Boynton
110 * @author Arthur van Hoff
111 * @author Martin Buchholz
112 * @author Ulf Zibis
113 * @see java.lang.Object#toString()
114 * @see java.lang.StringBuffer
115 * @see java.lang.StringBuilder
116 * @see java.nio.charset.Charset
117 * @since 1.0
118 */
119
120 public final class String
121 implements java.io.Serializable, Comparable<String>, CharSequence {
122 /** The value is used for character storage. */
123 private final char value[];
124
125 /** Cache the hash code for the string */
126 private int hash; // Default to 0
127
128 /** use serialVersionUID from JDK 1.0.2 for interoperability */
129 private static final long serialVersionUID = -6849794470754667710L;
130
131 /**
132 * Class String is special cased within the Serialization Stream Protocol.
133 *
134 * A String instance is written into an ObjectOutputStream according to
135 * <a href="{@docRoot}/../platform/serialization/spec/output.html">
136 * Object Serialization Specification, Section 6.2, "Stream Elements"</a>
137 */
138 private static final ObjectStreamField[] serialPersistentFields =
139 new ObjectStreamField[0];
140
141 /**
142 * Initializes a newly created {@code String} object so that it represents
143 * an empty character sequence. Note that use of this constructor is
144 * unnecessary since Strings are immutable.
145 */
146 public String() {
147 this.value = "".value;
148 }
149
150 /**
151 * Initializes a newly created {@code String} object so that it represents
152 * the same sequence of characters as the argument; in other words, the
153 * newly created string is a copy of the argument string. Unless an
154 * explicit copy of {@code original} is needed, use of this constructor is
155 * unnecessary since Strings are immutable.
156 *
157 * @param original
158 * A {@code String}
159 */
160 @HotSpotIntrinsicCandidate
161 public String(String original) {
162 this.value = original.value;
163 this.hash = original.hash;
164 }
165
166 /**
167 * Allocates a new {@code String} so that it represents the sequence of
168 * characters currently contained in the character array argument. The
169 * contents of the character array are copied; subsequent modification of
170 * the character array does not affect the newly created string.
171 *
172 * @param value
173 * The initial value of the string
174 */
public String(char value[]) {
    // Defensive copy: later writes to the caller's array must not show through this string.
    this.value = value.clone();
}
178
179 /**
180 * Allocates a new {@code String} that contains characters from a subarray
181 * of the character array argument. The {@code offset} argument is the
182 * index of the first character of the subarray and the {@code count}
183 * argument specifies the length of the subarray. The contents of the
184 * subarray are copied; subsequent modification of the character array does
185 * not affect the newly created string.
186 *
187 * @param value
188 * Array that is the source of characters
189 *
190 * @param offset
191 * The initial offset
192 *
193 * @param count
194 * The length
195 *
196 * @throws IndexOutOfBoundsException
197 * If {@code offset} is negative, {@code count} is negative, or
198 * {@code offset} is greater than {@code value.length - count}
199 */
200 public String(char value[], int offset, int count) {
201 if (offset < 0) {
202 throw new StringIndexOutOfBoundsException(offset);
203 }
204 if (count <= 0) {
205 if (count < 0) {
206 throw new StringIndexOutOfBoundsException(count);
207 }
208 if (offset <= value.length) {
209 this.value = "".value;
210 return;
211 }
212 }
213 // Note: offset or count might be near -1>>>1.
214 if (offset > value.length - count) {
215 throw new StringIndexOutOfBoundsException(offset + count);
216 }
217 this.value = Arrays.copyOfRange(value, offset, offset + count);
218 }
219
220 /**
221 * Allocates a new {@code String} that contains characters from a subarray
222 * of the <a href="Character.html#unicode">Unicode code point</a> array
223 * argument. The {@code offset} argument is the index of the first code
224 * point of the subarray and the {@code count} argument specifies the
225 * length of the subarray. The contents of the subarray are converted to
226 * {@code char}s; subsequent modification of the {@code int} array does not
227 * affect the newly created string.
228 *
229 * @param codePoints
230 * Array that is the source of Unicode code points
231 *
232 * @param offset
233 * The initial offset
234 *
235 * @param count
236 * The length
237 *
238 * @throws IllegalArgumentException
239 * If any invalid Unicode code point is found in {@code
240 * codePoints}
241 *
242 * @throws IndexOutOfBoundsException
243 * If {@code offset} is negative, {@code count} is negative, or
244 * {@code offset} is greater than {@code codePoints.length - count}
245 *
246 * @since 1.5
247 */
public String(int[] codePoints, int offset, int count) {
    // Reject negative arguments first; each failure reports the offending argument.
    if (offset < 0) {
        throw new StringIndexOutOfBoundsException(offset);
    }
    if (count < 0) {
        throw new StringIndexOutOfBoundsException(count);
    }
    final int sourceLength = codePoints.length;
    // A zero-length request at an in-range offset shares the empty literal's array.
    if (count == 0 && offset <= sourceLength) {
        this.value = "".value;
        return;
    }
    // Compared via subtraction so that an offset or count near Integer.MAX_VALUE
    // cannot overflow the range check.
    if (offset > sourceLength - count) {
        throw new StringIndexOutOfBoundsException(offset + count);
    }

    final int end = offset + count;

    // First pass: validate every code point and work out the exact char count.
    // Each code point outside the BMP needs one extra char for its surrogate pair.
    int charCount = count;
    for (int i = offset; i < end; i++) {
        final int codePoint = codePoints[i];
        if (!Character.isBmpCodePoint(codePoint)) {
            if (!Character.isValidCodePoint(codePoint)) {
                throw new IllegalArgumentException(Integer.toString(codePoint));
            }
            charCount++;
        }
    }

    // Second pass: fill the exactly-sized array, tracking the write position separately.
    final char[] chars = new char[charCount];
    int out = 0;
    for (int i = offset; i < end; i++) {
        final int codePoint = codePoints[i];
        if (Character.isBmpCodePoint(codePoint)) {
            chars[out++] = (char) codePoint;
        } else {
            // Writes two chars (the surrogate pair) starting at 'out'.
            Character.toSurrogates(codePoint, chars, out);
            out += 2;
        }
    }

    this.value = chars;
}
292
293 /**
294 * Allocates a new {@code String} constructed from a subarray of an array
295 * of 8-bit integer values.
296 *
297 * <p> The {@code offset} argument is the index of the first byte of the
298 * subarray, and the {@code count} argument specifies the length of the
299 * subarray.
300 *
301 * <p> Each {@code byte} in the subarray is converted to a {@code char} as
302 * specified in the method above.
303 *
304 * @deprecated This method does not properly convert bytes into characters.
305 * As of JDK 1.1, the preferred way to do this is via the
306 * {@code String} constructors that take a {@link
307 * java.nio.charset.Charset}, charset name, or that use the platform's
308 * default charset.
309 *
310 * @param ascii
315 *
316 * @param offset
317 * The initial offset
318 * @param count
319 * The length
320 *
321 * @throws IndexOutOfBoundsException
322 * If {@code offset} is negative, {@code count} is negative, or
323 * {@code offset} is greater than {@code ascii.length - count}
324 *
325 * @see #String(byte[], int)
326 * @see #String(byte[], int, int, java.lang.String)
327 * @see #String(byte[], int, int, java.nio.charset.Charset)
328 * @see #String(byte[], int, int)
329 * @see #String(byte[], java.lang.String)
330 * @see #String(byte[], java.nio.charset.Charset)
331 * @see #String(byte[])
332 */
333 @Deprecated
334 public String(byte ascii[], int hibyte, int offset, int count) {
335 checkBounds(ascii, offset, count);
336 char[] value = new char[count];
337
338 if (hibyte == 0) {
339 for (int i = count; i-- > 0;) {
340 value[i] = (char)(ascii[i + offset] & 0xff);
341 }
342 } else {
343 hibyte <<= 8;
344 for (int i = count; i-- > 0;) {
345 value[i] = (char)(hibyte | (ascii[i + offset] & 0xff));
346 }
347 }
348 this.value = value;
349 }
350
351 /**
352 * Allocates a new {@code String} containing characters constructed from
353 * an array of 8-bit integer values. Each character <i>c</i> in the
354 * resulting string is constructed from the corresponding component
355 * <i>b</i> in the byte array such that:
356 *
357 * <blockquote><pre>
358 * <b><i>c</i></b> == (char)(((hibyte & 0xff) << 8)
359 * | (<b><i>b</i></b> & 0xff))
360 * </pre></blockquote>
361 *
362 * @deprecated This method does not properly convert bytes into
363 * characters. As of JDK 1.1, the preferred way to do this is via the
364 * {@code String} constructors that take a {@link
365 * java.nio.charset.Charset}, charset name, or that use the platform's
366 * default charset.
367 *
368 * @param ascii
369 * The bytes to be converted to characters
370 *
371 * @param hibyte
372 * The top 8 bits of each 16-bit Unicode code unit
373 *
374 * @see #String(byte[], int, int, java.lang.String)
375 * @see #String(byte[], int, int, java.nio.charset.Charset)
376 * @see #String(byte[], int, int)
377 * @see #String(byte[], java.lang.String)
378 * @see #String(byte[], java.nio.charset.Charset)
379 * @see #String(byte[])
380 */
@Deprecated
public String(byte[] ascii, int hibyte) {
    // Whole-array form: delegate with offset 0 and the full array length.
    this(ascii, hibyte, 0, ascii.length);
}
385
386 /* Common private utility method used to bounds check the byte array
387 * and requested offset & length values used by the String(byte[],..)
388 * constructors.
389 */
390 private static void checkBounds(byte[] bytes, int offset, int length) {
391 if (length < 0)
392 throw new StringIndexOutOfBoundsException(length);
393 if (offset < 0)
394 throw new StringIndexOutOfBoundsException(offset);
395 if (offset > bytes.length - length)
396 throw new StringIndexOutOfBoundsException(offset + length);
397 }
398
399 /**
400 * Constructs a new {@code String} by decoding the specified subarray of
401 * bytes using the specified charset. The length of the new {@code String}
402 * is a function of the charset, and hence may not be equal to the length
403 * of the subarray.
404 *
405 * <p> The behavior of this constructor when the given bytes are not valid
406 * in the given charset is unspecified. The {@link
407 * java.nio.charset.CharsetDecoder} class should be used when more control
408 * over the decoding process is required.
409 *
410 * @param bytes
411 * The bytes to be decoded into characters
412 *
413 * @param offset
414 * The index of the first byte to decode
415 *
416 * @param length
417 * The number of bytes to decode
418 *
419 * @param charsetName
420 * The name of a supported {@linkplain java.nio.charset.Charset
421 * charset}
422 *
423 * @throws UnsupportedEncodingException
424 * If the named charset is not supported
425 *
426 * @throws IndexOutOfBoundsException
427 * If {@code offset} is negative, {@code length} is negative, or
428 * {@code offset} is greater than {@code bytes.length - length}
429 *
430 * @since 1.1
431 */
432 public String(byte bytes[], int offset, int length, String charsetName)
433 throws UnsupportedEncodingException {
434 if (charsetName == null)
435 throw new NullPointerException("charsetName");
436 checkBounds(bytes, offset, length);
437 this.value = StringCoding.decode(charsetName, bytes, offset, length);
438 }
439
440 /**
441 * Constructs a new {@code String} by decoding the specified subarray of
442 * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
443 * The length of the new {@code String} is a function of the charset, and
444 * hence may not be equal to the length of the subarray.
445 *
446 * <p> This method always replaces malformed-input and unmappable-character
447 * sequences with this charset's default replacement string. The {@link
448 * java.nio.charset.CharsetDecoder} class should be used when more control
449 * over the decoding process is required.
450 *
451 * @param bytes
452 * The bytes to be decoded into characters
453 *
454 * @param offset
455 * The index of the first byte to decode
456 *
457 * @param length
458 * The number of bytes to decode
459 *
460 * @param charset
461 * The {@linkplain java.nio.charset.Charset charset} to be used to
462 * decode the {@code bytes}
463 *
464 * @throws IndexOutOfBoundsException
465 * If {@code offset} is negative, {@code length} is negative, or
466 * {@code offset} is greater than {@code bytes.length - length}
467 *
468 * @since 1.6
469 */
470 public String(byte bytes[], int offset, int length, Charset charset) {
471 if (charset == null)
472 throw new NullPointerException("charset");
473 checkBounds(bytes, offset, length);
474 this.value = StringCoding.decode(charset, bytes, offset, length);
475 }
476
477 /**
478 * Constructs a new {@code String} by decoding the specified array of bytes
479 * using the specified {@linkplain java.nio.charset.Charset charset}. The
480 * length of the new {@code String} is a function of the charset, and hence
481 * may not be equal to the length of the byte array.
482 *
483 * <p> The behavior of this constructor when the given bytes are not valid
484 * in the given charset is unspecified. The {@link
485 * java.nio.charset.CharsetDecoder} class should be used when more control
486 * over the decoding process is required.
487 *
488 * @param bytes
489 * The bytes to be decoded into characters
490 *
491 * @param charsetName
492 * The name of a supported {@linkplain java.nio.charset.Charset
493 * charset}
494 *
536 * in the default charset is unspecified. The {@link
537 * java.nio.charset.CharsetDecoder} class should be used when more control
538 * over the decoding process is required.
539 *
540 * @param bytes
541 * The bytes to be decoded into characters
542 *
543 * @param offset
544 * The index of the first byte to decode
545 *
546 * @param length
547 * The number of bytes to decode
548 *
549 * @throws IndexOutOfBoundsException
550 * If {@code offset} is negative, {@code length} is negative, or
551 * {@code offset} is greater than {@code bytes.length - length}
552 *
553 * @since 1.1
554 */
555 public String(byte bytes[], int offset, int length) {
556 checkBounds(bytes, offset, length);
557 this.value = StringCoding.decode(bytes, offset, length);
558 }
559
560 /**
561 * Constructs a new {@code String} by decoding the specified array of bytes
562 * using the platform's default charset. The length of the new {@code
563 * String} is a function of the charset, and hence may not be equal to the
564 * length of the byte array.
565 *
566 * <p> The behavior of this constructor when the given bytes are not valid
567 * in the default charset is unspecified. The {@link
568 * java.nio.charset.CharsetDecoder} class should be used when more control
569 * over the decoding process is required.
570 *
571 * @param bytes
572 * The bytes to be decoded into characters
573 *
574 * @since 1.1
575 */
public String(byte[] bytes) {
    // Whole-array form: delegate with offset 0 and the full array length.
    this(bytes, 0, bytes.length);
}
579
580 /**
581 * Allocates a new string that contains the sequence of characters
582 * currently contained in the string buffer argument. The contents of the
583 * string buffer are copied; subsequent modification of the string buffer
584 * does not affect the newly created string.
585 *
586 * @param buffer
587 * A {@code StringBuffer}
588 */
589 public String(StringBuffer buffer) {
590 synchronized(buffer) {
591 this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
592 }
593 }
594
595 /**
596 * Allocates a new string that contains the sequence of characters
597 * currently contained in the string builder argument. The contents of the
598 * string builder are copied; subsequent modification of the string builder
599 * does not affect the newly created string.
600 *
601 * <p> This constructor is provided to ease migration to {@code
602 * StringBuilder}. Obtaining a string from a string builder via the {@code
603 * toString} method is likely to run faster and is generally preferred.
604 *
605 * @param builder
606 * A {@code StringBuilder}
607 *
608 * @since 1.5
609 */
610 public String(StringBuilder builder) {
611 this.value = Arrays.copyOf(builder.getValue(), builder.length());
612 }
613
614 /*
615 * Package private constructor which shares value array for speed.
616 * this constructor is always expected to be called with share==true.
617 * a separate constructor is needed because we already have a public
618 * String(char[]) constructor that makes a copy of the given char[].
619 */
String(char[] value, boolean share) {
    // The array is adopted without copying. The 'share' flag is not read here; it only
    // distinguishes this signature from the copying String(char[]) constructor.
    // assert share : "unshared not supported";
    this.value = value;
}
624
625 /**
626 * Returns the length of this string.
627 * The length is equal to the number of <a href="Character.html#unicode">Unicode
628 * code units</a> in the string.
629 *
630 * @return the length of the sequence of characters represented by this
631 * object.
632 */
633 public int length() {
634 return value.length;
635 }
636
637 /**
638 * Returns {@code true} if, and only if, {@link #length()} is {@code 0}.
639 *
640 * @return {@code true} if {@link #length()} is {@code 0}, otherwise
641 * {@code false}
642 *
643 * @since 1.6
644 */
645 public boolean isEmpty() {
646 return value.length == 0;
647 }
648
649 /**
650 * Returns the {@code char} value at the
651 * specified index. An index ranges from {@code 0} to
652 * {@code length() - 1}. The first {@code char} value of the sequence
653 * is at index {@code 0}, the next at index {@code 1},
654 * and so on, as for array indexing.
655 *
656 * <p>If the {@code char} value specified by the index is a
657 * <a href="Character.html#unicode">surrogate</a>, the surrogate
658 * value is returned.
659 *
660 * @param index the index of the {@code char} value.
661 * @return the {@code char} value at the specified index of this string.
662 * The first {@code char} value is at index {@code 0}.
663 * @exception IndexOutOfBoundsException if the {@code index}
664 * argument is negative or not less than the length of this
665 * string.
666 */
667 public char charAt(int index) {
668 if ((index < 0) || (index >= value.length)) {
669 throw new StringIndexOutOfBoundsException(index);
670 }
671 return value[index];
672 }
673
674 /**
675 * Returns the character (Unicode code point) at the specified
676 * index. The index refers to {@code char} values
677 * (Unicode code units) and ranges from {@code 0} to
678 * {@link #length()}{@code - 1}.
679 *
680 * <p> If the {@code char} value specified at the given index
681 * is in the high-surrogate range, the following index is less
682 * than the length of this {@code String}, and the
683 * {@code char} value at the following index is in the
684 * low-surrogate range, then the supplementary code point
685 * corresponding to this surrogate pair is returned. Otherwise,
686 * the {@code char} value at the given index is returned.
687 *
688 * @param index the index to the {@code char} values
689 * @return the code point value of the character at the
690 * {@code index}
691 * @exception IndexOutOfBoundsException if the {@code index}
692 * argument is negative or not less than the length of this
693 * string.
694 * @since 1.5
695 */
696 public int codePointAt(int index) {
697 if ((index < 0) || (index >= value.length)) {
698 throw new StringIndexOutOfBoundsException(index);
699 }
700 return Character.codePointAtImpl(value, index, value.length);
701 }
702
703 /**
704 * Returns the character (Unicode code point) before the specified
705 * index. The index refers to {@code char} values
706 * (Unicode code units) and ranges from {@code 1} to {@link
707 * CharSequence#length() length}.
708 *
709 * <p> If the {@code char} value at {@code (index - 1)}
710 * is in the low-surrogate range, {@code (index - 2)} is not
711 * negative, and the {@code char} value at {@code (index -
712 * 2)} is in the high-surrogate range, then the
713 * supplementary code point value of the surrogate pair is
714 * returned. If the {@code char} value at {@code index -
715 * 1} is an unpaired low-surrogate or a high-surrogate, the
716 * surrogate value is returned.
717 *
718 * @param index the index following the code point that should be returned
719 * @return the Unicode code point value before the given index.
720 * @exception IndexOutOfBoundsException if the {@code index}
721 * argument is less than 1 or greater than the length
722 * of this string.
723 * @since 1.5
724 */
725 public int codePointBefore(int index) {
726 int i = index - 1;
727 if ((i < 0) || (i >= value.length)) {
728 throw new StringIndexOutOfBoundsException(index);
729 }
730 return Character.codePointBeforeImpl(value, index, 0);
731 }
732
733 /**
734 * Returns the number of Unicode code points in the specified text
735 * range of this {@code String}. The text range begins at the
736 * specified {@code beginIndex} and extends to the
737 * {@code char} at index {@code endIndex - 1}. Thus the
738 * length (in {@code char}s) of the text range is
739 * {@code endIndex-beginIndex}. Unpaired surrogates within
740 * the text range count as one code point each.
741 *
742 * @param beginIndex the index to the first {@code char} of
743 * the text range.
744 * @param endIndex the index after the last {@code char} of
745 * the text range.
746 * @return the number of Unicode code points in the specified text
747 * range
748 * @exception IndexOutOfBoundsException if the
749 * {@code beginIndex} is negative, or {@code endIndex}
750 * is larger than the length of this {@code String}, or
751 * {@code beginIndex} is larger than {@code endIndex}.
752 * @since 1.5
753 */
754 public int codePointCount(int beginIndex, int endIndex) {
755 if (beginIndex < 0 || endIndex > value.length || beginIndex > endIndex) {
756 throw new IndexOutOfBoundsException();
757 }
758 return Character.codePointCountImpl(value, beginIndex, endIndex - beginIndex);
759 }
760
761 /**
762 * Returns the index within this {@code String} that is
763 * offset from the given {@code index} by
764 * {@code codePointOffset} code points. Unpaired surrogates
765 * within the text range given by {@code index} and
766 * {@code codePointOffset} count as one code point each.
767 *
768 * @param index the index to be offset
769 * @param codePointOffset the offset in code points
770 * @return the index within this {@code String}
771 * @exception IndexOutOfBoundsException if {@code index}
772 * is negative or larger than the length of this
773 * {@code String}, or if {@code codePointOffset} is positive
774 * and the substring starting with {@code index} has fewer
775 * than {@code codePointOffset} code points,
776 * or if {@code codePointOffset} is negative and the substring
777 * before {@code index} has fewer than the absolute value
778 * of {@code codePointOffset} code points.
779 * @since 1.5
780 */
781 public int offsetByCodePoints(int index, int codePointOffset) {
782 if (index < 0 || index > value.length) {
783 throw new IndexOutOfBoundsException();
784 }
785 return Character.offsetByCodePointsImpl(value, 0, value.length,
786 index, codePointOffset);
787 }
788
789 /**
790 * Copy characters from this string into dst starting at dstBegin.
791 * This method doesn't perform any range checking.
792 */
793 void getChars(char dst[], int dstBegin) {
794 System.arraycopy(value, 0, dst, dstBegin, value.length);
795 }
796
797 /**
798 * Copies characters from this string into the destination character
799 * array.
800 * <p>
801 * The first character to be copied is at index {@code srcBegin};
802 * the last character to be copied is at index {@code srcEnd-1}
803 * (thus the total number of characters to be copied is
804 * {@code srcEnd-srcBegin}). The characters are copied into the
805 * subarray of {@code dst} starting at index {@code dstBegin}
806 * and ending at index:
807 * <blockquote><pre>
808 * dstBegin + (srcEnd-srcBegin) - 1
809 * </pre></blockquote>
810 *
811 * @param srcBegin index of the first character in the string
812 * to copy.
813 * @param srcEnd index after the last character in the string
814 * to copy.
815 * @param dst the destination array.
816 * @param dstBegin the start offset in the destination array.
817 * @exception IndexOutOfBoundsException If any of the following
818 * is true:
819 * <ul><li>{@code srcBegin} is negative.
820 * <li>{@code srcBegin} is greater than {@code srcEnd}
821 * <li>{@code srcEnd} is greater than the length of this
822 * string
823 * <li>{@code dstBegin} is negative
824 * <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than
825 * {@code dst.length}</ul>
826 */
827 public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) {
828 if (srcBegin < 0) {
829 throw new StringIndexOutOfBoundsException(srcBegin);
830 }
831 if (srcEnd > value.length) {
832 throw new StringIndexOutOfBoundsException(srcEnd);
833 }
834 if (srcBegin > srcEnd) {
835 throw new StringIndexOutOfBoundsException(srcEnd - srcBegin);
836 }
837 System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
838 }
839
840 /**
841 * Copies characters from this string into the destination byte array. Each
842 * byte receives the 8 low-order bits of the corresponding character. The
843 * eight high-order bits of each character are not copied and do not
844 * participate in the transfer in any way.
845 *
846 * <p> The first character to be copied is at index {@code srcBegin}; the
847 * last character to be copied is at index {@code srcEnd-1}. The total
848 * number of characters to be copied is {@code srcEnd-srcBegin}. The
849 * characters, converted to bytes, are copied into the subarray of {@code
850 * dst} starting at index {@code dstBegin} and ending at index:
851 *
852 * <blockquote><pre>
853 * dstBegin + (srcEnd-srcBegin) - 1
854 * </pre></blockquote>
855 *
856 * @deprecated This method does not properly convert characters into
857 * bytes. As of JDK 1.1, the preferred way to do this is via the
865 *
866 * @param dst
867 * The destination array
868 *
869 * @param dstBegin
870 * The start offset in the destination array
871 *
872 * @throws IndexOutOfBoundsException
873 * If any of the following is true:
874 * <ul>
875 * <li> {@code srcBegin} is negative
876 * <li> {@code srcBegin} is greater than {@code srcEnd}
877 * <li> {@code srcEnd} is greater than the length of this String
878 * <li> {@code dstBegin} is negative
879 * <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code
880 * dst.length}
881 * </ul>
882 */
883 @Deprecated
884 public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) {
885 if (srcBegin < 0) {
886 throw new StringIndexOutOfBoundsException(srcBegin);
887 }
888 if (srcEnd > value.length) {
889 throw new StringIndexOutOfBoundsException(srcEnd);
890 }
891 if (srcBegin > srcEnd) {
892 throw new StringIndexOutOfBoundsException(srcEnd - srcBegin);
893 }
894 Objects.requireNonNull(dst);
895
896 int j = dstBegin;
897 int n = srcEnd;
898 int i = srcBegin;
899 char[] val = value; /* avoid getfield opcode */
900
901 while (i < n) {
902 dst[j++] = (byte)val[i++];
903 }
904 }
905
906 /**
907 * Encodes this {@code String} into a sequence of bytes using the named
908 * charset, storing the result into a new byte array.
909 *
910 * <p> The behavior of this method when this string cannot be encoded in
911 * the given charset is unspecified. The {@link
912 * java.nio.charset.CharsetEncoder} class should be used when more control
913 * over the encoding process is required.
914 *
915 * @param charsetName
916 * The name of a supported {@linkplain java.nio.charset.Charset
917 * charset}
918 *
919 * @return The resultant byte array
920 *
921 * @throws UnsupportedEncodingException
922 * If the named charset is not supported
923 *
924 * @since 1.1
925 */
926 public byte[] getBytes(String charsetName)
927 throws UnsupportedEncodingException {
928 if (charsetName == null) throw new NullPointerException();
929 return StringCoding.encode(charsetName, value, 0, value.length);
930 }
931
932 /**
933 * Encodes this {@code String} into a sequence of bytes using the given
934 * {@linkplain java.nio.charset.Charset charset}, storing the result into a
935 * new byte array.
936 *
937 * <p> This method always replaces malformed-input and unmappable-character
938 * sequences with this charset's default replacement byte array. The
939 * {@link java.nio.charset.CharsetEncoder} class should be used when more
940 * control over the encoding process is required.
941 *
942 * @param charset
943 * The {@linkplain java.nio.charset.Charset} to be used to encode
944 * the {@code String}
945 *
946 * @return The resultant byte array
947 *
948 * @since 1.6
949 */
950 public byte[] getBytes(Charset charset) {
951 if (charset == null) throw new NullPointerException();
952 return StringCoding.encode(charset, value, 0, value.length);
953 }
954
955 /**
956 * Encodes this {@code String} into a sequence of bytes using the
957 * platform's default charset, storing the result into a new byte array.
958 *
959 * <p> The behavior of this method when this string cannot be encoded in
960 * the default charset is unspecified. The {@link
961 * java.nio.charset.CharsetEncoder} class should be used when more control
962 * over the encoding process is required.
963 *
964 * @return The resultant byte array
965 *
966 * @since 1.1
967 */
968 public byte[] getBytes() {
969 return StringCoding.encode(value, 0, value.length);
970 }
971
972 /**
973 * Compares this string to the specified object. The result is {@code
974 * true} if and only if the argument is not {@code null} and is a {@code
975 * String} object that represents the same sequence of characters as this
976 * object.
977 *
978 * <p>For finer-grained String comparison, refer to
979 * {@link java.text.Collator}.
980 *
981 * @param anObject
982 * The object to compare this {@code String} against
983 *
984 * @return {@code true} if the given object represents a {@code String}
985 * equivalent to this string, {@code false} otherwise
986 *
987 * @see #compareTo(String)
988 * @see #equalsIgnoreCase(String)
989 */
990 @HotSpotIntrinsicCandidate
991 public boolean equals(Object anObject) {
992 if (this == anObject) {
993 return true;
994 }
995 if (anObject instanceof String) {
996 char[] v1 = value;
997 char[] v2 = ((String)anObject).value;
998 int n = v1.length;
999 if (n == v2.length) {
1000 int i = 0;
1001 while (n-- != 0) {
1002 if (v1[i] != v2[i])
1003 return false;
1004 i++;
1005 }
1006 return true;
1007 }
1008 }
1009 return false;
1010 }
1011
1012 /**
1013 * Compares this string to the specified {@code StringBuffer}. The result
1014 * is {@code true} if and only if this {@code String} represents the same
1015 * sequence of characters as the specified {@code StringBuffer}. This method
1016 * synchronizes on the {@code StringBuffer}.
1017 *
1018 * <p>For finer-grained String comparison, refer to
1019 * {@link java.text.Collator}.
1020 *
1021 * @param sb
1022 * The {@code StringBuffer} to compare this {@code String} against
1023 *
1024 * @return {@code true} if this {@code String} represents the same
1025 * sequence of characters as the specified {@code StringBuffer},
1026 * {@code false} otherwise
1027 *
1028 * @since 1.4
1029 */
1030 public boolean contentEquals(StringBuffer sb) {
1031 return contentEquals((CharSequence)sb);
1032 }
1033
1034 private boolean nonSyncContentEquals(AbstractStringBuilder sb) {
1035 char[] v1 = value;
1036 char[] v2 = sb.getValue();
1037 int n = v1.length;
1038 if (n != sb.length()) {
1039 return false;
1040 }
1041 for (int i = 0; i < n; i++) {
1042 if (v1[i] != v2[i]) {
1043 return false;
1044 }
1045 }
1046 return true;
1047 }
1048
1049 /**
1050 * Compares this string to the specified {@code CharSequence}. The
1051 * result is {@code true} if and only if this {@code String} represents the
1052 * same sequence of char values as the specified sequence. Note that if the
1053 * {@code CharSequence} is a {@code StringBuffer} then the method
1054 * synchronizes on it.
1055 *
1056 * <p>For finer-grained String comparison, refer to
1057 * {@link java.text.Collator}.
1058 *
1059 * @param cs
1060 * The sequence to compare this {@code String} against
1061 *
1062 * @return {@code true} if this {@code String} represents the same
1063 * sequence of char values as the specified sequence, {@code
1064 * false} otherwise
1065 *
1066 * @since 1.5
1067 */
1068 public boolean contentEquals(CharSequence cs) {
1069 // Argument is a StringBuffer, StringBuilder
1070 if (cs instanceof AbstractStringBuilder) {
1071 if (cs instanceof StringBuffer) {
1072 synchronized(cs) {
1073 return nonSyncContentEquals((AbstractStringBuilder)cs);
1074 }
1075 } else {
1076 return nonSyncContentEquals((AbstractStringBuilder)cs);
1077 }
1078 }
1079 // Argument is a String
1080 if (cs instanceof String) {
1081 return equals(cs);
1082 }
1083 // Argument is a generic CharSequence
1084 char[] v1 = value;
1085 int n = v1.length;
1086 if (n != cs.length()) {
1087 return false;
1088 }
1089 for (int i = 0; i < n; i++) {
1090 if (v1[i] != cs.charAt(i)) {
1091 return false;
1092 }
1093 }
1094 return true;
1095 }
1096
1097 /**
1098 * Compares this {@code String} to another {@code String}, ignoring case
1099 * considerations. Two strings are considered equal ignoring case if they
1100 * are of the same length and corresponding characters in the two strings
1101 * are equal ignoring case.
1102 *
1103 * <p> Two characters {@code c1} and {@code c2} are considered the same
1104 * ignoring case if at least one of the following is true:
1105 * <ul>
1106 * <li> The two characters are the same (as compared by the
1107 * {@code ==} operator)
1108 * <li> Calling {@code Character.toLowerCase(Character.toUpperCase(char))}
1109 * on each character produces the same result
1110 * </ul>
1111 *
1112 * <p>Note that this method does <em>not</em> take locale into account, and
1113 * will result in unsatisfactory results for certain locales. The
1114 * {@link java.text.Collator} class provides locale-sensitive comparison.
1115 *
1116 * @param anotherString
1117 * The {@code String} to compare this {@code String} against
1118 *
1119 * @return {@code true} if the argument is not {@code null} and it
1120 * represents an equivalent {@code String} ignoring case; {@code
1121 * false} otherwise
1122 *
1123 * @see #equals(Object)
1124 */
1125 public boolean equalsIgnoreCase(String anotherString) {
1126 return (this == anotherString) ? true
1127 : (anotherString != null)
1128 && (anotherString.value.length == value.length)
1129 && regionMatches(true, 0, anotherString, 0, value.length);
1130 }
1131
1132 /**
1133 * Compares two strings lexicographically.
1134 * The comparison is based on the Unicode value of each character in
1135 * the strings. The character sequence represented by this
1136 * {@code String} object is compared lexicographically to the
1137 * character sequence represented by the argument string. The result is
1138 * a negative integer if this {@code String} object
1139 * lexicographically precedes the argument string. The result is a
1140 * positive integer if this {@code String} object lexicographically
1141 * follows the argument string. The result is zero if the strings
1142 * are equal; {@code compareTo} returns {@code 0} exactly when
1143 * the {@link #equals(Object)} method would return {@code true}.
1144 * <p>
1145 * This is the definition of lexicographic ordering. If two strings are
1146 * different, then either they have different characters at some index
1147 * that is a valid index for both strings, or their lengths are different,
1148 * or both. If they have different characters at one or more index
1149 * positions, let <i>k</i> be the smallest such index; then the string
1156 * this.charAt(k)-anotherString.charAt(k)
1157 * </pre></blockquote>
1158 * If there is no index position at which they differ, then the shorter
1159 * string lexicographically precedes the longer string. In this case,
1160 * {@code compareTo} returns the difference of the lengths of the
1161 * strings -- that is, the value:
1162 * <blockquote><pre>
1163 * this.length()-anotherString.length()
1164 * </pre></blockquote>
1165 *
1166 * <p>For finer-grained String comparison, refer to
1167 * {@link java.text.Collator}.
1168 *
1169 * @param anotherString the {@code String} to be compared.
1170 * @return the value {@code 0} if the argument string is equal to
1171 * this string; a value less than {@code 0} if this string
1172 * is lexicographically less than the string argument; and a
1173 * value greater than {@code 0} if this string is
1174 * lexicographically greater than the string argument.
1175 */
1176 @HotSpotIntrinsicCandidate
1177 public int compareTo(String anotherString) {
1178 char[] v1 = value;
1179 char[] v2 = anotherString.value;
1180 int len1 = v1.length;
1181 int len2 = v2.length;
1182 int lim = Math.min(len1, len2);
1183
1184 for (int k = 0; k < lim; k++) {
1185 char c1 = v1[k];
1186 char c2 = v2[k];
1187 if (c1 != c2) {
1188 return c1 - c2;
1189 }
1190 }
1191 return len1 - len2;
1192 }
1193
1194 /**
1195 * A Comparator that orders {@code String} objects as by
1196 * {@code compareToIgnoreCase}. This comparator is serializable.
1197 * <p>
1198 * Note that this Comparator does <em>not</em> take locale into account,
1199 * and will result in an unsatisfactory ordering for certain locales.
1200 * The {@link java.text.Collator} class provides locale-sensitive comparison.
1201 *
1202 * @see java.text.Collator
1203 * @since 1.2
1204 */
1205 public static final Comparator<String> CASE_INSENSITIVE_ORDER
1206 = new CaseInsensitiveComparator();
1207 private static class CaseInsensitiveComparator
1208 implements Comparator<String>, java.io.Serializable {
1209 // use serialVersionUID from JDK 1.2.2 for interoperability
1210 private static final long serialVersionUID = 8575799808933029326L;
1211
1212 public int compare(String s1, String s2) {
1213 int n1 = s1.length();
1214 int n2 = s2.length();
1215 int min = Math.min(n1, n2);
1216 for (int i = 0; i < min; i++) {
1217 char c1 = s1.charAt(i);
1218 char c2 = s2.charAt(i);
1219 if (c1 != c2) {
1220 c1 = Character.toUpperCase(c1);
1221 c2 = Character.toUpperCase(c2);
1222 if (c1 != c2) {
1223 c1 = Character.toLowerCase(c1);
1224 c2 = Character.toLowerCase(c2);
1225 if (c1 != c2) {
1226 // No overflow because of numeric promotion
1227 return c1 - c2;
1228 }
1229 }
1230 }
1231 }
1232 return n1 - n2;
1233 }
1234
1235 /** Replaces the de-serialized object. */
1236 private Object readResolve() { return CASE_INSENSITIVE_ORDER; }
1237 }
1238
1277 * <li>{@code ooffset+len} is greater than the length of the other
1278 * argument.
1279 * <li>There is some nonnegative integer <i>k</i> less than {@code len}
1280 * such that:
1281 * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + }
1282 * <i>k</i>{@code )}
1283 * </ul>
1284 *
1285 * <p>Note that this method does <em>not</em> take locale into account. The
1286 * {@link java.text.Collator} class provides locale-sensitive comparison.
1287 *
1288 * @param toffset the starting offset of the subregion in this string.
1289 * @param other the string argument.
1290 * @param ooffset the starting offset of the subregion in the string
1291 * argument.
1292 * @param len the number of characters to compare.
1293 * @return {@code true} if the specified subregion of this string
1294 * exactly matches the specified subregion of the string argument;
1295 * {@code false} otherwise.
1296 */
1297 public boolean regionMatches(int toffset, String other, int ooffset,
1298 int len) {
1299 char[] ta = value;
1300 int to = toffset;
1301 char[] pa = other.value;
1302 int po = ooffset;
1303 // Note: toffset, ooffset, or len might be near -1>>>1.
1304 if ((ooffset < 0) || (toffset < 0)
1305 || (toffset > (long)ta.length - len)
1306 || (ooffset > (long)pa.length - len)) {
1307 return false;
1308 }
1309 while (len-- > 0) {
1310 if (ta[to++] != pa[po++]) {
1311 return false;
1312 }
1313 }
1314 return true;
1315 }
1316
1317 /**
1318 * Tests if two string regions are equal.
1319 * <p>
1320 * A substring of this {@code String} object is compared to a substring
1321 * of the argument {@code other}. The result is {@code true} if these
1322 * substrings represent character sequences that are the same, ignoring
1323 * case if and only if {@code ignoreCase} is true. The substring of
1324 * this {@code String} object to be compared begins at index
1325 * {@code toffset} and has length {@code len}. The substring of
1326 * {@code other} to be compared begins at index {@code ooffset} and
1327 * has length {@code len}. The result is {@code false} if and only if
1328 * at least one of the following is true:
1329 * <ul><li>{@code toffset} is negative.
1330 * <li>{@code ooffset} is negative.
1331 * <li>{@code toffset+len} is greater than the length of this
1332 * {@code String} object.
1333 * <li>{@code ooffset+len} is greater than the length of the other
1349 * and will result in unsatisfactory results for certain locales when
1350 * {@code ignoreCase} is {@code true}. The {@link java.text.Collator} class
1351 * provides locale-sensitive comparison.
1352 *
1353 * @param ignoreCase if {@code true}, ignore case when comparing
1354 * characters.
1355 * @param toffset the starting offset of the subregion in this
1356 * string.
1357 * @param other the string argument.
1358 * @param ooffset the starting offset of the subregion in the string
1359 * argument.
1360 * @param len the number of characters to compare.
1361 * @return {@code true} if the specified subregion of this string
1362 * matches the specified subregion of the string argument;
1363 * {@code false} otherwise. Whether the matching is exact
1364 * or case insensitive depends on the {@code ignoreCase}
1365 * argument.
1366 */
1367 public boolean regionMatches(boolean ignoreCase, int toffset,
1368 String other, int ooffset, int len) {
1369 char[] ta = value;
1370 int to = toffset;
1371 char[] pa = other.value;
1372 int po = ooffset;
1373 // Note: toffset, ooffset, or len might be near -1>>>1.
1374 if ((ooffset < 0) || (toffset < 0)
1375 || (toffset > (long)ta.length - len)
1376 || (ooffset > (long)pa.length - len)) {
1377 return false;
1378 }
1379 while (len-- > 0) {
1380 char c1 = ta[to++];
1381 char c2 = pa[po++];
1382 if (c1 == c2) {
1383 continue;
1384 }
1385 if (ignoreCase) {
1386 // If characters don't match but case may be ignored,
1387 // try converting both characters to uppercase.
1388 // If the results match, then the comparison scan should
1389 // continue.
1390 char u1 = Character.toUpperCase(c1);
1391 char u2 = Character.toUpperCase(c2);
1392 if (u1 == u2) {
1393 continue;
1394 }
1395 // Unfortunately, conversion to uppercase does not work properly
1396 // for the Georgian alphabet, which has strange rules about case
1397 // conversion. So we need to make one last check before
1398 // exiting.
1399 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
1400 continue;
1401 }
1402 }
1403 return false;
1404 }
1405 return true;
1406 }
1407
1408 /**
1409 * Tests if the substring of this string beginning at the
1410 * specified index starts with the specified prefix.
1411 *
1412 * @param prefix the prefix.
1413 * @param toffset where to begin looking in this string.
1414 * @return {@code true} if the character sequence represented by the
1415 * argument is a prefix of the substring of this object starting
1416 * at index {@code toffset}; {@code false} otherwise.
1417 * The result is {@code false} if {@code toffset} is
1418 * negative or greater than the length of this
1419 * {@code String} object; otherwise the result is the same
1420 * as the result of the expression
1421 * <pre>
1422 * this.substring(toffset).startsWith(prefix)
1423 * </pre>
1424 */
1425 public boolean startsWith(String prefix, int toffset) {
1426 char[] ta = value;
1427 int to = toffset;
1428 char[] pa = prefix.value;
1429 int po = 0;
1430 int pc = pa.length;
1431 // Note: toffset might be near -1>>>1.
1432 if ((toffset < 0) || (toffset > ta.length - pc)) {
1433 return false;
1434 }
1435 while (--pc >= 0) {
1436 if (ta[to++] != pa[po++]) {
1437 return false;
1438 }
1439 }
1440 return true;
1441 }
1442
1443 /**
1444 * Tests if this string starts with the specified prefix.
1445 *
1446 * @param prefix the prefix.
1447 * @return {@code true} if the character sequence represented by the
1448 * argument is a prefix of the character sequence represented by
1449 * this string; {@code false} otherwise.
1450 * Note also that {@code true} will be returned if the
1451 * argument is an empty string or is equal to this
1452 * {@code String} object as determined by the
1453 * {@link #equals(Object)} method.
1454 * @since 1.0
1455 */
1456 public boolean startsWith(String prefix) {
1457 return startsWith(prefix, 0);
1458 }
1459
1460 /**
1461 * Tests if this string ends with the specified suffix.
1462 *
1463 * @param suffix the suffix.
1464 * @return {@code true} if the character sequence represented by the
1465 * argument is a suffix of the character sequence represented by
1466 * this object; {@code false} otherwise. Note that the
1467 * result will be {@code true} if the argument is the
1468 * empty string or is equal to this {@code String} object
1469 * as determined by the {@link #equals(Object)} method.
1470 */
1471 public boolean endsWith(String suffix) {
1472 return startsWith(suffix, value.length - suffix.value.length);
1473 }
1474
1475 /**
1476 * Returns a hash code for this string. The hash code for a
1477 * {@code String} object is computed as
1478 * <blockquote><pre>
1479 * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
1480 * </pre></blockquote>
1481 * using {@code int} arithmetic, where {@code s[i]} is the
1482 * <i>i</i>th character of the string, {@code n} is the length of
1483 * the string, and {@code ^} indicates exponentiation.
1484 * (The hash value of the empty string is zero.)
1485 *
1486 * @return a hash code value for this object.
1487 */
1488 public int hashCode() {
1489 int h = hash;
1490 if (h == 0) {
1491 for (char v : value) {
1492 h = 31 * h + v;
1493 }
1494 if (h != 0) {
1495 hash = h;
1496 }
1497 }
1498 return h;
1499 }
1500
1501 /**
1502 * Returns the index within this string of the first occurrence of
1503 * the specified character. If a character with value
1504 * {@code ch} occurs in the character sequence represented by
1505 * this {@code String} object, then the index (in Unicode
1506 * code units) of the first such occurrence is returned. For
1507 * values of {@code ch} in the range from 0 to 0xFFFF
1508 * (inclusive), this is the smallest value <i>k</i> such that:
1509 * <blockquote><pre>
1510 * this.charAt(<i>k</i>) == ch
1511 * </pre></blockquote>
1512 * is true. For other values of {@code ch}, it is the
1513 * smallest value <i>k</i> such that:
1514 * <blockquote><pre>
1515 * this.codePointAt(<i>k</i>) == ch
1516 * </pre></blockquote>
1517 * is true. In either case, if no such character occurs in this
1518 * string, then {@code -1} is returned.
1549 * {@code -1} is returned.
1550 *
1551 * <p>
1552 * There is no restriction on the value of {@code fromIndex}. If it
1553 * is negative, it has the same effect as if it were zero: this entire
1554 * string may be searched. If it is greater than the length of this
1555 * string, it has the same effect as if it were equal to the length of
1556 * this string: {@code -1} is returned.
1557 *
1558 * <p>All indices are specified in {@code char} values
1559 * (Unicode code units).
1560 *
1561 * @param ch a character (Unicode code point).
1562 * @param fromIndex the index to start the search from.
1563 * @return the index of the first occurrence of the character in the
1564 * character sequence represented by this object that is greater
1565 * than or equal to {@code fromIndex}, or {@code -1}
1566 * if the character does not occur.
1567 */
1568 public int indexOf(int ch, int fromIndex) {
1569 final int max = value.length;
1570 if (fromIndex < 0) {
1571 fromIndex = 0;
1572 } else if (fromIndex >= max) {
1573 // Note: fromIndex might be near -1>>>1.
1574 return -1;
1575 }
1576
1577 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
1578 // handle most cases here (ch is a BMP code point or a
1579 // negative value (invalid code point))
1580 final char[] value = this.value;
1581 for (int i = fromIndex; i < max; i++) {
1582 if (value[i] == ch) {
1583 return i;
1584 }
1585 }
1586 return -1;
1587 } else {
1588 return indexOfSupplementary(ch, fromIndex);
1589 }
1590 }
1591
1592 /**
1593 * Handles (rare) calls of indexOf with a supplementary character.
1594 */
1595 private int indexOfSupplementary(int ch, int fromIndex) {
1596 if (Character.isValidCodePoint(ch)) {
1597 final char[] value = this.value;
1598 final char hi = Character.highSurrogate(ch);
1599 final char lo = Character.lowSurrogate(ch);
1600 final int max = value.length - 1;
1601 for (int i = fromIndex; i < max; i++) {
1602 if (value[i] == hi && value[i + 1] == lo) {
1603 return i;
1604 }
1605 }
1606 }
1607 return -1;
1608 }
1609
1610 /**
1611 * Returns the index within this string of the last occurrence of
1612 * the specified character. For values of {@code ch} in the
1613 * range from 0 to 0xFFFF (inclusive), the index (in Unicode code
1614 * units) returned is the largest value <i>k</i> such that:
1615 * <blockquote><pre>
1616 * this.charAt(<i>k</i>) == ch
1617 * </pre></blockquote>
1618 * is true. For other values of {@code ch}, it is the
1619 * largest value <i>k</i> such that:
1620 * <blockquote><pre>
1621 * this.codePointAt(<i>k</i>) == ch
1622 * </pre></blockquote>
1623 * is true. In either case, if no such character occurs in this
1624 * string, then {@code -1} is returned. The
1625 * {@code String} is searched backwards starting at the last
1626 * character.
1627 *
1628 * @param ch a character (Unicode code point).
1629 * @return the index of the last occurrence of the character in the
1630 * character sequence represented by this object, or
1631 * {@code -1} if the character does not occur.
1632 */
1633 public int lastIndexOf(int ch) {
1634 return lastIndexOf(ch, value.length - 1);
1635 }
1636
1637 /**
1638 * Returns the index within this string of the last occurrence of
1639 * the specified character, searching backward starting at the
1640 * specified index. For values of {@code ch} in the range
1641 * from 0 to 0xFFFF (inclusive), the index returned is the largest
1642 * value <i>k</i> such that:
1643 * <blockquote><pre>
1644 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> {@code <=} fromIndex)
1645 * </pre></blockquote>
1646 * is true. For other values of {@code ch}, it is the
1647 * largest value <i>k</i> such that:
1648 * <blockquote><pre>
1649 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> {@code <=} fromIndex)
1650 * </pre></blockquote>
1651 * is true. In either case, if no such character occurs in this
1652 * string at or before position {@code fromIndex}, then
1653 * {@code -1} is returned.
1654 *
1655 * <p>All indices are specified in {@code char} values
1656 * (Unicode code units).
1657 *
1658 * @param ch a character (Unicode code point).
1659 * @param fromIndex the index to start the search from. There is no
1660 * restriction on the value of {@code fromIndex}. If it is
1661 * greater than or equal to the length of this string, it has
1662 * the same effect as if it were equal to one less than the
1663 * length of this string: this entire string may be searched.
1664 * If it is negative, it has the same effect as if it were -1:
1665 * -1 is returned.
1666 * @return the index of the last occurrence of the character in the
1667 * character sequence represented by this object that is less
1668 * than or equal to {@code fromIndex}, or {@code -1}
1669 * if the character does not occur before that point.
1670 */
1671 public int lastIndexOf(int ch, int fromIndex) {
1672 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
1673 // handle most cases here (ch is a BMP code point or a
1674 // negative value (invalid code point))
1675 final char[] value = this.value;
1676 int i = Math.min(fromIndex, value.length - 1);
1677 for (; i >= 0; i--) {
1678 if (value[i] == ch) {
1679 return i;
1680 }
1681 }
1682 return -1;
1683 } else {
1684 return lastIndexOfSupplementary(ch, fromIndex);
1685 }
1686 }
1687
1688 /**
1689 * Handles (rare) calls of lastIndexOf with a supplementary character.
1690 */
1691 private int lastIndexOfSupplementary(int ch, int fromIndex) {
1692 if (Character.isValidCodePoint(ch)) {
1693 final char[] value = this.value;
1694 char hi = Character.highSurrogate(ch);
1695 char lo = Character.lowSurrogate(ch);
1696 int i = Math.min(fromIndex, value.length - 2);
1697 for (; i >= 0; i--) {
1698 if (value[i] == hi && value[i + 1] == lo) {
1699 return i;
1700 }
1701 }
1702 }
1703 return -1;
1704 }
1705
1706 /**
1707 * Returns the index within this string of the first occurrence of the
1708 * specified substring.
1709 *
1710 * <p>The returned index is the smallest value {@code k} for which:
1711 * <pre>{@code
1712 * this.startsWith(str, k)
1713 * }</pre>
1714 * If no such value of {@code k} exists, then {@code -1} is returned.
1715 *
1716 * @param str the substring to search for.
1717 * @return the index of the first occurrence of the specified substring,
1718 * or {@code -1} if there is no such occurrence.
1719 */
1720 @HotSpotIntrinsicCandidate
1721 public int indexOf(String str) {
1722 return indexOf(str, 0);
1723 }
1724
1725 /**
1726 * Returns the index within this string of the first occurrence of the
1727 * specified substring, starting at the specified index.
1728 *
1729 * <p>The returned index is the smallest value {@code k} for which:
1730 * <pre>{@code
1731 * k >= Math.min(fromIndex, this.length()) &&
1732 * this.startsWith(str, k)
1733 * }</pre>
1734 * If no such value of {@code k} exists, then {@code -1} is returned.
1735 *
1736 * @param str the substring to search for.
1737 * @param fromIndex the index from which to start the search.
1738 * @return the index of the first occurrence of the specified substring,
1739 * starting at the specified index,
1740 * or {@code -1} if there is no such occurrence.
1741 */
1742 public int indexOf(String str, int fromIndex) {
1743 return indexOf(value, 0, value.length,
1744 str.value, 0, str.value.length, fromIndex);
1745 }
1746
1747 /**
1748 * Code shared by String and AbstractStringBuilder to do searches. The
1749 * source is the character array being searched, and the target
1750 * is the string being searched for.
1751 *
1752 * @param source the characters being searched.
1753 * @param sourceOffset offset of the source string.
1754 * @param sourceCount count of the source string.
1755 * @param target the characters being searched for.
1756 * @param fromIndex the index to begin searching from.
1757 */
1758 static int indexOf(char[] source, int sourceOffset, int sourceCount,
1759 String target, int fromIndex) {
1760 return indexOf(source, sourceOffset, sourceCount,
1761 target.value, 0, target.value.length,
1762 fromIndex);
1763 }
1764
1765 /**
1766 * Code shared by String and StringBuffer to do searches. The
1767 * source is the character array being searched, and the target
1768 * is the string being searched for.
1769 *
1770 * @param source the characters being searched.
1771 * @param sourceOffset offset of the source string.
1772 * @param sourceCount count of the source string.
1773 * @param target the characters being searched for.
1774 * @param targetOffset offset of the target string.
1775 * @param targetCount count of the target string.
1776 * @param fromIndex the index to begin searching from.
1777 */
1778 static int indexOf(char[] source, int sourceOffset, int sourceCount,
1779 char[] target, int targetOffset, int targetCount,
1780 int fromIndex) {
1781 if (fromIndex >= sourceCount) {
1782 return (targetCount == 0 ? sourceCount : -1);
1783 }
1784 if (fromIndex < 0) {
1785 fromIndex = 0;
1786 }
1787 if (targetCount == 0) {
1788 return fromIndex;
1789 }
1790
1791 char first = target[targetOffset];
1792 int max = sourceOffset + (sourceCount - targetCount);
1793
1794 for (int i = sourceOffset + fromIndex; i <= max; i++) {
1795 /* Look for first character. */
1796 if (source[i] != first) {
1797 while (++i <= max && source[i] != first);
1798 }
1799
1800 /* Found first character, now look at the rest of v2 */
1801 if (i <= max) {
1802 int j = i + 1;
1803 int end = j + targetCount - 1;
1804 for (int k = targetOffset + 1; j < end && source[j]
1805 == target[k]; j++, k++);
1806
1807 if (j == end) {
1808 /* Found whole string. */
1809 return i - sourceOffset;
1810 }
1811 }
1812 }
1813 return -1;
1814 }
1815
1816 /**
1817 * Returns the index within this string of the last occurrence of the
1818 * specified substring. The last occurrence of the empty string ""
1819 * is considered to occur at the index value {@code this.length()}.
1820 *
1821 * <p>The returned index is the largest value {@code k} for which:
1822 * <pre>{@code
1823 * this.startsWith(str, k)
1824 * }</pre>
1825 * If no such value of {@code k} exists, then {@code -1} is returned.
1826 *
1827 * @param str the substring to search for.
1828 * @return the index of the last occurrence of the specified substring,
1829 * or {@code -1} if there is no such occurrence.
1830 */
1831 public int lastIndexOf(String str) {
1832 return lastIndexOf(str, value.length);
1833 }
1834
1835 /**
1836 * Returns the index within this string of the last occurrence of the
1837 * specified substring, searching backward starting at the specified index.
1838 *
1839 * <p>The returned index is the largest value {@code k} for which:
1840 * <pre>{@code
1841 * k <= Math.min(fromIndex, this.length()) &&
1842 * this.startsWith(str, k)
1843 * }</pre>
1844 * If no such value of {@code k} exists, then {@code -1} is returned.
1845 *
1846 * @param str the substring to search for.
1847 * @param fromIndex the index to start the search from.
1848 * @return the index of the last occurrence of the specified substring,
1849 * searching backward from the specified index,
1850 * or {@code -1} if there is no such occurrence.
1851 */
1852 public int lastIndexOf(String str, int fromIndex) {
1853 return lastIndexOf(value, 0, value.length,
1854 str.value, 0, str.value.length, fromIndex);
1855 }
1856
1857 /**
1858 * Code shared by String and AbstractStringBuilder to do searches. The
1859 * source is the character array being searched, and the target
1860 * is the string being searched for.
1861 *
1862 * @param source the characters being searched.
1863 * @param sourceOffset offset of the source string.
1864 * @param sourceCount count of the source string.
1865 * @param target the characters being searched for.
1866 * @param fromIndex the index to begin searching from.
1867 */
1868 static int lastIndexOf(char[] source, int sourceOffset, int sourceCount,
1869 String target, int fromIndex) {
1870 return lastIndexOf(source, sourceOffset, sourceCount,
1871 target.value, 0, target.value.length,
1872 fromIndex);
1873 }
1874
1875 /**
1876 * Code shared by String and StringBuffer to do searches. The
1877 * source is the character array being searched, and the target
1878 * is the string being searched for.
1879 *
1880 * @param source the characters being searched.
1881 * @param sourceOffset offset of the source string.
1882 * @param sourceCount count of the source string.
1883 * @param target the characters being searched for.
1884 * @param targetOffset offset of the target string.
1885 * @param targetCount count of the target string.
1886 * @param fromIndex the index to begin searching from.
1887 */
1888 static int lastIndexOf(char[] source, int sourceOffset, int sourceCount,
1889 char[] target, int targetOffset, int targetCount,
1890 int fromIndex) {
1891 /*
1892 * Check arguments; return immediately where possible. For
1893 * consistency, don't check for null str.
1894 */
1895 int rightIndex = sourceCount - targetCount;
1896 if (fromIndex < 0) {
1897 return -1;
1898 }
1899 if (fromIndex > rightIndex) {
1900 fromIndex = rightIndex;
1901 }
1902 /* Empty string always matches. */
1903 if (targetCount == 0) {
1904 return fromIndex;
1905 }
1906
1907 int strLastIndex = targetOffset + targetCount - 1;
1908 char strLastChar = target[strLastIndex];
1909 int min = sourceOffset + targetCount - 1;
1910 int i = min + fromIndex;
1911
1912 startSearchForLastChar:
1913 while (true) {
1914 while (i >= min && source[i] != strLastChar) {
1915 i--;
1916 }
1917 if (i < min) {
1918 return -1;
1919 }
1920 int j = i - 1;
1921 int start = j - (targetCount - 1);
1922 int k = strLastIndex - 1;
1923
1924 while (j > start) {
1925 if (source[j--] != target[k--]) {
1926 i--;
1927 continue startSearchForLastChar;
1928 }
1929 }
1930 return start - sourceOffset + 1;
1931 }
1932 }
1933
1934 /**
1935 * Returns a string that is a substring of this string. The
1936 * substring begins with the character at the specified index and
1937 * extends to the end of this string. <p>
1938 * Examples:
1939 * <blockquote><pre>
1940 * "unhappy".substring(2) returns "happy"
1941 * "Harbison".substring(3) returns "bison"
1942 * "emptiness".substring(9) returns "" (an empty string)
1943 * </pre></blockquote>
1944 *
1945 * @param beginIndex the beginning index, inclusive.
1946 * @return the specified substring.
1947 * @exception IndexOutOfBoundsException if
1948 * {@code beginIndex} is negative or larger than the
1949 * length of this {@code String} object.
1950 */
1951 public String substring(int beginIndex) {
1952 if (beginIndex <= 0) {
1953 if (beginIndex < 0) {
1954 throw new StringIndexOutOfBoundsException(beginIndex);
1955 }
1956 return this;
1957 }
1958 int subLen = value.length - beginIndex;
1959 if (subLen < 0) {
1960 throw new StringIndexOutOfBoundsException(subLen);
1961 }
1962 return new String(value, beginIndex, subLen);
1963 }
1964
1965 /**
1966 * Returns a string that is a substring of this string. The
1967 * substring begins at the specified {@code beginIndex} and
1968 * extends to the character at index {@code endIndex - 1}.
1969 * Thus the length of the substring is {@code endIndex-beginIndex}.
1970 * <p>
1971 * Examples:
1972 * <blockquote><pre>
1973 * "hamburger".substring(4, 8) returns "urge"
1974 * "smiles".substring(1, 5) returns "mile"
1975 * </pre></blockquote>
1976 *
1977 * @param beginIndex the beginning index, inclusive.
1978 * @param endIndex the ending index, exclusive.
1979 * @return the specified substring.
1980 * @exception IndexOutOfBoundsException if the
1981 * {@code beginIndex} is negative, or
1982 * {@code endIndex} is larger than the length of
1983 * this {@code String} object, or
1984 * {@code beginIndex} is larger than
1985 * {@code endIndex}.
1986 */
1987 public String substring(int beginIndex, int endIndex) {
1988 if (beginIndex <= 0) {
1989 if (beginIndex < 0) {
1990 throw new StringIndexOutOfBoundsException(beginIndex);
1991 }
1992 if (endIndex == value.length) {
1993 return this;
1994 }
1995 }
1996 if (endIndex > value.length) {
1997 throw new StringIndexOutOfBoundsException(endIndex);
1998 }
1999 int subLen = endIndex - beginIndex;
2000 if (subLen < 0) {
2001 throw new StringIndexOutOfBoundsException(subLen);
2002 }
2003 return new String(value, beginIndex, subLen);
2004 }
2005
2006 /**
2007 * Returns a character sequence that is a subsequence of this sequence.
2008 *
2009 * <p> An invocation of this method of the form
2010 *
2011 * <blockquote><pre>
2012 * str.subSequence(begin, end)</pre></blockquote>
2013 *
2014 * behaves in exactly the same way as the invocation
2015 *
2016 * <blockquote><pre>
2017 * str.substring(begin, end)</pre></blockquote>
2018 *
2019 * @apiNote
2020 * This method is defined so that the {@code String} class can implement
2021 * the {@link CharSequence} interface.
2022 *
2023 * @param beginIndex the begin index, inclusive.
2040 * Concatenates the specified string to the end of this string.
2041 * <p>
2042 * If the length of the argument string is {@code 0}, then this
2043 * {@code String} object is returned. Otherwise, a
2044 * {@code String} object is returned that represents a character
2045 * sequence that is the concatenation of the character sequence
2046 * represented by this {@code String} object and the character
2047 * sequence represented by the argument string.<p>
2048 * Examples:
2049 * <blockquote><pre>
2050 * "cares".concat("s") returns "caress"
2051 * "to".concat("get").concat("her") returns "together"
2052 * </pre></blockquote>
2053 *
2054 * @param str the {@code String} that is concatenated to the end
2055 * of this {@code String}.
2056 * @return a string that represents the concatenation of this object's
2057 * characters followed by the string argument's characters.
2058 */
2059 public String concat(String str) {
2060 int otherLen = str.length();
2061 if (otherLen == 0) {
2062 return this;
2063 }
2064 int len = value.length;
2065 char[] buf = Arrays.copyOf(value, len + otherLen);
2066 str.getChars(buf, len);
2067 return new String(buf, true);
2068 }
2069
2070 /**
2071 * Returns a string resulting from replacing all occurrences of
2072 * {@code oldChar} in this string with {@code newChar}.
2073 * <p>
2074 * If the character {@code oldChar} does not occur in the
2075 * character sequence represented by this {@code String} object,
2076 * then a reference to this {@code String} object is returned.
2077 * Otherwise, a {@code String} object is returned that
2078 * represents a character sequence identical to the character sequence
2079 * represented by this {@code String} object, except that every
2080 * occurrence of {@code oldChar} is replaced by an occurrence
2081 * of {@code newChar}.
2082 * <p>
2083 * Examples:
2084 * <blockquote><pre>
2085 * "mesquite in your cellar".replace('e', 'o')
2086 * returns "mosquito in your collar"
2087 * "the war of baronets".replace('r', 'y')
2088 * returns "the way of bayonets"
2089 * "sparring with a purple porpoise".replace('p', 't')
2090 * returns "starring with a turtle tortoise"
2091 * "JonL".replace('q', 'x') returns "JonL" (no change)
2092 * </pre></blockquote>
2093 *
2094 * @param oldChar the old character.
2095 * @param newChar the new character.
2096 * @return a string derived from this string by replacing every
2097 * occurrence of {@code oldChar} with {@code newChar}.
2098 */
2099 public String replace(char oldChar, char newChar) {
2100 if (oldChar != newChar) {
2101 char[] val = value; /* avoid getfield opcode */
2102 int len = val.length;
2103 int i = -1;
2104
2105 while (++i < len) {
2106 if (val[i] == oldChar) {
2107 break;
2108 }
2109 }
2110 if (i < len) {
2111 char[] buf = new char[len];
2112 for (int j = 0; j < i; j++) {
2113 buf[j] = val[j];
2114 }
2115 while (i < len) {
2116 char c = val[i];
2117 buf[i] = (c == oldChar) ? newChar : c;
2118 i++;
2119 }
2120 return new String(buf, true);
2121 }
2122 }
2123 return this;
2124 }
2125
2126 /**
2127 * Tells whether or not this string matches the given <a
2128 * href="../util/regex/Pattern.html#sum">regular expression</a>.
2129 *
2130 * <p> An invocation of this method of the form
2131 * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the
2132 * same result as the expression
2133 *
2134 * <blockquote>
2135 * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence)
2136 * matches(<i>regex</i>, <i>str</i>)}
2137 * </blockquote>
2138 *
2139 * @param regex
2140 * the regular expression to which this string is to be matched
2252 * @since 1.4
2253 * @spec JSR-51
2254 */
2255 public String replaceAll(String regex, String replacement) {
2256 return Pattern.compile(regex).matcher(this).replaceAll(replacement);
2257 }
2258
2259 /**
2260 * Replaces each substring of this string that matches the literal target
2261 * sequence with the specified literal replacement sequence. The
2262 * replacement proceeds from the beginning of the string to the end, for
2263 * example, replacing "aa" with "b" in the string "aaa" will result in
2264 * "ba" rather than "ab".
2265 *
2266 * @param target The sequence of char values to be replaced
2267 * @param replacement The replacement sequence of char values
2268 * @return The resulting string
2269 * @since 1.5
2270 */
2271 public String replace(CharSequence target, CharSequence replacement) {
2272 String starget = target.toString();
2273 String srepl = replacement.toString();
2274 int j = indexOf(starget);
2275 if (j < 0) {
2276 return this;
2277 }
2278 int targLen = starget.length();
2279 int targLen1 = Math.max(targLen, 1);
2280 final char[] value = this.value;
2281 final char[] replValue = srepl.value;
2282 int newLenHint = value.length - targLen + replValue.length;
2283 if (newLenHint < 0) {
2284 throw new OutOfMemoryError();
2285 }
2286 StringBuilder sb = new StringBuilder(newLenHint);
2287 int i = 0;
2288 do {
2289 sb.append(value, i, j - i)
2290 .append(replValue);
2291 i = j + targLen;
2292 } while (j < value.length && (j = indexOf(starget, j + targLen1)) > 0);
2293
2294 return sb.append(value, i, value.length - i).toString();
2295 }
2296
2297 /**
2298 * Splits this string around matches of the given
2299 * <a href="../util/regex/Pattern.html#sum">regular expression</a>.
2300 *
2301 * <p> The array returned by this method contains each substring of this
2302 * string that is terminated by another substring that matches the given
2303 * expression or is terminated by the end of the string. The substrings in
2304 * the array are in the order in which they occur in this string. If the
2305 * expression does not match any part of the input then the resulting array
2306 * has just one element, namely this string.
2307 *
2308 * <p> When there is a positive-width match at the beginning of this
2309 * string then an empty leading substring is included at the beginning
2310 * of the resulting array. A zero-width match at the beginning however
2311 * never produces such empty leading substring.
2312 *
2313 * <p> The {@code limit} parameter controls the number of times the
2314 * pattern is applied and therefore affects the length of the resulting
2371 *
2372 * @return the array of strings computed by splitting this string
2373 * around matches of the given regular expression
2374 *
2375 * @throws PatternSyntaxException
2376 * if the regular expression's syntax is invalid
2377 *
2378 * @see java.util.regex.Pattern
2379 *
2380 * @since 1.4
2381 * @spec JSR-51
2382 */
2383 public String[] split(String regex, int limit) {
2384 /* fastpath if the regex is a
2385 (1)one-char String and this character is not one of the
2386 RegEx's meta characters ".$|()[{^?*+\\", or
2387 (2)two-char String and the first char is the backslash and
2388 the second is not the ascii digit or ascii letter.
2389 */
2390 char ch = 0;
2391 if (((regex.value.length == 1 &&
2392 ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
2393 (regex.length() == 2 &&
2394 regex.charAt(0) == '\\' &&
2395 (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
2396 ((ch-'a')|('z'-ch)) < 0 &&
2397 ((ch-'A')|('Z'-ch)) < 0)) &&
2398 (ch < Character.MIN_HIGH_SURROGATE ||
2399 ch > Character.MAX_LOW_SURROGATE))
2400 {
2401 int off = 0;
2402 int next = 0;
2403 boolean limited = limit > 0;
2404 ArrayList<String> list = new ArrayList<>();
2405 while ((next = indexOf(ch, off)) != -1) {
2406 if (!limited || list.size() < limit - 1) {
2407 list.add(substring(off, next));
2408 off = next + 1;
2409 } else { // last one
2410 //assert (list.size() == limit - 1);
2411 list.add(substring(off, value.length));
2412 off = value.length;
2413 break;
2414 }
2415 }
2416 // If no match was found, return this
2417 if (off == 0)
2418 return new String[]{this};
2419
2420 // Add remaining segment
2421 if (!limited || list.size() < limit)
2422 list.add(substring(off, value.length));
2423
2424 // Construct result
2425 int resultSize = list.size();
2426 if (limit == 0) {
2427 while (resultSize > 0 && list.get(resultSize - 1).length() == 0) {
2428 resultSize--;
2429 }
2430 }
2431 String[] result = new String[resultSize];
2432 return list.subList(0, resultSize).toArray(result);
2433 }
2434 return Pattern.compile(regex).split(this, limit);
2435 }
2436
2437 /**
2438 * Splits this string around matches of the given <a
2439 * href="../util/regex/Pattern.html#sum">regular expression</a>.
2440 *
2441 * <p> This method works as if by invoking the two-argument {@link
2442 * #split(String, int) split} method with the given expression and a limit
2596 * <tr>
2597 * <td>(all)</td>
2598 * <td><img src="doc-files/capiota.gif" alt="capiota"><img src="doc-files/capchi.gif" alt="capchi">
2599 * <img src="doc-files/captheta.gif" alt="captheta"><img src="doc-files/capupsil.gif" alt="capupsil">
2600 * <img src="doc-files/capsigma.gif" alt="capsigma"></td>
2601 * <td><img src="doc-files/iota.gif" alt="iota"><img src="doc-files/chi.gif" alt="chi">
2602 * <img src="doc-files/theta.gif" alt="theta"><img src="doc-files/upsilon.gif" alt="upsilon">
2603 * <img src="doc-files/sigma1.gif" alt="sigma"></td>
2604 * <td>lowercased all chars in String</td>
2605 * </tr>
2606 * </table>
2607 *
2608 * @param locale use the case transformation rules for this locale
2609 * @return the {@code String}, converted to lowercase.
2610 * @see java.lang.String#toLowerCase()
2611 * @see java.lang.String#toUpperCase()
2612 * @see java.lang.String#toUpperCase(Locale)
2613 * @since 1.1
2614 */
2615 public String toLowerCase(Locale locale) {
2616 if (locale == null) {
2617 throw new NullPointerException();
2618 }
2619 int first;
2620 boolean hasSurr = false;
2621 final int len = value.length;
2622
2623 // Now check if there are any characters that need to be changed, or are surrogate
2624 for (first = 0 ; first < len; first++) {
2625 int cp = (int)value[first];
2626 if (Character.isSurrogate((char)cp)) {
2627 hasSurr = true;
2628 break;
2629 }
2630 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR
2631 break;
2632 }
2633 }
2634 if (first == len)
2635 return this;
2636 char[] result = new char[len];
2637 System.arraycopy(value, 0, result, 0, first); // Just copy the first few
2638 // lowerCase characters.
2639 String lang = locale.getLanguage();
2640 if (lang == "tr" || lang == "az" || lang == "lt") {
2641 return toLowerCaseEx(result, first, locale, true);
2642 }
2643 if (hasSurr) {
2644 return toLowerCaseEx(result, first, locale, false);
2645 }
2646 for (int i = first; i < len; i++) {
2647 int cp = (int)value[i];
2648 if (cp == '\u03A3' || // GREEK CAPITAL LETTER SIGMA
2649 Character.isSurrogate((char)cp)) {
2650 return toLowerCaseEx(result, i, locale, false);
2651 }
2652 if (cp == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE
2653 return toLowerCaseEx(result, i, locale, true);
2654 }
2655 cp = Character.toLowerCase(cp);
2656 if (!Character.isBmpCodePoint(cp)) {
2657 return toLowerCaseEx(result, i, locale, false);
2658 }
2659 result[i] = (char)cp;
2660 }
2661 return new String(result, true);
2662 }
2663
2664 private String toLowerCaseEx(char[] result, int first, Locale locale, boolean localeDependent) {
2665 int resultOffset = first;
2666 int srcCount;
2667 for (int i = first; i < value.length; i += srcCount) {
2668 int srcChar = (int)value[i];
2669 int lowerChar;
2670 char[] lowerCharArray;
2671 srcCount = 1;
2672 if (Character.isSurrogate((char)srcChar)) {
2673 srcChar = codePointAt(i);
2674 srcCount = Character.charCount(srcChar);
2675 }
2676 if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
2677 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
2678 } else {
2679 lowerChar = Character.toLowerCase(srcChar);
2680 }
2681 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp
2682 result[resultOffset++] = (char)lowerChar;
2683 } else {
2684 if (lowerChar == Character.ERROR) {
2685 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
2686 } else if (srcCount == 2) {
2687 resultOffset += Character.toChars(lowerChar, result, resultOffset);
2688 continue;
2689 } else {
2690 lowerCharArray = Character.toChars(lowerChar);
2691 }
2692 /* Grow result if needed */
2693 int mapLen = lowerCharArray.length;
2694 if (mapLen > srcCount) {
2695 char[] result2 = new char[result.length + mapLen - srcCount];
2696 System.arraycopy(result, 0, result2, 0, resultOffset);
2697 result = result2;
2698 }
2699 for (int x = 0; x < mapLen; ++x) {
2700 result[resultOffset++] = lowerCharArray[x];
2701 }
2702 }
2703 }
2704 return new String(result, 0, resultOffset);
2705 }
2706
2707 /**
2708 * Converts all of the characters in this {@code String} to lower
2709 * case using the rules of the default locale. This is equivalent to calling
2710 * {@code toLowerCase(Locale.getDefault())}.
2711 * <p>
2712 * <b>Note:</b> This method is locale sensitive, and may produce unexpected
2713 * results if used for strings that are intended to be interpreted locale
2714 * independently.
2715 * Examples are programming language identifiers, protocol keys, and HTML
2716 * tags.
2717 * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale
2718 * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the
2719 * LATIN SMALL LETTER DOTLESS I character.
2720 * To obtain correct results for locale insensitive strings, use
2721 * {@code toLowerCase(Locale.ROOT)}.
2722 *
2723 * @return the {@code String}, converted to lowercase.
2724 * @see java.lang.String#toLowerCase(Locale)
2759 * <td>(all)</td>
2760 * <td>\u00df</td>
2761 * <td>\u0053 \u0053</td>
2762 * <td>small letter sharp s -> two letters: SS</td>
2763 * </tr>
2764 * <tr>
2765 * <td>(all)</td>
2766 * <td>Fahrvergnügen</td>
2767 * <td>FAHRVERGNÜGEN</td>
2768 * <td></td>
2769 * </tr>
2770 * </table>
2771 * @param locale use the case transformation rules for this locale
2772 * @return the {@code String}, converted to uppercase.
2773 * @see java.lang.String#toUpperCase()
2774 * @see java.lang.String#toLowerCase()
2775 * @see java.lang.String#toLowerCase(Locale)
2776 * @since 1.1
2777 */
2778 public String toUpperCase(Locale locale) {
2779 if (locale == null) {
2780 throw new NullPointerException();
2781 }
2782 int first;
2783 boolean hasSurr = false;
2784 final int len = value.length;
2785
2786 // Now check if there are any characters that need to be changed, or are surrogate
2787 for (first = 0 ; first < len; first++ ) {
2788 int cp = (int)value[first];
2789 if (Character.isSurrogate((char)cp)) {
2790 hasSurr = true;
2791 break;
2792 }
2793 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR
2794 break;
2795 }
2796 }
2797 if (first == len) {
2798 return this;
2799 }
2800 char[] result = new char[len];
2801 System.arraycopy(value, 0, result, 0, first); // Just copy the first few
2802 // upperCase characters.
2803 String lang = locale.getLanguage();
2804 if (lang == "tr" || lang == "az" || lang == "lt") {
2805 return toUpperCaseEx(result, first, locale, true);
2806 }
2807 if (hasSurr) {
2808 return toUpperCaseEx(result, first, locale, false);
2809 }
2810 for (int i = first; i < len; i++) {
2811 int cp = (int)value[i];
2812 if (Character.isSurrogate((char)cp)) {
2813 return toUpperCaseEx(result, i, locale, false);
2814 }
2815 cp = Character.toUpperCaseEx(cp);
2816 if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp
2817 return toUpperCaseEx(result, i, locale, false);
2818 }
2819 result[i] = (char)cp;
2820 }
2821 return new String(result, true);
2822 }
2823
2824 private String toUpperCaseEx(char[] result, int first, Locale locale,
2825 boolean localeDependent) {
2826 int resultOffset = first;
2827 int srcCount;
2828 for (int i = first; i < value.length; i += srcCount) {
2829 int srcChar = (int)value[i];
2830 int upperChar;
2831 char[] upperCharArray;
2832 srcCount = 1;
2833 if (Character.isSurrogate((char)srcChar)) {
2834 srcChar = codePointAt(i);
2835 srcCount = Character.charCount(srcChar);
2836 }
2837 if (localeDependent) {
2838 upperChar = ConditionalSpecialCasing.toUpperCaseEx(this, i, locale);
2839 } else {
2840 upperChar = Character.toUpperCaseEx(srcChar);
2841 }
2842 if (Character.isBmpCodePoint(upperChar)) {
2843 result[resultOffset++] = (char)upperChar;
2844 } else {
2845 if (upperChar == Character.ERROR) {
2846 if (localeDependent) {
2847 upperCharArray =
2848 ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
2849 } else {
2850 upperCharArray = Character.toUpperCaseCharArray(srcChar);
2851 }
2852 } else if (srcCount == 2) {
2853 resultOffset += Character.toChars(upperChar, result, resultOffset);
2854 continue;
2855 } else {
2856 upperCharArray = Character.toChars(upperChar);
2857 }
2858 /* Grow result if needed */
2859 int mapLen = upperCharArray.length;
2860 if (mapLen > srcCount) {
2861 char[] result2 = new char[result.length + mapLen - srcCount];
2862 System.arraycopy(result, 0, result2, 0, resultOffset);
2863 result = result2;
2864 }
2865 for (int x = 0; x < mapLen; ++x) {
2866 result[resultOffset++] = upperCharArray[x];
2867 }
2868 }
2869 }
2870 return new String(result, 0, resultOffset);
2871 }
2872
2873 /**
2874 * Converts all of the characters in this {@code String} to upper
2875 * case using the rules of the default locale. This method is equivalent to
2876 * {@code toUpperCase(Locale.getDefault())}.
2877 * <p>
2878 * <b>Note:</b> This method is locale sensitive, and may produce unexpected
2879 * results if used for strings that are intended to be interpreted locale
2880 * independently.
2881 * Examples are programming language identifiers, protocol keys, and HTML
2882 * tags.
2883 * For instance, {@code "title".toUpperCase()} in a Turkish locale
2884 * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the
2885 * LATIN CAPITAL LETTER I WITH DOT ABOVE character.
2886 * To obtain correct results for locale insensitive strings, use
2887 * {@code toUpperCase(Locale.ROOT)}.
2888 *
2889 * @return the {@code String}, converted to uppercase.
2890 * @see java.lang.String#toUpperCase(Locale)
2908 * {@code String} object representing an empty string is
2909 * returned.
2910 * <p>
2911 * Otherwise, let <i>k</i> be the index of the first character in the
2912 * string whose code is greater than {@code '\u005Cu0020'}, and let
2913 * <i>m</i> be the index of the last character in the string whose code
2914 * is greater than {@code '\u005Cu0020'}. A {@code String}
2915 * object is returned, representing the substring of this string that
2916 * begins with the character at index <i>k</i> and ends with the
2917 * character at index <i>m</i>-that is, the result of
2918 * {@code this.substring(k, m + 1)}.
2919 * <p>
2920 * This method may be used to trim whitespace (as defined above) from
2921 * the beginning and end of a string.
2922 *
2923 * @return A string whose value is this string, with any leading and trailing white
2924 * space removed, or this string if it has no leading or
2925 * trailing white space.
2926 */
2927 public String trim() {
2928 char[] val = value; /* avoid getfield opcode */
2929 int end = val.length;
2930 int beg = 0;
2931
2932 while ((beg < end) && (val[beg] <= ' ')) {
2933 beg++;
2934 }
2935 while ((beg < end) && (val[end - 1] <= ' ')) {
2936 end--;
2937 }
2938 return substring(beg, end);
2939 }
2940
2941 /**
2942 * This object (which is already a string!) is itself returned.
2943 *
2944 * @return the string itself.
2945 */
2946 public String toString() {
2947 return this;
2948 }
2949
2950 static class IntCharArraySpliterator implements Spliterator.OfInt {
2951 private final char[] array;
2952 private int index; // current index, modified on advance/split
2953 private final int fence; // one past last index
2954 private final int cs;
2955
2956 IntCharArraySpliterator(char[] array, int acs) {
2957 this(array, 0, array.length, acs);
2958 }
2959
2960 IntCharArraySpliterator(char[] array, int origin, int fence, int acs) {
2961 this.array = array;
2962 this.index = origin;
2963 this.fence = fence;
2964 this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
2965 | Spliterator.SUBSIZED;
2966 }
2967
2968 @Override
2969 public OfInt trySplit() {
2970 int lo = index, mid = (lo + fence) >>> 1;
2971 return (lo >= mid)
2972 ? null
2973 : new IntCharArraySpliterator(array, lo, index = mid, cs);
2974 }
2975
2976 @Override
2977 public void forEachRemaining(IntConsumer action) {
2978 char[] a; int i, hi; // hoist accesses and checks from loop
2979 if (action == null)
2980 throw new NullPointerException();
2981 if ((a = array).length >= (hi = fence) &&
2982 (i = index) >= 0 && i < (index = hi)) {
2983 do { action.accept(a[i]); } while (++i < hi);
2984 }
2985 }
2986
2987 @Override
2988 public boolean tryAdvance(IntConsumer action) {
2989 if (action == null)
2990 throw new NullPointerException();
2991 if (index >= 0 && index < fence) {
2992 action.accept(array[index++]);
2993 return true;
2994 }
2995 return false;
2996 }
2997
2998 @Override
2999 public long estimateSize() { return (long)(fence - index); }
3000
3001 @Override
3002 public int characteristics() {
3003 return cs;
3004 }
3005 }
3006
3007 /**
3008 * Returns a stream of {@code int} zero-extending the {@code char} values
3009 * from this sequence. Any char which maps to a <a
3010 * href="{@docRoot}/java/lang/Character.html#unicode">surrogate code
3011 * point</a> is passed through uninterpreted.
3012 *
3013 * @return an IntStream of char values from this sequence
3014 * @since 1.9
3015 */
3016 @Override
3017 public IntStream chars() {
3018 return StreamSupport.intStream(
3019 new IntCharArraySpliterator(value, Spliterator.IMMUTABLE), false);
3020 }
3021
3022 static class CodePointsSpliterator implements Spliterator.OfInt {
3023 private final char[] array;
3024 private int index; // current index, modified on advance/split
3025 private final int fence; // one past last index
3026 private final int cs;
3027
3028 CodePointsSpliterator(char[] array, int acs) {
3029 this(array, 0, array.length, acs);
3030 }
3031
3032 CodePointsSpliterator(char[] array, int origin, int fence, int acs) {
3033 this.array = array;
3034 this.index = origin;
3035 this.fence = fence;
3036 this.cs = acs | Spliterator.ORDERED;
3037 }
3038
3039 @Override
3040 public OfInt trySplit() {
3041 int lo = index, mid = (lo + fence) >>> 1;
3042 if (lo >= mid)
3043 return null;
3044
3045 int midOneLess;
3046 // If the mid-point intersects a surrogate pair
3047 if (Character.isLowSurrogate(array[mid]) &&
3048 Character.isHighSurrogate(array[midOneLess = (mid -1)])) {
3049 // If there is only one pair it cannot be split
3050 if (lo >= midOneLess)
3051 return null;
3052 // Shift the mid-point to align with the surrogate pair
3053 return new CodePointsSpliterator(array, lo, index = midOneLess, cs);
3054 }
3055 return new CodePointsSpliterator(array, lo, index = mid, cs);
3056 }
3057
3058 @Override
3059 public void forEachRemaining(IntConsumer action) {
3060 char[] a; int i, hi; // hoist accesses and checks from loop
3061 if (action == null)
3062 throw new NullPointerException();
3063 if ((a = array).length >= (hi = fence) &&
3064 (i = index) >= 0 && i < (index = hi)) {
3065 do {
3066 i = advance(a, i, hi, action);
3067 } while (i < hi);
3068 }
3069 }
3070
3071 @Override
3072 public boolean tryAdvance(IntConsumer action) {
3073 if (action == null)
3074 throw new NullPointerException();
3075 if (index >= 0 && index < fence) {
3076 index = advance(array, index, fence, action);
3077 return true;
3078 }
3079 return false;
3080 }
3081
3082 // Advance one code point from the index, i, and return the next
3083 // index to advance from
3084 private static int advance(char[] a, int i, int hi, IntConsumer action) {
3085 char c1 = a[i++];
3086 int cp = c1;
3087 if (Character.isHighSurrogate(c1) && i < hi) {
3088 char c2 = a[i];
3089 if (Character.isLowSurrogate(c2)) {
3090 i++;
3091 cp = Character.toCodePoint(c1, c2);
3092 }
3093 }
3094 action.accept(cp);
3095 return i;
3096 }
3097
3098 @Override
3099 public long estimateSize() { return (long)(fence - index); }
3100
3101 @Override
3102 public int characteristics() {
3103 return cs;
3104 }
3105 }
3106
3107 /**
3108 * Returns a stream of code point values from this sequence. Any surrogate
3109 * pairs encountered in the sequence are combined as if by {@linkplain
3110 * Character#toCodePoint Character.toCodePoint} and the result is passed
3111 * to the stream. Any other code units, including ordinary BMP characters,
3112 * unpaired surrogates, and undefined code units, are zero-extended to
3113 * {@code int} values which are then passed to the stream.
3114 *
3115 * @return an IntStream of Unicode code points from this sequence
3116 * @since 1.9
3117 */
3118 @Override
3119 public IntStream codePoints() {
3120 return StreamSupport.intStream(
3121 new CodePointsSpliterator(value, Spliterator.IMMUTABLE), false);
3122 }
3123
3124 /**
3125 * Converts this string to a new character array.
3126 *
3127 * @return a newly allocated character array whose length is the length
3128 * of this string and whose contents are initialized to contain
3129 * the character sequence represented by this string.
3130 */
3131 public char[] toCharArray() {
3132 // Cannot use Arrays.copyOf because of class initialization order issues
3133 char[] result = new char[value.length];
3134 System.arraycopy(value, 0, result, 0, value.length);
3135 return result;
3136 }
3137
3138 /**
3139 * Returns a formatted string using the specified format string and
3140 * arguments.
3141 *
3142 * <p> The locale always used is the one returned by {@link
3143 * java.util.Locale#getDefault() Locale.getDefault()}.
3144 *
3145 * @param format
3146 * A <a href="../util/Formatter.html#syntax">format string</a>
3147 *
3148 * @param args
3149 * Arguments referenced by the format specifiers in the format
3150 * string. If there are more arguments than format specifiers, the
3151 * extra arguments are ignored. The number of arguments is
3152 * variable and may be zero. The maximum number of arguments is
3153 * limited by the maximum dimension of a Java array as defined by
3154 * <cite>The Java™ Virtual Machine Specification</cite>.
3155 * The behaviour on a
3298 * Returns the string representation of the {@code boolean} argument.
3299 *
3300 * @param b a {@code boolean}.
3301 * @return if the argument is {@code true}, a string equal to
3302 * {@code "true"} is returned; otherwise, a string equal to
3303 * {@code "false"} is returned.
3304 */
3305 public static String valueOf(boolean b) {
3306 return b ? "true" : "false";
3307 }
3308
3309 /**
3310 * Returns the string representation of the {@code char}
3311 * argument.
3312 *
3313 * @param c a {@code char}.
3314 * @return a string of length {@code 1} containing
3315 * as its single character the argument {@code c}.
3316 */
3317 public static String valueOf(char c) {
3318 return new String(new char[]{c}, true);
3319 }
3320
3321 /**
3322 * Returns the string representation of the {@code int} argument.
3323 * <p>
3324 * The representation is exactly the one returned by the
3325 * {@code Integer.toString} method of one argument.
3326 *
3327 * @param i an {@code int}.
3328 * @return a string representation of the {@code int} argument.
3329 * @see java.lang.Integer#toString(int)
3330 */
3331 public static String valueOf(int i) {
3332 return Integer.toString(i);
3333 }
3334
3335 /**
3336 * Returns the string representation of the {@code long} argument.
3337 * <p>
3338 * The representation is exactly the one returned by the
3381 * class {@code String}.
3382 * <p>
3383 * When the intern method is invoked, if the pool already contains a
3384 * string equal to this {@code String} object as determined by
3385 * the {@link #equals(Object)} method, then the string from the pool is
3386 * returned. Otherwise, this {@code String} object is added to the
3387 * pool and a reference to this {@code String} object is returned.
3388 * <p>
3389 * It follows that for any two strings {@code s} and {@code t},
3390 * {@code s.intern() == t.intern()} is {@code true}
3391 * if and only if {@code s.equals(t)} is {@code true}.
3392 * <p>
3393 * All literal strings and string-valued constant expressions are
3394 * interned. String literals are defined in section 3.10.5 of
3395 * <cite>The Java™ Language Specification</cite>.
3396 *
3397 * @return a string that has the same contents as this string, but is
3398 * guaranteed to be from a pool of unique strings.
3399 */
3400 public native String intern(); // native: declared without a Java body
3401 }
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.lang;
27
28 import java.io.ObjectStreamField;
29 import java.io.UnsupportedEncodingException;
30 import java.nio.charset.Charset;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Comparator;
34 import java.util.Formatter;
35 import java.util.Locale;
36 import java.util.Objects;
37 import java.util.Spliterator;
38 import java.util.StringJoiner;
39 import java.util.regex.Matcher;
40 import java.util.regex.Pattern;
41 import java.util.regex.PatternSyntaxException;
42 import java.util.stream.IntStream;
43 import java.util.stream.StreamSupport;
44 import jdk.internal.HotSpotIntrinsicCandidate;
45
46 /**
47 * The {@code String} class represents character strings. All
48 * string literals in Java programs, such as {@code "abc"}, are
49 * implemented as instances of this class.
50 * <p>
51 * Strings are constant; their values cannot be changed after they
52 * are created. String buffers support mutable strings.
53 * Because String objects are immutable they can be shared. For example:
54 * <blockquote><pre>
55 * String str = "abc";
56 * </pre></blockquote><p>
57 * is equivalent to:
58 * <blockquote><pre>
101 * Unicode code points (i.e., characters), in addition to those for
102 * dealing with Unicode code units (i.e., {@code char} values).
103 *
104 * <p>Unless otherwise noted, methods for comparing Strings do not take locale
105 * into account. The {@link java.text.Collator} class provides methods for
106 * finer-grain, locale-sensitive String comparison.
107 *
108 * @author Lee Boynton
109 * @author Arthur van Hoff
110 * @author Martin Buchholz
111 * @author Ulf Zibis
112 * @see java.lang.Object#toString()
113 * @see java.lang.StringBuffer
114 * @see java.lang.StringBuilder
115 * @see java.nio.charset.Charset
116 * @since 1.0
117 */
118
119 public final class String
120 implements java.io.Serializable, Comparable<String>, CharSequence {
121
122 /**
 * The value is used for character storage. The bytes are interpreted
 * according to {@code coder}: {@code length()} is computed as
 * {@code value.length >> coder()}, and the accessors dispatch on
 * {@code isLatin1()} to the {@code StringLatin1} or {@code StringUTF16}
 * helpers.
 */
123 private final byte[] value;
124
125 /**
126 * The identifier of the encoding used to encode the bytes in
127 * {@code value}. The supported values in this implementation are
128 *
129 * LATIN1
130 * UTF16
131 *
132 */
133 private final byte coder;
134
135 /**
 * Cache the hash code for the string. The copy constructor
 * {@code String(String)} carries the cached value over from its argument.
 */
136 private int hash; // Default to 0
137
138 /** use serialVersionUID from JDK 1.0.2 for interoperability */
139 private static final long serialVersionUID = -6849794470754667710L;
140
141 /**
142 * If String compaction is disabled, the bytes in {@code value} are
143 * always encoded in UTF16.
144 *
145 * For methods with several possible implementation paths, when String
146 * compaction is disabled, only one code path is taken.
147 *
148 * The instance field value is generally opaque to optimizing JIT
149 * compilers. Therefore, in performance-sensitive places, an explicit
150 * check of the static boolean {@code COMPACT_STRINGS} is done first
151 * before checking the {@code coder} field since the static boolean
152 * {@code COMPACT_STRINGS} would be constant folded away by an
153 * optimizing JIT compiler. The idioms for these cases are as follows.
154 *
155 * For code such as:
156 *
157 * if (coder == LATIN1) { ... }
158 *
159 * can be written more optimally as
160 *
161 * if (coder() == LATIN1) { ... }
162 *
163 * or:
164 *
165 * if (COMPACT_STRINGS && coder == LATIN1) { ... }
166 *
167 * An optimizing JIT compiler can fold the above conditional as:
168 *
169 * COMPACT_STRINGS == true => if (coder == LATIN1) { ... }
170 * COMPACT_STRINGS == false => if (false) { ... }
171 *
172 * @implNote
173 * The actual value for this field is injected by JVM. The static
174 * initialization block is used to set the value here to communicate
175 * that this static final field is not statically foldable, and to
176 * avoid any possible circular dependency during vm initialization.
177 */
178 static final boolean COMPACT_STRINGS;
179
180 static {
// Placeholder assignment only: per the @implNote above, the JVM injects
// the actual value of this field.
181 COMPACT_STRINGS = true;
182 }
183
184 /**
185 * Class String is special cased within the Serialization Stream Protocol.
186 *
187 * A String instance is written into an ObjectOutputStream according to
188 * <a href="{@docRoot}/../platform/serialization/spec/output.html">
189 * Object Serialization Specification, Section 6.2, "Stream Elements"</a>
 *
 * The array assigned below is empty, so it lists none of this class's
 * fields.
190 */
191 private static final ObjectStreamField[] serialPersistentFields =
192 new ObjectStreamField[0];
193
194 /**
195 * Initializes a newly created {@code String} object so that it represents
196 * an empty character sequence. Note that use of this constructor is
197 * unnecessary since Strings are immutable.
198 */
199 public String() {
200 this.value = "".value;
201 this.coder = "".coder;
202 }
203
204 /**
205 * Initializes a newly created {@code String} object so that it represents
206 * the same sequence of characters as the argument; in other words, the
207 * newly created string is a copy of the argument string. Unless an
208 * explicit copy of {@code original} is needed, use of this constructor is
209 * unnecessary since Strings are immutable.
210 *
211 * @param original
212 * A {@code String}
213 */
@HotSpotIntrinsicCandidate
public String(String original) {
    // The new instance points at the same backing array as the argument
    // (no bytes are copied here), and the argument's cached hash is
    // carried over along with its coder.
    this.hash = original.hash;
    this.coder = original.coder;
    this.value = original.value;
}
220
221 /**
222 * Allocates a new {@code String} so that it represents the sequence of
223 * characters currently contained in the character array argument. The
224 * contents of the character array are copied; subsequent modification of
225 * the character array does not affect the newly created string.
226 *
227 * @param value
228 * The initial value of the string
229 */
public String(char value[]) {
    // Whole-array case: offset 0 and count value.length are in range by
    // construction, so no rangeCheck call is made. The trailing null
    // occupies the argument slot that rangeCheck's Void result fills in
    // the (char[], int, int) constructor.
    this(value, 0, value.length, null);
}
233
234 /**
235 * Allocates a new {@code String} that contains characters from a subarray
236 * of the character array argument. The {@code offset} argument is the
237 * index of the first character of the subarray and the {@code count}
238 * argument specifies the length of the subarray. The contents of the
239 * subarray are copied; subsequent modification of the character array does
240 * not affect the newly created string.
241 *
242 * @param value
243 * Array that is the source of characters
244 *
245 * @param offset
246 * The initial offset
247 *
248 * @param count
249 * The length
250 *
251 * @throws IndexOutOfBoundsException
252 * If {@code offset} is negative, {@code count} is negative, or
253 * {@code offset} is greater than {@code value.length - count}
254 */
public String(char value[], int offset, int count) {
    // rangeCheck(...) is evaluated as an argument expression, so the
    // bounds are validated before the delegated constructor is entered.
    // When it returns normally it always yields null.
    this(value, offset, count, rangeCheck(value, offset, count));
}
258
259 private static Void rangeCheck(char[] value, int offset, int count) {
260 checkBoundsOffCount(offset, count, value.length);
261 return null;
262 }
263
264 /**
265 * Allocates a new {@code String} that contains characters from a subarray
266 * of the <a href="Character.html#unicode">Unicode code point</a> array
267 * argument. The {@code offset} argument is the index of the first code
268 * point of the subarray and the {@code count} argument specifies the
269 * length of the subarray. The contents of the subarray are converted to
270 * {@code char}s; subsequent modification of the {@code int} array does not
271 * affect the newly created string.
272 *
273 * @param codePoints
274 * Array that is the source of Unicode code points
275 *
276 * @param offset
277 * The initial offset
278 *
279 * @param count
280 * The length
281 *
282 * @throws IllegalArgumentException
283 * If any invalid Unicode code point is found in {@code
284 * codePoints}
285 *
286 * @throws IndexOutOfBoundsException
287 * If {@code offset} is negative, {@code count} is negative, or
288 * {@code offset} is greater than {@code codePoints.length - count}
289 *
290 * @since 1.5
291 */
292 public String(int[] codePoints, int offset, int count) {
293 checkBoundsOffCount(offset, count, codePoints.length);
294 if (count == 0) {
295 this.value = "".value;
296 this.coder = "".coder;
297 return;
298 }
299 if (COMPACT_STRINGS) {
300 byte[] val = StringLatin1.toBytes(codePoints, offset, count);
301 if (val != null) {
302 this.coder = LATIN1;
303 this.value = val;
304 return;
305 }
306 }
307 this.coder = UTF16;
308 this.value = StringUTF16.toBytes(codePoints, offset, count);
309 }
310
311 /**
312 * Allocates a new {@code String} constructed from a subarray of an array
313 * of 8-bit integer values.
314 *
315 * <p> The {@code offset} argument is the index of the first byte of the
316 * subarray, and the {@code count} argument specifies the length of the
317 * subarray.
318 *
319 * <p> Each {@code byte} in the subarray is converted to a {@code char} as
320 * specified in the {@link #String(byte[],int) String(byte[],int)} constructor.
321 *
322 * @deprecated This method does not properly convert bytes into characters.
323 * As of JDK 1.1, the preferred way to do this is via the
324 * {@code String} constructors that take a {@link
325 * java.nio.charset.Charset}, charset name, or that use the platform's
326 * default charset.
327 *
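328 * @param ascii
329 * The bytes to be converted to characters
330 *
331 * @param hibyte
332 * The top 8 bits of each 16-bit Unicode code unit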
333 *
334 * @param offset
335 * The initial offset
336 * @param count
337 * The length
338 *
339 * @throws IndexOutOfBoundsException
340 * If {@code offset} is negative, {@code count} is negative, or
341 * {@code offset} is greater than {@code ascii.length - count}
342 *
343 * @see #String(byte[], int)
344 * @see #String(byte[], int, int, java.lang.String)
345 * @see #String(byte[], int, int, java.nio.charset.Charset)
346 * @see #String(byte[], int, int)
347 * @see #String(byte[], java.lang.String)
348 * @see #String(byte[], java.nio.charset.Charset)
349 * @see #String(byte[])
350 */
351 @Deprecated
352 public String(byte ascii[], int hibyte, int offset, int count) {
353 checkBoundsOffCount(offset, count, ascii.length);
354 if (count == 0) {
355 this.value = "".value;
356 this.coder = "".coder;
357 return;
358 }
359 if (COMPACT_STRINGS && (byte)hibyte == 0) {
360 this.value = Arrays.copyOfRange(ascii, offset, offset + count);
361 this.coder = LATIN1;
362 } else {
363 hibyte <<= 8;
364 byte[] val = StringUTF16.newBytesFor(count);
365 for (int i = 0; i < count; i++) {
366 StringUTF16.putChar(val, i, hibyte | (ascii[offset++] & 0xff));
367 }
368 this.value = val;
369 this.coder = UTF16;
370 }
371 }
372
373 /**
374 * Allocates a new {@code String} containing characters constructed from
375 * an array of 8-bit integer values. Each character <i>c</i> in the
376 * resulting string is constructed from the corresponding component
377 * <i>b</i> in the byte array such that:
378 *
379 * <blockquote><pre>
380 * <b><i>c</i></b> == (char)(((hibyte & 0xff) << 8)
381 * | (<b><i>b</i></b> & 0xff))
382 * </pre></blockquote>
383 *
384 * @deprecated This method does not properly convert bytes into
385 * characters. As of JDK 1.1, the preferred way to do this is via the
386 * {@code String} constructors that take a {@link
387 * java.nio.charset.Charset}, charset name, or that use the platform's
388 * default charset.
389 *
390 * @param ascii
391 * The bytes to be converted to characters
392 *
393 * @param hibyte
394 * The top 8 bits of each 16-bit Unicode code unit
395 *
396 * @see #String(byte[], int, int, java.lang.String)
397 * @see #String(byte[], int, int, java.nio.charset.Charset)
398 * @see #String(byte[], int, int)
399 * @see #String(byte[], java.lang.String)
400 * @see #String(byte[], java.nio.charset.Charset)
401 * @see #String(byte[])
402 */
@Deprecated
public String(byte ascii[], int hibyte) {
    // Whole-array case of the (byte[], hibyte, offset, count) constructor.
    this(ascii, hibyte, 0, ascii.length);
}
407
408 /**
409 * Constructs a new {@code String} by decoding the specified subarray of
410 * bytes using the specified charset. The length of the new {@code String}
411 * is a function of the charset, and hence may not be equal to the length
412 * of the subarray.
413 *
414 * <p> The behavior of this constructor when the given bytes are not valid
415 * in the given charset is unspecified. The {@link
416 * java.nio.charset.CharsetDecoder} class should be used when more control
417 * over the decoding process is required.
418 *
419 * @param bytes
420 * The bytes to be decoded into characters
421 *
422 * @param offset
423 * The index of the first byte to decode
424 *
425 * @param length
426 * The number of bytes to decode
427 *
428 * @param charsetName
429 * The name of a supported {@linkplain java.nio.charset.Charset
430 * charset}
431 *
432 * @throws UnsupportedEncodingException
433 * If the named charset is not supported
434 *
435 * @throws IndexOutOfBoundsException
436 * If {@code offset} is negative, {@code length} is negative, or
437 * {@code offset} is greater than {@code bytes.length - length}
438 *
439 * @since 1.1
440 */
441 public String(byte bytes[], int offset, int length, String charsetName)
442 throws UnsupportedEncodingException {
443 if (charsetName == null)
444 throw new NullPointerException("charsetName");
445 checkBoundsOffCount(offset, length, bytes.length);
446 StringCoding.Result ret =
447 StringCoding.decode(charsetName, bytes, offset, length);
448 this.value = ret.value;
449 this.coder = ret.coder;
450 }
451
452 /**
453 * Constructs a new {@code String} by decoding the specified subarray of
454 * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
455 * The length of the new {@code String} is a function of the charset, and
456 * hence may not be equal to the length of the subarray.
457 *
458 * <p> This method always replaces malformed-input and unmappable-character
459 * sequences with this charset's default replacement string. The {@link
460 * java.nio.charset.CharsetDecoder} class should be used when more control
461 * over the decoding process is required.
462 *
463 * @param bytes
464 * The bytes to be decoded into characters
465 *
466 * @param offset
467 * The index of the first byte to decode
468 *
469 * @param length
470 * The number of bytes to decode
471 *
472 * @param charset
473 * The {@linkplain java.nio.charset.Charset charset} to be used to
474 * decode the {@code bytes}
475 *
476 * @throws IndexOutOfBoundsException
477 * If {@code offset} is negative, {@code length} is negative, or
478 * {@code offset} is greater than {@code bytes.length - length}
479 *
480 * @since 1.6
481 */
482 public String(byte bytes[], int offset, int length, Charset charset) {
483 if (charset == null)
484 throw new NullPointerException("charset");
485 checkBoundsOffCount(offset, length, bytes.length);
486 StringCoding.Result ret =
487 StringCoding.decode(charset, bytes, offset, length);
488 this.value = ret.value;
489 this.coder = ret.coder;
490 }
491
492 /**
493 * Constructs a new {@code String} by decoding the specified array of bytes
494 * using the specified {@linkplain java.nio.charset.Charset charset}. The
495 * length of the new {@code String} is a function of the charset, and hence
496 * may not be equal to the length of the byte array.
497 *
498 * <p> The behavior of this constructor when the given bytes are not valid
499 * in the given charset is unspecified. The {@link
500 * java.nio.charset.CharsetDecoder} class should be used when more control
501 * over the decoding process is required.
502 *
503 * @param bytes
504 * The bytes to be decoded into characters
505 *
506 * @param charsetName
507 * The name of a supported {@linkplain java.nio.charset.Charset
508 * charset}
509 *
551 * in the default charset is unspecified. The {@link
552 * java.nio.charset.CharsetDecoder} class should be used when more control
553 * over the decoding process is required.
554 *
555 * @param bytes
556 * The bytes to be decoded into characters
557 *
558 * @param offset
559 * The index of the first byte to decode
560 *
561 * @param length
562 * The number of bytes to decode
563 *
564 * @throws IndexOutOfBoundsException
565 * If {@code offset} is negative, {@code length} is negative, or
566 * {@code offset} is greater than {@code bytes.length - length}
567 *
568 * @since 1.1
569 */
570 public String(byte bytes[], int offset, int length) {
571 checkBoundsOffCount(offset, length, bytes.length);
572 StringCoding.Result ret = StringCoding.decode(bytes, offset, length);
573 this.value = ret.value;
574 this.coder = ret.coder;
575 }
576
577 /**
578 * Constructs a new {@code String} by decoding the specified array of bytes
579 * using the platform's default charset. The length of the new {@code
580 * String} is a function of the charset, and hence may not be equal to the
581 * length of the byte array.
582 *
583 * <p> The behavior of this constructor when the given bytes are not valid
584 * in the default charset is unspecified. The {@link
585 * java.nio.charset.CharsetDecoder} class should be used when more control
586 * over the decoding process is required.
587 *
588 * @param bytes
589 * The bytes to be decoded into characters
590 *
591 * @since 1.1
592 */
public String(byte[] bytes) {
    // Whole-array case of the (byte[], offset, length) constructor.
    this(bytes, 0, bytes.length);
}
596
597 /**
598 * Allocates a new string that contains the sequence of characters
599 * currently contained in the string buffer argument. The contents of the
600 * string buffer are copied; subsequent modification of the string buffer
601 * does not affect the newly created string.
602 *
603 * @param buffer
604 * A {@code StringBuffer}
605 */
public String(StringBuffer buffer) {
    // Take a String snapshot of the buffer and hand it to the copy
    // constructor, which adopts that snapshot's storage.
    this(buffer.toString());
}
609
610 /**
611 * Allocates a new string that contains the sequence of characters
612 * currently contained in the string builder argument. The contents of the
613 * string builder are copied; subsequent modification of the string builder
614 * does not affect the newly created string.
615 *
616 * <p> This constructor is provided to ease migration to {@code
617 * StringBuilder}. Obtaining a string from a string builder via the {@code
618 * toString} method is likely to run faster and is generally preferred.
619 *
620 * @param builder
621 * A {@code StringBuilder}
622 *
623 * @since 1.5
624 */
public String(StringBuilder builder) {
    // Delegates to a two-argument constructor declared elsewhere in this
    // class; the null is only a placeholder second argument.
    this(builder, null);
}
628
629 /*
630 * Package private constructor which shares value array for speed.
631 * this constructor is always expected to be called with share==true.
632 * a separate constructor is needed because we already have a public
633 * String(char[]) constructor that makes a copy of the given char[].
634 */
635 // TBD: this is kept for package internal use (Thread/System),
636 // should be removed if they all have a byte[] version
String(char[] val, boolean share) {
    // The share flag is never read. The delegation below is the same call
    // the public String(char[]) constructor makes, and that constructor is
    // documented as copying the array -- so, despite the header comment,
    // nothing here indicates that val is shared with the new string.
    // assert share : "unshared not supported";
    this(val, 0, val.length, null);
}
641
642 /**
643 * Returns the length of this string.
644 * The length is equal to the number of <a href="Character.html#unicode">Unicode
645 * code units</a> in the string.
646 *
647 * @return the length of the sequence of characters represented by this
648 * object.
649 */
650 public int length() {
651 return value.length >> coder();
652 }
653
654 /**
655 * Returns {@code true} if, and only if, {@link #length()} is {@code 0}.
656 *
657 * @return {@code true} if {@link #length()} is {@code 0}, otherwise
658 * {@code false}
659 *
660 * @since 1.6
661 */
662 public boolean isEmpty() {
663 return value.length == 0;
664 }
665
666 /**
667 * Returns the {@code char} value at the
668 * specified index. An index ranges from {@code 0} to
669 * {@code length() - 1}. The first {@code char} value of the sequence
670 * is at index {@code 0}, the next at index {@code 1},
671 * and so on, as for array indexing.
672 *
673 * <p>If the {@code char} value specified by the index is a
674 * <a href="Character.html#unicode">surrogate</a>, the surrogate
675 * value is returned.
676 *
677 * @param index the index of the {@code char} value.
678 * @return the {@code char} value at the specified index of this string.
679 * The first {@code char} value is at index {@code 0}.
680 * @exception IndexOutOfBoundsException if the {@code index}
681 * argument is negative or not less than the length of this
682 * string.
683 */
684 public char charAt(int index) {
685 if (isLatin1()) {
686 return StringLatin1.charAt(value, index);
687 } else {
688 return StringUTF16.charAt(value, index);
689 }
690 }
691
692 /**
693 * Returns the character (Unicode code point) at the specified
694 * index. The index refers to {@code char} values
695 * (Unicode code units) and ranges from {@code 0} to
696 * {@link #length()}{@code - 1}.
697 *
698 * <p> If the {@code char} value specified at the given index
699 * is in the high-surrogate range, the following index is less
700 * than the length of this {@code String}, and the
701 * {@code char} value at the following index is in the
702 * low-surrogate range, then the supplementary code point
703 * corresponding to this surrogate pair is returned. Otherwise,
704 * the {@code char} value at the given index is returned.
705 *
706 * @param index the index to the {@code char} values
707 * @return the code point value of the character at the
708 * {@code index}
709 * @exception IndexOutOfBoundsException if the {@code index}
710 * argument is negative or not less than the length of this
711 * string.
712 * @since 1.5
713 */
714 public int codePointAt(int index) {
715 if (isLatin1()) {
716 checkIndex(index, value.length);
717 return value[index] & 0xff;
718 }
719 int length = value.length >> 1;
720 checkIndex(index, length);
721 return StringUTF16.codePointAt(value, index, length);
722 }
723
724 /**
725 * Returns the character (Unicode code point) before the specified
726 * index. The index refers to {@code char} values
727 * (Unicode code units) and ranges from {@code 1} to {@link
728 * CharSequence#length() length}.
729 *
730 * <p> If the {@code char} value at {@code (index - 1)}
731 * is in the low-surrogate range, {@code (index - 2)} is not
732 * negative, and the {@code char} value at {@code (index -
733 * 2)} is in the high-surrogate range, then the
734 * supplementary code point value of the surrogate pair is
735 * returned. If the {@code char} value at {@code index -
736 * 1} is an unpaired low-surrogate or a high-surrogate, the
737 * surrogate value is returned.
738 *
739 * @param index the index following the code point that should be returned
740 * @return the Unicode code point value before the given index.
741 * @exception IndexOutOfBoundsException if the {@code index}
742 * argument is less than 1 or greater than the length
743 * of this string.
744 * @since 1.5
745 */
746 public int codePointBefore(int index) {
747 int i = index - 1;
748 if (i < 0 || i >= length()) {
749 throw new StringIndexOutOfBoundsException(index);
750 }
751 if (isLatin1()) {
752 return (value[i] & 0xff);
753 }
754 return StringUTF16.codePointBefore(value, index);
755 }
756
757 /**
758 * Returns the number of Unicode code points in the specified text
759 * range of this {@code String}. The text range begins at the
760 * specified {@code beginIndex} and extends to the
761 * {@code char} at index {@code endIndex - 1}. Thus the
762 * length (in {@code char}s) of the text range is
763 * {@code endIndex-beginIndex}. Unpaired surrogates within
764 * the text range count as one code point each.
765 *
766 * @param beginIndex the index to the first {@code char} of
767 * the text range.
768 * @param endIndex the index after the last {@code char} of
769 * the text range.
770 * @return the number of Unicode code points in the specified text
771 * range
772 * @exception IndexOutOfBoundsException if the
773 * {@code beginIndex} is negative, or {@code endIndex}
774 * is larger than the length of this {@code String}, or
775 * {@code beginIndex} is larger than {@code endIndex}.
776 * @since 1.5
777 */
778 public int codePointCount(int beginIndex, int endIndex) {
779 if (beginIndex < 0 || beginIndex > endIndex ||
780 endIndex > length()) {
781 throw new IndexOutOfBoundsException();
782 }
783 if (isLatin1()) {
784 return endIndex - beginIndex;
785 }
786 return StringUTF16.codePointCount(value, beginIndex, endIndex);
787 }
788
789 /**
790 * Returns the index within this {@code String} that is
791 * offset from the given {@code index} by
792 * {@code codePointOffset} code points. Unpaired surrogates
793 * within the text range given by {@code index} and
794 * {@code codePointOffset} count as one code point each.
795 *
796 * @param index the index to be offset
797 * @param codePointOffset the offset in code points
798 * @return the index within this {@code String}
799 * @exception IndexOutOfBoundsException if {@code index}
800 * is negative or larger than the length of this
801 * {@code String}, or if {@code codePointOffset} is positive
802 * and the substring starting with {@code index} has fewer
803 * than {@code codePointOffset} code points,
804 * or if {@code codePointOffset} is negative and the substring
805 * before {@code index} has fewer than the absolute value
806 * of {@code codePointOffset} code points.
807 * @since 1.5
808 */
809 public int offsetByCodePoints(int index, int codePointOffset) {
810 if (index < 0 || index > length()) {
811 throw new IndexOutOfBoundsException();
812 }
813 return Character.offsetByCodePoints(this, index, codePointOffset);
814 }
815
816 /**
817 * Copies characters from this string into the destination character
818 * array.
819 * <p>
820 * The first character to be copied is at index {@code srcBegin};
821 * the last character to be copied is at index {@code srcEnd-1}
822 * (thus the total number of characters to be copied is
823 * {@code srcEnd-srcBegin}). The characters are copied into the
824 * subarray of {@code dst} starting at index {@code dstBegin}
825 * and ending at index:
826 * <blockquote><pre>
827 * dstBegin + (srcEnd-srcBegin) - 1
828 * </pre></blockquote>
829 *
830 * @param srcBegin index of the first character in the string
831 * to copy.
832 * @param srcEnd index after the last character in the string
833 * to copy.
834 * @param dst the destination array.
835 * @param dstBegin the start offset in the destination array.
836 * @exception IndexOutOfBoundsException If any of the following
837 * is true:
838 * <ul><li>{@code srcBegin} is negative.
839 * <li>{@code srcBegin} is greater than {@code srcEnd}
840 * <li>{@code srcEnd} is greater than the length of this
841 * string
842 * <li>{@code dstBegin} is negative
843 * <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than
844 * {@code dst.length}</ul>
845 */
846 public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) {
847 checkBoundsBeginEnd(srcBegin, srcEnd, length());
848 checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
849 if (isLatin1()) {
850 StringLatin1.getChars(value, srcBegin, srcEnd, dst, dstBegin);
851 } else {
852 StringUTF16.getChars(value, srcBegin, srcEnd, dst, dstBegin);
853 }
854 }
855
856 /**
857 * Copies characters from this string into the destination byte array. Each
858 * byte receives the 8 low-order bits of the corresponding character. The
859 * eight high-order bits of each character are not copied and do not
860 * participate in the transfer in any way.
861 *
862 * <p> The first character to be copied is at index {@code srcBegin}; the
863 * last character to be copied is at index {@code srcEnd-1}. The total
864 * number of characters to be copied is {@code srcEnd-srcBegin}. The
865 * characters, converted to bytes, are copied into the subarray of {@code
866 * dst} starting at index {@code dstBegin} and ending at index:
867 *
868 * <blockquote><pre>
869 * dstBegin + (srcEnd-srcBegin) - 1
870 * </pre></blockquote>
871 *
872 * @deprecated This method does not properly convert characters into
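873 * bytes. As of JDK 1.1, the preferred way to do this is via the
874 * {@link #getBytes()} method, which uses the platform's default charset.
875 *
876 * @param srcBegin
877 * Index of the first character in the string to copy
878 *
879 * @param srcEnd
880 * Index after the last character in the string to copy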
881 *
882 * @param dst
883 * The destination array
884 *
885 * @param dstBegin
886 * The start offset in the destination array
887 *
888 * @throws IndexOutOfBoundsException
889 * If any of the following is true:
890 * <ul>
891 * <li> {@code srcBegin} is negative
892 * <li> {@code srcBegin} is greater than {@code srcEnd}
893 * <li> {@code srcEnd} is greater than the length of this String
894 * <li> {@code dstBegin} is negative
895 * <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code
896 * dst.length}
897 * </ul>
898 */
899 @Deprecated
900 public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) {
901 checkBoundsBeginEnd(srcBegin, srcEnd, length());
902 Objects.requireNonNull(dst);
903 checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
904 if (isLatin1()) {
905 StringLatin1.getBytes(value, srcBegin, srcEnd, dst, dstBegin);
906 } else {
907 StringUTF16.getBytes(value, srcBegin, srcEnd, dst, dstBegin);
908 }
909 }
910
911 /**
912 * Encodes this {@code String} into a sequence of bytes using the named
913 * charset, storing the result into a new byte array.
914 *
915 * <p> The behavior of this method when this string cannot be encoded in
916 * the given charset is unspecified. The {@link
917 * java.nio.charset.CharsetEncoder} class should be used when more control
918 * over the encoding process is required.
919 *
920 * @param charsetName
921 * The name of a supported {@linkplain java.nio.charset.Charset
922 * charset}
923 *
924 * @return The resultant byte array
925 *
926 * @throws UnsupportedEncodingException
927 * If the named charset is not supported
928 *
929 * @since 1.1
930 */
931 public byte[] getBytes(String charsetName)
932 throws UnsupportedEncodingException {
933 if (charsetName == null) throw new NullPointerException();
934 return StringCoding.encode(charsetName, coder(), value);
935 }
936
937 /**
938 * Encodes this {@code String} into a sequence of bytes using the given
939 * {@linkplain java.nio.charset.Charset charset}, storing the result into a
940 * new byte array.
941 *
942 * <p> This method always replaces malformed-input and unmappable-character
943 * sequences with this charset's default replacement byte array. The
944 * {@link java.nio.charset.CharsetEncoder} class should be used when more
945 * control over the encoding process is required.
946 *
947 * @param charset
948 * The {@linkplain java.nio.charset.Charset} to be used to encode
949 * the {@code String}
950 *
951 * @return The resultant byte array
952 *
953 * @since 1.6
954 */
955 public byte[] getBytes(Charset charset) {
956 if (charset == null) throw new NullPointerException();
957 return StringCoding.encode(charset, coder(), value);
958 }
959
960 /**
961 * Encodes this {@code String} into a sequence of bytes using the
962 * platform's default charset, storing the result into a new byte array.
963 *
964 * <p> The behavior of this method when this string cannot be encoded in
965 * the default charset is unspecified. The {@link
966 * java.nio.charset.CharsetEncoder} class should be used when more control
967 * over the encoding process is required.
968 *
969 * @return The resultant byte array
970 *
971 * @since 1.1
972 */
973 public byte[] getBytes() {
974 return StringCoding.encode(coder(), value);
975 }
976
977 /**
978 * Compares this string to the specified object. The result is {@code
979 * true} if and only if the argument is not {@code null} and is a {@code
980 * String} object that represents the same sequence of characters as this
981 * object.
982 *
983 * <p>For finer-grained String comparison, refer to
984 * {@link java.text.Collator}.
985 *
986 * @param anObject
987 * The object to compare this {@code String} against
988 *
989 * @return {@code true} if the given object represents a {@code String}
990 * equivalent to this string, {@code false} otherwise
991 *
992 * @see #compareTo(String)
993 * @see #equalsIgnoreCase(String)
994 */
995 public boolean equals(Object anObject) {
996 if (this == anObject) {
997 return true;
998 }
999 if (anObject instanceof String) {
1000 String aString = (String)anObject;
1001 if (coder() == aString.coder()) {
1002 return isLatin1() ? StringLatin1.equals(value, aString.value)
1003 : StringUTF16.equals(value, aString.value);
1004 }
1005 }
1006 return false;
1007 }
1008
1009 /**
1010 * Compares this string to the specified {@code StringBuffer}. The result
1011 * is {@code true} if and only if this {@code String} represents the same
1012 * sequence of characters as the specified {@code StringBuffer}. This method
1013 * synchronizes on the {@code StringBuffer}.
1014 *
1015 * <p>For finer-grained String comparison, refer to
1016 * {@link java.text.Collator}.
1017 *
1018 * @param sb
1019 * The {@code StringBuffer} to compare this {@code String} against
1020 *
1021 * @return {@code true} if this {@code String} represents the same
1022 * sequence of characters as the specified {@code StringBuffer},
1023 * {@code false} otherwise
1024 *
1025 * @since 1.4
1026 */
1027 public boolean contentEquals(StringBuffer sb) {
1028 return contentEquals((CharSequence)sb);
1029 }
1030
1031 private boolean nonSyncContentEquals(AbstractStringBuilder sb) {
1032 int len = length();
1033 if (len != sb.length()) {
1034 return false;
1035 }
1036 byte v1[] = value;
1037 byte v2[] = sb.getValue();
1038 if (coder() == sb.getCoder()) {
1039 int n = v1.length;
1040 for (int i = 0; i < n; i++) {
1041 if (v1[i] != v2[i]) {
1042 return false;
1043 }
1044 }
1045 } else {
1046 if (!isLatin1()) { // utf16 str and latin1 abs can never be "equal"
1047 return false;
1048 }
1049 for (int i = 0; i < len; i++) {
1050 if ((char)(v1[i] & 0xff) != StringUTF16.getChar(v2, i)) {
1051 return false;
1052 }
1053 }
1054 }
1055 return true;
1056 }
1057
1058 /**
1059 * Compares this string to the specified {@code CharSequence}. The
1060 * result is {@code true} if and only if this {@code String} represents the
1061 * same sequence of char values as the specified sequence. Note that if the
1062 * {@code CharSequence} is a {@code StringBuffer} then the method
1063 * synchronizes on it.
1064 *
1065 * <p>For finer-grained String comparison, refer to
1066 * {@link java.text.Collator}.
1067 *
1068 * @param cs
1069 * The sequence to compare this {@code String} against
1070 *
1071 * @return {@code true} if this {@code String} represents the same
1072 * sequence of char values as the specified sequence, {@code
1073 * false} otherwise
1074 *
1075 * @since 1.5
1076 */
1077 public boolean contentEquals(CharSequence cs) {
1078 // Argument is a StringBuffer, StringBuilder
1079 if (cs instanceof AbstractStringBuilder) {
1080 if (cs instanceof StringBuffer) {
1081 synchronized(cs) {
1082 return nonSyncContentEquals((AbstractStringBuilder)cs);
1083 }
1084 } else {
1085 return nonSyncContentEquals((AbstractStringBuilder)cs);
1086 }
1087 }
1088 // Argument is a String
1089 if (cs instanceof String) {
1090 return equals(cs);
1091 }
1092 // Argument is a generic CharSequence
1093 int n = cs.length();
1094 if (n != length()) {
1095 return false;
1096 }
1097 byte[] val = this.value;
1098 if (isLatin1()) {
1099 for (int i = 0; i < n; i++) {
1100 if ((val[i] & 0xff) != cs.charAt(i)) {
1101 return false;
1102 }
1103 }
1104 } else {
1105 for (int i = 0; i < n; i++) {
1106 if (StringUTF16.getChar(val, i) != cs.charAt(i)) {
1107 return false;
1108 }
1109 }
1110 }
1111 return true;
1112 }
1113
1114 /**
1115 * Compares this {@code String} to another {@code String}, ignoring case
1116 * considerations. Two strings are considered equal ignoring case if they
1117 * are of the same length and corresponding characters in the two strings
1118 * are equal ignoring case.
1119 *
1120 * <p> Two characters {@code c1} and {@code c2} are considered the same
1121 * ignoring case if at least one of the following is true:
1122 * <ul>
1123 * <li> The two characters are the same (as compared by the
1124 * {@code ==} operator)
1125 * <li> Calling {@code Character.toLowerCase(Character.toUpperCase(char))}
1126 * on each character produces the same result
1127 * </ul>
1128 *
1129 * <p>Note that this method does <em>not</em> take locale into account, and
1130 * will result in unsatisfactory results for certain locales. The
1131 * {@link java.text.Collator} class provides locale-sensitive comparison.
1132 *
1133 * @param anotherString
1134 * The {@code String} to compare this {@code String} against
1135 *
1136 * @return {@code true} if the argument is not {@code null} and it
1137 * represents an equivalent {@code String} ignoring case; {@code
1138 * false} otherwise
1139 *
1140 * @see #equals(Object)
1141 */
1142 public boolean equalsIgnoreCase(String anotherString) {
1143 return (this == anotherString) ? true
1144 : (anotherString != null)
1145 && (anotherString.length() == length())
1146 && regionMatches(true, 0, anotherString, 0, length());
1147 }
1148
1149 /**
1150 * Compares two strings lexicographically.
1151 * The comparison is based on the Unicode value of each character in
1152 * the strings. The character sequence represented by this
1153 * {@code String} object is compared lexicographically to the
1154 * character sequence represented by the argument string. The result is
1155 * a negative integer if this {@code String} object
1156 * lexicographically precedes the argument string. The result is a
1157 * positive integer if this {@code String} object lexicographically
1158 * follows the argument string. The result is zero if the strings
1159 * are equal; {@code compareTo} returns {@code 0} exactly when
1160 * the {@link #equals(Object)} method would return {@code true}.
1161 * <p>
1162 * This is the definition of lexicographic ordering. If two strings are
1163 * different, then either they have different characters at some index
1164 * that is a valid index for both strings, or their lengths are different,
1165 * or both. If they have different characters at one or more index
1166 * positions, let <i>k</i> be the smallest such index; then the string
1173 * this.charAt(k)-anotherString.charAt(k)
1174 * </pre></blockquote>
1175 * If there is no index position at which they differ, then the shorter
1176 * string lexicographically precedes the longer string. In this case,
1177 * {@code compareTo} returns the difference of the lengths of the
1178 * strings -- that is, the value:
1179 * <blockquote><pre>
1180 * this.length()-anotherString.length()
1181 * </pre></blockquote>
1182 *
1183 * <p>For finer-grained String comparison, refer to
1184 * {@link java.text.Collator}.
1185 *
1186 * @param anotherString the {@code String} to be compared.
1187 * @return the value {@code 0} if the argument string is equal to
1188 * this string; a value less than {@code 0} if this string
1189 * is lexicographically less than the string argument; and a
1190 * value greater than {@code 0} if this string is
1191 * lexicographically greater than the string argument.
1192 */
1193 public int compareTo(String anotherString) {
1194 byte v1[] = value;
1195 byte v2[] = anotherString.value;
1196 if (coder() == anotherString.coder()) {
1197 return isLatin1() ? StringLatin1.compareTo(v1, v2)
1198 : StringUTF16.compareTo(v1, v2);
1199 }
1200 return isLatin1() ? StringLatin1.compareToUTF16(v1, v2)
1201 : StringUTF16.compareToLatin1(v1, v2);
1202 }
1203
1204 /**
1205 * A Comparator that orders {@code String} objects as by
1206 * {@code compareToIgnoreCase}. This comparator is serializable.
1207 * <p>
1208 * Note that this Comparator does <em>not</em> take locale into account,
1209 * and will result in an unsatisfactory ordering for certain locales.
1210 * The {@link java.text.Collator} class provides locale-sensitive comparison.
1211 *
1212 * @see java.text.Collator
1213 * @since 1.2
1214 */
1215 public static final Comparator<String> CASE_INSENSITIVE_ORDER
1216 = new CaseInsensitiveComparator();
1217 private static class CaseInsensitiveComparator
1218 implements Comparator<String>, java.io.Serializable {
1219 // use serialVersionUID from JDK 1.2.2 for interoperability
1220 private static final long serialVersionUID = 8575799808933029326L;
1221
1222 public int compare(String s1, String s2) {
1223 byte v1[] = s1.value;
1224 byte v2[] = s2.value;
1225 int n1 = s1.length();
1226 int n2 = s2.length();
1227 boolean s1IsLatin1 = s1.isLatin1();
1228 boolean s2IsLatin1 = s2.isLatin1();
1229 int min = Math.min(n1, n2);
1230 for (int i = 0; i < min; i++) {
1231 char c1 = s1IsLatin1 ? StringLatin1.getChar(v1, i)
1232 : StringUTF16.getChar(v1, i);
1233 char c2 = s2IsLatin1 ? StringLatin1.getChar(v2, i)
1234 : StringUTF16.getChar(v2, i);
1235 if (c1 != c2) {
1236 c1 = Character.toUpperCase(c1);
1237 c2 = Character.toUpperCase(c2);
1238 if (c1 != c2) {
1239 c1 = Character.toLowerCase(c1);
1240 c2 = Character.toLowerCase(c2);
1241 if (c1 != c2) {
1242 // No overflow because of numeric promotion
1243 return c1 - c2;
1244 }
1245 }
1246 }
1247 }
1248 return n1 - n2;
1249 }
1250
1251 /** Replaces the de-serialized object. */
1252 private Object readResolve() { return CASE_INSENSITIVE_ORDER; }
1253 }
1254
1293 * <li>{@code ooffset+len} is greater than the length of the other
1294 * argument.
1295 * <li>There is some nonnegative integer <i>k</i> less than {@code len}
1296 * such that:
1297 * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + }
1298 * <i>k</i>{@code )}
1299 * </ul>
1300 *
1301 * <p>Note that this method does <em>not</em> take locale into account. The
1302 * {@link java.text.Collator} class provides locale-sensitive comparison.
1303 *
1304 * @param toffset the starting offset of the subregion in this string.
1305 * @param other the string argument.
1306 * @param ooffset the starting offset of the subregion in the string
1307 * argument.
1308 * @param len the number of characters to compare.
1309 * @return {@code true} if the specified subregion of this string
1310 * exactly matches the specified subregion of the string argument;
1311 * {@code false} otherwise.
1312 */
1313 public boolean regionMatches(int toffset, String other, int ooffset, int len) {
1314 byte tv[] = value;
1315 byte ov[] = other.value;
1316 // Note: toffset, ooffset, or len might be near -1>>>1.
1317 if ((ooffset < 0) || (toffset < 0) ||
1318 (toffset > (long)length() - len) ||
1319 (ooffset > (long)other.length() - len)) {
1320 return false;
1321 }
1322 if (coder() == other.coder()) {
1323 if (!isLatin1() && (len > 0)) {
1324 toffset = toffset << 1;
1325 ooffset = ooffset << 1;
1326 len = len << 1;
1327 }
1328 while (len-- > 0) {
1329 if (tv[toffset++] != ov[ooffset++]) {
1330 return false;
1331 }
1332 }
1333 } else {
1334 if (coder() == LATIN1) {
1335 while (len-- > 0) {
1336 if (StringLatin1.getChar(tv, toffset++) !=
1337 StringUTF16.getChar(ov, ooffset++)) {
1338 return false;
1339 }
1340 }
1341 } else {
1342 while (len-- > 0) {
1343 if (StringUTF16.getChar(tv, toffset++) !=
1344 StringLatin1.getChar(ov, ooffset++)) {
1345 return false;
1346 }
1347 }
1348 }
1349 }
1350 return true;
1351 }
1352
1353 /**
1354 * Tests if two string regions are equal.
1355 * <p>
1356 * A substring of this {@code String} object is compared to a substring
1357 * of the argument {@code other}. The result is {@code true} if these
1358 * substrings represent character sequences that are the same, ignoring
1359 * case if and only if {@code ignoreCase} is true. The substring of
1360 * this {@code String} object to be compared begins at index
1361 * {@code toffset} and has length {@code len}. The substring of
1362 * {@code other} to be compared begins at index {@code ooffset} and
1363 * has length {@code len}. The result is {@code false} if and only if
1364 * at least one of the following is true:
1365 * <ul><li>{@code toffset} is negative.
1366 * <li>{@code ooffset} is negative.
1367 * <li>{@code toffset+len} is greater than the length of this
1368 * {@code String} object.
1369 * <li>{@code ooffset+len} is greater than the length of the other
1385 * and will result in unsatisfactory results for certain locales when
1386 * {@code ignoreCase} is {@code true}. The {@link java.text.Collator} class
1387 * provides locale-sensitive comparison.
1388 *
1389 * @param ignoreCase if {@code true}, ignore case when comparing
1390 * characters.
1391 * @param toffset the starting offset of the subregion in this
1392 * string.
1393 * @param other the string argument.
1394 * @param ooffset the starting offset of the subregion in the string
1395 * argument.
1396 * @param len the number of characters to compare.
1397 * @return {@code true} if the specified subregion of this string
1398 * matches the specified subregion of the string argument;
1399 * {@code false} otherwise. Whether the matching is exact
1400 * or case insensitive depends on the {@code ignoreCase}
1401 * argument.
1402 */
1403 public boolean regionMatches(boolean ignoreCase, int toffset,
1404 String other, int ooffset, int len) {
1405 if (!ignoreCase) {
1406 return regionMatches(toffset, other, ooffset, len);
1407 }
1408 // Note: toffset, ooffset, or len might be near -1>>>1.
1409 if ((ooffset < 0) || (toffset < 0)
1410 || (toffset > (long)length() - len)
1411 || (ooffset > (long)other.length() - len)) {
1412 return false;
1413 }
1414 byte tv[] = value;
1415 byte ov[] = other.value;
1416 if (coder() == other.coder()) {
1417 return isLatin1()
1418 ? StringLatin1.regionMatchesCI(tv, toffset, ov, ooffset, len)
1419 : StringUTF16.regionMatchesCI(tv, toffset, ov, ooffset, len);
1420 }
1421 return isLatin1()
1422 ? StringLatin1.regionMatchesCI_UTF16(tv, toffset, ov, ooffset, len)
1423 : StringUTF16.regionMatchesCI_Latin1(tv, toffset, ov, ooffset, len);
1424 }
1425
1426 /**
1427 * Tests if the substring of this string beginning at the
1428 * specified index starts with the specified prefix.
1429 *
1430 * @param prefix the prefix.
1431 * @param toffset where to begin looking in this string.
1432 * @return {@code true} if the character sequence represented by the
1433 * argument is a prefix of the substring of this object starting
1434 * at index {@code toffset}; {@code false} otherwise.
1435 * The result is {@code false} if {@code toffset} is
1436 * negative or greater than the length of this
1437 * {@code String} object; otherwise the result is the same
1438 * as the result of the expression
1439 * <pre>
1440 * this.substring(toffset).startsWith(prefix)
1441 * </pre>
1442 */
1443 public boolean startsWith(String prefix, int toffset) {
1444 // Note: toffset might be near -1>>>1.
1445 if (toffset < 0 || toffset > length() - prefix.length()) {
1446 return false;
1447 }
1448 byte ta[] = value;
1449 byte pa[] = prefix.value;
1450 int po = 0;
1451 int pc = pa.length;
1452 if (coder() == prefix.coder()) {
1453 int to = isLatin1() ? toffset : toffset << 1;
1454 while (po < pc) {
1455 if (ta[to++] != pa[po++]) {
1456 return false;
1457 }
1458 }
1459 } else {
1460 if (isLatin1()) { // && pcoder == UTF16
1461 return false;
1462 }
1463 // coder == UTF16 && pcoder == LATIN1)
1464 while (po < pc) {
1465 if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) {
1466 return false;
1467 }
1468 }
1469 }
1470 return true;
1471 }
1472
1473 /**
1474 * Tests if this string starts with the specified prefix.
1475 *
1476 * @param prefix the prefix.
1477 * @return {@code true} if the character sequence represented by the
1478 * argument is a prefix of the character sequence represented by
1479 * this string; {@code false} otherwise.
1480 * Note also that {@code true} will be returned if the
1481 * argument is an empty string or is equal to this
1482 * {@code String} object as determined by the
1483 * {@link #equals(Object)} method.
1484 * @since 1.0
1485 */
1486 public boolean startsWith(String prefix) {
1487 return startsWith(prefix, 0);
1488 }
1489
1490 /**
1491 * Tests if this string ends with the specified suffix.
1492 *
1493 * @param suffix the suffix.
1494 * @return {@code true} if the character sequence represented by the
1495 * argument is a suffix of the character sequence represented by
1496 * this object; {@code false} otherwise. Note that the
1497 * result will be {@code true} if the argument is the
1498 * empty string or is equal to this {@code String} object
1499 * as determined by the {@link #equals(Object)} method.
1500 */
1501 public boolean endsWith(String suffix) {
1502 return startsWith(suffix, length() - suffix.length());
1503 }
1504
1505 /**
1506 * Returns a hash code for this string. The hash code for a
1507 * {@code String} object is computed as
1508 * <blockquote><pre>
1509 * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
1510 * </pre></blockquote>
1511 * using {@code int} arithmetic, where {@code s[i]} is the
1512 * <i>i</i>th character of the string, {@code n} is the length of
1513 * the string, and {@code ^} indicates exponentiation.
1514 * (The hash value of the empty string is zero.)
1515 *
1516 * @return a hash code value for this object.
1517 */
1518 public int hashCode() {
1519 if (hash == 0 && value.length > 0) {
1520 hash = isLatin1() ? StringLatin1.hashCode(value)
1521 : StringUTF16.hashCode(value);
1522 }
1523 return hash;
1524 }
1525
1526 /**
1527 * Returns the index within this string of the first occurrence of
1528 * the specified character. If a character with value
1529 * {@code ch} occurs in the character sequence represented by
1530 * this {@code String} object, then the index (in Unicode
1531 * code units) of the first such occurrence is returned. For
1532 * values of {@code ch} in the range from 0 to 0xFFFF
1533 * (inclusive), this is the smallest value <i>k</i> such that:
1534 * <blockquote><pre>
1535 * this.charAt(<i>k</i>) == ch
1536 * </pre></blockquote>
1537 * is true. For other values of {@code ch}, it is the
1538 * smallest value <i>k</i> such that:
1539 * <blockquote><pre>
1540 * this.codePointAt(<i>k</i>) == ch
1541 * </pre></blockquote>
1542 * is true. In either case, if no such character occurs in this
1543 * string, then {@code -1} is returned.
1574 * {@code -1} is returned.
1575 *
1576 * <p>
1577 * There is no restriction on the value of {@code fromIndex}. If it
1578 * is negative, it has the same effect as if it were zero: this entire
1579 * string may be searched. If it is greater than the length of this
1580 * string, it has the same effect as if it were equal to the length of
1581 * this string: {@code -1} is returned.
1582 *
1583 * <p>All indices are specified in {@code char} values
1584 * (Unicode code units).
1585 *
1586 * @param ch a character (Unicode code point).
1587 * @param fromIndex the index to start the search from.
1588 * @return the index of the first occurrence of the character in the
1589 * character sequence represented by this object that is greater
1590 * than or equal to {@code fromIndex}, or {@code -1}
1591 * if the character does not occur.
1592 */
1593 public int indexOf(int ch, int fromIndex) {
1594 return isLatin1() ? StringLatin1.indexOf(value, ch, fromIndex)
1595 : StringUTF16.indexOf(value, ch, fromIndex);
1596 }
1597
1598 /**
1599 * Returns the index within this string of the last occurrence of
1600 * the specified character. For values of {@code ch} in the
1601 * range from 0 to 0xFFFF (inclusive), the index (in Unicode code
1602 * units) returned is the largest value <i>k</i> such that:
1603 * <blockquote><pre>
1604 * this.charAt(<i>k</i>) == ch
1605 * </pre></blockquote>
1606 * is true. For other values of {@code ch}, it is the
1607 * largest value <i>k</i> such that:
1608 * <blockquote><pre>
1609 * this.codePointAt(<i>k</i>) == ch
1610 * </pre></blockquote>
1611 * is true. In either case, if no such character occurs in this
1612 * string, then {@code -1} is returned. The
1613 * {@code String} is searched backwards starting at the last
1614 * character.
1615 *
1616 * @param ch a character (Unicode code point).
1617 * @return the index of the last occurrence of the character in the
1618 * character sequence represented by this object, or
1619 * {@code -1} if the character does not occur.
1620 */
1621 public int lastIndexOf(int ch) {
1622 return lastIndexOf(ch, length() - 1);
1623 }
1624
1625 /**
1626 * Returns the index within this string of the last occurrence of
1627 * the specified character, searching backward starting at the
1628 * specified index. For values of {@code ch} in the range
1629 * from 0 to 0xFFFF (inclusive), the index returned is the largest
1630 * value <i>k</i> such that:
1631 * <blockquote><pre>
1632 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex)
1633 * </pre></blockquote>
1634 * is true. For other values of {@code ch}, it is the
1635 * largest value <i>k</i> such that:
1636 * <blockquote><pre>
1637 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex)
1638 * </pre></blockquote>
1639 * is true. In either case, if no such character occurs in this
1640 * string at or before position {@code fromIndex}, then
1641 * {@code -1} is returned.
1642 *
1643 * <p>All indices are specified in {@code char} values
1644 * (Unicode code units).
1645 *
1646 * @param ch a character (Unicode code point).
1647 * @param fromIndex the index to start the search from. There is no
1648 * restriction on the value of {@code fromIndex}. If it is
1649 * greater than or equal to the length of this string, it has
1650 * the same effect as if it were equal to one less than the
1651 * length of this string: this entire string may be searched.
1652 * If it is negative, it has the same effect as if it were -1:
1653 * -1 is returned.
1654 * @return the index of the last occurrence of the character in the
1655 * character sequence represented by this object that is less
1656 * than or equal to {@code fromIndex}, or {@code -1}
1657 * if the character does not occur before that point.
1658 */
1659 public int lastIndexOf(int ch, int fromIndex) {
1660 return isLatin1() ? StringLatin1.lastIndexOf(value, ch, fromIndex)
1661 : StringUTF16.lastIndexOf(value, ch, fromIndex);
1662 }
1663
1664 /**
1665 * Returns the index within this string of the first occurrence of the
1666 * specified substring.
1667 *
1668 * <p>The returned index is the smallest value {@code k} for which:
1669 * <pre>{@code
1670 * this.startsWith(str, k)
1671 * }</pre>
1672 * If no such value of {@code k} exists, then {@code -1} is returned.
1673 *
1674 * @param str the substring to search for.
1675 * @return the index of the first occurrence of the specified substring,
1676 * or {@code -1} if there is no such occurrence.
1677 */
1678 public int indexOf(String str) {
1679 if (coder() == str.coder()) {
1680 return isLatin1() ? StringLatin1.indexOf(value, str.value)
1681 : StringUTF16.indexOf(value, str.value);
1682 }
1683 if (coder() == LATIN1) { // str.coder == UTF16
1684 return -1;
1685 }
1686 return StringUTF16.indexOfLatin1(value, str.value);
1687 }
1688
1689 /**
1690 * Returns the index within this string of the first occurrence of the
1691 * specified substring, starting at the specified index.
1692 *
1693 * <p>The returned index is the smallest value {@code k} for which:
1694 * <pre>{@code
1695 * k >= Math.min(fromIndex, this.length()) &&
1696 * this.startsWith(str, k)
1697 * }</pre>
1698 * If no such value of {@code k} exists, then {@code -1} is returned.
1699 *
1700 * @param str the substring to search for.
1701 * @param fromIndex the index from which to start the search.
1702 * @return the index of the first occurrence of the specified substring,
1703 * starting at the specified index,
1704 * or {@code -1} if there is no such occurrence.
1705 */
1706 public int indexOf(String str, int fromIndex) {
1707 return indexOf(value, coder(), length(), str, fromIndex);
1708 }
1709
1710 /**
1711 * Code shared by String and AbstractStringBuilder to do searches. The
1712 * source is the character array being searched, and the target
1713 * is the string being searched for.
1714 *
1715 * @param src the characters being searched.
1716 * @param srcCoder the coder of the source string.
1717 * @param srcCount length of the source string.
1718 * @param tgtStr the characters being searched for.
1719 * @param fromIndex the index to begin searching from.
1720 */
1721 static int indexOf(byte[] src, byte srcCoder, int srcCount,
1722 String tgtStr, int fromIndex) {
1723
1724 byte[] tgt = tgtStr.value;
1725 byte tgtCoder = tgtStr.coder();
1726 int tgtCount = tgtStr.length();
1727
1728 if (fromIndex >= srcCount) {
1729 return (tgtCount == 0 ? srcCount : -1);
1730 }
1731 if (fromIndex < 0) {
1732 fromIndex = 0;
1733 }
1734 if (tgtCount == 0) {
1735 return fromIndex;
1736 }
1737 if (srcCoder == tgtCoder) {
1738 return srcCoder == LATIN1
1739 ? StringLatin1.indexOf(src, srcCount, tgt, tgtCount, fromIndex)
1740 : StringUTF16.indexOf(src, srcCount, tgt, tgtCount, fromIndex);
1741 }
1742 if (srcCoder == LATIN1) { // && tgtCoder == UTF16
1743 return -1;
1744 }
1745 // srcCoder == UTF16 && tgtCoder == LATIN1) {
1746 return StringUTF16.indexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex);
1747 }
1748
1749 /**
1750 * Returns the index within this string of the last occurrence of the
1751 * specified substring. The last occurrence of the empty string ""
1752 * is considered to occur at the index value {@code this.length()}.
1753 *
1754 * <p>The returned index is the largest value {@code k} for which:
1755 * <pre>{@code
1756 * this.startsWith(str, k)
1757 * }</pre>
1758 * If no such value of {@code k} exists, then {@code -1} is returned.
1759 *
1760 * @param str the substring to search for.
1761 * @return the index of the last occurrence of the specified substring,
1762 * or {@code -1} if there is no such occurrence.
1763 */
1764 public int lastIndexOf(String str) {
1765 return lastIndexOf(str, length());
1766 }
1767
1768 /**
1769 * Returns the index within this string of the last occurrence of the
1770 * specified substring, searching backward starting at the specified index.
1771 *
1772 * <p>The returned index is the largest value {@code k} for which:
1773 * <pre>{@code
1774 * k <= Math.min(fromIndex, this.length()) &&
1775 * this.startsWith(str, k)
1776 * }</pre>
1777 * If no such value of {@code k} exists, then {@code -1} is returned.
1778 *
1779 * @param str the substring to search for.
1780 * @param fromIndex the index to start the search from.
1781 * @return the index of the last occurrence of the specified substring,
1782 * searching backward from the specified index,
1783 * or {@code -1} if there is no such occurrence.
1784 */
1785 public int lastIndexOf(String str, int fromIndex) {
1786 return lastIndexOf(value, coder(), length(), str, fromIndex);
1787 }
1788
1789 /**
1790 * Code shared by String and AbstractStringBuilder to do searches. The
1791 * source is the character array being searched, and the target
1792 * is the string being searched for.
1793 *
1794 * @param src the characters being searched.
1795 * @param srcCoder coder handles the mapping between bytes/chars
1796 * @param srcCount count of the source string.
1797 * @param tgtStr the characters being searched for.
1798 * @param fromIndex the index to begin searching from.
1799 */
1800 static int lastIndexOf(byte[] src, byte srcCoder, int srcCount,
1801 String tgtStr, int fromIndex) {
1802 byte[] tgt = tgtStr.value;
1803 byte tgtCoder = tgtStr.coder();
1804 int tgtCount = tgtStr.length();
1805 /*
1806 * Check arguments; return immediately where possible. For
1807 * consistency, don't check for null str.
1808 */
1809 int rightIndex = srcCount - tgtCount;
1810 if (fromIndex < 0) {
1811 return -1;
1812 }
1813 if (fromIndex > rightIndex) {
1814 fromIndex = rightIndex;
1815 }
1816 /* Empty string always matches. */
1817 if (tgtCount == 0) {
1818 return fromIndex;
1819 }
1820 if (srcCoder == tgtCoder) {
1821 return srcCoder == LATIN1
1822 ? StringLatin1.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex)
1823 : StringUTF16.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex);
1824 }
1825 if (srcCoder == LATIN1) { // && tgtCoder == UTF16
1826 return -1;
1827 }
1828 // srcCoder == UTF16 && tgtCoder == LATIN1
1829 int min = tgtCount - 1;
1830 int i = min + fromIndex;
1831 int strLastIndex = tgtCount - 1;
1832
1833 char strLastChar = (char)(tgt[strLastIndex] & 0xff);
1834 startSearchForLastChar:
1835 while (true) {
1836 while (i >= min && StringUTF16.getChar(src, i) != strLastChar) {
1837 i--;
1838 }
1839 if (i < min) {
1840 return -1;
1841 }
1842 int j = i - 1;
1843 int start = j - strLastIndex;
1844 int k = strLastIndex - 1;
1845 while (j > start) {
1846 if (StringUTF16.getChar(src, j--) != (tgt[k--] & 0xff)) {
1847 i--;
1848 continue startSearchForLastChar;
1849 }
1850 }
1851 return start + 1;
1852 }
1853 }
1854
1855 /**
1856 * Returns a string that is a substring of this string. The
1857 * substring begins with the character at the specified index and
1858 * extends to the end of this string. <p>
1859 * Examples:
1860 * <blockquote><pre>
1861 * "unhappy".substring(2) returns "happy"
1862 * "Harbison".substring(3) returns "bison"
1863 * "emptiness".substring(9) returns "" (an empty string)
1864 * </pre></blockquote>
1865 *
1866 * @param beginIndex the beginning index, inclusive.
1867 * @return the specified substring.
1868 * @exception IndexOutOfBoundsException if
1869 * {@code beginIndex} is negative or larger than the
1870 * length of this {@code String} object.
1871 */
1872 public String substring(int beginIndex) {
1873 if (beginIndex < 0) {
1874 throw new StringIndexOutOfBoundsException(beginIndex);
1875 }
1876 int subLen = length() - beginIndex;
1877 if (subLen < 0) {
1878 throw new StringIndexOutOfBoundsException(subLen);
1879 }
1880 if (beginIndex == 0) {
1881 return this;
1882 }
1883 return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen)
1884 : StringUTF16.newString(value, beginIndex, subLen);
1885 }
1886
1887 /**
1888 * Returns a string that is a substring of this string. The
1889 * substring begins at the specified {@code beginIndex} and
1890 * extends to the character at index {@code endIndex - 1}.
1891 * Thus the length of the substring is {@code endIndex-beginIndex}.
1892 * <p>
1893 * Examples:
1894 * <blockquote><pre>
1895 * "hamburger".substring(4, 8) returns "urge"
1896 * "smiles".substring(1, 5) returns "mile"
1897 * </pre></blockquote>
1898 *
1899 * @param beginIndex the beginning index, inclusive.
1900 * @param endIndex the ending index, exclusive.
1901 * @return the specified substring.
1902 * @exception IndexOutOfBoundsException if the
1903 * {@code beginIndex} is negative, or
1904 * {@code endIndex} is larger than the length of
1905 * this {@code String} object, or
1906 * {@code beginIndex} is larger than
1907 * {@code endIndex}.
1908 */
1909 public String substring(int beginIndex, int endIndex) {
1910 int length = length();
1911 checkBoundsBeginEnd(beginIndex, endIndex, length);
1912 int subLen = endIndex - beginIndex;
1913 if (beginIndex == 0 && endIndex == length) {
1914 return this;
1915 }
1916 return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen)
1917 : StringUTF16.newString(value, beginIndex, subLen);
1918 }
1919
1920 /**
1921 * Returns a character sequence that is a subsequence of this sequence.
1922 *
1923 * <p> An invocation of this method of the form
1924 *
1925 * <blockquote><pre>
1926 * str.subSequence(begin, end)</pre></blockquote>
1927 *
1928 * behaves in exactly the same way as the invocation
1929 *
1930 * <blockquote><pre>
1931 * str.substring(begin, end)</pre></blockquote>
1932 *
1933 * @apiNote
1934 * This method is defined so that the {@code String} class can implement
1935 * the {@link CharSequence} interface.
1936 *
1937 * @param beginIndex the begin index, inclusive.
1954 * Concatenates the specified string to the end of this string.
1955 * <p>
1956 * If the length of the argument string is {@code 0}, then this
1957 * {@code String} object is returned. Otherwise, a
1958 * {@code String} object is returned that represents a character
1959 * sequence that is the concatenation of the character sequence
1960 * represented by this {@code String} object and the character
1961 * sequence represented by the argument string.<p>
1962 * Examples:
1963 * <blockquote><pre>
1964 * "cares".concat("s") returns "caress"
1965 * "to".concat("get").concat("her") returns "together"
1966 * </pre></blockquote>
1967 *
1968 * @param str the {@code String} that is concatenated to the end
1969 * of this {@code String}.
1970 * @return a string that represents the concatenation of this object's
1971 * characters followed by the string argument's characters.
1972 */
1973 public String concat(String str) {
1974 int olen = str.length();
1975 if (olen == 0) {
1976 return this;
1977 }
1978 if (coder() == str.coder()) {
1979 byte[] val = this.value;
1980 byte[] oval = str.value;
1981 int len = val.length + oval.length;
1982 byte[] buf = Arrays.copyOf(val, len);
1983 System.arraycopy(oval, 0, buf, val.length, oval.length);
1984 return new String(buf, coder);
1985 }
1986 int len = length();
1987 byte[] buf = StringUTF16.newBytesFor(len + olen);
1988 getBytes(buf, 0, UTF16);
1989 str.getBytes(buf, len, UTF16);
1990 return new String(buf, UTF16);
1991 }
1992
1993 /**
1994 * Returns a string resulting from replacing all occurrences of
1995 * {@code oldChar} in this string with {@code newChar}.
1996 * <p>
1997 * If the character {@code oldChar} does not occur in the
1998 * character sequence represented by this {@code String} object,
1999 * then a reference to this {@code String} object is returned.
2000 * Otherwise, a {@code String} object is returned that
2001 * represents a character sequence identical to the character sequence
2002 * represented by this {@code String} object, except that every
2003 * occurrence of {@code oldChar} is replaced by an occurrence
2004 * of {@code newChar}.
2005 * <p>
2006 * Examples:
2007 * <blockquote><pre>
2008 * "mesquite in your cellar".replace('e', 'o')
2009 * returns "mosquito in your collar"
2010 * "the war of baronets".replace('r', 'y')
2011 * returns "the way of bayonets"
2012 * "sparring with a purple porpoise".replace('p', 't')
2013 * returns "starring with a turtle tortoise"
2014 * "JonL".replace('q', 'x') returns "JonL" (no change)
2015 * </pre></blockquote>
2016 *
2017 * @param oldChar the old character.
2018 * @param newChar the new character.
2019 * @return a string derived from this string by replacing every
2020 * occurrence of {@code oldChar} with {@code newChar}.
2021 */
2022 public String replace(char oldChar, char newChar) {
2023 if (oldChar != newChar) {
2024 String ret = isLatin1() ? StringLatin1.replace(value, oldChar, newChar)
2025 : StringUTF16.replace(value, oldChar, newChar);
2026 if (ret != null) {
2027 return ret;
2028 }
2029 }
2030 return this;
2031 }
2032
2033 /**
2034 * Tells whether or not this string matches the given <a
2035 * href="../util/regex/Pattern.html#sum">regular expression</a>.
2036 *
2037 * <p> An invocation of this method of the form
2038 * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the
2039 * same result as the expression
2040 *
2041 * <blockquote>
2042 * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence)
2043 * matches(<i>regex</i>, <i>str</i>)}
2044 * </blockquote>
2045 *
2046 * @param regex
2047 * the regular expression to which this string is to be matched
2159 * @since 1.4
2160 * @spec JSR-51
2161 */
2162 public String replaceAll(String regex, String replacement) {
2163 return Pattern.compile(regex).matcher(this).replaceAll(replacement);
2164 }
2165
2166 /**
2167 * Replaces each substring of this string that matches the literal target
2168 * sequence with the specified literal replacement sequence. The
2169 * replacement proceeds from the beginning of the string to the end, for
2170 * example, replacing "aa" with "b" in the string "aaa" will result in
2171 * "ba" rather than "ab".
2172 *
2173 * @param target The sequence of char values to be replaced
2174 * @param replacement The replacement sequence of char values
2175 * @return The resulting string
2176 * @since 1.5
2177 */
2178 public String replace(CharSequence target, CharSequence replacement) {
2179 String tgtStr = target.toString();
2180 String replStr = replacement.toString();
2181 int j = indexOf(tgtStr);
2182 if (j < 0) {
2183 return this;
2184 }
2185 int tgtLen = tgtStr.length();
2186 int tgtLen1 = Math.max(tgtLen, 1);
2187 int thisLen = length();
2188
2189 int newLenHint = thisLen - tgtLen + replStr.length();
2190 if (newLenHint < 0) {
2191 throw new OutOfMemoryError();
2192 }
2193 StringBuilder sb = new StringBuilder(newLenHint);
2194 int i = 0;
2195 do {
2196 sb.append(this, i, j).append(replStr);
2197 i = j + tgtLen;
2198 } while (j < thisLen && (j = indexOf(tgtStr, j + tgtLen1)) > 0);
2199 return sb.append(this, i, thisLen).toString();
2200 }
2201
2202 /**
2203 * Splits this string around matches of the given
2204 * <a href="../util/regex/Pattern.html#sum">regular expression</a>.
2205 *
2206 * <p> The array returned by this method contains each substring of this
2207 * string that is terminated by another substring that matches the given
2208 * expression or is terminated by the end of the string. The substrings in
2209 * the array are in the order in which they occur in this string. If the
2210 * expression does not match any part of the input then the resulting array
2211 * has just one element, namely this string.
2212 *
2213 * <p> When there is a positive-width match at the beginning of this
2214 * string then an empty leading substring is included at the beginning
2215 * of the resulting array. A zero-width match at the beginning however
2216 * never produces such empty leading substring.
2217 *
2218 * <p> The {@code limit} parameter controls the number of times the
2219 * pattern is applied and therefore affects the length of the resulting
2276 *
2277 * @return the array of strings computed by splitting this string
2278 * around matches of the given regular expression
2279 *
2280 * @throws PatternSyntaxException
2281 * if the regular expression's syntax is invalid
2282 *
2283 * @see java.util.regex.Pattern
2284 *
2285 * @since 1.4
2286 * @spec JSR-51
2287 */
2288 public String[] split(String regex, int limit) {
2289 /* fastpath if the regex is a
2290 (1)one-char String and this character is not one of the
2291 RegEx's meta characters ".$|()[{^?*+\\", or
2292 (2)two-char String and the first char is the backslash and
2293 the second is not the ascii digit or ascii letter.
2294 */
2295 char ch = 0;
2296 if (((regex.length() == 1 &&
2297 ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
2298 (regex.length() == 2 &&
2299 regex.charAt(0) == '\\' &&
2300 (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
2301 ((ch-'a')|('z'-ch)) < 0 &&
2302 ((ch-'A')|('Z'-ch)) < 0)) &&
2303 (ch < Character.MIN_HIGH_SURROGATE ||
2304 ch > Character.MAX_LOW_SURROGATE))
2305 {
2306 int off = 0;
2307 int next = 0;
2308 boolean limited = limit > 0;
2309 ArrayList<String> list = new ArrayList<>();
2310 while ((next = indexOf(ch, off)) != -1) {
2311 if (!limited || list.size() < limit - 1) {
2312 list.add(substring(off, next));
2313 off = next + 1;
2314 } else { // last one
2315 //assert (list.size() == limit - 1);
2316 int last = length();
2317 list.add(substring(off, last));
2318 off = last;
2319 break;
2320 }
2321 }
2322 // If no match was found, return this
2323 if (off == 0)
2324 return new String[]{this};
2325
2326 // Add remaining segment
2327 if (!limited || list.size() < limit)
2328 list.add(substring(off, length()));
2329
2330 // Construct result
2331 int resultSize = list.size();
2332 if (limit == 0) {
2333 while (resultSize > 0 && list.get(resultSize - 1).length() == 0) {
2334 resultSize--;
2335 }
2336 }
2337 String[] result = new String[resultSize];
2338 return list.subList(0, resultSize).toArray(result);
2339 }
2340 return Pattern.compile(regex).split(this, limit);
2341 }
2342
2343 /**
2344 * Splits this string around matches of the given <a
2345 * href="../util/regex/Pattern.html#sum">regular expression</a>.
2346 *
2347 * <p> This method works as if by invoking the two-argument {@link
2348 * #split(String, int) split} method with the given expression and a limit
2502 * <tr>
2503 * <td>(all)</td>
2504 * <td><img src="doc-files/capiota.gif" alt="capiota"><img src="doc-files/capchi.gif" alt="capchi">
2505 * <img src="doc-files/captheta.gif" alt="captheta"><img src="doc-files/capupsil.gif" alt="capupsil">
2506 * <img src="doc-files/capsigma.gif" alt="capsigma"></td>
2507 * <td><img src="doc-files/iota.gif" alt="iota"><img src="doc-files/chi.gif" alt="chi">
2508 * <img src="doc-files/theta.gif" alt="theta"><img src="doc-files/upsilon.gif" alt="upsilon">
2509 * <img src="doc-files/sigma1.gif" alt="sigma"></td>
2510 * <td>lowercased all chars in String</td>
2511 * </tr>
2512 * </table>
2513 *
2514 * @param locale use the case transformation rules for this locale
2515 * @return the {@code String}, converted to lowercase.
2516 * @see java.lang.String#toLowerCase()
2517 * @see java.lang.String#toUpperCase()
2518 * @see java.lang.String#toUpperCase(Locale)
2519 * @since 1.1
2520 */
2521 public String toLowerCase(Locale locale) {
2522 return isLatin1() ? StringLatin1.toLowerCase(this, value, locale)
2523 : StringUTF16.toLowerCase(this, value, locale);
2524 }
2525
2526 /**
2527 * Converts all of the characters in this {@code String} to lower
2528 * case using the rules of the default locale. This is equivalent to calling
2529 * {@code toLowerCase(Locale.getDefault())}.
2530 * <p>
2531 * <b>Note:</b> This method is locale sensitive, and may produce unexpected
2532 * results if used for strings that are intended to be interpreted locale
2533 * independently.
2534 * Examples are programming language identifiers, protocol keys, and HTML
2535 * tags.
2536 * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale
2537 * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the
2538 * LATIN SMALL LETTER DOTLESS I character.
2539 * To obtain correct results for locale insensitive strings, use
2540 * {@code toLowerCase(Locale.ROOT)}.
2541 *
2542 * @return the {@code String}, converted to lowercase.
2543 * @see java.lang.String#toLowerCase(Locale)
2578 * <td>(all)</td>
2579 * <td>\u00df</td>
2580 * <td>\u0053 \u0053</td>
2581 * <td>small letter sharp s -> two letters: SS</td>
2582 * </tr>
2583 * <tr>
2584 * <td>(all)</td>
2585 * <td>Fahrvergnügen</td>
2586 * <td>FAHRVERGNÜGEN</td>
2587 * <td></td>
2588 * </tr>
2589 * </table>
2590 * @param locale use the case transformation rules for this locale
2591 * @return the {@code String}, converted to uppercase.
2592 * @see java.lang.String#toUpperCase()
2593 * @see java.lang.String#toLowerCase()
2594 * @see java.lang.String#toLowerCase(Locale)
2595 * @since 1.1
2596 */
2597 public String toUpperCase(Locale locale) {
2598 return isLatin1() ? StringLatin1.toUpperCase(this, value, locale)
2599 : StringUTF16.toUpperCase(this, value, locale);
2600 }
2601
2602 /**
2603 * Converts all of the characters in this {@code String} to upper
2604 * case using the rules of the default locale. This method is equivalent to
2605 * {@code toUpperCase(Locale.getDefault())}.
2606 * <p>
2607 * <b>Note:</b> This method is locale sensitive, and may produce unexpected
2608 * results if used for strings that are intended to be interpreted locale
2609 * independently.
2610 * Examples are programming language identifiers, protocol keys, and HTML
2611 * tags.
2612 * For instance, {@code "title".toUpperCase()} in a Turkish locale
2613 * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the
2614 * LATIN CAPITAL LETTER I WITH DOT ABOVE character.
2615 * To obtain correct results for locale insensitive strings, use
2616 * {@code toUpperCase(Locale.ROOT)}.
2617 *
2618 * @return the {@code String}, converted to uppercase.
2619 * @see java.lang.String#toUpperCase(Locale)
2637 * {@code String} object representing an empty string is
2638 * returned.
2639 * <p>
2640 * Otherwise, let <i>k</i> be the index of the first character in the
2641 * string whose code is greater than {@code '\u005Cu0020'}, and let
2642 * <i>m</i> be the index of the last character in the string whose code
2643 * is greater than {@code '\u005Cu0020'}. A {@code String}
2644 * object is returned, representing the substring of this string that
2645 * begins with the character at index <i>k</i> and ends with the
2646 * character at index <i>m</i>-that is, the result of
2647 * {@code this.substring(k, m + 1)}.
2648 * <p>
2649 * This method may be used to trim whitespace (as defined above) from
2650 * the beginning and end of a string.
2651 *
2652 * @return A string whose value is this string, with any leading and trailing white
2653 * space removed, or this string if it has no leading or
2654 * trailing white space.
2655 */
2656 public String trim() {
2657 String ret = isLatin1() ? StringLatin1.trim(value)
2658 : StringUTF16.trim(value);
2659 return ret == null ? this : ret;
2660 }
2661
2662 /**
2663 * This object (which is already a string!) is itself returned.
2664 *
2665 * @return the string itself.
2666 */
2667 public String toString() {
// No conversion or copy is performed: the receiver itself is the result.
2668 return this;
2669 }
2670
2671 /**
2672 * Returns a stream of {@code int} zero-extending the {@code char} values
2673 * from this sequence. Any char which maps to a <a
2674 * href="{@docRoot}/java/lang/Character.html#unicode">surrogate code
2675 * point</a> is passed through uninterpreted.
2676 *
2677 * @return an IntStream of char values from this sequence
2678 * @since 9
2679 */
2680 @Override
2681 public IntStream chars() {
2682 return StreamSupport.intStream(
2683 isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE)
2684 : new StringUTF16.CharsSpliterator(value, Spliterator.IMMUTABLE),
2685 false);
2686 }
2687
2688
2689 /**
2690 * Returns a stream of code point values from this sequence. Any surrogate
2691 * pairs encountered in the sequence are combined as if by {@linkplain
2692 * Character#toCodePoint Character.toCodePoint} and the result is passed
2693 * to the stream. Any other code units, including ordinary BMP characters,
2694 * unpaired surrogates, and undefined code units, are zero-extended to
2695 * {@code int} values which are then passed to the stream.
2696 *
2697 * @return an IntStream of Unicode code points from this sequence
2698 * @since 9
2699 */
2700 @Override
2701 public IntStream codePoints() {
2702 return StreamSupport.intStream(
2703 isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE)
2704 : new StringUTF16.CodePointsSpliterator(value, Spliterator.IMMUTABLE),
2705 false);
2706 }
2707
2708 /**
2709 * Converts this string to a new character array.
2710 *
2711 * @return a newly allocated character array whose length is the length
2712 * of this string and whose contents are initialized to contain
2713 * the character sequence represented by this string.
2714 */
2715 public char[] toCharArray() {
2716 return isLatin1() ? StringLatin1.toChars(value)
2717 : StringUTF16.toChars(value);
2718 }
2719
2720 /**
2721 * Returns a formatted string using the specified format string and
2722 * arguments.
2723 *
2724 * <p> The locale always used is the one returned by {@link
2725 * java.util.Locale#getDefault() Locale.getDefault()}.
2726 *
2727 * @param format
2728 * A <a href="../util/Formatter.html#syntax">format string</a>
2729 *
2730 * @param args
2731 * Arguments referenced by the format specifiers in the format
2732 * string. If there are more arguments than format specifiers, the
2733 * extra arguments are ignored. The number of arguments is
2734 * variable and may be zero. The maximum number of arguments is
2735 * limited by the maximum dimension of a Java array as defined by
2736 * <cite>The Java™ Virtual Machine Specification</cite>.
2737 * The behaviour on a
2880 * Returns the string representation of the {@code boolean} argument.
2881 *
2882 * @param b a {@code boolean}.
2883 * @return if the argument is {@code true}, a string equal to
2884 * {@code "true"} is returned; otherwise, a string equal to
2885 * {@code "false"} is returned.
2886 */
2887 public static String valueOf(boolean b) {
2888 return b ? "true" : "false";
2889 }
2890
2891 /**
2892 * Returns the string representation of the {@code char}
2893 * argument.
2894 *
2895 * @param c a {@code char}.
2896 * @return a string of length {@code 1} containing
2897 * as its single character the argument {@code c}.
2898 */
2899 public static String valueOf(char c) {
2900 if (COMPACT_STRINGS && StringLatin1.canEncode(c)) {
2901 return new String(StringLatin1.toBytes(c), LATIN1);
2902 }
2903 return new String(StringUTF16.toBytes(c), UTF16);
2904 }
2905
2906 /**
2907 * Returns the string representation of the {@code int} argument.
2908 * <p>
2909 * The representation is exactly the one returned by the
2910 * {@code Integer.toString} method of one argument.
2911 *
2912 * @param i an {@code int}.
2913 * @return a string representation of the {@code int} argument.
2914 * @see java.lang.Integer#toString(int)
2915 */
2916 public static String valueOf(int i) {
2917 return Integer.toString(i);
2918 }
2919
2920 /**
2921 * Returns the string representation of the {@code long} argument.
2922 * <p>
2923 * The representation is exactly the one returned by the
2966 * class {@code String}.
2967 * <p>
2968 * When the intern method is invoked, if the pool already contains a
2969 * string equal to this {@code String} object as determined by
2970 * the {@link #equals(Object)} method, then the string from the pool is
2971 * returned. Otherwise, this {@code String} object is added to the
2972 * pool and a reference to this {@code String} object is returned.
2973 * <p>
2974 * It follows that for any two strings {@code s} and {@code t},
2975 * {@code s.intern() == t.intern()} is {@code true}
2976 * if and only if {@code s.equals(t)} is {@code true}.
2977 * <p>
2978 * All literal strings and string-valued constant expressions are
2979 * interned. String literals are defined in section 3.10.5 of the
2980 * <cite>The Java™ Language Specification</cite>.
2981 *
2982 * @return a string that has the same contents as this string, but is
2983 * guaranteed to be from a pool of unique strings.
2984 */
// Declared native: there is no Java body; the implementation is supplied outside this source file.
2985 public native String intern();
2986
2987 ////////////////////////////////////////////////////////////////
2988
2989 /**
2990 * Copy character bytes from this string into dst starting at dstBegin.
2991 * This method doesn't perform any range checking.
2992 *
2993 * Invoker guarantees: dst is in UTF16 (inflate itself for asb), if two
2994 * coders are different, and dst is big enough (range check)
2995 *
2996 * @param dstBegin the char index, not offset of byte[]
2997 * @param coder the coder of dst[]
2998 */
2999 void getBytes(byte dst[], int dstBegin, byte coder) {
3000 if (coder() == coder) {
3001 System.arraycopy(value, 0, dst, dstBegin << coder, value.length);
3002 } else { // this.coder == LATIN && coder == UTF16
3003 StringLatin1.inflate(value, 0, dst, dstBegin, value.length);
3004 }
3005 }
3006
3007 /*
3008 * Package private constructor. Trailing Void argument is there for
3009 * disambiguating it against other (public) constructors.
3010 *
3011 * Stores the char[] value into a byte[] that each byte represents
3012 * the8 low-order bits of the corresponding character, if the char[]
3013 * contains only latin1 character. Or a byte[] that stores all
3014 * characters in their byte sequences defined by the {@code StringUTF16}.
3015 */
3016 String(char[] value, int off, int len, Void sig) {
3017 if (len == 0) {
3018 this.value = "".value;
3019 this.coder = "".coder;
3020 return;
3021 }
3022 if (COMPACT_STRINGS) {
3023 byte[] val = StringUTF16.compress(value, off, len);
3024 if (val != null) {
3025 this.value = val;
3026 this.coder = LATIN1;
3027 return;
3028 }
3029 }
3030 this.coder = UTF16;
3031 this.value = StringUTF16.toBytes(value, off, len);
3032 }
3033
3034 /*
3035 * Package private constructor. Trailing Void argument is there for
3036 * disambiguating it against other (public) constructors.
3037 */
3038 String(AbstractStringBuilder asb, Void sig) {
3039 byte[] val = asb.getValue();
3040 int length = asb.length();
3041 if (asb.isLatin1()) {
3042 this.coder = LATIN1;
3043 this.value = Arrays.copyOfRange(val, 0, length);
3044 } else {
3045 if (COMPACT_STRINGS) {
3046 byte[] buf = StringUTF16.compress(val, 0, length);
3047 if (buf != null) {
3048 this.coder = LATIN1;
3049 this.value = buf;
3050 return;
3051 }
3052 }
3053 this.coder = UTF16;
3054 this.value = Arrays.copyOfRange(val, 0, length << 1);
3055 }
3056 }
3057
/*
 * Package private constructor which shares the value array for speed:
 * the given array reference is stored as-is, without a defensive copy.
 * NOTE(review): callers are presumably expected not to modify the array
 * afterwards -- confirm against call sites.
 */
String(byte[] value, byte coder) {
    this.coder = coder;
    this.value = value;
}
3065
3066 byte coder() {
3067 return COMPACT_STRINGS ? coder : UTF16;
3068 }
3069
3070 private boolean isLatin1() {
3071 return COMPACT_STRINGS && coder == LATIN1;
3072 }
3073
// Coder tag values identifying which internal encoding a string's byte[] uses.
// The numeric value is also used as a shift amount in this class
// (getBytes computes a byte offset as dstBegin << coder).
3074 static final byte LATIN1 = 0;
3075 static final byte UTF16 = 1;
3076
3077 /*
3078 * StringIndexOutOfBoundsException if {@code index} is
3079 * negative or greater than or equal to {@code length}.
3080 */
3081 static void checkIndex(int index, int length) {
3082 if (index < 0 || index >= length) {
3083 throw new StringIndexOutOfBoundsException("index " + index);
3084 }
3085 }
3086
3087 /*
3088 * StringIndexOutOfBoundsException if {@code offset}
3089 * is negative or greater than {@code length}.
3090 */
3091 static void checkOffset(int offset, int length) {
3092 if (offset < 0 || offset > length) {
3093 throw new StringIndexOutOfBoundsException("offset " + offset +
3094 ",length " + length);
3095 }
3096 }
3097
3098 /*
3099 * Check {@code offset}, {@code count} against {@code 0} and {@code length}
3100 * bounds.
3101 *
3102 * @throws StringIndexOutOfBoundsException
3103 * If {@code offset} is negative, {@code count} is negative,
3104 * or {@code offset} is greater than {@code length - count}
3105 */
3106 private static void checkBoundsOffCount(int offset, int count, int length) {
3107 if (offset < 0 || count < 0 || offset > length - count) {
3108 throw new StringIndexOutOfBoundsException(
3109 "offset " + offset + ", count " + count + ", length " + length);
3110 }
3111 }
3112
3113 /*
3114 * Check {@code begin}, {@code end} against {@code 0} and {@code length}
3115 * bounds.
3116 *
3117 * @throws StringIndexOutOfBoundsException
3118 * If {@code begin} is negative, {@code begin} is greater than
3119 * {@code end}, or {@code end} is greater than {@code length}.
3120 */
3121 private static void checkBoundsBeginEnd(int begin, int end, int length) {
3122 if (begin < 0 || begin > end || end > length) {
3123 throw new StringIndexOutOfBoundsException(
3124 "begin " + begin + ", end " + end + ", length " + length);
3125 }
3126 }
3127 }
|