5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.lang;
27
28 import java.io.ObjectStreamClass;
29 import java.io.ObjectStreamField;
30 import java.io.UnsupportedEncodingException;
31 import java.nio.charset.Charset;
32 import java.util.ArrayList;
33 import java.util.Arrays;
34 import java.util.Comparator;
35 import java.util.Formatter;
36 import java.util.Locale;
37 import java.util.regex.Matcher;
38 import java.util.regex.Pattern;
39 import java.util.regex.PatternSyntaxException;
40
41 /**
42 * The {@code String} class represents character strings. All
43 * string literals in Java programs, such as {@code "abc"}, are
44 * implemented as instances of this class.
45 * <p>
46 * Strings are constant; their values cannot be changed after they
47 * are created. String buffers support mutable strings.
48 * Because String objects are immutable they can be shared. For example:
89 * in which <em>supplementary characters</em> are represented by <em>surrogate
90 * pairs</em> (see the section <a href="Character.html#unicode">Unicode
91 * Character Representations</a> in the {@code Character} class for
92 * more information).
93 * Index values refer to {@code char} code units, so a supplementary
94 * character uses two positions in a {@code String}.
95 * <p>The {@code String} class provides methods for dealing with
96 * Unicode code points (i.e., characters), in addition to those for
97 * dealing with Unicode code units (i.e., {@code char} values).
98 *
99 * @author Lee Boynton
100 * @author Arthur van Hoff
101 * @author Martin Buchholz
102 * @author Ulf Zibis
103 * @see java.lang.Object#toString()
104 * @see java.lang.StringBuffer
105 * @see java.lang.StringBuilder
106 * @see java.nio.charset.Charset
107 * @since JDK1.0
108 */
109
110 public final class String
111 implements java.io.Serializable, Comparable<String>, CharSequence
112 {
113 /** The value is used for character storage. */
114 private final char value[];
115
116 /** The offset is the first index of the storage that is used. */
117 private final int offset;
118
119 /** The count is the number of characters in the String. */
120 private final int count;
121
122 /** Cache the hash code for the string */
123 private int hash; // Default to 0
124
125 /** use serialVersionUID from JDK 1.0.2 for interoperability */
126 private static final long serialVersionUID = -6849794470754667710L;
127
128 /**
129 * Class String is special cased within the Serialization Stream Protocol.
130 *
131 * A String instance is written initially into an ObjectOutputStream in the
132 * following format:
133 * <pre>
134 * {@code TC_STRING} (utf String)
135 * </pre>
136 * The String is written by method {@code DataOutput.writeUTF}.
137 * A new handle is generated to refer to all future references to the
138 * string instance within the stream.
139 */
140 private static final ObjectStreamField[] serialPersistentFields =
141 new ObjectStreamField[0];
142
143 /**
144 * Initializes a newly created {@code String} object so that it represents
145 * an empty character sequence. Note that use of this constructor is
146 * unnecessary since Strings are immutable.
147 */
public String() {
    // The empty string: a zero-length backing array, nothing used.
    this.value = new char[0];
    this.count = 0;
    this.offset = 0;
}
153
154 /**
155 * Initializes a newly created {@code String} object so that it represents
156 * the same sequence of characters as the argument; in other words, the
157 * newly created string is a copy of the argument string. Unless an
158 * explicit copy of {@code original} is needed, use of this constructor is
159 * unnecessary since Strings are immutable.
160 *
161 * @param original
162 * A {@code String}
163 */
164 public String(String original) {
165 int size = original.count;
166 char[] originalValue = original.value;
167 char[] v;
168 if (originalValue.length > size) {
169 // The array representing the String is bigger than the new
170 // String itself. Perhaps this constructor is being called
171 // in order to trim the baggage, so make a copy of the array.
172 int off = original.offset;
173 v = Arrays.copyOfRange(originalValue, off, off+size);
174 } else {
175 // The array representing the String is the same
176 // size as the String, so no point in making a copy.
177 v = originalValue;
178 }
179 this.offset = 0;
180 this.count = size;
181 this.value = v;
182 }
183
184 /**
185 * Allocates a new {@code String} so that it represents the sequence of
186 * characters currently contained in the character array argument. The
187 * contents of the character array are copied; subsequent modification of
188 * the character array does not affect the newly created string.
189 *
190 * @param value
191 * The initial value of the string
192 */
193 public String(char value[]) {
194 int size = value.length;
195 this.offset = 0;
196 this.count = size;
197 this.value = Arrays.copyOf(value, size);
198 }
199
200 /**
201 * Allocates a new {@code String} that contains characters from a subarray
202 * of the character array argument. The {@code offset} argument is the
203 * index of the first character of the subarray and the {@code count}
204 * argument specifies the length of the subarray. The contents of the
205 * subarray are copied; subsequent modification of the character array does
206 * not affect the newly created string.
207 *
208 * @param value
209 * Array that is the source of characters
210 *
211 * @param offset
212 * The initial offset
213 *
214 * @param count
215 * The length
216 *
217 * @throws IndexOutOfBoundsException
218 * If the {@code offset} and {@code count} arguments index
219 * characters outside the bounds of the {@code value} array
220 */
221 public String(char value[], int offset, int count) {
222 if (offset < 0) {
223 throw new StringIndexOutOfBoundsException(offset);
224 }
225 if (count < 0) {
226 throw new StringIndexOutOfBoundsException(count);
227 }
228 // Note: offset or count might be near -1>>>1.
229 if (offset > value.length - count) {
230 throw new StringIndexOutOfBoundsException(offset + count);
231 }
232 this.offset = 0;
233 this.count = count;
234 this.value = Arrays.copyOfRange(value, offset, offset+count);
235 }
236
237 /**
238 * Allocates a new {@code String} that contains characters from a subarray
239 * of the <a href="Character.html#unicode">Unicode code point</a> array
240 * argument. The {@code offset} argument is the index of the first code
241 * point of the subarray and the {@code count} argument specifies the
242 * length of the subarray. The contents of the subarray are converted to
243 * {@code char}s; subsequent modification of the {@code int} array does not
244 * affect the newly created string.
245 *
246 * @param codePoints
247 * Array that is the source of Unicode code points
248 *
249 * @param offset
250 * The initial offset
251 *
252 * @param count
253 * The length
276
277 final int end = offset + count;
278
279 // Pass 1: Compute precise size of char[]
280 int n = count;
281 for (int i = offset; i < end; i++) {
282 int c = codePoints[i];
283 if (Character.isBmpCodePoint(c))
284 continue;
285 else if (Character.isValidCodePoint(c))
286 n++;
287 else throw new IllegalArgumentException(Integer.toString(c));
288 }
289
290 // Pass 2: Allocate and fill in char[]
291 final char[] v = new char[n];
292
293 for (int i = offset, j = 0; i < end; i++, j++) {
294 int c = codePoints[i];
295 if (Character.isBmpCodePoint(c))
296 v[j] = (char) c;
297 else
298 Character.toSurrogates(c, v, j++);
299 }
300
301 this.value = v;
302 this.count = n;
303 this.offset = 0;
304 }
305
306 /**
307 * Allocates a new {@code String} constructed from a subarray of an array
308 * of 8-bit integer values.
309 *
310 * <p> The {@code offset} argument is the index of the first byte of the
311 * subarray, and the {@code count} argument specifies the length of the
312 * subarray.
313 *
314 * <p> Each {@code byte} in the subarray is converted to a {@code char} as
315 * specified in the method above.
316 *
317 * @deprecated This method does not properly convert bytes into characters.
318 * As of JDK 1.1, the preferred way to do this is via the
319 * {@code String} constructors that take a {@link
320 * java.nio.charset.Charset}, charset name, or that use the platform's
321 * default charset.
322 *
323 * @param ascii
331 * @param count
332 * The length
333 *
334 * @throws IndexOutOfBoundsException
335 * If the {@code offset} or {@code count} argument is invalid
336 *
337 * @see #String(byte[], int)
338 * @see #String(byte[], int, int, java.lang.String)
339 * @see #String(byte[], int, int, java.nio.charset.Charset)
340 * @see #String(byte[], int, int)
341 * @see #String(byte[], java.lang.String)
342 * @see #String(byte[], java.nio.charset.Charset)
343 * @see #String(byte[])
344 */
345 @Deprecated
346 public String(byte ascii[], int hibyte, int offset, int count) {
347 checkBounds(ascii, offset, count);
348 char value[] = new char[count];
349
350 if (hibyte == 0) {
351 for (int i = count ; i-- > 0 ;) {
352 value[i] = (char) (ascii[i + offset] & 0xff);
353 }
354 } else {
355 hibyte <<= 8;
356 for (int i = count ; i-- > 0 ;) {
357 value[i] = (char) (hibyte | (ascii[i + offset] & 0xff));
358 }
359 }
360 this.offset = 0;
361 this.count = count;
362 this.value = value;
363 }
364
365 /**
366 * Allocates a new {@code String} containing characters constructed from
367 * an array of 8-bit integer values. Each character <i>c</i> in the
368 * resulting string is constructed from the corresponding component
369 * <i>b</i> in the byte array such that:
370 *
371 * <blockquote><pre>
372 * <b><i>c</i></b> == (char)(((hibyte & 0xff) << 8)
373 * | (<b><i>b</i></b> & 0xff))
374 * </pre></blockquote>
375 *
376 * @deprecated This method does not properly convert bytes into
377 * characters. As of JDK 1.1, the preferred way to do this is via the
378 * {@code String} constructors that take a {@link
379 * java.nio.charset.Charset}, charset name, or that use the platform's
380 * default charset.
381 *
427 * @param offset
428 * The index of the first byte to decode
429 *
430 * @param length
431 * The number of bytes to decode
432
433 * @param charsetName
434 * The name of a supported {@linkplain java.nio.charset.Charset
435 * charset}
436 *
437 * @throws UnsupportedEncodingException
438 * If the named charset is not supported
439 *
440 * @throws IndexOutOfBoundsException
441 * If the {@code offset} and {@code length} arguments index
442 * characters outside the bounds of the {@code bytes} array
443 *
444 * @since JDK1.1
445 */
446 public String(byte bytes[], int offset, int length, String charsetName)
447 throws UnsupportedEncodingException
448 {
449 if (charsetName == null)
450 throw new NullPointerException("charsetName");
451 checkBounds(bytes, offset, length);
452 char[] v = StringCoding.decode(charsetName, bytes, offset, length);
453 this.offset = 0;
454 this.count = v.length;
455 this.value = v;
456 }
457
458 /**
459 * Constructs a new {@code String} by decoding the specified subarray of
460 * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
461 * The length of the new {@code String} is a function of the charset, and
462 * hence may not be equal to the length of the subarray.
463 *
464 * <p> This method always replaces malformed-input and unmappable-character
465 * sequences with this charset's default replacement string. The {@link
466 * java.nio.charset.CharsetDecoder} class should be used when more control
467 * over the decoding process is required.
468 *
469 * @param bytes
470 * The bytes to be decoded into characters
471 *
472 * @param offset
473 * The index of the first byte to decode
474 *
475 * @param length
476 * The number of bytes to decode
477 *
478 * @param charset
479 * The {@linkplain java.nio.charset.Charset charset} to be used to
480 * decode the {@code bytes}
481 *
482 * @throws IndexOutOfBoundsException
483 * If the {@code offset} and {@code length} arguments index
484 * characters outside the bounds of the {@code bytes} array
485 *
486 * @since 1.6
487 */
488 public String(byte bytes[], int offset, int length, Charset charset) {
489 if (charset == null)
490 throw new NullPointerException("charset");
491 checkBounds(bytes, offset, length);
492 char[] v = StringCoding.decode(charset, bytes, offset, length);
493 this.offset = 0;
494 this.count = v.length;
495 this.value = v;
496 }
497
498 /**
499 * Constructs a new {@code String} by decoding the specified array of bytes
500 * using the specified {@linkplain java.nio.charset.Charset charset}. The
501 * length of the new {@code String} is a function of the charset, and hence
502 * may not be equal to the length of the byte array.
503 *
504 * <p> The behavior of this constructor when the given bytes are not valid
505 * in the given charset is unspecified. The {@link
506 * java.nio.charset.CharsetDecoder} class should be used when more control
507 * over the decoding process is required.
508 *
509 * @param bytes
510 * The bytes to be decoded into characters
511 *
512 * @param charsetName
513 * The name of a supported {@linkplain java.nio.charset.Charset
514 * charset}
515 *
516 * @throws UnsupportedEncodingException
517 * If the named charset is not supported
518 *
519 * @since JDK1.1
520 */
public String(byte bytes[], String charsetName) throws UnsupportedEncodingException {
    // Decode the whole array via the ranged, named-charset constructor.
    this(bytes, 0, bytes.length, charsetName);
}
526
527 /**
528 * Constructs a new {@code String} by decoding the specified array of
529 * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
530 * The length of the new {@code String} is a function of the charset, and
531 * hence may not be equal to the length of the byte array.
532 *
533 * <p> This method always replaces malformed-input and unmappable-character
534 * sequences with this charset's default replacement string. The {@link
535 * java.nio.charset.CharsetDecoder} class should be used when more control
536 * over the decoding process is required.
537 *
538 * @param bytes
539 * The bytes to be decoded into characters
540 *
541 * @param charset
542 * The {@linkplain java.nio.charset.Charset charset} to be used to
543 * decode the {@code bytes}
559 * java.nio.charset.CharsetDecoder} class should be used when more control
560 * over the decoding process is required.
561 *
562 * @param bytes
563 * The bytes to be decoded into characters
564 *
565 * @param offset
566 * The index of the first byte to decode
567 *
568 * @param length
569 * The number of bytes to decode
570 *
571 * @throws IndexOutOfBoundsException
572 * If the {@code offset} and the {@code length} arguments index
573 * characters outside the bounds of the {@code bytes} array
574 *
575 * @since JDK1.1
576 */
577 public String(byte bytes[], int offset, int length) {
578 checkBounds(bytes, offset, length);
579 char[] v = StringCoding.decode(bytes, offset, length);
580 this.offset = 0;
581 this.count = v.length;
582 this.value = v;
583 }
584
585 /**
586 * Constructs a new {@code String} by decoding the specified array of bytes
587 * using the platform's default charset. The length of the new {@code
588 * String} is a function of the charset, and hence may not be equal to the
589 * length of the byte array.
590 *
591 * <p> The behavior of this constructor when the given bytes are not valid
592 * in the default charset is unspecified. The {@link
593 * java.nio.charset.CharsetDecoder} class should be used when more control
594 * over the decoding process is required.
595 *
596 * @param bytes
597 * The bytes to be decoded into characters
598 *
599 * @since JDK1.1
600 */
public String(byte bytes[]) {
    // Decode the whole array via the ranged constructor.
    this(bytes, 0, bytes.length);
}
604
605 /**
606 * Allocates a new string that contains the sequence of characters
607 * currently contained in the string buffer argument. The contents of the
608 * string buffer are copied; subsequent modification of the string buffer
609 * does not affect the newly created string.
610 *
611 * @param buffer
612 * A {@code StringBuffer}
613 */
614 public String(StringBuffer buffer) {
615 String result = buffer.toString();
616 this.value = result.value;
617 this.count = result.count;
618 this.offset = result.offset;
619 }
620
621 /**
622 * Allocates a new string that contains the sequence of characters
623 * currently contained in the string builder argument. The contents of the
624 * string builder are copied; subsequent modification of the string builder
625 * does not affect the newly created string.
626 *
627 * <p> This constructor is provided to ease migration to {@code
628 * StringBuilder}. Obtaining a string from a string builder via the {@code
629 * toString} method is likely to run faster and is generally preferred.
630 *
631 * @param builder
632 * A {@code StringBuilder}
633 *
634 * @since 1.5
635 */
636 public String(StringBuilder builder) {
637 String result = builder.toString();
638 this.value = result.value;
639 this.count = result.count;
640 this.offset = result.offset;
641 }
642
643
// Package-private constructor: for speed the given array is adopted as-is
// (shared, not copied), together with the supplied offset and count.
String(int offset, int count, char value[]) {
    this.offset = offset;
    this.count = count;
    this.value = value;
}
650
651 /**
652 * Returns the length of this string.
653 * The length is equal to the number of <a href="Character.html#unicode">Unicode
654 * code units</a> in the string.
655 *
656 * @return the length of the sequence of characters represented by this
657 * object.
658 */
659 public int length() {
660 return count;
661 }
662
663 /**
664 * Returns <tt>true</tt> if, and only if, {@link #length()} is <tt>0</tt>.
665 *
666 * @return <tt>true</tt> if {@link #length()} is <tt>0</tt>, otherwise
667 * <tt>false</tt>
668 *
669 * @since 1.6
670 */
671 public boolean isEmpty() {
672 return count == 0;
673 }
674
675 /**
676 * Returns the {@code char} value at the
677 * specified index. An index ranges from {@code 0} to
678 * {@code length() - 1}. The first {@code char} value of the sequence
679 * is at index {@code 0}, the next at index {@code 1},
680 * and so on, as for array indexing.
681 *
682 * <p>If the {@code char} value specified by the index is a
683 * <a href="Character.html#unicode">surrogate</a>, the surrogate
684 * value is returned.
685 *
686 * @param index the index of the {@code char} value.
687 * @return the {@code char} value at the specified index of this string.
688 * The first {@code char} value is at index {@code 0}.
689 * @exception IndexOutOfBoundsException if the {@code index}
690 * argument is negative or not less than the length of this
691 * string.
692 */
693 public char charAt(int index) {
694 if ((index < 0) || (index >= count)) {
695 throw new StringIndexOutOfBoundsException(index);
696 }
697 return value[index + offset];
698 }
699
700 /**
701 * Returns the character (Unicode code point) at the specified
702 * index. The index refers to {@code char} values
703 * (Unicode code units) and ranges from {@code 0} to
704 * {@link #length()}{@code - 1}.
705 *
706 * <p> If the {@code char} value specified at the given index
707 * is in the high-surrogate range, the following index is less
708 * than the length of this {@code String}, and the
709 * {@code char} value at the following index is in the
710 * low-surrogate range, then the supplementary code point
711 * corresponding to this surrogate pair is returned. Otherwise,
712 * the {@code char} value at the given index is returned.
713 *
714 * @param index the index to the {@code char} values
715 * @return the code point value of the character at the
716 * {@code index}
717 * @exception IndexOutOfBoundsException if the {@code index}
718 * argument is negative or not less than the length of this
719 * string.
720 * @since 1.5
721 */
722 public int codePointAt(int index) {
723 if ((index < 0) || (index >= count)) {
724 throw new StringIndexOutOfBoundsException(index);
725 }
726 return Character.codePointAtImpl(value, offset + index, offset + count);
727 }
728
729 /**
730 * Returns the character (Unicode code point) before the specified
731 * index. The index refers to {@code char} values
732 * (Unicode code units) and ranges from {@code 1} to {@link
733 * CharSequence#length() length}.
734 *
735 * <p> If the {@code char} value at {@code (index - 1)}
736 * is in the low-surrogate range, {@code (index - 2)} is not
737 * negative, and the {@code char} value at {@code (index -
738 * 2)} is in the high-surrogate range, then the
739 * supplementary code point value of the surrogate pair is
740 * returned. If the {@code char} value at {@code index -
741 * 1} is an unpaired low-surrogate or a high-surrogate, the
742 * surrogate value is returned.
743 *
744 * @param index the index following the code point that should be returned
745 * @return the Unicode code point value before the given index.
746 * @exception IndexOutOfBoundsException if the {@code index}
747 * argument is less than 1 or greater than the length
748 * of this string.
749 * @since 1.5
750 */
751 public int codePointBefore(int index) {
752 int i = index - 1;
753 if ((i < 0) || (i >= count)) {
754 throw new StringIndexOutOfBoundsException(index);
755 }
756 return Character.codePointBeforeImpl(value, offset + index, offset);
757 }
758
759 /**
760 * Returns the number of Unicode code points in the specified text
761 * range of this {@code String}. The text range begins at the
762 * specified {@code beginIndex} and extends to the
763 * {@code char} at index {@code endIndex - 1}. Thus the
764 * length (in {@code char}s) of the text range is
765 * {@code endIndex-beginIndex}. Unpaired surrogates within
766 * the text range count as one code point each.
767 *
768 * @param beginIndex the index to the first {@code char} of
769 * the text range.
770 * @param endIndex the index after the last {@code char} of
771 * the text range.
772 * @return the number of Unicode code points in the specified text
773 * range
774 * @exception IndexOutOfBoundsException if the
775 * {@code beginIndex} is negative, or {@code endIndex}
776 * is larger than the length of this {@code String}, or
777 * {@code beginIndex} is larger than {@code endIndex}.
778 * @since 1.5
779 */
780 public int codePointCount(int beginIndex, int endIndex) {
781 if (beginIndex < 0 || endIndex > count || beginIndex > endIndex) {
782 throw new IndexOutOfBoundsException();
783 }
784 return Character.codePointCountImpl(value, offset+beginIndex, endIndex-beginIndex);
785 }
786
787 /**
788 * Returns the index within this {@code String} that is
789 * offset from the given {@code index} by
790 * {@code codePointOffset} code points. Unpaired surrogates
791 * within the text range given by {@code index} and
792 * {@code codePointOffset} count as one code point each.
793 *
794 * @param index the index to be offset
795 * @param codePointOffset the offset in code points
796 * @return the index within this {@code String}
797 * @exception IndexOutOfBoundsException if {@code index}
798 * is negative or larger than the length of this
799 * {@code String}, or if {@code codePointOffset} is positive
800 * and the substring starting with {@code index} has fewer
801 * than {@code codePointOffset} code points,
802 * or if {@code codePointOffset} is negative and the substring
803 * before {@code index} has fewer than the absolute value
804 * of {@code codePointOffset} code points.
805 * @since 1.5
806 */
807 public int offsetByCodePoints(int index, int codePointOffset) {
808 if (index < 0 || index > count) {
809 throw new IndexOutOfBoundsException();
810 }
811 return Character.offsetByCodePointsImpl(value, offset, count,
812 offset+index, codePointOffset) - offset;
813 }
814
815 /**
816 * Copy characters from this string into dst starting at dstBegin.
817 * This method doesn't perform any range checking.
818 */
819 void getChars(char dst[], int dstBegin) {
820 System.arraycopy(value, offset, dst, dstBegin, count);
821 }
822
823 /**
824 * Copies characters from this string into the destination character
825 * array.
826 * <p>
827 * The first character to be copied is at index {@code srcBegin};
828 * the last character to be copied is at index {@code srcEnd-1}
829 * (thus the total number of characters to be copied is
830 * {@code srcEnd-srcBegin}). The characters are copied into the
831 * subarray of {@code dst} starting at index {@code dstBegin}
832 * and ending at index:
833 * <p><blockquote><pre>
834 * dstBegin + (srcEnd-srcBegin) - 1
835 * </pre></blockquote>
836 *
837 * @param srcBegin index of the first character in the string
838 * to copy.
839 * @param srcEnd index after the last character in the string
840 * to copy.
841 * @param dst the destination array.
842 * @param dstBegin the start offset in the destination array.
843 * @exception IndexOutOfBoundsException If any of the following
844 * is true:
845 * <ul><li>{@code srcBegin} is negative.
846 * <li>{@code srcBegin} is greater than {@code srcEnd}
847 * <li>{@code srcEnd} is greater than the length of this
848 * string
849 * <li>{@code dstBegin} is negative
850 * <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than
851 * {@code dst.length}</ul>
852 */
853 public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) {
854 if (srcBegin < 0) {
855 throw new StringIndexOutOfBoundsException(srcBegin);
856 }
857 if (srcEnd > count) {
858 throw new StringIndexOutOfBoundsException(srcEnd);
859 }
860 if (srcBegin > srcEnd) {
861 throw new StringIndexOutOfBoundsException(srcEnd - srcBegin);
862 }
863 System.arraycopy(value, offset + srcBegin, dst, dstBegin,
864 srcEnd - srcBegin);
865 }
866
867 /**
868 * Copies characters from this string into the destination byte array. Each
869 * byte receives the 8 low-order bits of the corresponding character. The
870 * eight high-order bits of each character are not copied and do not
871 * participate in the transfer in any way.
872 *
873 * <p> The first character to be copied is at index {@code srcBegin}; the
874 * last character to be copied is at index {@code srcEnd-1}. The total
875 * number of characters to be copied is {@code srcEnd-srcBegin}. The
876 * characters, converted to bytes, are copied into the subarray of {@code
877 * dst} starting at index {@code dstBegin} and ending at index:
878 *
879 * <blockquote><pre>
880 * dstBegin + (srcEnd-srcBegin) - 1
881 * </pre></blockquote>
882 *
883 * @deprecated This method does not properly convert characters into
884 * bytes. As of JDK 1.1, the preferred way to do this is via the
895 *
896 * @param dstBegin
897 * The start offset in the destination array
898 *
899 * @throws IndexOutOfBoundsException
900 * If any of the following is true:
901 * <ul>
902 * <li> {@code srcBegin} is negative
903 * <li> {@code srcBegin} is greater than {@code srcEnd}
904 * <li> {@code srcEnd} is greater than the length of this String
905 * <li> {@code dstBegin} is negative
906 * <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code
907 * dst.length}
908 * </ul>
909 */
910 @Deprecated
911 public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) {
912 if (srcBegin < 0) {
913 throw new StringIndexOutOfBoundsException(srcBegin);
914 }
915 if (srcEnd > count) {
916 throw new StringIndexOutOfBoundsException(srcEnd);
917 }
918 if (srcBegin > srcEnd) {
919 throw new StringIndexOutOfBoundsException(srcEnd - srcBegin);
920 }
921 int j = dstBegin;
922 int n = offset + srcEnd;
923 int i = offset + srcBegin;
924 char[] val = value; /* avoid getfield opcode */
925
926 while (i < n) {
927 dst[j++] = (byte)val[i++];
928 }
929 }
930
931 /**
932 * Encodes this {@code String} into a sequence of bytes using the named
933 * charset, storing the result into a new byte array.
934 *
935 * <p> The behavior of this method when this string cannot be encoded in
936 * the given charset is unspecified. The {@link
937 * java.nio.charset.CharsetEncoder} class should be used when more control
938 * over the encoding process is required.
939 *
940 * @param charsetName
941 * The name of a supported {@linkplain java.nio.charset.Charset
942 * charset}
943 *
944 * @return The resultant byte array
945 *
946 * @throws UnsupportedEncodingException
947 * If the named charset is not supported
948 *
949 * @since JDK1.1
950 */
951 public byte[] getBytes(String charsetName)
952 throws UnsupportedEncodingException
953 {
954 if (charsetName == null) throw new NullPointerException();
955 return StringCoding.encode(charsetName, value, offset, count);
956 }
957
958 /**
959 * Encodes this {@code String} into a sequence of bytes using the given
960 * {@linkplain java.nio.charset.Charset charset}, storing the result into a
961 * new byte array.
962 *
963 * <p> This method always replaces malformed-input and unmappable-character
964 * sequences with this charset's default replacement byte array. The
965 * {@link java.nio.charset.CharsetEncoder} class should be used when more
966 * control over the encoding process is required.
967 *
968 * @param charset
969 * The {@linkplain java.nio.charset.Charset} to be used to encode
970 * the {@code String}
971 *
972 * @return The resultant byte array
973 *
974 * @since 1.6
975 */
976 public byte[] getBytes(Charset charset) {
977 if (charset == null) throw new NullPointerException();
978 return StringCoding.encode(charset, value, offset, count);
979 }
980
981 /**
982 * Encodes this {@code String} into a sequence of bytes using the
983 * platform's default charset, storing the result into a new byte array.
984 *
985 * <p> The behavior of this method when this string cannot be encoded in
986 * the default charset is unspecified. The {@link
987 * java.nio.charset.CharsetEncoder} class should be used when more control
988 * over the encoding process is required.
989 *
990 * @return The resultant byte array
991 *
992 * @since JDK1.1
993 */
994 public byte[] getBytes() {
995 return StringCoding.encode(value, offset, count);
996 }
997
998 /**
999 * Compares this string to the specified object. The result is {@code
1000 * true} if and only if the argument is not {@code null} and is a {@code
1001 * String} object that represents the same sequence of characters as this
1002 * object.
1003 *
1004 * @param anObject
1005 * The object to compare this {@code String} against
1006 *
1007 * @return {@code true} if the given object represents a {@code String}
1008 * equivalent to this string, {@code false} otherwise
1009 *
1010 * @see #compareTo(String)
1011 * @see #equalsIgnoreCase(String)
1012 */
1013 public boolean equals(Object anObject) {
1014 if (this == anObject) {
1015 return true;
1016 }
1017 if (anObject instanceof String) {
1018 String anotherString = (String)anObject;
1019 int n = count;
1020 if (n == anotherString.count) {
1021 char v1[] = value;
1022 char v2[] = anotherString.value;
1023 int i = offset;
1024 int j = anotherString.offset;
1025 while (n-- != 0) {
1026 if (v1[i++] != v2[j++])
1027 return false;
1028 }
1029 return true;
1030 }
1031 }
1032 return false;
1033 }
1034
1035 /**
1036 * Compares this string to the specified {@code StringBuffer}. The result
1037 * is {@code true} if and only if this {@code String} represents the same
1038 * sequence of characters as the specified {@code StringBuffer}.
1039 *
1040 * @param sb
1041 * The {@code StringBuffer} to compare this {@code String} against
1042 *
1043 * @return {@code true} if this {@code String} represents the same
1044 * sequence of characters as the specified {@code StringBuffer},
1045 * {@code false} otherwise
1046 *
1047 * @since 1.4
1048 */
1049 public boolean contentEquals(StringBuffer sb) {
1050 synchronized(sb) {
1051 return contentEquals((CharSequence)sb);
1052 }
1053 }
1054
1055 /**
1056 * Compares this string to the specified {@code CharSequence}. The result
1057 * is {@code true} if and only if this {@code String} represents the same
1058 * sequence of char values as the specified sequence.
1059 *
1060 * @param cs
1061 * The sequence to compare this {@code String} against
1062 *
1063 * @return {@code true} if this {@code String} represents the same
1064 * sequence of char values as the specified sequence, {@code
1065 * false} otherwise
1066 *
1067 * @since 1.5
1068 */
1069 public boolean contentEquals(CharSequence cs) {
1070 if (count != cs.length())
1071 return false;
1072 // Argument is a StringBuffer, StringBuilder
1073 if (cs instanceof AbstractStringBuilder) {
1074 char v1[] = value;
1075 char v2[] = ((AbstractStringBuilder)cs).getValue();
1076 int i = offset;
1077 int j = 0;
1078 int n = count;
1079 while (n-- != 0) {
1080 if (v1[i++] != v2[j++])
1081 return false;
1082 }
1083 return true;
1084 }
1085 // Argument is a String
1086 if (cs.equals(this))
1087 return true;
1088 // Argument is a generic CharSequence
1089 char v1[] = value;
1090 int i = offset;
1091 int j = 0;
1092 int n = count;
1093 while (n-- != 0) {
1094 if (v1[i++] != cs.charAt(j++))
1095 return false;
1096 }
1097 return true;
1098 }
1099
1100 /**
1101 * Compares this {@code String} to another {@code String}, ignoring case
1102 * considerations. Two strings are considered equal ignoring case if they
1103 * are of the same length and corresponding characters in the two strings
1104 * are equal ignoring case.
1105 *
1106 * <p> Two characters {@code c1} and {@code c2} are considered the same
1107 * ignoring case if at least one of the following is true:
1108 * <ul>
1109 * <li> The two characters are the same (as compared by the
1110 * {@code ==} operator)
1111 * <li> Applying the method {@link
1112 * java.lang.Character#toUpperCase(char)} to each character
1113 * produces the same result
1114 * <li> Applying the method {@link
1115 * java.lang.Character#toLowerCase(char)} to each character
1116 * produces the same result
1117 * </ul>
1118 *
1119 * @param anotherString
1120 * The {@code String} to compare this {@code String} against
1121 *
1122 * @return {@code true} if the argument is not {@code null} and it
1123 * represents an equivalent {@code String} ignoring case; {@code
1124 * false} otherwise
1125 *
1126 * @see #equals(Object)
1127 */
1128 public boolean equalsIgnoreCase(String anotherString) {
1129 return (this == anotherString) ? true :
1130 (anotherString != null) && (anotherString.count == count) &&
1131 regionMatches(true, 0, anotherString, 0, count);
1132 }
1133
1134 /**
1135 * Compares two strings lexicographically.
1136 * The comparison is based on the Unicode value of each character in
1137 * the strings. The character sequence represented by this
1138 * {@code String} object is compared lexicographically to the
1139 * character sequence represented by the argument string. The result is
1140 * a negative integer if this {@code String} object
1141 * lexicographically precedes the argument string. The result is a
1142 * positive integer if this {@code String} object lexicographically
1143 * follows the argument string. The result is zero if the strings
1144 * are equal; {@code compareTo} returns {@code 0} exactly when
1145 * the {@link #equals(Object)} method would return {@code true}.
1146 * <p>
1147 * This is the definition of lexicographic ordering. If two strings are
1148 * different, then either they have different characters at some index
1149 * that is a valid index for both strings, or their lengths are different,
1150 * or both. If they have different characters at one or more index
1151 * positions, let <i>k</i> be the smallest such index; then the string
1156 * the two string -- that is, the value:
1157 * <blockquote><pre>
1158 * this.charAt(k)-anotherString.charAt(k)
1159 * </pre></blockquote>
1160 * If there is no index position at which they differ, then the shorter
1161 * string lexicographically precedes the longer string. In this case,
1162 * {@code compareTo} returns the difference of the lengths of the
1163 * strings -- that is, the value:
1164 * <blockquote><pre>
1165 * this.length()-anotherString.length()
1166 * </pre></blockquote>
1167 *
1168 * @param anotherString the {@code String} to be compared.
1169 * @return the value {@code 0} if the argument string is equal to
1170 * this string; a value less than {@code 0} if this string
1171 * is lexicographically less than the string argument; and a
1172 * value greater than {@code 0} if this string is
1173 * lexicographically greater than the string argument.
1174 */
1175 public int compareTo(String anotherString) {
1176 int len1 = count;
1177 int len2 = anotherString.count;
1178 int n = Math.min(len1, len2);
1179 char v1[] = value;
1180 char v2[] = anotherString.value;
1181 int i = offset;
1182 int j = anotherString.offset;
1183
1184 if (i == j) {
1185 int k = i;
1186 int lim = n + i;
1187 while (k < lim) {
1188 char c1 = v1[k];
1189 char c2 = v2[k];
1190 if (c1 != c2) {
1191 return c1 - c2;
1192 }
1193 k++;
1194 }
1195 } else {
1196 while (n-- != 0) {
1197 char c1 = v1[i++];
1198 char c2 = v2[j++];
1199 if (c1 != c2) {
1200 return c1 - c2;
1201 }
1202 }
1203 }
1204 return len1 - len2;
1205 }
1206
1207 /**
1208 * A Comparator that orders {@code String} objects as by
1209 * {@code compareToIgnoreCase}. This comparator is serializable.
1210 * <p>
1211 * Note that this Comparator does <em>not</em> take locale into account,
1212 * and will result in an unsatisfactory ordering for certain locales.
1213 * The java.text package provides <em>Collators</em> to allow
1214 * locale-sensitive ordering.
1215 *
1216 * @see java.text.Collator#compare(String, String)
1217 * @since 1.2
1218 */
1219 public static final Comparator<String> CASE_INSENSITIVE_ORDER
1220 = new CaseInsensitiveComparator();
1221 private static class CaseInsensitiveComparator
1222 implements Comparator<String>, java.io.Serializable {
1223 // use serialVersionUID from JDK 1.2.2 for interoperability
1291 * <tt>String</tt> object.
1292 * <li><tt>ooffset+len</tt> is greater than the length of the other
1293 * argument.
1294 * <li>There is some nonnegative integer <i>k</i> less than <tt>len</tt>
1295 * such that:
1296 * <tt>this.charAt(toffset+<i>k</i>) != other.charAt(ooffset+<i>k</i>)</tt>
1297 * </ul>
1298 *
1299 * @param toffset the starting offset of the subregion in this string.
1300 * @param other the string argument.
1301 * @param ooffset the starting offset of the subregion in the string
1302 * argument.
1303 * @param len the number of characters to compare.
1304 * @return {@code true} if the specified subregion of this string
1305 * exactly matches the specified subregion of the string argument;
1306 * {@code false} otherwise.
1307 */
1308 public boolean regionMatches(int toffset, String other, int ooffset,
1309 int len) {
1310 char ta[] = value;
1311 int to = offset + toffset;
1312 char pa[] = other.value;
1313 int po = other.offset + ooffset;
1314 // Note: toffset, ooffset, or len might be near -1>>>1.
1315 if ((ooffset < 0) || (toffset < 0) || (toffset > (long)count - len)
1316 || (ooffset > (long)other.count - len)) {
1317 return false;
1318 }
1319 while (len-- > 0) {
1320 if (ta[to++] != pa[po++]) {
1321 return false;
1322 }
1323 }
1324 return true;
1325 }
1326
1327 /**
1328 * Tests if two string regions are equal.
1329 * <p>
1330 * A substring of this <tt>String</tt> object is compared to a substring
1331 * of the argument <tt>other</tt>. The result is <tt>true</tt> if these
1332 * substrings represent character sequences that are the same, ignoring
1333 * case if and only if <tt>ignoreCase</tt> is true. The substring of
1334 * this <tt>String</tt> object to be compared begins at index
1335 * <tt>toffset</tt> and has length <tt>len</tt>. The substring of
1336 * <tt>other</tt> to be compared begins at index <tt>ooffset</tt> and
1360 * </pre></blockquote>
1361 * </ul>
1362 *
1363 * @param ignoreCase if {@code true}, ignore case when comparing
1364 * characters.
1365 * @param toffset the starting offset of the subregion in this
1366 * string.
1367 * @param other the string argument.
1368 * @param ooffset the starting offset of the subregion in the string
1369 * argument.
1370 * @param len the number of characters to compare.
1371 * @return {@code true} if the specified subregion of this string
1372 * matches the specified subregion of the string argument;
1373 * {@code false} otherwise. Whether the matching is exact
1374 * or case insensitive depends on the {@code ignoreCase}
1375 * argument.
1376 */
1377 public boolean regionMatches(boolean ignoreCase, int toffset,
1378 String other, int ooffset, int len) {
1379 char ta[] = value;
1380 int to = offset + toffset;
1381 char pa[] = other.value;
1382 int po = other.offset + ooffset;
1383 // Note: toffset, ooffset, or len might be near -1>>>1.
1384 if ((ooffset < 0) || (toffset < 0) || (toffset > (long)count - len) ||
1385 (ooffset > (long)other.count - len)) {
1386 return false;
1387 }
1388 while (len-- > 0) {
1389 char c1 = ta[to++];
1390 char c2 = pa[po++];
1391 if (c1 == c2) {
1392 continue;
1393 }
1394 if (ignoreCase) {
1395 // If characters don't match but case may be ignored,
1396 // try converting both characters to uppercase.
1397 // If the results match, then the comparison scan should
1398 // continue.
1399 char u1 = Character.toUpperCase(c1);
1400 char u2 = Character.toUpperCase(c2);
1401 if (u1 == u2) {
1402 continue;
1403 }
1404 // Unfortunately, conversion to uppercase does not work properly
1405 // for the Georgian alphabet, which has strange rules about case
1416
1417 /**
1418 * Tests if the substring of this string beginning at the
1419 * specified index starts with the specified prefix.
1420 *
1421 * @param prefix the prefix.
1422 * @param toffset where to begin looking in this string.
1423 * @return {@code true} if the character sequence represented by the
1424 * argument is a prefix of the substring of this object starting
1425 * at index {@code toffset}; {@code false} otherwise.
1426 * The result is {@code false} if {@code toffset} is
1427 * negative or greater than the length of this
1428 * {@code String} object; otherwise the result is the same
1429 * as the result of the expression
1430 * <pre>
1431 * this.substring(toffset).startsWith(prefix)
1432 * </pre>
1433 */
1434 public boolean startsWith(String prefix, int toffset) {
1435 char ta[] = value;
1436 int to = offset + toffset;
1437 char pa[] = prefix.value;
1438 int po = prefix.offset;
1439 int pc = prefix.count;
1440 // Note: toffset might be near -1>>>1.
1441 if ((toffset < 0) || (toffset > count - pc)) {
1442 return false;
1443 }
1444 while (--pc >= 0) {
1445 if (ta[to++] != pa[po++]) {
1446 return false;
1447 }
1448 }
1449 return true;
1450 }
1451
1452 /**
1453 * Tests if this string starts with the specified prefix.
1454 *
1455 * @param prefix the prefix.
1456 * @return {@code true} if the character sequence represented by the
1457 * argument is a prefix of the character sequence represented by
1458 * this string; {@code false} otherwise.
1459 * Note also that {@code true} will be returned if the
1460 * argument is an empty string or is equal to this
1461 * {@code String} object as determined by the
1462 * {@link #equals(Object)} method.
1463 * @since 1. 0
1464 */
1465 public boolean startsWith(String prefix) {
1466 return startsWith(prefix, 0);
1467 }
1468
1469 /**
1470 * Tests if this string ends with the specified suffix.
1471 *
1472 * @param suffix the suffix.
1473 * @return {@code true} if the character sequence represented by the
1474 * argument is a suffix of the character sequence represented by
1475 * this object; {@code false} otherwise. Note that the
1476 * result will be {@code true} if the argument is the
1477 * empty string or is equal to this {@code String} object
1478 * as determined by the {@link #equals(Object)} method.
1479 */
1480 public boolean endsWith(String suffix) {
1481 return startsWith(suffix, count - suffix.count);
1482 }
1483
1484 /**
1485 * Returns a hash code for this string. The hash code for a
1486 * {@code String} object is computed as
1487 * <blockquote><pre>
1488 * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
1489 * </pre></blockquote>
1490 * using {@code int} arithmetic, where {@code s[i]} is the
1491 * <i>i</i>th character of the string, {@code n} is the length of
1492 * the string, and {@code ^} indicates exponentiation.
1493 * (The hash value of the empty string is zero.)
1494 *
1495 * @return a hash code value for this object.
1496 */
1497 public int hashCode() {
1498 int h = hash;
1499 if (h == 0 && count > 0) {
1500 int off = offset;
1501 char val[] = value;
1502 int len = count;
1503
1504 for (int i = 0; i < len; i++) {
1505 h = 31*h + val[off++];
1506 }
1507 hash = h;
1508 }
1509 return h;
1510 }
1511
1512 /**
1513 * Returns the index within this string of the first occurrence of
1514 * the specified character. If a character with value
1515 * {@code ch} occurs in the character sequence represented by
1516 * this {@code String} object, then the index (in Unicode
1517 * code units) of the first such occurrence is returned. For
1518 * values of {@code ch} in the range from 0 to 0xFFFF
1519 * (inclusive), this is the smallest value <i>k</i> such that:
1520 * <blockquote><pre>
1521 * this.charAt(<i>k</i>) == ch
1522 * </pre></blockquote>
1523 * is true. For other values of {@code ch}, it is the
1524 * smallest value <i>k</i> such that:
1525 * <blockquote><pre>
1560 * {@code -1} is returned.
1561 *
1562 * <p>
1563 * There is no restriction on the value of {@code fromIndex}. If it
1564 * is negative, it has the same effect as if it were zero: this entire
1565 * string may be searched. If it is greater than the length of this
1566 * string, it has the same effect as if it were equal to the length of
1567 * this string: {@code -1} is returned.
1568 *
1569 * <p>All indices are specified in {@code char} values
1570 * (Unicode code units).
1571 *
1572 * @param ch a character (Unicode code point).
1573 * @param fromIndex the index to start the search from.
1574 * @return the index of the first occurrence of the character in the
1575 * character sequence represented by this object that is greater
1576 * than or equal to {@code fromIndex}, or {@code -1}
1577 * if the character does not occur.
1578 */
1579 public int indexOf(int ch, int fromIndex) {
1580 if (fromIndex < 0) {
1581 fromIndex = 0;
1582 } else if (fromIndex >= count) {
1583 // Note: fromIndex might be near -1>>>1.
1584 return -1;
1585 }
1586
1587 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
1588 // handle most cases here (ch is a BMP code point or a
1589 // negative value (invalid code point))
1590 final char[] value = this.value;
1591 final int offset = this.offset;
1592 final int max = offset + count;
1593 for (int i = offset + fromIndex; i < max ; i++) {
1594 if (value[i] == ch) {
1595 return i - offset;
1596 }
1597 }
1598 return -1;
1599 } else {
1600 return indexOfSupplementary(ch, fromIndex);
1601 }
1602 }
1603
1604 /**
1605 * Handles (rare) calls of indexOf with a supplementary character.
1606 */
1607 private int indexOfSupplementary(int ch, int fromIndex) {
1608 if (Character.isValidCodePoint(ch)) {
1609 final char[] value = this.value;
1610 final int offset = this.offset;
1611 final char hi = Character.highSurrogate(ch);
1612 final char lo = Character.lowSurrogate(ch);
1613 final int max = offset + count - 1;
1614 for (int i = offset + fromIndex; i < max; i++) {
1615 if (value[i] == hi && value[i+1] == lo) {
1616 return i - offset;
1617 }
1618 }
1619 }
1620 return -1;
1621 }
1622
1623 /**
1624 * Returns the index within this string of the last occurrence of
1625 * the specified character. For values of {@code ch} in the
1626 * range from 0 to 0xFFFF (inclusive), the index (in Unicode code
1627 * units) returned is the largest value <i>k</i> such that:
1628 * <blockquote><pre>
1629 * this.charAt(<i>k</i>) == ch
1630 * </pre></blockquote>
1631 * is true. For other values of {@code ch}, it is the
1632 * largest value <i>k</i> such that:
1633 * <blockquote><pre>
1634 * this.codePointAt(<i>k</i>) == ch
1635 * </pre></blockquote>
1636 * is true. In either case, if no such character occurs in this
1637 * string, then {@code -1} is returned. The
1638 * {@code String} is searched backwards starting at the last
1639 * character.
1640 *
1641 * @param ch a character (Unicode code point).
1642 * @return the index of the last occurrence of the character in the
1643 * character sequence represented by this object, or
1644 * {@code -1} if the character does not occur.
1645 */
1646 public int lastIndexOf(int ch) {
1647 return lastIndexOf(ch, count - 1);
1648 }
1649
1650 /**
1651 * Returns the index within this string of the last occurrence of
1652 * the specified character, searching backward starting at the
1653 * specified index. For values of {@code ch} in the range
1654 * from 0 to 0xFFFF (inclusive), the index returned is the largest
1655 * value <i>k</i> such that:
1656 * <blockquote><pre>
1657 * (this.charAt(<i>k</i>) == ch) && (<i>k</i> <= fromIndex)
1658 * </pre></blockquote>
1659 * is true. For other values of {@code ch}, it is the
1660 * largest value <i>k</i> such that:
1661 * <blockquote><pre>
1662 * (this.codePointAt(<i>k</i>) == ch) && (<i>k</i> <= fromIndex)
1663 * </pre></blockquote>
1664 * is true. In either case, if no such character occurs in this
1665 * string at or before position {@code fromIndex}, then
1666 * {@code -1} is returned.
1667 *
1669 * (Unicode code units).
1670 *
1671 * @param ch a character (Unicode code point).
1672 * @param fromIndex the index to start the search from. There is no
1673 * restriction on the value of {@code fromIndex}. If it is
1674 * greater than or equal to the length of this string, it has
1675 * the same effect as if it were equal to one less than the
1676 * length of this string: this entire string may be searched.
1677 * If it is negative, it has the same effect as if it were -1:
1678 * -1 is returned.
1679 * @return the index of the last occurrence of the character in the
1680 * character sequence represented by this object that is less
1681 * than or equal to {@code fromIndex}, or {@code -1}
1682 * if the character does not occur before that point.
1683 */
1684 public int lastIndexOf(int ch, int fromIndex) {
1685 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
1686 // handle most cases here (ch is a BMP code point or a
1687 // negative value (invalid code point))
1688 final char[] value = this.value;
1689 final int offset = this.offset;
1690 int i = offset + Math.min(fromIndex, count - 1);
1691 for (; i >= offset ; i--) {
1692 if (value[i] == ch) {
1693 return i - offset;
1694 }
1695 }
1696 return -1;
1697 } else {
1698 return lastIndexOfSupplementary(ch, fromIndex);
1699 }
1700 }
1701
1702 /**
1703 * Handles (rare) calls of lastIndexOf with a supplementary character.
1704 */
1705 private int lastIndexOfSupplementary(int ch, int fromIndex) {
1706 if (Character.isValidCodePoint(ch)) {
1707 final char[] value = this.value;
1708 final int offset = this.offset;
1709 char hi = Character.highSurrogate(ch);
1710 char lo = Character.lowSurrogate(ch);
1711 int i = offset + Math.min(fromIndex, count - 2);
1712 for (; i >= offset; i--) {
1713 if (value[i] == hi && value[i+1] == lo) {
1714 return i - offset;
1715 }
1716 }
1717 }
1718 return -1;
1719 }
1720
1721 /**
1722 * Returns the index within this string of the first occurrence of the
1723 * specified substring.
1724 *
1725 * <p>The returned index is the smallest value <i>k</i> for which:
1726 * <blockquote><pre>
1727 * this.startsWith(str, <i>k</i>)
1728 * </pre></blockquote>
1729 * If no such value of <i>k</i> exists, then {@code -1} is returned.
1730 *
1731 * @param str the substring to search for.
1732 * @return the index of the first occurrence of the specified substring,
1733 * or {@code -1} if there is no such occurrence.
1734 */
1736 return indexOf(str, 0);
1737 }
1738
1739 /**
1740 * Returns the index within this string of the first occurrence of the
1741 * specified substring, starting at the specified index.
1742 *
1743 * <p>The returned index is the smallest value <i>k</i> for which:
1744 * <blockquote><pre>
1745 * <i>k</i> >= fromIndex && this.startsWith(str, <i>k</i>)
1746 * </pre></blockquote>
1747 * If no such value of <i>k</i> exists, then {@code -1} is returned.
1748 *
1749 * @param str the substring to search for.
1750 * @param fromIndex the index from which to start the search.
1751 * @return the index of the first occurrence of the specified substring,
1752 * starting at the specified index,
1753 * or {@code -1} if there is no such occurrence.
1754 */
1755 public int indexOf(String str, int fromIndex) {
1756 return indexOf(value, offset, count,
1757 str.value, str.offset, str.count, fromIndex);
1758 }
1759
1760 /**
1761 * Code shared by String and StringBuffer to do searches. The
1762 * source is the character array being searched, and the target
1763 * is the string being searched for.
1764 *
1765 * @param source the characters being searched.
1766 * @param sourceOffset offset of the source string.
1767 * @param sourceCount count of the source string.
1768 * @param target the characters being searched for.
1769 * @param targetOffset offset of the target string.
1770 * @param targetCount count of the target string.
1771 * @param fromIndex the index to begin searching from.
1772 */
1773 static int indexOf(char[] source, int sourceOffset, int sourceCount,
1774 char[] target, int targetOffset, int targetCount,
1775 int fromIndex) {
1776 if (fromIndex >= sourceCount) {
1777 return (targetCount == 0 ? sourceCount : -1);
1779 if (fromIndex < 0) {
1780 fromIndex = 0;
1781 }
1782 if (targetCount == 0) {
1783 return fromIndex;
1784 }
1785
1786 char first = target[targetOffset];
1787 int max = sourceOffset + (sourceCount - targetCount);
1788
1789 for (int i = sourceOffset + fromIndex; i <= max; i++) {
1790 /* Look for first character. */
1791 if (source[i] != first) {
1792 while (++i <= max && source[i] != first);
1793 }
1794
1795 /* Found first character, now look at the rest of v2 */
1796 if (i <= max) {
1797 int j = i + 1;
1798 int end = j + targetCount - 1;
1799 for (int k = targetOffset + 1; j < end && source[j] ==
1800 target[k]; j++, k++);
1801
1802 if (j == end) {
1803 /* Found whole string. */
1804 return i - sourceOffset;
1805 }
1806 }
1807 }
1808 return -1;
1809 }
1810
1811 /**
1812 * Returns the index within this string of the last occurrence of the
1813 * specified substring. The last occurrence of the empty string ""
1814 * is considered to occur at the index value {@code this.length()}.
1815 *
1816 * <p>The returned index is the largest value <i>k</i> for which:
1817 * <blockquote><pre>
1818 * this.startsWith(str, <i>k</i>)
1819 * </pre></blockquote>
1820 * If no such value of <i>k</i> exists, then {@code -1} is returned.
1821 *
1822 * @param str the substring to search for.
1823 * @return the index of the last occurrence of the specified substring,
1824 * or {@code -1} if there is no such occurrence.
1825 */
1826 public int lastIndexOf(String str) {
1827 return lastIndexOf(str, count);
1828 }
1829
1830 /**
1831 * Returns the index within this string of the last occurrence of the
1832 * specified substring, searching backward starting at the specified index.
1833 *
1834 * <p>The returned index is the largest value <i>k</i> for which:
1835 * <blockquote><pre>
1836 * <i>k</i> <= fromIndex && this.startsWith(str, <i>k</i>)
1837 * </pre></blockquote>
1838 * If no such value of <i>k</i> exists, then {@code -1} is returned.
1839 *
1840 * @param str the substring to search for.
1841 * @param fromIndex the index to start the search from.
1842 * @return the index of the last occurrence of the specified substring,
1843 * searching backward from the specified index,
1844 * or {@code -1} if there is no such occurrence.
1845 */
1846 public int lastIndexOf(String str, int fromIndex) {
1847 return lastIndexOf(value, offset, count,
1848 str.value, str.offset, str.count, fromIndex);
1849 }
1850
1851 /**
1852 * Code shared by String and StringBuffer to do searches. The
1853 * source is the character array being searched, and the target
1854 * is the string being searched for.
1855 *
1856 * @param source the characters being searched.
1857 * @param sourceOffset offset of the source string.
1858 * @param sourceCount count of the source string.
1859 * @param target the characters being searched for.
1860 * @param targetOffset offset of the target string.
1861 * @param targetCount count of the target string.
1862 * @param fromIndex the index to begin searching from.
1863 */
1864 static int lastIndexOf(char[] source, int sourceOffset, int sourceCount,
1865 char[] target, int targetOffset, int targetCount,
1866 int fromIndex) {
1867 /*
1868 * Check arguments; return immediately where possible. For
1908 }
1909
1910 /**
1911 * Returns a new string that is a substring of this string. The
1912 * substring begins with the character at the specified index and
1913 * extends to the end of this string. <p>
1914 * Examples:
1915 * <blockquote><pre>
1916 * "unhappy".substring(2) returns "happy"
1917 * "Harbison".substring(3) returns "bison"
1918 * "emptiness".substring(9) returns "" (an empty string)
1919 * </pre></blockquote>
1920 *
1921 * @param beginIndex the beginning index, inclusive.
1922 * @return the specified substring.
1923 * @exception IndexOutOfBoundsException if
1924 * {@code beginIndex} is negative or larger than the
1925 * length of this {@code String} object.
1926 */
1927 public String substring(int beginIndex) {
1928 return substring(beginIndex, count);
1929 }
1930
1931 /**
1932 * Returns a new string that is a substring of this string. The
1933 * substring begins at the specified {@code beginIndex} and
1934 * extends to the character at index {@code endIndex - 1}.
1935 * Thus the length of the substring is {@code endIndex-beginIndex}.
1936 * <p>
1937 * Examples:
1938 * <blockquote><pre>
1939 * "hamburger".substring(4, 8) returns "urge"
1940 * "smiles".substring(1, 5) returns "mile"
1941 * </pre></blockquote>
1942 *
1943 * @param beginIndex the beginning index, inclusive.
1944 * @param endIndex the ending index, exclusive.
1945 * @return the specified substring.
1946 * @exception IndexOutOfBoundsException if the
1947 * {@code beginIndex} is negative, or
1948 * {@code endIndex} is larger than the length of
1949 * this {@code String} object, or
1950 * {@code beginIndex} is larger than
1951 * {@code endIndex}.
1952 */
1953 public String substring(int beginIndex, int endIndex) {
1954 if (beginIndex < 0) {
1955 throw new StringIndexOutOfBoundsException(beginIndex);
1956 }
1957 if (endIndex > count) {
1958 throw new StringIndexOutOfBoundsException(endIndex);
1959 }
1960 if (beginIndex > endIndex) {
1961 throw new StringIndexOutOfBoundsException(endIndex - beginIndex);
1962 }
1963 return ((beginIndex == 0) && (endIndex == count)) ? this :
1964 new String(offset + beginIndex, endIndex - beginIndex, value);
1965 }
1966
1967 /**
1968 * Returns a new character sequence that is a subsequence of this sequence.
1969 *
1970 * <p> An invocation of this method of the form
1971 *
1972 * <blockquote><pre>
1973 * str.subSequence(begin, end)</pre></blockquote>
1974 *
1975 * behaves in exactly the same way as the invocation
1976 *
1977 * <blockquote><pre>
1978 * str.substring(begin, end)</pre></blockquote>
1979 *
1980 * This method is defined so that the <tt>String</tt> class can implement
1981 * the {@link CharSequence} interface. </p>
1982 *
1983 * @param beginIndex the begin index, inclusive.
1984 * @param endIndex the end index, exclusive.
2004 * {@code String} object is created, representing a character
2005 * sequence that is the concatenation of the character sequence
2006 * represented by this {@code String} object and the character
2007 * sequence represented by the argument string.<p>
2008 * Examples:
2009 * <blockquote><pre>
2010 * "cares".concat("s") returns "caress"
2011 * "to".concat("get").concat("her") returns "together"
2012 * </pre></blockquote>
2013 *
2014 * @param str the {@code String} that is concatenated to the end
2015 * of this {@code String}.
2016 * @return a string that represents the concatenation of this object's
2017 * characters followed by the string argument's characters.
2018 */
2019 public String concat(String str) {
2020 int otherLen = str.length();
2021 if (otherLen == 0) {
2022 return this;
2023 }
2024 char buf[] = new char[count + otherLen];
2025 getChars(0, count, buf, 0);
2026 str.getChars(0, otherLen, buf, count);
2027 return new String(0, count + otherLen, buf);
2028 }
2029
2030 /**
2031 * Returns a new string resulting from replacing all occurrences of
2032 * {@code oldChar} in this string with {@code newChar}.
2033 * <p>
2034 * If the character {@code oldChar} does not occur in the
2035 * character sequence represented by this {@code String} object,
2036 * then a reference to this {@code String} object is returned.
2037 * Otherwise, a new {@code String} object is created that
2038 * represents a character sequence identical to the character sequence
2039 * represented by this {@code String} object, except that every
2040 * occurrence of {@code oldChar} is replaced by an occurrence
2041 * of {@code newChar}.
2042 * <p>
2043 * Examples:
2044 * <blockquote><pre>
2045 * "mesquite in your cellar".replace('e', 'o')
2046 * returns "mosquito in your collar"
2047 * "the war of baronets".replace('r', 'y')
2048 * returns "the way of bayonets"
2049 * "sparring with a purple porpoise".replace('p', 't')
2050 * returns "starring with a turtle tortoise"
2051 * "JonL".replace('q', 'x') returns "JonL" (no change)
2052 * </pre></blockquote>
2053 *
2054 * @param oldChar the old character.
2055 * @param newChar the new character.
2056 * @return a string derived from this string by replacing every
2057 * occurrence of {@code oldChar} with {@code newChar}.
2058 */
2059 public String replace(char oldChar, char newChar) {
2060 if (oldChar != newChar) {
2061 int len = count;
2062 int i = -1;
2063 char[] val = value; /* avoid getfield opcode */
2064 int off = offset; /* avoid getfield opcode */
2065
2066 while (++i < len) {
2067 if (val[off + i] == oldChar) {
2068 break;
2069 }
2070 }
2071 if (i < len) {
2072 char buf[] = new char[len];
2073 for (int j = 0 ; j < i ; j++) {
2074 buf[j] = val[off+j];
2075 }
2076 while (i < len) {
2077 char c = val[off + i];
2078 buf[i] = (c == oldChar) ? newChar : c;
2079 i++;
2080 }
2081 return new String(0, len, buf);
2082 }
2083 }
2084 return this;
2085 }
2086
2087 /**
2088 * Tells whether or not this string matches the given <a
2089 * href="../util/regex/Pattern.html#sum">regular expression</a>.
2090 *
2091 * <p> An invocation of this method of the form
2092 * <i>str</i><tt>.matches(</tt><i>regex</i><tt>)</tt> yields exactly the
2093 * same result as the expression
2094 *
2095 * <blockquote><tt> {@link java.util.regex.Pattern}.{@link
2096 * java.util.regex.Pattern#matches(String,CharSequence)
2097 * matches}(</tt><i>regex</i><tt>,</tt> <i>str</i><tt>)</tt></blockquote>
2098 *
2099 * @param regex
2100 * the regular expression to which this string is to be matched
2101 *
2303 *
2304 * @return the array of strings computed by splitting this string
2305 * around matches of the given regular expression
2306 *
2307 * @throws PatternSyntaxException
2308 * if the regular expression's syntax is invalid
2309 *
2310 * @see java.util.regex.Pattern
2311 *
2312 * @since 1.4
2313 * @spec JSR-51
2314 */
2315 public String[] split(String regex, int limit) {
2316 /* fastpath if the regex is a
2317 (1)one-char String and this character is not one of the
2318 RegEx's meta characters ".$|()[{^?*+\\", or
2319 (2)two-char String and the first char is the backslash and
2320 the second is not the ascii digit or ascii letter.
2321 */
2322 char ch = 0;
2323 if (((regex.count == 1 &&
2324 ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
2325 (regex.length() == 2 &&
2326 regex.charAt(0) == '\\' &&
2327 (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
2328 ((ch-'a')|('z'-ch)) < 0 &&
2329 ((ch-'A')|('Z'-ch)) < 0)) &&
2330 (ch < Character.MIN_HIGH_SURROGATE ||
2331 ch > Character.MAX_LOW_SURROGATE))
2332 {
2333 int off = 0;
2334 int next = 0;
2335 boolean limited = limit > 0;
2336 ArrayList<String> list = new ArrayList<>();
2337 while ((next = indexOf(ch, off)) != -1) {
2338 if (!limited || list.size() < limit - 1) {
2339 list.add(substring(off, next));
2340 off = next + 1;
2341 } else { // last one
2342 //assert (list.size() == limit - 1);
2343 list.add(substring(off, count));
2344 off = count;
2345 break;
2346 }
2347 }
2348 // If no match was found, return this
2349 if (off == 0)
2350 return new String[] { this };
2351
2352 // Add remaining segment
2353 if (!limited || list.size() < limit)
2354 list.add(substring(off, count));
2355
2356 // Construct result
2357 int resultSize = list.size();
2358 if (limit == 0)
2359 while (resultSize > 0 && list.get(resultSize-1).length() == 0)
2360 resultSize--;
2361 String[] result = new String[resultSize];
2362 return list.subList(0, resultSize).toArray(result);
2363 }
2364 return Pattern.compile(regex).split(this, limit);
2365 }
2366
2367 /**
2368 * Splits this string around matches of the given <a
2369 * href="../util/regex/Pattern.html#sum">regular expression</a>.
2370 *
2371 * <p> This method works as if by invoking the two-argument {@link
2372 * #split(String, int) split} method with the given expression and a limit
2373 * argument of zero. Trailing empty strings are therefore not included in
2374 * the resulting array.
2375 *
2376 * <p> The string <tt>"boo:and:foo"</tt>, for example, yields the following
2377 * results with these expressions:
2378 *
2379 * <blockquote><table cellpadding=1 cellspacing=0 summary="Split examples showing regex and result">
2447 * <td><img src="doc-files/iota.gif" alt="iota"><img src="doc-files/chi.gif" alt="chi">
2448 * <img src="doc-files/theta.gif" alt="theta"><img src="doc-files/upsilon.gif" alt="upsilon">
2449 * <img src="doc-files/sigma1.gif" alt="sigma"></td>
2450 * <td>lowercased all chars in String</td>
2451 * </tr>
2452 * </table>
2453 *
2454 * @param locale use the case transformation rules for this locale
2455 * @return the {@code String}, converted to lowercase.
2456 * @see java.lang.String#toLowerCase()
2457 * @see java.lang.String#toUpperCase()
2458 * @see java.lang.String#toUpperCase(Locale)
2459 * @since 1.1
2460 */
2461 public String toLowerCase(Locale locale) {
2462 if (locale == null) {
2463 throw new NullPointerException();
2464 }
2465
2466 int firstUpper;
2467
2468 /* Now check if there are any characters that need to be changed. */
2469 scan: {
2470 for (firstUpper = 0 ; firstUpper < count; ) {
2471 char c = value[offset+firstUpper];
2472 if ((c >= Character.MIN_HIGH_SURROGATE) &&
2473 (c <= Character.MAX_HIGH_SURROGATE)) {
2474 int supplChar = codePointAt(firstUpper);
2475 if (supplChar != Character.toLowerCase(supplChar)) {
2476 break scan;
2477 }
2478 firstUpper += Character.charCount(supplChar);
2479 } else {
2480 if (c != Character.toLowerCase(c)) {
2481 break scan;
2482 }
2483 firstUpper++;
2484 }
2485 }
2486 return this;
2487 }
2488
2489 char[] result = new char[count];
2490 int resultOffset = 0; /* result may grow, so i+resultOffset
2491 * is the write location in result */
2492
2493 /* Just copy the first few lowerCase characters. */
2494 System.arraycopy(value, offset, result, 0, firstUpper);
2495
2496 String lang = locale.getLanguage();
2497 boolean localeDependent =
2498 (lang == "tr" || lang == "az" || lang == "lt");
2499 char[] lowerCharArray;
2500 int lowerChar;
2501 int srcChar;
2502 int srcCount;
2503 for (int i = firstUpper; i < count; i += srcCount) {
2504 srcChar = (int)value[offset+i];
2505 if ((char)srcChar >= Character.MIN_HIGH_SURROGATE &&
2506 (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
2507 srcChar = codePointAt(i);
2508 srcCount = Character.charCount(srcChar);
2509 } else {
2510 srcCount = 1;
2511 }
2512 if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
2513 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
2514 } else if (srcChar == '\u0130') { // LATIN CAPITAL LETTER I DOT
2515 lowerChar = Character.ERROR;
2516 } else {
2517 lowerChar = Character.toLowerCase(srcChar);
2518 }
2519 if ((lowerChar == Character.ERROR) ||
2520 (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
2521 if (lowerChar == Character.ERROR) {
2522 if (!localeDependent && srcChar == '\u0130') {
2523 lowerCharArray =
2524 ConditionalSpecialCasing.toLowerCaseCharArray(this, i, Locale.ENGLISH);
2525 } else {
2526 lowerCharArray =
2527 ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
2528 }
2529 } else if (srcCount == 2) {
2530 resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;
2531 continue;
2532 } else {
2533 lowerCharArray = Character.toChars(lowerChar);
2534 }
2535
2536 /* Grow result if needed */
2537 int mapLen = lowerCharArray.length;
2538 if (mapLen > srcCount) {
2539 char[] result2 = new char[result.length + mapLen - srcCount];
2540 System.arraycopy(result, 0, result2, 0,
2541 i + resultOffset);
2542 result = result2;
2543 }
2544 for (int x=0; x<mapLen; ++x) {
2545 result[i+resultOffset+x] = lowerCharArray[x];
2546 }
2547 resultOffset += (mapLen - srcCount);
2548 } else {
2549 result[i+resultOffset] = (char)lowerChar;
2550 }
2551 }
2552 return new String(0, count+resultOffset, result);
2553 }
2554
2555 /**
2556 * Converts all of the characters in this {@code String} to lower
2557 * case using the rules of the default locale. This is equivalent to calling
2558 * {@code toLowerCase(Locale.getDefault())}.
2559 * <p>
2560 * <b>Note:</b> This method is locale sensitive, and may produce unexpected
2561 * results if used for strings that are intended to be interpreted locale
2562 * independently.
2563 * Examples are programming language identifiers, protocol keys, and HTML
2564 * tags.
2565 * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale
2566 * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the
2567 * LATIN SMALL LETTER DOTLESS I character.
2568 * To obtain correct results for locale insensitive strings, use
2569 * {@code toLowerCase(Locale.ENGLISH)}.
2570 * <p>
2571 * @return the {@code String}, converted to lowercase.
2572 * @see java.lang.String#toLowerCase(Locale)
2612 * <tr>
2613 * <td>(all)</td>
2614 * <td>Fahrvergnügen</td>
2615 * <td>FAHRVERGNÜGEN</td>
2616 * <td></td>
2617 * </tr>
2618 * </table>
2619 * @param locale use the case transformation rules for this locale
2620 * @return the {@code String}, converted to uppercase.
2621 * @see java.lang.String#toUpperCase()
2622 * @see java.lang.String#toLowerCase()
2623 * @see java.lang.String#toLowerCase(Locale)
2624 * @since 1.1
2625 */
2626 public String toUpperCase(Locale locale) {
2627 if (locale == null) {
2628 throw new NullPointerException();
2629 }
2630
2631 int firstLower;
2632
2633 /* Now check if there are any characters that need to be changed. */
2634 scan: {
2635 for (firstLower = 0 ; firstLower < count; ) {
2636 int c = (int)value[offset+firstLower];
2637 int srcCount;
2638 if ((c >= Character.MIN_HIGH_SURROGATE) &&
2639 (c <= Character.MAX_HIGH_SURROGATE)) {
2640 c = codePointAt(firstLower);
2641 srcCount = Character.charCount(c);
2642 } else {
2643 srcCount = 1;
2644 }
2645 int upperCaseChar = Character.toUpperCaseEx(c);
2646 if ((upperCaseChar == Character.ERROR) ||
2647 (c != upperCaseChar)) {
2648 break scan;
2649 }
2650 firstLower += srcCount;
2651 }
2652 return this;
2653 }
2654
2655 char[] result = new char[count]; /* may grow */
2656 int resultOffset = 0; /* result may grow, so i+resultOffset
2657 * is the write location in result */
2658
2659 /* Just copy the first few upperCase characters. */
2660 System.arraycopy(value, offset, result, 0, firstLower);
2661
2662 String lang = locale.getLanguage();
2663 boolean localeDependent =
2664 (lang == "tr" || lang == "az" || lang == "lt");
2665 char[] upperCharArray;
2666 int upperChar;
2667 int srcChar;
2668 int srcCount;
2669 for (int i = firstLower; i < count; i += srcCount) {
2670 srcChar = (int)value[offset+i];
2671 if ((char)srcChar >= Character.MIN_HIGH_SURROGATE &&
2672 (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
2673 srcChar = codePointAt(i);
2674 srcCount = Character.charCount(srcChar);
2675 } else {
2676 srcCount = 1;
2677 }
2678 if (localeDependent) {
2679 upperChar = ConditionalSpecialCasing.toUpperCaseEx(this, i, locale);
2680 } else {
2681 upperChar = Character.toUpperCaseEx(srcChar);
2682 }
2683 if ((upperChar == Character.ERROR) ||
2684 (upperChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
2685 if (upperChar == Character.ERROR) {
2686 if (localeDependent) {
2687 upperCharArray =
2688 ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
2689 } else {
2690 upperCharArray = Character.toUpperCaseCharArray(srcChar);
2691 }
2692 } else if (srcCount == 2) {
2693 resultOffset += Character.toChars(upperChar, result, i + resultOffset) - srcCount;
2694 continue;
2695 } else {
2696 upperCharArray = Character.toChars(upperChar);
2697 }
2698
2699 /* Grow result if needed */
2700 int mapLen = upperCharArray.length;
2701 if (mapLen > srcCount) {
2702 char[] result2 = new char[result.length + mapLen - srcCount];
2703 System.arraycopy(result, 0, result2, 0,
2704 i + resultOffset);
2705 result = result2;
2706 }
2707 for (int x=0; x<mapLen; ++x) {
2708 result[i+resultOffset+x] = upperCharArray[x];
2709 }
2710 resultOffset += (mapLen - srcCount);
2711 } else {
2712 result[i+resultOffset] = (char)upperChar;
2713 }
2714 }
2715 return new String(0, count+resultOffset, result);
2716 }
2717
2718 /**
2719 * Converts all of the characters in this {@code String} to upper
2720 * case using the rules of the default locale. This method is equivalent to
2721 * {@code toUpperCase(Locale.getDefault())}.
2722 * <p>
2723 * <b>Note:</b> This method is locale sensitive, and may produce unexpected
2724 * results if used for strings that are intended to be interpreted locale
2725 * independently.
2726 * Examples are programming language identifiers, protocol keys, and HTML
2727 * tags.
2728 * For instance, {@code "title".toUpperCase()} in a Turkish locale
2729 * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the
2730 * LATIN CAPITAL LETTER I WITH DOT ABOVE character.
2731 * To obtain correct results for locale insensitive strings, use
2732 * {@code toUpperCase(Locale.ENGLISH)}.
2733 * <p>
2734 * @return the {@code String}, converted to uppercase.
2735 * @see java.lang.String#toUpperCase(Locale)
2753 * {@code String} object representing an empty string is created
2754 * and returned.
2755 * <p>
2756 * Otherwise, let <i>k</i> be the index of the first character in the
2757 * string whose code is greater than {@code '\u005Cu0020'}, and let
2758 * <i>m</i> be the index of the last character in the string whose code
2759 * is greater than {@code '\u005Cu0020'}. A new {@code String}
2760 * object is created, representing the substring of this string that
2761 * begins with the character at index <i>k</i> and ends with the
2762 * character at index <i>m</i>-that is, the result of
2763 * <code>this.substring(<i>k</i>, <i>m</i>+1)</code>.
2764 * <p>
2765 * This method may be used to trim whitespace (as defined above) from
2766 * the beginning and end of a string.
2767 *
2768 * @return A copy of this string with leading and trailing white
2769 * space removed, or this string if it has no leading or
2770 * trailing white space.
2771 */
2772 public String trim() {
2773 int len = count;
2774 int st = 0;
2775 int off = offset; /* avoid getfield opcode */
2776 char[] val = value; /* avoid getfield opcode */
2777
2778 while ((st < len) && (val[off + st] <= ' ')) {
2779 st++;
2780 }
2781 while ((st < len) && (val[off + len - 1] <= ' ')) {
2782 len--;
2783 }
2784 return ((st > 0) || (len < count)) ? substring(st, len) : this;
2785 }
2786
2787 /**
2788 * This object (which is already a string!) is itself returned.
2789 *
2790 * @return the string itself.
2791 */
2792 public String toString() {
2793 return this;
2794 }
2795
2796 /**
2797 * Converts this string to a new character array.
2798 *
2799 * @return a newly allocated character array whose length is the length
2800 * of this string and whose contents are initialized to contain
2801 * the character sequence represented by this string.
2802 */
2803 public char[] toCharArray() {
2804 char result[] = new char[count];
2805 getChars(0, count, result, 0);
2806 return result;
2807 }
2808
2809 /**
2810 * Returns a formatted string using the specified format string and
2811 * arguments.
2812 *
2813 * <p> The locale always used is the one returned by {@link
2814 * java.util.Locale#getDefault() Locale.getDefault()}.
2815 *
2816 * @param format
2817 * A <a href="../util/Formatter.html#syntax">format string</a>
2818 *
2819 * @param args
2820 * Arguments referenced by the format specifiers in the format
2821 * string. If there are more arguments than format specifiers, the
2822 * extra arguments are ignored. The number of arguments is
2823 * variable and may be zero. The maximum number of arguments is
2824 * limited by the maximum dimension of a Java array as defined by
2825 * <cite>The Java™ Virtual Machine Specification</cite>.
2826 * The behaviour on a
2827 * <tt>null</tt> argument depends on the <a
2828 * href="../util/Formatter.html#syntax">conversion</a>.
2829 *
2830 * @throws IllegalFormatException
2831 * If a format string contains an illegal syntax, a format
2832 * specifier that is incompatible with the given arguments,
2833 * insufficient arguments given the format string, or other
2834 * illegal conditions. For specification of all possible
2835 * formatting errors, see the <a
2836 * href="../util/Formatter.html#detail">Details</a> section of the
2837 * formatter class specification.
2838 *
2839 * @throws NullPointerException
2840 * If the <tt>format</tt> is <tt>null</tt>
2841 *
2842 * @return A formatted string
2843 *
2844 * @see java.util.Formatter
2845 * @since 1.5
2846 */
2847 public static String format(String format, Object ... args) {
2848 return new Formatter().format(format, args).toString();
2849 }
2850
2851 /**
2852 * Returns a formatted string using the specified locale, format string,
2853 * and arguments.
2854 *
2855 * @param l
2856 * The {@linkplain java.util.Locale locale} to apply during
2857 * formatting. If <tt>l</tt> is <tt>null</tt> then no localization
2858 * is applied.
2859 *
2860 * @param format
2861 * A <a href="../util/Formatter.html#syntax">format string</a>
2862 *
2863 * @param args
2864 * Arguments referenced by the format specifiers in the format
2865 * string. If there are more arguments than format specifiers, the
2866 * extra arguments are ignored. The number of arguments is
2867 * variable and may be zero. The maximum number of arguments is
2871 * <tt>null</tt> argument depends on the <a
2872 * href="../util/Formatter.html#syntax">conversion</a>.
2873 *
2874 * @throws IllegalFormatException
2875 * If a format string contains an illegal syntax, a format
2876 * specifier that is incompatible with the given arguments,
2877 * insufficient arguments given the format string, or other
2878 * illegal conditions. For specification of all possible
2879 * formatting errors, see the <a
2880 * href="../util/Formatter.html#detail">Details</a> section of the
2881 * formatter class specification
2882 *
2883 * @throws NullPointerException
2884 * If the <tt>format</tt> is <tt>null</tt>
2885 *
2886 * @return A formatted string
2887 *
2888 * @see java.util.Formatter
2889 * @since 1.5
2890 */
2891 public static String format(Locale l, String format, Object ... args) {
2892 return new Formatter(l).format(format, args).toString();
2893 }
2894
2895 /**
2896 * Returns the string representation of the {@code Object} argument.
2897 *
2898 * @param obj an {@code Object}.
2899 * @return if the argument is {@code null}, then a string equal to
2900 * {@code "null"}; otherwise, the value of
2901 * {@code obj.toString()} is returned.
2902 * @see java.lang.Object#toString()
2903 */
2904 public static String valueOf(Object obj) {
2905 return (obj == null) ? "null" : obj.toString();
2906 }
2907
2908 /**
2909 * Returns the string representation of the {@code char} array
2910 * argument. The contents of the character array are copied; subsequent
2911 * modification of the character array does not affect the newly
2976 *
2977 * @param b a {@code boolean}.
2978 * @return if the argument is {@code true}, a string equal to
2979 * {@code "true"} is returned; otherwise, a string equal to
2980 * {@code "false"} is returned.
2981 */
2982 public static String valueOf(boolean b) {
2983 return b ? "true" : "false";
2984 }
2985
2986 /**
2987 * Returns the string representation of the {@code char}
2988 * argument.
2989 *
2990 * @param c a {@code char}.
2991 * @return a string of length {@code 1} containing
2992 * as its single character the argument {@code c}.
2993 */
2994 public static String valueOf(char c) {
2995 char data[] = {c};
2996 return new String(0, 1, data);
2997 }
2998
2999 /**
3000 * Returns the string representation of the {@code int} argument.
3001 * <p>
3002 * The representation is exactly the one returned by the
3003 * {@code Integer.toString} method of one argument.
3004 *
3005 * @param i an {@code int}.
3006 * @return a string representation of the {@code int} argument.
3007 * @see java.lang.Integer#toString(int, int)
3008 */
3009 public static String valueOf(int i) {
3010 return Integer.toString(i);
3011 }
3012
3013 /**
3014 * Returns the string representation of the {@code long} argument.
3015 * <p>
3016 * The representation is exactly the one returned by the
3059 * class {@code String}.
3060 * <p>
3061 * When the intern method is invoked, if the pool already contains a
3062 * string equal to this {@code String} object as determined by
3063 * the {@link #equals(Object)} method, then the string from the pool is
3064 * returned. Otherwise, this {@code String} object is added to the
3065 * pool and a reference to this {@code String} object is returned.
3066 * <p>
3067 * It follows that for any two strings {@code s} and {@code t},
3068 * {@code s.intern() == t.intern()} is {@code true}
3069 * if and only if {@code s.equals(t)} is {@code true}.
3070 * <p>
3071 * All literal strings and string-valued constant expressions are
3072 * interned. String literals are defined in section 3.10.5 of the
3073 * <cite>The Java™ Language Specification</cite>.
3074 *
3075 * @return a string that has the same contents as this string, but is
3076 * guaranteed to be from a pool of unique strings.
3077 */
3078 public native String intern(); // declared native: this class provides no Java body for it
3079
3080 }
|
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 package java.lang;
26
27 import java.io.ObjectStreamField;
28 import java.io.UnsupportedEncodingException;
29 import java.nio.charset.Charset;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.Comparator;
33 import java.util.Formatter;
34 import java.util.Locale;
35 import java.util.regex.Matcher;
36 import java.util.regex.Pattern;
37 import java.util.regex.PatternSyntaxException;
38
39 /**
40 * The {@code String} class represents character strings. All
41 * string literals in Java programs, such as {@code "abc"}, are
42 * implemented as instances of this class.
43 * <p>
44 * Strings are constant; their values cannot be changed after they
45 * are created. String buffers support mutable strings.
46 * Because String objects are immutable they can be shared. For example:
87 * in which <em>supplementary characters</em> are represented by <em>surrogate
88 * pairs</em> (see the section <a href="Character.html#unicode">Unicode
89 * Character Representations</a> in the {@code Character} class for
90 * more information).
91 * Index values refer to {@code char} code units, so a supplementary
92 * character uses two positions in a {@code String}.
93 * <p>The {@code String} class provides methods for dealing with
94 * Unicode code points (i.e., characters), in addition to those for
95 * dealing with Unicode code units (i.e., {@code char} values).
96 *
97 * @author Lee Boynton
98 * @author Arthur van Hoff
99 * @author Martin Buchholz
100 * @author Ulf Zibis
101 * @see java.lang.Object#toString()
102 * @see java.lang.StringBuffer
103 * @see java.lang.StringBuilder
104 * @see java.nio.charset.Charset
105 * @since JDK1.0
106 */
107 public final class String
108 implements java.io.Serializable, Comparable<String>, CharSequence {
109
110 /** The value is used for character storage. */
111 private final char value[];
112
113 /** Cache the hash code for the string */
114 private int hash; // Default to 0
115
116 /** use serialVersionUID from JDK 1.0.2 for interoperability */
117 private static final long serialVersionUID = -6849794470754667710L;
118
119 /**
120 * Class String is special cased within the Serialization Stream Protocol.
121 *
122 * A String instance is written initially into an ObjectOutputStream in the
123 * following format:
124 * <pre>
125 * {@code TC_STRING} (utf String)
126 * </pre>
127 * The String is written by method {@code DataOutput.writeUTF}.
128 * A new handle is generated to refer to all future references to the
129 * string instance within the stream.
130 */
131 private static final ObjectStreamField[] serialPersistentFields =
132 new ObjectStreamField[0];
133
/**
 * Creates a {@code String} that represents an empty character sequence.
 * Because Strings are immutable, using this constructor is unnecessary.
 */
public String() {
    this.value = new char[] {};
}
142
143 /**
144 * Initializes a newly created {@code String} object so that it represents
145 * the same sequence of characters as the argument; in other words, the
146 * newly created string is a copy of the argument string. Unless an
147 * explicit copy of {@code original} is needed, use of this constructor is
148 * unnecessary since Strings are immutable.
149 *
150 * @param original
151 * A {@code String}
152 */
153 public String(String original) {
154 this.value = original.value;
155 this.hash = original.hash;
156 }
157
158 /**
159 * Allocates a new {@code String} so that it represents the sequence of
160 * characters currently contained in the character array argument. The
161 * contents of the character array are copied; subsequent modification of
162 * the character array does not affect the newly created string.
163 *
164 * @param value
165 * The initial value of the string
166 */
167 public String(char value[]) {
168 this.value = Arrays.copyOf(value, value.length);
169 }
170
171 /**
172 * Allocates a new {@code String} that contains characters from a subarray
173 * of the character array argument. The {@code offset} argument is the
174 * index of the first character of the subarray and the {@code count}
175 * argument specifies the length of the subarray. The contents of the
176 * subarray are copied; subsequent modification of the character array does
177 * not affect the newly created string.
178 *
179 * @param value
180 * Array that is the source of characters
181 *
182 * @param offset
183 * The initial offset
184 *
185 * @param count
186 * The length
187 *
188 * @throws IndexOutOfBoundsException
189 * If the {@code offset} and {@code count} arguments index
190 * characters outside the bounds of the {@code value} array
191 */
192 public String(char value[], int offset, int count) {
193 if (offset < 0) {
194 throw new StringIndexOutOfBoundsException(offset);
195 }
196 if (count < 0) {
197 throw new StringIndexOutOfBoundsException(count);
198 }
199 // Note: offset or count might be near -1>>>1.
200 if (offset > value.length - count) {
201 throw new StringIndexOutOfBoundsException(offset + count);
202 }
203 this.value = Arrays.copyOfRange(value, offset, offset+count);
204 }
205
206 /**
207 * Allocates a new {@code String} that contains characters from a subarray
208 * of the <a href="Character.html#unicode">Unicode code point</a> array
209 * argument. The {@code offset} argument is the index of the first code
210 * point of the subarray and the {@code count} argument specifies the
211 * length of the subarray. The contents of the subarray are converted to
212 * {@code char}s; subsequent modification of the {@code int} array does not
213 * affect the newly created string.
214 *
215 * @param codePoints
216 * Array that is the source of Unicode code points
217 *
218 * @param offset
219 * The initial offset
220 *
221 * @param count
222 * The length
245
246 final int end = offset + count;
247
248 // Pass 1: Compute precise size of char[]
249 int n = count;
250 for (int i = offset; i < end; i++) {
251 int c = codePoints[i];
252 if (Character.isBmpCodePoint(c))
253 continue;
254 else if (Character.isValidCodePoint(c))
255 n++;
256 else throw new IllegalArgumentException(Integer.toString(c));
257 }
258
259 // Pass 2: Allocate and fill in char[]
260 final char[] v = new char[n];
261
262 for (int i = offset, j = 0; i < end; i++, j++) {
263 int c = codePoints[i];
264 if (Character.isBmpCodePoint(c))
265 v[j] = (char)c;
266 else
267 Character.toSurrogates(c, v, j++);
268 }
269
270 this.value = v;
271 }
272
273 /**
274 * Allocates a new {@code String} constructed from a subarray of an array
275 * of 8-bit integer values.
276 *
277 * <p> The {@code offset} argument is the index of the first byte of the
278 * subarray, and the {@code count} argument specifies the length of the
279 * subarray.
280 *
281 * <p> Each {@code byte} in the subarray is converted to a {@code char} as
282 * specified in the method above.
283 *
284 * @deprecated This method does not properly convert bytes into characters.
285 * As of JDK 1.1, the preferred way to do this is via the
286 * {@code String} constructors that take a {@link
287 * java.nio.charset.Charset}, charset name, or that use the platform's
288 * default charset.
289 *
290 * @param ascii
298 * @param count
299 * The length
300 *
301 * @throws IndexOutOfBoundsException
302 * If the {@code offset} or {@code count} argument is invalid
303 *
304 * @see #String(byte[], int)
305 * @see #String(byte[], int, int, java.lang.String)
306 * @see #String(byte[], int, int, java.nio.charset.Charset)
307 * @see #String(byte[], int, int)
308 * @see #String(byte[], java.lang.String)
309 * @see #String(byte[], java.nio.charset.Charset)
310 * @see #String(byte[])
311 */
312 @Deprecated
313 public String(byte ascii[], int hibyte, int offset, int count) {
314 checkBounds(ascii, offset, count);
315 char value[] = new char[count];
316
317 if (hibyte == 0) {
318 for (int i = count; i-- > 0;) {
319 value[i] = (char)(ascii[i + offset] & 0xff);
320 }
321 } else {
322 hibyte <<= 8;
323 for (int i = count; i-- > 0;) {
324 value[i] = (char)(hibyte | (ascii[i + offset] & 0xff));
325 }
326 }
327 this.value = value;
328 }
329
330 /**
331 * Allocates a new {@code String} containing characters constructed from
332 * an array of 8-bit integer values. Each character <i>c</i> in the
333 * resulting string is constructed from the corresponding component
334 * <i>b</i> in the byte array such that:
335 *
336 * <blockquote><pre>
337 * <b><i>c</i></b> == (char)(((hibyte & 0xff) << 8)
338 * | (<b><i>b</i></b> & 0xff))
339 * </pre></blockquote>
340 *
341 * @deprecated This method does not properly convert bytes into
342 * characters. As of JDK 1.1, the preferred way to do this is via the
343 * {@code String} constructors that take a {@link
344 * java.nio.charset.Charset}, charset name, or that use the platform's
345 * default charset.
346 *
392 * @param offset
393 * The index of the first byte to decode
394 *
395 * @param length
396 * The number of bytes to decode
397 *
398 * @param charsetName
399 * The name of a supported {@linkplain java.nio.charset.Charset
400 * charset}
401 *
402 * @throws UnsupportedEncodingException
403 * If the named charset is not supported
404 *
405 * @throws IndexOutOfBoundsException
406 * If the {@code offset} and {@code length} arguments index
407 * characters outside the bounds of the {@code bytes} array
408 *
409 * @since JDK1.1
410 */
411 public String(byte bytes[], int offset, int length, String charsetName)
412 throws UnsupportedEncodingException {
413 if (charsetName == null)
414 throw new NullPointerException("charsetName");
415 checkBounds(bytes, offset, length);
416 this.value = StringCoding.decode(charsetName, bytes, offset, length);
417 }
418
419 /**
420 * Constructs a new {@code String} by decoding the specified subarray of
421 * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
422 * The length of the new {@code String} is a function of the charset, and
423 * hence may not be equal to the length of the subarray.
424 *
425 * <p> This method always replaces malformed-input and unmappable-character
426 * sequences with this charset's default replacement string. The {@link
427 * java.nio.charset.CharsetDecoder} class should be used when more control
428 * over the decoding process is required.
429 *
430 * @param bytes
431 * The bytes to be decoded into characters
432 *
433 * @param offset
434 * The index of the first byte to decode
435 *
436 * @param length
437 * The number of bytes to decode
438 *
439 * @param charset
440 * The {@linkplain java.nio.charset.Charset charset} to be used to
441 * decode the {@code bytes}
442 *
443 * @throws IndexOutOfBoundsException
444 * If the {@code offset} and {@code length} arguments index
445 * characters outside the bounds of the {@code bytes} array
446 *
447 * @since 1.6
448 */
449 public String(byte bytes[], int offset, int length, Charset charset) {
450 if (charset == null)
451 throw new NullPointerException("charset");
452 checkBounds(bytes, offset, length);
453 this.value = StringCoding.decode(charset, bytes, offset, length);
454 }
455
456 /**
457 * Constructs a new {@code String} by decoding the specified array of bytes
458 * using the specified {@linkplain java.nio.charset.Charset charset}. The
459 * length of the new {@code String} is a function of the charset, and hence
460 * may not be equal to the length of the byte array.
461 *
462 * <p> The behavior of this constructor when the given bytes are not valid
463 * in the given charset is unspecified. The {@link
464 * java.nio.charset.CharsetDecoder} class should be used when more control
465 * over the decoding process is required.
466 *
467 * @param bytes
468 * The bytes to be decoded into characters
469 *
470 * @param charsetName
471 * The name of a supported {@linkplain java.nio.charset.Charset
472 * charset}
473 *
474 * @throws UnsupportedEncodingException
475 * If the named charset is not supported
476 *
477 * @since JDK1.1
478 */
public String(byte[] bytes, String charsetName) throws UnsupportedEncodingException {
    // Whole-array form of the (bytes, offset, length, charsetName) constructor.
    this(bytes, 0, bytes.length, charsetName);
}
483
484 /**
485 * Constructs a new {@code String} by decoding the specified array of
486 * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
487 * The length of the new {@code String} is a function of the charset, and
488 * hence may not be equal to the length of the byte array.
489 *
490 * <p> This method always replaces malformed-input and unmappable-character
491 * sequences with this charset's default replacement string. The {@link
492 * java.nio.charset.CharsetDecoder} class should be used when more control
493 * over the decoding process is required.
494 *
495 * @param bytes
496 * The bytes to be decoded into characters
497 *
498 * @param charset
499 * The {@linkplain java.nio.charset.Charset charset} to be used to
500 * decode the {@code bytes}
516 * java.nio.charset.CharsetDecoder} class should be used when more control
517 * over the decoding process is required.
518 *
519 * @param bytes
520 * The bytes to be decoded into characters
521 *
522 * @param offset
523 * The index of the first byte to decode
524 *
525 * @param length
526 * The number of bytes to decode
527 *
528 * @throws IndexOutOfBoundsException
529 * If the {@code offset} and the {@code length} arguments index
530 * characters outside the bounds of the {@code bytes} array
531 *
532 * @since JDK1.1
533 */
534 public String(byte bytes[], int offset, int length) {
535 checkBounds(bytes, offset, length);
536 this.value = StringCoding.decode(bytes, offset, length);
537 }
538
539 /**
540 * Constructs a new {@code String} by decoding the specified array of bytes
541 * using the platform's default charset. The length of the new {@code
542 * String} is a function of the charset, and hence may not be equal to the
543 * length of the byte array.
544 *
545 * <p> The behavior of this constructor when the given bytes are not valid
546 * in the default charset is unspecified. The {@link
547 * java.nio.charset.CharsetDecoder} class should be used when more control
548 * over the decoding process is required.
549 *
550 * @param bytes
551 * The bytes to be decoded into characters
552 *
553 * @since JDK1.1
554 */
public String(byte[] bytes) {
    // Whole-array form of the (bytes, offset, length) constructor.
    this(bytes, 0, bytes.length);
}
558
559 /**
560 * Allocates a new string that contains the sequence of characters
561 * currently contained in the string buffer argument. The contents of the
562 * string buffer are copied; subsequent modification of the string buffer
563 * does not affect the newly created string.
564 *
565 * @param buffer
566 * A {@code StringBuffer}
567 */
568 public String(StringBuffer buffer) {
569 synchronized(buffer) {
570 this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
571 }
572 }
573
574 /**
575 * Allocates a new string that contains the sequence of characters
576 * currently contained in the string builder argument. The contents of the
577 * string builder are copied; subsequent modification of the string builder
578 * does not affect the newly created string.
579 *
580 * <p> This constructor is provided to ease migration to {@code
581 * StringBuilder}. Obtaining a string from a string builder via the {@code
582 * toString} method is likely to run faster and is generally preferred.
583 *
584 * @param builder
585 * A {@code StringBuilder}
586 *
587 * @since 1.5
588 */
589 public String(StringBuilder builder) {
590 this.value = Arrays.copyOf(builder.getValue(), builder.length());
591 }
592
593 /*
594 * Package private constructor which shares value array for speed.
595 * This constructor is always expected to be called with share==true.
596 * A separate constructor is needed because we already have a public
597 * String(char[]) constructor that makes a copy of the given char[].
598 */
String(char[] value, boolean share) {
    // The given array is adopted directly, without a defensive copy. The 'share'
    // flag is never read; it only gives this overload a distinct signature.
    // assert share : "unshared not supported";
    this.value = value;
}
603
604 /**
605 * Package private constructor
606 *
607 * @deprecated Use {@link #String(char[],int,int)} instead.
608 */
@Deprecated
String(int offset, int count, char[] value) {
    // Same three arguments as String(char[], int, int), merely reordered.
    this(value, offset, count);
}
613
614 /**
615 * Returns the length of this string.
616 * The length is equal to the number of <a href="Character.html#unicode">Unicode
617 * code units</a> in the string.
618 *
619 * @return the length of the sequence of characters represented by this
620 * object.
621 */
622 public int length() {
623 return value.length;
624 }
625
626 /**
627 * Returns <tt>true</tt> if, and only if, {@link #length()} is <tt>0</tt>.
628 *
629 * @return <tt>true</tt> if {@link #length()} is <tt>0</tt>, otherwise
630 * <tt>false</tt>
631 *
632 * @since 1.6
633 */
634 public boolean isEmpty() {
635 return value.length == 0;
636 }
637
638 /**
639 * Returns the {@code char} value at the
640 * specified index. An index ranges from {@code 0} to
641 * {@code length() - 1}. The first {@code char} value of the sequence
642 * is at index {@code 0}, the next at index {@code 1},
643 * and so on, as for array indexing.
644 *
645 * <p>If the {@code char} value specified by the index is a
646 * <a href="Character.html#unicode">surrogate</a>, the surrogate
647 * value is returned.
648 *
649 * @param index the index of the {@code char} value.
650 * @return the {@code char} value at the specified index of this string.
651 * The first {@code char} value is at index {@code 0}.
652 * @exception IndexOutOfBoundsException if the {@code index}
653 * argument is negative or not less than the length of this
654 * string.
655 */
656 public char charAt(int index) {
657 if ((index < 0) || (index >= value.length)) {
658 throw new StringIndexOutOfBoundsException(index);
659 }
660 return value[index];
661 }
662
663 /**
664 * Returns the character (Unicode code point) at the specified
665 * index. The index refers to {@code char} values
666 * (Unicode code units) and ranges from {@code 0} to
667 * {@link #length()}{@code - 1}.
668 *
669 * <p> If the {@code char} value specified at the given index
670 * is in the high-surrogate range, the following index is less
671 * than the length of this {@code String}, and the
672 * {@code char} value at the following index is in the
673 * low-surrogate range, then the supplementary code point
674 * corresponding to this surrogate pair is returned. Otherwise,
675 * the {@code char} value at the given index is returned.
676 *
677 * @param index the index to the {@code char} values
678 * @return the code point value of the character at the
679 * {@code index}
680 * @exception IndexOutOfBoundsException if the {@code index}
681 * argument is negative or not less than the length of this
682 * string.
683 * @since 1.5
684 */
685 public int codePointAt(int index) {
686 if ((index < 0) || (index >= value.length)) {
687 throw new StringIndexOutOfBoundsException(index);
688 }
689 return Character.codePointAtImpl(value, index, value.length);
690 }
691
692 /**
693 * Returns the character (Unicode code point) before the specified
694 * index. The index refers to {@code char} values
695 * (Unicode code units) and ranges from {@code 1} to {@link
696 * CharSequence#length() length}.
697 *
698 * <p> If the {@code char} value at {@code (index - 1)}
699 * is in the low-surrogate range, {@code (index - 2)} is not
700 * negative, and the {@code char} value at {@code (index -
701 * 2)} is in the high-surrogate range, then the
702 * supplementary code point value of the surrogate pair is
703 * returned. If the {@code char} value at {@code index -
704 * 1} is an unpaired low-surrogate or a high-surrogate, the
705 * surrogate value is returned.
706 *
707 * @param index the index following the code point that should be returned
708 * @return the Unicode code point value before the given index.
709 * @exception IndexOutOfBoundsException if the {@code index}
710 * argument is less than 1 or greater than the length
711 * of this string.
712 * @since 1.5
713 */
714 public int codePointBefore(int index) {
715 int i = index - 1;
716 if ((i < 0) || (i >= value.length)) {
717 throw new StringIndexOutOfBoundsException(index);
718 }
719 return Character.codePointBeforeImpl(value, index, 0);
720 }
721
722 /**
723 * Returns the number of Unicode code points in the specified text
724 * range of this {@code String}. The text range begins at the
725 * specified {@code beginIndex} and extends to the
726 * {@code char} at index {@code endIndex - 1}. Thus the
727 * length (in {@code char}s) of the text range is
728 * {@code endIndex-beginIndex}. Unpaired surrogates within
729 * the text range count as one code point each.
730 *
731 * @param beginIndex the index to the first {@code char} of
732 * the text range.
733 * @param endIndex the index after the last {@code char} of
734 * the text range.
735 * @return the number of Unicode code points in the specified text
736 * range
737 * @exception IndexOutOfBoundsException if the
738 * {@code beginIndex} is negative, or {@code endIndex}
739 * is larger than the length of this {@code String}, or
740 * {@code beginIndex} is larger than {@code endIndex}.
741 * @since 1.5
742 */
743 public int codePointCount(int beginIndex, int endIndex) {
744 if (beginIndex < 0 || endIndex > value.length || beginIndex > endIndex) {
745 throw new IndexOutOfBoundsException();
746 }
747 return Character.codePointCountImpl(value, beginIndex, endIndex - beginIndex);
748 }
749
750 /**
751 * Returns the index within this {@code String} that is
752 * offset from the given {@code index} by
753 * {@code codePointOffset} code points. Unpaired surrogates
754 * within the text range given by {@code index} and
755 * {@code codePointOffset} count as one code point each.
756 *
757 * @param index the index to be offset
758 * @param codePointOffset the offset in code points
759 * @return the index within this {@code String}
760 * @exception IndexOutOfBoundsException if {@code index}
761 * is negative or larger than the length of this
762 * {@code String}, or if {@code codePointOffset} is positive
763 * and the substring starting with {@code index} has fewer
764 * than {@code codePointOffset} code points,
765 * or if {@code codePointOffset} is negative and the substring
766 * before {@code index} has fewer than the absolute value
767 * of {@code codePointOffset} code points.
768 * @since 1.5
769 */
770 public int offsetByCodePoints(int index, int codePointOffset) {
771 if (index < 0 || index > value.length) {
772 throw new IndexOutOfBoundsException();
773 }
774 return Character.offsetByCodePointsImpl(value, 0, value.length,
775 index, codePointOffset);
776 }
777
778 /**
779 * Copy characters from this string into dst starting at dstBegin.
780 * This method doesn't perform any range checking.
781 */
782 void getChars(char dst[], int dstBegin) {
783 System.arraycopy(value, 0, dst, dstBegin, value.length);
784 }
785
786 /**
787 * Copies characters from this string into the destination character
788 * array.
789 * <p>
790 * The first character to be copied is at index {@code srcBegin};
791 * the last character to be copied is at index {@code srcEnd-1}
792 * (thus the total number of characters to be copied is
793 * {@code srcEnd-srcBegin}). The characters are copied into the
794 * subarray of {@code dst} starting at index {@code dstBegin}
795 * and ending at index:
796 * <p><blockquote><pre>
797 * dstBegin + (srcEnd-srcBegin) - 1
798 * </pre></blockquote>
799 *
800 * @param srcBegin index of the first character in the string
801 * to copy.
802 * @param srcEnd index after the last character in the string
803 * to copy.
804 * @param dst the destination array.
805 * @param dstBegin the start offset in the destination array.
806 * @exception IndexOutOfBoundsException If any of the following
807 * is true:
808 * <ul><li>{@code srcBegin} is negative.
809 * <li>{@code srcBegin} is greater than {@code srcEnd}
810 * <li>{@code srcEnd} is greater than the length of this
811 * string
812 * <li>{@code dstBegin} is negative
813 * <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than
814 * {@code dst.length}</ul>
815 */
816 public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) {
817 if (srcBegin < 0) {
818 throw new StringIndexOutOfBoundsException(srcBegin);
819 }
820 if (srcEnd > value.length) {
821 throw new StringIndexOutOfBoundsException(srcEnd);
822 }
823 if (srcBegin > srcEnd) {
824 throw new StringIndexOutOfBoundsException(srcEnd - srcBegin);
825 }
826 System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
827 }
828
829 /**
830 * Copies characters from this string into the destination byte array. Each
831 * byte receives the 8 low-order bits of the corresponding character. The
832 * eight high-order bits of each character are not copied and do not
833 * participate in the transfer in any way.
834 *
835 * <p> The first character to be copied is at index {@code srcBegin}; the
836 * last character to be copied is at index {@code srcEnd-1}. The total
837 * number of characters to be copied is {@code srcEnd-srcBegin}. The
838 * characters, converted to bytes, are copied into the subarray of {@code
839 * dst} starting at index {@code dstBegin} and ending at index:
840 *
841 * <blockquote><pre>
842 * dstBegin + (srcEnd-srcBegin) - 1
843 * </pre></blockquote>
844 *
845 * @deprecated This method does not properly convert characters into
846 * bytes. As of JDK 1.1, the preferred way to do this is via the
857 *
858 * @param dstBegin
859 * The start offset in the destination array
860 *
861 * @throws IndexOutOfBoundsException
862 * If any of the following is true:
863 * <ul>
864 * <li> {@code srcBegin} is negative
865 * <li> {@code srcBegin} is greater than {@code srcEnd}
866 * <li> {@code srcEnd} is greater than the length of this String
867 * <li> {@code dstBegin} is negative
868 * <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code
869 * dst.length}
870 * </ul>
871 */
872 @Deprecated
873 public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) {
874 if (srcBegin < 0) {
875 throw new StringIndexOutOfBoundsException(srcBegin);
876 }
877 if (srcEnd > value.length) {
878 throw new StringIndexOutOfBoundsException(srcEnd);
879 }
880 if (srcBegin > srcEnd) {
881 throw new StringIndexOutOfBoundsException(srcEnd - srcBegin);
882 }
883 int j = dstBegin;
884 int n = srcEnd;
885 int i = srcBegin;
886 char[] val = value; /* avoid getfield opcode */
887
888 while (i < n) {
889 dst[j++] = (byte)val[i++];
890 }
891 }
892
893 /**
894 * Encodes this {@code String} into a sequence of bytes using the named
895 * charset, storing the result into a new byte array.
896 *
897 * <p> The behavior of this method when this string cannot be encoded in
898 * the given charset is unspecified. The {@link
899 * java.nio.charset.CharsetEncoder} class should be used when more control
900 * over the encoding process is required.
901 *
902 * @param charsetName
903 * The name of a supported {@linkplain java.nio.charset.Charset
904 * charset}
905 *
906 * @return The resultant byte array
907 *
908 * @throws UnsupportedEncodingException
909 * If the named charset is not supported
910 *
911 * @since JDK1.1
912 */
913 public byte[] getBytes(String charsetName)
914 throws UnsupportedEncodingException {
915 if (charsetName == null) throw new NullPointerException();
916 return StringCoding.encode(charsetName, value, 0, value.length);
917 }
918
919 /**
920 * Encodes this {@code String} into a sequence of bytes using the given
921 * {@linkplain java.nio.charset.Charset charset}, storing the result into a
922 * new byte array.
923 *
924 * <p> This method always replaces malformed-input and unmappable-character
925 * sequences with this charset's default replacement byte array. The
926 * {@link java.nio.charset.CharsetEncoder} class should be used when more
927 * control over the encoding process is required.
928 *
929 * @param charset
930 * The {@linkplain java.nio.charset.Charset charset} to be used to encode
931 * the {@code String}
932 *
933 * @return The resultant byte array
934 *
935 * @since 1.6
936 */
937 public byte[] getBytes(Charset charset) {
938 if (charset == null) throw new NullPointerException();
939 return StringCoding.encode(charset, value, 0, value.length);
940 }
941
942 /**
943 * Encodes this {@code String} into a sequence of bytes using the
944 * platform's default charset, storing the result into a new byte array.
945 *
946 * <p> The behavior of this method when this string cannot be encoded in
947 * the default charset is unspecified. The {@link
948 * java.nio.charset.CharsetEncoder} class should be used when more control
949 * over the encoding process is required.
950 *
951 * @return The resultant byte array
952 *
953 * @since JDK1.1
954 */
955 public byte[] getBytes() {
956 return StringCoding.encode(value, 0, value.length);
957 }
958
959 /**
960 * Compares this string to the specified object. The result is {@code
961 * true} if and only if the argument is not {@code null} and is a {@code
962 * String} object that represents the same sequence of characters as this
963 * object.
964 *
965 * @param anObject
966 * The object to compare this {@code String} against
967 *
968 * @return {@code true} if the given object represents a {@code String}
969 * equivalent to this string, {@code false} otherwise
970 *
971 * @see #compareTo(String)
972 * @see #equalsIgnoreCase(String)
973 */
974 public boolean equals(Object anObject) {
975 if (this == anObject) {
976 return true;
977 }
978 if (anObject instanceof String) {
979 String anotherString = (String) anObject;
980 int n = value.length;
981 if (n == anotherString.value.length) {
982 char v1[] = value;
983 char v2[] = anotherString.value;
984 int i = 0;
985 while (n-- != 0) {
986 if (v1[i] != v2[i])
987 return false;
988 i++;
989 }
990 return true;
991 }
992 }
993 return false;
994 }
995
996 /**
997 * Compares this string to the specified {@code StringBuffer}. The result
998 * is {@code true} if and only if this {@code String} represents the same
999 * sequence of characters as the specified {@code StringBuffer}.
1000 *
1001 * @param sb
1002 * The {@code StringBuffer} to compare this {@code String} against
1003 *
1004 * @return {@code true} if this {@code String} represents the same
1005 * sequence of characters as the specified {@code StringBuffer},
1006 * {@code false} otherwise
1007 *
1008 * @since 1.4
1009 */
1010 public boolean contentEquals(StringBuffer sb) {
1011 synchronized (sb) {
1012 return contentEquals((CharSequence) sb);
1013 }
1014 }
1015
1016 /**
1017 * Compares this string to the specified {@code CharSequence}. The result
1018 * is {@code true} if and only if this {@code String} represents the same
1019 * sequence of char values as the specified sequence.
1020 *
1021 * @param cs
1022 * The sequence to compare this {@code String} against
1023 *
1024 * @return {@code true} if this {@code String} represents the same
1025 * sequence of char values as the specified sequence, {@code
1026 * false} otherwise
1027 *
1028 * @since 1.5
1029 */
1030 public boolean contentEquals(CharSequence cs) {
1031 if (value.length != cs.length())
1032 return false;
1033 // Argument is a StringBuffer, StringBuilder
1034 if (cs instanceof AbstractStringBuilder) {
1035 char v1[] = value;
1036 char v2[] = ((AbstractStringBuilder) cs).getValue();
1037 int i = 0;
1038 int n = value.length;
1039 while (n-- != 0) {
1040 if (v1[i] != v2[i])
1041 return false;
1042 i++;
1043 }
1044 return true;
1045 }
1046 // Argument is a String
1047 if (cs.equals(this))
1048 return true;
1049 // Argument is a generic CharSequence
1050 char v1[] = value;
1051 int i = 0;
1052 int n = value.length;
1053 while (n-- != 0) {
1054 if (v1[i] != cs.charAt(i))
1055 return false;
1056 i++;
1057 }
1058 return true;
1059 }
1060
1061 /**
1062 * Compares this {@code String} to another {@code String}, ignoring case
1063 * considerations. Two strings are considered equal ignoring case if they
1064 * are of the same length and corresponding characters in the two strings
1065 * are equal ignoring case.
1066 *
1067 * <p> Two characters {@code c1} and {@code c2} are considered the same
1068 * ignoring case if at least one of the following is true:
1069 * <ul>
1070 * <li> The two characters are the same (as compared by the
1071 * {@code ==} operator)
1072 * <li> Applying the method {@link
1073 * java.lang.Character#toUpperCase(char)} to each character
1074 * produces the same result
1075 * <li> Applying the method {@link
1076 * java.lang.Character#toLowerCase(char)} to each character
1077 * produces the same result
1078 * </ul>
1079 *
1080 * @param anotherString
1081 * The {@code String} to compare this {@code String} against
1082 *
1083 * @return {@code true} if the argument is not {@code null} and it
1084 * represents an equivalent {@code String} ignoring case; {@code
1085 * false} otherwise
1086 *
1087 * @see #equals(Object)
1088 */
1089 public boolean equalsIgnoreCase(String anotherString) {
1090 return (this == anotherString) ? true
1091 : (anotherString != null)
1092 && (anotherString.value.length == value.length)
1093 && regionMatches(true, 0, anotherString, 0, value.length);
1094 }
1095
1096 /**
1097 * Compares two strings lexicographically.
1098 * The comparison is based on the Unicode value of each character in
1099 * the strings. The character sequence represented by this
1100 * {@code String} object is compared lexicographically to the
1101 * character sequence represented by the argument string. The result is
1102 * a negative integer if this {@code String} object
1103 * lexicographically precedes the argument string. The result is a
1104 * positive integer if this {@code String} object lexicographically
1105 * follows the argument string. The result is zero if the strings
1106 * are equal; {@code compareTo} returns {@code 0} exactly when
1107 * the {@link #equals(Object)} method would return {@code true}.
1108 * <p>
1109 * This is the definition of lexicographic ordering. If two strings are
1110 * different, then either they have different characters at some index
1111 * that is a valid index for both strings, or their lengths are different,
1112 * or both. If they have different characters at one or more index
1113 * positions, let <i>k</i> be the smallest such index; then the string
1118 * the two string -- that is, the value:
1119 * <blockquote><pre>
1120 * this.charAt(k)-anotherString.charAt(k)
1121 * </pre></blockquote>
1122 * If there is no index position at which they differ, then the shorter
1123 * string lexicographically precedes the longer string. In this case,
1124 * {@code compareTo} returns the difference of the lengths of the
1125 * strings -- that is, the value:
1126 * <blockquote><pre>
1127 * this.length()-anotherString.length()
1128 * </pre></blockquote>
1129 *
1130 * @param anotherString the {@code String} to be compared.
1131 * @return the value {@code 0} if the argument string is equal to
1132 * this string; a value less than {@code 0} if this string
1133 * is lexicographically less than the string argument; and a
1134 * value greater than {@code 0} if this string is
1135 * lexicographically greater than the string argument.
1136 */
1137 public int compareTo(String anotherString) {
1138 int len1 = value.length;
1139 int len2 = anotherString.value.length;
1140 int lim = Math.min(len1, len2);
1141 char v1[] = value;
1142 char v2[] = anotherString.value;
1143
1144 int k = 0;
1145 while (k < lim) {
1146 char c1 = v1[k];
1147 char c2 = v2[k];
1148 if (c1 != c2) {
1149 return c1 - c2;
1150 }
1151 k++;
1152 }
1153 return len1 - len2;
1154 }
1155
1156 /**
1157 * A Comparator that orders {@code String} objects as by
1158 * {@code compareToIgnoreCase}. This comparator is serializable.
1159 * <p>
1160 * Note that this Comparator does <em>not</em> take locale into account,
1161 * and will result in an unsatisfactory ordering for certain locales.
1162 * The java.text package provides <em>Collators</em> to allow
1163 * locale-sensitive ordering.
1164 *
1165 * @see java.text.Collator#compare(String, String)
1166 * @since 1.2
1167 */
1168 public static final Comparator<String> CASE_INSENSITIVE_ORDER
1169 = new CaseInsensitiveComparator();
1170 private static class CaseInsensitiveComparator
1171 implements Comparator<String>, java.io.Serializable {
1172 // use serialVersionUID from JDK 1.2.2 for interoperability
1240 * <tt>String</tt> object.
1241 * <li><tt>ooffset+len</tt> is greater than the length of the other
1242 * argument.
1243 * <li>There is some nonnegative integer <i>k</i> less than <tt>len</tt>
1244 * such that:
1245 * <tt>this.charAt(toffset+<i>k</i>) != other.charAt(ooffset+<i>k</i>)</tt>
1246 * </ul>
1247 *
1248 * @param toffset the starting offset of the subregion in this string.
1249 * @param other the string argument.
1250 * @param ooffset the starting offset of the subregion in the string
1251 * argument.
1252 * @param len the number of characters to compare.
1253 * @return {@code true} if the specified subregion of this string
1254 * exactly matches the specified subregion of the string argument;
1255 * {@code false} otherwise.
1256 */
1257 public boolean regionMatches(int toffset, String other, int ooffset,
1258 int len) {
1259 char ta[] = value;
1260 int to = toffset;
1261 char pa[] = other.value;
1262 int po = ooffset;
1263 // Note: toffset, ooffset, or len might be near -1>>>1.
1264 if ((ooffset < 0) || (toffset < 0)
1265 || (toffset > (long)value.length - len)
1266 || (ooffset > (long)other.value.length - len)) {
1267 return false;
1268 }
1269 while (len-- > 0) {
1270 if (ta[to++] != pa[po++]) {
1271 return false;
1272 }
1273 }
1274 return true;
1275 }
1276
1277 /**
1278 * Tests if two string regions are equal.
1279 * <p>
1280 * A substring of this <tt>String</tt> object is compared to a substring
1281 * of the argument <tt>other</tt>. The result is <tt>true</tt> if these
1282 * substrings represent character sequences that are the same, ignoring
1283 * case if and only if <tt>ignoreCase</tt> is true. The substring of
1284 * this <tt>String</tt> object to be compared begins at index
1285 * <tt>toffset</tt> and has length <tt>len</tt>. The substring of
1286 * <tt>other</tt> to be compared begins at index <tt>ooffset</tt> and
1310 * </pre></blockquote>
1311 * </ul>
1312 *
1313 * @param ignoreCase if {@code true}, ignore case when comparing
1314 * characters.
1315 * @param toffset the starting offset of the subregion in this
1316 * string.
1317 * @param other the string argument.
1318 * @param ooffset the starting offset of the subregion in the string
1319 * argument.
1320 * @param len the number of characters to compare.
1321 * @return {@code true} if the specified subregion of this string
1322 * matches the specified subregion of the string argument;
1323 * {@code false} otherwise. Whether the matching is exact
1324 * or case insensitive depends on the {@code ignoreCase}
1325 * argument.
1326 */
1327 public boolean regionMatches(boolean ignoreCase, int toffset,
1328 String other, int ooffset, int len) {
1329 char ta[] = value;
1330 int to = toffset;
1331 char pa[] = other.value;
1332 int po = ooffset;
1333 // Note: toffset, ooffset, or len might be near -1>>>1.
1334 if ((ooffset < 0) || (toffset < 0)
1335 || (toffset > (long)value.length - len)
1336 || (ooffset > (long)other.value.length - len)) {
1337 return false;
1338 }
1339 while (len-- > 0) {
1340 char c1 = ta[to++];
1341 char c2 = pa[po++];
1342 if (c1 == c2) {
1343 continue;
1344 }
1345 if (ignoreCase) {
1346 // If characters don't match but case may be ignored,
1347 // try converting both characters to uppercase.
1348 // If the results match, then the comparison scan should
1349 // continue.
1350 char u1 = Character.toUpperCase(c1);
1351 char u2 = Character.toUpperCase(c2);
1352 if (u1 == u2) {
1353 continue;
1354 }
1355 // Unfortunately, conversion to uppercase does not work properly
1356 // for the Georgian alphabet, which has strange rules about case
1367
1368 /**
1369 * Tests if the substring of this string beginning at the
1370 * specified index starts with the specified prefix.
1371 *
1372 * @param prefix the prefix.
1373 * @param toffset where to begin looking in this string.
1374 * @return {@code true} if the character sequence represented by the
1375 * argument is a prefix of the substring of this object starting
1376 * at index {@code toffset}; {@code false} otherwise.
1377 * The result is {@code false} if {@code toffset} is
1378 * negative or greater than the length of this
1379 * {@code String} object; otherwise the result is the same
1380 * as the result of the expression
1381 * <pre>
1382 * this.substring(toffset).startsWith(prefix)
1383 * </pre>
1384 */
1385 public boolean startsWith(String prefix, int toffset) {
1386 char ta[] = value;
1387 int to = toffset;
1388 char pa[] = prefix.value;
1389 int po = 0;
1390 int pc = prefix.value.length;
1391 // Note: toffset might be near -1>>>1.
1392 if ((toffset < 0) || (toffset > value.length - pc)) {
1393 return false;
1394 }
1395 while (--pc >= 0) {
1396 if (ta[to++] != pa[po++]) {
1397 return false;
1398 }
1399 }
1400 return true;
1401 }
1402
1403 /**
1404 * Tests if this string starts with the specified prefix.
1405 *
1406 * @param prefix the prefix.
1407 * @return {@code true} if the character sequence represented by the
1408 * argument is a prefix of the character sequence represented by
1409 * this string; {@code false} otherwise.
1410 * Note also that {@code true} will be returned if the
1411 * argument is an empty string or is equal to this
1412 * {@code String} object as determined by the
1413 * {@link #equals(Object)} method.
1414 * @since 1.0
1415 */
1416 public boolean startsWith(String prefix) {
1417 return startsWith(prefix, 0);
1418 }
1419
1420 /**
1421 * Tests if this string ends with the specified suffix.
1422 *
1423 * @param suffix the suffix.
1424 * @return {@code true} if the character sequence represented by the
1425 * argument is a suffix of the character sequence represented by
1426 * this object; {@code false} otherwise. Note that the
1427 * result will be {@code true} if the argument is the
1428 * empty string or is equal to this {@code String} object
1429 * as determined by the {@link #equals(Object)} method.
1430 */
1431 public boolean endsWith(String suffix) {
1432 return startsWith(suffix, value.length - suffix.value.length);
1433 }
1434
1435 /**
1436 * Returns a hash code for this string. The hash code for a
1437 * {@code String} object is computed as
1438 * <blockquote><pre>
1439 * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
1440 * </pre></blockquote>
1441 * using {@code int} arithmetic, where {@code s[i]} is the
1442 * <i>i</i>th character of the string, {@code n} is the length of
1443 * the string, and {@code ^} indicates exponentiation.
1444 * (The hash value of the empty string is zero.)
1445 *
1446 * @return a hash code value for this object.
1447 */
1448 public int hashCode() {
1449 int h = hash;
1450 if (h == 0 && value.length > 0) {
1451 char val[] = value;
1452
1453 for (int i = 0; i < value.length; i++) {
1454 h = 31 * h + val[i];
1455 }
1456 hash = h;
1457 }
1458 return h;
1459 }
1460
1461 /**
1462 * Returns the index within this string of the first occurrence of
1463 * the specified character. If a character with value
1464 * {@code ch} occurs in the character sequence represented by
1465 * this {@code String} object, then the index (in Unicode
1466 * code units) of the first such occurrence is returned. For
1467 * values of {@code ch} in the range from 0 to 0xFFFF
1468 * (inclusive), this is the smallest value <i>k</i> such that:
1469 * <blockquote><pre>
1470 * this.charAt(<i>k</i>) == ch
1471 * </pre></blockquote>
1472 * is true. For other values of {@code ch}, it is the
1473 * smallest value <i>k</i> such that:
1474 * <blockquote><pre>
1509 * {@code -1} is returned.
1510 *
1511 * <p>
1512 * There is no restriction on the value of {@code fromIndex}. If it
1513 * is negative, it has the same effect as if it were zero: this entire
1514 * string may be searched. If it is greater than the length of this
1515 * string, it has the same effect as if it were equal to the length of
1516 * this string: {@code -1} is returned.
1517 *
1518 * <p>All indices are specified in {@code char} values
1519 * (Unicode code units).
1520 *
1521 * @param ch a character (Unicode code point).
1522 * @param fromIndex the index to start the search from.
1523 * @return the index of the first occurrence of the character in the
1524 * character sequence represented by this object that is greater
1525 * than or equal to {@code fromIndex}, or {@code -1}
1526 * if the character does not occur.
1527 */
1528 public int indexOf(int ch, int fromIndex) {
1529 final int max = value.length;
1530 if (fromIndex < 0) {
1531 fromIndex = 0;
1532 } else if (fromIndex >= max) {
1533 // Note: fromIndex might be near -1>>>1.
1534 return -1;
1535 }
1536
1537 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
1538 // handle most cases here (ch is a BMP code point or a
1539 // negative value (invalid code point))
1540 final char[] value = this.value;
1541 for (int i = fromIndex; i < max; i++) {
1542 if (value[i] == ch) {
1543 return i;
1544 }
1545 }
1546 return -1;
1547 } else {
1548 return indexOfSupplementary(ch, fromIndex);
1549 }
1550 }
1551
1552 /**
1553 * Handles (rare) calls of indexOf with a supplementary character.
1554 */
1555 private int indexOfSupplementary(int ch, int fromIndex) {
1556 if (Character.isValidCodePoint(ch)) {
1557 final char[] value = this.value;
1558 final char hi = Character.highSurrogate(ch);
1559 final char lo = Character.lowSurrogate(ch);
1560 final int max = value.length - 1;
1561 for (int i = fromIndex; i < max; i++) {
1562 if (value[i] == hi && value[i + 1] == lo) {
1563 return i;
1564 }
1565 }
1566 }
1567 return -1;
1568 }
1569
1570 /**
1571 * Returns the index within this string of the last occurrence of
1572 * the specified character. For values of {@code ch} in the
1573 * range from 0 to 0xFFFF (inclusive), the index (in Unicode code
1574 * units) returned is the largest value <i>k</i> such that:
1575 * <blockquote><pre>
1576 * this.charAt(<i>k</i>) == ch
1577 * </pre></blockquote>
1578 * is true. For other values of {@code ch}, it is the
1579 * largest value <i>k</i> such that:
1580 * <blockquote><pre>
1581 * this.codePointAt(<i>k</i>) == ch
1582 * </pre></blockquote>
1583 * is true. In either case, if no such character occurs in this
1584 * string, then {@code -1} is returned. The
1585 * {@code String} is searched backwards starting at the last
1586 * character.
1587 *
1588 * @param ch a character (Unicode code point).
1589 * @return the index of the last occurrence of the character in the
1590 * character sequence represented by this object, or
1591 * {@code -1} if the character does not occur.
1592 */
1593 public int lastIndexOf(int ch) {
1594 return lastIndexOf(ch, value.length - 1);
1595 }
1596
1597 /**
1598 * Returns the index within this string of the last occurrence of
1599 * the specified character, searching backward starting at the
1600 * specified index. For values of {@code ch} in the range
1601 * from 0 to 0xFFFF (inclusive), the index returned is the largest
1602 * value <i>k</i> such that:
1603 * <blockquote><pre>
1604 * (this.charAt(<i>k</i>) == ch) && (<i>k</i> <= fromIndex)
1605 * </pre></blockquote>
1606 * is true. For other values of {@code ch}, it is the
1607 * largest value <i>k</i> such that:
1608 * <blockquote><pre>
1609 * (this.codePointAt(<i>k</i>) == ch) && (<i>k</i> <= fromIndex)
1610 * </pre></blockquote>
1611 * is true. In either case, if no such character occurs in this
1612 * string at or before position {@code fromIndex}, then
1613 * {@code -1} is returned.
1614 *
1616 * (Unicode code units).
1617 *
1618 * @param ch a character (Unicode code point).
1619 * @param fromIndex the index to start the search from. There is no
1620 * restriction on the value of {@code fromIndex}. If it is
1621 * greater than or equal to the length of this string, it has
1622 * the same effect as if it were equal to one less than the
1623 * length of this string: this entire string may be searched.
1624 * If it is negative, it has the same effect as if it were -1:
1625 * -1 is returned.
1626 * @return the index of the last occurrence of the character in the
1627 * character sequence represented by this object that is less
1628 * than or equal to {@code fromIndex}, or {@code -1}
1629 * if the character does not occur before that point.
1630 */
1631 public int lastIndexOf(int ch, int fromIndex) {
1632 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
1633 // handle most cases here (ch is a BMP code point or a
1634 // negative value (invalid code point))
1635 final char[] value = this.value;
1636 int i = Math.min(fromIndex, value.length - 1);
1637 for (; i >= 0; i--) {
1638 if (value[i] == ch) {
1639 return i;
1640 }
1641 }
1642 return -1;
1643 } else {
1644 return lastIndexOfSupplementary(ch, fromIndex);
1645 }
1646 }
1647
1648 /**
1649 * Handles (rare) calls of lastIndexOf with a supplementary character.
1650 */
1651 private int lastIndexOfSupplementary(int ch, int fromIndex) {
1652 if (Character.isValidCodePoint(ch)) {
1653 final char[] value = this.value;
1654 char hi = Character.highSurrogate(ch);
1655 char lo = Character.lowSurrogate(ch);
1656 int i = Math.min(fromIndex, value.length - 2);
1657 for (; i >= 0; i--) {
1658 if (value[i] == hi && value[i + 1] == lo) {
1659 return i;
1660 }
1661 }
1662 }
1663 return -1;
1664 }
1665
1666 /**
1667 * Returns the index within this string of the first occurrence of the
1668 * specified substring.
1669 *
1670 * <p>The returned index is the smallest value <i>k</i> for which:
1671 * <blockquote><pre>
1672 * this.startsWith(str, <i>k</i>)
1673 * </pre></blockquote>
1674 * If no such value of <i>k</i> exists, then {@code -1} is returned.
1675 *
1676 * @param str the substring to search for.
1677 * @return the index of the first occurrence of the specified substring,
1678 * or {@code -1} if there is no such occurrence.
1679 */
1681 return indexOf(str, 0);
1682 }
1683
1684 /**
1685 * Returns the index within this string of the first occurrence of the
1686 * specified substring, starting at the specified index.
1687 *
1688 * <p>The returned index is the smallest value <i>k</i> for which:
1689 * <blockquote><pre>
1690 * <i>k</i> >= fromIndex && this.startsWith(str, <i>k</i>)
1691 * </pre></blockquote>
1692 * If no such value of <i>k</i> exists, then {@code -1} is returned.
1693 *
1694 * @param str the substring to search for.
1695 * @param fromIndex the index from which to start the search.
1696 * @return the index of the first occurrence of the specified substring,
1697 * starting at the specified index,
1698 * or {@code -1} if there is no such occurrence.
1699 */
1700 public int indexOf(String str, int fromIndex) {
1701 return indexOf(value, 0, value.length,
1702 str.value, 0, str.value.length, fromIndex);
1703 }
1704
1705 /**
1706 * Code shared by String and StringBuffer to do searches. The
1707 * source is the character array being searched, and the target
1708 * is the string being searched for.
1709 *
1710 * @param source the characters being searched.
1711 * @param sourceOffset offset of the source string.
1712 * @param sourceCount count of the source string.
1713 * @param target the characters being searched for.
1714 * @param targetOffset offset of the target string.
1715 * @param targetCount count of the target string.
1716 * @param fromIndex the index to begin searching from.
1717 */
1718 static int indexOf(char[] source, int sourceOffset, int sourceCount,
1719 char[] target, int targetOffset, int targetCount,
1720 int fromIndex) {
1721 if (fromIndex >= sourceCount) {
1722 return (targetCount == 0 ? sourceCount : -1);
1724 if (fromIndex < 0) {
1725 fromIndex = 0;
1726 }
1727 if (targetCount == 0) {
1728 return fromIndex;
1729 }
1730
1731 char first = target[targetOffset];
1732 int max = sourceOffset + (sourceCount - targetCount);
1733
1734 for (int i = sourceOffset + fromIndex; i <= max; i++) {
1735 /* Look for first character. */
1736 if (source[i] != first) {
1737 while (++i <= max && source[i] != first);
1738 }
1739
1740 /* Found first character, now look at the rest of v2 */
1741 if (i <= max) {
1742 int j = i + 1;
1743 int end = j + targetCount - 1;
1744 for (int k = targetOffset + 1; j < end && source[j]
1745 == target[k]; j++, k++);
1746
1747 if (j == end) {
1748 /* Found whole string. */
1749 return i - sourceOffset;
1750 }
1751 }
1752 }
1753 return -1;
1754 }
1755
1756 /**
1757 * Returns the index within this string of the last occurrence of the
1758 * specified substring. The last occurrence of the empty string ""
1759 * is considered to occur at the index value {@code this.length()}.
1760 *
1761 * <p>The returned index is the largest value <i>k</i> for which:
1762 * <blockquote><pre>
1763 * this.startsWith(str, <i>k</i>)
1764 * </pre></blockquote>
1765 * If no such value of <i>k</i> exists, then {@code -1} is returned.
1766 *
1767 * @param str the substring to search for.
1768 * @return the index of the last occurrence of the specified substring,
1769 * or {@code -1} if there is no such occurrence.
1770 */
1771 public int lastIndexOf(String str) {
1772 return lastIndexOf(str, value.length);
1773 }
1774
1775 /**
1776 * Returns the index within this string of the last occurrence of the
1777 * specified substring, searching backward starting at the specified index.
1778 *
1779 * <p>The returned index is the largest value <i>k</i> for which:
1780 * <blockquote><pre>
1781 * <i>k</i> <= fromIndex && this.startsWith(str, <i>k</i>)
1782 * </pre></blockquote>
1783 * If no such value of <i>k</i> exists, then {@code -1} is returned.
1784 *
1785 * @param str the substring to search for.
1786 * @param fromIndex the index to start the search from.
1787 * @return the index of the last occurrence of the specified substring,
1788 * searching backward from the specified index,
1789 * or {@code -1} if there is no such occurrence.
1790 */
1791 public int lastIndexOf(String str, int fromIndex) {
1792 return lastIndexOf(value, 0, value.length,
1793 str.value, 0, str.value.length, fromIndex);
1794 }
1795
1796 /**
1797 * Code shared by String and StringBuffer to do searches. The
1798 * source is the character array being searched, and the target
1799 * is the string being searched for.
1800 *
1801 * @param source the characters being searched.
1802 * @param sourceOffset offset of the source string.
1803 * @param sourceCount count of the source string.
1804 * @param target the characters being searched for.
1805 * @param targetOffset offset of the target string.
1806 * @param targetCount count of the target string.
1807 * @param fromIndex the index to begin searching from.
1808 */
1809 static int lastIndexOf(char[] source, int sourceOffset, int sourceCount,
1810 char[] target, int targetOffset, int targetCount,
1811 int fromIndex) {
1812 /*
1813 * Check arguments; return immediately where possible. For
1853 }
1854
1855 /**
1856 * Returns a new string that is a substring of this string. The
1857 * substring begins with the character at the specified index and
1858 * extends to the end of this string. <p>
1859 * Examples:
1860 * <blockquote><pre>
1861 * "unhappy".substring(2) returns "happy"
1862 * "Harbison".substring(3) returns "bison"
1863 * "emptiness".substring(9) returns "" (an empty string)
1864 * </pre></blockquote>
1865 *
1866 * @param beginIndex the beginning index, inclusive.
1867 * @return the specified substring.
1868 * @exception IndexOutOfBoundsException if
1869 * {@code beginIndex} is negative or larger than the
1870 * length of this {@code String} object.
1871 */
1872 public String substring(int beginIndex) {
1873 if (beginIndex < 0) {
1874 throw new StringIndexOutOfBoundsException(beginIndex);
1875 }
1876 int subLen = value.length - beginIndex;
1877 if (subLen < 0) {
1878 throw new StringIndexOutOfBoundsException(subLen);
1879 }
1880 return (beginIndex == 0) ? this : new String(value, beginIndex, subLen);
1881 }
1882
1883 /**
1884 * Returns a new string that is a substring of this string. The
1885 * substring begins at the specified {@code beginIndex} and
1886 * extends to the character at index {@code endIndex - 1}.
1887 * Thus the length of the substring is {@code endIndex-beginIndex}.
1888 * <p>
1889 * Examples:
1890 * <blockquote><pre>
1891 * "hamburger".substring(4, 8) returns "urge"
1892 * "smiles".substring(1, 5) returns "mile"
1893 * </pre></blockquote>
1894 *
1895 * @param beginIndex the beginning index, inclusive.
1896 * @param endIndex the ending index, exclusive.
1897 * @return the specified substring.
1898 * @exception IndexOutOfBoundsException if the
1899 * {@code beginIndex} is negative, or
1900 * {@code endIndex} is larger than the length of
1901 * this {@code String} object, or
1902 * {@code beginIndex} is larger than
1903 * {@code endIndex}.
1904 */
1905 public String substring(int beginIndex, int endIndex) {
1906 if (beginIndex < 0) {
1907 throw new StringIndexOutOfBoundsException(beginIndex);
1908 }
1909 if (endIndex > value.length) {
1910 throw new StringIndexOutOfBoundsException(endIndex);
1911 }
1912 int subLen = endIndex - beginIndex;
1913 if (subLen < 0) {
1914 throw new StringIndexOutOfBoundsException(subLen);
1915 }
1916 return ((beginIndex == 0) && (endIndex == value.length)) ? this
1917 : new String(value, beginIndex, subLen);
1918 }
1919
1920 /**
1921 * Returns a new character sequence that is a subsequence of this sequence.
1922 *
1923 * <p> An invocation of this method of the form
1924 *
1925 * <blockquote><pre>
1926 * str.subSequence(begin, end)</pre></blockquote>
1927 *
1928 * behaves in exactly the same way as the invocation
1929 *
1930 * <blockquote><pre>
1931 * str.substring(begin, end)</pre></blockquote>
1932 *
1933 * This method is defined so that the <tt>String</tt> class can implement
1934 * the {@link CharSequence} interface. </p>
1935 *
1936 * @param beginIndex the begin index, inclusive.
1937 * @param endIndex the end index, exclusive.
1957 * {@code String} object is created, representing a character
1958 * sequence that is the concatenation of the character sequence
1959 * represented by this {@code String} object and the character
1960 * sequence represented by the argument string.<p>
1961 * Examples:
1962 * <blockquote><pre>
1963 * "cares".concat("s") returns "caress"
1964 * "to".concat("get").concat("her") returns "together"
1965 * </pre></blockquote>
1966 *
1967 * @param str the {@code String} that is concatenated to the end
1968 * of this {@code String}.
1969 * @return a string that represents the concatenation of this object's
1970 * characters followed by the string argument's characters.
1971 */
1972 public String concat(String str) {
1973 int otherLen = str.length();
1974 if (otherLen == 0) {
1975 return this;
1976 }
1977 int len = value.length;
1978 char buf[] = Arrays.copyOf(value, len + otherLen);
1979 str.getChars(buf, len);
1980 return new String(buf, true);
1981 }
1982
1983 /**
1984 * Returns a new string resulting from replacing all occurrences of
1985 * {@code oldChar} in this string with {@code newChar}.
1986 * <p>
1987 * If the character {@code oldChar} does not occur in the
1988 * character sequence represented by this {@code String} object,
1989 * then a reference to this {@code String} object is returned.
1990 * Otherwise, a new {@code String} object is created that
1991 * represents a character sequence identical to the character sequence
1992 * represented by this {@code String} object, except that every
1993 * occurrence of {@code oldChar} is replaced by an occurrence
1994 * of {@code newChar}.
1995 * <p>
1996 * Examples:
1997 * <blockquote><pre>
1998 * "mesquite in your cellar".replace('e', 'o')
1999 * returns "mosquito in your collar"
2000 * "the war of baronets".replace('r', 'y')
2001 * returns "the way of bayonets"
2002 * "sparring with a purple porpoise".replace('p', 't')
2003 * returns "starring with a turtle tortoise"
2004 * "JonL".replace('q', 'x') returns "JonL" (no change)
2005 * </pre></blockquote>
2006 *
2007 * @param oldChar the old character.
2008 * @param newChar the new character.
2009 * @return a string derived from this string by replacing every
2010 * occurrence of {@code oldChar} with {@code newChar}.
2011 */
2012 public String replace(char oldChar, char newChar) {
2013 if (oldChar != newChar) {
2014 int len = value.length;
2015 int i = -1;
2016 char[] val = value; /* avoid getfield opcode */
2017
2018 while (++i < len) {
2019 if (val[i] == oldChar) {
2020 break;
2021 }
2022 }
2023 if (i < len) {
2024 char buf[] = new char[len];
2025 for (int j = 0; j < i; j++) {
2026 buf[j] = val[j];
2027 }
2028 while (i < len) {
2029 char c = val[i];
2030 buf[i] = (c == oldChar) ? newChar : c;
2031 i++;
2032 }
2033 return new String(buf, true);
2034 }
2035 }
2036 return this;
2037 }
2038
2039 /**
2040 * Tells whether or not this string matches the given <a
2041 * href="../util/regex/Pattern.html#sum">regular expression</a>.
2042 *
2043 * <p> An invocation of this method of the form
2044 * <i>str</i><tt>.matches(</tt><i>regex</i><tt>)</tt> yields exactly the
2045 * same result as the expression
2046 *
2047 * <blockquote><tt> {@link java.util.regex.Pattern}.{@link
2048 * java.util.regex.Pattern#matches(String,CharSequence)
2049 * matches}(</tt><i>regex</i><tt>,</tt> <i>str</i><tt>)</tt></blockquote>
2050 *
2051 * @param regex
2052 * the regular expression to which this string is to be matched
2053 *
2255 *
2256 * @return the array of strings computed by splitting this string
2257 * around matches of the given regular expression
2258 *
2259 * @throws PatternSyntaxException
2260 * if the regular expression's syntax is invalid
2261 *
2262 * @see java.util.regex.Pattern
2263 *
2264 * @since 1.4
2265 * @spec JSR-51
2266 */
2267 public String[] split(String regex, int limit) {
2268 /* fastpath if the regex is a
2269 (1)one-char String and this character is not one of the
2270 RegEx's meta characters ".$|()[{^?*+\\", or
2271 (2)two-char String and the first char is the backslash and
2272 the second is not the ascii digit or ascii letter.
2273 */
2274 char ch = 0;
2275 if (((regex.value.length == 1 &&
2276 ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
2277 (regex.length() == 2 &&
2278 regex.charAt(0) == '\\' &&
2279 (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
2280 ((ch-'a')|('z'-ch)) < 0 &&
2281 ((ch-'A')|('Z'-ch)) < 0)) &&
2282 (ch < Character.MIN_HIGH_SURROGATE ||
2283 ch > Character.MAX_LOW_SURROGATE))
2284 {
2285 int off = 0;
2286 int next = 0;
2287 boolean limited = limit > 0;
2288 ArrayList<String> list = new ArrayList<>();
2289 while ((next = indexOf(ch, off)) != -1) {
2290 if (!limited || list.size() < limit - 1) {
2291 list.add(substring(off, next));
2292 off = next + 1;
2293 } else { // last one
2294 //assert (list.size() == limit - 1);
2295 list.add(substring(off, value.length));
2296 off = value.length;
2297 break;
2298 }
2299 }
2300 // If no match was found, return this
2301 if (off == 0)
2302 return new String[]{this};
2303
2304 // Add remaining segment
2305 if (!limited || list.size() < limit)
2306 list.add(substring(off, value.length));
2307
2308 // Construct result
2309 int resultSize = list.size();
2310 if (limit == 0)
2311 while (resultSize > 0 && list.get(resultSize - 1).length() == 0)
2312 resultSize--;
2313 String[] result = new String[resultSize];
2314 return list.subList(0, resultSize).toArray(result);
2315 }
2316 return Pattern.compile(regex).split(this, limit);
2317 }
2318
2319 /**
2320 * Splits this string around matches of the given <a
2321 * href="../util/regex/Pattern.html#sum">regular expression</a>.
2322 *
2323 * <p> This method works as if by invoking the two-argument {@link
2324 * #split(String, int) split} method with the given expression and a limit
2325 * argument of zero. Trailing empty strings are therefore not included in
2326 * the resulting array.
2327 *
2328 * <p> The string <tt>"boo:and:foo"</tt>, for example, yields the following
2329 * results with these expressions:
2330 *
2331 * <blockquote><table cellpadding=1 cellspacing=0 summary="Split examples showing regex and result">
2399 * <td><img src="doc-files/iota.gif" alt="iota"><img src="doc-files/chi.gif" alt="chi">
2400 * <img src="doc-files/theta.gif" alt="theta"><img src="doc-files/upsilon.gif" alt="upsilon">
2401 * <img src="doc-files/sigma1.gif" alt="sigma"></td>
2402 * <td>lowercased all chars in String</td>
2403 * </tr>
2404 * </table>
2405 *
2406 * @param locale use the case transformation rules for this locale
2407 * @return the {@code String}, converted to lowercase.
2408 * @see java.lang.String#toLowerCase()
2409 * @see java.lang.String#toUpperCase()
2410 * @see java.lang.String#toUpperCase(Locale)
2411 * @since 1.1
2412 */
2413 public String toLowerCase(Locale locale) {
2414 if (locale == null) {
2415 throw new NullPointerException();
2416 }
2417
2418 int firstUpper;
2419 final int len = value.length;
2420
2421 /* Now check if there are any characters that need to be changed. */
2422 scan: {
2423 for (firstUpper = 0 ; firstUpper < len; ) {
2424 char c = value[firstUpper];
2425 if ((c >= Character.MIN_HIGH_SURROGATE)
2426 && (c <= Character.MAX_HIGH_SURROGATE)) {
2427 int supplChar = codePointAt(firstUpper);
2428 if (supplChar != Character.toLowerCase(supplChar)) {
2429 break scan;
2430 }
2431 firstUpper += Character.charCount(supplChar);
2432 } else {
2433 if (c != Character.toLowerCase(c)) {
2434 break scan;
2435 }
2436 firstUpper++;
2437 }
2438 }
2439 return this;
2440 }
2441
2442 char[] result = new char[len];
2443 int resultOffset = 0; /* result may grow, so i+resultOffset
2444 * is the write location in result */
2445
2446 /* Just copy the first few lowerCase characters. */
2447 System.arraycopy(value, 0, result, 0, firstUpper);
2448
2449 String lang = locale.getLanguage();
2450 boolean localeDependent =
2451 (lang == "tr" || lang == "az" || lang == "lt");
2452 char[] lowerCharArray;
2453 int lowerChar;
2454 int srcChar;
2455 int srcCount;
2456 for (int i = firstUpper; i < len; i += srcCount) {
2457 srcChar = (int)value[i];
2458 if ((char)srcChar >= Character.MIN_HIGH_SURROGATE
2459 && (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
2460 srcChar = codePointAt(i);
2461 srcCount = Character.charCount(srcChar);
2462 } else {
2463 srcCount = 1;
2464 }
2465 if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
2466 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
2467 } else if (srcChar == '\u0130') { // LATIN CAPITAL LETTER I DOT
2468 lowerChar = Character.ERROR;
2469 } else {
2470 lowerChar = Character.toLowerCase(srcChar);
2471 }
2472 if ((lowerChar == Character.ERROR)
2473 || (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
2474 if (lowerChar == Character.ERROR) {
2475 if (!localeDependent && srcChar == '\u0130') {
2476 lowerCharArray =
2477 ConditionalSpecialCasing.toLowerCaseCharArray(this, i, Locale.ENGLISH);
2478 } else {
2479 lowerCharArray =
2480 ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
2481 }
2482 } else if (srcCount == 2) {
2483 resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;
2484 continue;
2485 } else {
2486 lowerCharArray = Character.toChars(lowerChar);
2487 }
2488
2489 /* Grow result if needed */
2490 int mapLen = lowerCharArray.length;
2491 if (mapLen > srcCount) {
2492 char[] result2 = new char[result.length + mapLen - srcCount];
2493 System.arraycopy(result, 0, result2, 0, i + resultOffset);
2494 result = result2;
2495 }
2496 for (int x = 0; x < mapLen; ++x) {
2497 result[i + resultOffset + x] = lowerCharArray[x];
2498 }
2499 resultOffset += (mapLen - srcCount);
2500 } else {
2501 result[i + resultOffset] = (char)lowerChar;
2502 }
2503 }
2504 return new String(result, 0, len + resultOffset);
2505 }
2506
2507 /**
2508 * Converts all of the characters in this {@code String} to lower
2509 * case using the rules of the default locale. This is equivalent to calling
2510 * {@code toLowerCase(Locale.getDefault())}.
2511 * <p>
2512 * <b>Note:</b> This method is locale sensitive, and may produce unexpected
2513 * results if used for strings that are intended to be interpreted locale
2514 * independently.
2515 * Examples are programming language identifiers, protocol keys, and HTML
2516 * tags.
2517 * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale
2518 * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the
2519 * LATIN SMALL LETTER DOTLESS I character.
2520 * To obtain correct results for locale insensitive strings, use
2521 * {@code toLowerCase(Locale.ENGLISH)}.
2522 * <p>
2523 * @return the {@code String}, converted to lowercase.
2524 * @see java.lang.String#toLowerCase(Locale)
2564 * <tr>
2565 * <td>(all)</td>
2566 * <td>Fahrvergnügen</td>
2567 * <td>FAHRVERGNÜGEN</td>
2568 * <td></td>
2569 * </tr>
2570 * </table>
2571 * @param locale use the case transformation rules for this locale
2572 * @return the {@code String}, converted to uppercase.
2573 * @see java.lang.String#toUpperCase()
2574 * @see java.lang.String#toLowerCase()
2575 * @see java.lang.String#toLowerCase(Locale)
2576 * @since 1.1
2577 */
2578 public String toUpperCase(Locale locale) {
2579 if (locale == null) {
2580 throw new NullPointerException();
2581 }
2582
2583 int firstLower;
2584 final int len = value.length;
2585
2586 /* Now check if there are any characters that need to be changed. */
2587 scan: {
2588 for (firstLower = 0 ; firstLower < len; ) {
2589 int c = (int)value[firstLower];
2590 int srcCount;
2591 if ((c >= Character.MIN_HIGH_SURROGATE)
2592 && (c <= Character.MAX_HIGH_SURROGATE)) {
2593 c = codePointAt(firstLower);
2594 srcCount = Character.charCount(c);
2595 } else {
2596 srcCount = 1;
2597 }
2598 int upperCaseChar = Character.toUpperCaseEx(c);
2599 if ((upperCaseChar == Character.ERROR)
2600 || (c != upperCaseChar)) {
2601 break scan;
2602 }
2603 firstLower += srcCount;
2604 }
2605 return this;
2606 }
2607
2608 char[] result = new char[len]; /* may grow */
2609 int resultOffset = 0; /* result may grow, so i+resultOffset
2610 * is the write location in result */
2611
2612 /* Just copy the first few upperCase characters. */
2613 System.arraycopy(value, 0, result, 0, firstLower);
2614
2615 String lang = locale.getLanguage();
2616 boolean localeDependent =
2617 (lang == "tr" || lang == "az" || lang == "lt");
2618 char[] upperCharArray;
2619 int upperChar;
2620 int srcChar;
2621 int srcCount;
2622 for (int i = firstLower; i < len; i += srcCount) {
2623 srcChar = (int)value[i];
2624 if ((char)srcChar >= Character.MIN_HIGH_SURROGATE &&
2625 (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
2626 srcChar = codePointAt(i);
2627 srcCount = Character.charCount(srcChar);
2628 } else {
2629 srcCount = 1;
2630 }
2631 if (localeDependent) {
2632 upperChar = ConditionalSpecialCasing.toUpperCaseEx(this, i, locale);
2633 } else {
2634 upperChar = Character.toUpperCaseEx(srcChar);
2635 }
2636 if ((upperChar == Character.ERROR)
2637 || (upperChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
2638 if (upperChar == Character.ERROR) {
2639 if (localeDependent) {
2640 upperCharArray =
2641 ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
2642 } else {
2643 upperCharArray = Character.toUpperCaseCharArray(srcChar);
2644 }
2645 } else if (srcCount == 2) {
2646 resultOffset += Character.toChars(upperChar, result, i + resultOffset) - srcCount;
2647 continue;
2648 } else {
2649 upperCharArray = Character.toChars(upperChar);
2650 }
2651
2652 /* Grow result if needed */
2653 int mapLen = upperCharArray.length;
2654 if (mapLen > srcCount) {
2655 char[] result2 = new char[result.length + mapLen - srcCount];
2656 System.arraycopy(result, 0, result2, 0, i + resultOffset);
2657 result = result2;
2658 }
2659 for (int x = 0; x < mapLen; ++x) {
2660 result[i + resultOffset + x] = upperCharArray[x];
2661 }
2662 resultOffset += (mapLen - srcCount);
2663 } else {
2664 result[i + resultOffset] = (char)upperChar;
2665 }
2666 }
2667 return new String(result, 0, len + resultOffset);
2668 }
2669
2670 /**
2671 * Converts all of the characters in this {@code String} to upper
2672 * case using the rules of the default locale. This method is equivalent to
2673 * {@code toUpperCase(Locale.getDefault())}.
2674 * <p>
2675 * <b>Note:</b> This method is locale sensitive, and may produce unexpected
2676 * results if used for strings that are intended to be interpreted locale
2677 * independently.
2678 * Examples are programming language identifiers, protocol keys, and HTML
2679 * tags.
2680 * For instance, {@code "title".toUpperCase()} in a Turkish locale
2681 * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the
2682 * LATIN CAPITAL LETTER I WITH DOT ABOVE character.
2683 * To obtain correct results for locale insensitive strings, use
2684 * {@code toUpperCase(Locale.ENGLISH)}.
2685 * <p>
2686 * @return the {@code String}, converted to uppercase.
2687 * @see java.lang.String#toUpperCase(Locale)
2705 * {@code String} object representing an empty string is created
2706 * and returned.
2707 * <p>
2708 * Otherwise, let <i>k</i> be the index of the first character in the
2709 * string whose code is greater than {@code '\u005Cu0020'}, and let
2710 * <i>m</i> be the index of the last character in the string whose code
2711 * is greater than {@code '\u005Cu0020'}. A new {@code String}
2712 * object is created, representing the substring of this string that
2713 * begins with the character at index <i>k</i> and ends with the
2714 * character at index <i>m</i>-that is, the result of
2715 * <code>this.substring(<i>k</i>, <i>m</i>+1)</code>.
2716 * <p>
2717 * This method may be used to trim whitespace (as defined above) from
2718 * the beginning and end of a string.
2719 *
2720 * @return A copy of this string with leading and trailing white
2721 * space removed, or this string if it has no leading or
2722 * trailing white space.
2723 */
2724 public String trim() {
2725 int len = value.length;
2726 int st = 0;
2727 char[] val = value; /* avoid getfield opcode */
2728
2729 while ((st < len) && (val[st] <= ' ')) {
2730 st++;
2731 }
2732 while ((st < len) && (val[len - 1] <= ' ')) {
2733 len--;
2734 }
2735 return ((st > 0) || (len < value.length)) ? substring(st, len) : this;
2736 }
2737
2738 /**
2739 * This object (which is already a string!) is itself returned.
2740 *
2741 * @return the string itself.
2742 */
2743 public String toString() {
2744 return this;
2745 }
2746
2747 /**
2748 * Converts this string to a new character array.
2749 *
2750 * @return a newly allocated character array whose length is the length
2751 * of this string and whose contents are initialized to contain
2752 * the character sequence represented by this string.
2753 */
2754 public char[] toCharArray() {
2755 return Arrays.copyOf(value, value.length);
2756 }
2757
2758 /**
2759 * Returns a formatted string using the specified format string and
2760 * arguments.
2761 *
2762 * <p> The locale always used is the one returned by {@link
2763 * java.util.Locale#getDefault() Locale.getDefault()}.
2764 *
2765 * @param format
2766 * A <a href="../util/Formatter.html#syntax">format string</a>
2767 *
2768 * @param args
2769 * Arguments referenced by the format specifiers in the format
2770 * string. If there are more arguments than format specifiers, the
2771 * extra arguments are ignored. The number of arguments is
2772 * variable and may be zero. The maximum number of arguments is
2773 * limited by the maximum dimension of a Java array as defined by
2774 * <cite>The Java™ Virtual Machine Specification</cite>.
2775 * The behaviour on a
2776 * <tt>null</tt> argument depends on the <a
2777 * href="../util/Formatter.html#syntax">conversion</a>.
2778 *
2779 * @throws IllegalFormatException
2780 * If a format string contains an illegal syntax, a format
2781 * specifier that is incompatible with the given arguments,
2782 * insufficient arguments given the format string, or other
2783 * illegal conditions. For specification of all possible
2784 * formatting errors, see the <a
2785 * href="../util/Formatter.html#detail">Details</a> section of the
2786 * formatter class specification.
2787 *
2788 * @throws NullPointerException
2789 * If the <tt>format</tt> is <tt>null</tt>
2790 *
2791 * @return A formatted string
2792 *
2793 * @see java.util.Formatter
2794 * @since 1.5
2795 */
2796 public static String format(String format, Object... args) {
2797 return new Formatter().format(format, args).toString();
2798 }
2799
2800 /**
2801 * Returns a formatted string using the specified locale, format string,
2802 * and arguments.
2803 *
2804 * @param l
2805 * The {@linkplain java.util.Locale locale} to apply during
2806 * formatting. If <tt>l</tt> is <tt>null</tt> then no localization
2807 * is applied.
2808 *
2809 * @param format
2810 * A <a href="../util/Formatter.html#syntax">format string</a>
2811 *
2812 * @param args
2813 * Arguments referenced by the format specifiers in the format
2814 * string. If there are more arguments than format specifiers, the
2815 * extra arguments are ignored. The number of arguments is
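2816 * variable and may be zero. The maximum number of arguments is
2817 * limited by the maximum dimension of a Java array as defined by
2818 * <cite>The Java™ Virtual Machine Specification</cite>.
2819 * The behaviour on a
2820 * <tt>null</tt> argument depends on the <a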
2821 * href="../util/Formatter.html#syntax">conversion</a>.
2822 *
2823 * @throws IllegalFormatException
2824 * If a format string contains an illegal syntax, a format
2825 * specifier that is incompatible with the given arguments,
2826 * insufficient arguments given the format string, or other
2827 * illegal conditions. For specification of all possible
2828 * formatting errors, see the <a
2829 * href="../util/Formatter.html#detail">Details</a> section of the
2830 * formatter class specification.
2831 *
2832 * @throws NullPointerException
2833 * If the <tt>format</tt> is <tt>null</tt>
2834 *
2835 * @return A formatted string
2836 *
2837 * @see java.util.Formatter
2838 * @since 1.5
2839 */
2840 public static String format(Locale l, String format, Object... args) {
2841 return new Formatter(l).format(format, args).toString();
2842 }
2843
2844 /**
2845 * Returns the string representation of the {@code Object} argument.
2846 *
2847 * @param obj an {@code Object}.
2848 * @return if the argument is {@code null}, then a string equal to
2849 * {@code "null"}; otherwise, the value of
2850 * {@code obj.toString()} is returned.
2851 * @see java.lang.Object#toString()
2852 */
2853 public static String valueOf(Object obj) {
2854 return (obj == null) ? "null" : obj.toString();
2855 }
2856
2857 /**
2858 * Returns the string representation of the {@code char} array
2859 * argument. The contents of the character array are copied; subsequent
2860 * modification of the character array does not affect the newly
2925 *
2926 * @param b a {@code boolean}.
2927 * @return if the argument is {@code true}, a string equal to
2928 * {@code "true"} is returned; otherwise, a string equal to
2929 * {@code "false"} is returned.
2930 */
2931 public static String valueOf(boolean b) {
2932 return b ? "true" : "false";
2933 }
2934
2935 /**
2936 * Returns the string representation of the {@code char}
2937 * argument.
2938 *
2939 * @param c a {@code char}.
2940 * @return a string of length {@code 1} containing
2941 * as its single character the argument {@code c}.
2942 */
2943 public static String valueOf(char c) {
2944 char data[] = {c};
2945 return new String(data, true);
2946 }
2947
2948 /**
2949 * Returns the string representation of the {@code int} argument.
2950 * <p>
2951 * The representation is exactly the one returned by the
2952 * {@code Integer.toString} method of one argument.
2953 *
2954 * @param i an {@code int}.
2955 * @return a string representation of the {@code int} argument.
2956 * @see java.lang.Integer#toString(int, int)
2957 */
2958 public static String valueOf(int i) {
2959 return Integer.toString(i);
2960 }
2961
2962 /**
2963 * Returns the string representation of the {@code long} argument.
2964 * <p>
2965 * The representation is exactly the one returned by the
3008 * class {@code String}.
3009 * <p>
3010 * When the intern method is invoked, if the pool already contains a
3011 * string equal to this {@code String} object as determined by
3012 * the {@link #equals(Object)} method, then the string from the pool is
3013 * returned. Otherwise, this {@code String} object is added to the
3014 * pool and a reference to this {@code String} object is returned.
3015 * <p>
3016 * It follows that for any two strings {@code s} and {@code t},
3017 * {@code s.intern() == t.intern()} is {@code true}
3018 * if and only if {@code s.equals(t)} is {@code true}.
3019 * <p>
3020 * All literal strings and string-valued constant expressions are
3021 * interned. String literals are defined in section 3.10.5 of
3022 * <cite>The Java™ Language Specification</cite>.
3023 *
3024 * @return a string that has the same contents as this string, but is
3025 * guaranteed to be from a pool of unique strings.
3026 */
3027 public native String intern(); // native: no Java body here; the implementation is supplied outside this source file
3028 }
|