src/java.base/share/classes/java/lang/String.java

Print this page




  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.lang.annotation.Native;
  31 import java.nio.charset.Charset;

  32 import java.util.ArrayList;
  33 import java.util.Arrays;
  34 import java.util.Comparator;
  35 import java.util.Formatter;
  36 import java.util.Locale;
  37 import java.util.Objects;
  38 import java.util.Spliterator;
  39 import java.util.StringJoiner;
  40 import java.util.regex.Matcher;
  41 import java.util.regex.Pattern;
  42 import java.util.regex.PatternSyntaxException;
  43 import java.util.stream.IntStream;
  44 import java.util.stream.StreamSupport;
  45 import jdk.internal.HotSpotIntrinsicCandidate;
  46 import jdk.internal.vm.annotation.Stable;
  47 
  48 /**
  49  * The {@code String} class represents character strings. All
  50  * string literals in Java programs, such as {@code "abc"}, are
  51  * implemented as instances of this class.


 445      *         The number of bytes to decode
 446      *
 447      * @param  charsetName
 448      *         The name of a supported {@linkplain java.nio.charset.Charset
 449      *         charset}
 450      *
 451      * @throws  UnsupportedEncodingException
 452      *          If the named charset is not supported
 453      *
 454      * @throws  IndexOutOfBoundsException
 455      *          If {@code offset} is negative, {@code length} is negative, or
 456      *          {@code offset} is greater than {@code bytes.length - length}
 457      *
 458      * @since  1.1
 459      */
 460     public String(byte bytes[], int offset, int length, String charsetName)
 461             throws UnsupportedEncodingException {
 462         if (charsetName == null)
 463             throw new NullPointerException("charsetName"); // message names the offending argument
 464         checkBoundsOffCount(offset, length, bytes.length); // reading bytes.length also fails fast on a null array
 465         StringCoding.Result ret =
 466             StringCoding.decode(charsetName, bytes, offset, length); // charset is resolved by name inside decode
 467         this.value = ret.value; // take the value produced by the decode call ...
 468         this.coder = ret.coder; // ... together with the coder reported alongside it
 469     }
 470 
 471     /**
 472      * Constructs a new {@code String} by decoding the specified subarray of
 473      * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
 474      * The length of the new {@code String} is a function of the charset, and
 475      * hence may not be equal to the length of the subarray.
 476      *
 477      * <p> This method always replaces malformed-input and unmappable-character
 478      * sequences with this charset's default replacement string.  The {@link
 479      * java.nio.charset.CharsetDecoder} class should be used when more control
 480      * over the decoding process is required.
 481      *
 482      * @param  bytes
 483      *         The bytes to be decoded into characters
 484      *
 485      * @param  offset
 486      *         The index of the first byte to decode
 487      *
 488      * @param  length
 489      *         The number of bytes to decode
 490      *
 491      * @param  charset
 492      *         The {@linkplain java.nio.charset.Charset charset} to be used to
 493      *         decode the {@code bytes}
 494      *
 495      * @throws  IndexOutOfBoundsException
 496      *          If {@code offset} is negative, {@code length} is negative, or
 497      *          {@code offset} is greater than {@code bytes.length - length}
 498      *
 499      * @since  1.6
 500      */
 501     public String(byte bytes[], int offset, int length, Charset charset) {
 502         if (charset == null)
 503             throw new NullPointerException("charset"); // message names the offending argument
 504         checkBoundsOffCount(offset, length, bytes.length); // reading bytes.length also fails fast on a null array
 505         StringCoding.Result ret =
 506             StringCoding.decode(charset, bytes, offset, length); // Charset object is passed through as-is




































 507         this.value = ret.value; // take the value produced by the decode call ...
 508         this.coder = ret.coder; // ... together with the coder reported alongside it
 509     }
 510 
 511     /**
 512      * Constructs a new {@code String} by decoding the specified array of bytes
 513      * using the specified {@linkplain java.nio.charset.Charset charset}.  The
 514      * length of the new {@code String} is a function of the charset, and hence
 515      * may not be equal to the length of the byte array.
 516      *
 517      * <p> The behavior of this constructor when the given bytes are not valid
 518      * in the given charset is unspecified.  The {@link
 519      * java.nio.charset.CharsetDecoder} class should be used when more control
 520      * over the decoding process is required.
 521      *
 522      * @param  bytes
 523      *         The bytes to be decoded into characters
 524      *
 525      * @param  charsetName
 526      *         The name of a supported {@linkplain java.nio.charset.Charset


 571      * java.nio.charset.CharsetDecoder} class should be used when more control
 572      * over the decoding process is required.
 573      *
 574      * @param  bytes
 575      *         The bytes to be decoded into characters
 576      *
 577      * @param  offset
 578      *         The index of the first byte to decode
 579      *
 580      * @param  length
 581      *         The number of bytes to decode
 582      *
 583      * @throws  IndexOutOfBoundsException
 584      *          If {@code offset} is negative, {@code length} is negative, or
 585      *          {@code offset} is greater than {@code bytes.length - length}
 586      *
 587      * @since  1.1
 588      */
 589     public String(byte bytes[], int offset, int length) {
 590         checkBoundsOffCount(offset, length, bytes.length); // reading bytes.length also fails fast on a null array
 591         StringCoding.Result ret = StringCoding.decode(bytes, offset, length); // no charset argument on this overload
 592         this.value = ret.value; // take the value produced by the decode call ...
 593         this.coder = ret.coder; // ... together with the coder reported alongside it
 594     }
 595 
 596     /**
 597      * Constructs a new {@code String} by decoding the specified array of bytes
 598      * using the platform's default charset.  The length of the new {@code
 599      * String} is a function of the charset, and hence may not be equal to the
 600      * length of the byte array.
 601      *
 602      * <p> The behavior of this constructor when the given bytes are not valid
 603      * in the default charset is unspecified.  The {@link
 604      * java.nio.charset.CharsetDecoder} class should be used when more control
 605      * over the decoding process is required.
 606      *
 607      * @param  bytes
 608      *         The bytes to be decoded into characters
 609      *
 610      * @since  1.1
 611      */


 920      *
 921      * <p> The behavior of this method when this string cannot be encoded in
 922      * the given charset is unspecified.  The {@link
 923      * java.nio.charset.CharsetEncoder} class should be used when more control
 924      * over the encoding process is required.
 925      *
 926      * @param  charsetName
 927      *         The name of a supported {@linkplain java.nio.charset.Charset
 928      *         charset}
 929      *
 930      * @return  The resultant byte array
 931      *
 932      * @throws  UnsupportedEncodingException
 933      *          If the named charset is not supported
 934      *
 935      * @since  1.1
 936      */
 937     public byte[] getBytes(String charsetName)
 938             throws UnsupportedEncodingException {
 939         if (charsetName == null) throw new NullPointerException(); // null name is rejected without a message
 940         return StringCoding.encode(charsetName, coder(), value); // passes this string's coder and backing value
 941     }
 942 
 943     /**
 944      * Encodes this {@code String} into a sequence of bytes using the given
 945      * {@linkplain java.nio.charset.Charset charset}, storing the result into a
 946      * new byte array.
 947      *
 948      * <p> This method always replaces malformed-input and unmappable-character
 949      * sequences with this charset's default replacement byte array.  The
 950      * {@link java.nio.charset.CharsetEncoder} class should be used when more
 951      * control over the encoding process is required.
 952      *
 953      * @param  charset
 954      *         The {@linkplain java.nio.charset.Charset} to be used to encode
 955      *         the {@code String}
 956      *
 957      * @return  The resultant byte array
 958      *
 959      * @since  1.6
 960      */
 961     public byte[] getBytes(Charset charset) {
 962         if (charset == null) throw new NullPointerException(); // null charset is rejected without a message
 963         return StringCoding.encode(charset, coder(), value); // passes this string's coder and backing value





































 964      }
 965 
 966     /**
 967      * Encodes this {@code String} into a sequence of bytes using the
 968      * platform's default charset, storing the result into a new byte array.
 969      *
 970      * <p> The behavior of this method when this string cannot be encoded in
 971      * the default charset is unspecified.  The {@link
 972      * java.nio.charset.CharsetEncoder} class should be used when more control
 973      * over the encoding process is required.
 974      *
 975      * @return  The resultant byte array
 976      *
 977      * @since      1.1
 978      */
 979     public byte[] getBytes() {
 980         return StringCoding.encode(coder(), value); // no charset argument is passed on this overload
 981     }
 982 
 983     /**
 984      * Compares this string to the specified object.  The result is {@code
 985      * true} if and only if the argument is not {@code null} and is a {@code
 986      * String} object that represents the same sequence of characters as this
 987      * object.
 988      *
 989      * <p>For finer-grained String comparison, refer to
 990      * {@link java.text.Collator}.
 991      *
 992      * @param  anObject
 993      *         The object to compare this {@code String} against
 994      *
 995      * @return  {@code true} if the given object represents a {@code String}
 996      *          equivalent to this string, {@code false} otherwise
 997      *
 998      * @see  #compareTo(String)
 999      * @see  #equalsIgnoreCase(String)
1000      */




  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.lang.annotation.Native;
  31 import java.nio.charset.Charset;
  32 import java.nio.ByteBuffer;
  33 import java.util.ArrayList;
  34 import java.util.Arrays;
  35 import java.util.Comparator;
  36 import java.util.Formatter;
  37 import java.util.Locale;
  38 import java.util.Objects;
  39 import java.util.Spliterator;
  40 import java.util.StringJoiner;
  41 import java.util.regex.Matcher;
  42 import java.util.regex.Pattern;
  43 import java.util.regex.PatternSyntaxException;
  44 import java.util.stream.IntStream;
  45 import java.util.stream.StreamSupport;
  46 import jdk.internal.HotSpotIntrinsicCandidate;
  47 import jdk.internal.vm.annotation.Stable;
  48 
  49 /**
  50  * The {@code String} class represents character strings. All
  51  * string literals in Java programs, such as {@code "abc"}, are
  52  * implemented as instances of this class.


 446      *         The number of bytes to decode
 447      *
 448      * @param  charsetName
 449      *         The name of a supported {@linkplain java.nio.charset.Charset
 450      *         charset}
 451      *
 452      * @throws  UnsupportedEncodingException
 453      *          If the named charset is not supported
 454      *
 455      * @throws  IndexOutOfBoundsException
 456      *          If {@code offset} is negative, {@code length} is negative, or
 457      *          {@code offset} is greater than {@code bytes.length - length}
 458      *
 459      * @since  1.1
 460      */
 461     public String(byte bytes[], int offset, int length, String charsetName)
 462             throws UnsupportedEncodingException {
 463         if (charsetName == null)
 464             throw new NullPointerException("charsetName"); // message names the offending argument
 465         checkBoundsOffCount(offset, length, bytes.length); // reading bytes.length also fails fast on a null array
 466         StringCoder.Result ret =
 467             StringCoder.decode(charsetName, bytes, offset, length); // charset is resolved by name inside decode
 468         this.value = ret.value; // take the value produced by the decode call ...
 469         this.coder = ret.coder; // ... together with the coder reported alongside it
 470     }
 471 
 472     /**
 473      * Constructs a new {@code String} by decoding the specified subarray of
 474      * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
 475      * The length of the new {@code String} is a function of the charset, and
 476      * hence may not be equal to the length of the subarray.
 477      *
 478      * <p> This method always replaces malformed-input and unmappable-character
 479      * sequences with this charset's default replacement string.  The {@link
 480      * java.nio.charset.CharsetDecoder} class should be used when more control
 481      * over the decoding process is required.
 482      *
 483      * @param  bytes
 484      *         The bytes to be decoded into characters
 485      *
 486      * @param  offset
 487      *         The index of the first byte to decode
 488      *
 489      * @param  length
 490      *         The number of bytes to decode
 491      *
 492      * @param  charset
 493      *         The {@linkplain java.nio.charset.Charset charset} to be used to
 494      *         decode the {@code bytes}
 495      *
 496      * @throws  IndexOutOfBoundsException
 497      *          If {@code offset} is negative, {@code length} is negative, or
 498      *          {@code offset} is greater than {@code bytes.length - length}
 499      *
 500      * @since  1.6
 501      */
 502     public String(byte bytes[], int offset, int length, Charset charset) {
 503         if (charset == null)
 504             throw new NullPointerException("charset"); // message names the offending argument
 505         checkBoundsOffCount(offset, length, bytes.length); // reading bytes.length also fails fast on a null array
 506         StringCoder.Result ret =
 507             StringCoder.decode(charset, bytes, offset, length); // Charset object is passed through as-is
 508         this.value = ret.value; // take the value produced by the decode call ...
 509         this.coder = ret.coder; // ... together with the coder reported alongside it
 510     }
 511 
 512     /**
 513      * Constructs a new {@code String} by decoding the remaining bytes in the specified 
 514      * {@linkplain java.nio.ByteBuffer byte buffer} using the specified 
 515      * {@linkplain java.nio.charset.Charset charset}.
 516      * The length of the new {@code String} is a function of the charset, and
 517      * hence may not be equal to the remaining number of bytes in the 
 518      * {@linkplain java.nio.ByteBuffer byte buffer}.
 519      *
 520      * <p> This method always replaces malformed-input and unmappable-character
 521      * sequences with this charset's default replacement string.  The {@link
 522      * java.nio.charset.CharsetDecoder} class should be used when more control
 523      * over the decoding process is required.
 524      * 
 525      * <p>
 526      * Bytes are read between {@code position()} and {@code limit()}
 527      * of the {@link java.nio.ByteBuffer}.
 528      * After the bytes have been read, the buffer's {@code position()} will
 529      * have been advanced past them.
 530      *
 531      * @param  src
 532      *         The source of bytes to be decoded into characters
 533      *
 534      * @param  charset
 535      *         The {@linkplain java.nio.charset.Charset charset} to be used to
 536      *         decode the {@code src}
 537      *
 538      * @since  11
 539      */
 540     public String(ByteBuffer src, Charset charset) {
 541         Objects.requireNonNull(src);     // NullPointerException before any decoding is attempted
 542         Objects.requireNonNull(charset); // same fail-fast check for the charset
 543         StringCoder.Result ret = StringCoder.decode(charset, src); // buffer is handed over whole; no offset/length here
 544         this.value = ret.value; // take the value produced by the decode call ...
 545         this.coder = ret.coder; // ... together with the coder reported alongside it
 546     }
 547 
 548     /**
 549      * Constructs a new {@code String} by decoding the specified array of bytes
 550      * using the specified {@linkplain java.nio.charset.Charset charset}.  The
 551      * length of the new {@code String} is a function of the charset, and hence
 552      * may not be equal to the length of the byte array.
 553      *
 554      * <p> The behavior of this constructor when the given bytes are not valid
 555      * in the given charset is unspecified.  The {@link
 556      * java.nio.charset.CharsetDecoder} class should be used when more control
 557      * over the decoding process is required.
 558      *
 559      * @param  bytes
 560      *         The bytes to be decoded into characters
 561      *
 562      * @param  charsetName
 563      *         The name of a supported {@linkplain java.nio.charset.Charset


 608      * java.nio.charset.CharsetDecoder} class should be used when more control
 609      * over the decoding process is required.
 610      *
 611      * @param  bytes
 612      *         The bytes to be decoded into characters
 613      *
 614      * @param  offset
 615      *         The index of the first byte to decode
 616      *
 617      * @param  length
 618      *         The number of bytes to decode
 619      *
 620      * @throws  IndexOutOfBoundsException
 621      *          If {@code offset} is negative, {@code length} is negative, or
 622      *          {@code offset} is greater than {@code bytes.length - length}
 623      *
 624      * @since  1.1
 625      */
 626     public String(byte bytes[], int offset, int length) {
 627         checkBoundsOffCount(offset, length, bytes.length); // reading bytes.length also fails fast on a null array
 628         StringCoder.Result ret = StringCoder.decode(bytes, offset, length); // no charset argument on this overload
 629         this.value = ret.value; // take the value produced by the decode call ...
 630         this.coder = ret.coder; // ... together with the coder reported alongside it
 631     }
 632 
 633     /**
 634      * Constructs a new {@code String} by decoding the specified array of bytes
 635      * using the platform's default charset.  The length of the new {@code
 636      * String} is a function of the charset, and hence may not be equal to the
 637      * length of the byte array.
 638      *
 639      * <p> The behavior of this constructor when the given bytes are not valid
 640      * in the default charset is unspecified.  The {@link
 641      * java.nio.charset.CharsetDecoder} class should be used when more control
 642      * over the decoding process is required.
 643      *
 644      * @param  bytes
 645      *         The bytes to be decoded into characters
 646      *
 647      * @since  1.1
 648      */


 957      *
 958      * <p> The behavior of this method when this string cannot be encoded in
 959      * the given charset is unspecified.  The {@link
 960      * java.nio.charset.CharsetEncoder} class should be used when more control
 961      * over the encoding process is required.
 962      *
 963      * @param  charsetName
 964      *         The name of a supported {@linkplain java.nio.charset.Charset
 965      *         charset}
 966      *
 967      * @return  The resultant byte array
 968      *
 969      * @throws  UnsupportedEncodingException
 970      *          If the named charset is not supported
 971      *
 972      * @since  1.1
 973      */
 974     public byte[] getBytes(String charsetName)
 975             throws UnsupportedEncodingException {
 976         if (charsetName == null) throw new NullPointerException(); // null name is rejected without a message
 977         return StringCoder.encode(charsetName, coder(), value); // passes this string's coder and backing value
 978     }
 979 
 980     /**
 981      * Encodes this {@code String} into a sequence of bytes using the given
 982      * {@linkplain java.nio.charset.Charset charset}, storing the result into a
 983      * new byte array.
 984      *
 985      * <p> This method always replaces malformed-input and unmappable-character
 986      * sequences with this charset's default replacement byte array.  The
 987      * {@link java.nio.charset.CharsetEncoder} class should be used when more
 988      * control over the encoding process is required.
 989      *
 990      * @param  charset
 991      *         The {@linkplain java.nio.charset.Charset} to be used to encode
 992      *         the {@code String}
 993      *
 994      * @return  The resultant byte array
 995      *
 996      * @since  1.6
 997      */
 998     public byte[] getBytes(Charset charset) {
 999         if (charset == null) throw new NullPointerException(); // null charset is rejected without a message
1000         return StringCoder.encode(charset, coder(), value); // passes this string's coder and backing value
1001      }
1002 
1003     /**
1004      * Encodes characters from this string into a sequence of bytes using the given
1005      * {@linkplain java.nio.charset.Charset charset}, storing the result into the
1006      * given destination {@linkplain java.nio.ByteBuffer byte buffer}.
1007      *
1008      * <p> The first character to be encoded is at index {@code srcBegin};
1009      * the last character to be encoded is at index {@code srcEnd-1}. At most
1010      * {@code srcEnd-srcBegin} characters will be encoded, and at most
1011      * {@link java.nio.Buffer#remaining dstBuffer.remaining()} bytes will be written.
1012      * The position of the {@code dstBuffer} will be advanced to reflect the bytes
1013      * written, but its mark and limit will not be modified.
1014      *
1015      * <p> This method always replaces malformed-input and unmappable-character
1016      * sequences with this charset's default replacement byte array.  The
1017      * {@link java.nio.charset.CharsetEncoder} class should be used when more
1018      * control over the encoding process is required.
1019      * 
1020      * @param  srcBegin
1021      *         index of the first character in the string to encode
1022      * @param  srcEnd
1023      *         index after the last character in the string to encode
1024      * @param  dstBuffer
1025      *         The destination {@linkplain java.nio.ByteBuffer byte buffer}
1026      * @param  charset
1027      *         The {@linkplain java.nio.charset.Charset} to be used to encode
1028      *         the {@code String}
1029      * @return the number of characters encoded into the {@code dstBuffer}
1030      *
1031      * @since  11
1032      */
1033     public int getBytes(int srcBegin, int srcEnd, ByteBuffer dstBuffer, Charset charset) {
1034         Objects.requireNonNull(dstBuffer); // NullPointerException before any range check or encoding
1035         Objects.requireNonNull(charset);   // same fail-fast check for the charset
1036         checkBoundsBeginEnd(srcBegin, srcEnd, length()); // NOTE(review): helper not shown; presumably rejects ranges outside [0, length()] - confirm
1037         return StringCoder.encode(charset, coder(), value, srcBegin, srcEnd, dstBuffer); // result is returned unchanged as the count
1038     }
1039 
1040     /**
1041      * Encodes this {@code String} into a sequence of bytes using the
1042      * platform's default charset, storing the result into a new byte array.
1043      *
1044      * <p> The behavior of this method when this string cannot be encoded in
1045      * the default charset is unspecified.  The {@link
1046      * java.nio.charset.CharsetEncoder} class should be used when more control
1047      * over the encoding process is required.
1048      *
1049      * @return  The resultant byte array
1050      *
1051      * @since      1.1
1052      */
1053     public byte[] getBytes() {
1054         return StringCoder.encode(coder(), value); // no charset argument is passed on this overload
1055     }
1056 
1057     /**
1058      * Compares this string to the specified object.  The result is {@code
1059      * true} if and only if the argument is not {@code null} and is a {@code
1060      * String} object that represents the same sequence of characters as this
1061      * object.
1062      *
1063      * <p>For finer-grained String comparison, refer to
1064      * {@link java.text.Collator}.
1065      *
1066      * @param  anObject
1067      *         The object to compare this {@code String} against
1068      *
1069      * @return  {@code true} if the given object represents a {@code String}
1070      *          equivalent to this string, {@code false} otherwise
1071      *
1072      * @see  #compareTo(String)
1073      * @see  #equalsIgnoreCase(String)
1074      */