src/java.base/share/classes/java/lang/String.java

Print this page




  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.nio.charset.Charset;

  31 import java.util.ArrayList;
  32 import java.util.Arrays;
  33 import java.util.Comparator;
  34 import java.util.Formatter;
  35 import java.util.Locale;
  36 import java.util.Objects;
  37 import java.util.StringJoiner;
  38 import java.util.regex.Matcher;
  39 import java.util.regex.Pattern;
  40 import java.util.regex.PatternSyntaxException;
  41 
  42 /**
  43  * The {@code String} class represents character strings. All
  44  * string literals in Java programs, such as {@code "abc"}, are
  45  * implemented as instances of this class.
  46  * <p>
  47  * Strings are constant; their values cannot be changed after they
  48  * are created. String buffers support mutable strings.
  49  * Because String objects are immutable they can be shared. For example:
  50  * <blockquote><pre>


 435      *         The number of bytes to decode
 436      *
 437      * @param  charset
 438      *         The {@linkplain java.nio.charset.Charset charset} to be used to
 439      *         decode the {@code bytes}
 440      *
 441      * @throws  IndexOutOfBoundsException
 442      *          If the {@code offset} and {@code length} arguments index
 443      *          characters outside the bounds of the {@code bytes} array
 444      *
 445      * @since  1.6
 446      */
 447     public String(byte bytes[], int offset, int length, Charset charset) {
 448         if (charset == null)
 449             throw new NullPointerException("charset"); // message names the offending argument
 450         checkBounds(bytes, offset, length); // presumably the source of the documented IndexOutOfBoundsException (helper not visible here)
 451         this.value =  StringCoding.decode(charset, bytes, offset, length);
 452     }
 453 
 454     /**









































































 455      * Constructs a new {@code String} by decoding the specified array of bytes
 456      * using the specified {@linkplain java.nio.charset.Charset charset}.  The
 457      * length of the new {@code String} is a function of the charset, and hence
 458      * may not be equal to the length of the byte array.
 459      *
 460      * <p> The behavior of this constructor when the given bytes are not valid
 461      * in the given charset is unspecified.  The {@link
 462      * java.nio.charset.CharsetDecoder} class should be used when more control
 463      * over the decoding process is required.
 464      *
 465      * @param  bytes
 466      *         The bytes to be decoded into characters
 467      *
 468      * @param  charsetName
 469      *         The name of a supported {@linkplain java.nio.charset.Charset
 470      *         charset}
 471      *
 472      * @throws  UnsupportedEncodingException
 473      *          If the named charset is not supported
 474      *


 908 
 909     /**
 910      * Encodes this {@code String} into a sequence of bytes using the given
 911      * {@linkplain java.nio.charset.Charset charset}, storing the result into a
 912      * new byte array.
 913      *
 914      * <p> This method always replaces malformed-input and unmappable-character
 915      * sequences with this charset's default replacement byte array.  The
 916      * {@link java.nio.charset.CharsetEncoder} class should be used when more
 917      * control over the encoding process is required.
 918      *
 919      * @param  charset
 920      *         The {@linkplain java.nio.charset.Charset} to be used to encode
 921      *         the {@code String}
 922      *
 923      * @return  The resultant byte array
 924      *
 925      * @since  1.6
 926      */
 927     public byte[] getBytes(Charset charset) {
 928         if (charset == null) throw new NullPointerException(); // reject a null charset up front (the NPE carries no message)
 929         return StringCoding.encode(charset, value, 0, value.length); // hands over the whole backing array: 0 .. value.length



















































































































































 930     }
 931 
 932     /**
 933      * Encodes this {@code String} into a sequence of bytes using the
 934      * platform's default charset, storing the result into a new byte array.
 935      *
 936      * <p> The behavior of this method when this string cannot be encoded in
 937      * the default charset is unspecified.  The {@link
 938      * java.nio.charset.CharsetEncoder} class should be used when more control
 939      * over the encoding process is required.
 940      *
 941      * @return  The resultant byte array
 942      *
 943      * @since      1.1
 944      */
 945     public byte[] getBytes() {
 946         return StringCoding.encode(value, 0, value.length); // overload without a charset argument; the whole of value (0 .. value.length) is passed
 947     }
 948 
 949     /**




  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.nio.charset.Charset;
  31 import java.nio.ByteBuffer;
  32 import java.util.ArrayList;
  33 import java.util.Arrays;
  34 import java.util.Comparator;
  35 import java.util.Formatter;
  36 import java.util.Locale;
  37 import java.util.Objects;
  38 import java.util.StringJoiner;
  39 import java.util.regex.Matcher;
  40 import java.util.regex.Pattern;
  41 import java.util.regex.PatternSyntaxException;
  42 
  43 /**
  44  * The {@code String} class represents character strings. All
  45  * string literals in Java programs, such as {@code "abc"}, are
  46  * implemented as instances of this class.
  47  * <p>
  48  * Strings are constant; their values cannot be changed after they
  49  * are created. String buffers support mutable strings.
  50  * Because String objects are immutable they can be shared. For example:
  51  * <blockquote><pre>


 436      *         The number of bytes to decode
 437      *
 438      * @param  charset
 439      *         The {@linkplain java.nio.charset.Charset charset} to be used to
 440      *         decode the {@code bytes}
 441      *
 442      * @throws  IndexOutOfBoundsException
 443      *          If the {@code offset} and {@code length} arguments index
 444      *          characters outside the bounds of the {@code bytes} array
 445      *
 446      * @since  1.6
 447      */
 448     public String(byte bytes[], int offset, int length, Charset charset) {
 449         if (charset == null)
 450             throw new NullPointerException("charset"); // message names the offending argument
 451         checkBounds(bytes, offset, length); // presumably the source of the documented IndexOutOfBoundsException (helper not visible here)
 452         this.value = StringCoding.decode(charset, bytes, offset, length);
 453     }
 454     
 455     /**
 456      * Constructs a new {@code String} by decoding the remaining bytes in the specified
 457      * {@linkplain java.nio.ByteBuffer byte buffer} using the specified
 458      * {@linkplain java.nio.charset.Charset charset}.
 459      * The length of the new {@code String} is a function of the charset, and
 460      * hence may not be equal to the remaining number of bytes in the
 461      * {@linkplain java.nio.ByteBuffer byte buffer}.
 462      *
 463      * <p> This constructor always replaces malformed-input and unmappable-character
 464      * sequences with this charset's default replacement string.  The {@link
 465      * java.nio.charset.CharsetDecoder} class should be used when more control
 466      * over the decoding process is required.
 467      *
 468      * <p>
 469      * Bytes are read between {@code position()} and {@code limit()}
 470      * of the {@link java.nio.ByteBuffer}.
 471      * After the bytes have been read the {@code position()} will be
 472      * advanced to its {@code limit()}.
 473      *
 474      * @param  bytes
 475      *         The buffer whose remaining bytes are to be decoded into characters
 476      *
 477      * @param  charset
 478      *         The {@linkplain java.nio.charset.Charset charset} to be used to
 479      *         decode the {@code bytes}
 480      *
          * @throws  NullPointerException
          *          If {@code bytes} or {@code charset} is {@code null}
          *
 481      * @since  1.9
 482      */
 483     public String(ByteBuffer bytes, Charset charset) {
 484         Objects.requireNonNull(bytes);
 485         Objects.requireNonNull(charset);
 486         this.value = StringCoding.decode(charset, bytes);
 487     }
 488 
 489     /**
 490      * Constructs a new {@code String} by decoding the remaining bytes in the specified
 491      * {@linkplain java.nio.ByteBuffer byte buffer} using the specified
 492      * charset.
 493      * The length of the new {@code String} is a function of the charset, and
 494      * hence may not be equal to the remaining number of bytes in the
 495      * {@linkplain java.nio.ByteBuffer byte buffer}.
 496      *
 497      * <p> This constructor always replaces malformed-input and unmappable-character
 498      * sequences with this charset's default replacement string.  The {@link
 499      * java.nio.charset.CharsetDecoder} class should be used when more control
 500      * over the decoding process is required.
 501      *
 502      * <p>
 503      * Bytes are read between {@code position()} and {@code limit()}
 504      * of the {@link java.nio.ByteBuffer}.
 505      * After the bytes have been read the {@code position()} will be
 506      * advanced to its {@code limit()}.
 507      *
 508      * @param  bytes
 509      *         The buffer whose remaining bytes are to be decoded into characters
 510      *
 511      * @param  charsetName
 512      *         The name of a supported {@linkplain java.nio.charset.Charset
 513      *         charset}
 514      *
 515      * @throws  UnsupportedEncodingException
 516      *          If the named charset is not supported
 517      *
          * @throws  NullPointerException
          *          If {@code bytes} or {@code charsetName} is {@code null}
 518      *
 519      * @since  1.9
 520      */
 521     public String(ByteBuffer bytes, String charsetName)
 522             throws UnsupportedEncodingException {
 523         Objects.requireNonNull(bytes);
 524         Objects.requireNonNull(charsetName);
 525         this.value = StringCoding.decode(charsetName, bytes);
 526     }
 527 
 528     /**
 529      * Constructs a new {@code String} by decoding the specified array of bytes
 530      * using the specified {@linkplain java.nio.charset.Charset charset}.  The
 531      * length of the new {@code String} is a function of the charset, and hence
 532      * may not be equal to the length of the byte array.
 533      *
 534      * <p> The behavior of this constructor when the given bytes are not valid
 535      * in the given charset is unspecified.  The {@link
 536      * java.nio.charset.CharsetDecoder} class should be used when more control
 537      * over the decoding process is required.
 538      *
 539      * @param  bytes
 540      *         The bytes to be decoded into characters
 541      *
 542      * @param  charsetName
 543      *         The name of a supported {@linkplain java.nio.charset.Charset
 544      *         charset}
 545      *
 546      * @throws  UnsupportedEncodingException
 547      *          If the named charset is not supported
 548      *


 982 
 983     /**
 984      * Encodes this {@code String} into a sequence of bytes using the given
 985      * {@linkplain java.nio.charset.Charset charset}, storing the result into a
 986      * new byte array.
 987      *
 988      * <p> This method always replaces malformed-input and unmappable-character
 989      * sequences with this charset's default replacement byte array.  The
 990      * {@link java.nio.charset.CharsetEncoder} class should be used when more
 991      * control over the encoding process is required.
 992      *
 993      * @param  charset
 994      *         The {@linkplain java.nio.charset.Charset charset} to be used to encode
 995      *         the {@code String}
 996      *
 997      * @return  The resultant byte array
 998      *
          * @throws  NullPointerException
          *          If {@code charset} is {@code null}
          *
 999      * @since  1.6
1000      */
1001     public byte[] getBytes(Charset charset) {
1002         Objects.requireNonNull(charset);
1003         return StringCoding.encode(charset, value, 0, value.length);
1004     }
1005     
1006     /**
1007      * Encodes this {@code String} into a sequence of bytes using the given
1008      * {@linkplain java.nio.charset.Charset charset}, storing the result into a
1009      * byte array that has been passed as an argument.
1010      *
1011      * <p> This method always replaces malformed-input and unmappable-character
1012      * sequences with this charset's default replacement byte array.  The {@link
1013      * java.nio.charset.CharsetEncoder} class should be used when more control
1014      * over the encoding process is required.
1015      *
1016      * <p> The number of bytes written during encoding is a function of the charset
1017      * used to perform the encoding and is returned from this method. It may not
1018      * be equal to the length of this
1019      * {@code String}. At most {@code destBuffer.length - destOffset} bytes will be written.
1020      *
1021      * @param  destBuffer
1022      *         The destination array
1023      *
1024      * @param  destOffset
1025      *         The start offset in the destination array
1026      *
1027      * @param  charset
1028      *         The {@linkplain java.nio.charset.Charset charset} to be used to encode
1029      *         the {@code String}
1030      *
1031      * @return the number of bytes written
1032      *
          * @throws  NullPointerException
          *          If {@code destBuffer} or {@code charset} is {@code null}
          *
1033      * @since  1.9
1034      */
1035     public int getBytes(byte[] destBuffer, int destOffset, Charset charset) {
1036         Objects.requireNonNull(destBuffer);
1037         Objects.requireNonNull(charset);
              // NOTE(review): destOffset is not range-checked here; presumably StringCoding.encode
              // validates it - confirm, and document the resulting exception in the Javadoc.
1038         return StringCoding.encode(charset, value, 0, value.length, destBuffer, destOffset);
1039     }
1040 
1041     /**
1042      * Encodes as many characters as possible from this {@code String} into a
1043      * sequence of bytes using the given
1044      * {@linkplain java.nio.charset.Charset charset}, storing the result into a
1045      * {@linkplain java.nio.ByteBuffer byte buffer} that has been passed as an argument.
1046      *
1047      * <p> The number of bytes written during encoding is a function of the charset
1048      * used to perform the encoding and is returned from this method. It may not
1049      * be equal to the length of this
1050      * {@code String}. At most {@code destBuffer.remaining()} bytes will be written.
1051      *
1052      * <p> This method always replaces malformed-input and unmappable-character
1053      * sequences with this charset's default replacement byte array.  The {@link
1054      * java.nio.charset.CharsetEncoder} class should be used when more control
1055      * over the encoding process is required.
1056      *
1057      * <p> The buffer's position will be advanced to reflect the number of
1058      * the bytes written, but its mark and limit will not be modified.
1059      *
1060      * @param  destBuffer
1061      *         The destination {@linkplain java.nio.ByteBuffer byte buffer}
1062      *
1063      * @param  charset
1064      *         The {@linkplain java.nio.charset.Charset charset} to be used to encode
1065      *         the {@code String}
1066      *
1067      * @return the number of bytes written
1068      *
          * @throws  NullPointerException
          *          If {@code destBuffer} or {@code charset} is {@code null}
          *
1069      * @since  1.9
1070      */
1071     public int getBytes(ByteBuffer destBuffer, Charset charset) {
1072         Objects.requireNonNull(destBuffer);
1073         Objects.requireNonNull(charset);
1074         return StringCoding.encode(charset, value, 0, value.length, destBuffer);
1075     }
1076 
1077     /**
1078      * Encodes as many characters as possible from this {@code String} into a
1079      * sequence of bytes using the specified charset, storing the result into a
1080      * {@linkplain java.nio.ByteBuffer byte buffer} that has been passed as an argument.
1081      *
1082      * <p> The number of bytes written during encoding is a function of the charset
1083      * used to perform the encoding and is returned from this method. It may not
1084      * be equal to the length of this
1085      * {@code String}. At most {@code destBuffer.remaining()} bytes will be written.
1086      *
1087      * <p> This method always replaces malformed-input and unmappable-character
1088      * sequences with this charset's default replacement byte array.  The {@link
1089      * java.nio.charset.CharsetEncoder} class should be used when more control
1090      * over the encoding process is required.
1091      *
1092      * <p> The buffer's position will be advanced to reflect the number of
1093      * the bytes written, but its mark and limit will not be modified.
1094      *
1095      * @param  destBuffer
1096      *         The destination {@linkplain java.nio.ByteBuffer byte buffer}
1097      *
1098      * @param  charsetName
1099      *         The name of a supported {@linkplain java.nio.charset.Charset
1100      *         charset}
1101      *
1102      * @return the number of bytes written
1103      *
1104      * @throws  UnsupportedEncodingException
1105      *          If the named charset is not supported
1106      *
          * @throws  NullPointerException
          *          If {@code destBuffer} or {@code charsetName} is {@code null}
          *
1107      * @since  1.9
1108      */
1109     public int getBytes(ByteBuffer destBuffer, String charsetName) throws UnsupportedEncodingException {
1110         Objects.requireNonNull(destBuffer);
1111         Objects.requireNonNull(charsetName);
1112         return StringCoding.encode(charsetName, value, 0, value.length, destBuffer);
1113     }
1114 
1115     /**
1116      * Encodes as many characters as possible from this {@code String} into a
1117      * sequence of bytes using the specified charset, storing the result into a
1118      * byte array that has been passed as an argument.
1119      *
1120      * <p> The number of bytes written during encoding is a function of the charset
1121      * used to perform the encoding and is returned from this method. It may not
1122      * be equal to the length of this
1123      * {@code String}. At most {@code destBuffer.length - destOffset} bytes will be written.
1124      *
1125      * <p> This method always replaces malformed-input and unmappable-character
1126      * sequences with this charset's default replacement byte array.  The {@link
1127      * java.nio.charset.CharsetEncoder} class should be used when more control
1128      * over the encoding process is required.
1129      *
1130      * @param  destBuffer
1131      *         The destination array
1132      *
1133      * @param  destOffset
1134      *         The start offset in the destination array
1135      *
1136      * @param  charsetName
1137      *         The name of a supported {@linkplain java.nio.charset.Charset
1138      *         charset}
1139      *
1140      * @return the number of bytes written
1141      *
1142      * @throws  UnsupportedEncodingException
1143      *          If the named charset is not supported
1144      *
          * @throws  NullPointerException
          *          If {@code destBuffer} or {@code charsetName} is {@code null}
          *
1145      * @since  1.9
1146      */
1147     public int getBytes(byte[] destBuffer, int destOffset, String charsetName) throws UnsupportedEncodingException {
1148         Objects.requireNonNull(destBuffer);
1149         Objects.requireNonNull(charsetName);
              // NOTE(review): destOffset is not range-checked here; presumably StringCoding.encode
              // validates it - confirm, and document the resulting exception in the Javadoc.
1150         return StringCoding.encode(charsetName, value, 0, value.length, destBuffer, destOffset);
1151     }
1152 
1153     /**
1154      * Encodes this {@code String} into a sequence of bytes using the
1155      * platform's default charset, storing the result into a new byte array.
1156      *
1157      * <p> The behavior of this method when this string cannot be encoded in
1158      * the default charset is unspecified.  The {@link
1159      * java.nio.charset.CharsetEncoder} class should be used when more control
1160      * over the encoding process is required.
1161      *
1162      * @return  The resultant byte array
1163      *
1164      * @since      1.1
1165      */
1166     public byte[] getBytes() {
1167         return StringCoding.encode(value, 0, value.length); // overload without a charset argument; the whole of value (0 .. value.length) is passed
1168     }
1169 
1170     /**