1 /*
   2  * Copyright (c) 1994, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.lang.annotation.Native;
  31 import java.lang.invoke.MethodHandles;
  32 import java.lang.constant.Constable;
  33 import java.lang.constant.ConstantDesc;
  34 import java.nio.charset.Charset;
  35 import java.util.ArrayList;
  36 import java.util.Arrays;
  37 import java.util.Comparator;
  38 import java.util.Formatter;
  39 import java.util.List;
  40 import java.util.Locale;
  41 import java.util.Objects;
  42 import java.util.Optional;
  43 import java.util.Spliterator;
  44 import java.util.StringJoiner;
  45 import java.util.function.Function;
  46 import java.util.regex.Matcher;
  47 import java.util.regex.Pattern;
  48 import java.util.regex.PatternSyntaxException;
  49 import java.util.stream.Collectors;
  50 import java.util.stream.IntStream;
  51 import java.util.stream.Stream;
  52 import java.util.stream.StreamSupport;
  53 import jdk.internal.HotSpotIntrinsicCandidate;
  54 import jdk.internal.vm.annotation.Stable;
  55 
  56 import static java.util.function.Predicate.not;
  57 
  58 /**
  59  * The {@code String} class represents character strings. All
  60  * string literals in Java programs, such as {@code "abc"}, are
  61  * implemented as instances of this class.
  62  * <p>
  63  * Strings are constant; their values cannot be changed after they
  64  * are created. String buffers support mutable strings.
  65  * Because String objects are immutable they can be shared. For example:
  66  * <blockquote><pre>
  67  *     String str = "abc";
  68  * </pre></blockquote><p>
  69  * is equivalent to:
  70  * <blockquote><pre>
  71  *     char data[] = {'a', 'b', 'c'};
  72  *     String str = new String(data);
  73  * </pre></blockquote><p>
  74  * Here are some more examples of how strings can be used:
  75  * <blockquote><pre>
  76  *     System.out.println("abc");
  77  *     String cde = "cde";
  78  *     System.out.println("abc" + cde);
  79  *     String c = "abc".substring(2, 3);
  80  *     String d = cde.substring(1, 2);
  81  * </pre></blockquote>
  82  * <p>
  83  * The class {@code String} includes methods for examining
  84  * individual characters of the sequence, for comparing strings, for
  85  * searching strings, for extracting substrings, and for creating a
  86  * copy of a string with all characters translated to uppercase or to
  87  * lowercase. Case mapping is based on the Unicode Standard version
  88  * specified by the {@link java.lang.Character Character} class.
  89  * <p>
  90  * The Java language provides special support for the string
  91  * concatenation operator (&nbsp;+&nbsp;), and for conversion of
  92  * other objects to strings. For additional information on string
  93  * concatenation and conversion, see <i>The Java&trade; Language Specification</i>.
  94  *
  95  * <p> Unless otherwise noted, passing a {@code null} argument to a constructor
  96  * or method in this class will cause a {@link NullPointerException} to be
  97  * thrown.
  98  *
  99  * <p>A {@code String} represents a string in the UTF-16 format
 100  * in which <em>supplementary characters</em> are represented by <em>surrogate
 101  * pairs</em> (see the section <a href="Character.html#unicode">Unicode
 102  * Character Representations</a> in the {@code Character} class for
 103  * more information).
 104  * Index values refer to {@code char} code units, so a supplementary
 105  * character uses two positions in a {@code String}.
 106  * <p>The {@code String} class provides methods for dealing with
 107  * Unicode code points (i.e., characters), in addition to those for
 108  * dealing with Unicode code units (i.e., {@code char} values).
 109  *
 110  * <p>Unless otherwise noted, methods for comparing Strings do not take locale
 111  * into account.  The {@link java.text.Collator} class provides methods for
 112  * finer-grain, locale-sensitive String comparison.
 113  *
 114  * @implNote The implementation of the string concatenation operator is left to
 115  * the discretion of a Java compiler, as long as the compiler ultimately conforms
 116  * to <i>The Java&trade; Language Specification</i>. For example, the {@code javac} compiler
 117  * may implement the operator with {@code StringBuffer}, {@code StringBuilder},
 118  * or {@code java.lang.invoke.StringConcatFactory} depending on the JDK version. The
 119  * implementation of string conversion is typically through the method {@code toString},
 120  * defined by {@code Object} and inherited by all classes in Java.
 121  *
 122  * @author  Lee Boynton
 123  * @author  Arthur van Hoff
 124  * @author  Martin Buchholz
 125  * @author  Ulf Zibis
 126  * @see     java.lang.Object#toString()
 127  * @see     java.lang.StringBuffer
 128  * @see     java.lang.StringBuilder
 129  * @see     java.nio.charset.Charset
 130  * @since   1.0
 131  * @jls     15.18.1 String Concatenation Operator +
 132  */
 133 
 134 public final class String
 135     implements java.io.Serializable, Comparable<String>, CharSequence,
 136                Constable, ConstantDesc {
 137 
    /**
     * The value is used for character storage.
     *
     * @implNote This field is trusted by the VM, and is subject to
     * constant folding if the String instance is constant. Overwriting this
     * field after construction will cause problems.
     *
     * Additionally, it is marked with {@link Stable} to trust the contents
     * of the array. No other facility in JDK provides this functionality (yet).
     * {@link Stable} is safe here, because value is never null.
     */
    @Stable
    private final byte[] value;
 151 
    /**
     * The identifier of the encoding used to encode the bytes in
     * {@code value}. The supported values in this implementation are
     *
     * {@code LATIN1}
     * {@code UTF16}
     *
     * @implNote This field is trusted by the VM, and is subject to
     * constant folding if the String instance is constant. Overwriting this
     * field after construction will cause problems.
     */
    private final byte coder;
 164 
    /** Caches the hash code for the string. */
    private int hash; // Defaults to 0

    /**
     * Caches whether the hash has been calculated as actually being zero,
     * enabling us to avoid recalculating it.
     */
    private boolean hashIsZero; // Defaults to false
 173 
    /** Uses the serialVersionUID from JDK 1.0.2 for interoperability. */
    @java.io.Serial
    private static final long serialVersionUID = -6849794470754667710L;
 177 
    /**
     * If String compaction is disabled, the bytes in {@code value} are
     * always encoded in UTF16.
     *
     * For methods with several possible implementation paths, when String
     * compaction is disabled, only one code path is taken.
     *
     * The instance field value is generally opaque to optimizing JIT
     * compilers. Therefore, in performance-sensitive places, an explicit
     * check of the static boolean {@code COMPACT_STRINGS} is done first
     * before checking the {@code coder} field since the static boolean
     * {@code COMPACT_STRINGS} would be constant folded away by an
     * optimizing JIT compiler. The idioms for these cases are as follows.
     *
     * Code such as:
     *
     *    if (coder == LATIN1) { ... }
     *
     * can be written more optimally as
     *
     *    if (coder() == LATIN1) { ... }
     *
     * or:
     *
     *    if (COMPACT_STRINGS && coder == LATIN1) { ... }
     *
     * An optimizing JIT compiler can fold the above conditional as:
     *
     *    COMPACT_STRINGS == true  => if (coder == LATIN1) { ... }
     *    COMPACT_STRINGS == false => if (false)           { ... }
     *
     * @implNote
     * The actual value for this field is injected by the JVM. The static
     * initialization block is used to set the value here to communicate
     * that this static final field is not statically foldable, and to
     * avoid any possible circular dependency during VM initialization.
     */
    static final boolean COMPACT_STRINGS;

    static {
        // Deliberately assigned in a static block instead of a field
        // initializer: see the @implNote on COMPACT_STRINGS.
        COMPACT_STRINGS = true;
    }
 220 
    /**
     * Class String is special cased within the Serialization Stream Protocol.
     *
     * A String instance is written into an ObjectOutputStream according to
     * <a href="{@docRoot}/../specs/serialization/protocol.html#stream-elements">
     * Object Serialization Specification, Section 6.2, "Stream Elements"</a>.
     */
    @java.io.Serial
    private static final ObjectStreamField[] serialPersistentFields =
        new ObjectStreamField[0]; // empty: no fields are declared as serializable here
 231 
 232     /**
 233      * Initializes a newly created {@code String} object so that it represents
 234      * an empty character sequence.  Note that use of this constructor is
 235      * unnecessary since Strings are immutable.
 236      */
 237     public String() {
 238         this.value = "".value;
 239         this.coder = "".coder;
 240     }
 241 
 242     /**
 243      * Initializes a newly created {@code String} object so that it represents
 244      * the same sequence of characters as the argument; in other words, the
 245      * newly created string is a copy of the argument string. Unless an
 246      * explicit copy of {@code original} is needed, use of this constructor is
 247      * unnecessary since Strings are immutable.
 248      *
 249      * @param  original
 250      *         A {@code String}
 251      */
 252     @HotSpotIntrinsicCandidate
 253     public String(String original) {
 254         this.value = original.value;
 255         this.coder = original.coder;
 256         this.hash = original.hash;
 257     }
 258 
 259     /**
 260      * Allocates a new {@code String} so that it represents the sequence of
 261      * characters currently contained in the character array argument. The
 262      * contents of the character array are copied; subsequent modification of
 263      * the character array does not affect the newly created string.
 264      *
 265      * @param  value
 266      *         The initial value of the string
 267      */
 268     public String(char value[]) {
 269         this(value, 0, value.length, null);
 270     }
 271 
 272     /**
 273      * Allocates a new {@code String} that contains characters from a subarray
 274      * of the character array argument. The {@code offset} argument is the
 275      * index of the first character of the subarray and the {@code count}
 276      * argument specifies the length of the subarray. The contents of the
 277      * subarray are copied; subsequent modification of the character array does
 278      * not affect the newly created string.
 279      *
 280      * @param  value
 281      *         Array that is the source of characters
 282      *
 283      * @param  offset
 284      *         The initial offset
 285      *
 286      * @param  count
 287      *         The length
 288      *
 289      * @throws  IndexOutOfBoundsException
 290      *          If {@code offset} is negative, {@code count} is negative, or
 291      *          {@code offset} is greater than {@code value.length - count}
 292      */
 293     public String(char value[], int offset, int count) {
 294         this(value, offset, count, rangeCheck(value, offset, count));
 295     }
 296 
 297     private static Void rangeCheck(char[] value, int offset, int count) {
 298         checkBoundsOffCount(offset, count, value.length);
 299         return null;
 300     }
 301 
 302     /**
 303      * Allocates a new {@code String} that contains characters from a subarray
 304      * of the <a href="Character.html#unicode">Unicode code point</a> array
 305      * argument.  The {@code offset} argument is the index of the first code
 306      * point of the subarray and the {@code count} argument specifies the
 307      * length of the subarray.  The contents of the subarray are converted to
 308      * {@code char}s; subsequent modification of the {@code int} array does not
 309      * affect the newly created string.
 310      *
 311      * @param  codePoints
 312      *         Array that is the source of Unicode code points
 313      *
 314      * @param  offset
 315      *         The initial offset
 316      *
 317      * @param  count
 318      *         The length
 319      *
 320      * @throws  IllegalArgumentException
 321      *          If any invalid Unicode code point is found in {@code
 322      *          codePoints}
 323      *
 324      * @throws  IndexOutOfBoundsException
 325      *          If {@code offset} is negative, {@code count} is negative, or
 326      *          {@code offset} is greater than {@code codePoints.length - count}
 327      *
 328      * @since  1.5
 329      */
 330     public String(int[] codePoints, int offset, int count) {
 331         checkBoundsOffCount(offset, count, codePoints.length);
 332         if (count == 0) {
 333             this.value = "".value;
 334             this.coder = "".coder;
 335             return;
 336         }
 337         if (COMPACT_STRINGS) {
 338             byte[] val = StringLatin1.toBytes(codePoints, offset, count);
 339             if (val != null) {
 340                 this.coder = LATIN1;
 341                 this.value = val;
 342                 return;
 343             }
 344         }
 345         this.coder = UTF16;
 346         this.value = StringUTF16.toBytes(codePoints, offset, count);
 347     }
 348 
 349     /**
 350      * Allocates a new {@code String} constructed from a subarray of an array
 351      * of 8-bit integer values.
 352      *
 353      * <p> The {@code offset} argument is the index of the first byte of the
 354      * subarray, and the {@code count} argument specifies the length of the
 355      * subarray.
 356      *
 357      * <p> Each {@code byte} in the subarray is converted to a {@code char} as
 358      * specified in the {@link #String(byte[],int) String(byte[],int)} constructor.
 359      *
 360      * @deprecated This method does not properly convert bytes into characters.
 361      * As of JDK&nbsp;1.1, the preferred way to do this is via the
 362      * {@code String} constructors that take a {@link
 363      * java.nio.charset.Charset}, charset name, or that use the platform's
 364      * default charset.
 365      *
 366      * @param  ascii
 367      *         The bytes to be converted to characters
 368      *
 369      * @param  hibyte
 370      *         The top 8 bits of each 16-bit Unicode code unit
 371      *
 372      * @param  offset
 373      *         The initial offset
 374      * @param  count
 375      *         The length
 376      *
 377      * @throws  IndexOutOfBoundsException
 378      *          If {@code offset} is negative, {@code count} is negative, or
 379      *          {@code offset} is greater than {@code ascii.length - count}
 380      *
 381      * @see  #String(byte[], int)
 382      * @see  #String(byte[], int, int, java.lang.String)
 383      * @see  #String(byte[], int, int, java.nio.charset.Charset)
 384      * @see  #String(byte[], int, int)
 385      * @see  #String(byte[], java.lang.String)
 386      * @see  #String(byte[], java.nio.charset.Charset)
 387      * @see  #String(byte[])
 388      */
 389     @Deprecated(since="1.1")
 390     public String(byte ascii[], int hibyte, int offset, int count) {
 391         checkBoundsOffCount(offset, count, ascii.length);
 392         if (count == 0) {
 393             this.value = "".value;
 394             this.coder = "".coder;
 395             return;
 396         }
 397         if (COMPACT_STRINGS && (byte)hibyte == 0) {
 398             this.value = Arrays.copyOfRange(ascii, offset, offset + count);
 399             this.coder = LATIN1;
 400         } else {
 401             hibyte <<= 8;
 402             byte[] val = StringUTF16.newBytesFor(count);
 403             for (int i = 0; i < count; i++) {
 404                 StringUTF16.putChar(val, i, hibyte | (ascii[offset++] & 0xff));
 405             }
 406             this.value = val;
 407             this.coder = UTF16;
 408         }
 409     }
 410 
 411     /**
 412      * Allocates a new {@code String} containing characters constructed from
 413      * an array of 8-bit integer values. Each character <i>c</i> in the
 414      * resulting string is constructed from the corresponding component
 415      * <i>b</i> in the byte array such that:
 416      *
 417      * <blockquote><pre>
 418      *     <b><i>c</i></b> == (char)(((hibyte &amp; 0xff) &lt;&lt; 8)
 419      *                         | (<b><i>b</i></b> &amp; 0xff))
 420      * </pre></blockquote>
 421      *
 422      * @deprecated  This method does not properly convert bytes into
 423      * characters.  As of JDK&nbsp;1.1, the preferred way to do this is via the
 424      * {@code String} constructors that take a {@link
 425      * java.nio.charset.Charset}, charset name, or that use the platform's
 426      * default charset.
 427      *
 428      * @param  ascii
 429      *         The bytes to be converted to characters
 430      *
 431      * @param  hibyte
 432      *         The top 8 bits of each 16-bit Unicode code unit
 433      *
 434      * @see  #String(byte[], int, int, java.lang.String)
 435      * @see  #String(byte[], int, int, java.nio.charset.Charset)
 436      * @see  #String(byte[], int, int)
 437      * @see  #String(byte[], java.lang.String)
 438      * @see  #String(byte[], java.nio.charset.Charset)
 439      * @see  #String(byte[])
 440      */
 441     @Deprecated(since="1.1")
 442     public String(byte ascii[], int hibyte) {
 443         this(ascii, hibyte, 0, ascii.length);
 444     }
 445 
 446     /**
 447      * Constructs a new {@code String} by decoding the specified subarray of
 448      * bytes using the specified charset.  The length of the new {@code String}
 449      * is a function of the charset, and hence may not be equal to the length
 450      * of the subarray.
 451      *
 452      * <p> The behavior of this constructor when the given bytes are not valid
 453      * in the given charset is unspecified.  The {@link
 454      * java.nio.charset.CharsetDecoder} class should be used when more control
 455      * over the decoding process is required.
 456      *
 457      * @param  bytes
 458      *         The bytes to be decoded into characters
 459      *
 460      * @param  offset
 461      *         The index of the first byte to decode
 462      *
     * @param  length
     *         The number of bytes to decode
     *
     * @param  charsetName
 467      *         The name of a supported {@linkplain java.nio.charset.Charset
 468      *         charset}
 469      *
 470      * @throws  UnsupportedEncodingException
 471      *          If the named charset is not supported
 472      *
 473      * @throws  IndexOutOfBoundsException
 474      *          If {@code offset} is negative, {@code length} is negative, or
 475      *          {@code offset} is greater than {@code bytes.length - length}
 476      *
 477      * @since  1.1
 478      */
 479     public String(byte bytes[], int offset, int length, String charsetName)
 480             throws UnsupportedEncodingException {
 481         if (charsetName == null)
 482             throw new NullPointerException("charsetName");
 483         checkBoundsOffCount(offset, length, bytes.length);
 484         StringCoding.Result ret =
 485             StringCoding.decode(charsetName, bytes, offset, length);
 486         this.value = ret.value;
 487         this.coder = ret.coder;
 488     }
 489 
 490     /**
 491      * Constructs a new {@code String} by decoding the specified subarray of
 492      * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
 493      * The length of the new {@code String} is a function of the charset, and
 494      * hence may not be equal to the length of the subarray.
 495      *
 496      * <p> This method always replaces malformed-input and unmappable-character
 497      * sequences with this charset's default replacement string.  The {@link
 498      * java.nio.charset.CharsetDecoder} class should be used when more control
 499      * over the decoding process is required.
 500      *
 501      * @param  bytes
 502      *         The bytes to be decoded into characters
 503      *
 504      * @param  offset
 505      *         The index of the first byte to decode
 506      *
 507      * @param  length
 508      *         The number of bytes to decode
 509      *
 510      * @param  charset
 511      *         The {@linkplain java.nio.charset.Charset charset} to be used to
 512      *         decode the {@code bytes}
 513      *
 514      * @throws  IndexOutOfBoundsException
 515      *          If {@code offset} is negative, {@code length} is negative, or
 516      *          {@code offset} is greater than {@code bytes.length - length}
 517      *
 518      * @since  1.6
 519      */
 520     public String(byte bytes[], int offset, int length, Charset charset) {
 521         if (charset == null)
 522             throw new NullPointerException("charset");
 523         checkBoundsOffCount(offset, length, bytes.length);
 524         StringCoding.Result ret =
 525             StringCoding.decode(charset, bytes, offset, length);
 526         this.value = ret.value;
 527         this.coder = ret.coder;
 528     }
 529 
 530     /**
 531      * Constructs a new {@code String} by decoding the specified array of bytes
 532      * using the specified {@linkplain java.nio.charset.Charset charset}.  The
 533      * length of the new {@code String} is a function of the charset, and hence
 534      * may not be equal to the length of the byte array.
 535      *
 536      * <p> The behavior of this constructor when the given bytes are not valid
 537      * in the given charset is unspecified.  The {@link
 538      * java.nio.charset.CharsetDecoder} class should be used when more control
 539      * over the decoding process is required.
 540      *
 541      * @param  bytes
 542      *         The bytes to be decoded into characters
 543      *
 544      * @param  charsetName
 545      *         The name of a supported {@linkplain java.nio.charset.Charset
 546      *         charset}
 547      *
 548      * @throws  UnsupportedEncodingException
 549      *          If the named charset is not supported
 550      *
 551      * @since  1.1
 552      */
 553     public String(byte bytes[], String charsetName)
 554             throws UnsupportedEncodingException {
 555         this(bytes, 0, bytes.length, charsetName);
 556     }
 557 
 558     /**
 559      * Constructs a new {@code String} by decoding the specified array of
 560      * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
 561      * The length of the new {@code String} is a function of the charset, and
 562      * hence may not be equal to the length of the byte array.
 563      *
 564      * <p> This method always replaces malformed-input and unmappable-character
 565      * sequences with this charset's default replacement string.  The {@link
 566      * java.nio.charset.CharsetDecoder} class should be used when more control
 567      * over the decoding process is required.
 568      *
 569      * @param  bytes
 570      *         The bytes to be decoded into characters
 571      *
 572      * @param  charset
 573      *         The {@linkplain java.nio.charset.Charset charset} to be used to
 574      *         decode the {@code bytes}
 575      *
 576      * @since  1.6
 577      */
 578     public String(byte bytes[], Charset charset) {
 579         this(bytes, 0, bytes.length, charset);
 580     }
 581 
 582     /**
 583      * Constructs a new {@code String} by decoding the specified subarray of
 584      * bytes using the platform's default charset.  The length of the new
 585      * {@code String} is a function of the charset, and hence may not be equal
 586      * to the length of the subarray.
 587      *
 588      * <p> The behavior of this constructor when the given bytes are not valid
 589      * in the default charset is unspecified.  The {@link
 590      * java.nio.charset.CharsetDecoder} class should be used when more control
 591      * over the decoding process is required.
 592      *
 593      * @param  bytes
 594      *         The bytes to be decoded into characters
 595      *
 596      * @param  offset
 597      *         The index of the first byte to decode
 598      *
 599      * @param  length
 600      *         The number of bytes to decode
 601      *
 602      * @throws  IndexOutOfBoundsException
 603      *          If {@code offset} is negative, {@code length} is negative, or
 604      *          {@code offset} is greater than {@code bytes.length - length}
 605      *
 606      * @since  1.1
 607      */
 608     public String(byte bytes[], int offset, int length) {
 609         checkBoundsOffCount(offset, length, bytes.length);
 610         StringCoding.Result ret = StringCoding.decode(bytes, offset, length);
 611         this.value = ret.value;
 612         this.coder = ret.coder;
 613     }
 614 
 615     /**
 616      * Constructs a new {@code String} by decoding the specified array of bytes
 617      * using the platform's default charset.  The length of the new {@code
 618      * String} is a function of the charset, and hence may not be equal to the
 619      * length of the byte array.
 620      *
 621      * <p> The behavior of this constructor when the given bytes are not valid
 622      * in the default charset is unspecified.  The {@link
 623      * java.nio.charset.CharsetDecoder} class should be used when more control
 624      * over the decoding process is required.
 625      *
 626      * @param  bytes
 627      *         The bytes to be decoded into characters
 628      *
 629      * @since  1.1
 630      */
 631     public String(byte[] bytes) {
 632         this(bytes, 0, bytes.length);
 633     }
 634 
 635     /**
 636      * Allocates a new string that contains the sequence of characters
 637      * currently contained in the string buffer argument. The contents of the
 638      * string buffer are copied; subsequent modification of the string buffer
 639      * does not affect the newly created string.
 640      *
 641      * @param  buffer
 642      *         A {@code StringBuffer}
 643      */
 644     public String(StringBuffer buffer) {
 645         this(buffer.toString());
 646     }
 647 
 648     /**
 649      * Allocates a new string that contains the sequence of characters
 650      * currently contained in the string builder argument. The contents of the
 651      * string builder are copied; subsequent modification of the string builder
 652      * does not affect the newly created string.
 653      *
 654      * <p> This constructor is provided to ease migration to {@code
 655      * StringBuilder}. Obtaining a string from a string builder via the {@code
 656      * toString} method is likely to run faster and is generally preferred.
 657      *
 658      * @param   builder
 659      *          A {@code StringBuilder}
 660      *
 661      * @since  1.5
 662      */
 663     public String(StringBuilder builder) {
 664         this(builder, null);
 665     }
 666 
 667     /**
 668      * Returns the length of this string.
 669      * The length is equal to the number of <a href="Character.html#unicode">Unicode
 670      * code units</a> in the string.
 671      *
 672      * @return  the length of the sequence of characters represented by this
 673      *          object.
 674      */
 675     public int length() {
 676         return value.length >> coder();
 677     }
 678 
 679     /**
 680      * Returns {@code true} if, and only if, {@link #length()} is {@code 0}.
 681      *
 682      * @return {@code true} if {@link #length()} is {@code 0}, otherwise
 683      * {@code false}
 684      *
 685      * @since 1.6
 686      */
 687     public boolean isEmpty() {
 688         return value.length == 0;
 689     }
 690 
 691     /**
 692      * Returns the {@code char} value at the
 693      * specified index. An index ranges from {@code 0} to
 694      * {@code length() - 1}. The first {@code char} value of the sequence
 695      * is at index {@code 0}, the next at index {@code 1},
 696      * and so on, as for array indexing.
 697      *
 698      * <p>If the {@code char} value specified by the index is a
 699      * <a href="Character.html#unicode">surrogate</a>, the surrogate
 700      * value is returned.
 701      *
 702      * @param      index   the index of the {@code char} value.
 703      * @return     the {@code char} value at the specified index of this string.
 704      *             The first {@code char} value is at index {@code 0}.
 705      * @throws     IndexOutOfBoundsException  if the {@code index}
 706      *             argument is negative or not less than the length of this
 707      *             string.
 708      */
 709     public char charAt(int index) {
 710         if (isLatin1()) {
 711             return StringLatin1.charAt(value, index);
 712         } else {
 713             return StringUTF16.charAt(value, index);
 714         }
 715     }
 716 
 717     /**
 718      * Returns the character (Unicode code point) at the specified
 719      * index. The index refers to {@code char} values
 720      * (Unicode code units) and ranges from {@code 0} to
 721      * {@link #length()}{@code  - 1}.
 722      *
 723      * <p> If the {@code char} value specified at the given index
 724      * is in the high-surrogate range, the following index is less
 725      * than the length of this {@code String}, and the
 726      * {@code char} value at the following index is in the
 727      * low-surrogate range, then the supplementary code point
 728      * corresponding to this surrogate pair is returned. Otherwise,
 729      * the {@code char} value at the given index is returned.
 730      *
 731      * @param      index the index to the {@code char} values
 732      * @return     the code point value of the character at the
 733      *             {@code index}
 734      * @throws     IndexOutOfBoundsException  if the {@code index}
 735      *             argument is negative or not less than the length of this
 736      *             string.
 737      * @since      1.5
 738      */
 739     public int codePointAt(int index) {
 740         if (isLatin1()) {
 741             checkIndex(index, value.length);
 742             return value[index] & 0xff;
 743         }
 744         int length = value.length >> 1;
 745         checkIndex(index, length);
 746         return StringUTF16.codePointAt(value, index, length);
 747     }
 748 
 749     /**
 750      * Returns the character (Unicode code point) before the specified
 751      * index. The index refers to {@code char} values
 752      * (Unicode code units) and ranges from {@code 1} to {@link
 753      * CharSequence#length() length}.
 754      *
 755      * <p> If the {@code char} value at {@code (index - 1)}
 756      * is in the low-surrogate range, {@code (index - 2)} is not
 757      * negative, and the {@code char} value at {@code (index -
 758      * 2)} is in the high-surrogate range, then the
 759      * supplementary code point value of the surrogate pair is
 760      * returned. If the {@code char} value at {@code index -
 761      * 1} is an unpaired low-surrogate or a high-surrogate, the
 762      * surrogate value is returned.
 763      *
 764      * @param     index the index following the code point that should be returned
 765      * @return    the Unicode code point value before the given index.
 766      * @throws    IndexOutOfBoundsException if the {@code index}
 767      *            argument is less than 1 or greater than the length
 768      *            of this string.
 769      * @since     1.5
 770      */
 771     public int codePointBefore(int index) {
 772         int i = index - 1;
 773         if (i < 0 || i >= length()) {
 774             throw new StringIndexOutOfBoundsException(index);
 775         }
 776         if (isLatin1()) {
 777             return (value[i] & 0xff);
 778         }
 779         return StringUTF16.codePointBefore(value, index);
 780     }
 781 
 782     /**
 783      * Returns the number of Unicode code points in the specified text
 784      * range of this {@code String}. The text range begins at the
 785      * specified {@code beginIndex} and extends to the
 786      * {@code char} at index {@code endIndex - 1}. Thus the
 787      * length (in {@code char}s) of the text range is
 788      * {@code endIndex-beginIndex}. Unpaired surrogates within
 789      * the text range count as one code point each.
 790      *
 791      * @param beginIndex the index to the first {@code char} of
 792      * the text range.
 793      * @param endIndex the index after the last {@code char} of
 794      * the text range.
 795      * @return the number of Unicode code points in the specified text
 796      * range
 797      * @throws    IndexOutOfBoundsException if the
 798      * {@code beginIndex} is negative, or {@code endIndex}
 799      * is larger than the length of this {@code String}, or
 800      * {@code beginIndex} is larger than {@code endIndex}.
 801      * @since  1.5
 802      */
 803     public int codePointCount(int beginIndex, int endIndex) {
 804         if (beginIndex < 0 || beginIndex > endIndex ||
 805             endIndex > length()) {
 806             throw new IndexOutOfBoundsException();
 807         }
 808         if (isLatin1()) {
 809             return endIndex - beginIndex;
 810         }
 811         return StringUTF16.codePointCount(value, beginIndex, endIndex);
 812     }
 813 
 814     /**
 815      * Returns the index within this {@code String} that is
 816      * offset from the given {@code index} by
 817      * {@code codePointOffset} code points. Unpaired surrogates
 818      * within the text range given by {@code index} and
 819      * {@code codePointOffset} count as one code point each.
 820      *
 821      * @param index the index to be offset
 822      * @param codePointOffset the offset in code points
 823      * @return the index within this {@code String}
 824      * @throws    IndexOutOfBoundsException if {@code index}
     *   is negative or larger than the length of this
 826      *   {@code String}, or if {@code codePointOffset} is positive
 827      *   and the substring starting with {@code index} has fewer
 828      *   than {@code codePointOffset} code points,
 829      *   or if {@code codePointOffset} is negative and the substring
 830      *   before {@code index} has fewer than the absolute value
 831      *   of {@code codePointOffset} code points.
 832      * @since 1.5
 833      */
 834     public int offsetByCodePoints(int index, int codePointOffset) {
 835         if (index < 0 || index > length()) {
 836             throw new IndexOutOfBoundsException();
 837         }
 838         return Character.offsetByCodePoints(this, index, codePointOffset);
 839     }
 840 
 841     /**
 842      * Copies characters from this string into the destination character
 843      * array.
 844      * <p>
 845      * The first character to be copied is at index {@code srcBegin};
 846      * the last character to be copied is at index {@code srcEnd-1}
 847      * (thus the total number of characters to be copied is
 848      * {@code srcEnd-srcBegin}). The characters are copied into the
 849      * subarray of {@code dst} starting at index {@code dstBegin}
 850      * and ending at index:
 851      * <blockquote><pre>
 852      *     dstBegin + (srcEnd-srcBegin) - 1
 853      * </pre></blockquote>
 854      *
 855      * @param      srcBegin   index of the first character in the string
 856      *                        to copy.
 857      * @param      srcEnd     index after the last character in the string
 858      *                        to copy.
 859      * @param      dst        the destination array.
 860      * @param      dstBegin   the start offset in the destination array.
 861      * @throws    IndexOutOfBoundsException If any of the following
 862      *            is true:
 863      *            <ul><li>{@code srcBegin} is negative.
 864      *            <li>{@code srcBegin} is greater than {@code srcEnd}
 865      *            <li>{@code srcEnd} is greater than the length of this
 866      *                string
 867      *            <li>{@code dstBegin} is negative
 868      *            <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than
 869      *                {@code dst.length}</ul>
 870      */
 871     public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) {
 872         checkBoundsBeginEnd(srcBegin, srcEnd, length());
 873         checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
 874         if (isLatin1()) {
 875             StringLatin1.getChars(value, srcBegin, srcEnd, dst, dstBegin);
 876         } else {
 877             StringUTF16.getChars(value, srcBegin, srcEnd, dst, dstBegin);
 878         }
 879     }
 880 
 881     /**
 882      * Copies characters from this string into the destination byte array. Each
 883      * byte receives the 8 low-order bits of the corresponding character. The
 884      * eight high-order bits of each character are not copied and do not
 885      * participate in the transfer in any way.
 886      *
 887      * <p> The first character to be copied is at index {@code srcBegin}; the
 888      * last character to be copied is at index {@code srcEnd-1}.  The total
 889      * number of characters to be copied is {@code srcEnd-srcBegin}. The
 890      * characters, converted to bytes, are copied into the subarray of {@code
 891      * dst} starting at index {@code dstBegin} and ending at index:
 892      *
 893      * <blockquote><pre>
 894      *     dstBegin + (srcEnd-srcBegin) - 1
 895      * </pre></blockquote>
 896      *
 897      * @deprecated  This method does not properly convert characters into
 898      * bytes.  As of JDK&nbsp;1.1, the preferred way to do this is via the
 899      * {@link #getBytes()} method, which uses the platform's default charset.
 900      *
 901      * @param  srcBegin
 902      *         Index of the first character in the string to copy
 903      *
 904      * @param  srcEnd
 905      *         Index after the last character in the string to copy
 906      *
 907      * @param  dst
 908      *         The destination array
 909      *
 910      * @param  dstBegin
 911      *         The start offset in the destination array
 912      *
 913      * @throws  IndexOutOfBoundsException
 914      *          If any of the following is true:
 915      *          <ul>
 916      *            <li> {@code srcBegin} is negative
 917      *            <li> {@code srcBegin} is greater than {@code srcEnd}
 918      *            <li> {@code srcEnd} is greater than the length of this String
 919      *            <li> {@code dstBegin} is negative
 920      *            <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code
 921      *                 dst.length}
 922      *          </ul>
 923      */
 924     @Deprecated(since="1.1")
 925     public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) {
 926         checkBoundsBeginEnd(srcBegin, srcEnd, length());
 927         Objects.requireNonNull(dst);
 928         checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
 929         if (isLatin1()) {
 930             StringLatin1.getBytes(value, srcBegin, srcEnd, dst, dstBegin);
 931         } else {
 932             StringUTF16.getBytes(value, srcBegin, srcEnd, dst, dstBegin);
 933         }
 934     }
 935 
 936     /**
 937      * Encodes this {@code String} into a sequence of bytes using the named
 938      * charset, storing the result into a new byte array.
 939      *
 940      * <p> The behavior of this method when this string cannot be encoded in
 941      * the given charset is unspecified.  The {@link
 942      * java.nio.charset.CharsetEncoder} class should be used when more control
 943      * over the encoding process is required.
 944      *
 945      * @param  charsetName
 946      *         The name of a supported {@linkplain java.nio.charset.Charset
 947      *         charset}
 948      *
 949      * @return  The resultant byte array
 950      *
 951      * @throws  UnsupportedEncodingException
 952      *          If the named charset is not supported
 953      *
 954      * @since  1.1
 955      */
 956     public byte[] getBytes(String charsetName)
 957             throws UnsupportedEncodingException {
 958         if (charsetName == null) throw new NullPointerException();
 959         return StringCoding.encode(charsetName, coder(), value);
 960     }
 961 
 962     /**
 963      * Encodes this {@code String} into a sequence of bytes using the given
 964      * {@linkplain java.nio.charset.Charset charset}, storing the result into a
 965      * new byte array.
 966      *
 967      * <p> This method always replaces malformed-input and unmappable-character
 968      * sequences with this charset's default replacement byte array.  The
 969      * {@link java.nio.charset.CharsetEncoder} class should be used when more
 970      * control over the encoding process is required.
 971      *
 972      * @param  charset
 973      *         The {@linkplain java.nio.charset.Charset} to be used to encode
 974      *         the {@code String}
 975      *
 976      * @return  The resultant byte array
 977      *
 978      * @since  1.6
 979      */
 980     public byte[] getBytes(Charset charset) {
 981         if (charset == null) throw new NullPointerException();
 982         return StringCoding.encode(charset, coder(), value);
 983      }
 984 
 985     /**
 986      * Encodes this {@code String} into a sequence of bytes using the
 987      * platform's default charset, storing the result into a new byte array.
 988      *
 989      * <p> The behavior of this method when this string cannot be encoded in
 990      * the default charset is unspecified.  The {@link
 991      * java.nio.charset.CharsetEncoder} class should be used when more control
 992      * over the encoding process is required.
 993      *
 994      * @return  The resultant byte array
 995      *
 996      * @since      1.1
 997      */
 998     public byte[] getBytes() {
 999         return StringCoding.encode(coder(), value);
1000     }
1001 
1002     /**
1003      * Compares this string to the specified object.  The result is {@code
1004      * true} if and only if the argument is not {@code null} and is a {@code
1005      * String} object that represents the same sequence of characters as this
1006      * object.
1007      *
1008      * <p>For finer-grained String comparison, refer to
1009      * {@link java.text.Collator}.
1010      *
1011      * @param  anObject
1012      *         The object to compare this {@code String} against
1013      *
1014      * @return  {@code true} if the given object represents a {@code String}
1015      *          equivalent to this string, {@code false} otherwise
1016      *
1017      * @see  #compareTo(String)
1018      * @see  #equalsIgnoreCase(String)
1019      */
1020     public boolean equals(Object anObject) {
1021         if (this == anObject) {
1022             return true;
1023         }
1024         if (anObject instanceof String) {
1025             String aString = (String)anObject;
1026             if (!COMPACT_STRINGS || this.coder == aString.coder) {
1027                 return StringLatin1.equals(value, aString.value);
1028             }
1029         }
1030         return false;
1031     }
1032 
1033     /**
1034      * Compares this string to the specified {@code StringBuffer}.  The result
1035      * is {@code true} if and only if this {@code String} represents the same
1036      * sequence of characters as the specified {@code StringBuffer}. This method
1037      * synchronizes on the {@code StringBuffer}.
1038      *
1039      * <p>For finer-grained String comparison, refer to
1040      * {@link java.text.Collator}.
1041      *
1042      * @param  sb
1043      *         The {@code StringBuffer} to compare this {@code String} against
1044      *
1045      * @return  {@code true} if this {@code String} represents the same
1046      *          sequence of characters as the specified {@code StringBuffer},
1047      *          {@code false} otherwise
1048      *
1049      * @since  1.4
1050      */
1051     public boolean contentEquals(StringBuffer sb) {
1052         return contentEquals((CharSequence)sb);
1053     }
1054 
1055     private boolean nonSyncContentEquals(AbstractStringBuilder sb) {
1056         int len = length();
1057         if (len != sb.length()) {
1058             return false;
1059         }
1060         byte v1[] = value;
1061         byte v2[] = sb.getValue();
1062         byte coder = coder();
1063         if (coder == sb.getCoder()) {
1064             int n = v1.length;
1065             for (int i = 0; i < n; i++) {
1066                 if (v1[i] != v2[i]) {
1067                     return false;
1068                 }
1069             }
1070         } else {
1071             if (coder != LATIN1) {  // utf16 str and latin1 abs can never be "equal"
1072                 return false;
1073             }
1074             return StringUTF16.contentEquals(v1, v2, len);
1075         }
1076         return true;
1077     }
1078 
1079     /**
1080      * Compares this string to the specified {@code CharSequence}.  The
1081      * result is {@code true} if and only if this {@code String} represents the
1082      * same sequence of char values as the specified sequence. Note that if the
1083      * {@code CharSequence} is a {@code StringBuffer} then the method
1084      * synchronizes on it.
1085      *
1086      * <p>For finer-grained String comparison, refer to
1087      * {@link java.text.Collator}.
1088      *
1089      * @param  cs
1090      *         The sequence to compare this {@code String} against
1091      *
1092      * @return  {@code true} if this {@code String} represents the same
1093      *          sequence of char values as the specified sequence, {@code
1094      *          false} otherwise
1095      *
1096      * @since  1.5
1097      */
1098     public boolean contentEquals(CharSequence cs) {
1099         // Argument is a StringBuffer, StringBuilder
1100         if (cs instanceof AbstractStringBuilder) {
1101             if (cs instanceof StringBuffer) {
1102                 synchronized(cs) {
1103                    return nonSyncContentEquals((AbstractStringBuilder)cs);
1104                 }
1105             } else {
1106                 return nonSyncContentEquals((AbstractStringBuilder)cs);
1107             }
1108         }
1109         // Argument is a String
1110         if (cs instanceof String) {
1111             return equals(cs);
1112         }
1113         // Argument is a generic CharSequence
1114         int n = cs.length();
1115         if (n != length()) {
1116             return false;
1117         }
1118         byte[] val = this.value;
1119         if (isLatin1()) {
1120             for (int i = 0; i < n; i++) {
1121                 if ((val[i] & 0xff) != cs.charAt(i)) {
1122                     return false;
1123                 }
1124             }
1125         } else {
1126             if (!StringUTF16.contentEquals(val, cs, n)) {
1127                 return false;
1128             }
1129         }
1130         return true;
1131     }
1132 
1133     /**
1134      * Compares this {@code String} to another {@code String}, ignoring case
1135      * considerations.  Two strings are considered equal ignoring case if they
1136      * are of the same length and corresponding characters in the two strings
1137      * are equal ignoring case.
1138      *
1139      * <p> Two characters {@code c1} and {@code c2} are considered the same
1140      * ignoring case if at least one of the following is true:
1141      * <ul>
1142      *   <li> The two characters are the same (as compared by the
1143      *        {@code ==} operator)
1144      *   <li> Calling {@code Character.toLowerCase(Character.toUpperCase(char))}
1145      *        on each character produces the same result
1146      * </ul>
1147      *
1148      * <p>Note that this method does <em>not</em> take locale into account, and
1149      * will result in unsatisfactory results for certain locales.  The
1150      * {@link java.text.Collator} class provides locale-sensitive comparison.
1151      *
1152      * @param  anotherString
1153      *         The {@code String} to compare this {@code String} against
1154      *
1155      * @return  {@code true} if the argument is not {@code null} and it
1156      *          represents an equivalent {@code String} ignoring case; {@code
1157      *          false} otherwise
1158      *
1159      * @see  #equals(Object)
1160      */
1161     public boolean equalsIgnoreCase(String anotherString) {
1162         return (this == anotherString) ? true
1163                 : (anotherString != null)
1164                 && (anotherString.length() == length())
1165                 && regionMatches(true, 0, anotherString, 0, length());
1166     }
1167 
1168     /**
1169      * Compares two strings lexicographically.
1170      * The comparison is based on the Unicode value of each character in
1171      * the strings. The character sequence represented by this
1172      * {@code String} object is compared lexicographically to the
1173      * character sequence represented by the argument string. The result is
1174      * a negative integer if this {@code String} object
1175      * lexicographically precedes the argument string. The result is a
1176      * positive integer if this {@code String} object lexicographically
1177      * follows the argument string. The result is zero if the strings
1178      * are equal; {@code compareTo} returns {@code 0} exactly when
1179      * the {@link #equals(Object)} method would return {@code true}.
1180      * <p>
1181      * This is the definition of lexicographic ordering. If two strings are
1182      * different, then either they have different characters at some index
1183      * that is a valid index for both strings, or their lengths are different,
1184      * or both. If they have different characters at one or more index
1185      * positions, let <i>k</i> be the smallest such index; then the string
1186      * whose character at position <i>k</i> has the smaller value, as
1187      * determined by using the {@code <} operator, lexicographically precedes the
1188      * other string. In this case, {@code compareTo} returns the
1189      * difference of the two character values at position {@code k} in
     * the two strings -- that is, the value:
1191      * <blockquote><pre>
1192      * this.charAt(k)-anotherString.charAt(k)
1193      * </pre></blockquote>
1194      * If there is no index position at which they differ, then the shorter
1195      * string lexicographically precedes the longer string. In this case,
1196      * {@code compareTo} returns the difference of the lengths of the
1197      * strings -- that is, the value:
1198      * <blockquote><pre>
1199      * this.length()-anotherString.length()
1200      * </pre></blockquote>
1201      *
1202      * <p>For finer-grained String comparison, refer to
1203      * {@link java.text.Collator}.
1204      *
1205      * @param   anotherString   the {@code String} to be compared.
1206      * @return  the value {@code 0} if the argument string is equal to
1207      *          this string; a value less than {@code 0} if this string
1208      *          is lexicographically less than the string argument; and a
1209      *          value greater than {@code 0} if this string is
1210      *          lexicographically greater than the string argument.
1211      */
1212     public int compareTo(String anotherString) {
1213         byte v1[] = value;
1214         byte v2[] = anotherString.value;
1215         byte coder = coder();
1216         if (coder == anotherString.coder()) {
1217             return coder == LATIN1 ? StringLatin1.compareTo(v1, v2)
1218                                    : StringUTF16.compareTo(v1, v2);
1219         }
1220         return coder == LATIN1 ? StringLatin1.compareToUTF16(v1, v2)
1221                                : StringUTF16.compareToLatin1(v1, v2);
1222      }
1223 
1224     /**
1225      * A Comparator that orders {@code String} objects as by
1226      * {@code compareToIgnoreCase}. This comparator is serializable.
1227      * <p>
1228      * Note that this Comparator does <em>not</em> take locale into account,
1229      * and will result in an unsatisfactory ordering for certain locales.
1230      * The {@link java.text.Collator} class provides locale-sensitive comparison.
1231      *
1232      * @see     java.text.Collator
1233      * @since   1.2
1234      */
1235     public static final Comparator<String> CASE_INSENSITIVE_ORDER
1236                                          = new CaseInsensitiveComparator();
1237     private static class CaseInsensitiveComparator
1238             implements Comparator<String>, java.io.Serializable {
1239         // use serialVersionUID from JDK 1.2.2 for interoperability
1240         @java.io.Serial
1241         private static final long serialVersionUID = 8575799808933029326L;
1242 
1243         public int compare(String s1, String s2) {
1244             byte v1[] = s1.value;
1245             byte v2[] = s2.value;
1246             byte coder = s1.coder();
1247             if (coder == s2.coder()) {
1248                 return coder == LATIN1 ? StringLatin1.compareToCI(v1, v2)
1249                                        : StringUTF16.compareToCI(v1, v2);
1250             }
1251             return coder == LATIN1 ? StringLatin1.compareToCI_UTF16(v1, v2)
1252                                    : StringUTF16.compareToCI_Latin1(v1, v2);
1253         }
1254 
1255         /** Replaces the de-serialized object. */
1256         @java.io.Serial
1257         private Object readResolve() { return CASE_INSENSITIVE_ORDER; }
1258     }
1259 
1260     /**
1261      * Compares two strings lexicographically, ignoring case
1262      * differences. This method returns an integer whose sign is that of
1263      * calling {@code compareTo} with normalized versions of the strings
1264      * where case differences have been eliminated by calling
1265      * {@code Character.toLowerCase(Character.toUpperCase(character))} on
1266      * each character.
1267      * <p>
1268      * Note that this method does <em>not</em> take locale into account,
1269      * and will result in an unsatisfactory ordering for certain locales.
1270      * The {@link java.text.Collator} class provides locale-sensitive comparison.
1271      *
1272      * @param   str   the {@code String} to be compared.
1273      * @return  a negative integer, zero, or a positive integer as the
1274      *          specified String is greater than, equal to, or less
1275      *          than this String, ignoring case considerations.
1276      * @see     java.text.Collator
1277      * @since   1.2
1278      */
1279     public int compareToIgnoreCase(String str) {
1280         return CASE_INSENSITIVE_ORDER.compare(this, str);
1281     }
1282 
1283     /**
1284      * Tests if two string regions are equal.
1285      * <p>
1286      * A substring of this {@code String} object is compared to a substring
1287      * of the argument other. The result is true if these substrings
1288      * represent identical character sequences. The substring of this
1289      * {@code String} object to be compared begins at index {@code toffset}
1290      * and has length {@code len}. The substring of other to be compared
1291      * begins at index {@code ooffset} and has length {@code len}. The
1292      * result is {@code false} if and only if at least one of the following
1293      * is true:
1294      * <ul><li>{@code toffset} is negative.
1295      * <li>{@code ooffset} is negative.
1296      * <li>{@code toffset+len} is greater than the length of this
1297      * {@code String} object.
1298      * <li>{@code ooffset+len} is greater than the length of the other
1299      * argument.
1300      * <li>There is some nonnegative integer <i>k</i> less than {@code len}
1301      * such that:
1302      * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + }
1303      * <i>k</i>{@code )}
1304      * </ul>
1305      *
1306      * <p>Note that this method does <em>not</em> take locale into account.  The
1307      * {@link java.text.Collator} class provides locale-sensitive comparison.
1308      *
1309      * @param   toffset   the starting offset of the subregion in this string.
1310      * @param   other     the string argument.
1311      * @param   ooffset   the starting offset of the subregion in the string
1312      *                    argument.
1313      * @param   len       the number of characters to compare.
1314      * @return  {@code true} if the specified subregion of this string
1315      *          exactly matches the specified subregion of the string argument;
1316      *          {@code false} otherwise.
1317      */
    public boolean regionMatches(int toffset, String other, int ooffset, int len) {
        // Reading other.value first means a null 'other' fails with a
        // NullPointerException before any range check is made.
        byte tv[] = value;
        byte ov[] = other.value;
        // Note: toffset, ooffset, or len might be near -1>>>1.
        // The length arithmetic is therefore done in long so that
        // "length - len" cannot wrap around.
        if ((ooffset < 0) || (toffset < 0) ||
             (toffset > (long)length() - len) ||
             (ooffset > (long)other.length() - len)) {
            return false;
        }
        byte coder = coder();
        if (coder == other.coder()) {
            // Same coder: the regions can be compared byte by byte. For
            // non-Latin1 strings the char offsets and count are doubled to
            // turn them into byte offsets and a byte count (skipped when
            // len <= 0, in which case the loop below does not run).
            if (!isLatin1() && (len > 0)) {
                toffset = toffset << 1;
                ooffset = ooffset << 1;
                len = len << 1;
            }
            while (len-- > 0) {
                if (tv[toffset++] != ov[ooffset++]) {
                    return false;
                }
            }
        } else {
            // Mixed coders: read each side as a char through the helper that
            // matches its own coder and compare the char values.
            if (coder == LATIN1) {
                while (len-- > 0) {
                    if (StringLatin1.getChar(tv, toffset++) !=
                        StringUTF16.getChar(ov, ooffset++)) {
                        return false;
                    }
                }
            } else {
                while (len-- > 0) {
                    if (StringUTF16.getChar(tv, toffset++) !=
                        StringLatin1.getChar(ov, ooffset++)) {
                        return false;
                    }
                }
            }
        }
        // No mismatch found (also reached when len <= 0).
        return true;
    }
1358 
1359     /**
1360      * Tests if two string regions are equal.
1361      * <p>
1362      * A substring of this {@code String} object is compared to a substring
1363      * of the argument {@code other}. The result is {@code true} if these
1364      * substrings represent character sequences that are the same, ignoring
1365      * case if and only if {@code ignoreCase} is true. The substring of
1366      * this {@code String} object to be compared begins at index
1367      * {@code toffset} and has length {@code len}. The substring of
1368      * {@code other} to be compared begins at index {@code ooffset} and
1369      * has length {@code len}. The result is {@code false} if and only if
1370      * at least one of the following is true:
1371      * <ul><li>{@code toffset} is negative.
1372      * <li>{@code ooffset} is negative.
1373      * <li>{@code toffset+len} is greater than the length of this
1374      * {@code String} object.
1375      * <li>{@code ooffset+len} is greater than the length of the other
1376      * argument.
1377      * <li>{@code ignoreCase} is {@code false} and there is some nonnegative
1378      * integer <i>k</i> less than {@code len} such that:
1379      * <blockquote><pre>
1380      * this.charAt(toffset+k) != other.charAt(ooffset+k)
1381      * </pre></blockquote>
1382      * <li>{@code ignoreCase} is {@code true} and there is some nonnegative
1383      * integer <i>k</i> less than {@code len} such that:
1384      * <blockquote><pre>
1385      * Character.toLowerCase(Character.toUpperCase(this.charAt(toffset+k))) !=
1386      * Character.toLowerCase(Character.toUpperCase(other.charAt(ooffset+k)))
1387      * </pre></blockquote>
1388      * </ul>
1389      *
1390      * <p>Note that this method does <em>not</em> take locale into account,
1391      * and will result in unsatisfactory results for certain locales when
1392      * {@code ignoreCase} is {@code true}.  The {@link java.text.Collator} class
1393      * provides locale-sensitive comparison.
1394      *
1395      * @param   ignoreCase   if {@code true}, ignore case when comparing
1396      *                       characters.
1397      * @param   toffset      the starting offset of the subregion in this
1398      *                       string.
1399      * @param   other        the string argument.
1400      * @param   ooffset      the starting offset of the subregion in the string
1401      *                       argument.
1402      * @param   len          the number of characters to compare.
1403      * @return  {@code true} if the specified subregion of this string
1404      *          matches the specified subregion of the string argument;
1405      *          {@code false} otherwise. Whether the matching is exact
1406      *          or case insensitive depends on the {@code ignoreCase}
1407      *          argument.
1408      */
1409     public boolean regionMatches(boolean ignoreCase, int toffset,
1410             String other, int ooffset, int len) {
1411         if (!ignoreCase) {
1412             return regionMatches(toffset, other, ooffset, len);
1413         }
1414         // Note: toffset, ooffset, or len might be near -1>>>1.
1415         if ((ooffset < 0) || (toffset < 0)
1416                 || (toffset > (long)length() - len)
1417                 || (ooffset > (long)other.length() - len)) {
1418             return false;
1419         }
1420         byte tv[] = value;
1421         byte ov[] = other.value;
1422         byte coder = coder();
1423         if (coder == other.coder()) {
1424             return coder == LATIN1
1425               ? StringLatin1.regionMatchesCI(tv, toffset, ov, ooffset, len)
1426               : StringUTF16.regionMatchesCI(tv, toffset, ov, ooffset, len);
1427         }
1428         return coder == LATIN1
1429               ? StringLatin1.regionMatchesCI_UTF16(tv, toffset, ov, ooffset, len)
1430               : StringUTF16.regionMatchesCI_Latin1(tv, toffset, ov, ooffset, len);
1431     }
1432 
1433     /**
1434      * Tests if the substring of this string beginning at the
1435      * specified index starts with the specified prefix.
1436      *
1437      * @param   prefix    the prefix.
1438      * @param   toffset   where to begin looking in this string.
1439      * @return  {@code true} if the character sequence represented by the
1440      *          argument is a prefix of the substring of this object starting
1441      *          at index {@code toffset}; {@code false} otherwise.
1442      *          The result is {@code false} if {@code toffset} is
1443      *          negative or greater than the length of this
1444      *          {@code String} object; otherwise the result is the same
1445      *          as the result of the expression
1446      *          <pre>
1447      *          this.substring(toffset).startsWith(prefix)
1448      *          </pre>
1449      */
1450     public boolean startsWith(String prefix, int toffset) {
1451         // Note: toffset might be near -1>>>1.
1452         if (toffset < 0 || toffset > length() - prefix.length()) {
1453             return false;
1454         }
1455         byte ta[] = value;
1456         byte pa[] = prefix.value;
1457         int po = 0;
1458         int pc = pa.length;
1459         byte coder = coder();
1460         if (coder == prefix.coder()) {
1461             int to = (coder == LATIN1) ? toffset : toffset << 1;
1462             while (po < pc) {
1463                 if (ta[to++] != pa[po++]) {
1464                     return false;
1465                 }
1466             }
1467         } else {
1468             if (coder == LATIN1) {  // && pcoder == UTF16
1469                 return false;
1470             }
1471             // coder == UTF16 && pcoder == LATIN1)
1472             while (po < pc) {
1473                 if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) {
1474                     return false;
1475                }
1476             }
1477         }
1478         return true;
1479     }
1480 
1481     /**
1482      * Tests if this string starts with the specified prefix.
1483      *
1484      * @param   prefix   the prefix.
1485      * @return  {@code true} if the character sequence represented by the
1486      *          argument is a prefix of the character sequence represented by
1487      *          this string; {@code false} otherwise.
1488      *          Note also that {@code true} will be returned if the
1489      *          argument is an empty string or is equal to this
1490      *          {@code String} object as determined by the
1491      *          {@link #equals(Object)} method.
1492      * @since   1.0
1493      */
1494     public boolean startsWith(String prefix) {
1495         return startsWith(prefix, 0);
1496     }
1497 
1498     /**
1499      * Tests if this string ends with the specified suffix.
1500      *
1501      * @param   suffix   the suffix.
1502      * @return  {@code true} if the character sequence represented by the
1503      *          argument is a suffix of the character sequence represented by
1504      *          this object; {@code false} otherwise. Note that the
1505      *          result will be {@code true} if the argument is the
1506      *          empty string or is equal to this {@code String} object
1507      *          as determined by the {@link #equals(Object)} method.
1508      */
1509     public boolean endsWith(String suffix) {
1510         return startsWith(suffix, length() - suffix.length());
1511     }
1512 
1513     /**
1514      * Returns a hash code for this string. The hash code for a
1515      * {@code String} object is computed as
1516      * <blockquote><pre>
1517      * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
1518      * </pre></blockquote>
1519      * using {@code int} arithmetic, where {@code s[i]} is the
1520      * <i>i</i>th character of the string, {@code n} is the length of
1521      * the string, and {@code ^} indicates exponentiation.
1522      * (The hash value of the empty string is zero.)
1523      *
1524      * @return  a hash code value for this object.
1525      */
1526     public int hashCode() {
1527         // The hash or hashIsZero fields are subject to a benign data race,
1528         // making it crucial to ensure that any observable result of the
1529         // calculation in this method stays correct under any possible read of
1530         // these fields. Necessary restrictions to allow this to be correct
1531         // without explicit memory fences or similar concurrency primitives is
1532         // that we can ever only write to one of these two fields for a given
1533         // String instance, and that the computation is idempotent and derived
1534         // from immutable state
1535         int h = hash;
1536         if (h == 0 && !hashIsZero) {
1537             h = isLatin1() ? StringLatin1.hashCode(value)
1538                            : StringUTF16.hashCode(value);
1539             if (h == 0) {
1540                 hashIsZero = true;
1541             } else {
1542                 hash = h;
1543             }
1544         }
1545         return h;
1546     }
1547 
1548     /**
1549      * Returns the index within this string of the first occurrence of
1550      * the specified character. If a character with value
1551      * {@code ch} occurs in the character sequence represented by
1552      * this {@code String} object, then the index (in Unicode
1553      * code units) of the first such occurrence is returned. For
1554      * values of {@code ch} in the range from 0 to 0xFFFF
1555      * (inclusive), this is the smallest value <i>k</i> such that:
1556      * <blockquote><pre>
1557      * this.charAt(<i>k</i>) == ch
1558      * </pre></blockquote>
1559      * is true. For other values of {@code ch}, it is the
1560      * smallest value <i>k</i> such that:
1561      * <blockquote><pre>
1562      * this.codePointAt(<i>k</i>) == ch
1563      * </pre></blockquote>
1564      * is true. In either case, if no such character occurs in this
1565      * string, then {@code -1} is returned.
1566      *
1567      * @param   ch   a character (Unicode code point).
1568      * @return  the index of the first occurrence of the character in the
1569      *          character sequence represented by this object, or
1570      *          {@code -1} if the character does not occur.
1571      */
1572     public int indexOf(int ch) {
1573         return indexOf(ch, 0);
1574     }
1575 
1576     /**
1577      * Returns the index within this string of the first occurrence of the
1578      * specified character, starting the search at the specified index.
1579      * <p>
1580      * If a character with value {@code ch} occurs in the
1581      * character sequence represented by this {@code String}
1582      * object at an index no smaller than {@code fromIndex}, then
1583      * the index of the first such occurrence is returned. For values
1584      * of {@code ch} in the range from 0 to 0xFFFF (inclusive),
1585      * this is the smallest value <i>k</i> such that:
1586      * <blockquote><pre>
1587      * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &gt;= fromIndex)
1588      * </pre></blockquote>
1589      * is true. For other values of {@code ch}, it is the
1590      * smallest value <i>k</i> such that:
1591      * <blockquote><pre>
1592      * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &gt;= fromIndex)
1593      * </pre></blockquote>
1594      * is true. In either case, if no such character occurs in this
1595      * string at or after position {@code fromIndex}, then
1596      * {@code -1} is returned.
1597      *
1598      * <p>
1599      * There is no restriction on the value of {@code fromIndex}. If it
1600      * is negative, it has the same effect as if it were zero: this entire
1601      * string may be searched. If it is greater than the length of this
1602      * string, it has the same effect as if it were equal to the length of
1603      * this string: {@code -1} is returned.
1604      *
1605      * <p>All indices are specified in {@code char} values
1606      * (Unicode code units).
1607      *
1608      * @param   ch          a character (Unicode code point).
1609      * @param   fromIndex   the index to start the search from.
1610      * @return  the index of the first occurrence of the character in the
1611      *          character sequence represented by this object that is greater
1612      *          than or equal to {@code fromIndex}, or {@code -1}
1613      *          if the character does not occur.
1614      */
1615     public int indexOf(int ch, int fromIndex) {
1616         return isLatin1() ? StringLatin1.indexOf(value, ch, fromIndex)
1617                           : StringUTF16.indexOf(value, ch, fromIndex);
1618     }
1619 
1620     /**
1621      * Returns the index within this string of the last occurrence of
1622      * the specified character. For values of {@code ch} in the
1623      * range from 0 to 0xFFFF (inclusive), the index (in Unicode code
1624      * units) returned is the largest value <i>k</i> such that:
1625      * <blockquote><pre>
1626      * this.charAt(<i>k</i>) == ch
1627      * </pre></blockquote>
1628      * is true. For other values of {@code ch}, it is the
1629      * largest value <i>k</i> such that:
1630      * <blockquote><pre>
1631      * this.codePointAt(<i>k</i>) == ch
1632      * </pre></blockquote>
1633      * is true.  In either case, if no such character occurs in this
1634      * string, then {@code -1} is returned.  The
1635      * {@code String} is searched backwards starting at the last
1636      * character.
1637      *
1638      * @param   ch   a character (Unicode code point).
1639      * @return  the index of the last occurrence of the character in the
1640      *          character sequence represented by this object, or
1641      *          {@code -1} if the character does not occur.
1642      */
1643     public int lastIndexOf(int ch) {
1644         return lastIndexOf(ch, length() - 1);
1645     }
1646 
1647     /**
1648      * Returns the index within this string of the last occurrence of
1649      * the specified character, searching backward starting at the
1650      * specified index. For values of {@code ch} in the range
1651      * from 0 to 0xFFFF (inclusive), the index returned is the largest
1652      * value <i>k</i> such that:
1653      * <blockquote><pre>
1654      * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &lt;= fromIndex)
1655      * </pre></blockquote>
1656      * is true. For other values of {@code ch}, it is the
1657      * largest value <i>k</i> such that:
1658      * <blockquote><pre>
1659      * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &lt;= fromIndex)
1660      * </pre></blockquote>
1661      * is true. In either case, if no such character occurs in this
1662      * string at or before position {@code fromIndex}, then
1663      * {@code -1} is returned.
1664      *
1665      * <p>All indices are specified in {@code char} values
1666      * (Unicode code units).
1667      *
1668      * @param   ch          a character (Unicode code point).
1669      * @param   fromIndex   the index to start the search from. There is no
1670      *          restriction on the value of {@code fromIndex}. If it is
1671      *          greater than or equal to the length of this string, it has
1672      *          the same effect as if it were equal to one less than the
1673      *          length of this string: this entire string may be searched.
1674      *          If it is negative, it has the same effect as if it were -1:
1675      *          -1 is returned.
1676      * @return  the index of the last occurrence of the character in the
1677      *          character sequence represented by this object that is less
1678      *          than or equal to {@code fromIndex}, or {@code -1}
1679      *          if the character does not occur before that point.
1680      */
1681     public int lastIndexOf(int ch, int fromIndex) {
1682         return isLatin1() ? StringLatin1.lastIndexOf(value, ch, fromIndex)
1683                           : StringUTF16.lastIndexOf(value, ch, fromIndex);
1684     }
1685 
1686     /**
1687      * Returns the index within this string of the first occurrence of the
1688      * specified substring.
1689      *
1690      * <p>The returned index is the smallest value {@code k} for which:
1691      * <pre>{@code
1692      * this.startsWith(str, k)
1693      * }</pre>
1694      * If no such value of {@code k} exists, then {@code -1} is returned.
1695      *
1696      * @param   str   the substring to search for.
1697      * @return  the index of the first occurrence of the specified substring,
1698      *          or {@code -1} if there is no such occurrence.
1699      */
1700     public int indexOf(String str) {
1701         byte coder = coder();
1702         if (coder == str.coder()) {
1703             return isLatin1() ? StringLatin1.indexOf(value, str.value)
1704                               : StringUTF16.indexOf(value, str.value);
1705         }
1706         if (coder == LATIN1) {  // str.coder == UTF16
1707             return -1;
1708         }
1709         return StringUTF16.indexOfLatin1(value, str.value);
1710     }
1711 
1712     /**
1713      * Returns the index within this string of the first occurrence of the
1714      * specified substring, starting at the specified index.
1715      *
1716      * <p>The returned index is the smallest value {@code k} for which:
1717      * <pre>{@code
1718      *     k >= Math.min(fromIndex, this.length()) &&
1719      *                   this.startsWith(str, k)
1720      * }</pre>
1721      * If no such value of {@code k} exists, then {@code -1} is returned.
1722      *
1723      * @param   str         the substring to search for.
1724      * @param   fromIndex   the index from which to start the search.
1725      * @return  the index of the first occurrence of the specified substring,
1726      *          starting at the specified index,
1727      *          or {@code -1} if there is no such occurrence.
1728      */
1729     public int indexOf(String str, int fromIndex) {
1730         return indexOf(value, coder(), length(), str, fromIndex);
1731     }
1732 
1733     /**
1734      * Code shared by String and AbstractStringBuilder to do searches. The
1735      * source is the character array being searched, and the target
1736      * is the string being searched for.
1737      *
1738      * @param   src       the characters being searched.
1739      * @param   srcCoder  the coder of the source string.
1740      * @param   srcCount  length of the source string.
1741      * @param   tgtStr    the characters being searched for.
1742      * @param   fromIndex the index to begin searching from.
1743      */
1744     static int indexOf(byte[] src, byte srcCoder, int srcCount,
1745                        String tgtStr, int fromIndex) {
1746         byte[] tgt    = tgtStr.value;
1747         byte tgtCoder = tgtStr.coder();
1748         int tgtCount  = tgtStr.length();
1749 
1750         if (fromIndex >= srcCount) {
1751             return (tgtCount == 0 ? srcCount : -1);
1752         }
1753         if (fromIndex < 0) {
1754             fromIndex = 0;
1755         }
1756         if (tgtCount == 0) {
1757             return fromIndex;
1758         }
1759         if (tgtCount > srcCount) {
1760             return -1;
1761         }
1762         if (srcCoder == tgtCoder) {
1763             return srcCoder == LATIN1
1764                 ? StringLatin1.indexOf(src, srcCount, tgt, tgtCount, fromIndex)
1765                 : StringUTF16.indexOf(src, srcCount, tgt, tgtCount, fromIndex);
1766         }
1767         if (srcCoder == LATIN1) {    //  && tgtCoder == UTF16
1768             return -1;
1769         }
1770         // srcCoder == UTF16 && tgtCoder == LATIN1) {
1771         return StringUTF16.indexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex);
1772     }
1773 
1774     /**
1775      * Returns the index within this string of the last occurrence of the
1776      * specified substring.  The last occurrence of the empty string ""
1777      * is considered to occur at the index value {@code this.length()}.
1778      *
1779      * <p>The returned index is the largest value {@code k} for which:
1780      * <pre>{@code
1781      * this.startsWith(str, k)
1782      * }</pre>
1783      * If no such value of {@code k} exists, then {@code -1} is returned.
1784      *
1785      * @param   str   the substring to search for.
1786      * @return  the index of the last occurrence of the specified substring,
1787      *          or {@code -1} if there is no such occurrence.
1788      */
1789     public int lastIndexOf(String str) {
1790         return lastIndexOf(str, length());
1791     }
1792 
1793     /**
1794      * Returns the index within this string of the last occurrence of the
1795      * specified substring, searching backward starting at the specified index.
1796      *
1797      * <p>The returned index is the largest value {@code k} for which:
1798      * <pre>{@code
1799      *     k <= Math.min(fromIndex, this.length()) &&
1800      *                   this.startsWith(str, k)
1801      * }</pre>
1802      * If no such value of {@code k} exists, then {@code -1} is returned.
1803      *
1804      * @param   str         the substring to search for.
1805      * @param   fromIndex   the index to start the search from.
1806      * @return  the index of the last occurrence of the specified substring,
1807      *          searching backward from the specified index,
1808      *          or {@code -1} if there is no such occurrence.
1809      */
1810     public int lastIndexOf(String str, int fromIndex) {
1811         return lastIndexOf(value, coder(), length(), str, fromIndex);
1812     }
1813 
1814     /**
1815      * Code shared by String and AbstractStringBuilder to do searches. The
1816      * source is the character array being searched, and the target
1817      * is the string being searched for.
1818      *
1819      * @param   src         the characters being searched.
1820      * @param   srcCoder    coder handles the mapping between bytes/chars
1821      * @param   srcCount    count of the source string.
1822      * @param   tgtStr      the characters being searched for.
1823      * @param   fromIndex   the index to begin searching from.
1824      */
1825     static int lastIndexOf(byte[] src, byte srcCoder, int srcCount,
1826                            String tgtStr, int fromIndex) {
1827         byte[] tgt = tgtStr.value;
1828         byte tgtCoder = tgtStr.coder();
1829         int tgtCount = tgtStr.length();
1830         /*
1831          * Check arguments; return immediately where possible. For
1832          * consistency, don't check for null str.
1833          */
1834         int rightIndex = srcCount - tgtCount;
1835         if (fromIndex > rightIndex) {
1836             fromIndex = rightIndex;
1837         }
1838         if (fromIndex < 0) {
1839             return -1;
1840         }
1841         /* Empty string always matches. */
1842         if (tgtCount == 0) {
1843             return fromIndex;
1844         }
1845         if (srcCoder == tgtCoder) {
1846             return srcCoder == LATIN1
1847                 ? StringLatin1.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex)
1848                 : StringUTF16.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex);
1849         }
1850         if (srcCoder == LATIN1) {    // && tgtCoder == UTF16
1851             return -1;
1852         }
1853         // srcCoder == UTF16 && tgtCoder == LATIN1
1854         return StringUTF16.lastIndexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex);
1855     }
1856 
1857     /**
1858      * Returns a string that is a substring of this string. The
1859      * substring begins with the character at the specified index and
1860      * extends to the end of this string. <p>
1861      * Examples:
1862      * <blockquote><pre>
1863      * "unhappy".substring(2) returns "happy"
1864      * "Harbison".substring(3) returns "bison"
1865      * "emptiness".substring(9) returns "" (an empty string)
1866      * </pre></blockquote>
1867      *
1868      * @param      beginIndex   the beginning index, inclusive.
1869      * @return     the specified substring.
1870      * @throws     IndexOutOfBoundsException  if
1871      *             {@code beginIndex} is negative or larger than the
1872      *             length of this {@code String} object.
1873      */
1874     public String substring(int beginIndex) {
1875         return substring(beginIndex, length());
1876     }
1877 
1878     /**
1879      * Returns a string that is a substring of this string. The
1880      * substring begins at the specified {@code beginIndex} and
1881      * extends to the character at index {@code endIndex - 1}.
1882      * Thus the length of the substring is {@code endIndex-beginIndex}.
1883      * <p>
1884      * Examples:
1885      * <blockquote><pre>
1886      * "hamburger".substring(4, 8) returns "urge"
1887      * "smiles".substring(1, 5) returns "mile"
1888      * </pre></blockquote>
1889      *
1890      * @param      beginIndex   the beginning index, inclusive.
1891      * @param      endIndex     the ending index, exclusive.
1892      * @return     the specified substring.
1893      * @throws     IndexOutOfBoundsException  if the
1894      *             {@code beginIndex} is negative, or
1895      *             {@code endIndex} is larger than the length of
1896      *             this {@code String} object, or
1897      *             {@code beginIndex} is larger than
1898      *             {@code endIndex}.
1899      */
1900     public String substring(int beginIndex, int endIndex) {
1901         int length = length();
1902         checkBoundsBeginEnd(beginIndex, endIndex, length);
1903         if (beginIndex == 0 && endIndex == length) {
1904             return this;
1905         }
1906         int subLen = endIndex - beginIndex;
1907         return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen)
1908                           : StringUTF16.newString(value, beginIndex, subLen);
1909     }
1910 
1911     /**
1912      * Returns a character sequence that is a subsequence of this sequence.
1913      *
1914      * <p> An invocation of this method of the form
1915      *
1916      * <blockquote><pre>
1917      * str.subSequence(begin,&nbsp;end)</pre></blockquote>
1918      *
1919      * behaves in exactly the same way as the invocation
1920      *
1921      * <blockquote><pre>
1922      * str.substring(begin,&nbsp;end)</pre></blockquote>
1923      *
1924      * @apiNote
1925      * This method is defined so that the {@code String} class can implement
1926      * the {@link CharSequence} interface.
1927      *
1928      * @param   beginIndex   the begin index, inclusive.
1929      * @param   endIndex     the end index, exclusive.
1930      * @return  the specified subsequence.
1931      *
1932      * @throws  IndexOutOfBoundsException
1933      *          if {@code beginIndex} or {@code endIndex} is negative,
1934      *          if {@code endIndex} is greater than {@code length()},
1935      *          or if {@code beginIndex} is greater than {@code endIndex}
1936      *
1937      * @since 1.4
1938      * @spec JSR-51
1939      */
1940     public CharSequence subSequence(int beginIndex, int endIndex) {
1941         return this.substring(beginIndex, endIndex);
1942     }
1943 
1944     /**
1945      * Concatenates the specified string to the end of this string.
1946      * <p>
1947      * If the length of the argument string is {@code 0}, then this
1948      * {@code String} object is returned. Otherwise, a
1949      * {@code String} object is returned that represents a character
1950      * sequence that is the concatenation of the character sequence
1951      * represented by this {@code String} object and the character
1952      * sequence represented by the argument string.<p>
1953      * Examples:
1954      * <blockquote><pre>
1955      * "cares".concat("s") returns "caress"
1956      * "to".concat("get").concat("her") returns "together"
1957      * </pre></blockquote>
1958      *
1959      * @param   str   the {@code String} that is concatenated to the end
1960      *                of this {@code String}.
1961      * @return  a string that represents the concatenation of this object's
1962      *          characters followed by the string argument's characters.
1963      */
1964     public String concat(String str) {
1965         if (str.isEmpty()) {
1966             return this;
1967         }
1968         return StringConcatHelper.simpleConcat(this, str);
1969     }
1970 
1971     /**
1972      * Returns a string resulting from replacing all occurrences of
1973      * {@code oldChar} in this string with {@code newChar}.
1974      * <p>
1975      * If the character {@code oldChar} does not occur in the
1976      * character sequence represented by this {@code String} object,
1977      * then a reference to this {@code String} object is returned.
1978      * Otherwise, a {@code String} object is returned that
1979      * represents a character sequence identical to the character sequence
1980      * represented by this {@code String} object, except that every
1981      * occurrence of {@code oldChar} is replaced by an occurrence
1982      * of {@code newChar}.
1983      * <p>
1984      * Examples:
1985      * <blockquote><pre>
1986      * "mesquite in your cellar".replace('e', 'o')
1987      *         returns "mosquito in your collar"
1988      * "the war of baronets".replace('r', 'y')
1989      *         returns "the way of bayonets"
1990      * "sparring with a purple porpoise".replace('p', 't')
1991      *         returns "starring with a turtle tortoise"
1992      * "JonL".replace('q', 'x') returns "JonL" (no change)
1993      * </pre></blockquote>
1994      *
1995      * @param   oldChar   the old character.
1996      * @param   newChar   the new character.
1997      * @return  a string derived from this string by replacing every
1998      *          occurrence of {@code oldChar} with {@code newChar}.
1999      */
2000     public String replace(char oldChar, char newChar) {
2001         if (oldChar != newChar) {
2002             String ret = isLatin1() ? StringLatin1.replace(value, oldChar, newChar)
2003                                     : StringUTF16.replace(value, oldChar, newChar);
2004             if (ret != null) {
2005                 return ret;
2006             }
2007         }
2008         return this;
2009     }
2010 
2011     /**
2012      * Tells whether or not this string matches the given <a
2013      * href="../util/regex/Pattern.html#sum">regular expression</a>.
2014      *
2015      * <p> An invocation of this method of the form
2016      * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the
2017      * same result as the expression
2018      *
2019      * <blockquote>
2020      * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence)
2021      * matches(<i>regex</i>, <i>str</i>)}
2022      * </blockquote>
2023      *
2024      * @param   regex
2025      *          the regular expression to which this string is to be matched
2026      *
2027      * @return  {@code true} if, and only if, this string matches the
2028      *          given regular expression
2029      *
2030      * @throws  PatternSyntaxException
2031      *          if the regular expression's syntax is invalid
2032      *
2033      * @see java.util.regex.Pattern
2034      *
2035      * @since 1.4
2036      * @spec JSR-51
2037      */
2038     public boolean matches(String regex) {
2039         return Pattern.matches(regex, this);
2040     }
2041 
2042     /**
2043      * Returns true if and only if this string contains the specified
2044      * sequence of char values.
2045      *
2046      * @param s the sequence to search for
2047      * @return true if this string contains {@code s}, false otherwise
2048      * @since 1.5
2049      */
2050     public boolean contains(CharSequence s) {
2051         return indexOf(s.toString()) >= 0;
2052     }
2053 
2054     /**
2055      * Replaces the first substring of this string that matches the given <a
2056      * href="../util/regex/Pattern.html#sum">regular expression</a> with the
2057      * given replacement.
2058      *
2059      * <p> An invocation of this method of the form
2060      * <i>str</i>{@code .replaceFirst(}<i>regex</i>{@code ,} <i>repl</i>{@code )}
2061      * yields exactly the same result as the expression
2062      *
2063      * <blockquote>
2064      * <code>
2065      * {@link java.util.regex.Pattern}.{@link
2066      * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link
2067      * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link
2068      * java.util.regex.Matcher#replaceFirst replaceFirst}(<i>repl</i>)
2069      * </code>
2070      * </blockquote>
2071      *
2072      *<p>
2073      * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the
2074      * replacement string may cause the results to be different than if it were
2075      * being treated as a literal replacement string; see
2076      * {@link java.util.regex.Matcher#replaceFirst}.
2077      * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
2078      * meaning of these characters, if desired.
2079      *
2080      * @param   regex
2081      *          the regular expression to which this string is to be matched
2082      * @param   replacement
2083      *          the string to be substituted for the first match
2084      *
2085      * @return  The resulting {@code String}
2086      *
2087      * @throws  PatternSyntaxException
2088      *          if the regular expression's syntax is invalid
2089      *
2090      * @see java.util.regex.Pattern
2091      *
2092      * @since 1.4
2093      * @spec JSR-51
2094      */
2095     public String replaceFirst(String regex, String replacement) {
2096         return Pattern.compile(regex).matcher(this).replaceFirst(replacement);
2097     }
2098 
2099     /**
2100      * Replaces each substring of this string that matches the given <a
2101      * href="../util/regex/Pattern.html#sum">regular expression</a> with the
2102      * given replacement.
2103      *
2104      * <p> An invocation of this method of the form
2105      * <i>str</i>{@code .replaceAll(}<i>regex</i>{@code ,} <i>repl</i>{@code )}
2106      * yields exactly the same result as the expression
2107      *
2108      * <blockquote>
2109      * <code>
2110      * {@link java.util.regex.Pattern}.{@link
2111      * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link
2112      * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link
2113      * java.util.regex.Matcher#replaceAll replaceAll}(<i>repl</i>)
2114      * </code>
2115      * </blockquote>
2116      *
2117      *<p>
2118      * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the
2119      * replacement string may cause the results to be different than if it were
2120      * being treated as a literal replacement string; see
2121      * {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}.
2122      * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
2123      * meaning of these characters, if desired.
2124      *
2125      * @param   regex
2126      *          the regular expression to which this string is to be matched
2127      * @param   replacement
2128      *          the string to be substituted for each match
2129      *
2130      * @return  The resulting {@code String}
2131      *
2132      * @throws  PatternSyntaxException
2133      *          if the regular expression's syntax is invalid
2134      *
2135      * @see java.util.regex.Pattern
2136      *
2137      * @since 1.4
2138      * @spec JSR-51
2139      */
2140     public String replaceAll(String regex, String replacement) {
2141         return Pattern.compile(regex).matcher(this).replaceAll(replacement);
2142     }
2143 
2144     /**
2145      * Replaces each substring of this string that matches the literal target
2146      * sequence with the specified literal replacement sequence. The
2147      * replacement proceeds from the beginning of the string to the end, for
2148      * example, replacing "aa" with "b" in the string "aaa" will result in
2149      * "ba" rather than "ab".
2150      *
2151      * @param  target The sequence of char values to be replaced
2152      * @param  replacement The replacement sequence of char values
2153      * @return  The resulting string
2154      * @since 1.5
2155      */
2156     public String replace(CharSequence target, CharSequence replacement) {
2157         String trgtStr = target.toString();
2158         String replStr = replacement.toString();
2159         int thisLen = length();
2160         int trgtLen = trgtStr.length();
2161         int replLen = replStr.length();
2162 
2163         if (trgtLen > 0) {
2164             if (trgtLen == 1 && replLen == 1) {
2165                 return replace(trgtStr.charAt(0), replStr.charAt(0));
2166             }
2167 
2168             boolean thisIsLatin1 = this.isLatin1();
2169             boolean trgtIsLatin1 = trgtStr.isLatin1();
2170             boolean replIsLatin1 = replStr.isLatin1();
2171             String ret = (thisIsLatin1 && trgtIsLatin1 && replIsLatin1)
2172                     ? StringLatin1.replace(value, thisLen,
2173                                            trgtStr.value, trgtLen,
2174                                            replStr.value, replLen)
2175                     : StringUTF16.replace(value, thisLen, thisIsLatin1,
2176                                           trgtStr.value, trgtLen, trgtIsLatin1,
2177                                           replStr.value, replLen, replIsLatin1);
2178             if (ret != null) {
2179                 return ret;
2180             }
2181             return this;
2182 
2183         } else { // trgtLen == 0
2184             int resultLen;
2185             try {
2186                 resultLen = Math.addExact(thisLen, Math.multiplyExact(
2187                         Math.addExact(thisLen, 1), replLen));
2188             } catch (ArithmeticException ignored) {
2189                 throw new OutOfMemoryError();
2190             }
2191 
2192             StringBuilder sb = new StringBuilder(resultLen);
2193             sb.append(replStr);
2194             for (int i = 0; i < thisLen; ++i) {
2195                 sb.append(charAt(i)).append(replStr);
2196             }
2197             return sb.toString();
2198         }
2199     }
2200 
2201     /**
2202      * Splits this string around matches of the given
2203      * <a href="../util/regex/Pattern.html#sum">regular expression</a>.
2204      *
2205      * <p> The array returned by this method contains each substring of this
2206      * string that is terminated by another substring that matches the given
2207      * expression or is terminated by the end of the string.  The substrings in
2208      * the array are in the order in which they occur in this string.  If the
2209      * expression does not match any part of the input then the resulting array
2210      * has just one element, namely this string.
2211      *
2212      * <p> When there is a positive-width match at the beginning of this
2213      * string then an empty leading substring is included at the beginning
     * of the resulting array. A zero-width match at the beginning, however,
     * never produces such an empty leading substring.
2216      *
2217      * <p> The {@code limit} parameter controls the number of times the
2218      * pattern is applied and therefore affects the length of the resulting
2219      * array.
2220      * <ul>
2221      *    <li><p>
2222      *    If the <i>limit</i> is positive then the pattern will be applied
2223      *    at most <i>limit</i>&nbsp;-&nbsp;1 times, the array's length will be
2224      *    no greater than <i>limit</i>, and the array's last entry will contain
2225      *    all input beyond the last matched delimiter.</p></li>
2226      *
2227      *    <li><p>
2228      *    If the <i>limit</i> is zero then the pattern will be applied as
2229      *    many times as possible, the array can have any length, and trailing
2230      *    empty strings will be discarded.</p></li>
2231      *
2232      *    <li><p>
2233      *    If the <i>limit</i> is negative then the pattern will be applied
2234      *    as many times as possible and the array can have any length.</p></li>
2235      * </ul>
2236      *
2237      * <p> The string {@code "boo:and:foo"}, for example, yields the
2238      * following results with these parameters:
2239      *
2240      * <blockquote><table class="plain">
2241      * <caption style="display:none">Split example showing regex, limit, and result</caption>
2242      * <thead>
2243      * <tr>
2244      *     <th scope="col">Regex</th>
2245      *     <th scope="col">Limit</th>
2246      *     <th scope="col">Result</th>
2247      * </tr>
2248      * </thead>
2249      * <tbody>
2250      * <tr><th scope="row" rowspan="3" style="font-weight:normal">:</th>
2251      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th>
2252      *     <td>{@code { "boo", "and:foo" }}</td></tr>
2253      * <tr><!-- : -->
2254      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
2255      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
2256      * <tr><!-- : -->
2257      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
2258      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
2259      * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th>
2260      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
2261      *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
2262      * <tr><!-- o -->
2263      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
2264      *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
2265      * <tr><!-- o -->
2266      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th>
2267      *     <td>{@code { "b", "", ":and:f" }}</td></tr>
2268      * </tbody>
2269      * </table></blockquote>
2270      *
2271      * <p> An invocation of this method of the form
2272      * <i>str.</i>{@code split(}<i>regex</i>{@code ,}&nbsp;<i>n</i>{@code )}
2273      * yields the same result as the expression
2274      *
2275      * <blockquote>
2276      * <code>
2277      * {@link java.util.regex.Pattern}.{@link
2278      * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link
2279      * java.util.regex.Pattern#split(java.lang.CharSequence,int) split}(<i>str</i>,&nbsp;<i>n</i>)
2280      * </code>
2281      * </blockquote>
2282      *
2283      *
2284      * @param  regex
2285      *         the delimiting regular expression
2286      *
2287      * @param  limit
2288      *         the result threshold, as described above
2289      *
2290      * @return  the array of strings computed by splitting this string
2291      *          around matches of the given regular expression
2292      *
2293      * @throws  PatternSyntaxException
2294      *          if the regular expression's syntax is invalid
2295      *
2296      * @see java.util.regex.Pattern
2297      *
2298      * @since 1.4
2299      * @spec JSR-51
2300      */
2301     public String[] split(String regex, int limit) {
2302         /* fastpath if the regex is a
2303          (1)one-char String and this character is not one of the
2304             RegEx's meta characters ".$|()[{^?*+\\", or
2305          (2)two-char String and the first char is the backslash and
2306             the second is not the ascii digit or ascii letter.
2307          */
2308         char ch = 0;
2309         if (((regex.length() == 1 &&
2310              ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
2311              (regex.length() == 2 &&
2312               regex.charAt(0) == '\\' &&
2313               (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
2314               ((ch-'a')|('z'-ch)) < 0 &&
2315               ((ch-'A')|('Z'-ch)) < 0)) &&
2316             (ch < Character.MIN_HIGH_SURROGATE ||
2317              ch > Character.MAX_LOW_SURROGATE))
2318         {
2319             int off = 0;
2320             int next = 0;
2321             boolean limited = limit > 0;
2322             ArrayList<String> list = new ArrayList<>();
2323             while ((next = indexOf(ch, off)) != -1) {
2324                 if (!limited || list.size() < limit - 1) {
2325                     list.add(substring(off, next));
2326                     off = next + 1;
2327                 } else {    // last one
2328                     //assert (list.size() == limit - 1);
2329                     int last = length();
2330                     list.add(substring(off, last));
2331                     off = last;
2332                     break;
2333                 }
2334             }
2335             // If no match was found, return this
2336             if (off == 0)
2337                 return new String[]{this};
2338 
2339             // Add remaining segment
2340             if (!limited || list.size() < limit)
2341                 list.add(substring(off, length()));
2342 
2343             // Construct result
2344             int resultSize = list.size();
2345             if (limit == 0) {
2346                 while (resultSize > 0 && list.get(resultSize - 1).isEmpty()) {
2347                     resultSize--;
2348                 }
2349             }
2350             String[] result = new String[resultSize];
2351             return list.subList(0, resultSize).toArray(result);
2352         }
2353         return Pattern.compile(regex).split(this, limit);
2354     }
2355 
2356     /**
2357      * Splits this string around matches of the given <a
2358      * href="../util/regex/Pattern.html#sum">regular expression</a>.
2359      *
2360      * <p> This method works as if by invoking the two-argument {@link
2361      * #split(String, int) split} method with the given expression and a limit
2362      * argument of zero.  Trailing empty strings are therefore not included in
2363      * the resulting array.
2364      *
2365      * <p> The string {@code "boo:and:foo"}, for example, yields the following
2366      * results with these expressions:
2367      *
2368      * <blockquote><table class="plain">
2369      * <caption style="display:none">Split examples showing regex and result</caption>
2370      * <thead>
2371      * <tr>
2372      *  <th scope="col">Regex</th>
2373      *  <th scope="col">Result</th>
2374      * </tr>
2375      * </thead>
2376      * <tbody>
     * <tr><th scope="row" style="font-weight:normal">:</th>
     *     <td>{@code { "boo", "and", "foo" }}</td></tr>
     * <tr><th scope="row" style="font-weight:normal">o</th>
     *     <td>{@code { "b", "", ":and:f" }}</td></tr>
2381      * </tbody>
2382      * </table></blockquote>
2383      *
2384      *
2385      * @param  regex
2386      *         the delimiting regular expression
2387      *
2388      * @return  the array of strings computed by splitting this string
2389      *          around matches of the given regular expression
2390      *
2391      * @throws  PatternSyntaxException
2392      *          if the regular expression's syntax is invalid
2393      *
2394      * @see java.util.regex.Pattern
2395      *
2396      * @since 1.4
2397      * @spec JSR-51
2398      */
2399     public String[] split(String regex) {
2400         return split(regex, 0);
2401     }
2402 
2403     /**
2404      * Returns a new String composed of copies of the
2405      * {@code CharSequence elements} joined together with a copy of
2406      * the specified {@code delimiter}.
2407      *
2408      * <blockquote>For example,
2409      * <pre>{@code
2410      *     String message = String.join("-", "Java", "is", "cool");
2411      *     // message returned is: "Java-is-cool"
2412      * }</pre></blockquote>
2413      *
2414      * Note that if an element is null, then {@code "null"} is added.
2415      *
2416      * @param  delimiter the delimiter that separates each element
2417      * @param  elements the elements to join together.
2418      *
2419      * @return a new {@code String} that is composed of the {@code elements}
2420      *         separated by the {@code delimiter}
2421      *
2422      * @throws NullPointerException If {@code delimiter} or {@code elements}
2423      *         is {@code null}
2424      *
2425      * @see java.util.StringJoiner
2426      * @since 1.8
2427      */
2428     public static String join(CharSequence delimiter, CharSequence... elements) {
2429         Objects.requireNonNull(delimiter);
2430         Objects.requireNonNull(elements);
2431         // Number of elements not likely worth Arrays.stream overhead.
2432         StringJoiner joiner = new StringJoiner(delimiter);
2433         for (CharSequence cs: elements) {
2434             joiner.add(cs);
2435         }
2436         return joiner.toString();
2437     }
2438 
2439     /**
2440      * Returns a new {@code String} composed of copies of the
2441      * {@code CharSequence elements} joined together with a copy of the
2442      * specified {@code delimiter}.
2443      *
2444      * <blockquote>For example,
2445      * <pre>{@code
2446      *     List<String> strings = List.of("Java", "is", "cool");
2447      *     String message = String.join(" ", strings);
2448      *     // message returned is: "Java is cool"
2449      *
2450      *     Set<String> strings =
2451      *         new LinkedHashSet<>(List.of("Java", "is", "very", "cool"));
2452      *     String message = String.join("-", strings);
2453      *     // message returned is: "Java-is-very-cool"
2454      * }</pre></blockquote>
2455      *
2456      * Note that if an individual element is {@code null}, then {@code "null"} is added.
2457      *
2458      * @param  delimiter a sequence of characters that is used to separate each
2459      *         of the {@code elements} in the resulting {@code String}
2460      * @param  elements an {@code Iterable} that will have its {@code elements}
2461      *         joined together.
2462      *
2463      * @return a new {@code String} that is composed from the {@code elements}
2464      *         argument
2465      *
2466      * @throws NullPointerException If {@code delimiter} or {@code elements}
2467      *         is {@code null}
2468      *
2469      * @see    #join(CharSequence,CharSequence...)
2470      * @see    java.util.StringJoiner
2471      * @since 1.8
2472      */
2473     public static String join(CharSequence delimiter,
2474             Iterable<? extends CharSequence> elements) {
2475         Objects.requireNonNull(delimiter);
2476         Objects.requireNonNull(elements);
2477         StringJoiner joiner = new StringJoiner(delimiter);
2478         for (CharSequence cs: elements) {
2479             joiner.add(cs);
2480         }
2481         return joiner.toString();
2482     }
2483 
2484     /**
2485      * Converts all of the characters in this {@code String} to lower
2486      * case using the rules of the given {@code Locale}.  Case mapping is based
2487      * on the Unicode Standard version specified by the {@link java.lang.Character Character}
2488      * class. Since case mappings are not always 1:1 char mappings, the resulting
2489      * {@code String} may be a different length than the original {@code String}.
2490      * <p>
     * Examples of lowercase mappings are in the following table:
2492      * <table class="plain">
2493      * <caption style="display:none">Lowercase mapping examples showing language code of locale, upper case, lower case, and description</caption>
2494      * <thead>
2495      * <tr>
2496      *   <th scope="col">Language Code of Locale</th>
2497      *   <th scope="col">Upper Case</th>
2498      *   <th scope="col">Lower Case</th>
2499      *   <th scope="col">Description</th>
2500      * </tr>
2501      * </thead>
2502      * <tbody>
2503      * <tr>
2504      *   <td>tr (Turkish)</td>
2505      *   <th scope="row" style="font-weight:normal; text-align:left">\u0130</th>
2506      *   <td>\u0069</td>
2507      *   <td>capital letter I with dot above -&gt; small letter i</td>
2508      * </tr>
2509      * <tr>
2510      *   <td>tr (Turkish)</td>
2511      *   <th scope="row" style="font-weight:normal; text-align:left">\u0049</th>
2512      *   <td>\u0131</td>
2513      *   <td>capital letter I -&gt; small letter dotless i </td>
2514      * </tr>
2515      * <tr>
2516      *   <td>(all)</td>
2517      *   <th scope="row" style="font-weight:normal; text-align:left">French Fries</th>
2518      *   <td>french fries</td>
2519      *   <td>lowercased all chars in String</td>
2520      * </tr>
2521      * <tr>
2522      *   <td>(all)</td>
2523      *   <th scope="row" style="font-weight:normal; text-align:left">
2524      *       &Iota;&Chi;&Theta;&Upsilon;&Sigma;</th>
2525      *   <td>&iota;&chi;&theta;&upsilon;&sigma;</td>
2526      *   <td>lowercased all chars in String</td>
2527      * </tr>
2528      * </tbody>
2529      * </table>
2530      *
2531      * @param locale use the case transformation rules for this locale
2532      * @return the {@code String}, converted to lowercase.
2533      * @see     java.lang.String#toLowerCase()
2534      * @see     java.lang.String#toUpperCase()
2535      * @see     java.lang.String#toUpperCase(Locale)
2536      * @since   1.1
2537      */
2538     public String toLowerCase(Locale locale) {
2539         return isLatin1() ? StringLatin1.toLowerCase(this, value, locale)
2540                           : StringUTF16.toLowerCase(this, value, locale);
2541     }
2542 
2543     /**
2544      * Converts all of the characters in this {@code String} to lower
2545      * case using the rules of the default locale. This is equivalent to calling
2546      * {@code toLowerCase(Locale.getDefault())}.
2547      * <p>
2548      * <b>Note:</b> This method is locale sensitive, and may produce unexpected
2549      * results if used for strings that are intended to be interpreted locale
2550      * independently.
2551      * Examples are programming language identifiers, protocol keys, and HTML
2552      * tags.
2553      * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale
2554      * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the
2555      * LATIN SMALL LETTER DOTLESS I character.
2556      * To obtain correct results for locale insensitive strings, use
2557      * {@code toLowerCase(Locale.ROOT)}.
2558      *
2559      * @return  the {@code String}, converted to lowercase.
2560      * @see     java.lang.String#toLowerCase(Locale)
2561      */
2562     public String toLowerCase() {
2563         return toLowerCase(Locale.getDefault());
2564     }
2565 
2566     /**
2567      * Converts all of the characters in this {@code String} to upper
2568      * case using the rules of the given {@code Locale}. Case mapping is based
2569      * on the Unicode Standard version specified by the {@link java.lang.Character Character}
2570      * class. Since case mappings are not always 1:1 char mappings, the resulting
2571      * {@code String} may be a different length than the original {@code String}.
2572      * <p>
2573      * Examples of locale-sensitive and 1:M case mappings are in the following table.
2574      *
2575      * <table class="plain">
2576      * <caption style="display:none">Examples of locale-sensitive and 1:M case mappings. Shows Language code of locale, lower case, upper case, and description.</caption>
2577      * <thead>
2578      * <tr>
2579      *   <th scope="col">Language Code of Locale</th>
2580      *   <th scope="col">Lower Case</th>
2581      *   <th scope="col">Upper Case</th>
2582      *   <th scope="col">Description</th>
2583      * </tr>
2584      * </thead>
2585      * <tbody>
2586      * <tr>
2587      *   <td>tr (Turkish)</td>
2588      *   <th scope="row" style="font-weight:normal; text-align:left">\u0069</th>
2589      *   <td>\u0130</td>
2590      *   <td>small letter i -&gt; capital letter I with dot above</td>
2591      * </tr>
2592      * <tr>
2593      *   <td>tr (Turkish)</td>
2594      *   <th scope="row" style="font-weight:normal; text-align:left">\u0131</th>
2595      *   <td>\u0049</td>
2596      *   <td>small letter dotless i -&gt; capital letter I</td>
2597      * </tr>
2598      * <tr>
2599      *   <td>(all)</td>
2600      *   <th scope="row" style="font-weight:normal; text-align:left">\u00df</th>
2601      *   <td>\u0053 \u0053</td>
2602      *   <td>small letter sharp s -&gt; two letters: SS</td>
2603      * </tr>
2604      * <tr>
2605      *   <td>(all)</td>
2606      *   <th scope="row" style="font-weight:normal; text-align:left">Fahrvergn&uuml;gen</th>
2607      *   <td>FAHRVERGN&Uuml;GEN</td>
2608      *   <td></td>
2609      * </tr>
2610      * </tbody>
2611      * </table>
2612      * @param locale use the case transformation rules for this locale
2613      * @return the {@code String}, converted to uppercase.
2614      * @see     java.lang.String#toUpperCase()
2615      * @see     java.lang.String#toLowerCase()
2616      * @see     java.lang.String#toLowerCase(Locale)
2617      * @since   1.1
2618      */
2619     public String toUpperCase(Locale locale) {
2620         return isLatin1() ? StringLatin1.toUpperCase(this, value, locale)
2621                           : StringUTF16.toUpperCase(this, value, locale);
2622     }
2623 
2624     /**
2625      * Converts all of the characters in this {@code String} to upper
2626      * case using the rules of the default locale. This method is equivalent to
2627      * {@code toUpperCase(Locale.getDefault())}.
2628      * <p>
2629      * <b>Note:</b> This method is locale sensitive, and may produce unexpected
2630      * results if used for strings that are intended to be interpreted locale
2631      * independently.
2632      * Examples are programming language identifiers, protocol keys, and HTML
2633      * tags.
2634      * For instance, {@code "title".toUpperCase()} in a Turkish locale
2635      * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the
2636      * LATIN CAPITAL LETTER I WITH DOT ABOVE character.
2637      * To obtain correct results for locale insensitive strings, use
2638      * {@code toUpperCase(Locale.ROOT)}.
2639      *
2640      * @return  the {@code String}, converted to uppercase.
2641      * @see     java.lang.String#toUpperCase(Locale)
2642      */
2643     public String toUpperCase() {
2644         return toUpperCase(Locale.getDefault());
2645     }
2646 
2647     /**
2648      * Returns a string whose value is this string, with all leading
2649      * and trailing space removed, where space is defined
2650      * as any character whose codepoint is less than or equal to
2651      * {@code 'U+0020'} (the space character).
2652      * <p>
2653      * If this {@code String} object represents an empty character
     * sequence, or the first and last characters of the character sequence
2655      * represented by this {@code String} object both have codes
2656      * that are not space (as defined above), then a
2657      * reference to this {@code String} object is returned.
2658      * <p>
2659      * Otherwise, if all characters in this string are space (as
     * defined above), then a {@code String} object representing an
2661      * empty string is returned.
2662      * <p>
2663      * Otherwise, let <i>k</i> be the index of the first character in the
2664      * string whose code is not a space (as defined above) and let
2665      * <i>m</i> be the index of the last character in the string whose code
2666      * is not a space (as defined above). A {@code String}
2667      * object is returned, representing the substring of this string that
2668      * begins with the character at index <i>k</i> and ends with the
     * character at index <i>m</i> &mdash; that is, the result of
2670      * {@code this.substring(k, m + 1)}.
2671      * <p>
2672      * This method may be used to trim space (as defined above) from
2673      * the beginning and end of a string.
2674      *
2675      * @return  a string whose value is this string, with all leading
2676      *          and trailing space removed, or this string if it
2677      *          has no leading or trailing space.
2678      */
2679     public String trim() {
2680         String ret = isLatin1() ? StringLatin1.trim(value)
2681                                 : StringUTF16.trim(value);
2682         return ret == null ? this : ret;
2683     }
2684 
2685     /**
2686      * Returns a string whose value is this string, with all leading
2687      * and trailing {@linkplain Character#isWhitespace(int) white space}
2688      * removed.
2689      * <p>
2690      * If this {@code String} object represents an empty string,
2691      * or if all code points in this string are
2692      * {@linkplain Character#isWhitespace(int) white space}, then an empty string
2693      * is returned.
2694      * <p>
2695      * Otherwise, returns a substring of this string beginning with the first
2696      * code point that is not a {@linkplain Character#isWhitespace(int) white space}
2697      * up to and including the last code point that is not a
2698      * {@linkplain Character#isWhitespace(int) white space}.
2699      * <p>
2700      * This method may be used to strip
2701      * {@linkplain Character#isWhitespace(int) white space} from
2702      * the beginning and end of a string.
2703      *
2704      * @return  a string whose value is this string, with all leading
2705      *          and trailing white space removed
2706      *
2707      * @see Character#isWhitespace(int)
2708      *
2709      * @since 11
2710      */
2711     public String strip() {
2712         String ret = isLatin1() ? StringLatin1.strip(value)
2713                                 : StringUTF16.strip(value);
2714         return ret == null ? this : ret;
2715     }
2716 
2717     /**
2718      * Returns a string whose value is this string, with all leading
2719      * {@linkplain Character#isWhitespace(int) white space} removed.
2720      * <p>
2721      * If this {@code String} object represents an empty string,
2722      * or if all code points in this string are
2723      * {@linkplain Character#isWhitespace(int) white space}, then an empty string
2724      * is returned.
2725      * <p>
2726      * Otherwise, returns a substring of this string beginning with the first
2727      * code point that is not a {@linkplain Character#isWhitespace(int) white space}
2728      * up to and including the last code point of this string.
2729      * <p>
2730      * This method may be used to trim
2731      * {@linkplain Character#isWhitespace(int) white space} from
2732      * the beginning of a string.
2733      *
2734      * @return  a string whose value is this string, with all leading white
2735      *          space removed
2736      *
2737      * @see Character#isWhitespace(int)
2738      *
2739      * @since 11
2740      */
2741     public String stripLeading() {
2742         String ret = isLatin1() ? StringLatin1.stripLeading(value)
2743                                 : StringUTF16.stripLeading(value);
2744         return ret == null ? this : ret;
2745     }
2746 
2747     /**
2748      * Returns a string whose value is this string, with all trailing
2749      * {@linkplain Character#isWhitespace(int) white space} removed.
2750      * <p>
2751      * If this {@code String} object represents an empty string,
     * or if all code points in this string are
2753      * {@linkplain Character#isWhitespace(int) white space}, then an empty string
2754      * is returned.
2755      * <p>
2756      * Otherwise, returns a substring of this string beginning with the first
2757      * code point of this string up to and including the last code point
2758      * that is not a {@linkplain Character#isWhitespace(int) white space}.
2759      * <p>
2760      * This method may be used to trim
2761      * {@linkplain Character#isWhitespace(int) white space} from
2762      * the end of a string.
2763      *
2764      * @return  a string whose value is this string, with all trailing white
2765      *          space removed
2766      *
2767      * @see Character#isWhitespace(int)
2768      *
2769      * @since 11
2770      */
2771     public String stripTrailing() {
2772         String ret = isLatin1() ? StringLatin1.stripTrailing(value)
2773                                 : StringUTF16.stripTrailing(value);
2774         return ret == null ? this : ret;
2775     }
2776 
2777     /**
2778      * Returns {@code true} if the string is empty or contains only
2779      * {@linkplain Character#isWhitespace(int) white space} codepoints,
2780      * otherwise {@code false}.
2781      *
2782      * @return {@code true} if the string is empty or contains only
2783      *         {@linkplain Character#isWhitespace(int) white space} codepoints,
2784      *         otherwise {@code false}
2785      *
2786      * @see Character#isWhitespace(int)
2787      *
2788      * @since 11
2789      */
2790     public boolean isBlank() {
2791         return indexOfNonWhitespace() == length();
2792     }
2793 
2794     /**
2795      * Returns a stream of lines extracted from this string,
2796      * separated by line terminators.
2797      * <p>
2798      * A <i>line terminator</i> is one of the following:
2799      * a line feed character {@code "\n"} (U+000A),
2800      * a carriage return character {@code "\r"} (U+000D),
2801      * or a carriage return followed immediately by a line feed
2802      * {@code "\r\n"} (U+000D U+000A).
2803      * <p>
2804      * A <i>line</i> is either a sequence of zero or more characters
2805      * followed by a line terminator, or it is a sequence of one or
2806      * more characters followed by the end of the string. A
2807      * line does not include the line terminator.
2808      * <p>
2809      * The stream returned by this method contains the lines from
2810      * this string in the order in which they occur.
2811      *
2812      * @apiNote This definition of <i>line</i> implies that an empty
2813      *          string has zero lines and that there is no empty line
2814      *          following a line terminator at the end of a string.
2815      *
     * @implNote This method provides better performance than
     *           {@code split("\\R")} by supplying elements lazily and
     *           by faster search of new line terminators.
2819      *
2820      * @return  the stream of lines extracted from this string
2821      *
2822      * @since 11
2823      */
2824     public Stream<String> lines() {
2825         return isLatin1() ? StringLatin1.lines(value) : StringUTF16.lines(value);
2826     }
2827 
2828     /**
2829      * Adjusts the indentation of each line of this string based on the value of
2830      * {@code n}, and normalizes line termination characters.
2831      * <p>
2832      * This string is conceptually separated into lines using
2833      * {@link String#lines()}. Each line is then adjusted as described below
2834      * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting
2835      * lines are then concatenated and returned.
2836      * <p>
2837      * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the
2838      * beginning of each line.
2839      * <p>
2840      * If {@code n < 0} then up to {@code n}
2841      * {@linkplain Character#isWhitespace(int) white space characters} are removed
2842      * from the beginning of each line. If a given line does not contain
2843      * sufficient white space then all leading
2844      * {@linkplain Character#isWhitespace(int) white space characters} are removed.
2845      * Each white space character is treated as a single character. In
2846      * particular, the tab character {@code "\t"} (U+0009) is considered a
2847      * single character; it is not expanded.
2848      * <p>
2849      * If {@code n == 0} then the line remains unchanged. However, line
2850      * terminators are still normalized.
2851      *
2852      * @param n  number of leading
2853      *           {@linkplain Character#isWhitespace(int) white space characters}
2854      *           to add or remove
2855      *
2856      * @return string with indentation adjusted and line endings normalized
2857      *
2858      * @see String#lines()
2859      * @see String#isBlank()
2860      * @see Character#isWhitespace(int)
2861      *
2862      * @since 12
2863      */
2864     public String indent(int n) {
2865         if (isEmpty()) {
2866             return "";
2867         }
2868         Stream<String> stream = lines();
2869         if (n > 0) {
2870             final String spaces = " ".repeat(n);
2871             stream = stream.map(s -> spaces + s);
2872         } else if (n == Integer.MIN_VALUE) {
2873             stream = stream.map(s -> s.stripLeading());
2874         } else if (n < 0) {
2875             stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace())));
2876         }
2877         return stream.collect(Collectors.joining("\n", "", "\n"));
2878     }
2879 
2880     private int indexOfNonWhitespace() {
2881         return isLatin1() ? StringLatin1.indexOfNonWhitespace(value)
2882                           : StringUTF16.indexOfNonWhitespace(value);
2883     }
2884 
2885     private int lastIndexOfNonWhitespace() {
2886         return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value)
2887                           : StringUTF16.lastIndexOfNonWhitespace(value);
2888     }
2889 
2890     /**
2891      * {@preview Associated with text blocks, a preview feature of
2892      *           the Java language.
2893      *
2894      *           This method is associated with <i>text blocks</i>, a preview
2895      *           feature of the Java language. Programs can only use this
2896      *           method when preview features are enabled. Preview features
2897      *           may be removed in a future release, or upgraded to permanent
2898      *           features of the Java language.}
2899      *
2900      * Returns a string whose value is this string, with incidental
2901      * {@linkplain Character#isWhitespace(int) white space} removed from
2902      * the beginning and end of every line.
2903      * <p>
2904      * Incidental {@linkplain Character#isWhitespace(int) white space}
2905      * is often present in a text block to align the content with the opening
2906      * delimiter. For example, in the following code, dots represent incidental
2907      * {@linkplain Character#isWhitespace(int) white space}:
2908      * <blockquote><pre>
2909      * String html = """
2910      * ..............&lt;html&gt;
2911      * ..............    &lt;body&gt;
2912      * ..............        &lt;p&gt;Hello, world&lt;/p&gt;
2913      * ..............    &lt;/body&gt;
2914      * ..............&lt;/html&gt;
2915      * ..............""";
2916      * </pre></blockquote>
2917      * This method treats the incidental
2918      * {@linkplain Character#isWhitespace(int) white space} as indentation to be
2919      * stripped, producing a string that preserves the relative indentation of
2920      * the content. Using | to visualize the start of each line of the string:
2921      * <blockquote><pre>
2922      * |&lt;html&gt;
2923      * |    &lt;body&gt;
2924      * |        &lt;p&gt;Hello, world&lt;/p&gt;
2925      * |    &lt;/body&gt;
2926      * |&lt;/html&gt;
2927      * </pre></blockquote>
2928      * First, the individual lines of this string are extracted as if by using
2929      * {@link String#lines()}.
2930      * <p>
2931      * Then, the <i>minimum indentation</i> (min) is determined as follows.
2932      * For each non-blank line (as defined by {@link String#isBlank()}), the
2933      * leading {@linkplain Character#isWhitespace(int) white space} characters are
2934      * counted. The leading {@linkplain Character#isWhitespace(int) white space}
2935      * characters on the last line are also counted even if
2936      * {@linkplain String#isBlank() blank}. The <i>min</i> value is the smallest
2937      * of these counts.
2938      * <p>
2939      * For each {@linkplain String#isBlank() non-blank} line, <i>min</i> leading
2940      * {@linkplain Character#isWhitespace(int) white space} characters are removed,
2941      * and any trailing {@linkplain Character#isWhitespace(int) white space}
2942      * characters are removed. {@linkplain String#isBlank() Blank} lines are
2943      * replaced with the empty string.
2944      *
2945      * <p>
2946      * Finally, the lines are joined into a new string, using the LF character
2947      * {@code "\n"} (U+000A) to separate lines.
2948      *
2949      * @apiNote
2950      * This method's primary purpose is to shift a block of lines as far as
2951      * possible to the left, while preserving relative indentation. Lines
2952      * that were indented the least will thus have no leading
2953      * {@linkplain Character#isWhitespace(int) white space}.
2954      * The line count of the result will be the same as line count of this
2955      * string.
2956      * If this string ends with a line terminator then the result will end
2957      * with a line terminator.
2958      *
2959      * @implNote
2960      * This method treats all {@linkplain Character#isWhitespace(int) white space}
2961      * characters as having equal width. As long as the indentation on every
2962      * line is consistently composed of the same character sequences, then the
2963      * result will be as described above.
2964      *
2965      * @return string with incidental indentation removed and line
2966      *         terminators normalized
2967      *
2968      * @see String#lines()
2969      * @see String#isBlank()
2970      * @see String#indent(int)
2971      * @see Character#isWhitespace(int)
2972      *
2973      * @since 13
2974      *
2975      */
2976     @jdk.internal.PreviewFeature(feature=jdk.internal.PreviewFeature.Feature.TEXT_BLOCKS,
2977                                  essentialAPI=true)
2978     public String stripIndent() {
2979         int length = length();
2980         if (length == 0) {
2981             return "";
2982         }
2983         char lastChar = charAt(length - 1);
2984         boolean optOut = lastChar == '\n' || lastChar == '\r';
2985         List<String> lines = lines().collect(Collectors.toList());
2986         final int outdent = optOut ? 0 : outdent(lines);
2987         return lines.stream()
2988             .map(line -> {
2989                 int firstNonWhitespace = line.indexOfNonWhitespace();
2990                 int lastNonWhitespace = line.lastIndexOfNonWhitespace();
2991                 int incidentalWhitespace = Math.min(outdent, firstNonWhitespace);
2992                 return firstNonWhitespace > lastNonWhitespace
2993                     ? "" : line.substring(incidentalWhitespace, lastNonWhitespace);
2994             })
2995             .collect(Collectors.joining("\n", "", optOut ? "\n" : ""));
2996     }
2997 
2998     private static int outdent(List<String> lines) {
2999         // Note: outdent is guaranteed to be zero or positive number.
3000         // If there isn't a non-blank line then the last must be blank
3001         int outdent = Integer.MAX_VALUE;
3002         for (String line : lines) {
3003             int leadingWhitespace = line.indexOfNonWhitespace();
3004             if (leadingWhitespace != line.length()) {
3005                 outdent = Integer.min(outdent, leadingWhitespace);
3006             }
3007         }
3008         String lastLine = lines.get(lines.size() - 1);
3009         if (lastLine.isBlank()) {
3010             outdent = Integer.min(outdent, lastLine.length());
3011         }
3012         return outdent;
3013     }
3014 
3015     /**
3016      * {@preview Associated with text blocks, a preview feature of
3017      *           the Java language.
3018      *
3019      *           This method is associated with <i>text blocks</i>, a preview
3020      *           feature of the Java language. Programs can only use this
3021      *           method when preview features are enabled. Preview features
3022      *           may be removed in a future release, or upgraded to permanent
3023      *           features of the Java language.}
3024      *
3025      * Returns a string whose value is this string, with escape sequences
3026      * translated as if in a string literal.
3027      * <p>
3028      * Escape sequences are translated as follows:
3029      * <table class="striped">
3030      *   <caption style="display:none">Translation</caption>
3031      *   <thead>
3032      *   <tr>
3033      *     <th scope="col">Escape</th>
3034      *     <th scope="col">Name</th>
3035      *     <th scope="col">Translation</th>
3036      *   </tr>
3037      *   </thead>
3038      *   <tbody>
3039      *   <tr>
3040      *     <th scope="row">{@code \u005Cb}</th>
3041      *     <td>backspace</td>
3042      *     <td>{@code U+0008}</td>
3043      *   </tr>
3044      *   <tr>
3045      *     <th scope="row">{@code \u005Ct}</th>
3046      *     <td>horizontal tab</td>
3047      *     <td>{@code U+0009}</td>
3048      *   </tr>
3049      *   <tr>
3050      *     <th scope="row">{@code \u005Cn}</th>
3051      *     <td>line feed</td>
3052      *     <td>{@code U+000A}</td>
3053      *   </tr>
3054      *   <tr>
3055      *     <th scope="row">{@code \u005Cf}</th>
3056      *     <td>form feed</td>
3057      *     <td>{@code U+000C}</td>
3058      *   </tr>
3059      *   <tr>
3060      *     <th scope="row">{@code \u005Cr}</th>
3061      *     <td>carriage return</td>
3062      *     <td>{@code U+000D}</td>
3063      *   </tr>
3064      *   <tr>
3065      *     <th scope="row">{@code \u005Cs}</th>
3066      *     <td>space</td>
3067      *     <td>{@code U+0020}</td>
3068      *   </tr>
3069      *   <tr>
3070      *     <th scope="row">{@code \u005C"}</th>
3071      *     <td>double quote</td>
3072      *     <td>{@code U+0022}</td>
3073      *   </tr>
3074      *   <tr>
3075      *     <th scope="row">{@code \u005C'}</th>
3076      *     <td>single quote</td>
3077      *     <td>{@code U+0027}</td>
3078      *   </tr>
3079      *   <tr>
3080      *     <th scope="row">{@code \u005C\u005C}</th>
3081      *     <td>backslash</td>
3082      *     <td>{@code U+005C}</td>
3083      *   </tr>
3084      *   <tr>
3085      *     <th scope="row">{@code \u005C0 - \u005C377}</th>
3086      *     <td>octal escape</td>
3087      *     <td>code point equivalents</td>
3088      *   </tr>
3089      *   <tr>
3090      *     <th scope="row">{@code \u005C<line-terminator>}</th>
3091      *     <td>continuation</td>
3092      *     <td>discard</td>
3093      *   </tr>
3094      *   </tbody>
3095      * </table>
3096      *
3097      * @implNote
3098      * This method does <em>not</em> translate Unicode escapes such as "{@code \u005cu2022}".
3099      * Unicode escapes are translated by the Java compiler when reading input characters and
3100      * are not part of the string literal specification.
3101      *
3102      * @throws IllegalArgumentException when an escape sequence is malformed.
3103      *
3104      * @return String with escape sequences translated.
3105      *
3106      * @jls 3.10.7 Escape Sequences
3107      *
3108      * @since 13
3109      */
3110     @jdk.internal.PreviewFeature(feature=jdk.internal.PreviewFeature.Feature.TEXT_BLOCKS,
3111                                  essentialAPI=true)
3112     public String translateEscapes() {
3113         if (isEmpty()) {
3114             return "";
3115         }
3116         char[] chars = toCharArray();
3117         int length = chars.length;
3118         int from = 0;
3119         int to = 0;
3120         while (from < length) {
3121             char ch = chars[from++];
3122             if (ch == '\\') {
3123                 ch = from < length ? chars[from++] : '\0';
3124                 switch (ch) {
3125                 case 'b':
3126                     ch = '\b';
3127                     break;
3128                 case 'f':
3129                     ch = '\f';
3130                     break;
3131                 case 'n':
3132                     ch = '\n';
3133                     break;
3134                 case 'r':
3135                     ch = '\r';
3136                     break;
3137                 case 's':
3138                     ch = ' ';
3139                     break;
3140                 case 't':
3141                     ch = '\t';
3142                     break;
3143                 case '\'':
3144                 case '\"':
3145                 case '\\':
3146                     // as is
3147                     break;
3148                 case '0': case '1': case '2': case '3':
3149                 case '4': case '5': case '6': case '7':
3150                     int limit = Integer.min(from + (ch <= '3' ? 2 : 1), length);
3151                     int code = ch - '0';
3152                     while (from < limit) {
3153                         ch = chars[from];
3154                         if (ch < '0' || '7' < ch) {
3155                             break;
3156                         }
3157                         from++;
3158                         code = (code << 3) | (ch - '0');
3159                     }
3160                     ch = (char)code;
3161                     break;
3162                 case '\n':
3163                     continue;
3164                 case '\r':
3165                     if (from < length && chars[from] == '\n') {
3166                         from++;
3167                     }
3168                     continue;
3169                 default: {
3170                     String msg = String.format(
3171                         "Invalid escape sequence: \\%c \\\\u%04X",
3172                         ch, (int)ch);
3173                     throw new IllegalArgumentException(msg);
3174                 }
3175                 }
3176             }
3177 
3178             chars[to++] = ch;
3179         }
3180 
3181         return new String(chars, 0, to);
3182     }
3183 
3184     /**
3185      * This method allows the application of a function to {@code this}
3186      * string. The function should expect a single String argument
3187      * and produce an {@code R} result.
3188      * <p>
3189      * Any exception thrown by {@code f()} will be propagated to the
3190      * caller.
3191      *
3192      * @param f    functional interface to apply
3193      *
3194      * @param <R>  class of the result
3195      *
3196      * @return     the result of applying the function to this string
3197      *
3198      * @see java.util.function.Function
3199      *
3200      * @since 12
3201      */
3202     public <R> R transform(Function<? super String, ? extends R> f) {
3203         return f.apply(this);
3204     }
3205 
3206     /**
3207      * This object (which is already a string!) is itself returned.
3208      *
3209      * @return  the string itself.
3210      */
3211     public String toString() {
3212         return this;
3213     }
3214 
3215     /**
3216      * Returns a stream of {@code int} zero-extending the {@code char} values
3217      * from this sequence.  Any char which maps to a <a
3218      * href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate code
3219      * point</a> is passed through uninterpreted.
3220      *
3221      * @return an IntStream of char values from this sequence
3222      * @since 9
3223      */
3224     @Override
3225     public IntStream chars() {
3226         return StreamSupport.intStream(
3227             isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE)
3228                        : new StringUTF16.CharsSpliterator(value, Spliterator.IMMUTABLE),
3229             false);
3230     }
3231 
3232 
3233     /**
3234      * Returns a stream of code point values from this sequence.  Any surrogate
3235      * pairs encountered in the sequence are combined as if by {@linkplain
3236      * Character#toCodePoint Character.toCodePoint} and the result is passed
3237      * to the stream. Any other code units, including ordinary BMP characters,
3238      * unpaired surrogates, and undefined code units, are zero-extended to
3239      * {@code int} values which are then passed to the stream.
3240      *
3241      * @return an IntStream of Unicode code points from this sequence
3242      * @since 9
3243      */
3244     @Override
3245     public IntStream codePoints() {
3246         return StreamSupport.intStream(
3247             isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE)
3248                        : new StringUTF16.CodePointsSpliterator(value, Spliterator.IMMUTABLE),
3249             false);
3250     }
3251 
3252     /**
3253      * Converts this string to a new character array.
3254      *
3255      * @return  a newly allocated character array whose length is the length
3256      *          of this string and whose contents are initialized to contain
3257      *          the character sequence represented by this string.
3258      */
3259     public char[] toCharArray() {
3260         return isLatin1() ? StringLatin1.toChars(value)
3261                           : StringUTF16.toChars(value);
3262     }
3263 
3264     /**
3265      * Returns a formatted string using the specified format string and
3266      * arguments.
3267      *
3268      * <p> The locale always used is the one returned by {@link
3269      * java.util.Locale#getDefault(java.util.Locale.Category)
3270      * Locale.getDefault(Locale.Category)} with
3271      * {@link java.util.Locale.Category#FORMAT FORMAT} category specified.
3272      *
3273      * @param  format
3274      *         A <a href="../util/Formatter.html#syntax">format string</a>
3275      *
3276      * @param  args
3277      *         Arguments referenced by the format specifiers in the format
3278      *         string.  If there are more arguments than format specifiers, the
3279      *         extra arguments are ignored.  The number of arguments is
3280      *         variable and may be zero.  The maximum number of arguments is
3281      *         limited by the maximum dimension of a Java array as defined by
3282      *         <cite>The Java&trade; Virtual Machine Specification</cite>.
3283      *         The behaviour on a
3284      *         {@code null} argument depends on the <a
3285      *         href="../util/Formatter.html#syntax">conversion</a>.
3286      *
3287      * @throws  java.util.IllegalFormatException
3288      *          If a format string contains an illegal syntax, a format
3289      *          specifier that is incompatible with the given arguments,
3290      *          insufficient arguments given the format string, or other
3291      *          illegal conditions.  For specification of all possible
3292      *          formatting errors, see the <a
3293      *          href="../util/Formatter.html#detail">Details</a> section of the
3294      *          formatter class specification.
3295      *
3296      * @return  A formatted string
3297      *
3298      * @see  java.util.Formatter
3299      * @since  1.5
3300      */
3301     public static String format(String format, Object... args) {
3302         return new Formatter().format(format, args).toString();
3303     }
3304 
3305     /**
3306      * Returns a formatted string using the specified locale, format string,
3307      * and arguments.
3308      *
3309      * @param  l
3310      *         The {@linkplain java.util.Locale locale} to apply during
3311      *         formatting.  If {@code l} is {@code null} then no localization
3312      *         is applied.
3313      *
3314      * @param  format
3315      *         A <a href="../util/Formatter.html#syntax">format string</a>
3316      *
3317      * @param  args
3318      *         Arguments referenced by the format specifiers in the format
3319      *         string.  If there are more arguments than format specifiers, the
3320      *         extra arguments are ignored.  The number of arguments is
3321      *         variable and may be zero.  The maximum number of arguments is
3322      *         limited by the maximum dimension of a Java array as defined by
3323      *         <cite>The Java&trade; Virtual Machine Specification</cite>.
3324      *         The behaviour on a
3325      *         {@code null} argument depends on the
3326      *         <a href="../util/Formatter.html#syntax">conversion</a>.
3327      *
3328      * @throws  java.util.IllegalFormatException
3329      *          If a format string contains an illegal syntax, a format
3330      *          specifier that is incompatible with the given arguments,
3331      *          insufficient arguments given the format string, or other
3332      *          illegal conditions.  For specification of all possible
3333      *          formatting errors, see the <a
3334      *          href="../util/Formatter.html#detail">Details</a> section of the
3335      *          formatter class specification
3336      *
3337      * @return  A formatted string
3338      *
3339      * @see  java.util.Formatter
3340      * @since  1.5
3341      */
3342     public static String format(Locale l, String format, Object... args) {
3343         return new Formatter(l).format(format, args).toString();
3344     }
3345 
3346     /**
3347      * {@preview Associated with text blocks, a preview feature of
3348      *           the Java language.
3349      *
3350      *           This method is associated with <i>text blocks</i>, a preview
3351      *           feature of the Java language. Programs can only use this
3352      *           method when preview features are enabled. Preview features
3353      *           may be removed in a future release, or upgraded to permanent
3354      *           features of the Java language.}
3355      *
3356      * Formats using this string as the format string, and the supplied
3357      * arguments.
3358      *
3359      * @implSpec This method is equivalent to {@code String.format(this, args)}.
3360      *
3361      * @param  args
3362      *         Arguments referenced by the format specifiers in this string.
3363      *
3364      * @return  A formatted string
3365      *
3366      * @see  java.lang.String#format(String,Object...)
3367      * @see  java.util.Formatter
3368      *
3369      * @since 13
3370      *
3371      */
3372     @jdk.internal.PreviewFeature(feature=jdk.internal.PreviewFeature.Feature.TEXT_BLOCKS,
3373                                  essentialAPI=true)
3374     public String formatted(Object... args) {
3375         return new Formatter().format(this, args).toString();
3376     }
3377 
3378     /**
3379      * Returns the string representation of the {@code Object} argument.
3380      *
3381      * @param   obj   an {@code Object}.
3382      * @return  if the argument is {@code null}, then a string equal to
3383      *          {@code "null"}; otherwise, the value of
3384      *          {@code obj.toString()} is returned.
3385      * @see     java.lang.Object#toString()
3386      */
3387     public static String valueOf(Object obj) {
3388         return (obj == null) ? "null" : obj.toString();
3389     }
3390 
3391     /**
3392      * Returns the string representation of the {@code char} array
3393      * argument. The contents of the character array are copied; subsequent
3394      * modification of the character array does not affect the returned
3395      * string.
3396      *
3397      * @param   data     the character array.
3398      * @return  a {@code String} that contains the characters of the
3399      *          character array.
3400      */
3401     public static String valueOf(char data[]) {
3402         return new String(data);
3403     }
3404 
3405     /**
3406      * Returns the string representation of a specific subarray of the
3407      * {@code char} array argument.
3408      * <p>
3409      * The {@code offset} argument is the index of the first
3410      * character of the subarray. The {@code count} argument
3411      * specifies the length of the subarray. The contents of the subarray
3412      * are copied; subsequent modification of the character array does not
3413      * affect the returned string.
3414      *
3415      * @param   data     the character array.
3416      * @param   offset   initial offset of the subarray.
3417      * @param   count    length of the subarray.
3418      * @return  a {@code String} that contains the characters of the
3419      *          specified subarray of the character array.
3420      * @throws    IndexOutOfBoundsException if {@code offset} is
3421      *          negative, or {@code count} is negative, or
3422      *          {@code offset+count} is larger than
3423      *          {@code data.length}.
3424      */
3425     public static String valueOf(char data[], int offset, int count) {
3426         return new String(data, offset, count);
3427     }
3428 
3429     /**
3430      * Equivalent to {@link #valueOf(char[], int, int)}.
3431      *
3432      * @param   data     the character array.
3433      * @param   offset   initial offset of the subarray.
3434      * @param   count    length of the subarray.
3435      * @return  a {@code String} that contains the characters of the
3436      *          specified subarray of the character array.
3437      * @throws    IndexOutOfBoundsException if {@code offset} is
3438      *          negative, or {@code count} is negative, or
3439      *          {@code offset+count} is larger than
3440      *          {@code data.length}.
3441      */
3442     public static String copyValueOf(char data[], int offset, int count) {
3443         return new String(data, offset, count);
3444     }
3445 
3446     /**
3447      * Equivalent to {@link #valueOf(char[])}.
3448      *
3449      * @param   data   the character array.
3450      * @return  a {@code String} that contains the characters of the
3451      *          character array.
3452      */
3453     public static String copyValueOf(char data[]) {
3454         return new String(data);
3455     }
3456 
3457     /**
3458      * Returns the string representation of the {@code boolean} argument.
3459      *
3460      * @param   b   a {@code boolean}.
3461      * @return  if the argument is {@code true}, a string equal to
3462      *          {@code "true"} is returned; otherwise, a string equal to
3463      *          {@code "false"} is returned.
3464      */
3465     public static String valueOf(boolean b) {
3466         return b ? "true" : "false";
3467     }
3468 
3469     /**
3470      * Returns the string representation of the {@code char}
3471      * argument.
3472      *
3473      * @param   c   a {@code char}.
3474      * @return  a string of length {@code 1} containing
3475      *          as its single character the argument {@code c}.
3476      */
3477     public static String valueOf(char c) {
3478         if (COMPACT_STRINGS && StringLatin1.canEncode(c)) {
3479             return new String(StringLatin1.toBytes(c), LATIN1);
3480         }
3481         return new String(StringUTF16.toBytes(c), UTF16);
3482     }
3483 
3484     /**
3485      * Returns the string representation of the {@code int} argument.
3486      * <p>
3487      * The representation is exactly the one returned by the
3488      * {@code Integer.toString} method of one argument.
3489      *
3490      * @param   i   an {@code int}.
3491      * @return  a string representation of the {@code int} argument.
3492      * @see     java.lang.Integer#toString(int)
3493      */
3494     public static String valueOf(int i) {
3495         return Integer.toString(i);
3496     }
3497 
3498     /**
3499      * Returns the string representation of the {@code long} argument.
3500      * <p>
3501      * The representation is exactly the one returned by the
3502      * {@code Long.toString} method of one argument.
3503      *
3504      * @param   l   a {@code long}.
3505      * @return  a string representation of the {@code long} argument.
3506      * @see     java.lang.Long#toString(long)
3507      */
3508     public static String valueOf(long l) {
3509         return Long.toString(l);
3510     }
3511 
3512     /**
3513      * Returns the string representation of the {@code float} argument.
3514      * <p>
3515      * The representation is exactly the one returned by the
3516      * {@code Float.toString} method of one argument.
3517      *
3518      * @param   f   a {@code float}.
3519      * @return  a string representation of the {@code float} argument.
3520      * @see     java.lang.Float#toString(float)
3521      */
3522     public static String valueOf(float f) {
3523         return Float.toString(f);
3524     }
3525 
3526     /**
3527      * Returns the string representation of the {@code double} argument.
3528      * <p>
3529      * The representation is exactly the one returned by the
3530      * {@code Double.toString} method of one argument.
3531      *
3532      * @param   d   a {@code double}.
3533      * @return  a  string representation of the {@code double} argument.
3534      * @see     java.lang.Double#toString(double)
3535      */
3536     public static String valueOf(double d) {
3537         return Double.toString(d);
3538     }
3539 
3540     /**
3541      * Returns a canonical representation for the string object.
3542      * <p>
3543      * A pool of strings, initially empty, is maintained privately by the
3544      * class {@code String}.
3545      * <p>
3546      * When the intern method is invoked, if the pool already contains a
3547      * string equal to this {@code String} object as determined by
3548      * the {@link #equals(Object)} method, then the string from the pool is
3549      * returned. Otherwise, this {@code String} object is added to the
3550      * pool and a reference to this {@code String} object is returned.
3551      * <p>
3552      * It follows that for any two strings {@code s} and {@code t},
3553      * {@code s.intern() == t.intern()} is {@code true}
3554      * if and only if {@code s.equals(t)} is {@code true}.
3555      * <p>
3556      * All literal strings and string-valued constant expressions are
3557      * interned. String literals are defined in section 3.10.5 of
3558      * <cite>The Java&trade; Language Specification</cite>.
3559      *
3560      * @return  a string that has the same contents as this string, but is
3561      *          guaranteed to be from a pool of unique strings.
3562      * @jls 3.10.5 String Literals
3563      */
    // Declared native: no Java body exists here; the implementation is
    // supplied outside this source file.
    public native String intern();
3565 
3566     /**
3567      * Returns a string whose value is the concatenation of this
3568      * string repeated {@code count} times.
3569      * <p>
3570      * If this string is empty or count is zero then the empty
3571      * string is returned.
3572      *
3573      * @param   count number of times to repeat
3574      *
3575      * @return  A string composed of this string repeated
3576      *          {@code count} times or the empty string if this
3577      *          string is empty or count is zero
3578      *
3579      * @throws  IllegalArgumentException if the {@code count} is
3580      *          negative.
3581      *
3582      * @since 11
3583      */
3584     public String repeat(int count) {
3585         if (count < 0) {
3586             throw new IllegalArgumentException("count is negative: " + count);
3587         }
3588         if (count == 1) {
3589             return this;
3590         }
3591         final int len = value.length;
3592         if (len == 0 || count == 0) {
3593             return "";
3594         }
3595         if (len == 1) {
3596             final byte[] single = new byte[count];
3597             Arrays.fill(single, value[0]);
3598             return new String(single, coder);
3599         }
3600         if (Integer.MAX_VALUE / count < len) {
3601             throw new OutOfMemoryError("Repeating " + len + " bytes String " + count +
3602                     " times will produce a String exceeding maximum size.");
3603         }
3604         final int limit = len * count;
3605         final byte[] multiple = new byte[limit];
3606         System.arraycopy(value, 0, multiple, 0, len);
3607         int copied = len;
3608         for (; copied < limit - copied; copied <<= 1) {
3609             System.arraycopy(multiple, 0, multiple, copied, copied);
3610         }
3611         System.arraycopy(multiple, 0, multiple, copied, limit - copied);
3612         return new String(multiple, coder);
3613     }
3614 
3615     ////////////////////////////////////////////////////////////////
3616 
3617     /**
3618      * Copy character bytes from this string into dst starting at dstBegin.
3619      * This method doesn't perform any range checking.
3620      *
3621      * Invoker guarantees: dst is in UTF16 (inflate itself for asb), if two
3622      * coders are different, and dst is big enough (range check)
3623      *
3624      * @param dstBegin  the char index, not offset of byte[]
3625      * @param coder     the coder of dst[]
3626      */
3627     void getBytes(byte dst[], int dstBegin, byte coder) {
3628         if (coder() == coder) {
3629             System.arraycopy(value, 0, dst, dstBegin << coder, value.length);
3630         } else {    // this.coder == LATIN && coder == UTF16
3631             StringLatin1.inflate(value, 0, dst, dstBegin, value.length);
3632         }
3633     }
3634 
3635     /*
3636      * Package private constructor. Trailing Void argument is there for
3637      * disambiguating it against other (public) constructors.
3638      *
3639      * Stores the char[] value into a byte[] that each byte represents
3640      * the8 low-order bits of the corresponding character, if the char[]
3641      * contains only latin1 character. Or a byte[] that stores all
3642      * characters in their byte sequences defined by the {@code StringUTF16}.
3643      */
3644     String(char[] value, int off, int len, Void sig) {
3645         if (len == 0) {
3646             this.value = "".value;
3647             this.coder = "".coder;
3648             return;
3649         }
3650         if (COMPACT_STRINGS) {
3651             byte[] val = StringUTF16.compress(value, off, len);
3652             if (val != null) {
3653                 this.value = val;
3654                 this.coder = LATIN1;
3655                 return;
3656             }
3657         }
3658         this.coder = UTF16;
3659         this.value = StringUTF16.toBytes(value, off, len);
3660     }
3661 
3662     /*
3663      * Package private constructor. Trailing Void argument is there for
3664      * disambiguating it against other (public) constructors.
3665      */
3666     String(AbstractStringBuilder asb, Void sig) {
3667         byte[] val = asb.getValue();
3668         int length = asb.length();
3669         if (asb.isLatin1()) {
3670             this.coder = LATIN1;
3671             this.value = Arrays.copyOfRange(val, 0, length);
3672         } else {
3673             if (COMPACT_STRINGS) {
3674                 byte[] buf = StringUTF16.compress(val, 0, length);
3675                 if (buf != null) {
3676                     this.coder = LATIN1;
3677                     this.value = buf;
3678                     return;
3679                 }
3680             }
3681             this.coder = UTF16;
3682             this.value = Arrays.copyOfRange(val, 0, length << 1);
3683         }
3684     }
3685 
3686    /*
3687     * Package private constructor which shares value array for speed.
3688     */
3689     String(byte[] value, byte coder) {
3690         this.value = value;
3691         this.coder = coder;
3692     }
3693 
3694     byte coder() {
3695         return COMPACT_STRINGS ? coder : UTF16;
3696     }
3697 
    /*
     * Returns the byte[] held in the value field itself. No copy is made,
     * so the caller receives the same array instance this string uses.
     */
    byte[] value() {
        return value;
    }
3701 
3702     boolean isLatin1() {
3703         return COMPACT_STRINGS && coder == LATIN1;
3704     }
3705 
    /*
     * Values of the coder field, naming how the value byte[] is encoded.
     * The numeric value is also used as a shift amount: getBytes computes
     * a byte offset as (char index << coder), i.e. shift 0 for LATIN1 and
     * shift 1 for UTF16.
     */
    @Native static final byte LATIN1 = 0;
    @Native static final byte UTF16  = 1;
3708 
3709     /*
3710      * StringIndexOutOfBoundsException  if {@code index} is
3711      * negative or greater than or equal to {@code length}.
3712      */
3713     static void checkIndex(int index, int length) {
3714         if (index < 0 || index >= length) {
3715             throw new StringIndexOutOfBoundsException("index " + index +
3716                                                       ", length " + length);
3717         }
3718     }
3719 
3720     /*
3721      * StringIndexOutOfBoundsException  if {@code offset}
3722      * is negative or greater than {@code length}.
3723      */
3724     static void checkOffset(int offset, int length) {
3725         if (offset < 0 || offset > length) {
3726             throw new StringIndexOutOfBoundsException("offset " + offset +
3727                                                       ", length " + length);
3728         }
3729     }
3730 
3731     /*
3732      * Check {@code offset}, {@code count} against {@code 0} and {@code length}
3733      * bounds.
3734      *
3735      * @throws  StringIndexOutOfBoundsException
3736      *          If {@code offset} is negative, {@code count} is negative,
3737      *          or {@code offset} is greater than {@code length - count}
3738      */
3739     static void checkBoundsOffCount(int offset, int count, int length) {
3740         if (offset < 0 || count < 0 || offset > length - count) {
3741             throw new StringIndexOutOfBoundsException(
3742                 "offset " + offset + ", count " + count + ", length " + length);
3743         }
3744     }
3745 
3746     /*
3747      * Check {@code begin}, {@code end} against {@code 0} and {@code length}
3748      * bounds.
3749      *
3750      * @throws  StringIndexOutOfBoundsException
3751      *          If {@code begin} is negative, {@code begin} is greater than
3752      *          {@code end}, or {@code end} is greater than {@code length}.
3753      */
3754     static void checkBoundsBeginEnd(int begin, int end, int length) {
3755         if (begin < 0 || begin > end || end > length) {
3756             throw new StringIndexOutOfBoundsException(
3757                 "begin " + begin + ", end " + end + ", length " + length);
3758         }
3759     }
3760 
3761     /**
3762      * Returns the string representation of the {@code codePoint}
3763      * argument.
3764      *
3765      * @param   codePoint a {@code codePoint}.
3766      * @return  a string of length {@code 1} or {@code 2} containing
3767      *          as its single character the argument {@code codePoint}.
3768      * @throws IllegalArgumentException if the specified
3769      *          {@code codePoint} is not a {@linkplain Character#isValidCodePoint
3770      *          valid Unicode code point}.
3771      */
3772     static String valueOfCodePoint(int codePoint) {
3773         if (COMPACT_STRINGS && StringLatin1.canEncode(codePoint)) {
3774             return new String(StringLatin1.toBytes((char)codePoint), LATIN1);
3775         } else if (Character.isBmpCodePoint(codePoint)) {
3776             return new String(StringUTF16.toBytes((char)codePoint), UTF16);
3777         } else if (Character.isSupplementaryCodePoint(codePoint)) {
3778             return new String(StringUTF16.toBytesSupplementary(codePoint), UTF16);
3779         }
3780 
3781         throw new IllegalArgumentException(
3782             format("Not a valid Unicode code point: 0x%X", codePoint));
3783     }
3784 
    /**
     * Returns an {@link Optional} containing the nominal descriptor for this
     * instance, which is the instance itself. The returned {@link Optional}
     * is therefore never empty.
     *
     * @return an {@link Optional} describing the {@linkplain String} instance
     * @since 12
     */
    @Override
    public Optional<String> describeConstable() {
        return Optional.of(this);
    }
3796 
    /**
     * Resolves this instance as a {@link ConstantDesc}, the result of which is
     * the instance itself.
     *
     * @param lookup ignored; this implementation never reads it
     * @return the {@linkplain String} instance
     * @since 12
     */
    @Override
    public String resolveConstantDesc(MethodHandles.Lookup lookup) {
        return this;
    }
3809 
3810 }