/*
 * Copyright (c) 1994, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
24 */ 25 26 package java.lang; 27 28 import java.io.ObjectStreamField; 29 import java.io.UnsupportedEncodingException; 30 import java.lang.annotation.Native; 31 import java.lang.invoke.MethodHandles; 32 import java.lang.constant.Constable; 33 import java.lang.constant.ConstantDesc; 34 import java.nio.charset.Charset; 35 import java.util.ArrayList; 36 import java.util.Arrays; 37 import java.util.Comparator; 38 import java.util.Formatter; 39 import java.util.List; 40 import java.util.Locale; 41 import java.util.Objects; 42 import java.util.Optional; 43 import java.util.Spliterator; 44 import java.util.StringJoiner; 45 import java.util.function.Function; 46 import java.util.regex.Matcher; 47 import java.util.regex.Pattern; 48 import java.util.regex.PatternSyntaxException; 49 import java.util.stream.Collectors; 50 import java.util.stream.IntStream; 51 import java.util.stream.Stream; 52 import java.util.stream.StreamSupport; 53 import jdk.internal.HotSpotIntrinsicCandidate; 54 import jdk.internal.vm.annotation.Stable; 55 56 import static java.util.function.Predicate.not; 57 58 /** 59 * The {@code String} class represents character strings. All 60 * string literals in Java programs, such as {@code "abc"}, are 61 * implemented as instances of this class. 62 * <p> 63 * Strings are constant; their values cannot be changed after they 64 * are created. String buffers support mutable strings. 65 * Because String objects are immutable they can be shared. 
 * For example:
 * <blockquote><pre>
 *     String str = "abc";
 * </pre></blockquote><p>
 * is equivalent to:
 * <blockquote><pre>
 *     char data[] = {'a', 'b', 'c'};
 *     String str = new String(data);
 * </pre></blockquote><p>
 * Here are some more examples of how strings can be used:
 * <blockquote><pre>
 *     System.out.println("abc");
 *     String cde = "cde";
 *     System.out.println("abc" + cde);
 *     String c = "abc".substring(2, 3);
 *     String d = cde.substring(1, 2);
 * </pre></blockquote>
 * <p>
 * The class {@code String} includes methods for examining
 * individual characters of the sequence, for comparing strings, for
 * searching strings, for extracting substrings, and for creating a
 * copy of a string with all characters translated to uppercase or to
 * lowercase. Case mapping is based on the Unicode Standard version
 * specified by the {@link java.lang.Character Character} class.
 * <p>
 * The Java language provides special support for the string
 * concatenation operator (&nbsp;+&nbsp;), and for conversion of
 * other objects to strings. For additional information on string
 * concatenation and conversion, see <i>The Java&trade; Language Specification</i>.
 *
 * <p> Unless otherwise noted, passing a {@code null} argument to a constructor
 * or method in this class will cause a {@link NullPointerException} to be
 * thrown.
 *
 * <p>A {@code String} represents a string in the UTF-16 format
 * in which <em>supplementary characters</em> are represented by <em>surrogate
 * pairs</em> (see the section <a href="Character.html#unicode">Unicode
 * Character Representations</a> in the {@code Character} class for
 * more information).
 * Index values refer to {@code char} code units, so a supplementary
 * character uses two positions in a {@code String}.
106 * <p>The {@code String} class provides methods for dealing with 107 * Unicode code points (i.e., characters), in addition to those for 108 * dealing with Unicode code units (i.e., {@code char} values). 109 * 110 * <p>Unless otherwise noted, methods for comparing Strings do not take locale 111 * into account. The {@link java.text.Collator} class provides methods for 112 * finer-grain, locale-sensitive String comparison. 113 * 114 * @implNote The implementation of the string concatenation operator is left to 115 * the discretion of a Java compiler, as long as the compiler ultimately conforms 116 * to <i>The Java™ Language Specification</i>. For example, the {@code javac} compiler 117 * may implement the operator with {@code StringBuffer}, {@code StringBuilder}, 118 * or {@code java.lang.invoke.StringConcatFactory} depending on the JDK version. The 119 * implementation of string conversion is typically through the method {@code toString}, 120 * defined by {@code Object} and inherited by all classes in Java. 121 * 122 * @author Lee Boynton 123 * @author Arthur van Hoff 124 * @author Martin Buchholz 125 * @author Ulf Zibis 126 * @see java.lang.Object#toString() 127 * @see java.lang.StringBuffer 128 * @see java.lang.StringBuilder 129 * @see java.nio.charset.Charset 130 * @since 1.0 131 * @jls 15.18.1 String Concatenation Operator + 132 */ 133 134 public final class String 135 implements java.io.Serializable, Comparable<String>, CharSequence, 136 Constable, ConstantDesc { 137 138 /** 139 * The value is used for character storage. 140 * 141 * @implNote This field is trusted by the VM, and is a subject to 142 * constant folding if String instance is constant. Overwriting this 143 * field after construction will cause problems. 144 * 145 * Additionally, it is marked with {@link Stable} to trust the contents 146 * of the array. No other facility in JDK provides this functionality (yet). 147 * {@link Stable} is safe here, because value is never null. 
148 */ 149 @Stable 150 private final byte[] value; 151 152 /** 153 * The identifier of the encoding used to encode the bytes in 154 * {@code value}. The supported values in this implementation are 155 * 156 * LATIN1 157 * UTF16 158 * 159 * @implNote This field is trusted by the VM, and is a subject to 160 * constant folding if String instance is constant. Overwriting this 161 * field after construction will cause problems. 162 */ 163 private final byte coder; 164 165 /** Cache the hash code for the string */ 166 private int hash; // Default to 0 167 168 /** 169 * Cache if the hash has been calculated as actually being zero, enabling 170 * us to avoid recalculating this. 171 */ 172 private boolean hashIsZero; // Default to false; 173 174 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 175 private static final long serialVersionUID = -6849794470754667710L; 176 177 /** 178 * If String compaction is disabled, the bytes in {@code value} are 179 * always encoded in UTF16. 180 * 181 * For methods with several possible implementation paths, when String 182 * compaction is disabled, only one code path is taken. 183 * 184 * The instance field value is generally opaque to optimizing JIT 185 * compilers. Therefore, in performance-sensitive place, an explicit 186 * check of the static boolean {@code COMPACT_STRINGS} is done first 187 * before checking the {@code coder} field since the static boolean 188 * {@code COMPACT_STRINGS} would be constant folded away by an 189 * optimizing JIT compiler. The idioms for these cases are as follows. 190 * 191 * For code such as: 192 * 193 * if (coder == LATIN1) { ... } 194 * 195 * can be written more optimally as 196 * 197 * if (coder() == LATIN1) { ... } 198 * 199 * or: 200 * 201 * if (COMPACT_STRINGS && coder == LATIN1) { ... } 202 * 203 * An optimizing JIT compiler can fold the above conditional as: 204 * 205 * COMPACT_STRINGS == true => if (coder == LATIN1) { ... } 206 * COMPACT_STRINGS == false => if (false) { ... 
} 207 * 208 * @implNote 209 * The actual value for this field is injected by JVM. The static 210 * initialization block is used to set the value here to communicate 211 * that this static final field is not statically foldable, and to 212 * avoid any possible circular dependency during vm initialization. 213 */ 214 static final boolean COMPACT_STRINGS; 215 216 static { 217 COMPACT_STRINGS = true; 218 } 219 220 /** 221 * Class String is special cased within the Serialization Stream Protocol. 222 * 223 * A String instance is written into an ObjectOutputStream according to 224 * <a href="{@docRoot}/../specs/serialization/protocol.html#stream-elements"> 225 * Object Serialization Specification, Section 6.2, "Stream Elements"</a> 226 */ 227 private static final ObjectStreamField[] serialPersistentFields = 228 new ObjectStreamField[0]; 229 230 /** 231 * Initializes a newly created {@code String} object so that it represents 232 * an empty character sequence. Note that use of this constructor is 233 * unnecessary since Strings are immutable. 234 */ 235 public String() { 236 this.value = "".value; 237 this.coder = "".coder; 238 } 239 240 /** 241 * Initializes a newly created {@code String} object so that it represents 242 * the same sequence of characters as the argument; in other words, the 243 * newly created string is a copy of the argument string. Unless an 244 * explicit copy of {@code original} is needed, use of this constructor is 245 * unnecessary since Strings are immutable. 246 * 247 * @param original 248 * A {@code String} 249 */ 250 @HotSpotIntrinsicCandidate 251 public String(String original) { 252 this.value = original.value; 253 this.coder = original.coder; 254 this.hash = original.hash; 255 } 256 257 /** 258 * Allocates a new {@code String} so that it represents the sequence of 259 * characters currently contained in the character array argument. 
The 260 * contents of the character array are copied; subsequent modification of 261 * the character array does not affect the newly created string. 262 * 263 * @param value 264 * The initial value of the string 265 */ 266 public String(char value[]) { 267 this(value, 0, value.length, null); 268 } 269 270 /** 271 * Allocates a new {@code String} that contains characters from a subarray 272 * of the character array argument. The {@code offset} argument is the 273 * index of the first character of the subarray and the {@code count} 274 * argument specifies the length of the subarray. The contents of the 275 * subarray are copied; subsequent modification of the character array does 276 * not affect the newly created string. 277 * 278 * @param value 279 * Array that is the source of characters 280 * 281 * @param offset 282 * The initial offset 283 * 284 * @param count 285 * The length 286 * 287 * @throws IndexOutOfBoundsException 288 * If {@code offset} is negative, {@code count} is negative, or 289 * {@code offset} is greater than {@code value.length - count} 290 */ 291 public String(char value[], int offset, int count) { 292 this(value, offset, count, rangeCheck(value, offset, count)); 293 } 294 295 private static Void rangeCheck(char[] value, int offset, int count) { 296 checkBoundsOffCount(offset, count, value.length); 297 return null; 298 } 299 300 /** 301 * Allocates a new {@code String} that contains characters from a subarray 302 * of the <a href="Character.html#unicode">Unicode code point</a> array 303 * argument. The {@code offset} argument is the index of the first code 304 * point of the subarray and the {@code count} argument specifies the 305 * length of the subarray. The contents of the subarray are converted to 306 * {@code char}s; subsequent modification of the {@code int} array does not 307 * affect the newly created string. 
308 * 309 * @param codePoints 310 * Array that is the source of Unicode code points 311 * 312 * @param offset 313 * The initial offset 314 * 315 * @param count 316 * The length 317 * 318 * @throws IllegalArgumentException 319 * If any invalid Unicode code point is found in {@code 320 * codePoints} 321 * 322 * @throws IndexOutOfBoundsException 323 * If {@code offset} is negative, {@code count} is negative, or 324 * {@code offset} is greater than {@code codePoints.length - count} 325 * 326 * @since 1.5 327 */ 328 public String(int[] codePoints, int offset, int count) { 329 checkBoundsOffCount(offset, count, codePoints.length); 330 if (count == 0) { 331 this.value = "".value; 332 this.coder = "".coder; 333 return; 334 } 335 if (COMPACT_STRINGS) { 336 byte[] val = StringLatin1.toBytes(codePoints, offset, count); 337 if (val != null) { 338 this.coder = LATIN1; 339 this.value = val; 340 return; 341 } 342 } 343 this.coder = UTF16; 344 this.value = StringUTF16.toBytes(codePoints, offset, count); 345 } 346 347 /** 348 * Allocates a new {@code String} constructed from a subarray of an array 349 * of 8-bit integer values. 350 * 351 * <p> The {@code offset} argument is the index of the first byte of the 352 * subarray, and the {@code count} argument specifies the length of the 353 * subarray. 354 * 355 * <p> Each {@code byte} in the subarray is converted to a {@code char} as 356 * specified in the {@link #String(byte[],int) String(byte[],int)} constructor. 357 * 358 * @deprecated This method does not properly convert bytes into characters. 359 * As of JDK 1.1, the preferred way to do this is via the 360 * {@code String} constructors that take a {@link 361 * java.nio.charset.Charset}, charset name, or that use the platform's 362 * default charset. 
363 * 364 * @param ascii 365 * The bytes to be converted to characters 366 * 367 * @param hibyte 368 * The top 8 bits of each 16-bit Unicode code unit 369 * 370 * @param offset 371 * The initial offset 372 * @param count 373 * The length 374 * 375 * @throws IndexOutOfBoundsException 376 * If {@code offset} is negative, {@code count} is negative, or 377 * {@code offset} is greater than {@code ascii.length - count} 378 * 379 * @see #String(byte[], int) 380 * @see #String(byte[], int, int, java.lang.String) 381 * @see #String(byte[], int, int, java.nio.charset.Charset) 382 * @see #String(byte[], int, int) 383 * @see #String(byte[], java.lang.String) 384 * @see #String(byte[], java.nio.charset.Charset) 385 * @see #String(byte[]) 386 */ 387 @Deprecated(since="1.1") 388 public String(byte ascii[], int hibyte, int offset, int count) { 389 checkBoundsOffCount(offset, count, ascii.length); 390 if (count == 0) { 391 this.value = "".value; 392 this.coder = "".coder; 393 return; 394 } 395 if (COMPACT_STRINGS && (byte)hibyte == 0) { 396 this.value = Arrays.copyOfRange(ascii, offset, offset + count); 397 this.coder = LATIN1; 398 } else { 399 hibyte <<= 8; 400 byte[] val = StringUTF16.newBytesFor(count); 401 for (int i = 0; i < count; i++) { 402 StringUTF16.putChar(val, i, hibyte | (ascii[offset++] & 0xff)); 403 } 404 this.value = val; 405 this.coder = UTF16; 406 } 407 } 408 409 /** 410 * Allocates a new {@code String} containing characters constructed from 411 * an array of 8-bit integer values. Each character <i>c</i> in the 412 * resulting string is constructed from the corresponding component 413 * <i>b</i> in the byte array such that: 414 * 415 * <blockquote><pre> 416 * <b><i>c</i></b> == (char)(((hibyte & 0xff) << 8) 417 * | (<b><i>b</i></b> & 0xff)) 418 * </pre></blockquote> 419 * 420 * @deprecated This method does not properly convert bytes into 421 * characters. 
As of JDK 1.1, the preferred way to do this is via the 422 * {@code String} constructors that take a {@link 423 * java.nio.charset.Charset}, charset name, or that use the platform's 424 * default charset. 425 * 426 * @param ascii 427 * The bytes to be converted to characters 428 * 429 * @param hibyte 430 * The top 8 bits of each 16-bit Unicode code unit 431 * 432 * @see #String(byte[], int, int, java.lang.String) 433 * @see #String(byte[], int, int, java.nio.charset.Charset) 434 * @see #String(byte[], int, int) 435 * @see #String(byte[], java.lang.String) 436 * @see #String(byte[], java.nio.charset.Charset) 437 * @see #String(byte[]) 438 */ 439 @Deprecated(since="1.1") 440 public String(byte ascii[], int hibyte) { 441 this(ascii, hibyte, 0, ascii.length); 442 } 443 444 /** 445 * Constructs a new {@code String} by decoding the specified subarray of 446 * bytes using the specified charset. The length of the new {@code String} 447 * is a function of the charset, and hence may not be equal to the length 448 * of the subarray. 449 * 450 * <p> The behavior of this constructor when the given bytes are not valid 451 * in the given charset is unspecified. The {@link 452 * java.nio.charset.CharsetDecoder} class should be used when more control 453 * over the decoding process is required. 
454 * 455 * @param bytes 456 * The bytes to be decoded into characters 457 * 458 * @param offset 459 * The index of the first byte to decode 460 * 461 * @param length 462 * The number of bytes to decode 463 464 * @param charsetName 465 * The name of a supported {@linkplain java.nio.charset.Charset 466 * charset} 467 * 468 * @throws UnsupportedEncodingException 469 * If the named charset is not supported 470 * 471 * @throws IndexOutOfBoundsException 472 * If {@code offset} is negative, {@code length} is negative, or 473 * {@code offset} is greater than {@code bytes.length - length} 474 * 475 * @since 1.1 476 */ 477 public String(byte bytes[], int offset, int length, String charsetName) 478 throws UnsupportedEncodingException { 479 if (charsetName == null) 480 throw new NullPointerException("charsetName"); 481 checkBoundsOffCount(offset, length, bytes.length); 482 StringCoding.Result ret = 483 StringCoding.decode(charsetName, bytes, offset, length); 484 this.value = ret.value; 485 this.coder = ret.coder; 486 } 487 488 /** 489 * Constructs a new {@code String} by decoding the specified subarray of 490 * bytes using the specified {@linkplain java.nio.charset.Charset charset}. 491 * The length of the new {@code String} is a function of the charset, and 492 * hence may not be equal to the length of the subarray. 493 * 494 * <p> This method always replaces malformed-input and unmappable-character 495 * sequences with this charset's default replacement string. The {@link 496 * java.nio.charset.CharsetDecoder} class should be used when more control 497 * over the decoding process is required. 
498 * 499 * @param bytes 500 * The bytes to be decoded into characters 501 * 502 * @param offset 503 * The index of the first byte to decode 504 * 505 * @param length 506 * The number of bytes to decode 507 * 508 * @param charset 509 * The {@linkplain java.nio.charset.Charset charset} to be used to 510 * decode the {@code bytes} 511 * 512 * @throws IndexOutOfBoundsException 513 * If {@code offset} is negative, {@code length} is negative, or 514 * {@code offset} is greater than {@code bytes.length - length} 515 * 516 * @since 1.6 517 */ 518 public String(byte bytes[], int offset, int length, Charset charset) { 519 if (charset == null) 520 throw new NullPointerException("charset"); 521 checkBoundsOffCount(offset, length, bytes.length); 522 StringCoding.Result ret = 523 StringCoding.decode(charset, bytes, offset, length); 524 this.value = ret.value; 525 this.coder = ret.coder; 526 } 527 528 /** 529 * Constructs a new {@code String} by decoding the specified array of bytes 530 * using the specified {@linkplain java.nio.charset.Charset charset}. The 531 * length of the new {@code String} is a function of the charset, and hence 532 * may not be equal to the length of the byte array. 533 * 534 * <p> The behavior of this constructor when the given bytes are not valid 535 * in the given charset is unspecified. The {@link 536 * java.nio.charset.CharsetDecoder} class should be used when more control 537 * over the decoding process is required. 
538 * 539 * @param bytes 540 * The bytes to be decoded into characters 541 * 542 * @param charsetName 543 * The name of a supported {@linkplain java.nio.charset.Charset 544 * charset} 545 * 546 * @throws UnsupportedEncodingException 547 * If the named charset is not supported 548 * 549 * @since 1.1 550 */ 551 public String(byte bytes[], String charsetName) 552 throws UnsupportedEncodingException { 553 this(bytes, 0, bytes.length, charsetName); 554 } 555 556 /** 557 * Constructs a new {@code String} by decoding the specified array of 558 * bytes using the specified {@linkplain java.nio.charset.Charset charset}. 559 * The length of the new {@code String} is a function of the charset, and 560 * hence may not be equal to the length of the byte array. 561 * 562 * <p> This method always replaces malformed-input and unmappable-character 563 * sequences with this charset's default replacement string. The {@link 564 * java.nio.charset.CharsetDecoder} class should be used when more control 565 * over the decoding process is required. 566 * 567 * @param bytes 568 * The bytes to be decoded into characters 569 * 570 * @param charset 571 * The {@linkplain java.nio.charset.Charset charset} to be used to 572 * decode the {@code bytes} 573 * 574 * @since 1.6 575 */ 576 public String(byte bytes[], Charset charset) { 577 this(bytes, 0, bytes.length, charset); 578 } 579 580 /** 581 * Constructs a new {@code String} by decoding the specified subarray of 582 * bytes using the platform's default charset. The length of the new 583 * {@code String} is a function of the charset, and hence may not be equal 584 * to the length of the subarray. 585 * 586 * <p> The behavior of this constructor when the given bytes are not valid 587 * in the default charset is unspecified. The {@link 588 * java.nio.charset.CharsetDecoder} class should be used when more control 589 * over the decoding process is required. 
590 * 591 * @param bytes 592 * The bytes to be decoded into characters 593 * 594 * @param offset 595 * The index of the first byte to decode 596 * 597 * @param length 598 * The number of bytes to decode 599 * 600 * @throws IndexOutOfBoundsException 601 * If {@code offset} is negative, {@code length} is negative, or 602 * {@code offset} is greater than {@code bytes.length - length} 603 * 604 * @since 1.1 605 */ 606 public String(byte bytes[], int offset, int length) { 607 checkBoundsOffCount(offset, length, bytes.length); 608 StringCoding.Result ret = StringCoding.decode(bytes, offset, length); 609 this.value = ret.value; 610 this.coder = ret.coder; 611 } 612 613 /** 614 * Constructs a new {@code String} by decoding the specified array of bytes 615 * using the platform's default charset. The length of the new {@code 616 * String} is a function of the charset, and hence may not be equal to the 617 * length of the byte array. 618 * 619 * <p> The behavior of this constructor when the given bytes are not valid 620 * in the default charset is unspecified. The {@link 621 * java.nio.charset.CharsetDecoder} class should be used when more control 622 * over the decoding process is required. 623 * 624 * @param bytes 625 * The bytes to be decoded into characters 626 * 627 * @since 1.1 628 */ 629 public String(byte[] bytes) { 630 this(bytes, 0, bytes.length); 631 } 632 633 /** 634 * Allocates a new string that contains the sequence of characters 635 * currently contained in the string buffer argument. The contents of the 636 * string buffer are copied; subsequent modification of the string buffer 637 * does not affect the newly created string. 638 * 639 * @param buffer 640 * A {@code StringBuffer} 641 */ 642 public String(StringBuffer buffer) { 643 this(buffer.toString()); 644 } 645 646 /** 647 * Allocates a new string that contains the sequence of characters 648 * currently contained in the string builder argument. 
The contents of the 649 * string builder are copied; subsequent modification of the string builder 650 * does not affect the newly created string. 651 * 652 * <p> This constructor is provided to ease migration to {@code 653 * StringBuilder}. Obtaining a string from a string builder via the {@code 654 * toString} method is likely to run faster and is generally preferred. 655 * 656 * @param builder 657 * A {@code StringBuilder} 658 * 659 * @since 1.5 660 */ 661 public String(StringBuilder builder) { 662 this(builder, null); 663 } 664 665 /** 666 * Returns the length of this string. 667 * The length is equal to the number of <a href="Character.html#unicode">Unicode 668 * code units</a> in the string. 669 * 670 * @return the length of the sequence of characters represented by this 671 * object. 672 */ 673 public int length() { 674 return value.length >> coder(); 675 } 676 677 /** 678 * Returns {@code true} if, and only if, {@link #length()} is {@code 0}. 679 * 680 * @return {@code true} if {@link #length()} is {@code 0}, otherwise 681 * {@code false} 682 * 683 * @since 1.6 684 */ 685 public boolean isEmpty() { 686 return value.length == 0; 687 } 688 689 /** 690 * Returns the {@code char} value at the 691 * specified index. An index ranges from {@code 0} to 692 * {@code length() - 1}. The first {@code char} value of the sequence 693 * is at index {@code 0}, the next at index {@code 1}, 694 * and so on, as for array indexing. 695 * 696 * <p>If the {@code char} value specified by the index is a 697 * <a href="Character.html#unicode">surrogate</a>, the surrogate 698 * value is returned. 699 * 700 * @param index the index of the {@code char} value. 701 * @return the {@code char} value at the specified index of this string. 702 * The first {@code char} value is at index {@code 0}. 703 * @exception IndexOutOfBoundsException if the {@code index} 704 * argument is negative or not less than the length of this 705 * string. 
706 */ 707 public char charAt(int index) { 708 if (isLatin1()) { 709 return StringLatin1.charAt(value, index); 710 } else { 711 return StringUTF16.charAt(value, index); 712 } 713 } 714 715 /** 716 * Returns the character (Unicode code point) at the specified 717 * index. The index refers to {@code char} values 718 * (Unicode code units) and ranges from {@code 0} to 719 * {@link #length()}{@code - 1}. 720 * 721 * <p> If the {@code char} value specified at the given index 722 * is in the high-surrogate range, the following index is less 723 * than the length of this {@code String}, and the 724 * {@code char} value at the following index is in the 725 * low-surrogate range, then the supplementary code point 726 * corresponding to this surrogate pair is returned. Otherwise, 727 * the {@code char} value at the given index is returned. 728 * 729 * @param index the index to the {@code char} values 730 * @return the code point value of the character at the 731 * {@code index} 732 * @exception IndexOutOfBoundsException if the {@code index} 733 * argument is negative or not less than the length of this 734 * string. 735 * @since 1.5 736 */ 737 public int codePointAt(int index) { 738 if (isLatin1()) { 739 checkIndex(index, value.length); 740 return value[index] & 0xff; 741 } 742 int length = value.length >> 1; 743 checkIndex(index, length); 744 return StringUTF16.codePointAt(value, index, length); 745 } 746 747 /** 748 * Returns the character (Unicode code point) before the specified 749 * index. The index refers to {@code char} values 750 * (Unicode code units) and ranges from {@code 1} to {@link 751 * CharSequence#length() length}. 752 * 753 * <p> If the {@code char} value at {@code (index - 1)} 754 * is in the low-surrogate range, {@code (index - 2)} is not 755 * negative, and the {@code char} value at {@code (index - 756 * 2)} is in the high-surrogate range, then the 757 * supplementary code point value of the surrogate pair is 758 * returned. 
If the {@code char} value at {@code index - 759 * 1} is an unpaired low-surrogate or a high-surrogate, the 760 * surrogate value is returned. 761 * 762 * @param index the index following the code point that should be returned 763 * @return the Unicode code point value before the given index. 764 * @exception IndexOutOfBoundsException if the {@code index} 765 * argument is less than 1 or greater than the length 766 * of this string. 767 * @since 1.5 768 */ 769 public int codePointBefore(int index) { 770 int i = index - 1; 771 if (i < 0 || i >= length()) { 772 throw new StringIndexOutOfBoundsException(index); 773 } 774 if (isLatin1()) { 775 return (value[i] & 0xff); 776 } 777 return StringUTF16.codePointBefore(value, index); 778 } 779 780 /** 781 * Returns the number of Unicode code points in the specified text 782 * range of this {@code String}. The text range begins at the 783 * specified {@code beginIndex} and extends to the 784 * {@code char} at index {@code endIndex - 1}. Thus the 785 * length (in {@code char}s) of the text range is 786 * {@code endIndex-beginIndex}. Unpaired surrogates within 787 * the text range count as one code point each. 788 * 789 * @param beginIndex the index to the first {@code char} of 790 * the text range. 791 * @param endIndex the index after the last {@code char} of 792 * the text range. 793 * @return the number of Unicode code points in the specified text 794 * range 795 * @exception IndexOutOfBoundsException if the 796 * {@code beginIndex} is negative, or {@code endIndex} 797 * is larger than the length of this {@code String}, or 798 * {@code beginIndex} is larger than {@code endIndex}. 
799 * @since 1.5 800 */ 801 public int codePointCount(int beginIndex, int endIndex) { 802 if (beginIndex < 0 || beginIndex > endIndex || 803 endIndex > length()) { 804 throw new IndexOutOfBoundsException(); 805 } 806 if (isLatin1()) { 807 return endIndex - beginIndex; 808 } 809 return StringUTF16.codePointCount(value, beginIndex, endIndex); 810 } 811 812 /** 813 * Returns the index within this {@code String} that is 814 * offset from the given {@code index} by 815 * {@code codePointOffset} code points. Unpaired surrogates 816 * within the text range given by {@code index} and 817 * {@code codePointOffset} count as one code point each. 818 * 819 * @param index the index to be offset 820 * @param codePointOffset the offset in code points 821 * @return the index within this {@code String} 822 * @exception IndexOutOfBoundsException if {@code index} 823 * is negative or larger then the length of this 824 * {@code String}, or if {@code codePointOffset} is positive 825 * and the substring starting with {@code index} has fewer 826 * than {@code codePointOffset} code points, 827 * or if {@code codePointOffset} is negative and the substring 828 * before {@code index} has fewer than the absolute value 829 * of {@code codePointOffset} code points. 830 * @since 1.5 831 */ 832 public int offsetByCodePoints(int index, int codePointOffset) { 833 if (index < 0 || index > length()) { 834 throw new IndexOutOfBoundsException(); 835 } 836 return Character.offsetByCodePoints(this, index, codePointOffset); 837 } 838 839 /** 840 * Copies characters from this string into the destination character 841 * array. 842 * <p> 843 * The first character to be copied is at index {@code srcBegin}; 844 * the last character to be copied is at index {@code srcEnd-1} 845 * (thus the total number of characters to be copied is 846 * {@code srcEnd-srcBegin}). 
The characters are copied into the 847 * subarray of {@code dst} starting at index {@code dstBegin} 848 * and ending at index: 849 * <blockquote><pre> 850 * dstBegin + (srcEnd-srcBegin) - 1 851 * </pre></blockquote> 852 * 853 * @param srcBegin index of the first character in the string 854 * to copy. 855 * @param srcEnd index after the last character in the string 856 * to copy. 857 * @param dst the destination array. 858 * @param dstBegin the start offset in the destination array. 859 * @exception IndexOutOfBoundsException If any of the following 860 * is true: 861 * <ul><li>{@code srcBegin} is negative. 862 * <li>{@code srcBegin} is greater than {@code srcEnd} 863 * <li>{@code srcEnd} is greater than the length of this 864 * string 865 * <li>{@code dstBegin} is negative 866 * <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than 867 * {@code dst.length}</ul> 868 */ 869 public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) { 870 checkBoundsBeginEnd(srcBegin, srcEnd, length()); 871 checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length); 872 if (isLatin1()) { 873 StringLatin1.getChars(value, srcBegin, srcEnd, dst, dstBegin); 874 } else { 875 StringUTF16.getChars(value, srcBegin, srcEnd, dst, dstBegin); 876 } 877 } 878 879 /** 880 * Copies characters from this string into the destination byte array. Each 881 * byte receives the 8 low-order bits of the corresponding character. The 882 * eight high-order bits of each character are not copied and do not 883 * participate in the transfer in any way. 884 * 885 * <p> The first character to be copied is at index {@code srcBegin}; the 886 * last character to be copied is at index {@code srcEnd-1}. The total 887 * number of characters to be copied is {@code srcEnd-srcBegin}. 
The 888 * characters, converted to bytes, are copied into the subarray of {@code 889 * dst} starting at index {@code dstBegin} and ending at index: 890 * 891 * <blockquote><pre> 892 * dstBegin + (srcEnd-srcBegin) - 1 893 * </pre></blockquote> 894 * 895 * @deprecated This method does not properly convert characters into 896 * bytes. As of JDK 1.1, the preferred way to do this is via the 897 * {@link #getBytes()} method, which uses the platform's default charset. 898 * 899 * @param srcBegin 900 * Index of the first character in the string to copy 901 * 902 * @param srcEnd 903 * Index after the last character in the string to copy 904 * 905 * @param dst 906 * The destination array 907 * 908 * @param dstBegin 909 * The start offset in the destination array 910 * 911 * @throws IndexOutOfBoundsException 912 * If any of the following is true: 913 * <ul> 914 * <li> {@code srcBegin} is negative 915 * <li> {@code srcBegin} is greater than {@code srcEnd} 916 * <li> {@code srcEnd} is greater than the length of this String 917 * <li> {@code dstBegin} is negative 918 * <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code 919 * dst.length} 920 * </ul> 921 */ 922 @Deprecated(since="1.1") 923 public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) { 924 checkBoundsBeginEnd(srcBegin, srcEnd, length()); 925 Objects.requireNonNull(dst); 926 checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length); 927 if (isLatin1()) { 928 StringLatin1.getBytes(value, srcBegin, srcEnd, dst, dstBegin); 929 } else { 930 StringUTF16.getBytes(value, srcBegin, srcEnd, dst, dstBegin); 931 } 932 } 933 934 /** 935 * Encodes this {@code String} into a sequence of bytes using the named 936 * charset, storing the result into a new byte array. 937 * 938 * <p> The behavior of this method when this string cannot be encoded in 939 * the given charset is unspecified. 
The {@link 940 * java.nio.charset.CharsetEncoder} class should be used when more control 941 * over the encoding process is required. 942 * 943 * @param charsetName 944 * The name of a supported {@linkplain java.nio.charset.Charset 945 * charset} 946 * 947 * @return The resultant byte array 948 * 949 * @throws UnsupportedEncodingException 950 * If the named charset is not supported 951 * 952 * @since 1.1 953 */ 954 public byte[] getBytes(String charsetName) 955 throws UnsupportedEncodingException { 956 if (charsetName == null) throw new NullPointerException(); 957 return StringCoding.encode(charsetName, coder(), value); 958 } 959 960 /** 961 * Encodes this {@code String} into a sequence of bytes using the given 962 * {@linkplain java.nio.charset.Charset charset}, storing the result into a 963 * new byte array. 964 * 965 * <p> This method always replaces malformed-input and unmappable-character 966 * sequences with this charset's default replacement byte array. The 967 * {@link java.nio.charset.CharsetEncoder} class should be used when more 968 * control over the encoding process is required. 969 * 970 * @param charset 971 * The {@linkplain java.nio.charset.Charset} to be used to encode 972 * the {@code String} 973 * 974 * @return The resultant byte array 975 * 976 * @since 1.6 977 */ 978 public byte[] getBytes(Charset charset) { 979 if (charset == null) throw new NullPointerException(); 980 return StringCoding.encode(charset, coder(), value); 981 } 982 983 /** 984 * Encodes this {@code String} into a sequence of bytes using the 985 * platform's default charset, storing the result into a new byte array. 986 * 987 * <p> The behavior of this method when this string cannot be encoded in 988 * the default charset is unspecified. The {@link 989 * java.nio.charset.CharsetEncoder} class should be used when more control 990 * over the encoding process is required. 
991 * 992 * @return The resultant byte array 993 * 994 * @since 1.1 995 */ 996 public byte[] getBytes() { 997 return StringCoding.encode(coder(), value); 998 } 999 1000 /** 1001 * Compares this string to the specified object. The result is {@code 1002 * true} if and only if the argument is not {@code null} and is a {@code 1003 * String} object that represents the same sequence of characters as this 1004 * object. 1005 * 1006 * <p>For finer-grained String comparison, refer to 1007 * {@link java.text.Collator}. 1008 * 1009 * @param anObject 1010 * The object to compare this {@code String} against 1011 * 1012 * @return {@code true} if the given object represents a {@code String} 1013 * equivalent to this string, {@code false} otherwise 1014 * 1015 * @see #compareTo(String) 1016 * @see #equalsIgnoreCase(String) 1017 */ 1018 public boolean equals(Object anObject) { 1019 if (this == anObject) { 1020 return true; 1021 } 1022 if (anObject instanceof String) { 1023 String aString = (String)anObject; 1024 if (!COMPACT_STRINGS || this.coder == aString.coder) { 1025 return StringLatin1.equals(value, aString.value); 1026 } 1027 } 1028 return false; 1029 } 1030 1031 /** 1032 * Compares this string to the specified {@code StringBuffer}. The result 1033 * is {@code true} if and only if this {@code String} represents the same 1034 * sequence of characters as the specified {@code StringBuffer}. This method 1035 * synchronizes on the {@code StringBuffer}. 1036 * 1037 * <p>For finer-grained String comparison, refer to 1038 * {@link java.text.Collator}. 
1039 * 1040 * @param sb 1041 * The {@code StringBuffer} to compare this {@code String} against 1042 * 1043 * @return {@code true} if this {@code String} represents the same 1044 * sequence of characters as the specified {@code StringBuffer}, 1045 * {@code false} otherwise 1046 * 1047 * @since 1.4 1048 */ 1049 public boolean contentEquals(StringBuffer sb) { 1050 return contentEquals((CharSequence)sb); 1051 } 1052 1053 private boolean nonSyncContentEquals(AbstractStringBuilder sb) { 1054 int len = length(); 1055 if (len != sb.length()) { 1056 return false; 1057 } 1058 byte v1[] = value; 1059 byte v2[] = sb.getValue(); 1060 byte coder = coder(); 1061 if (coder == sb.getCoder()) { 1062 int n = v1.length; 1063 for (int i = 0; i < n; i++) { 1064 if (v1[i] != v2[i]) { 1065 return false; 1066 } 1067 } 1068 } else { 1069 if (coder != LATIN1) { // utf16 str and latin1 abs can never be "equal" 1070 return false; 1071 } 1072 return StringUTF16.contentEquals(v1, v2, len); 1073 } 1074 return true; 1075 } 1076 1077 /** 1078 * Compares this string to the specified {@code CharSequence}. The 1079 * result is {@code true} if and only if this {@code String} represents the 1080 * same sequence of char values as the specified sequence. Note that if the 1081 * {@code CharSequence} is a {@code StringBuffer} then the method 1082 * synchronizes on it. 1083 * 1084 * <p>For finer-grained String comparison, refer to 1085 * {@link java.text.Collator}. 
1086 * 1087 * @param cs 1088 * The sequence to compare this {@code String} against 1089 * 1090 * @return {@code true} if this {@code String} represents the same 1091 * sequence of char values as the specified sequence, {@code 1092 * false} otherwise 1093 * 1094 * @since 1.5 1095 */ 1096 public boolean contentEquals(CharSequence cs) { 1097 // Argument is a StringBuffer, StringBuilder 1098 if (cs instanceof AbstractStringBuilder) { 1099 if (cs instanceof StringBuffer) { 1100 synchronized(cs) { 1101 return nonSyncContentEquals((AbstractStringBuilder)cs); 1102 } 1103 } else { 1104 return nonSyncContentEquals((AbstractStringBuilder)cs); 1105 } 1106 } 1107 // Argument is a String 1108 if (cs instanceof String) { 1109 return equals(cs); 1110 } 1111 // Argument is a generic CharSequence 1112 int n = cs.length(); 1113 if (n != length()) { 1114 return false; 1115 } 1116 byte[] val = this.value; 1117 if (isLatin1()) { 1118 for (int i = 0; i < n; i++) { 1119 if ((val[i] & 0xff) != cs.charAt(i)) { 1120 return false; 1121 } 1122 } 1123 } else { 1124 if (!StringUTF16.contentEquals(val, cs, n)) { 1125 return false; 1126 } 1127 } 1128 return true; 1129 } 1130 1131 /** 1132 * Compares this {@code String} to another {@code String}, ignoring case 1133 * considerations. Two strings are considered equal ignoring case if they 1134 * are of the same length and corresponding characters in the two strings 1135 * are equal ignoring case. 1136 * 1137 * <p> Two characters {@code c1} and {@code c2} are considered the same 1138 * ignoring case if at least one of the following is true: 1139 * <ul> 1140 * <li> The two characters are the same (as compared by the 1141 * {@code ==} operator) 1142 * <li> Calling {@code Character.toLowerCase(Character.toUpperCase(char))} 1143 * on each character produces the same result 1144 * </ul> 1145 * 1146 * <p>Note that this method does <em>not</em> take locale into account, and 1147 * will result in unsatisfactory results for certain locales. 
The 1148 * {@link java.text.Collator} class provides locale-sensitive comparison. 1149 * 1150 * @param anotherString 1151 * The {@code String} to compare this {@code String} against 1152 * 1153 * @return {@code true} if the argument is not {@code null} and it 1154 * represents an equivalent {@code String} ignoring case; {@code 1155 * false} otherwise 1156 * 1157 * @see #equals(Object) 1158 */ 1159 public boolean equalsIgnoreCase(String anotherString) { 1160 return (this == anotherString) ? true 1161 : (anotherString != null) 1162 && (anotherString.length() == length()) 1163 && regionMatches(true, 0, anotherString, 0, length()); 1164 } 1165 1166 /** 1167 * Compares two strings lexicographically. 1168 * The comparison is based on the Unicode value of each character in 1169 * the strings. The character sequence represented by this 1170 * {@code String} object is compared lexicographically to the 1171 * character sequence represented by the argument string. The result is 1172 * a negative integer if this {@code String} object 1173 * lexicographically precedes the argument string. The result is a 1174 * positive integer if this {@code String} object lexicographically 1175 * follows the argument string. The result is zero if the strings 1176 * are equal; {@code compareTo} returns {@code 0} exactly when 1177 * the {@link #equals(Object)} method would return {@code true}. 1178 * <p> 1179 * This is the definition of lexicographic ordering. If two strings are 1180 * different, then either they have different characters at some index 1181 * that is a valid index for both strings, or their lengths are different, 1182 * or both. If they have different characters at one or more index 1183 * positions, let <i>k</i> be the smallest such index; then the string 1184 * whose character at position <i>k</i> has the smaller value, as 1185 * determined by using the {@code <} operator, lexicographically precedes the 1186 * other string. 
In this case, {@code compareTo} returns the 1187 * difference of the two character values at position {@code k} in 1188 * the two string -- that is, the value: 1189 * <blockquote><pre> 1190 * this.charAt(k)-anotherString.charAt(k) 1191 * </pre></blockquote> 1192 * If there is no index position at which they differ, then the shorter 1193 * string lexicographically precedes the longer string. In this case, 1194 * {@code compareTo} returns the difference of the lengths of the 1195 * strings -- that is, the value: 1196 * <blockquote><pre> 1197 * this.length()-anotherString.length() 1198 * </pre></blockquote> 1199 * 1200 * <p>For finer-grained String comparison, refer to 1201 * {@link java.text.Collator}. 1202 * 1203 * @param anotherString the {@code String} to be compared. 1204 * @return the value {@code 0} if the argument string is equal to 1205 * this string; a value less than {@code 0} if this string 1206 * is lexicographically less than the string argument; and a 1207 * value greater than {@code 0} if this string is 1208 * lexicographically greater than the string argument. 1209 */ 1210 public int compareTo(String anotherString) { 1211 byte v1[] = value; 1212 byte v2[] = anotherString.value; 1213 byte coder = coder(); 1214 if (coder == anotherString.coder()) { 1215 return coder == LATIN1 ? StringLatin1.compareTo(v1, v2) 1216 : StringUTF16.compareTo(v1, v2); 1217 } 1218 return coder == LATIN1 ? StringLatin1.compareToUTF16(v1, v2) 1219 : StringUTF16.compareToLatin1(v1, v2); 1220 } 1221 1222 /** 1223 * A Comparator that orders {@code String} objects as by 1224 * {@code compareToIgnoreCase}. This comparator is serializable. 1225 * <p> 1226 * Note that this Comparator does <em>not</em> take locale into account, 1227 * and will result in an unsatisfactory ordering for certain locales. 1228 * The {@link java.text.Collator} class provides locale-sensitive comparison. 
1229 * 1230 * @see java.text.Collator 1231 * @since 1.2 1232 */ 1233 public static final Comparator<String> CASE_INSENSITIVE_ORDER 1234 = new CaseInsensitiveComparator(); 1235 private static class CaseInsensitiveComparator 1236 implements Comparator<String>, java.io.Serializable { 1237 // use serialVersionUID from JDK 1.2.2 for interoperability 1238 private static final long serialVersionUID = 8575799808933029326L; 1239 1240 public int compare(String s1, String s2) { 1241 byte v1[] = s1.value; 1242 byte v2[] = s2.value; 1243 byte coder = s1.coder(); 1244 if (coder == s2.coder()) { 1245 return coder == LATIN1 ? StringLatin1.compareToCI(v1, v2) 1246 : StringUTF16.compareToCI(v1, v2); 1247 } 1248 return coder == LATIN1 ? StringLatin1.compareToCI_UTF16(v1, v2) 1249 : StringUTF16.compareToCI_Latin1(v1, v2); 1250 } 1251 1252 /** Replaces the de-serialized object. */ 1253 private Object readResolve() { return CASE_INSENSITIVE_ORDER; } 1254 } 1255 1256 /** 1257 * Compares two strings lexicographically, ignoring case 1258 * differences. This method returns an integer whose sign is that of 1259 * calling {@code compareTo} with normalized versions of the strings 1260 * where case differences have been eliminated by calling 1261 * {@code Character.toLowerCase(Character.toUpperCase(character))} on 1262 * each character. 1263 * <p> 1264 * Note that this method does <em>not</em> take locale into account, 1265 * and will result in an unsatisfactory ordering for certain locales. 1266 * The {@link java.text.Collator} class provides locale-sensitive comparison. 1267 * 1268 * @param str the {@code String} to be compared. 1269 * @return a negative integer, zero, or a positive integer as the 1270 * specified String is greater than, equal to, or less 1271 * than this String, ignoring case considerations. 
1272 * @see java.text.Collator 1273 * @since 1.2 1274 */ 1275 public int compareToIgnoreCase(String str) { 1276 return CASE_INSENSITIVE_ORDER.compare(this, str); 1277 } 1278 1279 /** 1280 * Tests if two string regions are equal. 1281 * <p> 1282 * A substring of this {@code String} object is compared to a substring 1283 * of the argument other. The result is true if these substrings 1284 * represent identical character sequences. The substring of this 1285 * {@code String} object to be compared begins at index {@code toffset} 1286 * and has length {@code len}. The substring of other to be compared 1287 * begins at index {@code ooffset} and has length {@code len}. The 1288 * result is {@code false} if and only if at least one of the following 1289 * is true: 1290 * <ul><li>{@code toffset} is negative. 1291 * <li>{@code ooffset} is negative. 1292 * <li>{@code toffset+len} is greater than the length of this 1293 * {@code String} object. 1294 * <li>{@code ooffset+len} is greater than the length of the other 1295 * argument. 1296 * <li>There is some nonnegative integer <i>k</i> less than {@code len} 1297 * such that: 1298 * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + } 1299 * <i>k</i>{@code )} 1300 * </ul> 1301 * 1302 * <p>Note that this method does <em>not</em> take locale into account. The 1303 * {@link java.text.Collator} class provides locale-sensitive comparison. 1304 * 1305 * @param toffset the starting offset of the subregion in this string. 1306 * @param other the string argument. 1307 * @param ooffset the starting offset of the subregion in the string 1308 * argument. 1309 * @param len the number of characters to compare. 1310 * @return {@code true} if the specified subregion of this string 1311 * exactly matches the specified subregion of the string argument; 1312 * {@code false} otherwise. 
1313 */ 1314 public boolean regionMatches(int toffset, String other, int ooffset, int len) { 1315 byte tv[] = value; 1316 byte ov[] = other.value; 1317 // Note: toffset, ooffset, or len might be near -1>>>1. 1318 if ((ooffset < 0) || (toffset < 0) || 1319 (toffset > (long)length() - len) || 1320 (ooffset > (long)other.length() - len)) { 1321 return false; 1322 } 1323 byte coder = coder(); 1324 if (coder == other.coder()) { 1325 if (!isLatin1() && (len > 0)) { 1326 toffset = toffset << 1; 1327 ooffset = ooffset << 1; 1328 len = len << 1; 1329 } 1330 while (len-- > 0) { 1331 if (tv[toffset++] != ov[ooffset++]) { 1332 return false; 1333 } 1334 } 1335 } else { 1336 if (coder == LATIN1) { 1337 while (len-- > 0) { 1338 if (StringLatin1.getChar(tv, toffset++) != 1339 StringUTF16.getChar(ov, ooffset++)) { 1340 return false; 1341 } 1342 } 1343 } else { 1344 while (len-- > 0) { 1345 if (StringUTF16.getChar(tv, toffset++) != 1346 StringLatin1.getChar(ov, ooffset++)) { 1347 return false; 1348 } 1349 } 1350 } 1351 } 1352 return true; 1353 } 1354 1355 /** 1356 * Tests if two string regions are equal. 1357 * <p> 1358 * A substring of this {@code String} object is compared to a substring 1359 * of the argument {@code other}. The result is {@code true} if these 1360 * substrings represent character sequences that are the same, ignoring 1361 * case if and only if {@code ignoreCase} is true. The substring of 1362 * this {@code String} object to be compared begins at index 1363 * {@code toffset} and has length {@code len}. The substring of 1364 * {@code other} to be compared begins at index {@code ooffset} and 1365 * has length {@code len}. The result is {@code false} if and only if 1366 * at least one of the following is true: 1367 * <ul><li>{@code toffset} is negative. 1368 * <li>{@code ooffset} is negative. 1369 * <li>{@code toffset+len} is greater than the length of this 1370 * {@code String} object. 
1371 * <li>{@code ooffset+len} is greater than the length of the other 1372 * argument. 1373 * <li>{@code ignoreCase} is {@code false} and there is some nonnegative 1374 * integer <i>k</i> less than {@code len} such that: 1375 * <blockquote><pre> 1376 * this.charAt(toffset+k) != other.charAt(ooffset+k) 1377 * </pre></blockquote> 1378 * <li>{@code ignoreCase} is {@code true} and there is some nonnegative 1379 * integer <i>k</i> less than {@code len} such that: 1380 * <blockquote><pre> 1381 * Character.toLowerCase(Character.toUpperCase(this.charAt(toffset+k))) != 1382 Character.toLowerCase(Character.toUpperCase(other.charAt(ooffset+k))) 1383 * </pre></blockquote> 1384 * </ul> 1385 * 1386 * <p>Note that this method does <em>not</em> take locale into account, 1387 * and will result in unsatisfactory results for certain locales when 1388 * {@code ignoreCase} is {@code true}. The {@link java.text.Collator} class 1389 * provides locale-sensitive comparison. 1390 * 1391 * @param ignoreCase if {@code true}, ignore case when comparing 1392 * characters. 1393 * @param toffset the starting offset of the subregion in this 1394 * string. 1395 * @param other the string argument. 1396 * @param ooffset the starting offset of the subregion in the string 1397 * argument. 1398 * @param len the number of characters to compare. 1399 * @return {@code true} if the specified subregion of this string 1400 * matches the specified subregion of the string argument; 1401 * {@code false} otherwise. Whether the matching is exact 1402 * or case insensitive depends on the {@code ignoreCase} 1403 * argument. 1404 */ 1405 public boolean regionMatches(boolean ignoreCase, int toffset, 1406 String other, int ooffset, int len) { 1407 if (!ignoreCase) { 1408 return regionMatches(toffset, other, ooffset, len); 1409 } 1410 // Note: toffset, ooffset, or len might be near -1>>>1. 
1411 if ((ooffset < 0) || (toffset < 0) 1412 || (toffset > (long)length() - len) 1413 || (ooffset > (long)other.length() - len)) { 1414 return false; 1415 } 1416 byte tv[] = value; 1417 byte ov[] = other.value; 1418 byte coder = coder(); 1419 if (coder == other.coder()) { 1420 return coder == LATIN1 1421 ? StringLatin1.regionMatchesCI(tv, toffset, ov, ooffset, len) 1422 : StringUTF16.regionMatchesCI(tv, toffset, ov, ooffset, len); 1423 } 1424 return coder == LATIN1 1425 ? StringLatin1.regionMatchesCI_UTF16(tv, toffset, ov, ooffset, len) 1426 : StringUTF16.regionMatchesCI_Latin1(tv, toffset, ov, ooffset, len); 1427 } 1428 1429 /** 1430 * Tests if the substring of this string beginning at the 1431 * specified index starts with the specified prefix. 1432 * 1433 * @param prefix the prefix. 1434 * @param toffset where to begin looking in this string. 1435 * @return {@code true} if the character sequence represented by the 1436 * argument is a prefix of the substring of this object starting 1437 * at index {@code toffset}; {@code false} otherwise. 1438 * The result is {@code false} if {@code toffset} is 1439 * negative or greater than the length of this 1440 * {@code String} object; otherwise the result is the same 1441 * as the result of the expression 1442 * <pre> 1443 * this.substring(toffset).startsWith(prefix) 1444 * </pre> 1445 */ 1446 public boolean startsWith(String prefix, int toffset) { 1447 // Note: toffset might be near -1>>>1. 1448 if (toffset < 0 || toffset > length() - prefix.length()) { 1449 return false; 1450 } 1451 byte ta[] = value; 1452 byte pa[] = prefix.value; 1453 int po = 0; 1454 int pc = pa.length; 1455 byte coder = coder(); 1456 if (coder == prefix.coder()) { 1457 int to = (coder == LATIN1) ? 
toffset : toffset << 1; 1458 while (po < pc) { 1459 if (ta[to++] != pa[po++]) { 1460 return false; 1461 } 1462 } 1463 } else { 1464 if (coder == LATIN1) { // && pcoder == UTF16 1465 return false; 1466 } 1467 // coder == UTF16 && pcoder == LATIN1) 1468 while (po < pc) { 1469 if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) { 1470 return false; 1471 } 1472 } 1473 } 1474 return true; 1475 } 1476 1477 /** 1478 * Tests if this string starts with the specified prefix. 1479 * 1480 * @param prefix the prefix. 1481 * @return {@code true} if the character sequence represented by the 1482 * argument is a prefix of the character sequence represented by 1483 * this string; {@code false} otherwise. 1484 * Note also that {@code true} will be returned if the 1485 * argument is an empty string or is equal to this 1486 * {@code String} object as determined by the 1487 * {@link #equals(Object)} method. 1488 * @since 1.0 1489 */ 1490 public boolean startsWith(String prefix) { 1491 return startsWith(prefix, 0); 1492 } 1493 1494 /** 1495 * Tests if this string ends with the specified suffix. 1496 * 1497 * @param suffix the suffix. 1498 * @return {@code true} if the character sequence represented by the 1499 * argument is a suffix of the character sequence represented by 1500 * this object; {@code false} otherwise. Note that the 1501 * result will be {@code true} if the argument is the 1502 * empty string or is equal to this {@code String} object 1503 * as determined by the {@link #equals(Object)} method. 1504 */ 1505 public boolean endsWith(String suffix) { 1506 return startsWith(suffix, length() - suffix.length()); 1507 } 1508 1509 /** 1510 * Returns a hash code for this string. The hash code for a 1511 * {@code String} object is computed as 1512 * <blockquote><pre> 1513 * s[0]*31^(n-1) + s[1]*31^(n-2) + ... 
+ s[n-1] 1514 * </pre></blockquote> 1515 * using {@code int} arithmetic, where {@code s[i]} is the 1516 * <i>i</i>th character of the string, {@code n} is the length of 1517 * the string, and {@code ^} indicates exponentiation. 1518 * (The hash value of the empty string is zero.) 1519 * 1520 * @return a hash code value for this object. 1521 */ 1522 public int hashCode() { 1523 // The hash or hashIsZero fields are subject to a benign data race, 1524 // making it crucial to ensure that any observable result of the 1525 // calculation in this method stays correct under any possible read of 1526 // these fields. Necessary restrictions to allow this to be correct 1527 // without explicit memory fences or similar concurrency primitives is 1528 // that we can ever only write to one of these two fields for a given 1529 // String instance, and that the computation is idempotent and derived 1530 // from immutable state 1531 int h = hash; 1532 if (h == 0 && !hashIsZero) { 1533 h = isLatin1() ? StringLatin1.hashCode(value) 1534 : StringUTF16.hashCode(value); 1535 if (h == 0) { 1536 hashIsZero = true; 1537 } else { 1538 hash = h; 1539 } 1540 } 1541 return h; 1542 } 1543 1544 /** 1545 * Returns the index within this string of the first occurrence of 1546 * the specified character. If a character with value 1547 * {@code ch} occurs in the character sequence represented by 1548 * this {@code String} object, then the index (in Unicode 1549 * code units) of the first such occurrence is returned. For 1550 * values of {@code ch} in the range from 0 to 0xFFFF 1551 * (inclusive), this is the smallest value <i>k</i> such that: 1552 * <blockquote><pre> 1553 * this.charAt(<i>k</i>) == ch 1554 * </pre></blockquote> 1555 * is true. For other values of {@code ch}, it is the 1556 * smallest value <i>k</i> such that: 1557 * <blockquote><pre> 1558 * this.codePointAt(<i>k</i>) == ch 1559 * </pre></blockquote> 1560 * is true. 
In either case, if no such character occurs in this 1561 * string, then {@code -1} is returned. 1562 * 1563 * @param ch a character (Unicode code point). 1564 * @return the index of the first occurrence of the character in the 1565 * character sequence represented by this object, or 1566 * {@code -1} if the character does not occur. 1567 */ 1568 public int indexOf(int ch) { 1569 return indexOf(ch, 0); 1570 } 1571 1572 /** 1573 * Returns the index within this string of the first occurrence of the 1574 * specified character, starting the search at the specified index. 1575 * <p> 1576 * If a character with value {@code ch} occurs in the 1577 * character sequence represented by this {@code String} 1578 * object at an index no smaller than {@code fromIndex}, then 1579 * the index of the first such occurrence is returned. For values 1580 * of {@code ch} in the range from 0 to 0xFFFF (inclusive), 1581 * this is the smallest value <i>k</i> such that: 1582 * <blockquote><pre> 1583 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> >= fromIndex) 1584 * </pre></blockquote> 1585 * is true. For other values of {@code ch}, it is the 1586 * smallest value <i>k</i> such that: 1587 * <blockquote><pre> 1588 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> >= fromIndex) 1589 * </pre></blockquote> 1590 * is true. In either case, if no such character occurs in this 1591 * string at or after position {@code fromIndex}, then 1592 * {@code -1} is returned. 1593 * 1594 * <p> 1595 * There is no restriction on the value of {@code fromIndex}. If it 1596 * is negative, it has the same effect as if it were zero: this entire 1597 * string may be searched. If it is greater than the length of this 1598 * string, it has the same effect as if it were equal to the length of 1599 * this string: {@code -1} is returned. 1600 * 1601 * <p>All indices are specified in {@code char} values 1602 * (Unicode code units). 1603 * 1604 * @param ch a character (Unicode code point). 
1605 * @param fromIndex the index to start the search from. 1606 * @return the index of the first occurrence of the character in the 1607 * character sequence represented by this object that is greater 1608 * than or equal to {@code fromIndex}, or {@code -1} 1609 * if the character does not occur. 1610 */ 1611 public int indexOf(int ch, int fromIndex) { 1612 return isLatin1() ? StringLatin1.indexOf(value, ch, fromIndex) 1613 : StringUTF16.indexOf(value, ch, fromIndex); 1614 } 1615 1616 /** 1617 * Returns the index within this string of the last occurrence of 1618 * the specified character. For values of {@code ch} in the 1619 * range from 0 to 0xFFFF (inclusive), the index (in Unicode code 1620 * units) returned is the largest value <i>k</i> such that: 1621 * <blockquote><pre> 1622 * this.charAt(<i>k</i>) == ch 1623 * </pre></blockquote> 1624 * is true. For other values of {@code ch}, it is the 1625 * largest value <i>k</i> such that: 1626 * <blockquote><pre> 1627 * this.codePointAt(<i>k</i>) == ch 1628 * </pre></blockquote> 1629 * is true. In either case, if no such character occurs in this 1630 * string, then {@code -1} is returned. The 1631 * {@code String} is searched backwards starting at the last 1632 * character. 1633 * 1634 * @param ch a character (Unicode code point). 1635 * @return the index of the last occurrence of the character in the 1636 * character sequence represented by this object, or 1637 * {@code -1} if the character does not occur. 1638 */ 1639 public int lastIndexOf(int ch) { 1640 return lastIndexOf(ch, length() - 1); 1641 } 1642 1643 /** 1644 * Returns the index within this string of the last occurrence of 1645 * the specified character, searching backward starting at the 1646 * specified index. 
For values of {@code ch} in the range 1647 * from 0 to 0xFFFF (inclusive), the index returned is the largest 1648 * value <i>k</i> such that: 1649 * <blockquote><pre> 1650 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex) 1651 * </pre></blockquote> 1652 * is true. For other values of {@code ch}, it is the 1653 * largest value <i>k</i> such that: 1654 * <blockquote><pre> 1655 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex) 1656 * </pre></blockquote> 1657 * is true. In either case, if no such character occurs in this 1658 * string at or before position {@code fromIndex}, then 1659 * {@code -1} is returned. 1660 * 1661 * <p>All indices are specified in {@code char} values 1662 * (Unicode code units). 1663 * 1664 * @param ch a character (Unicode code point). 1665 * @param fromIndex the index to start the search from. There is no 1666 * restriction on the value of {@code fromIndex}. If it is 1667 * greater than or equal to the length of this string, it has 1668 * the same effect as if it were equal to one less than the 1669 * length of this string: this entire string may be searched. 1670 * If it is negative, it has the same effect as if it were -1: 1671 * -1 is returned. 1672 * @return the index of the last occurrence of the character in the 1673 * character sequence represented by this object that is less 1674 * than or equal to {@code fromIndex}, or {@code -1} 1675 * if the character does not occur before that point. 1676 */ 1677 public int lastIndexOf(int ch, int fromIndex) { 1678 return isLatin1() ? StringLatin1.lastIndexOf(value, ch, fromIndex) 1679 : StringUTF16.lastIndexOf(value, ch, fromIndex); 1680 } 1681 1682 /** 1683 * Returns the index within this string of the first occurrence of the 1684 * specified substring. 
1685 * 1686 * <p>The returned index is the smallest value {@code k} for which: 1687 * <pre>{@code 1688 * this.startsWith(str, k) 1689 * }</pre> 1690 * If no such value of {@code k} exists, then {@code -1} is returned. 1691 * 1692 * @param str the substring to search for. 1693 * @return the index of the first occurrence of the specified substring, 1694 * or {@code -1} if there is no such occurrence. 1695 */ 1696 public int indexOf(String str) { 1697 byte coder = coder(); 1698 if (coder == str.coder()) { 1699 return isLatin1() ? StringLatin1.indexOf(value, str.value) 1700 : StringUTF16.indexOf(value, str.value); 1701 } 1702 if (coder == LATIN1) { // str.coder == UTF16 1703 return -1; 1704 } 1705 return StringUTF16.indexOfLatin1(value, str.value); 1706 } 1707 1708 /** 1709 * Returns the index within this string of the first occurrence of the 1710 * specified substring, starting at the specified index. 1711 * 1712 * <p>The returned index is the smallest value {@code k} for which: 1713 * <pre>{@code 1714 * k >= Math.min(fromIndex, this.length()) && 1715 * this.startsWith(str, k) 1716 * }</pre> 1717 * If no such value of {@code k} exists, then {@code -1} is returned. 1718 * 1719 * @param str the substring to search for. 1720 * @param fromIndex the index from which to start the search. 1721 * @return the index of the first occurrence of the specified substring, 1722 * starting at the specified index, 1723 * or {@code -1} if there is no such occurrence. 1724 */ 1725 public int indexOf(String str, int fromIndex) { 1726 return indexOf(value, coder(), length(), str, fromIndex); 1727 } 1728 1729 /** 1730 * Code shared by String and AbstractStringBuilder to do searches. The 1731 * source is the character array being searched, and the target 1732 * is the string being searched for. 1733 * 1734 * @param src the characters being searched. 1735 * @param srcCoder the coder of the source string. 1736 * @param srcCount length of the source string. 
1737 * @param tgtStr the characters being searched for. 1738 * @param fromIndex the index to begin searching from. 1739 */ 1740 static int indexOf(byte[] src, byte srcCoder, int srcCount, 1741 String tgtStr, int fromIndex) { 1742 byte[] tgt = tgtStr.value; 1743 byte tgtCoder = tgtStr.coder(); 1744 int tgtCount = tgtStr.length(); 1745 1746 if (fromIndex >= srcCount) { 1747 return (tgtCount == 0 ? srcCount : -1); 1748 } 1749 if (fromIndex < 0) { 1750 fromIndex = 0; 1751 } 1752 if (tgtCount == 0) { 1753 return fromIndex; 1754 } 1755 if (tgtCount > srcCount) { 1756 return -1; 1757 } 1758 if (srcCoder == tgtCoder) { 1759 return srcCoder == LATIN1 1760 ? StringLatin1.indexOf(src, srcCount, tgt, tgtCount, fromIndex) 1761 : StringUTF16.indexOf(src, srcCount, tgt, tgtCount, fromIndex); 1762 } 1763 if (srcCoder == LATIN1) { // && tgtCoder == UTF16 1764 return -1; 1765 } 1766 // srcCoder == UTF16 && tgtCoder == LATIN1) { 1767 return StringUTF16.indexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex); 1768 } 1769 1770 /** 1771 * Returns the index within this string of the last occurrence of the 1772 * specified substring. The last occurrence of the empty string "" 1773 * is considered to occur at the index value {@code this.length()}. 1774 * 1775 * <p>The returned index is the largest value {@code k} for which: 1776 * <pre>{@code 1777 * this.startsWith(str, k) 1778 * }</pre> 1779 * If no such value of {@code k} exists, then {@code -1} is returned. 1780 * 1781 * @param str the substring to search for. 1782 * @return the index of the last occurrence of the specified substring, 1783 * or {@code -1} if there is no such occurrence. 1784 */ 1785 public int lastIndexOf(String str) { 1786 return lastIndexOf(str, length()); 1787 } 1788 1789 /** 1790 * Returns the index within this string of the last occurrence of the 1791 * specified substring, searching backward starting at the specified index. 
1792 * 1793 * <p>The returned index is the largest value {@code k} for which: 1794 * <pre>{@code 1795 * k <= Math.min(fromIndex, this.length()) && 1796 * this.startsWith(str, k) 1797 * }</pre> 1798 * If no such value of {@code k} exists, then {@code -1} is returned. 1799 * 1800 * @param str the substring to search for. 1801 * @param fromIndex the index to start the search from. 1802 * @return the index of the last occurrence of the specified substring, 1803 * searching backward from the specified index, 1804 * or {@code -1} if there is no such occurrence. 1805 */ 1806 public int lastIndexOf(String str, int fromIndex) { 1807 return lastIndexOf(value, coder(), length(), str, fromIndex); 1808 } 1809 1810 /** 1811 * Code shared by String and AbstractStringBuilder to do searches. The 1812 * source is the character array being searched, and the target 1813 * is the string being searched for. 1814 * 1815 * @param src the characters being searched. 1816 * @param srcCoder coder handles the mapping between bytes/chars 1817 * @param srcCount count of the source string. 1818 * @param tgt the characters being searched for. 1819 * @param fromIndex the index to begin searching from. 1820 */ 1821 static int lastIndexOf(byte[] src, byte srcCoder, int srcCount, 1822 String tgtStr, int fromIndex) { 1823 byte[] tgt = tgtStr.value; 1824 byte tgtCoder = tgtStr.coder(); 1825 int tgtCount = tgtStr.length(); 1826 /* 1827 * Check arguments; return immediately where possible. For 1828 * consistency, don't check for null str. 1829 */ 1830 int rightIndex = srcCount - tgtCount; 1831 if (fromIndex > rightIndex) { 1832 fromIndex = rightIndex; 1833 } 1834 if (fromIndex < 0) { 1835 return -1; 1836 } 1837 /* Empty string always matches. */ 1838 if (tgtCount == 0) { 1839 return fromIndex; 1840 } 1841 if (srcCoder == tgtCoder) { 1842 return srcCoder == LATIN1 1843 ? 
StringLatin1.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex) 1844 : StringUTF16.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex); 1845 } 1846 if (srcCoder == LATIN1) { // && tgtCoder == UTF16 1847 return -1; 1848 } 1849 // srcCoder == UTF16 && tgtCoder == LATIN1 1850 return StringUTF16.lastIndexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex); 1851 } 1852 1853 /** 1854 * Returns a string that is a substring of this string. The 1855 * substring begins with the character at the specified index and 1856 * extends to the end of this string. <p> 1857 * Examples: 1858 * <blockquote><pre> 1859 * "unhappy".substring(2) returns "happy" 1860 * "Harbison".substring(3) returns "bison" 1861 * "emptiness".substring(9) returns "" (an empty string) 1862 * </pre></blockquote> 1863 * 1864 * @param beginIndex the beginning index, inclusive. 1865 * @return the specified substring. 1866 * @exception IndexOutOfBoundsException if 1867 * {@code beginIndex} is negative or larger than the 1868 * length of this {@code String} object. 1869 */ 1870 public String substring(int beginIndex) { 1871 int subLen = length() - beginIndex; 1872 if (beginIndex <= 0 || subLen <= 0) { 1873 if (beginIndex == 0) { 1874 return this; 1875 } 1876 if (subLen == 0) { 1877 return ""; 1878 } 1879 throw new StringIndexOutOfBoundsException( 1880 "begin " + beginIndex + ", length " + length()); 1881 } 1882 return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen) 1883 : StringUTF16.newString(value, beginIndex, subLen); 1884 } 1885 1886 /** 1887 * Returns a string that is a substring of this string. The 1888 * substring begins at the specified {@code beginIndex} and 1889 * extends to the character at index {@code endIndex - 1}. 1890 * Thus the length of the substring is {@code endIndex-beginIndex}. 
1891 * <p> 1892 * Examples: 1893 * <blockquote><pre> 1894 * "hamburger".substring(4, 8) returns "urge" 1895 * "smiles".substring(1, 5) returns "mile" 1896 * </pre></blockquote> 1897 * 1898 * @param beginIndex the beginning index, inclusive. 1899 * @param endIndex the ending index, exclusive. 1900 * @return the specified substring. 1901 * @exception IndexOutOfBoundsException if the 1902 * {@code beginIndex} is negative, or 1903 * {@code endIndex} is larger than the length of 1904 * this {@code String} object, or 1905 * {@code beginIndex} is larger than 1906 * {@code endIndex}. 1907 */ 1908 public String substring(int beginIndex, int endIndex) { 1909 int length = length(); 1910 checkBoundsBeginEnd(beginIndex, endIndex, length); 1911 int subLen = endIndex - beginIndex; 1912 if (beginIndex == 0 && endIndex == length) { 1913 return this; 1914 } 1915 return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen) 1916 : StringUTF16.newString(value, beginIndex, subLen); 1917 } 1918 1919 /** 1920 * Returns a character sequence that is a subsequence of this sequence. 1921 * 1922 * <p> An invocation of this method of the form 1923 * 1924 * <blockquote><pre> 1925 * str.subSequence(begin, end)</pre></blockquote> 1926 * 1927 * behaves in exactly the same way as the invocation 1928 * 1929 * <blockquote><pre> 1930 * str.substring(begin, end)</pre></blockquote> 1931 * 1932 * @apiNote 1933 * This method is defined so that the {@code String} class can implement 1934 * the {@link CharSequence} interface. 1935 * 1936 * @param beginIndex the begin index, inclusive. 1937 * @param endIndex the end index, exclusive. 1938 * @return the specified subsequence. 
1939 * 1940 * @throws IndexOutOfBoundsException 1941 * if {@code beginIndex} or {@code endIndex} is negative, 1942 * if {@code endIndex} is greater than {@code length()}, 1943 * or if {@code beginIndex} is greater than {@code endIndex} 1944 * 1945 * @since 1.4 1946 * @spec JSR-51 1947 */ 1948 public CharSequence subSequence(int beginIndex, int endIndex) { 1949 return this.substring(beginIndex, endIndex); 1950 } 1951 1952 /** 1953 * Concatenates the specified string to the end of this string. 1954 * <p> 1955 * If the length of the argument string is {@code 0}, then this 1956 * {@code String} object is returned. Otherwise, a 1957 * {@code String} object is returned that represents a character 1958 * sequence that is the concatenation of the character sequence 1959 * represented by this {@code String} object and the character 1960 * sequence represented by the argument string.<p> 1961 * Examples: 1962 * <blockquote><pre> 1963 * "cares".concat("s") returns "caress" 1964 * "to".concat("get").concat("her") returns "together" 1965 * </pre></blockquote> 1966 * 1967 * @param str the {@code String} that is concatenated to the end 1968 * of this {@code String}. 1969 * @return a string that represents the concatenation of this object's 1970 * characters followed by the string argument's characters. 1971 */ 1972 public String concat(String str) { 1973 if (str.isEmpty()) { 1974 return this; 1975 } 1976 return StringConcatHelper.simpleConcat(this, str); 1977 } 1978 1979 /** 1980 * Returns a string resulting from replacing all occurrences of 1981 * {@code oldChar} in this string with {@code newChar}. 1982 * <p> 1983 * If the character {@code oldChar} does not occur in the 1984 * character sequence represented by this {@code String} object, 1985 * then a reference to this {@code String} object is returned. 
1986 * Otherwise, a {@code String} object is returned that 1987 * represents a character sequence identical to the character sequence 1988 * represented by this {@code String} object, except that every 1989 * occurrence of {@code oldChar} is replaced by an occurrence 1990 * of {@code newChar}. 1991 * <p> 1992 * Examples: 1993 * <blockquote><pre> 1994 * "mesquite in your cellar".replace('e', 'o') 1995 * returns "mosquito in your collar" 1996 * "the war of baronets".replace('r', 'y') 1997 * returns "the way of bayonets" 1998 * "sparring with a purple porpoise".replace('p', 't') 1999 * returns "starring with a turtle tortoise" 2000 * "JonL".replace('q', 'x') returns "JonL" (no change) 2001 * </pre></blockquote> 2002 * 2003 * @param oldChar the old character. 2004 * @param newChar the new character. 2005 * @return a string derived from this string by replacing every 2006 * occurrence of {@code oldChar} with {@code newChar}. 2007 */ 2008 public String replace(char oldChar, char newChar) { 2009 if (oldChar != newChar) { 2010 String ret = isLatin1() ? StringLatin1.replace(value, oldChar, newChar) 2011 : StringUTF16.replace(value, oldChar, newChar); 2012 if (ret != null) { 2013 return ret; 2014 } 2015 } 2016 return this; 2017 } 2018 2019 /** 2020 * Tells whether or not this string matches the given <a 2021 * href="../util/regex/Pattern.html#sum">regular expression</a>. 
2022 * 2023 * <p> An invocation of this method of the form 2024 * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the 2025 * same result as the expression 2026 * 2027 * <blockquote> 2028 * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence) 2029 * matches(<i>regex</i>, <i>str</i>)} 2030 * </blockquote> 2031 * 2032 * @param regex 2033 * the regular expression to which this string is to be matched 2034 * 2035 * @return {@code true} if, and only if, this string matches the 2036 * given regular expression 2037 * 2038 * @throws PatternSyntaxException 2039 * if the regular expression's syntax is invalid 2040 * 2041 * @see java.util.regex.Pattern 2042 * 2043 * @since 1.4 2044 * @spec JSR-51 2045 */ 2046 public boolean matches(String regex) { 2047 return Pattern.matches(regex, this); 2048 } 2049 2050 /** 2051 * Returns true if and only if this string contains the specified 2052 * sequence of char values. 2053 * 2054 * @param s the sequence to search for 2055 * @return true if this string contains {@code s}, false otherwise 2056 * @since 1.5 2057 */ 2058 public boolean contains(CharSequence s) { 2059 return indexOf(s.toString()) >= 0; 2060 } 2061 2062 /** 2063 * Replaces the first substring of this string that matches the given <a 2064 * href="../util/regex/Pattern.html#sum">regular expression</a> with the 2065 * given replacement. 
2066 * 2067 * <p> An invocation of this method of the form 2068 * <i>str</i>{@code .replaceFirst(}<i>regex</i>{@code ,} <i>repl</i>{@code )} 2069 * yields exactly the same result as the expression 2070 * 2071 * <blockquote> 2072 * <code> 2073 * {@link java.util.regex.Pattern}.{@link 2074 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2075 * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link 2076 * java.util.regex.Matcher#replaceFirst replaceFirst}(<i>repl</i>) 2077 * </code> 2078 * </blockquote> 2079 * 2080 *<p> 2081 * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the 2082 * replacement string may cause the results to be different than if it were 2083 * being treated as a literal replacement string; see 2084 * {@link java.util.regex.Matcher#replaceFirst}. 2085 * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special 2086 * meaning of these characters, if desired. 2087 * 2088 * @param regex 2089 * the regular expression to which this string is to be matched 2090 * @param replacement 2091 * the string to be substituted for the first match 2092 * 2093 * @return The resulting {@code String} 2094 * 2095 * @throws PatternSyntaxException 2096 * if the regular expression's syntax is invalid 2097 * 2098 * @see java.util.regex.Pattern 2099 * 2100 * @since 1.4 2101 * @spec JSR-51 2102 */ 2103 public String replaceFirst(String regex, String replacement) { 2104 return Pattern.compile(regex).matcher(this).replaceFirst(replacement); 2105 } 2106 2107 /** 2108 * Replaces each substring of this string that matches the given <a 2109 * href="../util/regex/Pattern.html#sum">regular expression</a> with the 2110 * given replacement. 
2111 * 2112 * <p> An invocation of this method of the form 2113 * <i>str</i>{@code .replaceAll(}<i>regex</i>{@code ,} <i>repl</i>{@code )} 2114 * yields exactly the same result as the expression 2115 * 2116 * <blockquote> 2117 * <code> 2118 * {@link java.util.regex.Pattern}.{@link 2119 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2120 * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link 2121 * java.util.regex.Matcher#replaceAll replaceAll}(<i>repl</i>) 2122 * </code> 2123 * </blockquote> 2124 * 2125 *<p> 2126 * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the 2127 * replacement string may cause the results to be different than if it were 2128 * being treated as a literal replacement string; see 2129 * {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}. 2130 * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special 2131 * meaning of these characters, if desired. 2132 * 2133 * @param regex 2134 * the regular expression to which this string is to be matched 2135 * @param replacement 2136 * the string to be substituted for each match 2137 * 2138 * @return The resulting {@code String} 2139 * 2140 * @throws PatternSyntaxException 2141 * if the regular expression's syntax is invalid 2142 * 2143 * @see java.util.regex.Pattern 2144 * 2145 * @since 1.4 2146 * @spec JSR-51 2147 */ 2148 public String replaceAll(String regex, String replacement) { 2149 return Pattern.compile(regex).matcher(this).replaceAll(replacement); 2150 } 2151 2152 /** 2153 * Replaces each substring of this string that matches the literal target 2154 * sequence with the specified literal replacement sequence. The 2155 * replacement proceeds from the beginning of the string to the end, for 2156 * example, replacing "aa" with "b" in the string "aaa" will result in 2157 * "ba" rather than "ab". 
2158 * 2159 * @param target The sequence of char values to be replaced 2160 * @param replacement The replacement sequence of char values 2161 * @return The resulting string 2162 * @since 1.5 2163 */ 2164 public String replace(CharSequence target, CharSequence replacement) { 2165 String trgtStr = target.toString(); 2166 String replStr = replacement.toString(); 2167 int thisLen = length(); 2168 int trgtLen = trgtStr.length(); 2169 int replLen = replStr.length(); 2170 2171 if (trgtLen > 0) { 2172 if (trgtLen == 1 && replLen == 1) { 2173 return replace(trgtStr.charAt(0), replStr.charAt(0)); 2174 } 2175 2176 boolean thisIsLatin1 = this.isLatin1(); 2177 boolean trgtIsLatin1 = trgtStr.isLatin1(); 2178 boolean replIsLatin1 = replStr.isLatin1(); 2179 String ret = (thisIsLatin1 && trgtIsLatin1 && replIsLatin1) 2180 ? StringLatin1.replace(value, thisLen, 2181 trgtStr.value, trgtLen, 2182 replStr.value, replLen) 2183 : StringUTF16.replace(value, thisLen, thisIsLatin1, 2184 trgtStr.value, trgtLen, trgtIsLatin1, 2185 replStr.value, replLen, replIsLatin1); 2186 if (ret != null) { 2187 return ret; 2188 } 2189 return this; 2190 2191 } else { // trgtLen == 0 2192 int resultLen; 2193 try { 2194 resultLen = Math.addExact(thisLen, Math.multiplyExact( 2195 Math.addExact(thisLen, 1), replLen)); 2196 } catch (ArithmeticException ignored) { 2197 throw new OutOfMemoryError(); 2198 } 2199 2200 StringBuilder sb = new StringBuilder(resultLen); 2201 sb.append(replStr); 2202 for (int i = 0; i < thisLen; ++i) { 2203 sb.append(charAt(i)).append(replStr); 2204 } 2205 return sb.toString(); 2206 } 2207 } 2208 2209 /** 2210 * Splits this string around matches of the given 2211 * <a href="../util/regex/Pattern.html#sum">regular expression</a>. 2212 * 2213 * <p> The array returned by this method contains each substring of this 2214 * string that is terminated by another substring that matches the given 2215 * expression or is terminated by the end of the string. 
The substrings in 2216 * the array are in the order in which they occur in this string. If the 2217 * expression does not match any part of the input then the resulting array 2218 * has just one element, namely this string. 2219 * 2220 * <p> When there is a positive-width match at the beginning of this 2221 * string then an empty leading substring is included at the beginning 2222 * of the resulting array. A zero-width match at the beginning however 2223 * never produces such empty leading substring. 2224 * 2225 * <p> The {@code limit} parameter controls the number of times the 2226 * pattern is applied and therefore affects the length of the resulting 2227 * array. 2228 * <ul> 2229 * <li><p> 2230 * If the <i>limit</i> is positive then the pattern will be applied 2231 * at most <i>limit</i> - 1 times, the array's length will be 2232 * no greater than <i>limit</i>, and the array's last entry will contain 2233 * all input beyond the last matched delimiter.</p></li> 2234 * 2235 * <li><p> 2236 * If the <i>limit</i> is zero then the pattern will be applied as 2237 * many times as possible, the array can have any length, and trailing 2238 * empty strings will be discarded.</p></li> 2239 * 2240 * <li><p> 2241 * If the <i>limit</i> is negative then the pattern will be applied 2242 * as many times as possible and the array can have any length.</p></li> 2243 * </ul> 2244 * 2245 * <p> The string {@code "boo:and:foo"}, for example, yields the 2246 * following results with these parameters: 2247 * 2248 * <blockquote><table class="plain"> 2249 * <caption style="display:none">Split example showing regex, limit, and result</caption> 2250 * <thead> 2251 * <tr> 2252 * <th scope="col">Regex</th> 2253 * <th scope="col">Limit</th> 2254 * <th scope="col">Result</th> 2255 * </tr> 2256 * </thead> 2257 * <tbody> 2258 * <tr><th scope="row" rowspan="3" style="font-weight:normal">:</th> 2259 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th> 2260 * 
<td>{@code { "boo", "and:foo" }}</td></tr> 2261 * <tr><!-- : --> 2262 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th> 2263 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2264 * <tr><!-- : --> 2265 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th> 2266 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2267 * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th> 2268 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th> 2269 * <td>{@code { "b", "", ":and:f", "", "" }}</td></tr> 2270 * <tr><!-- o --> 2271 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th> 2272 * <td>{@code { "b", "", ":and:f", "", "" }}</td></tr> 2273 * <tr><!-- o --> 2274 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th> 2275 * <td>{@code { "b", "", ":and:f" }}</td></tr> 2276 * </tbody> 2277 * </table></blockquote> 2278 * 2279 * <p> An invocation of this method of the form 2280 * <i>str.</i>{@code split(}<i>regex</i>{@code ,} <i>n</i>{@code )} 2281 * yields the same result as the expression 2282 * 2283 * <blockquote> 2284 * <code> 2285 * {@link java.util.regex.Pattern}.{@link 2286 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2287 * java.util.regex.Pattern#split(java.lang.CharSequence,int) split}(<i>str</i>, <i>n</i>) 2288 * </code> 2289 * </blockquote> 2290 * 2291 * 2292 * @param regex 2293 * the delimiting regular expression 2294 * 2295 * @param limit 2296 * the result threshold, as described above 2297 * 2298 * @return the array of strings computed by splitting this string 2299 * around matches of the given regular expression 2300 * 2301 * @throws PatternSyntaxException 2302 * if the regular expression's syntax is invalid 2303 * 2304 * @see java.util.regex.Pattern 2305 * 2306 * @since 1.4 2307 * @spec JSR-51 2308 */ 2309 public String[] split(String regex, int limit) { 2310 /* 
fastpath if the regex is a 2311 (1)one-char String and this character is not one of the 2312 RegEx's meta characters ".$|()[{^?*+\\", or 2313 (2)two-char String and the first char is the backslash and 2314 the second is not the ascii digit or ascii letter. 2315 */ 2316 char ch = 0; 2317 if (((regex.length() == 1 && 2318 ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) || 2319 (regex.length() == 2 && 2320 regex.charAt(0) == '\\' && 2321 (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 && 2322 ((ch-'a')|('z'-ch)) < 0 && 2323 ((ch-'A')|('Z'-ch)) < 0)) && 2324 (ch < Character.MIN_HIGH_SURROGATE || 2325 ch > Character.MAX_LOW_SURROGATE)) 2326 { 2327 int off = 0; 2328 int next = 0; 2329 boolean limited = limit > 0; 2330 ArrayList<String> list = new ArrayList<>(); 2331 while ((next = indexOf(ch, off)) != -1) { 2332 if (!limited || list.size() < limit - 1) { 2333 list.add(substring(off, next)); 2334 off = next + 1; 2335 } else { // last one 2336 //assert (list.size() == limit - 1); 2337 int last = length(); 2338 list.add(substring(off, last)); 2339 off = last; 2340 break; 2341 } 2342 } 2343 // If no match was found, return this 2344 if (off == 0) 2345 return new String[]{this}; 2346 2347 // Add remaining segment 2348 if (!limited || list.size() < limit) 2349 list.add(substring(off, length())); 2350 2351 // Construct result 2352 int resultSize = list.size(); 2353 if (limit == 0) { 2354 while (resultSize > 0 && list.get(resultSize - 1).isEmpty()) { 2355 resultSize--; 2356 } 2357 } 2358 String[] result = new String[resultSize]; 2359 return list.subList(0, resultSize).toArray(result); 2360 } 2361 return Pattern.compile(regex).split(this, limit); 2362 } 2363 2364 /** 2365 * Splits this string around matches of the given <a 2366 * href="../util/regex/Pattern.html#sum">regular expression</a>. 2367 * 2368 * <p> This method works as if by invoking the two-argument {@link 2369 * #split(String, int) split} method with the given expression and a limit 2370 * argument of zero. 
Trailing empty strings are therefore not included in 2371 * the resulting array. 2372 * 2373 * <p> The string {@code "boo:and:foo"}, for example, yields the following 2374 * results with these expressions: 2375 * 2376 * <blockquote><table class="plain"> 2377 * <caption style="display:none">Split examples showing regex and result</caption> 2378 * <thead> 2379 * <tr> 2380 * <th scope="col">Regex</th> 2381 * <th scope="col">Result</th> 2382 * </tr> 2383 * </thead> 2384 * <tbody> 2385 * <tr><th scope="row" style="text-weight:normal">:</th> 2386 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2387 * <tr><th scope="row" style="text-weight:normal">o</th> 2388 * <td>{@code { "b", "", ":and:f" }}</td></tr> 2389 * </tbody> 2390 * </table></blockquote> 2391 * 2392 * 2393 * @param regex 2394 * the delimiting regular expression 2395 * 2396 * @return the array of strings computed by splitting this string 2397 * around matches of the given regular expression 2398 * 2399 * @throws PatternSyntaxException 2400 * if the regular expression's syntax is invalid 2401 * 2402 * @see java.util.regex.Pattern 2403 * 2404 * @since 1.4 2405 * @spec JSR-51 2406 */ 2407 public String[] split(String regex) { 2408 return split(regex, 0); 2409 } 2410 2411 /** 2412 * Returns a new String composed of copies of the 2413 * {@code CharSequence elements} joined together with a copy of 2414 * the specified {@code delimiter}. 2415 * 2416 * <blockquote>For example, 2417 * <pre>{@code 2418 * String message = String.join("-", "Java", "is", "cool"); 2419 * // message returned is: "Java-is-cool" 2420 * }</pre></blockquote> 2421 * 2422 * Note that if an element is null, then {@code "null"} is added. 2423 * 2424 * @param delimiter the delimiter that separates each element 2425 * @param elements the elements to join together. 
2426 * 2427 * @return a new {@code String} that is composed of the {@code elements} 2428 * separated by the {@code delimiter} 2429 * 2430 * @throws NullPointerException If {@code delimiter} or {@code elements} 2431 * is {@code null} 2432 * 2433 * @see java.util.StringJoiner 2434 * @since 1.8 2435 */ 2436 public static String join(CharSequence delimiter, CharSequence... elements) { 2437 Objects.requireNonNull(delimiter); 2438 Objects.requireNonNull(elements); 2439 // Number of elements not likely worth Arrays.stream overhead. 2440 StringJoiner joiner = new StringJoiner(delimiter); 2441 for (CharSequence cs: elements) { 2442 joiner.add(cs); 2443 } 2444 return joiner.toString(); 2445 } 2446 2447 /** 2448 * Returns a new {@code String} composed of copies of the 2449 * {@code CharSequence elements} joined together with a copy of the 2450 * specified {@code delimiter}. 2451 * 2452 * <blockquote>For example, 2453 * <pre>{@code 2454 * List<String> strings = List.of("Java", "is", "cool"); 2455 * String message = String.join(" ", strings); 2456 * //message returned is: "Java is cool" 2457 * 2458 * Set<String> strings = 2459 * new LinkedHashSet<>(List.of("Java", "is", "very", "cool")); 2460 * String message = String.join("-", strings); 2461 * //message returned is: "Java-is-very-cool" 2462 * }</pre></blockquote> 2463 * 2464 * Note that if an individual element is {@code null}, then {@code "null"} is added. 2465 * 2466 * @param delimiter a sequence of characters that is used to separate each 2467 * of the {@code elements} in the resulting {@code String} 2468 * @param elements an {@code Iterable} that will have its {@code elements} 2469 * joined together. 2470 * 2471 * @return a new {@code String} that is composed from the {@code elements} 2472 * argument 2473 * 2474 * @throws NullPointerException If {@code delimiter} or {@code elements} 2475 * is {@code null} 2476 * 2477 * @see #join(CharSequence,CharSequence...) 
2478 * @see java.util.StringJoiner 2479 * @since 1.8 2480 */ 2481 public static String join(CharSequence delimiter, 2482 Iterable<? extends CharSequence> elements) { 2483 Objects.requireNonNull(delimiter); 2484 Objects.requireNonNull(elements); 2485 StringJoiner joiner = new StringJoiner(delimiter); 2486 for (CharSequence cs: elements) { 2487 joiner.add(cs); 2488 } 2489 return joiner.toString(); 2490 } 2491 2492 /** 2493 * Converts all of the characters in this {@code String} to lower 2494 * case using the rules of the given {@code Locale}. Case mapping is based 2495 * on the Unicode Standard version specified by the {@link java.lang.Character Character} 2496 * class. Since case mappings are not always 1:1 char mappings, the resulting 2497 * {@code String} may be a different length than the original {@code String}. 2498 * <p> 2499 * Examples of lowercase mappings are in the following table: 2500 * <table class="plain"> 2501 * <caption style="display:none">Lowercase mapping examples showing language code of locale, upper case, lower case, and description</caption> 2502 * <thead> 2503 * <tr> 2504 * <th scope="col">Language Code of Locale</th> 2505 * <th scope="col">Upper Case</th> 2506 * <th scope="col">Lower Case</th> 2507 * <th scope="col">Description</th> 2508 * </tr> 2509 * </thead> 2510 * <tbody> 2511 * <tr> 2512 * <td>tr (Turkish)</td> 2513 * <th scope="row" style="font-weight:normal; text-align:left">\u0130</th> 2514 * <td>\u0069</td> 2515 * <td>capital letter I with dot above -> small letter i</td> 2516 * </tr> 2517 * <tr> 2518 * <td>tr (Turkish)</td> 2519 * <th scope="row" style="font-weight:normal; text-align:left">\u0049</th> 2520 * <td>\u0131</td> 2521 * <td>capital letter I -> small letter dotless i </td> 2522 * </tr> 2523 * <tr> 2524 * <td>(all)</td> 2525 * <th scope="row" style="font-weight:normal; text-align:left">French Fries</th> 2526 * <td>french fries</td> 2527 * <td>lowercased all chars in String</td> 2528 * </tr> 2529 * <tr> 2530 * 
<td>(all)</td> 2531 * <th scope="row" style="font-weight:normal; text-align:left"> 2532 * ΙΧΘΥΣ</th> 2533 * <td>ιχθυσ</td> 2534 * <td>lowercased all chars in String</td> 2535 * </tr> 2536 * </tbody> 2537 * </table> 2538 * 2539 * @param locale use the case transformation rules for this locale 2540 * @return the {@code String}, converted to lowercase. 2541 * @see java.lang.String#toLowerCase() 2542 * @see java.lang.String#toUpperCase() 2543 * @see java.lang.String#toUpperCase(Locale) 2544 * @since 1.1 2545 */ 2546 public String toLowerCase(Locale locale) { 2547 return isLatin1() ? StringLatin1.toLowerCase(this, value, locale) 2548 : StringUTF16.toLowerCase(this, value, locale); 2549 } 2550 2551 /** 2552 * Converts all of the characters in this {@code String} to lower 2553 * case using the rules of the default locale. This is equivalent to calling 2554 * {@code toLowerCase(Locale.getDefault())}. 2555 * <p> 2556 * <b>Note:</b> This method is locale sensitive, and may produce unexpected 2557 * results if used for strings that are intended to be interpreted locale 2558 * independently. 2559 * Examples are programming language identifiers, protocol keys, and HTML 2560 * tags. 2561 * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale 2562 * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the 2563 * LATIN SMALL LETTER DOTLESS I character. 2564 * To obtain correct results for locale insensitive strings, use 2565 * {@code toLowerCase(Locale.ROOT)}. 2566 * 2567 * @return the {@code String}, converted to lowercase. 2568 * @see java.lang.String#toLowerCase(Locale) 2569 */ 2570 public String toLowerCase() { 2571 return toLowerCase(Locale.getDefault()); 2572 } 2573 2574 /** 2575 * Converts all of the characters in this {@code String} to upper 2576 * case using the rules of the given {@code Locale}. Case mapping is based 2577 * on the Unicode Standard version specified by the {@link java.lang.Character Character} 2578 * class. 
Since case mappings are not always 1:1 char mappings, the resulting 2579 * {@code String} may be a different length than the original {@code String}. 2580 * <p> 2581 * Examples of locale-sensitive and 1:M case mappings are in the following table. 2582 * 2583 * <table class="plain"> 2584 * <caption style="display:none">Examples of locale-sensitive and 1:M case mappings. Shows Language code of locale, lower case, upper case, and description.</caption> 2585 * <thead> 2586 * <tr> 2587 * <th scope="col">Language Code of Locale</th> 2588 * <th scope="col">Lower Case</th> 2589 * <th scope="col">Upper Case</th> 2590 * <th scope="col">Description</th> 2591 * </tr> 2592 * </thead> 2593 * <tbody> 2594 * <tr> 2595 * <td>tr (Turkish)</td> 2596 * <th scope="row" style="font-weight:normal; text-align:left">\u0069</th> 2597 * <td>\u0130</td> 2598 * <td>small letter i -> capital letter I with dot above</td> 2599 * </tr> 2600 * <tr> 2601 * <td>tr (Turkish)</td> 2602 * <th scope="row" style="font-weight:normal; text-align:left">\u0131</th> 2603 * <td>\u0049</td> 2604 * <td>small letter dotless i -> capital letter I</td> 2605 * </tr> 2606 * <tr> 2607 * <td>(all)</td> 2608 * <th scope="row" style="font-weight:normal; text-align:left">\u00df</th> 2609 * <td>\u0053 \u0053</td> 2610 * <td>small letter sharp s -> two letters: SS</td> 2611 * </tr> 2612 * <tr> 2613 * <td>(all)</td> 2614 * <th scope="row" style="font-weight:normal; text-align:left">Fahrvergnügen</th> 2615 * <td>FAHRVERGNÜGEN</td> 2616 * <td></td> 2617 * </tr> 2618 * </tbody> 2619 * </table> 2620 * @param locale use the case transformation rules for this locale 2621 * @return the {@code String}, converted to uppercase. 2622 * @see java.lang.String#toUpperCase() 2623 * @see java.lang.String#toLowerCase() 2624 * @see java.lang.String#toLowerCase(Locale) 2625 * @since 1.1 2626 */ 2627 public String toUpperCase(Locale locale) { 2628 return isLatin1() ? 
StringLatin1.toUpperCase(this, value, locale) 2629 : StringUTF16.toUpperCase(this, value, locale); 2630 } 2631 2632 /** 2633 * Converts all of the characters in this {@code String} to upper 2634 * case using the rules of the default locale. This method is equivalent to 2635 * {@code toUpperCase(Locale.getDefault())}. 2636 * <p> 2637 * <b>Note:</b> This method is locale sensitive, and may produce unexpected 2638 * results if used for strings that are intended to be interpreted locale 2639 * independently. 2640 * Examples are programming language identifiers, protocol keys, and HTML 2641 * tags. 2642 * For instance, {@code "title".toUpperCase()} in a Turkish locale 2643 * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the 2644 * LATIN CAPITAL LETTER I WITH DOT ABOVE character. 2645 * To obtain correct results for locale insensitive strings, use 2646 * {@code toUpperCase(Locale.ROOT)}. 2647 * 2648 * @return the {@code String}, converted to uppercase. 2649 * @see java.lang.String#toUpperCase(Locale) 2650 */ 2651 public String toUpperCase() { 2652 return toUpperCase(Locale.getDefault()); 2653 } 2654 2655 /** 2656 * Returns a string whose value is this string, with all leading 2657 * and trailing space removed, where space is defined 2658 * as any character whose codepoint is less than or equal to 2659 * {@code 'U+0020'} (the space character). 2660 * <p> 2661 * If this {@code String} object represents an empty character 2662 * sequence, or the first and last characters of character sequence 2663 * represented by this {@code String} object both have codes 2664 * that are not space (as defined above), then a 2665 * reference to this {@code String} object is returned. 2666 * <p> 2667 * Otherwise, if all characters in this string are space (as 2668 * defined above), then a {@code String} object representing an 2669 * empty string is returned. 
2670 * <p> 2671 * Otherwise, let <i>k</i> be the index of the first character in the 2672 * string whose code is not a space (as defined above) and let 2673 * <i>m</i> be the index of the last character in the string whose code 2674 * is not a space (as defined above). A {@code String} 2675 * object is returned, representing the substring of this string that 2676 * begins with the character at index <i>k</i> and ends with the 2677 * character at index <i>m</i>-that is, the result of 2678 * {@code this.substring(k, m + 1)}. 2679 * <p> 2680 * This method may be used to trim space (as defined above) from 2681 * the beginning and end of a string. 2682 * 2683 * @return a string whose value is this string, with all leading 2684 * and trailing space removed, or this string if it 2685 * has no leading or trailing space. 2686 */ 2687 public String trim() { 2688 String ret = isLatin1() ? StringLatin1.trim(value) 2689 : StringUTF16.trim(value); 2690 return ret == null ? this : ret; 2691 } 2692 2693 /** 2694 * Returns a string whose value is this string, with all leading 2695 * and trailing {@linkplain Character#isWhitespace(int) white space} 2696 * removed. 2697 * <p> 2698 * If this {@code String} object represents an empty string, 2699 * or if all code points in this string are 2700 * {@linkplain Character#isWhitespace(int) white space}, then an empty string 2701 * is returned. 2702 * <p> 2703 * Otherwise, returns a substring of this string beginning with the first 2704 * code point that is not a {@linkplain Character#isWhitespace(int) white space} 2705 * up to and including the last code point that is not a 2706 * {@linkplain Character#isWhitespace(int) white space}. 2707 * <p> 2708 * This method may be used to strip 2709 * {@linkplain Character#isWhitespace(int) white space} from 2710 * the beginning and end of a string. 
2711 * 2712 * @return a string whose value is this string, with all leading 2713 * and trailing white space removed 2714 * 2715 * @see Character#isWhitespace(int) 2716 * 2717 * @since 11 2718 */ 2719 public String strip() { 2720 String ret = isLatin1() ? StringLatin1.strip(value) 2721 : StringUTF16.strip(value); 2722 return ret == null ? this : ret; 2723 } 2724 2725 /** 2726 * Returns a string whose value is this string, with all leading 2727 * {@linkplain Character#isWhitespace(int) white space} removed. 2728 * <p> 2729 * If this {@code String} object represents an empty string, 2730 * or if all code points in this string are 2731 * {@linkplain Character#isWhitespace(int) white space}, then an empty string 2732 * is returned. 2733 * <p> 2734 * Otherwise, returns a substring of this string beginning with the first 2735 * code point that is not a {@linkplain Character#isWhitespace(int) white space} 2736 * up to and including the last code point of this string. 2737 * <p> 2738 * This method may be used to trim 2739 * {@linkplain Character#isWhitespace(int) white space} from 2740 * the beginning of a string. 2741 * 2742 * @return a string whose value is this string, with all leading white 2743 * space removed 2744 * 2745 * @see Character#isWhitespace(int) 2746 * 2747 * @since 11 2748 */ 2749 public String stripLeading() { 2750 String ret = isLatin1() ? StringLatin1.stripLeading(value) 2751 : StringUTF16.stripLeading(value); 2752 return ret == null ? this : ret; 2753 } 2754 2755 /** 2756 * Returns a string whose value is this string, with all trailing 2757 * {@linkplain Character#isWhitespace(int) white space} removed. 2758 * <p> 2759 * If this {@code String} object represents an empty string, 2760 * or if all characters in this string are 2761 * {@linkplain Character#isWhitespace(int) white space}, then an empty string 2762 * is returned. 
2763 * <p> 2764 * Otherwise, returns a substring of this string beginning with the first 2765 * code point of this string up to and including the last code point 2766 * that is not a {@linkplain Character#isWhitespace(int) white space}. 2767 * <p> 2768 * This method may be used to trim 2769 * {@linkplain Character#isWhitespace(int) white space} from 2770 * the end of a string. 2771 * 2772 * @return a string whose value is this string, with all trailing white 2773 * space removed 2774 * 2775 * @see Character#isWhitespace(int) 2776 * 2777 * @since 11 2778 */ 2779 public String stripTrailing() { 2780 String ret = isLatin1() ? StringLatin1.stripTrailing(value) 2781 : StringUTF16.stripTrailing(value); 2782 return ret == null ? this : ret; 2783 } 2784 2785 /** 2786 * Returns {@code true} if the string is empty or contains only 2787 * {@linkplain Character#isWhitespace(int) white space} codepoints, 2788 * otherwise {@code false}. 2789 * 2790 * @return {@code true} if the string is empty or contains only 2791 * {@linkplain Character#isWhitespace(int) white space} codepoints, 2792 * otherwise {@code false} 2793 * 2794 * @see Character#isWhitespace(int) 2795 * 2796 * @since 11 2797 */ 2798 public boolean isBlank() { 2799 return indexOfNonWhitespace() == length(); 2800 } 2801 2802 /** 2803 * Returns a stream of lines extracted from this string, 2804 * separated by line terminators. 2805 * <p> 2806 * A <i>line terminator</i> is one of the following: 2807 * a line feed character {@code "\n"} (U+000A), 2808 * a carriage return character {@code "\r"} (U+000D), 2809 * or a carriage return followed immediately by a line feed 2810 * {@code "\r\n"} (U+000D U+000A). 2811 * <p> 2812 * A <i>line</i> is either a sequence of zero or more characters 2813 * followed by a line terminator, or it is a sequence of one or 2814 * more characters followed by the end of the string. A 2815 * line does not include the line terminator. 
2816 * <p> 2817 * The stream returned by this method contains the lines from 2818 * this string in the order in which they occur. 2819 * 2820 * @apiNote This definition of <i>line</i> implies that an empty 2821 * string has zero lines and that there is no empty line 2822 * following a line terminator at the end of a string. 2823 * 2824 * @implNote This method provides better performance than 2825 * split("\R") by supplying elements lazily and 2826 * by faster search of new line terminators. 2827 * 2828 * @return the stream of lines extracted from this string 2829 * 2830 * @since 11 2831 */ 2832 public Stream<String> lines() { 2833 return isLatin1() ? StringLatin1.lines(value) : StringUTF16.lines(value); 2834 } 2835 2836 /** 2837 * Adjusts the indentation of each line of this string based on the value of 2838 * {@code n}, and normalizes line termination characters. 2839 * <p> 2840 * This string is conceptually separated into lines using 2841 * {@link String#lines()}. Each line is then adjusted as described below 2842 * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting 2843 * lines are then concatenated and returned. 2844 * <p> 2845 * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the 2846 * beginning of each line. 2847 * <p> 2848 * If {@code n < 0} then up to {@code n} 2849 * {@linkplain Character#isWhitespace(int) white space characters} are removed 2850 * from the beginning of each line. If a given line does not contain 2851 * sufficient white space then all leading 2852 * {@linkplain Character#isWhitespace(int) white space characters} are removed. 2853 * Each white space character is treated as a single character. In 2854 * particular, the tab character {@code "\t"} (U+0009) is considered a 2855 * single character; it is not expanded. 2856 * <p> 2857 * If {@code n == 0} then the line remains unchanged. However, line 2858 * terminators are still normalized. 
2859 * 2860 * @param n number of leading 2861 * {@linkplain Character#isWhitespace(int) white space characters} 2862 * to add or remove 2863 * 2864 * @return string with indentation adjusted and line endings normalized 2865 * 2866 * @see String#lines() 2867 * @see String#isBlank() 2868 * @see Character#isWhitespace(int) 2869 * 2870 * @since 12 2871 */ 2872 public String indent(int n) { 2873 if (isEmpty()) { 2874 return ""; 2875 } 2876 Stream<String> stream = lines(); 2877 if (n > 0) { 2878 final String spaces = " ".repeat(n); 2879 stream = stream.map(s -> spaces + s); 2880 } else if (n == Integer.MIN_VALUE) { 2881 stream = stream.map(s -> s.stripLeading()); 2882 } else if (n < 0) { 2883 stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace()))); 2884 } 2885 return stream.collect(Collectors.joining("\n", "", "\n")); 2886 } 2887 2888 private int indexOfNonWhitespace() { 2889 return isLatin1() ? StringLatin1.indexOfNonWhitespace(value) 2890 : StringUTF16.indexOfNonWhitespace(value); 2891 } 2892 2893 private int lastIndexOfNonWhitespace() { 2894 return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value) 2895 : StringUTF16.lastIndexOfNonWhitespace(value); 2896 } 2897 2898 /** 2899 * Returns a string whose value is this string, with incidental 2900 * {@linkplain Character#isWhitespace(int) white space} removed from 2901 * the beginning and end of every line. 2902 * <p> 2903 * Incidental {@linkplain Character#isWhitespace(int) white space} 2904 * is often present in a text block to align the content with the opening 2905 * delimiter. For example, in the following code, dots represent incidental 2906 * {@linkplain Character#isWhitespace(int) white space}: 2907 * <blockquote><pre> 2908 * String html = """ 2909 * ..............<html> 2910 * .............. <body> 2911 * .............. <p>Hello, world</p> 2912 * .............. 
</body> 2913 * ..............</html> 2914 * .............."""; 2915 * </pre></blockquote> 2916 * This method treats the incidental 2917 * {@linkplain Character#isWhitespace(int) white space} as indentation to be 2918 * stripped, producing a string that preserves the relative indentation of 2919 * the content. Using | to visualize the start of each line of the string: 2920 * <blockquote><pre> 2921 * |<html> 2922 * | <body> 2923 * | <p>Hello, world</p> 2924 * | </body> 2925 * |</html> 2926 * </pre></blockquote> 2927 * First, the individual lines of this string are extracted as if by using 2928 * {@link String#lines()}. 2929 * <p> 2930 * Then, the <i>minimum indentation</i> (min) is determined as follows. 2931 * For each non-blank line (as defined by {@link String#isBlank()}), the 2932 * leading {@linkplain Character#isWhitespace(int) white space} characters are 2933 * counted. The leading {@linkplain Character#isWhitespace(int) white space} 2934 * characters on the last line are also counted even if 2935 * {@linkplain String#isBlank() blank}. The <i>min</i> value is the smallest 2936 * of these counts. 2937 * <p> 2938 * For each {@linkplain String#isBlank() non-blank} line, <i>min</i> leading 2939 * {@linkplain Character#isWhitespace(int) white space} characters are removed, 2940 * and any trailing {@linkplain Character#isWhitespace(int) white space} 2941 * characters are removed. {@linkplain String#isBlank() Blank} lines are 2942 * replaced with the empty string. 2943 * 2944 * <p> 2945 * Finally, the lines are joined into a new string, using the LF character 2946 * {@code "\n"} (U+000A) to separate lines. 2947 * 2948 * @apiNote 2949 * This method's primary purpose is to shift a block of lines as far as 2950 * possible to the left, while preserving relative indentation. Lines 2951 * that were indented the least will thus have no leading 2952 * {@linkplain Character#isWhitespace(int) white space}. 
2953 * The line count of the result will be the same as line count of this 2954 * string. 2955 * If this string ends with a line terminator then the result will end 2956 * with a line terminator. 2957 * 2958 * @implNote 2959 * This method treats all {@linkplain Character#isWhitespace(int) white space} 2960 * characters as having equal width. As long as the indentation on every 2961 * line is consistently composed of the same character sequences, then the 2962 * result will be as described above. 2963 * 2964 * @return string with incidental indentation removed and line 2965 * terminators normalized 2966 * 2967 * @see String#lines() 2968 * @see String#isBlank() 2969 * @see String#indent(int) 2970 * @see Character#isWhitespace(int) 2971 * 2972 * @since 13 2973 * 2974 * @deprecated This method is associated with text blocks, a preview language feature. 2975 * Text blocks and/or this method may be changed or removed in a future release. 2976 */ 2977 @Deprecated(forRemoval=true, since="13") 2978 public String stripIndent() { 2979 int length = length(); 2980 if (length == 0) { 2981 return ""; 2982 } 2983 char lastChar = charAt(length - 1); 2984 boolean optOut = lastChar == '\n' || lastChar == '\r'; 2985 List<String> lines = lines().collect(Collectors.toList()); 2986 final int outdent = optOut ? 0 : outdent(lines); 2987 return lines.stream() 2988 .map(line -> { 2989 int firstNonWhitespace = line.indexOfNonWhitespace(); 2990 int lastNonWhitespace = line.lastIndexOfNonWhitespace(); 2991 int incidentalWhitespace = Math.min(outdent, firstNonWhitespace); 2992 return firstNonWhitespace > lastNonWhitespace 2993 ? "" : line.substring(incidentalWhitespace, lastNonWhitespace); 2994 }) 2995 .collect(Collectors.joining("\n", "", optOut ? "\n" : "")); 2996 } 2997 2998 private static int outdent(List<String> lines) { 2999 // Note: outdent is guaranteed to be zero or positive number. 
3000 // If there isn't a non-blank line then the last must be blank 3001 int outdent = Integer.MAX_VALUE; 3002 for (String line : lines) { 3003 int leadingWhitespace = line.indexOfNonWhitespace(); 3004 if (leadingWhitespace != line.length()) { 3005 outdent = Integer.min(outdent, leadingWhitespace); 3006 } 3007 } 3008 String lastLine = lines.get(lines.size() - 1); 3009 if (lastLine.isBlank()) { 3010 outdent = Integer.min(outdent, lastLine.length()); 3011 } 3012 return outdent; 3013 } 3014 3015 /** 3016 * Returns a string whose value is this string, with escape sequences 3017 * translated as if in a string literal. 3018 * <p> 3019 * Escape sequences are translated as follows; 3020 * <table class="striped"> 3021 * <caption style="display:none">Translation</caption> 3022 * <thead> 3023 * <tr> 3024 * <th scope="col">Escape</th> 3025 * <th scope="col">Name</th> 3026 * <th scope="col">Translation</th> 3027 * </tr> 3028 * </thead> 3029 * <tbody> 3030 * <tr> 3031 * <th scope="row">{@code \u005Cb}</th> 3032 * <td>backspace</td> 3033 * <td>{@code U+0008}</td> 3034 * </tr> 3035 * <tr> 3036 * <th scope="row">{@code \u005Ct}</th> 3037 * <td>horizontal tab</td> 3038 * <td>{@code U+0009}</td> 3039 * </tr> 3040 * <tr> 3041 * <th scope="row">{@code \u005Cn}</th> 3042 * <td>line feed</td> 3043 * <td>{@code U+000A}</td> 3044 * </tr> 3045 * <tr> 3046 * <th scope="row">{@code \u005Cf}</th> 3047 * <td>form feed</td> 3048 * <td>{@code U+000C}</td> 3049 * </tr> 3050 * <tr> 3051 * <th scope="row">{@code \u005Cr}</th> 3052 * <td>carriage return</td> 3053 * <td>{@code U+000D}</td> 3054 * </tr> 3055 * <tr> 3056 * <th scope="row">{@code \u005C"}</th> 3057 * <td>double quote</td> 3058 * <td>{@code U+0022}</td> 3059 * </tr> 3060 * <tr> 3061 * <th scope="row">{@code \u005C'}</th> 3062 * <td>single quote</td> 3063 * <td>{@code U+0027}</td> 3064 * </tr> 3065 * <tr> 3066 * <th scope="row">{@code \u005C\u005C}</th> 3067 * <td>backslash</td> 3068 * <td>{@code U+005C}</td> 3069 * </tr> 3070 * <tr> 3071 
* <th scope="row">{@code \u005C0 - \u005C377}</th> 3072 * <td>octal escape</td> 3073 * <td>code point equivalents</td> 3074 * </tr> 3075 * </tbody> 3076 * </table> 3077 * 3078 * @implNote 3079 * This method does <em>not</em> translate Unicode escapes such as "{@code \u005cu2022}". 3080 * Unicode escapes are translated by the Java compiler when reading input characters and 3081 * are not part of the string literal specification. 3082 * 3083 * @throws IllegalArgumentException when an escape sequence is malformed. 3084 * 3085 * @return String with escape sequences translated. 3086 * 3087 * @jls 3.10.7 Escape Sequences 3088 * 3089 * @since 13 3090 * 3091 * @deprecated This method is associated with text blocks, a preview language feature. 3092 * Text blocks and/or this method may be changed or removed in a future release. 3093 */ 3094 @Deprecated(forRemoval=true, since="13") 3095 public String translateEscapes() { 3096 if (isEmpty()) { 3097 return ""; 3098 } 3099 char[] chars = toCharArray(); 3100 int length = chars.length; 3101 int from = 0; 3102 int to = 0; 3103 while (from < length) { 3104 char ch = chars[from++]; 3105 if (ch == '\\') { 3106 ch = from < length ? chars[from++] : '\0'; 3107 switch (ch) { 3108 case 'b': 3109 ch = '\b'; 3110 break; 3111 case 'f': 3112 ch = '\f'; 3113 break; 3114 case 'n': 3115 ch = '\n'; 3116 break; 3117 case 'r': 3118 ch = '\r'; 3119 break; 3120 case 't': 3121 ch = '\t'; 3122 break; 3123 case '\'': 3124 case '\"': 3125 case '\\': 3126 // as is 3127 break; 3128 case '0': case '1': case '2': case '3': 3129 case '4': case '5': case '6': case '7': 3130 int limit = Integer.min(from + (ch <= '3' ? 
2 : 1), length); 3131 int code = ch - '0'; 3132 while (from < limit) { 3133 ch = chars[from]; 3134 if (ch < '0' || '7' < ch) { 3135 break; 3136 } 3137 from++; 3138 code = (code << 3) | (ch - '0'); 3139 } 3140 ch = (char)code; 3141 break; 3142 default: { 3143 String msg = String.format( 3144 "Invalid escape sequence: \\%c \\\\u%04X", 3145 ch, (int)ch); 3146 throw new IllegalArgumentException(msg); 3147 } 3148 } 3149 } 3150 3151 chars[to++] = ch; 3152 } 3153 3154 return new String(chars, 0, to); 3155 } 3156 3157 /** 3158 * This method allows the application of a function to {@code this} 3159 * string. The function should expect a single String argument 3160 * and produce an {@code R} result. 3161 * <p> 3162 * Any exception thrown by {@code f()} will be propagated to the 3163 * caller. 3164 * 3165 * @param f functional interface to a apply 3166 * 3167 * @param <R> class of the result 3168 * 3169 * @return the result of applying the function to this string 3170 * 3171 * @see java.util.function.Function 3172 * 3173 * @since 12 3174 */ 3175 public <R> R transform(Function<? super String, ? extends R> f) { 3176 return f.apply(this); 3177 } 3178 3179 /** 3180 * This object (which is already a string!) is itself returned. 3181 * 3182 * @return the string itself. 3183 */ 3184 public String toString() { 3185 return this; 3186 } 3187 3188 /** 3189 * Returns a stream of {@code int} zero-extending the {@code char} values 3190 * from this sequence. Any char which maps to a <a 3191 * href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate code 3192 * point</a> is passed through uninterpreted. 3193 * 3194 * @return an IntStream of char values from this sequence 3195 * @since 9 3196 */ 3197 @Override 3198 public IntStream chars() { 3199 return StreamSupport.intStream( 3200 isLatin1() ? 
new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE) 3201 : new StringUTF16.CharsSpliterator(value, Spliterator.IMMUTABLE), 3202 false); 3203 } 3204 3205 3206 /** 3207 * Returns a stream of code point values from this sequence. Any surrogate 3208 * pairs encountered in the sequence are combined as if by {@linkplain 3209 * Character#toCodePoint Character.toCodePoint} and the result is passed 3210 * to the stream. Any other code units, including ordinary BMP characters, 3211 * unpaired surrogates, and undefined code units, are zero-extended to 3212 * {@code int} values which are then passed to the stream. 3213 * 3214 * @return an IntStream of Unicode code points from this sequence 3215 * @since 9 3216 */ 3217 @Override 3218 public IntStream codePoints() { 3219 return StreamSupport.intStream( 3220 isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE) 3221 : new StringUTF16.CodePointsSpliterator(value, Spliterator.IMMUTABLE), 3222 false); 3223 } 3224 3225 /** 3226 * Converts this string to a new character array. 3227 * 3228 * @return a newly allocated character array whose length is the length 3229 * of this string and whose contents are initialized to contain 3230 * the character sequence represented by this string. 3231 */ 3232 public char[] toCharArray() { 3233 return isLatin1() ? StringLatin1.toChars(value) 3234 : StringUTF16.toChars(value); 3235 } 3236 3237 /** 3238 * Returns a formatted string using the specified format string and 3239 * arguments. 3240 * 3241 * <p> The locale always used is the one returned by {@link 3242 * java.util.Locale#getDefault(java.util.Locale.Category) 3243 * Locale.getDefault(Locale.Category)} with 3244 * {@link java.util.Locale.Category#FORMAT FORMAT} category specified. 3245 * 3246 * @param format 3247 * A <a href="../util/Formatter.html#syntax">format string</a> 3248 * 3249 * @param args 3250 * Arguments referenced by the format specifiers in the format 3251 * string. 
If there are more arguments than format specifiers, the 3252 * extra arguments are ignored. The number of arguments is 3253 * variable and may be zero. The maximum number of arguments is 3254 * limited by the maximum dimension of a Java array as defined by 3255 * <cite>The Java™ Virtual Machine Specification</cite>. 3256 * The behaviour on a 3257 * {@code null} argument depends on the <a 3258 * href="../util/Formatter.html#syntax">conversion</a>. 3259 * 3260 * @throws java.util.IllegalFormatException 3261 * If a format string contains an illegal syntax, a format 3262 * specifier that is incompatible with the given arguments, 3263 * insufficient arguments given the format string, or other 3264 * illegal conditions. For specification of all possible 3265 * formatting errors, see the <a 3266 * href="../util/Formatter.html#detail">Details</a> section of the 3267 * formatter class specification. 3268 * 3269 * @return A formatted string 3270 * 3271 * @see java.util.Formatter 3272 * @since 1.5 3273 */ 3274 public static String format(String format, Object... args) { 3275 return new Formatter().format(format, args).toString(); 3276 } 3277 3278 /** 3279 * Returns a formatted string using the specified locale, format string, 3280 * and arguments. 3281 * 3282 * @param l 3283 * The {@linkplain java.util.Locale locale} to apply during 3284 * formatting. If {@code l} is {@code null} then no localization 3285 * is applied. 3286 * 3287 * @param format 3288 * A <a href="../util/Formatter.html#syntax">format string</a> 3289 * 3290 * @param args 3291 * Arguments referenced by the format specifiers in the format 3292 * string. If there are more arguments than format specifiers, the 3293 * extra arguments are ignored. The number of arguments is 3294 * variable and may be zero. The maximum number of arguments is 3295 * limited by the maximum dimension of a Java array as defined by 3296 * <cite>The Java™ Virtual Machine Specification</cite>. 
3297 * The behaviour on a 3298 * {@code null} argument depends on the 3299 * <a href="../util/Formatter.html#syntax">conversion</a>. 3300 * 3301 * @throws java.util.IllegalFormatException 3302 * If a format string contains an illegal syntax, a format 3303 * specifier that is incompatible with the given arguments, 3304 * insufficient arguments given the format string, or other 3305 * illegal conditions. For specification of all possible 3306 * formatting errors, see the <a 3307 * href="../util/Formatter.html#detail">Details</a> section of the 3308 * formatter class specification 3309 * 3310 * @return A formatted string 3311 * 3312 * @see java.util.Formatter 3313 * @since 1.5 3314 */ 3315 public static String format(Locale l, String format, Object... args) { 3316 return new Formatter(l).format(format, args).toString(); 3317 } 3318 3319 /** 3320 * Formats using this string as the format string, and the supplied 3321 * arguments. 3322 * 3323 * @implSpec This method is equivalent to {@code String.format(this, args)}. 3324 * 3325 * @param args 3326 * Arguments referenced by the format specifiers in this string. 3327 * 3328 * @return A formatted string 3329 * 3330 * @see java.lang.String#format(String,Object...) 3331 * @see java.util.Formatter 3332 * 3333 * @since 13 3334 * 3335 * @deprecated This method is associated with text blocks, a preview language feature. 3336 * Text blocks and/or this method may be changed or removed in a future release. 3337 */ 3338 @Deprecated(forRemoval=true, since="13") 3339 public String formatted(Object... args) { 3340 return new Formatter().format(this, args).toString(); 3341 } 3342 3343 /** 3344 * Returns the string representation of the {@code Object} argument. 3345 * 3346 * @param obj an {@code Object}. 3347 * @return if the argument is {@code null}, then a string equal to 3348 * {@code "null"}; otherwise, the value of 3349 * {@code obj.toString()} is returned. 
3350 * @see java.lang.Object#toString() 3351 */ 3352 public static String valueOf(Object obj) { 3353 return (obj == null) ? "null" : obj.toString(); 3354 } 3355 3356 /** 3357 * Returns the string representation of the {@code char} array 3358 * argument. The contents of the character array are copied; subsequent 3359 * modification of the character array does not affect the returned 3360 * string. 3361 * 3362 * @param data the character array. 3363 * @return a {@code String} that contains the characters of the 3364 * character array. 3365 */ 3366 public static String valueOf(char data[]) { 3367 return new String(data); 3368 } 3369 3370 /** 3371 * Returns the string representation of a specific subarray of the 3372 * {@code char} array argument. 3373 * <p> 3374 * The {@code offset} argument is the index of the first 3375 * character of the subarray. The {@code count} argument 3376 * specifies the length of the subarray. The contents of the subarray 3377 * are copied; subsequent modification of the character array does not 3378 * affect the returned string. 3379 * 3380 * @param data the character array. 3381 * @param offset initial offset of the subarray. 3382 * @param count length of the subarray. 3383 * @return a {@code String} that contains the characters of the 3384 * specified subarray of the character array. 3385 * @exception IndexOutOfBoundsException if {@code offset} is 3386 * negative, or {@code count} is negative, or 3387 * {@code offset+count} is larger than 3388 * {@code data.length}. 3389 */ 3390 public static String valueOf(char data[], int offset, int count) { 3391 return new String(data, offset, count); 3392 } 3393 3394 /** 3395 * Equivalent to {@link #valueOf(char[], int, int)}. 3396 * 3397 * @param data the character array. 3398 * @param offset initial offset of the subarray. 3399 * @param count length of the subarray. 3400 * @return a {@code String} that contains the characters of the 3401 * specified subarray of the character array. 
3402 * @exception IndexOutOfBoundsException if {@code offset} is 3403 * negative, or {@code count} is negative, or 3404 * {@code offset+count} is larger than 3405 * {@code data.length}. 3406 */ 3407 public static String copyValueOf(char data[], int offset, int count) { 3408 return new String(data, offset, count); 3409 } 3410 3411 /** 3412 * Equivalent to {@link #valueOf(char[])}. 3413 * 3414 * @param data the character array. 3415 * @return a {@code String} that contains the characters of the 3416 * character array. 3417 */ 3418 public static String copyValueOf(char data[]) { 3419 return new String(data); 3420 } 3421 3422 /** 3423 * Returns the string representation of the {@code boolean} argument. 3424 * 3425 * @param b a {@code boolean}. 3426 * @return if the argument is {@code true}, a string equal to 3427 * {@code "true"} is returned; otherwise, a string equal to 3428 * {@code "false"} is returned. 3429 */ 3430 public static String valueOf(boolean b) { 3431 return b ? "true" : "false"; 3432 } 3433 3434 /** 3435 * Returns the string representation of the {@code char} 3436 * argument. 3437 * 3438 * @param c a {@code char}. 3439 * @return a string of length {@code 1} containing 3440 * as its single character the argument {@code c}. 3441 */ 3442 public static String valueOf(char c) { 3443 if (COMPACT_STRINGS && StringLatin1.canEncode(c)) { 3444 return new String(StringLatin1.toBytes(c), LATIN1); 3445 } 3446 return new String(StringUTF16.toBytes(c), UTF16); 3447 } 3448 3449 /** 3450 * Returns the string representation of the {@code int} argument. 3451 * <p> 3452 * The representation is exactly the one returned by the 3453 * {@code Integer.toString} method of one argument. 3454 * 3455 * @param i an {@code int}. 3456 * @return a string representation of the {@code int} argument. 
3457 * @see java.lang.Integer#toString(int, int) 3458 */ 3459 public static String valueOf(int i) { 3460 return Integer.toString(i); 3461 } 3462 3463 /** 3464 * Returns the string representation of the {@code long} argument. 3465 * <p> 3466 * The representation is exactly the one returned by the 3467 * {@code Long.toString} method of one argument. 3468 * 3469 * @param l a {@code long}. 3470 * @return a string representation of the {@code long} argument. 3471 * @see java.lang.Long#toString(long) 3472 */ 3473 public static String valueOf(long l) { 3474 return Long.toString(l); 3475 } 3476 3477 /** 3478 * Returns the string representation of the {@code float} argument. 3479 * <p> 3480 * The representation is exactly the one returned by the 3481 * {@code Float.toString} method of one argument. 3482 * 3483 * @param f a {@code float}. 3484 * @return a string representation of the {@code float} argument. 3485 * @see java.lang.Float#toString(float) 3486 */ 3487 public static String valueOf(float f) { 3488 return Float.toString(f); 3489 } 3490 3491 /** 3492 * Returns the string representation of the {@code double} argument. 3493 * <p> 3494 * The representation is exactly the one returned by the 3495 * {@code Double.toString} method of one argument. 3496 * 3497 * @param d a {@code double}. 3498 * @return a string representation of the {@code double} argument. 3499 * @see java.lang.Double#toString(double) 3500 */ 3501 public static String valueOf(double d) { 3502 return Double.toString(d); 3503 } 3504 3505 /** 3506 * Returns a canonical representation for the string object. 3507 * <p> 3508 * A pool of strings, initially empty, is maintained privately by the 3509 * class {@code String}. 3510 * <p> 3511 * When the intern method is invoked, if the pool already contains a 3512 * string equal to this {@code String} object as determined by 3513 * the {@link #equals(Object)} method, then the string from the pool is 3514 * returned. 
Otherwise, this {@code String} object is added to the 3515 * pool and a reference to this {@code String} object is returned. 3516 * <p> 3517 * It follows that for any two strings {@code s} and {@code t}, 3518 * {@code s.intern() == t.intern()} is {@code true} 3519 * if and only if {@code s.equals(t)} is {@code true}. 3520 * <p> 3521 * All literal strings and string-valued constant expressions are 3522 * interned. String literals are defined in section 3.10.5 of the 3523 * <cite>The Java™ Language Specification</cite>. 3524 * 3525 * @return a string that has the same contents as this string, but is 3526 * guaranteed to be from a pool of unique strings. 3527 * @jls 3.10.5 String Literals 3528 */ 3529 public native String intern(); 3530 3531 /** 3532 * Returns a string whose value is the concatenation of this 3533 * string repeated {@code count} times. 3534 * <p> 3535 * If this string is empty or count is zero then the empty 3536 * string is returned. 3537 * 3538 * @param count number of times to repeat 3539 * 3540 * @return A string composed of this string repeated 3541 * {@code count} times or the empty string if this 3542 * string is empty or count is zero 3543 * 3544 * @throws IllegalArgumentException if the {@code count} is 3545 * negative. 
3546 * 3547 * @since 11 3548 */ 3549 public String repeat(int count) { 3550 if (count < 0) { 3551 throw new IllegalArgumentException("count is negative: " + count); 3552 } 3553 if (count == 1) { 3554 return this; 3555 } 3556 final int len = value.length; 3557 if (len == 0 || count == 0) { 3558 return ""; 3559 } 3560 if (len == 1) { 3561 final byte[] single = new byte[count]; 3562 Arrays.fill(single, value[0]); 3563 return new String(single, coder); 3564 } 3565 if (Integer.MAX_VALUE / count < len) { 3566 throw new OutOfMemoryError("Repeating " + len + " bytes String " + count + 3567 " times will produce a String exceeding maximum size."); 3568 } 3569 final int limit = len * count; 3570 final byte[] multiple = new byte[limit]; 3571 System.arraycopy(value, 0, multiple, 0, len); 3572 int copied = len; 3573 for (; copied < limit - copied; copied <<= 1) { 3574 System.arraycopy(multiple, 0, multiple, copied, copied); 3575 } 3576 System.arraycopy(multiple, 0, multiple, copied, limit - copied); 3577 return new String(multiple, coder); 3578 } 3579 3580 //////////////////////////////////////////////////////////////// 3581 3582 /** 3583 * Copy character bytes from this string into dst starting at dstBegin. 3584 * This method doesn't perform any range checking. 3585 * 3586 * Invoker guarantees: dst is in UTF16 (inflate itself for asb), if two 3587 * coders are different, and dst is big enough (range check) 3588 * 3589 * @param dstBegin the char index, not offset of byte[] 3590 * @param coder the coder of dst[] 3591 */ 3592 void getBytes(byte dst[], int dstBegin, byte coder) { 3593 if (coder() == coder) { 3594 System.arraycopy(value, 0, dst, dstBegin << coder, value.length); 3595 } else { // this.coder == LATIN && coder == UTF16 3596 StringLatin1.inflate(value, 0, dst, dstBegin, value.length); 3597 } 3598 } 3599 3600 /* 3601 * Package private constructor. Trailing Void argument is there for 3602 * disambiguating it against other (public) constructors. 
3603 * 3604 * Stores the char[] value into a byte[] that each byte represents 3605 * the8 low-order bits of the corresponding character, if the char[] 3606 * contains only latin1 character. Or a byte[] that stores all 3607 * characters in their byte sequences defined by the {@code StringUTF16}. 3608 */ 3609 String(char[] value, int off, int len, Void sig) { 3610 if (len == 0) { 3611 this.value = "".value; 3612 this.coder = "".coder; 3613 return; 3614 } 3615 if (COMPACT_STRINGS) { 3616 byte[] val = StringUTF16.compress(value, off, len); 3617 if (val != null) { 3618 this.value = val; 3619 this.coder = LATIN1; 3620 return; 3621 } 3622 } 3623 this.coder = UTF16; 3624 this.value = StringUTF16.toBytes(value, off, len); 3625 } 3626 3627 /* 3628 * Package private constructor. Trailing Void argument is there for 3629 * disambiguating it against other (public) constructors. 3630 */ 3631 String(AbstractStringBuilder asb, Void sig) { 3632 byte[] val = asb.getValue(); 3633 int length = asb.length(); 3634 if (asb.isLatin1()) { 3635 this.coder = LATIN1; 3636 this.value = Arrays.copyOfRange(val, 0, length); 3637 } else { 3638 if (COMPACT_STRINGS) { 3639 byte[] buf = StringUTF16.compress(val, 0, length); 3640 if (buf != null) { 3641 this.coder = LATIN1; 3642 this.value = buf; 3643 return; 3644 } 3645 } 3646 this.coder = UTF16; 3647 this.value = Arrays.copyOfRange(val, 0, length << 1); 3648 } 3649 } 3650 3651 /* 3652 * Package private constructor which shares value array for speed. 3653 */ 3654 String(byte[] value, byte coder) { 3655 this.value = value; 3656 this.coder = coder; 3657 } 3658 3659 byte coder() { 3660 return COMPACT_STRINGS ? 
coder : UTF16; 3661 } 3662 3663 byte[] value() { 3664 return value; 3665 } 3666 3667 boolean isLatin1() { 3668 return COMPACT_STRINGS && coder == LATIN1; 3669 } 3670 3671 @Native static final byte LATIN1 = 0; 3672 @Native static final byte UTF16 = 1; 3673 3674 /* 3675 * StringIndexOutOfBoundsException if {@code index} is 3676 * negative or greater than or equal to {@code length}. 3677 */ 3678 static void checkIndex(int index, int length) { 3679 if (index < 0 || index >= length) { 3680 throw new StringIndexOutOfBoundsException("index " + index + 3681 ",length " + length); 3682 } 3683 } 3684 3685 /* 3686 * StringIndexOutOfBoundsException if {@code offset} 3687 * is negative or greater than {@code length}. 3688 */ 3689 static void checkOffset(int offset, int length) { 3690 if (offset < 0 || offset > length) { 3691 throw new StringIndexOutOfBoundsException("offset " + offset + 3692 ",length " + length); 3693 } 3694 } 3695 3696 /* 3697 * Check {@code offset}, {@code count} against {@code 0} and {@code length} 3698 * bounds. 3699 * 3700 * @throws StringIndexOutOfBoundsException 3701 * If {@code offset} is negative, {@code count} is negative, 3702 * or {@code offset} is greater than {@code length - count} 3703 */ 3704 static void checkBoundsOffCount(int offset, int count, int length) { 3705 if (offset < 0 || count < 0 || offset > length - count) { 3706 throw new StringIndexOutOfBoundsException( 3707 "offset " + offset + ", count " + count + ", length " + length); 3708 } 3709 } 3710 3711 /* 3712 * Check {@code begin}, {@code end} against {@code 0} and {@code length} 3713 * bounds. 3714 * 3715 * @throws StringIndexOutOfBoundsException 3716 * If {@code begin} is negative, {@code begin} is greater than 3717 * {@code end}, or {@code end} is greater than {@code length}. 
3718 */ 3719 static void checkBoundsBeginEnd(int begin, int end, int length) { 3720 if (begin < 0 || begin > end || end > length) { 3721 throw new StringIndexOutOfBoundsException( 3722 "begin " + begin + ", end " + end + ", length " + length); 3723 } 3724 } 3725 3726 /** 3727 * Returns the string representation of the {@code codePoint} 3728 * argument. 3729 * 3730 * @param codePoint a {@code codePoint}. 3731 * @return a string of length {@code 1} or {@code 2} containing 3732 * as its single character the argument {@code codePoint}. 3733 * @throws IllegalArgumentException if the specified 3734 * {@code codePoint} is not a {@linkplain Character#isValidCodePoint 3735 * valid Unicode code point}. 3736 */ 3737 static String valueOfCodePoint(int codePoint) { 3738 if (COMPACT_STRINGS && StringLatin1.canEncode(codePoint)) { 3739 return new String(StringLatin1.toBytes((char)codePoint), LATIN1); 3740 } else if (Character.isBmpCodePoint(codePoint)) { 3741 return new String(StringUTF16.toBytes((char)codePoint), UTF16); 3742 } else if (Character.isSupplementaryCodePoint(codePoint)) { 3743 return new String(StringUTF16.toBytesSupplementary(codePoint), UTF16); 3744 } 3745 3746 throw new IllegalArgumentException( 3747 format("Not a valid Unicode code point: 0x%X", codePoint)); 3748 } 3749 3750 /** 3751 * Returns an {@link Optional} containing the nominal descriptor for this 3752 * instance, which is the instance itself. 3753 * 3754 * @return an {@link Optional} describing the {@linkplain String} instance 3755 * @since 12 3756 */ 3757 @Override 3758 public Optional<String> describeConstable() { 3759 return Optional.of(this); 3760 } 3761 3762 /** 3763 * Resolves this instance as a {@link ConstantDesc}, the result of which is 3764 * the instance itself. 3765 * 3766 * @param lookup ignored 3767 * @return the {@linkplain String} instance 3768 * @since 12 3769 */ 3770 @Override 3771 public String resolveConstantDesc(MethodHandles.Lookup lookup) { 3772 return this; 3773 } 3774 3775 }