1 /* 2 * Copyright (c) 1994, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.io.ObjectStreamField; 29 import java.io.UnsupportedEncodingException; 30 import java.lang.annotation.Native; 31 import java.nio.charset.Charset; 32 import java.util.ArrayList; 33 import java.util.Arrays; 34 import java.util.Comparator; 35 import java.util.Formatter; 36 import java.util.Locale; 37 import java.util.Objects; 38 import java.util.Spliterator; 39 import java.util.StringJoiner; 40 import java.util.regex.Matcher; 41 import java.util.regex.Pattern; 42 import java.util.regex.PatternSyntaxException; 43 import java.util.stream.Collectors; 44 import java.util.stream.IntStream; 45 import java.util.stream.Stream; 46 import java.util.stream.StreamSupport; 47 import jdk.internal.HotSpotIntrinsicCandidate; 48 import jdk.internal.vm.annotation.Stable; 49 50 import static java.util.function.Predicate.not; 51 52 /** 53 * The {@code String} class represents character strings. All 54 * string literals in Java programs, such as {@code "abc"}, are 55 * implemented as instances of this class. 56 * <p> 57 * Strings are constant; their values cannot be changed after they 58 * are created. String buffers support mutable strings. 59 * Because String objects are immutable they can be shared. For example: 60 * <blockquote><pre> 61 * String str = "abc"; 62 * </pre></blockquote><p> 63 * is equivalent to: 64 * <blockquote><pre> 65 * char data[] = {'a', 'b', 'c'}; 66 * String str = new String(data); 67 * </pre></blockquote><p> 68 * Here are some more examples of how strings can be used: 69 * <blockquote><pre> 70 * System.out.println("abc"); 71 * String cde = "cde"; 72 * System.out.println("abc" + cde); 73 * String c = "abc".substring(2,3); 74 * String d = cde.substring(1, 2); 75 * </pre></blockquote> 76 * <p> 77 * The class {@code String} includes methods for examining 78 * individual characters of the sequence, for comparing strings, for 79 * searching strings, for extracting substrings, and for creating a 80 * copy of a string with all characters translated to uppercase or to 81 * lowercase. Case mapping is based on the Unicode Standard version 82 * specified by the {@link java.lang.Character Character} class. 83 * <p> 84 * The Java language provides special support for the string 85 * concatenation operator ( + ), and for conversion of 86 * other objects to strings. For additional information on string 87 * concatenation and conversion, see <i>The Java™ Language Specification</i>. 88 * 89 * <p> Unless otherwise noted, passing a {@code null} argument to a constructor 90 * or method in this class will cause a {@link NullPointerException} to be 91 * thrown. 92 * 93 * <p>A {@code String} represents a string in the UTF-16 format 94 * in which <em>supplementary characters</em> are represented by <em>surrogate 95 * pairs</em> (see the section <a href="Character.html#unicode">Unicode 96 * Character Representations</a> in the {@code Character} class for 97 * more information). 98 * Index values refer to {@code char} code units, so a supplementary 99 * character uses two positions in a {@code String}. 100 * <p>The {@code String} class provides methods for dealing with 101 * Unicode code points (i.e., characters), in addition to those for 102 * dealing with Unicode code units (i.e., {@code char} values). 103 * 104 * <p>Unless otherwise noted, methods for comparing Strings do not take locale 105 * into account. The {@link java.text.Collator} class provides methods for 106 * finer-grain, locale-sensitive String comparison. 107 * 108 * @implNote The implementation of the string concatenation operator is left to 109 * the discretion of a Java compiler, as long as the compiler ultimately conforms 110 * to <i>The Java™ Language Specification</i>. For example, the {@code javac} compiler 111 * may implement the operator with {@code StringBuffer}, {@code StringBuilder}, 112 * or {@code java.lang.invoke.StringConcatFactory} depending on the JDK version. The 113 * implementation of string conversion is typically through the method {@code toString}, 114 * defined by {@code Object} and inherited by all classes in Java. 115 * 116 * @author Lee Boynton 117 * @author Arthur van Hoff 118 * @author Martin Buchholz 119 * @author Ulf Zibis 120 * @see java.lang.Object#toString() 121 * @see java.lang.StringBuffer 122 * @see java.lang.StringBuilder 123 * @see java.nio.charset.Charset 124 * @since 1.0 125 * @jls 15.18.1 String Concatenation Operator + 126 */ 127 128 public final class String 129 implements java.io.Serializable, Comparable<String>, CharSequence { 130 131 /** 132 * The value is used for character storage. 133 * 134 * @implNote This field is trusted by the VM, and is a subject to 135 * constant folding if String instance is constant. Overwriting this 136 * field after construction will cause problems. 137 * 138 * Additionally, it is marked with {@link Stable} to trust the contents 139 * of the array. No other facility in JDK provides this functionality (yet). 140 * {@link Stable} is safe here, because value is never null. 141 */ 142 @Stable 143 private final byte[] value; 144 145 /** 146 * The identifier of the encoding used to encode the bytes in 147 * {@code value}. The supported values in this implementation are 148 * 149 * LATIN1 150 * UTF16 151 * 152 * @implNote This field is trusted by the VM, and is a subject to 153 * constant folding if String instance is constant. Overwriting this 154 * field after construction will cause problems. 155 */ 156 private final byte coder; 157 158 /** Cache the hash code for the string */ 159 private int hash; // Default to 0 160 161 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 162 private static final long serialVersionUID = -6849794470754667710L; 163 164 /** 165 * If String compaction is disabled, the bytes in {@code value} are 166 * always encoded in UTF16. 167 * 168 * For methods with several possible implementation paths, when String 169 * compaction is disabled, only one code path is taken. 170 * 171 * The instance field value is generally opaque to optimizing JIT 172 * compilers. Therefore, in performance-sensitive place, an explicit 173 * check of the static boolean {@code COMPACT_STRINGS} is done first 174 * before checking the {@code coder} field since the static boolean 175 * {@code COMPACT_STRINGS} would be constant folded away by an 176 * optimizing JIT compiler. The idioms for these cases are as follows. 177 * 178 * For code such as: 179 * 180 * if (coder == LATIN1) { ... } 181 * 182 * can be written more optimally as 183 * 184 * if (coder() == LATIN1) { ... } 185 * 186 * or: 187 * 188 * if (COMPACT_STRINGS && coder == LATIN1) { ... } 189 * 190 * An optimizing JIT compiler can fold the above conditional as: 191 * 192 * COMPACT_STRINGS == true => if (coder == LATIN1) { ... } 193 * COMPACT_STRINGS == false => if (false) { ... } 194 * 195 * @implNote 196 * The actual value for this field is injected by JVM. The static 197 * initialization block is used to set the value here to communicate 198 * that this static final field is not statically foldable, and to 199 * avoid any possible circular dependency during vm initialization. 200 */ 201 static final boolean COMPACT_STRINGS; 202 203 static { 204 COMPACT_STRINGS = true; 205 } 206 207 /** 208 * Class String is special cased within the Serialization Stream Protocol. 209 * 210 * A String instance is written into an ObjectOutputStream according to 211 * <a href="{@docRoot}/../specs/serialization/protocol.html#stream-elements"> 212 * Object Serialization Specification, Section 6.2, "Stream Elements"</a> 213 */ 214 private static final ObjectStreamField[] serialPersistentFields = 215 new ObjectStreamField[0]; 216 217 /** 218 * Initializes a newly created {@code String} object so that it represents 219 * an empty character sequence. Note that use of this constructor is 220 * unnecessary since Strings are immutable. 221 */ 222 public String() { 223 this.value = "".value; 224 this.coder = "".coder; 225 } 226 227 /** 228 * Initializes a newly created {@code String} object so that it represents 229 * the same sequence of characters as the argument; in other words, the 230 * newly created string is a copy of the argument string. Unless an 231 * explicit copy of {@code original} is needed, use of this constructor is 232 * unnecessary since Strings are immutable. 233 * 234 * @param original 235 * A {@code String} 236 */ 237 @HotSpotIntrinsicCandidate 238 public String(String original) { 239 this.value = original.value; 240 this.coder = original.coder; 241 this.hash = original.hash; 242 } 243 244 /** 245 * Allocates a new {@code String} so that it represents the sequence of 246 * characters currently contained in the character array argument. The 247 * contents of the character array are copied; subsequent modification of 248 * the character array does not affect the newly created string. 249 * 250 * @param value 251 * The initial value of the string 252 */ 253 public String(char value[]) { 254 this(value, 0, value.length, null); 255 } 256 257 /** 258 * Allocates a new {@code String} that contains characters from a subarray 259 * of the character array argument. The {@code offset} argument is the 260 * index of the first character of the subarray and the {@code count} 261 * argument specifies the length of the subarray. The contents of the 262 * subarray are copied; subsequent modification of the character array does 263 * not affect the newly created string. 264 * 265 * @param value 266 * Array that is the source of characters 267 * 268 * @param offset 269 * The initial offset 270 * 271 * @param count 272 * The length 273 * 274 * @throws IndexOutOfBoundsException 275 * If {@code offset} is negative, {@code count} is negative, or 276 * {@code offset} is greater than {@code value.length - count} 277 */ 278 public String(char value[], int offset, int count) { 279 this(value, offset, count, rangeCheck(value, offset, count)); 280 } 281 282 private static Void rangeCheck(char[] value, int offset, int count) { 283 checkBoundsOffCount(offset, count, value.length); 284 return null; 285 } 286 287 /** 288 * Allocates a new {@code String} that contains characters from a subarray 289 * of the <a href="Character.html#unicode">Unicode code point</a> array 290 * argument. The {@code offset} argument is the index of the first code 291 * point of the subarray and the {@code count} argument specifies the 292 * length of the subarray. The contents of the subarray are converted to 293 * {@code char}s; subsequent modification of the {@code int} array does not 294 * affect the newly created string. 295 * 296 * @param codePoints 297 * Array that is the source of Unicode code points 298 * 299 * @param offset 300 * The initial offset 301 * 302 * @param count 303 * The length 304 * 305 * @throws IllegalArgumentException 306 * If any invalid Unicode code point is found in {@code 307 * codePoints} 308 * 309 * @throws IndexOutOfBoundsException 310 * If {@code offset} is negative, {@code count} is negative, or 311 * {@code offset} is greater than {@code codePoints.length - count} 312 * 313 * @since 1.5 314 */ 315 public String(int[] codePoints, int offset, int count) { 316 checkBoundsOffCount(offset, count, codePoints.length); 317 if (count == 0) { 318 this.value = "".value; 319 this.coder = "".coder; 320 return; 321 } 322 if (COMPACT_STRINGS) { 323 byte[] val = StringLatin1.toBytes(codePoints, offset, count); 324 if (val != null) { 325 this.coder = LATIN1; 326 this.value = val; 327 return; 328 } 329 } 330 this.coder = UTF16; 331 this.value = StringUTF16.toBytes(codePoints, offset, count); 332 } 333 334 /** 335 * Allocates a new {@code String} constructed from a subarray of an array 336 * of 8-bit integer values. 337 * 338 * <p> The {@code offset} argument is the index of the first byte of the 339 * subarray, and the {@code count} argument specifies the length of the 340 * subarray. 341 * 342 * <p> Each {@code byte} in the subarray is converted to a {@code char} as 343 * specified in the {@link #String(byte[],int) String(byte[],int)} constructor. 344 * 345 * @deprecated This method does not properly convert bytes into characters. 346 * As of JDK 1.1, the preferred way to do this is via the 347 * {@code String} constructors that take a {@link 348 * java.nio.charset.Charset}, charset name, or that use the platform's 349 * default charset. 350 * 351 * @param ascii 352 * The bytes to be converted to characters 353 * 354 * @param hibyte 355 * The top 8 bits of each 16-bit Unicode code unit 356 * 357 * @param offset 358 * The initial offset 359 * @param count 360 * The length 361 * 362 * @throws IndexOutOfBoundsException 363 * If {@code offset} is negative, {@code count} is negative, or 364 * {@code offset} is greater than {@code ascii.length - count} 365 * 366 * @see #String(byte[], int) 367 * @see #String(byte[], int, int, java.lang.String) 368 * @see #String(byte[], int, int, java.nio.charset.Charset) 369 * @see #String(byte[], int, int) 370 * @see #String(byte[], java.lang.String) 371 * @see #String(byte[], java.nio.charset.Charset) 372 * @see #String(byte[]) 373 */ 374 @Deprecated(since="1.1") 375 public String(byte ascii[], int hibyte, int offset, int count) { 376 checkBoundsOffCount(offset, count, ascii.length); 377 if (count == 0) { 378 this.value = "".value; 379 this.coder = "".coder; 380 return; 381 } 382 if (COMPACT_STRINGS && (byte)hibyte == 0) { 383 this.value = Arrays.copyOfRange(ascii, offset, offset + count); 384 this.coder = LATIN1; 385 } else { 386 hibyte <<= 8; 387 byte[] val = StringUTF16.newBytesFor(count); 388 for (int i = 0; i < count; i++) { 389 StringUTF16.putChar(val, i, hibyte | (ascii[offset++] & 0xff)); 390 } 391 this.value = val; 392 this.coder = UTF16; 393 } 394 } 395 396 /** 397 * Allocates a new {@code String} containing characters constructed from 398 * an array of 8-bit integer values. Each character <i>c</i> in the 399 * resulting string is constructed from the corresponding component 400 * <i>b</i> in the byte array such that: 401 * 402 * <blockquote><pre> 403 * <b><i>c</i></b> == (char)(((hibyte & 0xff) << 8) 404 * | (<b><i>b</i></b> & 0xff)) 405 * </pre></blockquote> 406 * 407 * @deprecated This method does not properly convert bytes into 408 * characters. As of JDK 1.1, the preferred way to do this is via the 409 * {@code String} constructors that take a {@link 410 * java.nio.charset.Charset}, charset name, or that use the platform's 411 * default charset. 412 * 413 * @param ascii 414 * The bytes to be converted to characters 415 * 416 * @param hibyte 417 * The top 8 bits of each 16-bit Unicode code unit 418 * 419 * @see #String(byte[], int, int, java.lang.String) 420 * @see #String(byte[], int, int, java.nio.charset.Charset) 421 * @see #String(byte[], int, int) 422 * @see #String(byte[], java.lang.String) 423 * @see #String(byte[], java.nio.charset.Charset) 424 * @see #String(byte[]) 425 */ 426 @Deprecated(since="1.1") 427 public String(byte ascii[], int hibyte) { 428 this(ascii, hibyte, 0, ascii.length); 429 } 430 431 /** 432 * Constructs a new {@code String} by decoding the specified subarray of 433 * bytes using the specified charset. The length of the new {@code String} 434 * is a function of the charset, and hence may not be equal to the length 435 * of the subarray. 436 * 437 * <p> The behavior of this constructor when the given bytes are not valid 438 * in the given charset is unspecified. The {@link 439 * java.nio.charset.CharsetDecoder} class should be used when more control 440 * over the decoding process is required. 441 * 442 * @param bytes 443 * The bytes to be decoded into characters 444 * 445 * @param offset 446 * The index of the first byte to decode 447 * 448 * @param length 449 * The number of bytes to decode 450 451 * @param charsetName 452 * The name of a supported {@linkplain java.nio.charset.Charset 453 * charset} 454 * 455 * @throws UnsupportedEncodingException 456 * If the named charset is not supported 457 * 458 * @throws IndexOutOfBoundsException 459 * If {@code offset} is negative, {@code length} is negative, or 460 * {@code offset} is greater than {@code bytes.length - length} 461 * 462 * @since 1.1 463 */ 464 public String(byte bytes[], int offset, int length, String charsetName) 465 throws UnsupportedEncodingException { 466 if (charsetName == null) 467 throw new NullPointerException("charsetName"); 468 checkBoundsOffCount(offset, length, bytes.length); 469 StringCoding.Result ret = 470 StringCoding.decode(charsetName, bytes, offset, length); 471 this.value = ret.value; 472 this.coder = ret.coder; 473 } 474 475 /** 476 * Constructs a new {@code String} by decoding the specified subarray of 477 * bytes using the specified {@linkplain java.nio.charset.Charset charset}. 478 * The length of the new {@code String} is a function of the charset, and 479 * hence may not be equal to the length of the subarray. 480 * 481 * <p> This method always replaces malformed-input and unmappable-character 482 * sequences with this charset's default replacement string. The {@link 483 * java.nio.charset.CharsetDecoder} class should be used when more control 484 * over the decoding process is required. 485 * 486 * @param bytes 487 * The bytes to be decoded into characters 488 * 489 * @param offset 490 * The index of the first byte to decode 491 * 492 * @param length 493 * The number of bytes to decode 494 * 495 * @param charset 496 * The {@linkplain java.nio.charset.Charset charset} to be used to 497 * decode the {@code bytes} 498 * 499 * @throws IndexOutOfBoundsException 500 * If {@code offset} is negative, {@code length} is negative, or 501 * {@code offset} is greater than {@code bytes.length - length} 502 * 503 * @since 1.6 504 */ 505 public String(byte bytes[], int offset, int length, Charset charset) { 506 if (charset == null) 507 throw new NullPointerException("charset"); 508 checkBoundsOffCount(offset, length, bytes.length); 509 StringCoding.Result ret = 510 StringCoding.decode(charset, bytes, offset, length); 511 this.value = ret.value; 512 this.coder = ret.coder; 513 } 514 515 /** 516 * Constructs a new {@code String} by decoding the specified array of bytes 517 * using the specified {@linkplain java.nio.charset.Charset charset}. The 518 * length of the new {@code String} is a function of the charset, and hence 519 * may not be equal to the length of the byte array. 520 * 521 * <p> The behavior of this constructor when the given bytes are not valid 522 * in the given charset is unspecified. The {@link 523 * java.nio.charset.CharsetDecoder} class should be used when more control 524 * over the decoding process is required. 525 * 526 * @param bytes 527 * The bytes to be decoded into characters 528 * 529 * @param charsetName 530 * The name of a supported {@linkplain java.nio.charset.Charset 531 * charset} 532 * 533 * @throws UnsupportedEncodingException 534 * If the named charset is not supported 535 * 536 * @since 1.1 537 */ 538 public String(byte bytes[], String charsetName) 539 throws UnsupportedEncodingException { 540 this(bytes, 0, bytes.length, charsetName); 541 } 542 543 /** 544 * Constructs a new {@code String} by decoding the specified array of 545 * bytes using the specified {@linkplain java.nio.charset.Charset charset}. 546 * The length of the new {@code String} is a function of the charset, and 547 * hence may not be equal to the length of the byte array. 548 * 549 * <p> This method always replaces malformed-input and unmappable-character 550 * sequences with this charset's default replacement string. The {@link 551 * java.nio.charset.CharsetDecoder} class should be used when more control 552 * over the decoding process is required. 553 * 554 * @param bytes 555 * The bytes to be decoded into characters 556 * 557 * @param charset 558 * The {@linkplain java.nio.charset.Charset charset} to be used to 559 * decode the {@code bytes} 560 * 561 * @since 1.6 562 */ 563 public String(byte bytes[], Charset charset) { 564 this(bytes, 0, bytes.length, charset); 565 } 566 567 /** 568 * Constructs a new {@code String} by decoding the specified subarray of 569 * bytes using the platform's default charset. The length of the new 570 * {@code String} is a function of the charset, and hence may not be equal 571 * to the length of the subarray. 572 * 573 * <p> The behavior of this constructor when the given bytes are not valid 574 * in the default charset is unspecified. The {@link 575 * java.nio.charset.CharsetDecoder} class should be used when more control 576 * over the decoding process is required. 577 * 578 * @param bytes 579 * The bytes to be decoded into characters 580 * 581 * @param offset 582 * The index of the first byte to decode 583 * 584 * @param length 585 * The number of bytes to decode 586 * 587 * @throws IndexOutOfBoundsException 588 * If {@code offset} is negative, {@code length} is negative, or 589 * {@code offset} is greater than {@code bytes.length - length} 590 * 591 * @since 1.1 592 */ 593 public String(byte bytes[], int offset, int length) { 594 checkBoundsOffCount(offset, length, bytes.length); 595 StringCoding.Result ret = StringCoding.decode(bytes, offset, length); 596 this.value = ret.value; 597 this.coder = ret.coder; 598 } 599 600 /** 601 * Constructs a new {@code String} by decoding the specified array of bytes 602 * using the platform's default charset. The length of the new {@code 603 * String} is a function of the charset, and hence may not be equal to the 604 * length of the byte array. 605 * 606 * <p> The behavior of this constructor when the given bytes are not valid 607 * in the default charset is unspecified. The {@link 608 * java.nio.charset.CharsetDecoder} class should be used when more control 609 * over the decoding process is required. 610 * 611 * @param bytes 612 * The bytes to be decoded into characters 613 * 614 * @since 1.1 615 */ 616 public String(byte[] bytes) { 617 this(bytes, 0, bytes.length); 618 } 619 620 /** 621 * Allocates a new string that contains the sequence of characters 622 * currently contained in the string buffer argument. The contents of the 623 * string buffer are copied; subsequent modification of the string buffer 624 * does not affect the newly created string. 625 * 626 * @param buffer 627 * A {@code StringBuffer} 628 */ 629 public String(StringBuffer buffer) { 630 this(buffer.toString()); 631 } 632 633 /** 634 * Allocates a new string that contains the sequence of characters 635 * currently contained in the string builder argument. The contents of the 636 * string builder are copied; subsequent modification of the string builder 637 * does not affect the newly created string. 638 * 639 * <p> This constructor is provided to ease migration to {@code 640 * StringBuilder}. Obtaining a string from a string builder via the {@code 641 * toString} method is likely to run faster and is generally preferred. 642 * 643 * @param builder 644 * A {@code StringBuilder} 645 * 646 * @since 1.5 647 */ 648 public String(StringBuilder builder) { 649 this(builder, null); 650 } 651 652 /** 653 * Returns the length of this string. 654 * The length is equal to the number of <a href="Character.html#unicode">Unicode 655 * code units</a> in the string. 656 * 657 * @return the length of the sequence of characters represented by this 658 * object. 659 */ 660 public int length() { 661 return value.length >> coder(); 662 } 663 664 /** 665 * Returns {@code true} if, and only if, {@link #length()} is {@code 0}. 666 * 667 * @return {@code true} if {@link #length()} is {@code 0}, otherwise 668 * {@code false} 669 * 670 * @since 1.6 671 */ 672 public boolean isEmpty() { 673 return value.length == 0; 674 } 675 676 /** 677 * Returns the {@code char} value at the 678 * specified index. An index ranges from {@code 0} to 679 * {@code length() - 1}. The first {@code char} value of the sequence 680 * is at index {@code 0}, the next at index {@code 1}, 681 * and so on, as for array indexing. 682 * 683 * <p>If the {@code char} value specified by the index is a 684 * <a href="Character.html#unicode">surrogate</a>, the surrogate 685 * value is returned. 686 * 687 * @param index the index of the {@code char} value. 688 * @return the {@code char} value at the specified index of this string. 689 * The first {@code char} value is at index {@code 0}. 690 * @exception IndexOutOfBoundsException if the {@code index} 691 * argument is negative or not less than the length of this 692 * string. 693 */ 694 public char charAt(int index) { 695 if (isLatin1()) { 696 return StringLatin1.charAt(value, index); 697 } else { 698 return StringUTF16.charAt(value, index); 699 } 700 } 701 702 /** 703 * Returns the character (Unicode code point) at the specified 704 * index. The index refers to {@code char} values 705 * (Unicode code units) and ranges from {@code 0} to 706 * {@link #length()}{@code - 1}. 707 * 708 * <p> If the {@code char} value specified at the given index 709 * is in the high-surrogate range, the following index is less 710 * than the length of this {@code String}, and the 711 * {@code char} value at the following index is in the 712 * low-surrogate range, then the supplementary code point 713 * corresponding to this surrogate pair is returned. Otherwise, 714 * the {@code char} value at the given index is returned. 715 * 716 * @param index the index to the {@code char} values 717 * @return the code point value of the character at the 718 * {@code index} 719 * @exception IndexOutOfBoundsException if the {@code index} 720 * argument is negative or not less than the length of this 721 * string. 722 * @since 1.5 723 */ 724 public int codePointAt(int index) { 725 if (isLatin1()) { 726 checkIndex(index, value.length); 727 return value[index] & 0xff; 728 } 729 int length = value.length >> 1; 730 checkIndex(index, length); 731 return StringUTF16.codePointAt(value, index, length); 732 } 733 734 /** 735 * Returns the character (Unicode code point) before the specified 736 * index. The index refers to {@code char} values 737 * (Unicode code units) and ranges from {@code 1} to {@link 738 * CharSequence#length() length}. 739 * 740 * <p> If the {@code char} value at {@code (index - 1)} 741 * is in the low-surrogate range, {@code (index - 2)} is not 742 * negative, and the {@code char} value at {@code (index - 743 * 2)} is in the high-surrogate range, then the 744 * supplementary code point value of the surrogate pair is 745 * returned. If the {@code char} value at {@code index - 746 * 1} is an unpaired low-surrogate or a high-surrogate, the 747 * surrogate value is returned. 748 * 749 * @param index the index following the code point that should be returned 750 * @return the Unicode code point value before the given index. 751 * @exception IndexOutOfBoundsException if the {@code index} 752 * argument is less than 1 or greater than the length 753 * of this string. 754 * @since 1.5 755 */ 756 public int codePointBefore(int index) { 757 int i = index - 1; 758 if (i < 0 || i >= length()) { 759 throw new StringIndexOutOfBoundsException(index); 760 } 761 if (isLatin1()) { 762 return (value[i] & 0xff); 763 } 764 return StringUTF16.codePointBefore(value, index); 765 } 766 767 /** 768 * Returns the number of Unicode code points in the specified text 769 * range of this {@code String}. The text range begins at the 770 * specified {@code beginIndex} and extends to the 771 * {@code char} at index {@code endIndex - 1}. Thus the 772 * length (in {@code char}s) of the text range is 773 * {@code endIndex-beginIndex}. Unpaired surrogates within 774 * the text range count as one code point each. 775 * 776 * @param beginIndex the index to the first {@code char} of 777 * the text range. 778 * @param endIndex the index after the last {@code char} of 779 * the text range. 780 * @return the number of Unicode code points in the specified text 781 * range 782 * @exception IndexOutOfBoundsException if the 783 * {@code beginIndex} is negative, or {@code endIndex} 784 * is larger than the length of this {@code String}, or 785 * {@code beginIndex} is larger than {@code endIndex}. 786 * @since 1.5 787 */ 788 public int codePointCount(int beginIndex, int endIndex) { 789 if (beginIndex < 0 || beginIndex > endIndex || 790 endIndex > length()) { 791 throw new IndexOutOfBoundsException(); 792 } 793 if (isLatin1()) { 794 return endIndex - beginIndex; 795 } 796 return StringUTF16.codePointCount(value, beginIndex, endIndex); 797 } 798 799 /** 800 * Returns the index within this {@code String} that is 801 * offset from the given {@code index} by 802 * {@code codePointOffset} code points. Unpaired surrogates 803 * within the text range given by {@code index} and 804 * {@code codePointOffset} count as one code point each. 805 * 806 * @param index the index to be offset 807 * @param codePointOffset the offset in code points 808 * @return the index within this {@code String} 809 * @exception IndexOutOfBoundsException if {@code index} 810 * is negative or larger then the length of this 811 * {@code String}, or if {@code codePointOffset} is positive 812 * and the substring starting with {@code index} has fewer 813 * than {@code codePointOffset} code points, 814 * or if {@code codePointOffset} is negative and the substring 815 * before {@code index} has fewer than the absolute value 816 * of {@code codePointOffset} code points. 817 * @since 1.5 818 */ 819 public int offsetByCodePoints(int index, int codePointOffset) { 820 if (index < 0 || index > length()) { 821 throw new IndexOutOfBoundsException(); 822 } 823 return Character.offsetByCodePoints(this, index, codePointOffset); 824 } 825 826 /** 827 * Copies characters from this string into the destination character 828 * array. 829 * <p> 830 * The first character to be copied is at index {@code srcBegin}; 831 * the last character to be copied is at index {@code srcEnd-1} 832 * (thus the total number of characters to be copied is 833 * {@code srcEnd-srcBegin}). The characters are copied into the 834 * subarray of {@code dst} starting at index {@code dstBegin} 835 * and ending at index: 836 * <blockquote><pre> 837 * dstBegin + (srcEnd-srcBegin) - 1 838 * </pre></blockquote> 839 * 840 * @param srcBegin index of the first character in the string 841 * to copy. 842 * @param srcEnd index after the last character in the string 843 * to copy. 844 * @param dst the destination array. 845 * @param dstBegin the start offset in the destination array. 846 * @exception IndexOutOfBoundsException If any of the following 847 * is true: 848 * <ul><li>{@code srcBegin} is negative. 849 * <li>{@code srcBegin} is greater than {@code srcEnd} 850 * <li>{@code srcEnd} is greater than the length of this 851 * string 852 * <li>{@code dstBegin} is negative 853 * <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than 854 * {@code dst.length}</ul> 855 */ 856 public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) { 857 checkBoundsBeginEnd(srcBegin, srcEnd, length()); 858 checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length); 859 if (isLatin1()) { 860 StringLatin1.getChars(value, srcBegin, srcEnd, dst, dstBegin); 861 } else { 862 StringUTF16.getChars(value, srcBegin, srcEnd, dst, dstBegin); 863 } 864 } 865 866 /** 867 * Copies characters from this string into the destination byte array. Each 868 * byte receives the 8 low-order bits of the corresponding character. The 869 * eight high-order bits of each character are not copied and do not 870 * participate in the transfer in any way. 871 * 872 * <p> The first character to be copied is at index {@code srcBegin}; the 873 * last character to be copied is at index {@code srcEnd-1}. The total 874 * number of characters to be copied is {@code srcEnd-srcBegin}. The 875 * characters, converted to bytes, are copied into the subarray of {@code 876 * dst} starting at index {@code dstBegin} and ending at index: 877 * 878 * <blockquote><pre> 879 * dstBegin + (srcEnd-srcBegin) - 1 880 * </pre></blockquote> 881 * 882 * @deprecated This method does not properly convert characters into 883 * bytes. As of JDK 1.1, the preferred way to do this is via the 884 * {@link #getBytes()} method, which uses the platform's default charset. 885 * 886 * @param srcBegin 887 * Index of the first character in the string to copy 888 * 889 * @param srcEnd 890 * Index after the last character in the string to copy 891 * 892 * @param dst 893 * The destination array 894 * 895 * @param dstBegin 896 * The start offset in the destination array 897 * 898 * @throws IndexOutOfBoundsException 899 * If any of the following is true: 900 * <ul> 901 * <li> {@code srcBegin} is negative 902 * <li> {@code srcBegin} is greater than {@code srcEnd} 903 * <li> {@code srcEnd} is greater than the length of this String 904 * <li> {@code dstBegin} is negative 905 * <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code 906 * dst.length} 907 * </ul> 908 */ 909 @Deprecated(since="1.1") 910 public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) { 911 checkBoundsBeginEnd(srcBegin, srcEnd, length()); 912 Objects.requireNonNull(dst); 913 checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length); 914 if (isLatin1()) { 915 StringLatin1.getBytes(value, srcBegin, srcEnd, dst, dstBegin); 916 } else { 917 StringUTF16.getBytes(value, srcBegin, srcEnd, dst, dstBegin); 918 } 919 } 920 921 /** 922 * Encodes this {@code String} into a sequence of bytes using the named 923 * charset, storing the result into a new byte array. 924 * 925 * <p> The behavior of this method when this string cannot be encoded in 926 * the given charset is unspecified. The {@link 927 * java.nio.charset.CharsetEncoder} class should be used when more control 928 * over the encoding process is required. 929 * 930 * @param charsetName 931 * The name of a supported {@linkplain java.nio.charset.Charset 932 * charset} 933 * 934 * @return The resultant byte array 935 * 936 * @throws UnsupportedEncodingException 937 * If the named charset is not supported 938 * 939 * @since 1.1 940 */ 941 public byte[] getBytes(String charsetName) 942 throws UnsupportedEncodingException { 943 if (charsetName == null) throw new NullPointerException(); 944 return StringCoding.encode(charsetName, coder(), value); 945 } 946 947 /** 948 * Encodes this {@code String} into a sequence of bytes using the given 949 * {@linkplain java.nio.charset.Charset charset}, storing the result into a 950 * new byte array. 951 * 952 * <p> This method always replaces malformed-input and unmappable-character 953 * sequences with this charset's default replacement byte array. The 954 * {@link java.nio.charset.CharsetEncoder} class should be used when more 955 * control over the encoding process is required. 956 * 957 * @param charset 958 * The {@linkplain java.nio.charset.Charset} to be used to encode 959 * the {@code String} 960 * 961 * @return The resultant byte array 962 * 963 * @since 1.6 964 */ 965 public byte[] getBytes(Charset charset) { 966 if (charset == null) throw new NullPointerException(); 967 return StringCoding.encode(charset, coder(), value); 968 } 969 970 /** 971 * Encodes this {@code String} into a sequence of bytes using the 972 * platform's default charset, storing the result into a new byte array. 973 * 974 * <p> The behavior of this method when this string cannot be encoded in 975 * the default charset is unspecified. The {@link 976 * java.nio.charset.CharsetEncoder} class should be used when more control 977 * over the encoding process is required. 978 * 979 * @return The resultant byte array 980 * 981 * @since 1.1 982 */ 983 public byte[] getBytes() { 984 return StringCoding.encode(coder(), value); 985 } 986 987 /** 988 * Compares this string to the specified object. The result is {@code 989 * true} if and only if the argument is not {@code null} and is a {@code 990 * String} object that represents the same sequence of characters as this 991 * object. 992 * 993 * <p>For finer-grained String comparison, refer to 994 * {@link java.text.Collator}. 995 * 996 * @param anObject 997 * The object to compare this {@code String} against 998 * 999 * @return {@code true} if the given object represents a {@code String} 1000 * equivalent to this string, {@code false} otherwise 1001 * 1002 * @see #compareTo(String) 1003 * @see #equalsIgnoreCase(String) 1004 */ 1005 public boolean equals(Object anObject) { 1006 if (this == anObject) { 1007 return true; 1008 } 1009 if (anObject instanceof String) { 1010 String aString = (String)anObject; 1011 if (coder() == aString.coder()) { 1012 return isLatin1() ? StringLatin1.equals(value, aString.value) 1013 : StringUTF16.equals(value, aString.value); 1014 } 1015 } 1016 return false; 1017 } 1018 1019 /** 1020 * Compares this string to the specified {@code StringBuffer}. The result 1021 * is {@code true} if and only if this {@code String} represents the same 1022 * sequence of characters as the specified {@code StringBuffer}. This method 1023 * synchronizes on the {@code StringBuffer}. 1024 * 1025 * <p>For finer-grained String comparison, refer to 1026 * {@link java.text.Collator}. 1027 * 1028 * @param sb 1029 * The {@code StringBuffer} to compare this {@code String} against 1030 * 1031 * @return {@code true} if this {@code String} represents the same 1032 * sequence of characters as the specified {@code StringBuffer}, 1033 * {@code false} otherwise 1034 * 1035 * @since 1.4 1036 */ 1037 public boolean contentEquals(StringBuffer sb) { 1038 return contentEquals((CharSequence)sb); 1039 } 1040 1041 private boolean nonSyncContentEquals(AbstractStringBuilder sb) { 1042 int len = length(); 1043 if (len != sb.length()) { 1044 return false; 1045 } 1046 byte v1[] = value; 1047 byte v2[] = sb.getValue(); 1048 if (coder() == sb.getCoder()) { 1049 int n = v1.length; 1050 for (int i = 0; i < n; i++) { 1051 if (v1[i] != v2[i]) { 1052 return false; 1053 } 1054 } 1055 } else { 1056 if (!isLatin1()) { // utf16 str and latin1 abs can never be "equal" 1057 return false; 1058 } 1059 return StringUTF16.contentEquals(v1, v2, len); 1060 } 1061 return true; 1062 } 1063 1064 /** 1065 * Compares this string to the specified {@code CharSequence}. The 1066 * result is {@code true} if and only if this {@code String} represents the 1067 * same sequence of char values as the specified sequence. Note that if the 1068 * {@code CharSequence} is a {@code StringBuffer} then the method 1069 * synchronizes on it. 1070 * 1071 * <p>For finer-grained String comparison, refer to 1072 * {@link java.text.Collator}. 1073 * 1074 * @param cs 1075 * The sequence to compare this {@code String} against 1076 * 1077 * @return {@code true} if this {@code String} represents the same 1078 * sequence of char values as the specified sequence, {@code 1079 * false} otherwise 1080 * 1081 * @since 1.5 1082 */ 1083 public boolean contentEquals(CharSequence cs) { 1084 // Argument is a StringBuffer, StringBuilder 1085 if (cs instanceof AbstractStringBuilder) { 1086 if (cs instanceof StringBuffer) { 1087 synchronized(cs) { 1088 return nonSyncContentEquals((AbstractStringBuilder)cs); 1089 } 1090 } else { 1091 return nonSyncContentEquals((AbstractStringBuilder)cs); 1092 } 1093 } 1094 // Argument is a String 1095 if (cs instanceof String) { 1096 return equals(cs); 1097 } 1098 // Argument is a generic CharSequence 1099 int n = cs.length(); 1100 if (n != length()) { 1101 return false; 1102 } 1103 byte[] val = this.value; 1104 if (isLatin1()) { 1105 for (int i = 0; i < n; i++) { 1106 if ((val[i] & 0xff) != cs.charAt(i)) { 1107 return false; 1108 } 1109 } 1110 } else { 1111 if (!StringUTF16.contentEquals(val, cs, n)) { 1112 return false; 1113 } 1114 } 1115 return true; 1116 } 1117 1118 /** 1119 * Compares this {@code String} to another {@code String}, ignoring case 1120 * considerations. Two strings are considered equal ignoring case if they 1121 * are of the same length and corresponding characters in the two strings 1122 * are equal ignoring case. 1123 * 1124 * <p> Two characters {@code c1} and {@code c2} are considered the same 1125 * ignoring case if at least one of the following is true: 1126 * <ul> 1127 * <li> The two characters are the same (as compared by the 1128 * {@code ==} operator) 1129 * <li> Calling {@code Character.toLowerCase(Character.toUpperCase(char))} 1130 * on each character produces the same result 1131 * </ul> 1132 * 1133 * <p>Note that this method does <em>not</em> take locale into account, and 1134 * will result in unsatisfactory results for certain locales. The 1135 * {@link java.text.Collator} class provides locale-sensitive comparison. 1136 * 1137 * @param anotherString 1138 * The {@code String} to compare this {@code String} against 1139 * 1140 * @return {@code true} if the argument is not {@code null} and it 1141 * represents an equivalent {@code String} ignoring case; {@code 1142 * false} otherwise 1143 * 1144 * @see #equals(Object) 1145 */ 1146 public boolean equalsIgnoreCase(String anotherString) { 1147 return (this == anotherString) ? true 1148 : (anotherString != null) 1149 && (anotherString.length() == length()) 1150 && regionMatches(true, 0, anotherString, 0, length()); 1151 } 1152 1153 /** 1154 * Compares two strings lexicographically. 1155 * The comparison is based on the Unicode value of each character in 1156 * the strings. The character sequence represented by this 1157 * {@code String} object is compared lexicographically to the 1158 * character sequence represented by the argument string. The result is 1159 * a negative integer if this {@code String} object 1160 * lexicographically precedes the argument string. The result is a 1161 * positive integer if this {@code String} object lexicographically 1162 * follows the argument string. The result is zero if the strings 1163 * are equal; {@code compareTo} returns {@code 0} exactly when 1164 * the {@link #equals(Object)} method would return {@code true}. 1165 * <p> 1166 * This is the definition of lexicographic ordering. If two strings are 1167 * different, then either they have different characters at some index 1168 * that is a valid index for both strings, or their lengths are different, 1169 * or both. If they have different characters at one or more index 1170 * positions, let <i>k</i> be the smallest such index; then the string 1171 * whose character at position <i>k</i> has the smaller value, as 1172 * determined by using the {@code <} operator, lexicographically precedes the 1173 * other string. In this case, {@code compareTo} returns the 1174 * difference of the two character values at position {@code k} in 1175 * the two string -- that is, the value: 1176 * <blockquote><pre> 1177 * this.charAt(k)-anotherString.charAt(k) 1178 * </pre></blockquote> 1179 * If there is no index position at which they differ, then the shorter 1180 * string lexicographically precedes the longer string. In this case, 1181 * {@code compareTo} returns the difference of the lengths of the 1182 * strings -- that is, the value: 1183 * <blockquote><pre> 1184 * this.length()-anotherString.length() 1185 * </pre></blockquote> 1186 * 1187 * <p>For finer-grained String comparison, refer to 1188 * {@link java.text.Collator}. 1189 * 1190 * @param anotherString the {@code String} to be compared. 1191 * @return the value {@code 0} if the argument string is equal to 1192 * this string; a value less than {@code 0} if this string 1193 * is lexicographically less than the string argument; and a 1194 * value greater than {@code 0} if this string is 1195 * lexicographically greater than the string argument. 1196 */ 1197 public int compareTo(String anotherString) { 1198 byte v1[] = value; 1199 byte v2[] = anotherString.value; 1200 if (coder() == anotherString.coder()) { 1201 return isLatin1() ? StringLatin1.compareTo(v1, v2) 1202 : StringUTF16.compareTo(v1, v2); 1203 } 1204 return isLatin1() ? StringLatin1.compareToUTF16(v1, v2) 1205 : StringUTF16.compareToLatin1(v1, v2); 1206 } 1207 1208 /** 1209 * A Comparator that orders {@code String} objects as by 1210 * {@code compareToIgnoreCase}. This comparator is serializable. 1211 * <p> 1212 * Note that this Comparator does <em>not</em> take locale into account, 1213 * and will result in an unsatisfactory ordering for certain locales. 1214 * The {@link java.text.Collator} class provides locale-sensitive comparison. 1215 * 1216 * @see java.text.Collator 1217 * @since 1.2 1218 */ 1219 public static final Comparator<String> CASE_INSENSITIVE_ORDER 1220 = new CaseInsensitiveComparator(); 1221 private static class CaseInsensitiveComparator 1222 implements Comparator<String>, java.io.Serializable { 1223 // use serialVersionUID from JDK 1.2.2 for interoperability 1224 private static final long serialVersionUID = 8575799808933029326L; 1225 1226 public int compare(String s1, String s2) { 1227 byte v1[] = s1.value; 1228 byte v2[] = s2.value; 1229 if (s1.coder() == s2.coder()) { 1230 return s1.isLatin1() ? StringLatin1.compareToCI(v1, v2) 1231 : StringUTF16.compareToCI(v1, v2); 1232 } 1233 return s1.isLatin1() ? StringLatin1.compareToCI_UTF16(v1, v2) 1234 : StringUTF16.compareToCI_Latin1(v1, v2); 1235 } 1236 1237 /** Replaces the de-serialized object. */ 1238 private Object readResolve() { return CASE_INSENSITIVE_ORDER; } 1239 } 1240 1241 /** 1242 * Compares two strings lexicographically, ignoring case 1243 * differences. This method returns an integer whose sign is that of 1244 * calling {@code compareTo} with normalized versions of the strings 1245 * where case differences have been eliminated by calling 1246 * {@code Character.toLowerCase(Character.toUpperCase(character))} on 1247 * each character. 1248 * <p> 1249 * Note that this method does <em>not</em> take locale into account, 1250 * and will result in an unsatisfactory ordering for certain locales. 1251 * The {@link java.text.Collator} class provides locale-sensitive comparison. 1252 * 1253 * @param str the {@code String} to be compared. 1254 * @return a negative integer, zero, or a positive integer as the 1255 * specified String is greater than, equal to, or less 1256 * than this String, ignoring case considerations. 1257 * @see java.text.Collator 1258 * @since 1.2 1259 */ 1260 public int compareToIgnoreCase(String str) { 1261 return CASE_INSENSITIVE_ORDER.compare(this, str); 1262 } 1263 1264 /** 1265 * Tests if two string regions are equal. 1266 * <p> 1267 * A substring of this {@code String} object is compared to a substring 1268 * of the argument other. The result is true if these substrings 1269 * represent identical character sequences. The substring of this 1270 * {@code String} object to be compared begins at index {@code toffset} 1271 * and has length {@code len}. The substring of other to be compared 1272 * begins at index {@code ooffset} and has length {@code len}. The 1273 * result is {@code false} if and only if at least one of the following 1274 * is true: 1275 * <ul><li>{@code toffset} is negative. 1276 * <li>{@code ooffset} is negative. 1277 * <li>{@code toffset+len} is greater than the length of this 1278 * {@code String} object. 1279 * <li>{@code ooffset+len} is greater than the length of the other 1280 * argument. 1281 * <li>There is some nonnegative integer <i>k</i> less than {@code len} 1282 * such that: 1283 * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + } 1284 * <i>k</i>{@code )} 1285 * </ul> 1286 * 1287 * <p>Note that this method does <em>not</em> take locale into account. The 1288 * {@link java.text.Collator} class provides locale-sensitive comparison. 1289 * 1290 * @param toffset the starting offset of the subregion in this string. 1291 * @param other the string argument. 1292 * @param ooffset the starting offset of the subregion in the string 1293 * argument. 1294 * @param len the number of characters to compare. 1295 * @return {@code true} if the specified subregion of this string 1296 * exactly matches the specified subregion of the string argument; 1297 * {@code false} otherwise. 1298 */ 1299 public boolean regionMatches(int toffset, String other, int ooffset, int len) { 1300 byte tv[] = value; 1301 byte ov[] = other.value; 1302 // Note: toffset, ooffset, or len might be near -1>>>1. 1303 if ((ooffset < 0) || (toffset < 0) || 1304 (toffset > (long)length() - len) || 1305 (ooffset > (long)other.length() - len)) { 1306 return false; 1307 } 1308 if (coder() == other.coder()) { 1309 if (!isLatin1() && (len > 0)) { 1310 toffset = toffset << 1; 1311 ooffset = ooffset << 1; 1312 len = len << 1; 1313 } 1314 while (len-- > 0) { 1315 if (tv[toffset++] != ov[ooffset++]) { 1316 return false; 1317 } 1318 } 1319 } else { 1320 if (coder() == LATIN1) { 1321 while (len-- > 0) { 1322 if (StringLatin1.getChar(tv, toffset++) != 1323 StringUTF16.getChar(ov, ooffset++)) { 1324 return false; 1325 } 1326 } 1327 } else { 1328 while (len-- > 0) { 1329 if (StringUTF16.getChar(tv, toffset++) != 1330 StringLatin1.getChar(ov, ooffset++)) { 1331 return false; 1332 } 1333 } 1334 } 1335 } 1336 return true; 1337 } 1338 1339 /** 1340 * Tests if two string regions are equal. 1341 * <p> 1342 * A substring of this {@code String} object is compared to a substring 1343 * of the argument {@code other}. The result is {@code true} if these 1344 * substrings represent character sequences that are the same, ignoring 1345 * case if and only if {@code ignoreCase} is true. The substring of 1346 * this {@code String} object to be compared begins at index 1347 * {@code toffset} and has length {@code len}. The substring of 1348 * {@code other} to be compared begins at index {@code ooffset} and 1349 * has length {@code len}. The result is {@code false} if and only if 1350 * at least one of the following is true: 1351 * <ul><li>{@code toffset} is negative. 1352 * <li>{@code ooffset} is negative. 1353 * <li>{@code toffset+len} is greater than the length of this 1354 * {@code String} object. 1355 * <li>{@code ooffset+len} is greater than the length of the other 1356 * argument. 1357 * <li>{@code ignoreCase} is {@code false} and there is some nonnegative 1358 * integer <i>k</i> less than {@code len} such that: 1359 * <blockquote><pre> 1360 * this.charAt(toffset+k) != other.charAt(ooffset+k) 1361 * </pre></blockquote> 1362 * <li>{@code ignoreCase} is {@code true} and there is some nonnegative 1363 * integer <i>k</i> less than {@code len} such that: 1364 * <blockquote><pre> 1365 * Character.toLowerCase(Character.toUpperCase(this.charAt(toffset+k))) != 1366 Character.toLowerCase(Character.toUpperCase(other.charAt(ooffset+k))) 1367 * </pre></blockquote> 1368 * </ul> 1369 * 1370 * <p>Note that this method does <em>not</em> take locale into account, 1371 * and will result in unsatisfactory results for certain locales when 1372 * {@code ignoreCase} is {@code true}. The {@link java.text.Collator} class 1373 * provides locale-sensitive comparison. 1374 * 1375 * @param ignoreCase if {@code true}, ignore case when comparing 1376 * characters. 1377 * @param toffset the starting offset of the subregion in this 1378 * string. 1379 * @param other the string argument. 1380 * @param ooffset the starting offset of the subregion in the string 1381 * argument. 1382 * @param len the number of characters to compare. 1383 * @return {@code true} if the specified subregion of this string 1384 * matches the specified subregion of the string argument; 1385 * {@code false} otherwise. Whether the matching is exact 1386 * or case insensitive depends on the {@code ignoreCase} 1387 * argument. 1388 */ 1389 public boolean regionMatches(boolean ignoreCase, int toffset, 1390 String other, int ooffset, int len) { 1391 if (!ignoreCase) { 1392 return regionMatches(toffset, other, ooffset, len); 1393 } 1394 // Note: toffset, ooffset, or len might be near -1>>>1. 1395 if ((ooffset < 0) || (toffset < 0) 1396 || (toffset > (long)length() - len) 1397 || (ooffset > (long)other.length() - len)) { 1398 return false; 1399 } 1400 byte tv[] = value; 1401 byte ov[] = other.value; 1402 if (coder() == other.coder()) { 1403 return isLatin1() 1404 ? StringLatin1.regionMatchesCI(tv, toffset, ov, ooffset, len) 1405 : StringUTF16.regionMatchesCI(tv, toffset, ov, ooffset, len); 1406 } 1407 return isLatin1() 1408 ? StringLatin1.regionMatchesCI_UTF16(tv, toffset, ov, ooffset, len) 1409 : StringUTF16.regionMatchesCI_Latin1(tv, toffset, ov, ooffset, len); 1410 } 1411 1412 /** 1413 * Tests if the substring of this string beginning at the 1414 * specified index starts with the specified prefix. 1415 * 1416 * @param prefix the prefix. 1417 * @param toffset where to begin looking in this string. 1418 * @return {@code true} if the character sequence represented by the 1419 * argument is a prefix of the substring of this object starting 1420 * at index {@code toffset}; {@code false} otherwise. 1421 * The result is {@code false} if {@code toffset} is 1422 * negative or greater than the length of this 1423 * {@code String} object; otherwise the result is the same 1424 * as the result of the expression 1425 * <pre> 1426 * this.substring(toffset).startsWith(prefix) 1427 * </pre> 1428 */ 1429 public boolean startsWith(String prefix, int toffset) { 1430 // Note: toffset might be near -1>>>1. 1431 if (toffset < 0 || toffset > length() - prefix.length()) { 1432 return false; 1433 } 1434 byte ta[] = value; 1435 byte pa[] = prefix.value; 1436 int po = 0; 1437 int pc = pa.length; 1438 if (coder() == prefix.coder()) { 1439 int to = isLatin1() ? toffset : toffset << 1; 1440 while (po < pc) { 1441 if (ta[to++] != pa[po++]) { 1442 return false; 1443 } 1444 } 1445 } else { 1446 if (isLatin1()) { // && pcoder == UTF16 1447 return false; 1448 } 1449 // coder == UTF16 && pcoder == LATIN1) 1450 while (po < pc) { 1451 if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) { 1452 return false; 1453 } 1454 } 1455 } 1456 return true; 1457 } 1458 1459 /** 1460 * Tests if this string starts with the specified prefix. 1461 * 1462 * @param prefix the prefix. 1463 * @return {@code true} if the character sequence represented by the 1464 * argument is a prefix of the character sequence represented by 1465 * this string; {@code false} otherwise. 1466 * Note also that {@code true} will be returned if the 1467 * argument is an empty string or is equal to this 1468 * {@code String} object as determined by the 1469 * {@link #equals(Object)} method. 1470 * @since 1.0 1471 */ 1472 public boolean startsWith(String prefix) { 1473 return startsWith(prefix, 0); 1474 } 1475 1476 /** 1477 * Tests if this string ends with the specified suffix. 1478 * 1479 * @param suffix the suffix. 1480 * @return {@code true} if the character sequence represented by the 1481 * argument is a suffix of the character sequence represented by 1482 * this object; {@code false} otherwise. Note that the 1483 * result will be {@code true} if the argument is the 1484 * empty string or is equal to this {@code String} object 1485 * as determined by the {@link #equals(Object)} method. 1486 */ 1487 public boolean endsWith(String suffix) { 1488 return startsWith(suffix, length() - suffix.length()); 1489 } 1490 1491 /** 1492 * Returns a hash code for this string. The hash code for a 1493 * {@code String} object is computed as 1494 * <blockquote><pre> 1495 * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1] 1496 * </pre></blockquote> 1497 * using {@code int} arithmetic, where {@code s[i]} is the 1498 * <i>i</i>th character of the string, {@code n} is the length of 1499 * the string, and {@code ^} indicates exponentiation. 1500 * (The hash value of the empty string is zero.) 1501 * 1502 * @return a hash code value for this object. 1503 */ 1504 public int hashCode() { 1505 int h = hash; 1506 if (h == 0 && value.length > 0) { 1507 hash = h = isLatin1() ? StringLatin1.hashCode(value) 1508 : StringUTF16.hashCode(value); 1509 } 1510 return h; 1511 } 1512 1513 /** 1514 * Returns the index within this string of the first occurrence of 1515 * the specified character. If a character with value 1516 * {@code ch} occurs in the character sequence represented by 1517 * this {@code String} object, then the index (in Unicode 1518 * code units) of the first such occurrence is returned. For 1519 * values of {@code ch} in the range from 0 to 0xFFFF 1520 * (inclusive), this is the smallest value <i>k</i> such that: 1521 * <blockquote><pre> 1522 * this.charAt(<i>k</i>) == ch 1523 * </pre></blockquote> 1524 * is true. For other values of {@code ch}, it is the 1525 * smallest value <i>k</i> such that: 1526 * <blockquote><pre> 1527 * this.codePointAt(<i>k</i>) == ch 1528 * </pre></blockquote> 1529 * is true. In either case, if no such character occurs in this 1530 * string, then {@code -1} is returned. 1531 * 1532 * @param ch a character (Unicode code point). 1533 * @return the index of the first occurrence of the character in the 1534 * character sequence represented by this object, or 1535 * {@code -1} if the character does not occur. 1536 */ 1537 public int indexOf(int ch) { 1538 return indexOf(ch, 0); 1539 } 1540 1541 /** 1542 * Returns the index within this string of the first occurrence of the 1543 * specified character, starting the search at the specified index. 1544 * <p> 1545 * If a character with value {@code ch} occurs in the 1546 * character sequence represented by this {@code String} 1547 * object at an index no smaller than {@code fromIndex}, then 1548 * the index of the first such occurrence is returned. For values 1549 * of {@code ch} in the range from 0 to 0xFFFF (inclusive), 1550 * this is the smallest value <i>k</i> such that: 1551 * <blockquote><pre> 1552 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> >= fromIndex) 1553 * </pre></blockquote> 1554 * is true. For other values of {@code ch}, it is the 1555 * smallest value <i>k</i> such that: 1556 * <blockquote><pre> 1557 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> >= fromIndex) 1558 * </pre></blockquote> 1559 * is true. In either case, if no such character occurs in this 1560 * string at or after position {@code fromIndex}, then 1561 * {@code -1} is returned. 1562 * 1563 * <p> 1564 * There is no restriction on the value of {@code fromIndex}. If it 1565 * is negative, it has the same effect as if it were zero: this entire 1566 * string may be searched. If it is greater than the length of this 1567 * string, it has the same effect as if it were equal to the length of 1568 * this string: {@code -1} is returned. 1569 * 1570 * <p>All indices are specified in {@code char} values 1571 * (Unicode code units). 1572 * 1573 * @param ch a character (Unicode code point). 1574 * @param fromIndex the index to start the search from. 1575 * @return the index of the first occurrence of the character in the 1576 * character sequence represented by this object that is greater 1577 * than or equal to {@code fromIndex}, or {@code -1} 1578 * if the character does not occur. 1579 */ 1580 public int indexOf(int ch, int fromIndex) { 1581 return isLatin1() ? StringLatin1.indexOf(value, ch, fromIndex) 1582 : StringUTF16.indexOf(value, ch, fromIndex); 1583 } 1584 1585 /** 1586 * Returns the index within this string of the last occurrence of 1587 * the specified character. For values of {@code ch} in the 1588 * range from 0 to 0xFFFF (inclusive), the index (in Unicode code 1589 * units) returned is the largest value <i>k</i> such that: 1590 * <blockquote><pre> 1591 * this.charAt(<i>k</i>) == ch 1592 * </pre></blockquote> 1593 * is true. For other values of {@code ch}, it is the 1594 * largest value <i>k</i> such that: 1595 * <blockquote><pre> 1596 * this.codePointAt(<i>k</i>) == ch 1597 * </pre></blockquote> 1598 * is true. In either case, if no such character occurs in this 1599 * string, then {@code -1} is returned. The 1600 * {@code String} is searched backwards starting at the last 1601 * character. 1602 * 1603 * @param ch a character (Unicode code point). 1604 * @return the index of the last occurrence of the character in the 1605 * character sequence represented by this object, or 1606 * {@code -1} if the character does not occur. 1607 */ 1608 public int lastIndexOf(int ch) { 1609 return lastIndexOf(ch, length() - 1); 1610 } 1611 1612 /** 1613 * Returns the index within this string of the last occurrence of 1614 * the specified character, searching backward starting at the 1615 * specified index. For values of {@code ch} in the range 1616 * from 0 to 0xFFFF (inclusive), the index returned is the largest 1617 * value <i>k</i> such that: 1618 * <blockquote><pre> 1619 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex) 1620 * </pre></blockquote> 1621 * is true. For other values of {@code ch}, it is the 1622 * largest value <i>k</i> such that: 1623 * <blockquote><pre> 1624 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex) 1625 * </pre></blockquote> 1626 * is true. In either case, if no such character occurs in this 1627 * string at or before position {@code fromIndex}, then 1628 * {@code -1} is returned. 1629 * 1630 * <p>All indices are specified in {@code char} values 1631 * (Unicode code units). 1632 * 1633 * @param ch a character (Unicode code point). 1634 * @param fromIndex the index to start the search from. There is no 1635 * restriction on the value of {@code fromIndex}. If it is 1636 * greater than or equal to the length of this string, it has 1637 * the same effect as if it were equal to one less than the 1638 * length of this string: this entire string may be searched. 1639 * If it is negative, it has the same effect as if it were -1: 1640 * -1 is returned. 1641 * @return the index of the last occurrence of the character in the 1642 * character sequence represented by this object that is less 1643 * than or equal to {@code fromIndex}, or {@code -1} 1644 * if the character does not occur before that point. 1645 */ 1646 public int lastIndexOf(int ch, int fromIndex) { 1647 return isLatin1() ? StringLatin1.lastIndexOf(value, ch, fromIndex) 1648 : StringUTF16.lastIndexOf(value, ch, fromIndex); 1649 } 1650 1651 /** 1652 * Returns the index within this string of the first occurrence of the 1653 * specified substring. 1654 * 1655 * <p>The returned index is the smallest value {@code k} for which: 1656 * <pre>{@code 1657 * this.startsWith(str, k) 1658 * }</pre> 1659 * If no such value of {@code k} exists, then {@code -1} is returned. 1660 * 1661 * @param str the substring to search for. 1662 * @return the index of the first occurrence of the specified substring, 1663 * or {@code -1} if there is no such occurrence. 1664 */ 1665 public int indexOf(String str) { 1666 if (coder() == str.coder()) { 1667 return isLatin1() ? StringLatin1.indexOf(value, str.value) 1668 : StringUTF16.indexOf(value, str.value); 1669 } 1670 if (coder() == LATIN1) { // str.coder == UTF16 1671 return -1; 1672 } 1673 return StringUTF16.indexOfLatin1(value, str.value); 1674 } 1675 1676 /** 1677 * Returns the index within this string of the first occurrence of the 1678 * specified substring, starting at the specified index. 1679 * 1680 * <p>The returned index is the smallest value {@code k} for which: 1681 * <pre>{@code 1682 * k >= Math.min(fromIndex, this.length()) && 1683 * this.startsWith(str, k) 1684 * }</pre> 1685 * If no such value of {@code k} exists, then {@code -1} is returned. 1686 * 1687 * @param str the substring to search for. 1688 * @param fromIndex the index from which to start the search. 1689 * @return the index of the first occurrence of the specified substring, 1690 * starting at the specified index, 1691 * or {@code -1} if there is no such occurrence. 1692 */ 1693 public int indexOf(String str, int fromIndex) { 1694 return indexOf(value, coder(), length(), str, fromIndex); 1695 } 1696 1697 /** 1698 * Code shared by String and AbstractStringBuilder to do searches. The 1699 * source is the character array being searched, and the target 1700 * is the string being searched for. 1701 * 1702 * @param src the characters being searched. 1703 * @param srcCoder the coder of the source string. 1704 * @param srcCount length of the source string. 1705 * @param tgtStr the characters being searched for. 1706 * @param fromIndex the index to begin searching from. 1707 */ 1708 static int indexOf(byte[] src, byte srcCoder, int srcCount, 1709 String tgtStr, int fromIndex) { 1710 byte[] tgt = tgtStr.value; 1711 byte tgtCoder = tgtStr.coder(); 1712 int tgtCount = tgtStr.length(); 1713 1714 if (fromIndex >= srcCount) { 1715 return (tgtCount == 0 ? srcCount : -1); 1716 } 1717 if (fromIndex < 0) { 1718 fromIndex = 0; 1719 } 1720 if (tgtCount == 0) { 1721 return fromIndex; 1722 } 1723 if (tgtCount > srcCount) { 1724 return -1; 1725 } 1726 if (srcCoder == tgtCoder) { 1727 return srcCoder == LATIN1 1728 ? StringLatin1.indexOf(src, srcCount, tgt, tgtCount, fromIndex) 1729 : StringUTF16.indexOf(src, srcCount, tgt, tgtCount, fromIndex); 1730 } 1731 if (srcCoder == LATIN1) { // && tgtCoder == UTF16 1732 return -1; 1733 } 1734 // srcCoder == UTF16 && tgtCoder == LATIN1) { 1735 return StringUTF16.indexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex); 1736 } 1737 1738 /** 1739 * Returns the index within this string of the last occurrence of the 1740 * specified substring. The last occurrence of the empty string "" 1741 * is considered to occur at the index value {@code this.length()}. 1742 * 1743 * <p>The returned index is the largest value {@code k} for which: 1744 * <pre>{@code 1745 * this.startsWith(str, k) 1746 * }</pre> 1747 * If no such value of {@code k} exists, then {@code -1} is returned. 1748 * 1749 * @param str the substring to search for. 1750 * @return the index of the last occurrence of the specified substring, 1751 * or {@code -1} if there is no such occurrence. 1752 */ 1753 public int lastIndexOf(String str) { 1754 return lastIndexOf(str, length()); 1755 } 1756 1757 /** 1758 * Returns the index within this string of the last occurrence of the 1759 * specified substring, searching backward starting at the specified index. 1760 * 1761 * <p>The returned index is the largest value {@code k} for which: 1762 * <pre>{@code 1763 * k <= Math.min(fromIndex, this.length()) && 1764 * this.startsWith(str, k) 1765 * }</pre> 1766 * If no such value of {@code k} exists, then {@code -1} is returned. 1767 * 1768 * @param str the substring to search for. 1769 * @param fromIndex the index to start the search from. 1770 * @return the index of the last occurrence of the specified substring, 1771 * searching backward from the specified index, 1772 * or {@code -1} if there is no such occurrence. 1773 */ 1774 public int lastIndexOf(String str, int fromIndex) { 1775 return lastIndexOf(value, coder(), length(), str, fromIndex); 1776 } 1777 1778 /** 1779 * Code shared by String and AbstractStringBuilder to do searches. The 1780 * source is the character array being searched, and the target 1781 * is the string being searched for. 1782 * 1783 * @param src the characters being searched. 1784 * @param srcCoder coder handles the mapping between bytes/chars 1785 * @param srcCount count of the source string. 1786 * @param tgt the characters being searched for. 1787 * @param fromIndex the index to begin searching from. 1788 */ 1789 static int lastIndexOf(byte[] src, byte srcCoder, int srcCount, 1790 String tgtStr, int fromIndex) { 1791 byte[] tgt = tgtStr.value; 1792 byte tgtCoder = tgtStr.coder(); 1793 int tgtCount = tgtStr.length(); 1794 /* 1795 * Check arguments; return immediately where possible. For 1796 * consistency, don't check for null str. 1797 */ 1798 int rightIndex = srcCount - tgtCount; 1799 if (fromIndex > rightIndex) { 1800 fromIndex = rightIndex; 1801 } 1802 if (fromIndex < 0) { 1803 return -1; 1804 } 1805 /* Empty string always matches. */ 1806 if (tgtCount == 0) { 1807 return fromIndex; 1808 } 1809 if (srcCoder == tgtCoder) { 1810 return srcCoder == LATIN1 1811 ? StringLatin1.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex) 1812 : StringUTF16.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex); 1813 } 1814 if (srcCoder == LATIN1) { // && tgtCoder == UTF16 1815 return -1; 1816 } 1817 // srcCoder == UTF16 && tgtCoder == LATIN1 1818 return StringUTF16.lastIndexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex); 1819 } 1820 1821 /** 1822 * Returns a string that is a substring of this string. The 1823 * substring begins with the character at the specified index and 1824 * extends to the end of this string. <p> 1825 * Examples: 1826 * <blockquote><pre> 1827 * "unhappy".substring(2) returns "happy" 1828 * "Harbison".substring(3) returns "bison" 1829 * "emptiness".substring(9) returns "" (an empty string) 1830 * </pre></blockquote> 1831 * 1832 * @param beginIndex the beginning index, inclusive. 1833 * @return the specified substring. 1834 * @exception IndexOutOfBoundsException if 1835 * {@code beginIndex} is negative or larger than the 1836 * length of this {@code String} object. 1837 */ 1838 public String substring(int beginIndex) { 1839 if (beginIndex < 0) { 1840 throw new StringIndexOutOfBoundsException(beginIndex); 1841 } 1842 int subLen = length() - beginIndex; 1843 if (subLen < 0) { 1844 throw new StringIndexOutOfBoundsException(subLen); 1845 } 1846 if (beginIndex == 0) { 1847 return this; 1848 } 1849 return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen) 1850 : StringUTF16.newString(value, beginIndex, subLen); 1851 } 1852 1853 /** 1854 * Returns a string that is a substring of this string. The 1855 * substring begins at the specified {@code beginIndex} and 1856 * extends to the character at index {@code endIndex - 1}. 1857 * Thus the length of the substring is {@code endIndex-beginIndex}. 1858 * <p> 1859 * Examples: 1860 * <blockquote><pre> 1861 * "hamburger".substring(4, 8) returns "urge" 1862 * "smiles".substring(1, 5) returns "mile" 1863 * </pre></blockquote> 1864 * 1865 * @param beginIndex the beginning index, inclusive. 1866 * @param endIndex the ending index, exclusive. 1867 * @return the specified substring. 1868 * @exception IndexOutOfBoundsException if the 1869 * {@code beginIndex} is negative, or 1870 * {@code endIndex} is larger than the length of 1871 * this {@code String} object, or 1872 * {@code beginIndex} is larger than 1873 * {@code endIndex}. 1874 */ 1875 public String substring(int beginIndex, int endIndex) { 1876 int length = length(); 1877 checkBoundsBeginEnd(beginIndex, endIndex, length); 1878 int subLen = endIndex - beginIndex; 1879 if (beginIndex == 0 && endIndex == length) { 1880 return this; 1881 } 1882 return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen) 1883 : StringUTF16.newString(value, beginIndex, subLen); 1884 } 1885 1886 /** 1887 * Returns a character sequence that is a subsequence of this sequence. 1888 * 1889 * <p> An invocation of this method of the form 1890 * 1891 * <blockquote><pre> 1892 * str.subSequence(begin, end)</pre></blockquote> 1893 * 1894 * behaves in exactly the same way as the invocation 1895 * 1896 * <blockquote><pre> 1897 * str.substring(begin, end)</pre></blockquote> 1898 * 1899 * @apiNote 1900 * This method is defined so that the {@code String} class can implement 1901 * the {@link CharSequence} interface. 1902 * 1903 * @param beginIndex the begin index, inclusive. 1904 * @param endIndex the end index, exclusive. 1905 * @return the specified subsequence. 1906 * 1907 * @throws IndexOutOfBoundsException 1908 * if {@code beginIndex} or {@code endIndex} is negative, 1909 * if {@code endIndex} is greater than {@code length()}, 1910 * or if {@code beginIndex} is greater than {@code endIndex} 1911 * 1912 * @since 1.4 1913 * @spec JSR-51 1914 */ 1915 public CharSequence subSequence(int beginIndex, int endIndex) { 1916 return this.substring(beginIndex, endIndex); 1917 } 1918 1919 /** 1920 * Concatenates the specified string to the end of this string. 1921 * <p> 1922 * If the length of the argument string is {@code 0}, then this 1923 * {@code String} object is returned. Otherwise, a 1924 * {@code String} object is returned that represents a character 1925 * sequence that is the concatenation of the character sequence 1926 * represented by this {@code String} object and the character 1927 * sequence represented by the argument string.<p> 1928 * Examples: 1929 * <blockquote><pre> 1930 * "cares".concat("s") returns "caress" 1931 * "to".concat("get").concat("her") returns "together" 1932 * </pre></blockquote> 1933 * 1934 * @param str the {@code String} that is concatenated to the end 1935 * of this {@code String}. 1936 * @return a string that represents the concatenation of this object's 1937 * characters followed by the string argument's characters. 1938 */ 1939 public String concat(String str) { 1940 int olen = str.length(); 1941 if (olen == 0) { 1942 return this; 1943 } 1944 if (coder() == str.coder()) { 1945 byte[] val = this.value; 1946 byte[] oval = str.value; 1947 int len = val.length + oval.length; 1948 byte[] buf = Arrays.copyOf(val, len); 1949 System.arraycopy(oval, 0, buf, val.length, oval.length); 1950 return new String(buf, coder); 1951 } 1952 int len = length(); 1953 byte[] buf = StringUTF16.newBytesFor(len + olen); 1954 getBytes(buf, 0, UTF16); 1955 str.getBytes(buf, len, UTF16); 1956 return new String(buf, UTF16); 1957 } 1958 1959 /** 1960 * Returns a string resulting from replacing all occurrences of 1961 * {@code oldChar} in this string with {@code newChar}. 1962 * <p> 1963 * If the character {@code oldChar} does not occur in the 1964 * character sequence represented by this {@code String} object, 1965 * then a reference to this {@code String} object is returned. 1966 * Otherwise, a {@code String} object is returned that 1967 * represents a character sequence identical to the character sequence 1968 * represented by this {@code String} object, except that every 1969 * occurrence of {@code oldChar} is replaced by an occurrence 1970 * of {@code newChar}. 1971 * <p> 1972 * Examples: 1973 * <blockquote><pre> 1974 * "mesquite in your cellar".replace('e', 'o') 1975 * returns "mosquito in your collar" 1976 * "the war of baronets".replace('r', 'y') 1977 * returns "the way of bayonets" 1978 * "sparring with a purple porpoise".replace('p', 't') 1979 * returns "starring with a turtle tortoise" 1980 * "JonL".replace('q', 'x') returns "JonL" (no change) 1981 * </pre></blockquote> 1982 * 1983 * @param oldChar the old character. 1984 * @param newChar the new character. 1985 * @return a string derived from this string by replacing every 1986 * occurrence of {@code oldChar} with {@code newChar}. 1987 */ 1988 public String replace(char oldChar, char newChar) { 1989 if (oldChar != newChar) { 1990 String ret = isLatin1() ? StringLatin1.replace(value, oldChar, newChar) 1991 : StringUTF16.replace(value, oldChar, newChar); 1992 if (ret != null) { 1993 return ret; 1994 } 1995 } 1996 return this; 1997 } 1998 1999 /** 2000 * Tells whether or not this string matches the given <a 2001 * href="../util/regex/Pattern.html#sum">regular expression</a>. 2002 * 2003 * <p> An invocation of this method of the form 2004 * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the 2005 * same result as the expression 2006 * 2007 * <blockquote> 2008 * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence) 2009 * matches(<i>regex</i>, <i>str</i>)} 2010 * </blockquote> 2011 * 2012 * @param regex 2013 * the regular expression to which this string is to be matched 2014 * 2015 * @return {@code true} if, and only if, this string matches the 2016 * given regular expression 2017 * 2018 * @throws PatternSyntaxException 2019 * if the regular expression's syntax is invalid 2020 * 2021 * @see java.util.regex.Pattern 2022 * 2023 * @since 1.4 2024 * @spec JSR-51 2025 */ 2026 public boolean matches(String regex) { 2027 return Pattern.matches(regex, this); 2028 } 2029 2030 /** 2031 * Returns true if and only if this string contains the specified 2032 * sequence of char values. 2033 * 2034 * @param s the sequence to search for 2035 * @return true if this string contains {@code s}, false otherwise 2036 * @since 1.5 2037 */ 2038 public boolean contains(CharSequence s) { 2039 return indexOf(s.toString()) >= 0; 2040 } 2041 2042 /** 2043 * Replaces the first substring of this string that matches the given <a 2044 * href="../util/regex/Pattern.html#sum">regular expression</a> with the 2045 * given replacement. 2046 * 2047 * <p> An invocation of this method of the form 2048 * <i>str</i>{@code .replaceFirst(}<i>regex</i>{@code ,} <i>repl</i>{@code )} 2049 * yields exactly the same result as the expression 2050 * 2051 * <blockquote> 2052 * <code> 2053 * {@link java.util.regex.Pattern}.{@link 2054 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2055 * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link 2056 * java.util.regex.Matcher#replaceFirst replaceFirst}(<i>repl</i>) 2057 * </code> 2058 * </blockquote> 2059 * 2060 *<p> 2061 * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the 2062 * replacement string may cause the results to be different than if it were 2063 * being treated as a literal replacement string; see 2064 * {@link java.util.regex.Matcher#replaceFirst}. 2065 * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special 2066 * meaning of these characters, if desired. 2067 * 2068 * @param regex 2069 * the regular expression to which this string is to be matched 2070 * @param replacement 2071 * the string to be substituted for the first match 2072 * 2073 * @return The resulting {@code String} 2074 * 2075 * @throws PatternSyntaxException 2076 * if the regular expression's syntax is invalid 2077 * 2078 * @see java.util.regex.Pattern 2079 * 2080 * @since 1.4 2081 * @spec JSR-51 2082 */ 2083 public String replaceFirst(String regex, String replacement) { 2084 return Pattern.compile(regex).matcher(this).replaceFirst(replacement); 2085 } 2086 2087 /** 2088 * Replaces each substring of this string that matches the given <a 2089 * href="../util/regex/Pattern.html#sum">regular expression</a> with the 2090 * given replacement. 2091 * 2092 * <p> An invocation of this method of the form 2093 * <i>str</i>{@code .replaceAll(}<i>regex</i>{@code ,} <i>repl</i>{@code )} 2094 * yields exactly the same result as the expression 2095 * 2096 * <blockquote> 2097 * <code> 2098 * {@link java.util.regex.Pattern}.{@link 2099 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2100 * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link 2101 * java.util.regex.Matcher#replaceAll replaceAll}(<i>repl</i>) 2102 * </code> 2103 * </blockquote> 2104 * 2105 *<p> 2106 * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the 2107 * replacement string may cause the results to be different than if it were 2108 * being treated as a literal replacement string; see 2109 * {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}. 2110 * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special 2111 * meaning of these characters, if desired. 2112 * 2113 * @param regex 2114 * the regular expression to which this string is to be matched 2115 * @param replacement 2116 * the string to be substituted for each match 2117 * 2118 * @return The resulting {@code String} 2119 * 2120 * @throws PatternSyntaxException 2121 * if the regular expression's syntax is invalid 2122 * 2123 * @see java.util.regex.Pattern 2124 * 2125 * @since 1.4 2126 * @spec JSR-51 2127 */ 2128 public String replaceAll(String regex, String replacement) { 2129 return Pattern.compile(regex).matcher(this).replaceAll(replacement); 2130 } 2131 2132 /** 2133 * Replaces each substring of this string that matches the literal target 2134 * sequence with the specified literal replacement sequence. The 2135 * replacement proceeds from the beginning of the string to the end, for 2136 * example, replacing "aa" with "b" in the string "aaa" will result in 2137 * "ba" rather than "ab". 2138 * 2139 * @param target The sequence of char values to be replaced 2140 * @param replacement The replacement sequence of char values 2141 * @return The resulting string 2142 * @since 1.5 2143 */ 2144 public String replace(CharSequence target, CharSequence replacement) { 2145 String tgtStr = target.toString(); 2146 String replStr = replacement.toString(); 2147 int j = indexOf(tgtStr); 2148 if (j < 0) { 2149 return this; 2150 } 2151 int tgtLen = tgtStr.length(); 2152 int tgtLen1 = Math.max(tgtLen, 1); 2153 int thisLen = length(); 2154 2155 int newLenHint = thisLen - tgtLen + replStr.length(); 2156 if (newLenHint < 0) { 2157 throw new OutOfMemoryError(); 2158 } 2159 StringBuilder sb = new StringBuilder(newLenHint); 2160 int i = 0; 2161 do { 2162 sb.append(this, i, j).append(replStr); 2163 i = j + tgtLen; 2164 } while (j < thisLen && (j = indexOf(tgtStr, j + tgtLen1)) > 0); 2165 return sb.append(this, i, thisLen).toString(); 2166 } 2167 2168 /** 2169 * Splits this string around matches of the given 2170 * <a href="../util/regex/Pattern.html#sum">regular expression</a>. 2171 * 2172 * <p> The array returned by this method contains each substring of this 2173 * string that is terminated by another substring that matches the given 2174 * expression or is terminated by the end of the string. The substrings in 2175 * the array are in the order in which they occur in this string. If the 2176 * expression does not match any part of the input then the resulting array 2177 * has just one element, namely this string. 2178 * 2179 * <p> When there is a positive-width match at the beginning of this 2180 * string then an empty leading substring is included at the beginning 2181 * of the resulting array. A zero-width match at the beginning however 2182 * never produces such empty leading substring. 2183 * 2184 * <p> The {@code limit} parameter controls the number of times the 2185 * pattern is applied and therefore affects the length of the resulting 2186 * array. 2187 * <ul> 2188 * <li><p> 2189 * If the <i>limit</i> is positive then the pattern will be applied 2190 * at most <i>limit</i> - 1 times, the array's length will be 2191 * no greater than <i>limit</i>, and the array's last entry will contain 2192 * all input beyond the last matched delimiter.</p></li> 2193 * 2194 * <li><p> 2195 * If the <i>limit</i> is zero then the pattern will be applied as 2196 * many times as possible, the array can have any length, and trailing 2197 * empty strings will be discarded.</p></li> 2198 * 2199 * <li><p> 2200 * If the <i>limit</i> is negative then the pattern will be applied 2201 * as many times as possible and the array can have any length.</p></li> 2202 * </ul> 2203 * 2204 * <p> The string {@code "boo:and:foo"}, for example, yields the 2205 * following results with these parameters: 2206 * 2207 * <blockquote><table class="plain"> 2208 * <caption style="display:none">Split example showing regex, limit, and result</caption> 2209 * <thead> 2210 * <tr> 2211 * <th scope="col">Regex</th> 2212 * <th scope="col">Limit</th> 2213 * <th scope="col">Result</th> 2214 * </tr> 2215 * </thead> 2216 * <tbody> 2217 * <tr><th scope="row" rowspan="3" style="font-weight:normal">:</th> 2218 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th> 2219 * <td>{@code { "boo", "and:foo" }}</td></tr> 2220 * <tr><!-- : --> 2221 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th> 2222 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2223 * <tr><!-- : --> 2224 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th> 2225 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2226 * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th> 2227 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th> 2228 * <td>{@code { "b", "", ":and:f", "", "" }}</td></tr> 2229 * <tr><!-- o --> 2230 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th> 2231 * <td>{@code { "b", "", ":and:f", "", "" }}</td></tr> 2232 * <tr><!-- o --> 2233 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th> 2234 * <td>{@code { "b", "", ":and:f" }}</td></tr> 2235 * </tbody> 2236 * </table></blockquote> 2237 * 2238 * <p> An invocation of this method of the form 2239 * <i>str.</i>{@code split(}<i>regex</i>{@code ,} <i>n</i>{@code )} 2240 * yields the same result as the expression 2241 * 2242 * <blockquote> 2243 * <code> 2244 * {@link java.util.regex.Pattern}.{@link 2245 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2246 * java.util.regex.Pattern#split(java.lang.CharSequence,int) split}(<i>str</i>, <i>n</i>) 2247 * </code> 2248 * </blockquote> 2249 * 2250 * 2251 * @param regex 2252 * the delimiting regular expression 2253 * 2254 * @param limit 2255 * the result threshold, as described above 2256 * 2257 * @return the array of strings computed by splitting this string 2258 * around matches of the given regular expression 2259 * 2260 * @throws PatternSyntaxException 2261 * if the regular expression's syntax is invalid 2262 * 2263 * @see java.util.regex.Pattern 2264 * 2265 * @since 1.4 2266 * @spec JSR-51 2267 */ 2268 public String[] split(String regex, int limit) { 2269 /* fastpath if the regex is a 2270 (1)one-char String and this character is not one of the 2271 RegEx's meta characters ".$|()[{^?*+\\", or 2272 (2)two-char String and the first char is the backslash and 2273 the second is not the ascii digit or ascii letter. 2274 */ 2275 char ch = 0; 2276 if (((regex.length() == 1 && 2277 ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) || 2278 (regex.length() == 2 && 2279 regex.charAt(0) == '\\' && 2280 (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 && 2281 ((ch-'a')|('z'-ch)) < 0 && 2282 ((ch-'A')|('Z'-ch)) < 0)) && 2283 (ch < Character.MIN_HIGH_SURROGATE || 2284 ch > Character.MAX_LOW_SURROGATE)) 2285 { 2286 int off = 0; 2287 int next = 0; 2288 boolean limited = limit > 0; 2289 ArrayList<String> list = new ArrayList<>(); 2290 while ((next = indexOf(ch, off)) != -1) { 2291 if (!limited || list.size() < limit - 1) { 2292 list.add(substring(off, next)); 2293 off = next + 1; 2294 } else { // last one 2295 //assert (list.size() == limit - 1); 2296 int last = length(); 2297 list.add(substring(off, last)); 2298 off = last; 2299 break; 2300 } 2301 } 2302 // If no match was found, return this 2303 if (off == 0) 2304 return new String[]{this}; 2305 2306 // Add remaining segment 2307 if (!limited || list.size() < limit) 2308 list.add(substring(off, length())); 2309 2310 // Construct result 2311 int resultSize = list.size(); 2312 if (limit == 0) { 2313 while (resultSize > 0 && list.get(resultSize - 1).length() == 0) { 2314 resultSize--; 2315 } 2316 } 2317 String[] result = new String[resultSize]; 2318 return list.subList(0, resultSize).toArray(result); 2319 } 2320 return Pattern.compile(regex).split(this, limit); 2321 } 2322 2323 /** 2324 * Splits this string around matches of the given <a 2325 * href="../util/regex/Pattern.html#sum">regular expression</a>. 2326 * 2327 * <p> This method works as if by invoking the two-argument {@link 2328 * #split(String, int) split} method with the given expression and a limit 2329 * argument of zero. Trailing empty strings are therefore not included in 2330 * the resulting array. 2331 * 2332 * <p> The string {@code "boo:and:foo"}, for example, yields the following 2333 * results with these expressions: 2334 * 2335 * <blockquote><table class="plain"> 2336 * <caption style="display:none">Split examples showing regex and result</caption> 2337 * <thead> 2338 * <tr> 2339 * <th scope="col">Regex</th> 2340 * <th scope="col">Result</th> 2341 * </tr> 2342 * </thead> 2343 * <tbody> 2344 * <tr><th scope="row" style="text-weight:normal">:</th> 2345 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2346 * <tr><th scope="row" style="text-weight:normal">o</th> 2347 * <td>{@code { "b", "", ":and:f" }}</td></tr> 2348 * </tbody> 2349 * </table></blockquote> 2350 * 2351 * 2352 * @param regex 2353 * the delimiting regular expression 2354 * 2355 * @return the array of strings computed by splitting this string 2356 * around matches of the given regular expression 2357 * 2358 * @throws PatternSyntaxException 2359 * if the regular expression's syntax is invalid 2360 * 2361 * @see java.util.regex.Pattern 2362 * 2363 * @since 1.4 2364 * @spec JSR-51 2365 */ 2366 public String[] split(String regex) { 2367 return split(regex, 0); 2368 } 2369 2370 /** 2371 * Returns a new String composed of copies of the 2372 * {@code CharSequence elements} joined together with a copy of 2373 * the specified {@code delimiter}. 2374 * 2375 * <blockquote>For example, 2376 * <pre>{@code 2377 * String message = String.join("-", "Java", "is", "cool"); 2378 * // message returned is: "Java-is-cool" 2379 * }</pre></blockquote> 2380 * 2381 * Note that if an element is null, then {@code "null"} is added. 2382 * 2383 * @param delimiter the delimiter that separates each element 2384 * @param elements the elements to join together. 2385 * 2386 * @return a new {@code String} that is composed of the {@code elements} 2387 * separated by the {@code delimiter} 2388 * 2389 * @throws NullPointerException If {@code delimiter} or {@code elements} 2390 * is {@code null} 2391 * 2392 * @see java.util.StringJoiner 2393 * @since 1.8 2394 */ 2395 public static String join(CharSequence delimiter, CharSequence... elements) { 2396 Objects.requireNonNull(delimiter); 2397 Objects.requireNonNull(elements); 2398 // Number of elements not likely worth Arrays.stream overhead. 2399 StringJoiner joiner = new StringJoiner(delimiter); 2400 for (CharSequence cs: elements) { 2401 joiner.add(cs); 2402 } 2403 return joiner.toString(); 2404 } 2405 2406 /** 2407 * Returns a new {@code String} composed of copies of the 2408 * {@code CharSequence elements} joined together with a copy of the 2409 * specified {@code delimiter}. 2410 * 2411 * <blockquote>For example, 2412 * <pre>{@code 2413 * List<String> strings = List.of("Java", "is", "cool"); 2414 * String message = String.join(" ", strings); 2415 * //message returned is: "Java is cool" 2416 * 2417 * Set<String> strings = 2418 * new LinkedHashSet<>(List.of("Java", "is", "very", "cool")); 2419 * String message = String.join("-", strings); 2420 * //message returned is: "Java-is-very-cool" 2421 * }</pre></blockquote> 2422 * 2423 * Note that if an individual element is {@code null}, then {@code "null"} is added. 2424 * 2425 * @param delimiter a sequence of characters that is used to separate each 2426 * of the {@code elements} in the resulting {@code String} 2427 * @param elements an {@code Iterable} that will have its {@code elements} 2428 * joined together. 2429 * 2430 * @return a new {@code String} that is composed from the {@code elements} 2431 * argument 2432 * 2433 * @throws NullPointerException If {@code delimiter} or {@code elements} 2434 * is {@code null} 2435 * 2436 * @see #join(CharSequence,CharSequence...) 2437 * @see java.util.StringJoiner 2438 * @since 1.8 2439 */ 2440 public static String join(CharSequence delimiter, 2441 Iterable<? extends CharSequence> elements) { 2442 Objects.requireNonNull(delimiter); 2443 Objects.requireNonNull(elements); 2444 StringJoiner joiner = new StringJoiner(delimiter); 2445 for (CharSequence cs: elements) { 2446 joiner.add(cs); 2447 } 2448 return joiner.toString(); 2449 } 2450 2451 /** 2452 * Converts all of the characters in this {@code String} to lower 2453 * case using the rules of the given {@code Locale}. Case mapping is based 2454 * on the Unicode Standard version specified by the {@link java.lang.Character Character} 2455 * class. Since case mappings are not always 1:1 char mappings, the resulting 2456 * {@code String} may be a different length than the original {@code String}. 2457 * <p> 2458 * Examples of lowercase mappings are in the following table: 2459 * <table class="plain"> 2460 * <caption style="display:none">Lowercase mapping examples showing language code of locale, upper case, lower case, and description</caption> 2461 * <thead> 2462 * <tr> 2463 * <th scope="col">Language Code of Locale</th> 2464 * <th scope="col">Upper Case</th> 2465 * <th scope="col">Lower Case</th> 2466 * <th scope="col">Description</th> 2467 * </tr> 2468 * </thead> 2469 * <tbody> 2470 * <tr> 2471 * <td>tr (Turkish)</td> 2472 * <th scope="row" style="font-weight:normal; text-align:left">\u0130</th> 2473 * <td>\u0069</td> 2474 * <td>capital letter I with dot above -> small letter i</td> 2475 * </tr> 2476 * <tr> 2477 * <td>tr (Turkish)</td> 2478 * <th scope="row" style="font-weight:normal; text-align:left">\u0049</th> 2479 * <td>\u0131</td> 2480 * <td>capital letter I -> small letter dotless i </td> 2481 * </tr> 2482 * <tr> 2483 * <td>(all)</td> 2484 * <th scope="row" style="font-weight:normal; text-align:left">French Fries</th> 2485 * <td>french fries</td> 2486 * <td>lowercased all chars in String</td> 2487 * </tr> 2488 * <tr> 2489 * <td>(all)</td> 2490 * <th scope="row" style="font-weight:normal; text-align:left"> 2491 * ΙΧΘΥΣ</th> 2492 * <td>ιχθυσ</td> 2493 * <td>lowercased all chars in String</td> 2494 * </tr> 2495 * </tbody> 2496 * </table> 2497 * 2498 * @param locale use the case transformation rules for this locale 2499 * @return the {@code String}, converted to lowercase. 2500 * @see java.lang.String#toLowerCase() 2501 * @see java.lang.String#toUpperCase() 2502 * @see java.lang.String#toUpperCase(Locale) 2503 * @since 1.1 2504 */ 2505 public String toLowerCase(Locale locale) { 2506 return isLatin1() ? StringLatin1.toLowerCase(this, value, locale) 2507 : StringUTF16.toLowerCase(this, value, locale); 2508 } 2509 2510 /** 2511 * Converts all of the characters in this {@code String} to lower 2512 * case using the rules of the default locale. This is equivalent to calling 2513 * {@code toLowerCase(Locale.getDefault())}. 2514 * <p> 2515 * <b>Note:</b> This method is locale sensitive, and may produce unexpected 2516 * results if used for strings that are intended to be interpreted locale 2517 * independently. 2518 * Examples are programming language identifiers, protocol keys, and HTML 2519 * tags. 2520 * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale 2521 * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the 2522 * LATIN SMALL LETTER DOTLESS I character. 2523 * To obtain correct results for locale insensitive strings, use 2524 * {@code toLowerCase(Locale.ROOT)}. 2525 * 2526 * @return the {@code String}, converted to lowercase. 2527 * @see java.lang.String#toLowerCase(Locale) 2528 */ 2529 public String toLowerCase() { 2530 return toLowerCase(Locale.getDefault()); 2531 } 2532 2533 /** 2534 * Converts all of the characters in this {@code String} to upper 2535 * case using the rules of the given {@code Locale}. Case mapping is based 2536 * on the Unicode Standard version specified by the {@link java.lang.Character Character} 2537 * class. Since case mappings are not always 1:1 char mappings, the resulting 2538 * {@code String} may be a different length than the original {@code String}. 2539 * <p> 2540 * Examples of locale-sensitive and 1:M case mappings are in the following table. 2541 * 2542 * <table class="plain"> 2543 * <caption style="display:none">Examples of locale-sensitive and 1:M case mappings. Shows Language code of locale, lower case, upper case, and description.</caption> 2544 * <thead> 2545 * <tr> 2546 * <th scope="col">Language Code of Locale</th> 2547 * <th scope="col">Lower Case</th> 2548 * <th scope="col">Upper Case</th> 2549 * <th scope="col">Description</th> 2550 * </tr> 2551 * </thead> 2552 * <tbody> 2553 * <tr> 2554 * <td>tr (Turkish)</td> 2555 * <th scope="row" style="font-weight:normal; text-align:left">\u0069</th> 2556 * <td>\u0130</td> 2557 * <td>small letter i -> capital letter I with dot above</td> 2558 * </tr> 2559 * <tr> 2560 * <td>tr (Turkish)</td> 2561 * <th scope="row" style="font-weight:normal; text-align:left">\u0131</th> 2562 * <td>\u0049</td> 2563 * <td>small letter dotless i -> capital letter I</td> 2564 * </tr> 2565 * <tr> 2566 * <td>(all)</td> 2567 * <th scope="row" style="font-weight:normal; text-align:left">\u00df</th> 2568 * <td>\u0053 \u0053</td> 2569 * <td>small letter sharp s -> two letters: SS</td> 2570 * </tr> 2571 * <tr> 2572 * <td>(all)</td> 2573 * <th scope="row" style="font-weight:normal; text-align:left">Fahrvergnügen</th> 2574 * <td>FAHRVERGNÜGEN</td> 2575 * <td></td> 2576 * </tr> 2577 * </tbody> 2578 * </table> 2579 * @param locale use the case transformation rules for this locale 2580 * @return the {@code String}, converted to uppercase. 2581 * @see java.lang.String#toUpperCase() 2582 * @see java.lang.String#toLowerCase() 2583 * @see java.lang.String#toLowerCase(Locale) 2584 * @since 1.1 2585 */ 2586 public String toUpperCase(Locale locale) { 2587 return isLatin1() ? StringLatin1.toUpperCase(this, value, locale) 2588 : StringUTF16.toUpperCase(this, value, locale); 2589 } 2590 2591 /** 2592 * Converts all of the characters in this {@code String} to upper 2593 * case using the rules of the default locale. This method is equivalent to 2594 * {@code toUpperCase(Locale.getDefault())}. 2595 * <p> 2596 * <b>Note:</b> This method is locale sensitive, and may produce unexpected 2597 * results if used for strings that are intended to be interpreted locale 2598 * independently. 2599 * Examples are programming language identifiers, protocol keys, and HTML 2600 * tags. 2601 * For instance, {@code "title".toUpperCase()} in a Turkish locale 2602 * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the 2603 * LATIN CAPITAL LETTER I WITH DOT ABOVE character. 2604 * To obtain correct results for locale insensitive strings, use 2605 * {@code toUpperCase(Locale.ROOT)}. 2606 * 2607 * @return the {@code String}, converted to uppercase. 2608 * @see java.lang.String#toUpperCase(Locale) 2609 */ 2610 public String toUpperCase() { 2611 return toUpperCase(Locale.getDefault()); 2612 } 2613 2614 /** 2615 * Returns a string whose value is this string, with all leading 2616 * and trailing space removed, where space is defined 2617 * as any character whose codepoint is less than or equal to 2618 * {@code 'U+0020'} (the space character). 2619 * <p> 2620 * If this {@code String} object represents an empty character 2621 * sequence, or the first and last characters of character sequence 2622 * represented by this {@code String} object both have codes 2623 * that are not space (as defined above), then a 2624 * reference to this {@code String} object is returned. 2625 * <p> 2626 * Otherwise, if all characters in this string are space (as 2627 * defined above), then a {@code String} object representing an 2628 * empty string is returned. 2629 * <p> 2630 * Otherwise, let <i>k</i> be the index of the first character in the 2631 * string whose code is not a space (as defined above) and let 2632 * <i>m</i> be the index of the last character in the string whose code 2633 * is not a space (as defined above). A {@code String} 2634 * object is returned, representing the substring of this string that 2635 * begins with the character at index <i>k</i> and ends with the 2636 * character at index <i>m</i>-that is, the result of 2637 * {@code this.substring(k, m + 1)}. 2638 * <p> 2639 * This method may be used to trim space (as defined above) from 2640 * the beginning and end of a string. 2641 * 2642 * @return a string whose value is this string, with all leading 2643 * and trailing space removed, or this string if it 2644 * has no leading or trailing space. 2645 */ 2646 public String trim() { 2647 String ret = isLatin1() ? StringLatin1.trim(value) 2648 : StringUTF16.trim(value); 2649 return ret == null ? this : ret; 2650 } 2651 2652 /** 2653 * Returns a string whose value is this string, with all leading 2654 * and trailing {@link Character#isWhitespace(int) white space} 2655 * removed. 2656 * <p> 2657 * If this {@code String} object represents an empty string, 2658 * or if all code points in this string are 2659 * {@link Character#isWhitespace(int) white space}, then an empty string 2660 * is returned. 2661 * <p> 2662 * Otherwise, returns a substring of this string beginning with the first 2663 * code point that is not a {@link Character#isWhitespace(int) white space} 2664 * up to and including the last code point that is not a 2665 * {@link Character#isWhitespace(int) white space}. 2666 * <p> 2667 * This method may be used to strip 2668 * {@link Character#isWhitespace(int) white space} from 2669 * the beginning and end of a string. 2670 * 2671 * @return a string whose value is this string, with all leading 2672 * and trailing white space removed 2673 * 2674 * @see Character#isWhitespace(int) 2675 * 2676 * @since 11 2677 */ 2678 public String strip() { 2679 String ret = isLatin1() ? StringLatin1.strip(value) 2680 : StringUTF16.strip(value); 2681 return ret == null ? this : ret; 2682 } 2683 2684 /** 2685 * Returns a string whose value is this string, with all leading 2686 * {@link Character#isWhitespace(int) white space} removed. 2687 * <p> 2688 * If this {@code String} object represents an empty string, 2689 * or if all code points in this string are 2690 * {@link Character#isWhitespace(int) white space}, then an empty string 2691 * is returned. 2692 * <p> 2693 * Otherwise, returns a substring of this string beginning with the first 2694 * code point that is not a {@link Character#isWhitespace(int) white space} 2695 * up to to and including the last code point of this string. 2696 * <p> 2697 * This method may be used to trim 2698 * {@link Character#isWhitespace(int) white space} from 2699 * the beginning of a string. 2700 * 2701 * @return a string whose value is this string, with all leading white 2702 * space removed 2703 * 2704 * @see Character#isWhitespace(int) 2705 * 2706 * @since 11 2707 */ 2708 public String stripLeading() { 2709 String ret = isLatin1() ? StringLatin1.stripLeading(value) 2710 : StringUTF16.stripLeading(value); 2711 return ret == null ? this : ret; 2712 } 2713 2714 /** 2715 * Returns a string whose value is this string, with all trailing 2716 * {@link Character#isWhitespace(int) white space} removed. 2717 * <p> 2718 * If this {@code String} object represents an empty string, 2719 * or if all characters in this string are 2720 * {@link Character#isWhitespace(int) white space}, then an empty string 2721 * is returned. 2722 * <p> 2723 * Otherwise, returns a substring of this string beginning with the first 2724 * code point of this string up to and including the last code point 2725 * that is not a {@link Character#isWhitespace(int) white space}. 2726 * <p> 2727 * This method may be used to trim 2728 * {@link Character#isWhitespace(int) white space} from 2729 * the end of a string. 2730 * 2731 * @return a string whose value is this string, with all trailing white 2732 * space removed 2733 * 2734 * @see Character#isWhitespace(int) 2735 * 2736 * @since 11 2737 */ 2738 public String stripTrailing() { 2739 String ret = isLatin1() ? StringLatin1.stripTrailing(value) 2740 : StringUTF16.stripTrailing(value); 2741 return ret == null ? this : ret; 2742 } 2743 2744 /** 2745 * Returns {@code true} if the string is empty or contains only 2746 * {@link Character#isWhitespace(int) white space} codepoints, 2747 * otherwise {@code false}. 2748 * 2749 * @return {@code true} if the string is empty or contains only 2750 * {@link Character#isWhitespace(int) white space} codepoints, 2751 * otherwise {@code false} 2752 * 2753 * @see Character#isWhitespace(int) 2754 * 2755 * @since 11 2756 */ 2757 public boolean isBlank() { 2758 return indexOfNonWhitespace() == length(); 2759 } 2760 2761 private Stream<String> lines(int maxLeading, int maxTrailing) { 2762 return isLatin1() ? StringLatin1.lines(value, maxLeading, maxTrailing) 2763 : StringUTF16.lines(value, maxLeading, maxTrailing); 2764 } 2765 2766 /** 2767 * Returns a stream of lines extracted from this string, 2768 * separated by line terminators. 2769 * <p> 2770 * A <i>line terminator</i> is one of the following: 2771 * a line feed character {@code "\n"} (U+000A), 2772 * a carriage return character {@code "\r"} (U+000D), 2773 * or a carriage return followed immediately by a line feed 2774 * {@code "\r\n"} (U+000D U+000A). 2775 * <p> 2776 * A <i>line</i> is either a sequence of zero or more characters 2777 * followed by a line terminator, or it is a sequence of one or 2778 * more characters followed by the end of the string. A 2779 * line does not include the line terminator. 2780 * <p> 2781 * The stream returned by this method contains the lines from 2782 * this string in the order in which they occur. 2783 * 2784 * @apiNote This definition of <i>line</i> implies that an empty 2785 * string has zero lines and that there is no empty line 2786 * following a line terminator at the end of a string. 2787 * 2788 * @implNote This method provides better performance than 2789 * split("\R") by supplying elements lazily and 2790 * by faster search of new line terminators. 2791 * 2792 * @return the stream of lines extracted from this string 2793 * 2794 * @since 11 2795 */ 2796 public Stream<String> lines() { 2797 return lines(0, 0); 2798 } 2799 2800 /** 2801 * Adjusts the indentation of each line of this string based on the value of 2802 * {@code n}, and normalizes line termination characters. 2803 * <p> 2804 * This string is conceptually separated into lines using 2805 * {@link String#lines()}. Each line is then adjusted as described below 2806 * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting 2807 * lines are then concatenated and returned. 2808 * <p> 2809 * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the 2810 * beginning of each line. {@link String#isBlank() Blank lines} are 2811 * unaffected. 2812 * <p> 2813 * If {@code n < 0} then up to {@code n} 2814 * {@link Character#isWhitespace(int) white space characters} are removed 2815 * from the beginning of each line. If a given line does not contain 2816 * sufficient white space then all leading 2817 * {@link Character#isWhitespace(int) white space characters} are removed. 2818 * Each white space character is treated as a single character. In 2819 * particular, the tab character {@code "\t"} (U+0009) is considered a 2820 * single character; it is not expanded. 2821 * <p> 2822 * If {@code n == 0} then the line remains unchanged. However, line 2823 * terminators are still normalized. 2824 * <p> 2825 * 2826 * @param n number of leading 2827 * {@link Character#isWhitespace(int) white space characters} 2828 * to add or remove 2829 * 2830 * @return string with indentation adjusted and line endings normalized 2831 * 2832 * @see String#lines() 2833 * @see String#isBlank() 2834 * @see Character#isWhitespace(int) 2835 * 2836 * @since 12 2837 */ 2838 public String indent(int n) { 2839 return isEmpty() ? "" : indent(n, false); 2840 } 2841 2842 private String indent(int n, boolean removeBlanks) { 2843 Stream<String> stream = removeBlanks ? lines(Integer.MAX_VALUE, Integer.MAX_VALUE) 2844 : lines(); 2845 if (n > 0) { 2846 final String spaces = " ".repeat(n); 2847 stream = stream.map(s -> s.isBlank() ? s : spaces + s); 2848 } else if (n == Integer.MIN_VALUE) { 2849 stream = stream.map(s -> s.stripLeading()); 2850 } else if (n < 0) { 2851 stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace()))); 2852 } 2853 return stream.collect(Collectors.joining("\n", "", "\n")); 2854 } 2855 2856 private int indexOfNonWhitespace() { 2857 return isLatin1() ? StringLatin1.indexOfNonWhitespace(value) 2858 : StringUTF16.indexOfNonWhitespace(value); 2859 } 2860 2861 private int lastIndexOfNonWhitespace() { 2862 return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value) 2863 : StringUTF16.lastIndexOfNonWhitespace(value); 2864 } 2865 2866 /** 2867 * Removes vertical and horizontal white space margins from around the 2868 * essential body of a multi-line string, while preserving relative 2869 * indentation. 2870 * <p> 2871 * This string is first conceptually separated into lines as if by 2872 * {@link String#lines()}. 2873 * <p> 2874 * Then, the <i>minimum indentation</i> (min) is determined as follows. For 2875 * each non-blank line (as defined by {@link String#isBlank()}), the 2876 * leading {@link Character#isWhitespace(int) white space} characters are 2877 * counted. The <i>min</i> value is the smallest of these counts. 2878 * <p> 2879 * For each non-blank line, <i>min</i> leading white space characters are 2880 * removed. Each white space character is treated as a single character. In 2881 * particular, the tab character {@code "\t"} (U+0009) is considered a 2882 * single character; it is not expanded. 2883 * <p> 2884 * Leading and trailing blank lines, if any, are removed. Trailing spaces are 2885 * preserved. 2886 * <p> 2887 * Each line is suffixed with a line feed character {@code "\n"} (U+000A). 2888 * <p> 2889 * Finally, the lines are concatenated into a single string and returned. 2890 * 2891 * @apiNote 2892 * This method's primary purpose is to shift a block of lines as far as 2893 * possible to the left, while preserving relative indentation. Lines 2894 * that were indented the least will thus have no leading white space. 2895 * 2896 * Example: 2897 * <blockquote><pre> 2898 * ` 2899 * This is the first line 2900 * This is the second line 2901 * `.align(); 2902 * 2903 * returns 2904 * This is the first line 2905 * This is the second line 2906 * </pre></blockquote> 2907 * 2908 * @return string with margins removed and line terminators normalized 2909 * 2910 * @see String#lines() 2911 * @see String#isBlank() 2912 * @see String#indent(int) 2913 * @see Character#isWhitespace(int) 2914 * 2915 * @since 12 2916 */ 2917 public String align() { 2918 return align(0); 2919 } 2920 2921 /** 2922 * Removes vertical and horizontal white space margins from around the 2923 * essential body of a multi-line string, while preserving relative 2924 * indentation and with optional indentation adjustment. 2925 * <p> 2926 * Invoking this method is equivalent to: 2927 * <blockquote> 2928 * {@code this.align().indent(n)} 2929 * </blockquote> 2930 * 2931 * @apiNote 2932 * Examples: 2933 * <blockquote><pre> 2934 * ` 2935 * This is the first line 2936 * This is the second line 2937 * `.align(0); 2938 * 2939 * returns 2940 * This is the first line 2941 * This is the second line 2942 * 2943 * 2944 * ` 2945 * This is the first line 2946 * This is the second line 2947 * `.align(4); 2948 * returns 2949 * This is the first line 2950 * This is the second line 2951 * </pre></blockquote> 2952 * 2953 * @param n number of leading white space characters 2954 * to add or remove 2955 * 2956 * @return string with margins removed, indentation adjusted and 2957 * line terminators normalized 2958 * 2959 * @see String#align() 2960 * 2961 * @since 12 2962 */ 2963 public String align(int n) { 2964 if (isEmpty()) { 2965 return ""; 2966 } 2967 int outdent = lines().filter(not(String::isBlank)) 2968 .mapToInt(String::indexOfNonWhitespace) 2969 .min() 2970 .orElse(0); 2971 return indent(n - outdent, true); 2972 } 2973 2974 /** 2975 * This object (which is already a string!) is itself returned. 2976 * 2977 * @return the string itself. 2978 */ 2979 public String toString() { 2980 return this; 2981 } 2982 2983 /** 2984 * Returns a stream of {@code int} zero-extending the {@code char} values 2985 * from this sequence. Any char which maps to a <a 2986 * href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate code 2987 * point</a> is passed through uninterpreted. 2988 * 2989 * @return an IntStream of char values from this sequence 2990 * @since 9 2991 */ 2992 @Override 2993 public IntStream chars() { 2994 return StreamSupport.intStream( 2995 isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE) 2996 : new StringUTF16.CharsSpliterator(value, Spliterator.IMMUTABLE), 2997 false); 2998 } 2999 3000 3001 /** 3002 * Returns a stream of code point values from this sequence. Any surrogate 3003 * pairs encountered in the sequence are combined as if by {@linkplain 3004 * Character#toCodePoint Character.toCodePoint} and the result is passed 3005 * to the stream. Any other code units, including ordinary BMP characters, 3006 * unpaired surrogates, and undefined code units, are zero-extended to 3007 * {@code int} values which are then passed to the stream. 3008 * 3009 * @return an IntStream of Unicode code points from this sequence 3010 * @since 9 3011 */ 3012 @Override 3013 public IntStream codePoints() { 3014 return StreamSupport.intStream( 3015 isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE) 3016 : new StringUTF16.CodePointsSpliterator(value, Spliterator.IMMUTABLE), 3017 false); 3018 } 3019 3020 /** 3021 * Converts this string to a new character array. 3022 * 3023 * @return a newly allocated character array whose length is the length 3024 * of this string and whose contents are initialized to contain 3025 * the character sequence represented by this string. 3026 */ 3027 public char[] toCharArray() { 3028 return isLatin1() ? StringLatin1.toChars(value) 3029 : StringUTF16.toChars(value); 3030 } 3031 3032 /** 3033 * Returns a formatted string using the specified format string and 3034 * arguments. 3035 * 3036 * <p> The locale always used is the one returned by {@link 3037 * java.util.Locale#getDefault(java.util.Locale.Category) 3038 * Locale.getDefault(Locale.Category)} with 3039 * {@link java.util.Locale.Category#FORMAT FORMAT} category specified. 3040 * 3041 * @param format 3042 * A <a href="../util/Formatter.html#syntax">format string</a> 3043 * 3044 * @param args 3045 * Arguments referenced by the format specifiers in the format 3046 * string. If there are more arguments than format specifiers, the 3047 * extra arguments are ignored. The number of arguments is 3048 * variable and may be zero. The maximum number of arguments is 3049 * limited by the maximum dimension of a Java array as defined by 3050 * <cite>The Java™ Virtual Machine Specification</cite>. 3051 * The behaviour on a 3052 * {@code null} argument depends on the <a 3053 * href="../util/Formatter.html#syntax">conversion</a>. 3054 * 3055 * @throws java.util.IllegalFormatException 3056 * If a format string contains an illegal syntax, a format 3057 * specifier that is incompatible with the given arguments, 3058 * insufficient arguments given the format string, or other 3059 * illegal conditions. For specification of all possible 3060 * formatting errors, see the <a 3061 * href="../util/Formatter.html#detail">Details</a> section of the 3062 * formatter class specification. 3063 * 3064 * @return A formatted string 3065 * 3066 * @see java.util.Formatter 3067 * @since 1.5 3068 */ 3069 public static String format(String format, Object... args) { 3070 return new Formatter().format(format, args).toString(); 3071 } 3072 3073 /** 3074 * Returns a formatted string using the specified locale, format string, 3075 * and arguments. 3076 * 3077 * @param l 3078 * The {@linkplain java.util.Locale locale} to apply during 3079 * formatting. If {@code l} is {@code null} then no localization 3080 * is applied. 3081 * 3082 * @param format 3083 * A <a href="../util/Formatter.html#syntax">format string</a> 3084 * 3085 * @param args 3086 * Arguments referenced by the format specifiers in the format 3087 * string. If there are more arguments than format specifiers, the 3088 * extra arguments are ignored. The number of arguments is 3089 * variable and may be zero. The maximum number of arguments is 3090 * limited by the maximum dimension of a Java array as defined by 3091 * <cite>The Java™ Virtual Machine Specification</cite>. 3092 * The behaviour on a 3093 * {@code null} argument depends on the 3094 * <a href="../util/Formatter.html#syntax">conversion</a>. 3095 * 3096 * @throws java.util.IllegalFormatException 3097 * If a format string contains an illegal syntax, a format 3098 * specifier that is incompatible with the given arguments, 3099 * insufficient arguments given the format string, or other 3100 * illegal conditions. For specification of all possible 3101 * formatting errors, see the <a 3102 * href="../util/Formatter.html#detail">Details</a> section of the 3103 * formatter class specification 3104 * 3105 * @return A formatted string 3106 * 3107 * @see java.util.Formatter 3108 * @since 1.5 3109 */ 3110 public static String format(Locale l, String format, Object... args) { 3111 return new Formatter(l).format(format, args).toString(); 3112 } 3113 3114 /** 3115 * Returns the string representation of the {@code Object} argument. 3116 * 3117 * @param obj an {@code Object}. 3118 * @return if the argument is {@code null}, then a string equal to 3119 * {@code "null"}; otherwise, the value of 3120 * {@code obj.toString()} is returned. 3121 * @see java.lang.Object#toString() 3122 */ 3123 public static String valueOf(Object obj) { 3124 return (obj == null) ? "null" : obj.toString(); 3125 } 3126 3127 /** 3128 * Returns the string representation of the {@code char} array 3129 * argument. The contents of the character array are copied; subsequent 3130 * modification of the character array does not affect the returned 3131 * string. 3132 * 3133 * @param data the character array. 3134 * @return a {@code String} that contains the characters of the 3135 * character array. 3136 */ 3137 public static String valueOf(char data[]) { 3138 return new String(data); 3139 } 3140 3141 /** 3142 * Returns the string representation of a specific subarray of the 3143 * {@code char} array argument. 3144 * <p> 3145 * The {@code offset} argument is the index of the first 3146 * character of the subarray. The {@code count} argument 3147 * specifies the length of the subarray. The contents of the subarray 3148 * are copied; subsequent modification of the character array does not 3149 * affect the returned string. 3150 * 3151 * @param data the character array. 3152 * @param offset initial offset of the subarray. 3153 * @param count length of the subarray. 3154 * @return a {@code String} that contains the characters of the 3155 * specified subarray of the character array. 3156 * @exception IndexOutOfBoundsException if {@code offset} is 3157 * negative, or {@code count} is negative, or 3158 * {@code offset+count} is larger than 3159 * {@code data.length}. 3160 */ 3161 public static String valueOf(char data[], int offset, int count) { 3162 return new String(data, offset, count); 3163 } 3164 3165 /** 3166 * Equivalent to {@link #valueOf(char[], int, int)}. 3167 * 3168 * @param data the character array. 3169 * @param offset initial offset of the subarray. 3170 * @param count length of the subarray. 3171 * @return a {@code String} that contains the characters of the 3172 * specified subarray of the character array. 3173 * @exception IndexOutOfBoundsException if {@code offset} is 3174 * negative, or {@code count} is negative, or 3175 * {@code offset+count} is larger than 3176 * {@code data.length}. 3177 */ 3178 public static String copyValueOf(char data[], int offset, int count) { 3179 return new String(data, offset, count); 3180 } 3181 3182 /** 3183 * Equivalent to {@link #valueOf(char[])}. 3184 * 3185 * @param data the character array. 3186 * @return a {@code String} that contains the characters of the 3187 * character array. 3188 */ 3189 public static String copyValueOf(char data[]) { 3190 return new String(data); 3191 } 3192 3193 /** 3194 * Returns the string representation of the {@code boolean} argument. 3195 * 3196 * @param b a {@code boolean}. 3197 * @return if the argument is {@code true}, a string equal to 3198 * {@code "true"} is returned; otherwise, a string equal to 3199 * {@code "false"} is returned. 3200 */ 3201 public static String valueOf(boolean b) { 3202 return b ? "true" : "false"; 3203 } 3204 3205 /** 3206 * Returns the string representation of the {@code char} 3207 * argument. 3208 * 3209 * @param c a {@code char}. 3210 * @return a string of length {@code 1} containing 3211 * as its single character the argument {@code c}. 3212 */ 3213 public static String valueOf(char c) { 3214 if (COMPACT_STRINGS && StringLatin1.canEncode(c)) { 3215 return new String(StringLatin1.toBytes(c), LATIN1); 3216 } 3217 return new String(StringUTF16.toBytes(c), UTF16); 3218 } 3219 3220 /** 3221 * Returns the string representation of the {@code int} argument. 3222 * <p> 3223 * The representation is exactly the one returned by the 3224 * {@code Integer.toString} method of one argument. 3225 * 3226 * @param i an {@code int}. 3227 * @return a string representation of the {@code int} argument. 3228 * @see java.lang.Integer#toString(int, int) 3229 */ 3230 public static String valueOf(int i) { 3231 return Integer.toString(i); 3232 } 3233 3234 /** 3235 * Returns the string representation of the {@code long} argument. 3236 * <p> 3237 * The representation is exactly the one returned by the 3238 * {@code Long.toString} method of one argument. 3239 * 3240 * @param l a {@code long}. 3241 * @return a string representation of the {@code long} argument. 3242 * @see java.lang.Long#toString(long) 3243 */ 3244 public static String valueOf(long l) { 3245 return Long.toString(l); 3246 } 3247 3248 /** 3249 * Returns the string representation of the {@code float} argument. 3250 * <p> 3251 * The representation is exactly the one returned by the 3252 * {@code Float.toString} method of one argument. 3253 * 3254 * @param f a {@code float}. 3255 * @return a string representation of the {@code float} argument. 3256 * @see java.lang.Float#toString(float) 3257 */ 3258 public static String valueOf(float f) { 3259 return Float.toString(f); 3260 } 3261 3262 /** 3263 * Returns the string representation of the {@code double} argument. 3264 * <p> 3265 * The representation is exactly the one returned by the 3266 * {@code Double.toString} method of one argument. 3267 * 3268 * @param d a {@code double}. 3269 * @return a string representation of the {@code double} argument. 3270 * @see java.lang.Double#toString(double) 3271 */ 3272 public static String valueOf(double d) { 3273 return Double.toString(d); 3274 } 3275 3276 /** 3277 * Returns a canonical representation for the string object. 3278 * <p> 3279 * A pool of strings, initially empty, is maintained privately by the 3280 * class {@code String}. 3281 * <p> 3282 * When the intern method is invoked, if the pool already contains a 3283 * string equal to this {@code String} object as determined by 3284 * the {@link #equals(Object)} method, then the string from the pool is 3285 * returned. Otherwise, this {@code String} object is added to the 3286 * pool and a reference to this {@code String} object is returned. 3287 * <p> 3288 * It follows that for any two strings {@code s} and {@code t}, 3289 * {@code s.intern() == t.intern()} is {@code true} 3290 * if and only if {@code s.equals(t)} is {@code true}. 3291 * <p> 3292 * All literal strings and string-valued constant expressions are 3293 * interned. String literals are defined in section 3.10.5 of the 3294 * <cite>The Java™ Language Specification</cite>. 3295 * 3296 * @return a string that has the same contents as this string, but is 3297 * guaranteed to be from a pool of unique strings. 3298 * @jls 3.10.5 String Literals 3299 */ 3300 public native String intern(); 3301 3302 /** 3303 * Returns a string whose value is the concatenation of this 3304 * string repeated {@code count} times. 3305 * <p> 3306 * If this string is empty or count is zero then the empty 3307 * string is returned. 3308 * 3309 * @param count number of times to repeat 3310 * 3311 * @return A string composed of this string repeated 3312 * {@code count} times or the empty string if this 3313 * string is empty or count is zero 3314 * 3315 * @throws IllegalArgumentException if the {@code count} is 3316 * negative. 3317 * 3318 * @since 11 3319 */ 3320 public String repeat(int count) { 3321 if (count < 0) { 3322 throw new IllegalArgumentException("count is negative: " + count); 3323 } 3324 if (count == 1) { 3325 return this; 3326 } 3327 final int len = value.length; 3328 if (len == 0 || count == 0) { 3329 return ""; 3330 } 3331 if (len == 1) { 3332 final byte[] single = new byte[count]; 3333 Arrays.fill(single, value[0]); 3334 return new String(single, coder); 3335 } 3336 if (Integer.MAX_VALUE / count < len) { 3337 throw new OutOfMemoryError("Repeating " + len + " bytes String " + count + 3338 " times will produce a String exceeding maximum size."); 3339 } 3340 final int limit = len * count; 3341 final byte[] multiple = new byte[limit]; 3342 System.arraycopy(value, 0, multiple, 0, len); 3343 int copied = len; 3344 for (; copied < limit - copied; copied <<= 1) { 3345 System.arraycopy(multiple, 0, multiple, copied, copied); 3346 } 3347 System.arraycopy(multiple, 0, multiple, copied, limit - copied); 3348 return new String(multiple, coder); 3349 } 3350 3351 //////////////////////////////////////////////////////////////// 3352 3353 /** 3354 * Copy character bytes from this string into dst starting at dstBegin. 3355 * This method doesn't perform any range checking. 3356 * 3357 * Invoker guarantees: dst is in UTF16 (inflate itself for asb), if two 3358 * coders are different, and dst is big enough (range check) 3359 * 3360 * @param dstBegin the char index, not offset of byte[] 3361 * @param coder the coder of dst[] 3362 */ 3363 void getBytes(byte dst[], int dstBegin, byte coder) { 3364 if (coder() == coder) { 3365 System.arraycopy(value, 0, dst, dstBegin << coder, value.length); 3366 } else { // this.coder == LATIN && coder == UTF16 3367 StringLatin1.inflate(value, 0, dst, dstBegin, value.length); 3368 } 3369 } 3370 3371 /* 3372 * Package private constructor. Trailing Void argument is there for 3373 * disambiguating it against other (public) constructors. 3374 * 3375 * Stores the char[] value into a byte[] that each byte represents 3376 * the8 low-order bits of the corresponding character, if the char[] 3377 * contains only latin1 character. Or a byte[] that stores all 3378 * characters in their byte sequences defined by the {@code StringUTF16}. 3379 */ 3380 String(char[] value, int off, int len, Void sig) { 3381 if (len == 0) { 3382 this.value = "".value; 3383 this.coder = "".coder; 3384 return; 3385 } 3386 if (COMPACT_STRINGS) { 3387 byte[] val = StringUTF16.compress(value, off, len); 3388 if (val != null) { 3389 this.value = val; 3390 this.coder = LATIN1; 3391 return; 3392 } 3393 } 3394 this.coder = UTF16; 3395 this.value = StringUTF16.toBytes(value, off, len); 3396 } 3397 3398 /* 3399 * Package private constructor. Trailing Void argument is there for 3400 * disambiguating it against other (public) constructors. 3401 */ 3402 String(AbstractStringBuilder asb, Void sig) { 3403 byte[] val = asb.getValue(); 3404 int length = asb.length(); 3405 if (asb.isLatin1()) { 3406 this.coder = LATIN1; 3407 this.value = Arrays.copyOfRange(val, 0, length); 3408 } else { 3409 if (COMPACT_STRINGS) { 3410 byte[] buf = StringUTF16.compress(val, 0, length); 3411 if (buf != null) { 3412 this.coder = LATIN1; 3413 this.value = buf; 3414 return; 3415 } 3416 } 3417 this.coder = UTF16; 3418 this.value = Arrays.copyOfRange(val, 0, length << 1); 3419 } 3420 } 3421 3422 /* 3423 * Package private constructor which shares value array for speed. 3424 */ 3425 String(byte[] value, byte coder) { 3426 this.value = value; 3427 this.coder = coder; 3428 } 3429 3430 byte coder() { 3431 return COMPACT_STRINGS ? coder : UTF16; 3432 } 3433 3434 byte[] value() { 3435 return value; 3436 } 3437 3438 private boolean isLatin1() { 3439 return COMPACT_STRINGS && coder == LATIN1; 3440 } 3441 3442 @Native static final byte LATIN1 = 0; 3443 @Native static final byte UTF16 = 1; 3444 3445 /* 3446 * StringIndexOutOfBoundsException if {@code index} is 3447 * negative or greater than or equal to {@code length}. 3448 */ 3449 static void checkIndex(int index, int length) { 3450 if (index < 0 || index >= length) { 3451 throw new StringIndexOutOfBoundsException("index " + index + 3452 ",length " + length); 3453 } 3454 } 3455 3456 /* 3457 * StringIndexOutOfBoundsException if {@code offset} 3458 * is negative or greater than {@code length}. 3459 */ 3460 static void checkOffset(int offset, int length) { 3461 if (offset < 0 || offset > length) { 3462 throw new StringIndexOutOfBoundsException("offset " + offset + 3463 ",length " + length); 3464 } 3465 } 3466 3467 /* 3468 * Check {@code offset}, {@code count} against {@code 0} and {@code length} 3469 * bounds. 3470 * 3471 * @throws StringIndexOutOfBoundsException 3472 * If {@code offset} is negative, {@code count} is negative, 3473 * or {@code offset} is greater than {@code length - count} 3474 */ 3475 static void checkBoundsOffCount(int offset, int count, int length) { 3476 if (offset < 0 || count < 0 || offset > length - count) { 3477 throw new StringIndexOutOfBoundsException( 3478 "offset " + offset + ", count " + count + ", length " + length); 3479 } 3480 } 3481 3482 /* 3483 * Check {@code begin}, {@code end} against {@code 0} and {@code length} 3484 * bounds. 3485 * 3486 * @throws StringIndexOutOfBoundsException 3487 * If {@code begin} is negative, {@code begin} is greater than 3488 * {@code end}, or {@code end} is greater than {@code length}. 3489 */ 3490 static void checkBoundsBeginEnd(int begin, int end, int length) { 3491 if (begin < 0 || begin > end || end > length) { 3492 throw new StringIndexOutOfBoundsException( 3493 "begin " + begin + ", end " + end + ", length " + length); 3494 } 3495 } 3496 3497 /** 3498 * Returns the string representation of the {@code codePoint} 3499 * argument. 3500 * 3501 * @param codePoint a {@code codePoint}. 3502 * @return a string of length {@code 1} or {@code 2} containing 3503 * as its single character the argument {@code codePoint}. 3504 * @throws IllegalArgumentException if the specified 3505 * {@code codePoint} is not a {@linkplain Character#isValidCodePoint 3506 * valid Unicode code point}. 3507 */ 3508 static String valueOfCodePoint(int codePoint) { 3509 if (COMPACT_STRINGS && StringLatin1.canEncode(codePoint)) { 3510 return new String(StringLatin1.toBytes((char)codePoint), LATIN1); 3511 } else if (Character.isBmpCodePoint(codePoint)) { 3512 return new String(StringUTF16.toBytes((char)codePoint), UTF16); 3513 } else if (Character.isSupplementaryCodePoint(codePoint)) { 3514 return new String(StringUTF16.toBytesSupplementary(codePoint), UTF16); 3515 } 3516 3517 throw new IllegalArgumentException( 3518 format("Not a valid Unicode code point: 0x%X", codePoint)); 3519 } 3520 }