1 /* 2 * Copyright (c) 1994, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.io.ObjectStreamField; 29 import java.io.UnsupportedEncodingException; 30 import java.lang.annotation.Native; 31 import java.nio.charset.Charset; 32 import java.util.ArrayList; 33 import java.util.Arrays; 34 import java.util.Comparator; 35 import java.util.Formatter; 36 import java.util.Locale; 37 import java.util.Objects; 38 import java.util.Spliterator; 39 import java.util.StringJoiner; 40 import java.util.function.Function; 41 import java.util.regex.Matcher; 42 import java.util.regex.Pattern; 43 import java.util.regex.PatternSyntaxException; 44 import java.util.stream.Collectors; 45 import java.util.stream.IntStream; 46 import java.util.stream.Stream; 47 import java.util.stream.StreamSupport; 48 import jdk.internal.HotSpotIntrinsicCandidate; 49 import jdk.internal.vm.annotation.Stable; 50 51 import static java.util.function.Predicate.not; 52 53 /** 54 * The {@code String} class represents character strings. All 55 * string literals in Java programs, such as {@code "abc"}, are 56 * implemented as instances of this class. 57 * <p> 58 * Strings are constant; their values cannot be changed after they 59 * are created. String buffers support mutable strings. 60 * Because String objects are immutable they can be shared. For example: 61 * <blockquote><pre> 62 * String str = "abc"; 63 * </pre></blockquote><p> 64 * is equivalent to: 65 * <blockquote><pre> 66 * char data[] = {'a', 'b', 'c'}; 67 * String str = new String(data); 68 * </pre></blockquote><p> 69 * Here are some more examples of how strings can be used: 70 * <blockquote><pre> 71 * System.out.println("abc"); 72 * String cde = "cde"; 73 * System.out.println("abc" + cde); 74 * String c = "abc".substring(2,3); 75 * String d = cde.substring(1, 2); 76 * </pre></blockquote> 77 * <p> 78 * The class {@code String} includes methods for examining 79 * individual characters of the sequence, for comparing strings, for 80 * searching strings, for extracting substrings, and for creating a 81 * copy of a string with all characters translated to uppercase or to 82 * lowercase. Case mapping is based on the Unicode Standard version 83 * specified by the {@link java.lang.Character Character} class. 84 * <p> 85 * The Java language provides special support for the string 86 * concatenation operator ( + ), and for conversion of 87 * other objects to strings. For additional information on string 88 * concatenation and conversion, see <i>The Java™ Language Specification</i>. 89 * 90 * <p> Unless otherwise noted, passing a {@code null} argument to a constructor 91 * or method in this class will cause a {@link NullPointerException} to be 92 * thrown. 93 * 94 * <p>A {@code String} represents a string in the UTF-16 format 95 * in which <em>supplementary characters</em> are represented by <em>surrogate 96 * pairs</em> (see the section <a href="Character.html#unicode">Unicode 97 * Character Representations</a> in the {@code Character} class for 98 * more information). 99 * Index values refer to {@code char} code units, so a supplementary 100 * character uses two positions in a {@code String}. 101 * <p>The {@code String} class provides methods for dealing with 102 * Unicode code points (i.e., characters), in addition to those for 103 * dealing with Unicode code units (i.e., {@code char} values). 104 * 105 * <p>Unless otherwise noted, methods for comparing Strings do not take locale 106 * into account. The {@link java.text.Collator} class provides methods for 107 * finer-grain, locale-sensitive String comparison. 108 * 109 * @implNote The implementation of the string concatenation operator is left to 110 * the discretion of a Java compiler, as long as the compiler ultimately conforms 111 * to <i>The Java™ Language Specification</i>. For example, the {@code javac} compiler 112 * may implement the operator with {@code StringBuffer}, {@code StringBuilder}, 113 * or {@code java.lang.invoke.StringConcatFactory} depending on the JDK version. The 114 * implementation of string conversion is typically through the method {@code toString}, 115 * defined by {@code Object} and inherited by all classes in Java. 116 * 117 * @author Lee Boynton 118 * @author Arthur van Hoff 119 * @author Martin Buchholz 120 * @author Ulf Zibis 121 * @see java.lang.Object#toString() 122 * @see java.lang.StringBuffer 123 * @see java.lang.StringBuilder 124 * @see java.nio.charset.Charset 125 * @since 1.0 126 * @jls 15.18.1 String Concatenation Operator + 127 */ 128 129 public final class String 130 implements java.io.Serializable, Comparable<String>, CharSequence { 131 132 /** 133 * The value is used for character storage. 134 * 135 * @implNote This field is trusted by the VM, and is a subject to 136 * constant folding if String instance is constant. Overwriting this 137 * field after construction will cause problems. 138 * 139 * Additionally, it is marked with {@link Stable} to trust the contents 140 * of the array. No other facility in JDK provides this functionality (yet). 141 * {@link Stable} is safe here, because value is never null. 142 */ 143 @Stable 144 private final byte[] value; 145 146 /** 147 * The identifier of the encoding used to encode the bytes in 148 * {@code value}. The supported values in this implementation are 149 * 150 * LATIN1 151 * UTF16 152 * 153 * @implNote This field is trusted by the VM, and is a subject to 154 * constant folding if String instance is constant. Overwriting this 155 * field after construction will cause problems. 156 */ 157 private final byte coder; 158 159 /** Cache the hash code for the string */ 160 private int hash; // Default to 0 161 162 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 163 private static final long serialVersionUID = -6849794470754667710L; 164 165 /** 166 * If String compaction is disabled, the bytes in {@code value} are 167 * always encoded in UTF16. 168 * 169 * For methods with several possible implementation paths, when String 170 * compaction is disabled, only one code path is taken. 171 * 172 * The instance field value is generally opaque to optimizing JIT 173 * compilers. Therefore, in performance-sensitive place, an explicit 174 * check of the static boolean {@code COMPACT_STRINGS} is done first 175 * before checking the {@code coder} field since the static boolean 176 * {@code COMPACT_STRINGS} would be constant folded away by an 177 * optimizing JIT compiler. The idioms for these cases are as follows. 178 * 179 * For code such as: 180 * 181 * if (coder == LATIN1) { ... } 182 * 183 * can be written more optimally as 184 * 185 * if (coder() == LATIN1) { ... } 186 * 187 * or: 188 * 189 * if (COMPACT_STRINGS && coder == LATIN1) { ... } 190 * 191 * An optimizing JIT compiler can fold the above conditional as: 192 * 193 * COMPACT_STRINGS == true => if (coder == LATIN1) { ... } 194 * COMPACT_STRINGS == false => if (false) { ... } 195 * 196 * @implNote 197 * The actual value for this field is injected by JVM. The static 198 * initialization block is used to set the value here to communicate 199 * that this static final field is not statically foldable, and to 200 * avoid any possible circular dependency during vm initialization. 201 */ 202 static final boolean COMPACT_STRINGS; 203 204 static { 205 COMPACT_STRINGS = true; 206 } 207 208 /** 209 * Class String is special cased within the Serialization Stream Protocol. 210 * 211 * A String instance is written into an ObjectOutputStream according to 212 * <a href="{@docRoot}/../specs/serialization/protocol.html#stream-elements"> 213 * Object Serialization Specification, Section 6.2, "Stream Elements"</a> 214 */ 215 private static final ObjectStreamField[] serialPersistentFields = 216 new ObjectStreamField[0]; 217 218 /** 219 * Initializes a newly created {@code String} object so that it represents 220 * an empty character sequence. Note that use of this constructor is 221 * unnecessary since Strings are immutable. 222 */ 223 public String() { 224 this.value = "".value; 225 this.coder = "".coder; 226 } 227 228 /** 229 * Initializes a newly created {@code String} object so that it represents 230 * the same sequence of characters as the argument; in other words, the 231 * newly created string is a copy of the argument string. Unless an 232 * explicit copy of {@code original} is needed, use of this constructor is 233 * unnecessary since Strings are immutable. 234 * 235 * @param original 236 * A {@code String} 237 */ 238 @HotSpotIntrinsicCandidate 239 public String(String original) { 240 this.value = original.value; 241 this.coder = original.coder; 242 this.hash = original.hash; 243 } 244 245 /** 246 * Allocates a new {@code String} so that it represents the sequence of 247 * characters currently contained in the character array argument. The 248 * contents of the character array are copied; subsequent modification of 249 * the character array does not affect the newly created string. 250 * 251 * @param value 252 * The initial value of the string 253 */ 254 public String(char value[]) { 255 this(value, 0, value.length, null); 256 } 257 258 /** 259 * Allocates a new {@code String} that contains characters from a subarray 260 * of the character array argument. The {@code offset} argument is the 261 * index of the first character of the subarray and the {@code count} 262 * argument specifies the length of the subarray. The contents of the 263 * subarray are copied; subsequent modification of the character array does 264 * not affect the newly created string. 265 * 266 * @param value 267 * Array that is the source of characters 268 * 269 * @param offset 270 * The initial offset 271 * 272 * @param count 273 * The length 274 * 275 * @throws IndexOutOfBoundsException 276 * If {@code offset} is negative, {@code count} is negative, or 277 * {@code offset} is greater than {@code value.length - count} 278 */ 279 public String(char value[], int offset, int count) { 280 this(value, offset, count, rangeCheck(value, offset, count)); 281 } 282 283 private static Void rangeCheck(char[] value, int offset, int count) { 284 checkBoundsOffCount(offset, count, value.length); 285 return null; 286 } 287 288 /** 289 * Allocates a new {@code String} that contains characters from a subarray 290 * of the <a href="Character.html#unicode">Unicode code point</a> array 291 * argument. The {@code offset} argument is the index of the first code 292 * point of the subarray and the {@code count} argument specifies the 293 * length of the subarray. The contents of the subarray are converted to 294 * {@code char}s; subsequent modification of the {@code int} array does not 295 * affect the newly created string. 296 * 297 * @param codePoints 298 * Array that is the source of Unicode code points 299 * 300 * @param offset 301 * The initial offset 302 * 303 * @param count 304 * The length 305 * 306 * @throws IllegalArgumentException 307 * If any invalid Unicode code point is found in {@code 308 * codePoints} 309 * 310 * @throws IndexOutOfBoundsException 311 * If {@code offset} is negative, {@code count} is negative, or 312 * {@code offset} is greater than {@code codePoints.length - count} 313 * 314 * @since 1.5 315 */ 316 public String(int[] codePoints, int offset, int count) { 317 checkBoundsOffCount(offset, count, codePoints.length); 318 if (count == 0) { 319 this.value = "".value; 320 this.coder = "".coder; 321 return; 322 } 323 if (COMPACT_STRINGS) { 324 byte[] val = StringLatin1.toBytes(codePoints, offset, count); 325 if (val != null) { 326 this.coder = LATIN1; 327 this.value = val; 328 return; 329 } 330 } 331 this.coder = UTF16; 332 this.value = StringUTF16.toBytes(codePoints, offset, count); 333 } 334 335 /** 336 * Allocates a new {@code String} constructed from a subarray of an array 337 * of 8-bit integer values. 338 * 339 * <p> The {@code offset} argument is the index of the first byte of the 340 * subarray, and the {@code count} argument specifies the length of the 341 * subarray. 342 * 343 * <p> Each {@code byte} in the subarray is converted to a {@code char} as 344 * specified in the {@link #String(byte[],int) String(byte[],int)} constructor. 345 * 346 * @deprecated This method does not properly convert bytes into characters. 347 * As of JDK 1.1, the preferred way to do this is via the 348 * {@code String} constructors that take a {@link 349 * java.nio.charset.Charset}, charset name, or that use the platform's 350 * default charset. 351 * 352 * @param ascii 353 * The bytes to be converted to characters 354 * 355 * @param hibyte 356 * The top 8 bits of each 16-bit Unicode code unit 357 * 358 * @param offset 359 * The initial offset 360 * @param count 361 * The length 362 * 363 * @throws IndexOutOfBoundsException 364 * If {@code offset} is negative, {@code count} is negative, or 365 * {@code offset} is greater than {@code ascii.length - count} 366 * 367 * @see #String(byte[], int) 368 * @see #String(byte[], int, int, java.lang.String) 369 * @see #String(byte[], int, int, java.nio.charset.Charset) 370 * @see #String(byte[], int, int) 371 * @see #String(byte[], java.lang.String) 372 * @see #String(byte[], java.nio.charset.Charset) 373 * @see #String(byte[]) 374 */ 375 @Deprecated(since="1.1") 376 public String(byte ascii[], int hibyte, int offset, int count) { 377 checkBoundsOffCount(offset, count, ascii.length); 378 if (count == 0) { 379 this.value = "".value; 380 this.coder = "".coder; 381 return; 382 } 383 if (COMPACT_STRINGS && (byte)hibyte == 0) { 384 this.value = Arrays.copyOfRange(ascii, offset, offset + count); 385 this.coder = LATIN1; 386 } else { 387 hibyte <<= 8; 388 byte[] val = StringUTF16.newBytesFor(count); 389 for (int i = 0; i < count; i++) { 390 StringUTF16.putChar(val, i, hibyte | (ascii[offset++] & 0xff)); 391 } 392 this.value = val; 393 this.coder = UTF16; 394 } 395 } 396 397 /** 398 * Allocates a new {@code String} containing characters constructed from 399 * an array of 8-bit integer values. Each character <i>c</i> in the 400 * resulting string is constructed from the corresponding component 401 * <i>b</i> in the byte array such that: 402 * 403 * <blockquote><pre> 404 * <b><i>c</i></b> == (char)(((hibyte & 0xff) << 8) 405 * | (<b><i>b</i></b> & 0xff)) 406 * </pre></blockquote> 407 * 408 * @deprecated This method does not properly convert bytes into 409 * characters. As of JDK 1.1, the preferred way to do this is via the 410 * {@code String} constructors that take a {@link 411 * java.nio.charset.Charset}, charset name, or that use the platform's 412 * default charset. 413 * 414 * @param ascii 415 * The bytes to be converted to characters 416 * 417 * @param hibyte 418 * The top 8 bits of each 16-bit Unicode code unit 419 * 420 * @see #String(byte[], int, int, java.lang.String) 421 * @see #String(byte[], int, int, java.nio.charset.Charset) 422 * @see #String(byte[], int, int) 423 * @see #String(byte[], java.lang.String) 424 * @see #String(byte[], java.nio.charset.Charset) 425 * @see #String(byte[]) 426 */ 427 @Deprecated(since="1.1") 428 public String(byte ascii[], int hibyte) { 429 this(ascii, hibyte, 0, ascii.length); 430 } 431 432 /** 433 * Constructs a new {@code String} by decoding the specified subarray of 434 * bytes using the specified charset. The length of the new {@code String} 435 * is a function of the charset, and hence may not be equal to the length 436 * of the subarray. 437 * 438 * <p> The behavior of this constructor when the given bytes are not valid 439 * in the given charset is unspecified. The {@link 440 * java.nio.charset.CharsetDecoder} class should be used when more control 441 * over the decoding process is required. 442 * 443 * @param bytes 444 * The bytes to be decoded into characters 445 * 446 * @param offset 447 * The index of the first byte to decode 448 * 449 * @param length 450 * The number of bytes to decode 451 452 * @param charsetName 453 * The name of a supported {@linkplain java.nio.charset.Charset 454 * charset} 455 * 456 * @throws UnsupportedEncodingException 457 * If the named charset is not supported 458 * 459 * @throws IndexOutOfBoundsException 460 * If {@code offset} is negative, {@code length} is negative, or 461 * {@code offset} is greater than {@code bytes.length - length} 462 * 463 * @since 1.1 464 */ 465 public String(byte bytes[], int offset, int length, String charsetName) 466 throws UnsupportedEncodingException { 467 if (charsetName == null) 468 throw new NullPointerException("charsetName"); 469 checkBoundsOffCount(offset, length, bytes.length); 470 StringCoding.Result ret = 471 StringCoding.decode(charsetName, bytes, offset, length); 472 this.value = ret.value; 473 this.coder = ret.coder; 474 } 475 476 /** 477 * Constructs a new {@code String} by decoding the specified subarray of 478 * bytes using the specified {@linkplain java.nio.charset.Charset charset}. 479 * The length of the new {@code String} is a function of the charset, and 480 * hence may not be equal to the length of the subarray. 481 * 482 * <p> This method always replaces malformed-input and unmappable-character 483 * sequences with this charset's default replacement string. The {@link 484 * java.nio.charset.CharsetDecoder} class should be used when more control 485 * over the decoding process is required. 486 * 487 * @param bytes 488 * The bytes to be decoded into characters 489 * 490 * @param offset 491 * The index of the first byte to decode 492 * 493 * @param length 494 * The number of bytes to decode 495 * 496 * @param charset 497 * The {@linkplain java.nio.charset.Charset charset} to be used to 498 * decode the {@code bytes} 499 * 500 * @throws IndexOutOfBoundsException 501 * If {@code offset} is negative, {@code length} is negative, or 502 * {@code offset} is greater than {@code bytes.length - length} 503 * 504 * @since 1.6 505 */ 506 public String(byte bytes[], int offset, int length, Charset charset) { 507 if (charset == null) 508 throw new NullPointerException("charset"); 509 checkBoundsOffCount(offset, length, bytes.length); 510 StringCoding.Result ret = 511 StringCoding.decode(charset, bytes, offset, length); 512 this.value = ret.value; 513 this.coder = ret.coder; 514 } 515 516 /** 517 * Constructs a new {@code String} by decoding the specified array of bytes 518 * using the specified {@linkplain java.nio.charset.Charset charset}. The 519 * length of the new {@code String} is a function of the charset, and hence 520 * may not be equal to the length of the byte array. 521 * 522 * <p> The behavior of this constructor when the given bytes are not valid 523 * in the given charset is unspecified. The {@link 524 * java.nio.charset.CharsetDecoder} class should be used when more control 525 * over the decoding process is required. 526 * 527 * @param bytes 528 * The bytes to be decoded into characters 529 * 530 * @param charsetName 531 * The name of a supported {@linkplain java.nio.charset.Charset 532 * charset} 533 * 534 * @throws UnsupportedEncodingException 535 * If the named charset is not supported 536 * 537 * @since 1.1 538 */ 539 public String(byte bytes[], String charsetName) 540 throws UnsupportedEncodingException { 541 this(bytes, 0, bytes.length, charsetName); 542 } 543 544 /** 545 * Constructs a new {@code String} by decoding the specified array of 546 * bytes using the specified {@linkplain java.nio.charset.Charset charset}. 547 * The length of the new {@code String} is a function of the charset, and 548 * hence may not be equal to the length of the byte array. 549 * 550 * <p> This method always replaces malformed-input and unmappable-character 551 * sequences with this charset's default replacement string. The {@link 552 * java.nio.charset.CharsetDecoder} class should be used when more control 553 * over the decoding process is required. 554 * 555 * @param bytes 556 * The bytes to be decoded into characters 557 * 558 * @param charset 559 * The {@linkplain java.nio.charset.Charset charset} to be used to 560 * decode the {@code bytes} 561 * 562 * @since 1.6 563 */ 564 public String(byte bytes[], Charset charset) { 565 this(bytes, 0, bytes.length, charset); 566 } 567 568 /** 569 * Constructs a new {@code String} by decoding the specified subarray of 570 * bytes using the platform's default charset. The length of the new 571 * {@code String} is a function of the charset, and hence may not be equal 572 * to the length of the subarray. 573 * 574 * <p> The behavior of this constructor when the given bytes are not valid 575 * in the default charset is unspecified. The {@link 576 * java.nio.charset.CharsetDecoder} class should be used when more control 577 * over the decoding process is required. 578 * 579 * @param bytes 580 * The bytes to be decoded into characters 581 * 582 * @param offset 583 * The index of the first byte to decode 584 * 585 * @param length 586 * The number of bytes to decode 587 * 588 * @throws IndexOutOfBoundsException 589 * If {@code offset} is negative, {@code length} is negative, or 590 * {@code offset} is greater than {@code bytes.length - length} 591 * 592 * @since 1.1 593 */ 594 public String(byte bytes[], int offset, int length) { 595 checkBoundsOffCount(offset, length, bytes.length); 596 StringCoding.Result ret = StringCoding.decode(bytes, offset, length); 597 this.value = ret.value; 598 this.coder = ret.coder; 599 } 600 601 /** 602 * Constructs a new {@code String} by decoding the specified array of bytes 603 * using the platform's default charset. The length of the new {@code 604 * String} is a function of the charset, and hence may not be equal to the 605 * length of the byte array. 606 * 607 * <p> The behavior of this constructor when the given bytes are not valid 608 * in the default charset is unspecified. The {@link 609 * java.nio.charset.CharsetDecoder} class should be used when more control 610 * over the decoding process is required. 611 * 612 * @param bytes 613 * The bytes to be decoded into characters 614 * 615 * @since 1.1 616 */ 617 public String(byte[] bytes) { 618 this(bytes, 0, bytes.length); 619 } 620 621 /** 622 * Allocates a new string that contains the sequence of characters 623 * currently contained in the string buffer argument. The contents of the 624 * string buffer are copied; subsequent modification of the string buffer 625 * does not affect the newly created string. 626 * 627 * @param buffer 628 * A {@code StringBuffer} 629 */ 630 public String(StringBuffer buffer) { 631 this(buffer.toString()); 632 } 633 634 /** 635 * Allocates a new string that contains the sequence of characters 636 * currently contained in the string builder argument. The contents of the 637 * string builder are copied; subsequent modification of the string builder 638 * does not affect the newly created string. 639 * 640 * <p> This constructor is provided to ease migration to {@code 641 * StringBuilder}. Obtaining a string from a string builder via the {@code 642 * toString} method is likely to run faster and is generally preferred. 643 * 644 * @param builder 645 * A {@code StringBuilder} 646 * 647 * @since 1.5 648 */ 649 public String(StringBuilder builder) { 650 this(builder, null); 651 } 652 653 /** 654 * Returns the length of this string. 655 * The length is equal to the number of <a href="Character.html#unicode">Unicode 656 * code units</a> in the string. 657 * 658 * @return the length of the sequence of characters represented by this 659 * object. 660 */ 661 public int length() { 662 return value.length >> coder(); 663 } 664 665 /** 666 * Returns {@code true} if, and only if, {@link #length()} is {@code 0}. 667 * 668 * @return {@code true} if {@link #length()} is {@code 0}, otherwise 669 * {@code false} 670 * 671 * @since 1.6 672 */ 673 public boolean isEmpty() { 674 return value.length == 0; 675 } 676 677 /** 678 * Returns the {@code char} value at the 679 * specified index. An index ranges from {@code 0} to 680 * {@code length() - 1}. The first {@code char} value of the sequence 681 * is at index {@code 0}, the next at index {@code 1}, 682 * and so on, as for array indexing. 683 * 684 * <p>If the {@code char} value specified by the index is a 685 * <a href="Character.html#unicode">surrogate</a>, the surrogate 686 * value is returned. 687 * 688 * @param index the index of the {@code char} value. 689 * @return the {@code char} value at the specified index of this string. 690 * The first {@code char} value is at index {@code 0}. 691 * @exception IndexOutOfBoundsException if the {@code index} 692 * argument is negative or not less than the length of this 693 * string. 694 */ 695 public char charAt(int index) { 696 if (isLatin1()) { 697 return StringLatin1.charAt(value, index); 698 } else { 699 return StringUTF16.charAt(value, index); 700 } 701 } 702 703 /** 704 * Returns the character (Unicode code point) at the specified 705 * index. The index refers to {@code char} values 706 * (Unicode code units) and ranges from {@code 0} to 707 * {@link #length()}{@code - 1}. 708 * 709 * <p> If the {@code char} value specified at the given index 710 * is in the high-surrogate range, the following index is less 711 * than the length of this {@code String}, and the 712 * {@code char} value at the following index is in the 713 * low-surrogate range, then the supplementary code point 714 * corresponding to this surrogate pair is returned. Otherwise, 715 * the {@code char} value at the given index is returned. 716 * 717 * @param index the index to the {@code char} values 718 * @return the code point value of the character at the 719 * {@code index} 720 * @exception IndexOutOfBoundsException if the {@code index} 721 * argument is negative or not less than the length of this 722 * string. 723 * @since 1.5 724 */ 725 public int codePointAt(int index) { 726 if (isLatin1()) { 727 checkIndex(index, value.length); 728 return value[index] & 0xff; 729 } 730 int length = value.length >> 1; 731 checkIndex(index, length); 732 return StringUTF16.codePointAt(value, index, length); 733 } 734 735 /** 736 * Returns the character (Unicode code point) before the specified 737 * index. The index refers to {@code char} values 738 * (Unicode code units) and ranges from {@code 1} to {@link 739 * CharSequence#length() length}. 740 * 741 * <p> If the {@code char} value at {@code (index - 1)} 742 * is in the low-surrogate range, {@code (index - 2)} is not 743 * negative, and the {@code char} value at {@code (index - 744 * 2)} is in the high-surrogate range, then the 745 * supplementary code point value of the surrogate pair is 746 * returned. If the {@code char} value at {@code index - 747 * 1} is an unpaired low-surrogate or a high-surrogate, the 748 * surrogate value is returned. 749 * 750 * @param index the index following the code point that should be returned 751 * @return the Unicode code point value before the given index. 752 * @exception IndexOutOfBoundsException if the {@code index} 753 * argument is less than 1 or greater than the length 754 * of this string. 755 * @since 1.5 756 */ 757 public int codePointBefore(int index) { 758 int i = index - 1; 759 if (i < 0 || i >= length()) { 760 throw new StringIndexOutOfBoundsException(index); 761 } 762 if (isLatin1()) { 763 return (value[i] & 0xff); 764 } 765 return StringUTF16.codePointBefore(value, index); 766 } 767 768 /** 769 * Returns the number of Unicode code points in the specified text 770 * range of this {@code String}. The text range begins at the 771 * specified {@code beginIndex} and extends to the 772 * {@code char} at index {@code endIndex - 1}. Thus the 773 * length (in {@code char}s) of the text range is 774 * {@code endIndex-beginIndex}. Unpaired surrogates within 775 * the text range count as one code point each. 776 * 777 * @param beginIndex the index to the first {@code char} of 778 * the text range. 779 * @param endIndex the index after the last {@code char} of 780 * the text range. 781 * @return the number of Unicode code points in the specified text 782 * range 783 * @exception IndexOutOfBoundsException if the 784 * {@code beginIndex} is negative, or {@code endIndex} 785 * is larger than the length of this {@code String}, or 786 * {@code beginIndex} is larger than {@code endIndex}. 787 * @since 1.5 788 */ 789 public int codePointCount(int beginIndex, int endIndex) { 790 if (beginIndex < 0 || beginIndex > endIndex || 791 endIndex > length()) { 792 throw new IndexOutOfBoundsException(); 793 } 794 if (isLatin1()) { 795 return endIndex - beginIndex; 796 } 797 return StringUTF16.codePointCount(value, beginIndex, endIndex); 798 } 799 800 /** 801 * Returns the index within this {@code String} that is 802 * offset from the given {@code index} by 803 * {@code codePointOffset} code points. Unpaired surrogates 804 * within the text range given by {@code index} and 805 * {@code codePointOffset} count as one code point each. 806 * 807 * @param index the index to be offset 808 * @param codePointOffset the offset in code points 809 * @return the index within this {@code String} 810 * @exception IndexOutOfBoundsException if {@code index} 811 * is negative or larger then the length of this 812 * {@code String}, or if {@code codePointOffset} is positive 813 * and the substring starting with {@code index} has fewer 814 * than {@code codePointOffset} code points, 815 * or if {@code codePointOffset} is negative and the substring 816 * before {@code index} has fewer than the absolute value 817 * of {@code codePointOffset} code points. 818 * @since 1.5 819 */ 820 public int offsetByCodePoints(int index, int codePointOffset) { 821 if (index < 0 || index > length()) { 822 throw new IndexOutOfBoundsException(); 823 } 824 return Character.offsetByCodePoints(this, index, codePointOffset); 825 } 826 827 /** 828 * Copies characters from this string into the destination character 829 * array. 830 * <p> 831 * The first character to be copied is at index {@code srcBegin}; 832 * the last character to be copied is at index {@code srcEnd-1} 833 * (thus the total number of characters to be copied is 834 * {@code srcEnd-srcBegin}). The characters are copied into the 835 * subarray of {@code dst} starting at index {@code dstBegin} 836 * and ending at index: 837 * <blockquote><pre> 838 * dstBegin + (srcEnd-srcBegin) - 1 839 * </pre></blockquote> 840 * 841 * @param srcBegin index of the first character in the string 842 * to copy. 843 * @param srcEnd index after the last character in the string 844 * to copy. 845 * @param dst the destination array. 846 * @param dstBegin the start offset in the destination array. 847 * @exception IndexOutOfBoundsException If any of the following 848 * is true: 849 * <ul><li>{@code srcBegin} is negative. 850 * <li>{@code srcBegin} is greater than {@code srcEnd} 851 * <li>{@code srcEnd} is greater than the length of this 852 * string 853 * <li>{@code dstBegin} is negative 854 * <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than 855 * {@code dst.length}</ul> 856 */ 857 public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) { 858 checkBoundsBeginEnd(srcBegin, srcEnd, length()); 859 checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length); 860 if (isLatin1()) { 861 StringLatin1.getChars(value, srcBegin, srcEnd, dst, dstBegin); 862 } else { 863 StringUTF16.getChars(value, srcBegin, srcEnd, dst, dstBegin); 864 } 865 } 866 867 /** 868 * Copies characters from this string into the destination byte array. Each 869 * byte receives the 8 low-order bits of the corresponding character. The 870 * eight high-order bits of each character are not copied and do not 871 * participate in the transfer in any way. 872 * 873 * <p> The first character to be copied is at index {@code srcBegin}; the 874 * last character to be copied is at index {@code srcEnd-1}. The total 875 * number of characters to be copied is {@code srcEnd-srcBegin}. The 876 * characters, converted to bytes, are copied into the subarray of {@code 877 * dst} starting at index {@code dstBegin} and ending at index: 878 * 879 * <blockquote><pre> 880 * dstBegin + (srcEnd-srcBegin) - 1 881 * </pre></blockquote> 882 * 883 * @deprecated This method does not properly convert characters into 884 * bytes. As of JDK 1.1, the preferred way to do this is via the 885 * {@link #getBytes()} method, which uses the platform's default charset. 886 * 887 * @param srcBegin 888 * Index of the first character in the string to copy 889 * 890 * @param srcEnd 891 * Index after the last character in the string to copy 892 * 893 * @param dst 894 * The destination array 895 * 896 * @param dstBegin 897 * The start offset in the destination array 898 * 899 * @throws IndexOutOfBoundsException 900 * If any of the following is true: 901 * <ul> 902 * <li> {@code srcBegin} is negative 903 * <li> {@code srcBegin} is greater than {@code srcEnd} 904 * <li> {@code srcEnd} is greater than the length of this String 905 * <li> {@code dstBegin} is negative 906 * <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code 907 * dst.length} 908 * </ul> 909 */ 910 @Deprecated(since="1.1") 911 public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) { 912 checkBoundsBeginEnd(srcBegin, srcEnd, length()); 913 Objects.requireNonNull(dst); 914 checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length); 915 if (isLatin1()) { 916 StringLatin1.getBytes(value, srcBegin, srcEnd, dst, dstBegin); 917 } else { 918 StringUTF16.getBytes(value, srcBegin, srcEnd, dst, dstBegin); 919 } 920 } 921 922 /** 923 * Encodes this {@code String} into a sequence of bytes using the named 924 * charset, storing the result into a new byte array. 925 * 926 * <p> The behavior of this method when this string cannot be encoded in 927 * the given charset is unspecified. The {@link 928 * java.nio.charset.CharsetEncoder} class should be used when more control 929 * over the encoding process is required. 930 * 931 * @param charsetName 932 * The name of a supported {@linkplain java.nio.charset.Charset 933 * charset} 934 * 935 * @return The resultant byte array 936 * 937 * @throws UnsupportedEncodingException 938 * If the named charset is not supported 939 * 940 * @since 1.1 941 */ 942 public byte[] getBytes(String charsetName) 943 throws UnsupportedEncodingException { 944 if (charsetName == null) throw new NullPointerException(); 945 return StringCoding.encode(charsetName, coder(), value); 946 } 947 948 /** 949 * Encodes this {@code String} into a sequence of bytes using the given 950 * {@linkplain java.nio.charset.Charset charset}, storing the result into a 951 * new byte array. 952 * 953 * <p> This method always replaces malformed-input and unmappable-character 954 * sequences with this charset's default replacement byte array. The 955 * {@link java.nio.charset.CharsetEncoder} class should be used when more 956 * control over the encoding process is required. 957 * 958 * @param charset 959 * The {@linkplain java.nio.charset.Charset} to be used to encode 960 * the {@code String} 961 * 962 * @return The resultant byte array 963 * 964 * @since 1.6 965 */ 966 public byte[] getBytes(Charset charset) { 967 if (charset == null) throw new NullPointerException(); 968 return StringCoding.encode(charset, coder(), value); 969 } 970 971 /** 972 * Encodes this {@code String} into a sequence of bytes using the 973 * platform's default charset, storing the result into a new byte array. 974 * 975 * <p> The behavior of this method when this string cannot be encoded in 976 * the default charset is unspecified. The {@link 977 * java.nio.charset.CharsetEncoder} class should be used when more control 978 * over the encoding process is required. 979 * 980 * @return The resultant byte array 981 * 982 * @since 1.1 983 */ 984 public byte[] getBytes() { 985 return StringCoding.encode(coder(), value); 986 } 987 988 /** 989 * Compares this string to the specified object. The result is {@code 990 * true} if and only if the argument is not {@code null} and is a {@code 991 * String} object that represents the same sequence of characters as this 992 * object. 993 * 994 * <p>For finer-grained String comparison, refer to 995 * {@link java.text.Collator}. 996 * 997 * @param anObject 998 * The object to compare this {@code String} against 999 * 1000 * @return {@code true} if the given object represents a {@code String} 1001 * equivalent to this string, {@code false} otherwise 1002 * 1003 * @see #compareTo(String) 1004 * @see #equalsIgnoreCase(String) 1005 */ 1006 public boolean equals(Object anObject) { 1007 if (this == anObject) { 1008 return true; 1009 } 1010 if (anObject instanceof String) { 1011 String aString = (String)anObject; 1012 if (coder() == aString.coder()) { 1013 return isLatin1() ? StringLatin1.equals(value, aString.value) 1014 : StringUTF16.equals(value, aString.value); 1015 } 1016 } 1017 return false; 1018 } 1019 1020 /** 1021 * Compares this string to the specified {@code StringBuffer}. The result 1022 * is {@code true} if and only if this {@code String} represents the same 1023 * sequence of characters as the specified {@code StringBuffer}. This method 1024 * synchronizes on the {@code StringBuffer}. 1025 * 1026 * <p>For finer-grained String comparison, refer to 1027 * {@link java.text.Collator}. 1028 * 1029 * @param sb 1030 * The {@code StringBuffer} to compare this {@code String} against 1031 * 1032 * @return {@code true} if this {@code String} represents the same 1033 * sequence of characters as the specified {@code StringBuffer}, 1034 * {@code false} otherwise 1035 * 1036 * @since 1.4 1037 */ 1038 public boolean contentEquals(StringBuffer sb) { 1039 return contentEquals((CharSequence)sb); 1040 } 1041 1042 private boolean nonSyncContentEquals(AbstractStringBuilder sb) { 1043 int len = length(); 1044 if (len != sb.length()) { 1045 return false; 1046 } 1047 byte v1[] = value; 1048 byte v2[] = sb.getValue(); 1049 if (coder() == sb.getCoder()) { 1050 int n = v1.length; 1051 for (int i = 0; i < n; i++) { 1052 if (v1[i] != v2[i]) { 1053 return false; 1054 } 1055 } 1056 } else { 1057 if (!isLatin1()) { // utf16 str and latin1 abs can never be "equal" 1058 return false; 1059 } 1060 return StringUTF16.contentEquals(v1, v2, len); 1061 } 1062 return true; 1063 } 1064 1065 /** 1066 * Compares this string to the specified {@code CharSequence}. The 1067 * result is {@code true} if and only if this {@code String} represents the 1068 * same sequence of char values as the specified sequence. Note that if the 1069 * {@code CharSequence} is a {@code StringBuffer} then the method 1070 * synchronizes on it. 1071 * 1072 * <p>For finer-grained String comparison, refer to 1073 * {@link java.text.Collator}. 1074 * 1075 * @param cs 1076 * The sequence to compare this {@code String} against 1077 * 1078 * @return {@code true} if this {@code String} represents the same 1079 * sequence of char values as the specified sequence, {@code 1080 * false} otherwise 1081 * 1082 * @since 1.5 1083 */ 1084 public boolean contentEquals(CharSequence cs) { 1085 // Argument is a StringBuffer, StringBuilder 1086 if (cs instanceof AbstractStringBuilder) { 1087 if (cs instanceof StringBuffer) { 1088 synchronized(cs) { 1089 return nonSyncContentEquals((AbstractStringBuilder)cs); 1090 } 1091 } else { 1092 return nonSyncContentEquals((AbstractStringBuilder)cs); 1093 } 1094 } 1095 // Argument is a String 1096 if (cs instanceof String) { 1097 return equals(cs); 1098 } 1099 // Argument is a generic CharSequence 1100 int n = cs.length(); 1101 if (n != length()) { 1102 return false; 1103 } 1104 byte[] val = this.value; 1105 if (isLatin1()) { 1106 for (int i = 0; i < n; i++) { 1107 if ((val[i] & 0xff) != cs.charAt(i)) { 1108 return false; 1109 } 1110 } 1111 } else { 1112 if (!StringUTF16.contentEquals(val, cs, n)) { 1113 return false; 1114 } 1115 } 1116 return true; 1117 } 1118 1119 /** 1120 * Compares this {@code String} to another {@code String}, ignoring case 1121 * considerations. Two strings are considered equal ignoring case if they 1122 * are of the same length and corresponding characters in the two strings 1123 * are equal ignoring case. 1124 * 1125 * <p> Two characters {@code c1} and {@code c2} are considered the same 1126 * ignoring case if at least one of the following is true: 1127 * <ul> 1128 * <li> The two characters are the same (as compared by the 1129 * {@code ==} operator) 1130 * <li> Calling {@code Character.toLowerCase(Character.toUpperCase(char))} 1131 * on each character produces the same result 1132 * </ul> 1133 * 1134 * <p>Note that this method does <em>not</em> take locale into account, and 1135 * will result in unsatisfactory results for certain locales. The 1136 * {@link java.text.Collator} class provides locale-sensitive comparison. 1137 * 1138 * @param anotherString 1139 * The {@code String} to compare this {@code String} against 1140 * 1141 * @return {@code true} if the argument is not {@code null} and it 1142 * represents an equivalent {@code String} ignoring case; {@code 1143 * false} otherwise 1144 * 1145 * @see #equals(Object) 1146 */ 1147 public boolean equalsIgnoreCase(String anotherString) { 1148 return (this == anotherString) ? true 1149 : (anotherString != null) 1150 && (anotherString.length() == length()) 1151 && regionMatches(true, 0, anotherString, 0, length()); 1152 } 1153 1154 /** 1155 * Compares two strings lexicographically. 1156 * The comparison is based on the Unicode value of each character in 1157 * the strings. The character sequence represented by this 1158 * {@code String} object is compared lexicographically to the 1159 * character sequence represented by the argument string. The result is 1160 * a negative integer if this {@code String} object 1161 * lexicographically precedes the argument string. The result is a 1162 * positive integer if this {@code String} object lexicographically 1163 * follows the argument string. The result is zero if the strings 1164 * are equal; {@code compareTo} returns {@code 0} exactly when 1165 * the {@link #equals(Object)} method would return {@code true}. 1166 * <p> 1167 * This is the definition of lexicographic ordering. If two strings are 1168 * different, then either they have different characters at some index 1169 * that is a valid index for both strings, or their lengths are different, 1170 * or both. If they have different characters at one or more index 1171 * positions, let <i>k</i> be the smallest such index; then the string 1172 * whose character at position <i>k</i> has the smaller value, as 1173 * determined by using the {@code <} operator, lexicographically precedes the 1174 * other string. In this case, {@code compareTo} returns the 1175 * difference of the two character values at position {@code k} in 1176 * the two string -- that is, the value: 1177 * <blockquote><pre> 1178 * this.charAt(k)-anotherString.charAt(k) 1179 * </pre></blockquote> 1180 * If there is no index position at which they differ, then the shorter 1181 * string lexicographically precedes the longer string. In this case, 1182 * {@code compareTo} returns the difference of the lengths of the 1183 * strings -- that is, the value: 1184 * <blockquote><pre> 1185 * this.length()-anotherString.length() 1186 * </pre></blockquote> 1187 * 1188 * <p>For finer-grained String comparison, refer to 1189 * {@link java.text.Collator}. 1190 * 1191 * @param anotherString the {@code String} to be compared. 1192 * @return the value {@code 0} if the argument string is equal to 1193 * this string; a value less than {@code 0} if this string 1194 * is lexicographically less than the string argument; and a 1195 * value greater than {@code 0} if this string is 1196 * lexicographically greater than the string argument. 1197 */ 1198 public int compareTo(String anotherString) { 1199 byte v1[] = value; 1200 byte v2[] = anotherString.value; 1201 if (coder() == anotherString.coder()) { 1202 return isLatin1() ? StringLatin1.compareTo(v1, v2) 1203 : StringUTF16.compareTo(v1, v2); 1204 } 1205 return isLatin1() ? StringLatin1.compareToUTF16(v1, v2) 1206 : StringUTF16.compareToLatin1(v1, v2); 1207 } 1208 1209 /** 1210 * A Comparator that orders {@code String} objects as by 1211 * {@code compareToIgnoreCase}. This comparator is serializable. 1212 * <p> 1213 * Note that this Comparator does <em>not</em> take locale into account, 1214 * and will result in an unsatisfactory ordering for certain locales. 1215 * The {@link java.text.Collator} class provides locale-sensitive comparison. 1216 * 1217 * @see java.text.Collator 1218 * @since 1.2 1219 */ 1220 public static final Comparator<String> CASE_INSENSITIVE_ORDER 1221 = new CaseInsensitiveComparator(); 1222 private static class CaseInsensitiveComparator 1223 implements Comparator<String>, java.io.Serializable { 1224 // use serialVersionUID from JDK 1.2.2 for interoperability 1225 private static final long serialVersionUID = 8575799808933029326L; 1226 1227 public int compare(String s1, String s2) { 1228 byte v1[] = s1.value; 1229 byte v2[] = s2.value; 1230 if (s1.coder() == s2.coder()) { 1231 return s1.isLatin1() ? StringLatin1.compareToCI(v1, v2) 1232 : StringUTF16.compareToCI(v1, v2); 1233 } 1234 return s1.isLatin1() ? StringLatin1.compareToCI_UTF16(v1, v2) 1235 : StringUTF16.compareToCI_Latin1(v1, v2); 1236 } 1237 1238 /** Replaces the de-serialized object. */ 1239 private Object readResolve() { return CASE_INSENSITIVE_ORDER; } 1240 } 1241 1242 /** 1243 * Compares two strings lexicographically, ignoring case 1244 * differences. This method returns an integer whose sign is that of 1245 * calling {@code compareTo} with normalized versions of the strings 1246 * where case differences have been eliminated by calling 1247 * {@code Character.toLowerCase(Character.toUpperCase(character))} on 1248 * each character. 1249 * <p> 1250 * Note that this method does <em>not</em> take locale into account, 1251 * and will result in an unsatisfactory ordering for certain locales. 1252 * The {@link java.text.Collator} class provides locale-sensitive comparison. 1253 * 1254 * @param str the {@code String} to be compared. 1255 * @return a negative integer, zero, or a positive integer as the 1256 * specified String is greater than, equal to, or less 1257 * than this String, ignoring case considerations. 1258 * @see java.text.Collator 1259 * @since 1.2 1260 */ 1261 public int compareToIgnoreCase(String str) { 1262 return CASE_INSENSITIVE_ORDER.compare(this, str); 1263 } 1264 1265 /** 1266 * Tests if two string regions are equal. 1267 * <p> 1268 * A substring of this {@code String} object is compared to a substring 1269 * of the argument other. The result is true if these substrings 1270 * represent identical character sequences. The substring of this 1271 * {@code String} object to be compared begins at index {@code toffset} 1272 * and has length {@code len}. The substring of other to be compared 1273 * begins at index {@code ooffset} and has length {@code len}. The 1274 * result is {@code false} if and only if at least one of the following 1275 * is true: 1276 * <ul><li>{@code toffset} is negative. 1277 * <li>{@code ooffset} is negative. 1278 * <li>{@code toffset+len} is greater than the length of this 1279 * {@code String} object. 1280 * <li>{@code ooffset+len} is greater than the length of the other 1281 * argument. 1282 * <li>There is some nonnegative integer <i>k</i> less than {@code len} 1283 * such that: 1284 * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + } 1285 * <i>k</i>{@code )} 1286 * </ul> 1287 * 1288 * <p>Note that this method does <em>not</em> take locale into account. The 1289 * {@link java.text.Collator} class provides locale-sensitive comparison. 1290 * 1291 * @param toffset the starting offset of the subregion in this string. 1292 * @param other the string argument. 1293 * @param ooffset the starting offset of the subregion in the string 1294 * argument. 1295 * @param len the number of characters to compare. 1296 * @return {@code true} if the specified subregion of this string 1297 * exactly matches the specified subregion of the string argument; 1298 * {@code false} otherwise. 1299 */ 1300 public boolean regionMatches(int toffset, String other, int ooffset, int len) { 1301 byte tv[] = value; 1302 byte ov[] = other.value; 1303 // Note: toffset, ooffset, or len might be near -1>>>1. 1304 if ((ooffset < 0) || (toffset < 0) || 1305 (toffset > (long)length() - len) || 1306 (ooffset > (long)other.length() - len)) { 1307 return false; 1308 } 1309 if (coder() == other.coder()) { 1310 if (!isLatin1() && (len > 0)) { 1311 toffset = toffset << 1; 1312 ooffset = ooffset << 1; 1313 len = len << 1; 1314 } 1315 while (len-- > 0) { 1316 if (tv[toffset++] != ov[ooffset++]) { 1317 return false; 1318 } 1319 } 1320 } else { 1321 if (coder() == LATIN1) { 1322 while (len-- > 0) { 1323 if (StringLatin1.getChar(tv, toffset++) != 1324 StringUTF16.getChar(ov, ooffset++)) { 1325 return false; 1326 } 1327 } 1328 } else { 1329 while (len-- > 0) { 1330 if (StringUTF16.getChar(tv, toffset++) != 1331 StringLatin1.getChar(ov, ooffset++)) { 1332 return false; 1333 } 1334 } 1335 } 1336 } 1337 return true; 1338 } 1339 1340 /** 1341 * Tests if two string regions are equal. 1342 * <p> 1343 * A substring of this {@code String} object is compared to a substring 1344 * of the argument {@code other}. The result is {@code true} if these 1345 * substrings represent character sequences that are the same, ignoring 1346 * case if and only if {@code ignoreCase} is true. The substring of 1347 * this {@code String} object to be compared begins at index 1348 * {@code toffset} and has length {@code len}. The substring of 1349 * {@code other} to be compared begins at index {@code ooffset} and 1350 * has length {@code len}. The result is {@code false} if and only if 1351 * at least one of the following is true: 1352 * <ul><li>{@code toffset} is negative. 1353 * <li>{@code ooffset} is negative. 1354 * <li>{@code toffset+len} is greater than the length of this 1355 * {@code String} object. 1356 * <li>{@code ooffset+len} is greater than the length of the other 1357 * argument. 1358 * <li>{@code ignoreCase} is {@code false} and there is some nonnegative 1359 * integer <i>k</i> less than {@code len} such that: 1360 * <blockquote><pre> 1361 * this.charAt(toffset+k) != other.charAt(ooffset+k) 1362 * </pre></blockquote> 1363 * <li>{@code ignoreCase} is {@code true} and there is some nonnegative 1364 * integer <i>k</i> less than {@code len} such that: 1365 * <blockquote><pre> 1366 * Character.toLowerCase(Character.toUpperCase(this.charAt(toffset+k))) != 1367 Character.toLowerCase(Character.toUpperCase(other.charAt(ooffset+k))) 1368 * </pre></blockquote> 1369 * </ul> 1370 * 1371 * <p>Note that this method does <em>not</em> take locale into account, 1372 * and will result in unsatisfactory results for certain locales when 1373 * {@code ignoreCase} is {@code true}. The {@link java.text.Collator} class 1374 * provides locale-sensitive comparison. 1375 * 1376 * @param ignoreCase if {@code true}, ignore case when comparing 1377 * characters. 1378 * @param toffset the starting offset of the subregion in this 1379 * string. 1380 * @param other the string argument. 1381 * @param ooffset the starting offset of the subregion in the string 1382 * argument. 1383 * @param len the number of characters to compare. 1384 * @return {@code true} if the specified subregion of this string 1385 * matches the specified subregion of the string argument; 1386 * {@code false} otherwise. Whether the matching is exact 1387 * or case insensitive depends on the {@code ignoreCase} 1388 * argument. 1389 */ 1390 public boolean regionMatches(boolean ignoreCase, int toffset, 1391 String other, int ooffset, int len) { 1392 if (!ignoreCase) { 1393 return regionMatches(toffset, other, ooffset, len); 1394 } 1395 // Note: toffset, ooffset, or len might be near -1>>>1. 1396 if ((ooffset < 0) || (toffset < 0) 1397 || (toffset > (long)length() - len) 1398 || (ooffset > (long)other.length() - len)) { 1399 return false; 1400 } 1401 byte tv[] = value; 1402 byte ov[] = other.value; 1403 if (coder() == other.coder()) { 1404 return isLatin1() 1405 ? StringLatin1.regionMatchesCI(tv, toffset, ov, ooffset, len) 1406 : StringUTF16.regionMatchesCI(tv, toffset, ov, ooffset, len); 1407 } 1408 return isLatin1() 1409 ? StringLatin1.regionMatchesCI_UTF16(tv, toffset, ov, ooffset, len) 1410 : StringUTF16.regionMatchesCI_Latin1(tv, toffset, ov, ooffset, len); 1411 } 1412 1413 /** 1414 * Tests if the substring of this string beginning at the 1415 * specified index starts with the specified prefix. 1416 * 1417 * @param prefix the prefix. 1418 * @param toffset where to begin looking in this string. 1419 * @return {@code true} if the character sequence represented by the 1420 * argument is a prefix of the substring of this object starting 1421 * at index {@code toffset}; {@code false} otherwise. 1422 * The result is {@code false} if {@code toffset} is 1423 * negative or greater than the length of this 1424 * {@code String} object; otherwise the result is the same 1425 * as the result of the expression 1426 * <pre> 1427 * this.substring(toffset).startsWith(prefix) 1428 * </pre> 1429 */ 1430 public boolean startsWith(String prefix, int toffset) { 1431 // Note: toffset might be near -1>>>1. 1432 if (toffset < 0 || toffset > length() - prefix.length()) { 1433 return false; 1434 } 1435 byte ta[] = value; 1436 byte pa[] = prefix.value; 1437 int po = 0; 1438 int pc = pa.length; 1439 if (coder() == prefix.coder()) { 1440 int to = isLatin1() ? toffset : toffset << 1; 1441 while (po < pc) { 1442 if (ta[to++] != pa[po++]) { 1443 return false; 1444 } 1445 } 1446 } else { 1447 if (isLatin1()) { // && pcoder == UTF16 1448 return false; 1449 } 1450 // coder == UTF16 && pcoder == LATIN1) 1451 while (po < pc) { 1452 if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) { 1453 return false; 1454 } 1455 } 1456 } 1457 return true; 1458 } 1459 1460 /** 1461 * Tests if this string starts with the specified prefix. 1462 * 1463 * @param prefix the prefix. 1464 * @return {@code true} if the character sequence represented by the 1465 * argument is a prefix of the character sequence represented by 1466 * this string; {@code false} otherwise. 1467 * Note also that {@code true} will be returned if the 1468 * argument is an empty string or is equal to this 1469 * {@code String} object as determined by the 1470 * {@link #equals(Object)} method. 1471 * @since 1.0 1472 */ 1473 public boolean startsWith(String prefix) { 1474 return startsWith(prefix, 0); 1475 } 1476 1477 /** 1478 * Tests if this string ends with the specified suffix. 1479 * 1480 * @param suffix the suffix. 1481 * @return {@code true} if the character sequence represented by the 1482 * argument is a suffix of the character sequence represented by 1483 * this object; {@code false} otherwise. Note that the 1484 * result will be {@code true} if the argument is the 1485 * empty string or is equal to this {@code String} object 1486 * as determined by the {@link #equals(Object)} method. 1487 */ 1488 public boolean endsWith(String suffix) { 1489 return startsWith(suffix, length() - suffix.length()); 1490 } 1491 1492 /** 1493 * Returns a hash code for this string. The hash code for a 1494 * {@code String} object is computed as 1495 * <blockquote><pre> 1496 * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1] 1497 * </pre></blockquote> 1498 * using {@code int} arithmetic, where {@code s[i]} is the 1499 * <i>i</i>th character of the string, {@code n} is the length of 1500 * the string, and {@code ^} indicates exponentiation. 1501 * (The hash value of the empty string is zero.) 1502 * 1503 * @return a hash code value for this object. 1504 */ 1505 public int hashCode() { 1506 int h = hash; 1507 if (h == 0 && value.length > 0) { 1508 hash = h = isLatin1() ? StringLatin1.hashCode(value) 1509 : StringUTF16.hashCode(value); 1510 } 1511 return h; 1512 } 1513 1514 /** 1515 * Returns the index within this string of the first occurrence of 1516 * the specified character. If a character with value 1517 * {@code ch} occurs in the character sequence represented by 1518 * this {@code String} object, then the index (in Unicode 1519 * code units) of the first such occurrence is returned. For 1520 * values of {@code ch} in the range from 0 to 0xFFFF 1521 * (inclusive), this is the smallest value <i>k</i> such that: 1522 * <blockquote><pre> 1523 * this.charAt(<i>k</i>) == ch 1524 * </pre></blockquote> 1525 * is true. For other values of {@code ch}, it is the 1526 * smallest value <i>k</i> such that: 1527 * <blockquote><pre> 1528 * this.codePointAt(<i>k</i>) == ch 1529 * </pre></blockquote> 1530 * is true. In either case, if no such character occurs in this 1531 * string, then {@code -1} is returned. 1532 * 1533 * @param ch a character (Unicode code point). 1534 * @return the index of the first occurrence of the character in the 1535 * character sequence represented by this object, or 1536 * {@code -1} if the character does not occur. 1537 */ 1538 public int indexOf(int ch) { 1539 return indexOf(ch, 0); 1540 } 1541 1542 /** 1543 * Returns the index within this string of the first occurrence of the 1544 * specified character, starting the search at the specified index. 1545 * <p> 1546 * If a character with value {@code ch} occurs in the 1547 * character sequence represented by this {@code String} 1548 * object at an index no smaller than {@code fromIndex}, then 1549 * the index of the first such occurrence is returned. For values 1550 * of {@code ch} in the range from 0 to 0xFFFF (inclusive), 1551 * this is the smallest value <i>k</i> such that: 1552 * <blockquote><pre> 1553 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> >= fromIndex) 1554 * </pre></blockquote> 1555 * is true. For other values of {@code ch}, it is the 1556 * smallest value <i>k</i> such that: 1557 * <blockquote><pre> 1558 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> >= fromIndex) 1559 * </pre></blockquote> 1560 * is true. In either case, if no such character occurs in this 1561 * string at or after position {@code fromIndex}, then 1562 * {@code -1} is returned. 1563 * 1564 * <p> 1565 * There is no restriction on the value of {@code fromIndex}. If it 1566 * is negative, it has the same effect as if it were zero: this entire 1567 * string may be searched. If it is greater than the length of this 1568 * string, it has the same effect as if it were equal to the length of 1569 * this string: {@code -1} is returned. 1570 * 1571 * <p>All indices are specified in {@code char} values 1572 * (Unicode code units). 1573 * 1574 * @param ch a character (Unicode code point). 1575 * @param fromIndex the index to start the search from. 1576 * @return the index of the first occurrence of the character in the 1577 * character sequence represented by this object that is greater 1578 * than or equal to {@code fromIndex}, or {@code -1} 1579 * if the character does not occur. 1580 */ 1581 public int indexOf(int ch, int fromIndex) { 1582 return isLatin1() ? StringLatin1.indexOf(value, ch, fromIndex) 1583 : StringUTF16.indexOf(value, ch, fromIndex); 1584 } 1585 1586 /** 1587 * Returns the index within this string of the last occurrence of 1588 * the specified character. For values of {@code ch} in the 1589 * range from 0 to 0xFFFF (inclusive), the index (in Unicode code 1590 * units) returned is the largest value <i>k</i> such that: 1591 * <blockquote><pre> 1592 * this.charAt(<i>k</i>) == ch 1593 * </pre></blockquote> 1594 * is true. For other values of {@code ch}, it is the 1595 * largest value <i>k</i> such that: 1596 * <blockquote><pre> 1597 * this.codePointAt(<i>k</i>) == ch 1598 * </pre></blockquote> 1599 * is true. In either case, if no such character occurs in this 1600 * string, then {@code -1} is returned. The 1601 * {@code String} is searched backwards starting at the last 1602 * character. 1603 * 1604 * @param ch a character (Unicode code point). 1605 * @return the index of the last occurrence of the character in the 1606 * character sequence represented by this object, or 1607 * {@code -1} if the character does not occur. 1608 */ 1609 public int lastIndexOf(int ch) { 1610 return lastIndexOf(ch, length() - 1); 1611 } 1612 1613 /** 1614 * Returns the index within this string of the last occurrence of 1615 * the specified character, searching backward starting at the 1616 * specified index. For values of {@code ch} in the range 1617 * from 0 to 0xFFFF (inclusive), the index returned is the largest 1618 * value <i>k</i> such that: 1619 * <blockquote><pre> 1620 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex) 1621 * </pre></blockquote> 1622 * is true. For other values of {@code ch}, it is the 1623 * largest value <i>k</i> such that: 1624 * <blockquote><pre> 1625 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex) 1626 * </pre></blockquote> 1627 * is true. In either case, if no such character occurs in this 1628 * string at or before position {@code fromIndex}, then 1629 * {@code -1} is returned. 1630 * 1631 * <p>All indices are specified in {@code char} values 1632 * (Unicode code units). 1633 * 1634 * @param ch a character (Unicode code point). 1635 * @param fromIndex the index to start the search from. There is no 1636 * restriction on the value of {@code fromIndex}. If it is 1637 * greater than or equal to the length of this string, it has 1638 * the same effect as if it were equal to one less than the 1639 * length of this string: this entire string may be searched. 1640 * If it is negative, it has the same effect as if it were -1: 1641 * -1 is returned. 1642 * @return the index of the last occurrence of the character in the 1643 * character sequence represented by this object that is less 1644 * than or equal to {@code fromIndex}, or {@code -1} 1645 * if the character does not occur before that point. 1646 */ 1647 public int lastIndexOf(int ch, int fromIndex) { 1648 return isLatin1() ? StringLatin1.lastIndexOf(value, ch, fromIndex) 1649 : StringUTF16.lastIndexOf(value, ch, fromIndex); 1650 } 1651 1652 /** 1653 * Returns the index within this string of the first occurrence of the 1654 * specified substring. 1655 * 1656 * <p>The returned index is the smallest value {@code k} for which: 1657 * <pre>{@code 1658 * this.startsWith(str, k) 1659 * }</pre> 1660 * If no such value of {@code k} exists, then {@code -1} is returned. 1661 * 1662 * @param str the substring to search for. 1663 * @return the index of the first occurrence of the specified substring, 1664 * or {@code -1} if there is no such occurrence. 1665 */ 1666 public int indexOf(String str) { 1667 if (coder() == str.coder()) { 1668 return isLatin1() ? StringLatin1.indexOf(value, str.value) 1669 : StringUTF16.indexOf(value, str.value); 1670 } 1671 if (coder() == LATIN1) { // str.coder == UTF16 1672 return -1; 1673 } 1674 return StringUTF16.indexOfLatin1(value, str.value); 1675 } 1676 1677 /** 1678 * Returns the index within this string of the first occurrence of the 1679 * specified substring, starting at the specified index. 1680 * 1681 * <p>The returned index is the smallest value {@code k} for which: 1682 * <pre>{@code 1683 * k >= Math.min(fromIndex, this.length()) && 1684 * this.startsWith(str, k) 1685 * }</pre> 1686 * If no such value of {@code k} exists, then {@code -1} is returned. 1687 * 1688 * @param str the substring to search for. 1689 * @param fromIndex the index from which to start the search. 1690 * @return the index of the first occurrence of the specified substring, 1691 * starting at the specified index, 1692 * or {@code -1} if there is no such occurrence. 1693 */ 1694 public int indexOf(String str, int fromIndex) { 1695 return indexOf(value, coder(), length(), str, fromIndex); 1696 } 1697 1698 /** 1699 * Code shared by String and AbstractStringBuilder to do searches. The 1700 * source is the character array being searched, and the target 1701 * is the string being searched for. 1702 * 1703 * @param src the characters being searched. 1704 * @param srcCoder the coder of the source string. 1705 * @param srcCount length of the source string. 1706 * @param tgtStr the characters being searched for. 1707 * @param fromIndex the index to begin searching from. 1708 */ 1709 static int indexOf(byte[] src, byte srcCoder, int srcCount, 1710 String tgtStr, int fromIndex) { 1711 byte[] tgt = tgtStr.value; 1712 byte tgtCoder = tgtStr.coder(); 1713 int tgtCount = tgtStr.length(); 1714 1715 if (fromIndex >= srcCount) { 1716 return (tgtCount == 0 ? srcCount : -1); 1717 } 1718 if (fromIndex < 0) { 1719 fromIndex = 0; 1720 } 1721 if (tgtCount == 0) { 1722 return fromIndex; 1723 } 1724 if (tgtCount > srcCount) { 1725 return -1; 1726 } 1727 if (srcCoder == tgtCoder) { 1728 return srcCoder == LATIN1 1729 ? StringLatin1.indexOf(src, srcCount, tgt, tgtCount, fromIndex) 1730 : StringUTF16.indexOf(src, srcCount, tgt, tgtCount, fromIndex); 1731 } 1732 if (srcCoder == LATIN1) { // && tgtCoder == UTF16 1733 return -1; 1734 } 1735 // srcCoder == UTF16 && tgtCoder == LATIN1) { 1736 return StringUTF16.indexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex); 1737 } 1738 1739 /** 1740 * Returns the index within this string of the last occurrence of the 1741 * specified substring. The last occurrence of the empty string "" 1742 * is considered to occur at the index value {@code this.length()}. 1743 * 1744 * <p>The returned index is the largest value {@code k} for which: 1745 * <pre>{@code 1746 * this.startsWith(str, k) 1747 * }</pre> 1748 * If no such value of {@code k} exists, then {@code -1} is returned. 1749 * 1750 * @param str the substring to search for. 1751 * @return the index of the last occurrence of the specified substring, 1752 * or {@code -1} if there is no such occurrence. 1753 */ 1754 public int lastIndexOf(String str) { 1755 return lastIndexOf(str, length()); 1756 } 1757 1758 /** 1759 * Returns the index within this string of the last occurrence of the 1760 * specified substring, searching backward starting at the specified index. 1761 * 1762 * <p>The returned index is the largest value {@code k} for which: 1763 * <pre>{@code 1764 * k <= Math.min(fromIndex, this.length()) && 1765 * this.startsWith(str, k) 1766 * }</pre> 1767 * If no such value of {@code k} exists, then {@code -1} is returned. 1768 * 1769 * @param str the substring to search for. 1770 * @param fromIndex the index to start the search from. 1771 * @return the index of the last occurrence of the specified substring, 1772 * searching backward from the specified index, 1773 * or {@code -1} if there is no such occurrence. 1774 */ 1775 public int lastIndexOf(String str, int fromIndex) { 1776 return lastIndexOf(value, coder(), length(), str, fromIndex); 1777 } 1778 1779 /** 1780 * Code shared by String and AbstractStringBuilder to do searches. The 1781 * source is the character array being searched, and the target 1782 * is the string being searched for. 1783 * 1784 * @param src the characters being searched. 1785 * @param srcCoder coder handles the mapping between bytes/chars 1786 * @param srcCount count of the source string. 1787 * @param tgt the characters being searched for. 1788 * @param fromIndex the index to begin searching from. 1789 */ 1790 static int lastIndexOf(byte[] src, byte srcCoder, int srcCount, 1791 String tgtStr, int fromIndex) { 1792 byte[] tgt = tgtStr.value; 1793 byte tgtCoder = tgtStr.coder(); 1794 int tgtCount = tgtStr.length(); 1795 /* 1796 * Check arguments; return immediately where possible. For 1797 * consistency, don't check for null str. 1798 */ 1799 int rightIndex = srcCount - tgtCount; 1800 if (fromIndex > rightIndex) { 1801 fromIndex = rightIndex; 1802 } 1803 if (fromIndex < 0) { 1804 return -1; 1805 } 1806 /* Empty string always matches. */ 1807 if (tgtCount == 0) { 1808 return fromIndex; 1809 } 1810 if (srcCoder == tgtCoder) { 1811 return srcCoder == LATIN1 1812 ? StringLatin1.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex) 1813 : StringUTF16.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex); 1814 } 1815 if (srcCoder == LATIN1) { // && tgtCoder == UTF16 1816 return -1; 1817 } 1818 // srcCoder == UTF16 && tgtCoder == LATIN1 1819 return StringUTF16.lastIndexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex); 1820 } 1821 1822 /** 1823 * Returns a string that is a substring of this string. The 1824 * substring begins with the character at the specified index and 1825 * extends to the end of this string. <p> 1826 * Examples: 1827 * <blockquote><pre> 1828 * "unhappy".substring(2) returns "happy" 1829 * "Harbison".substring(3) returns "bison" 1830 * "emptiness".substring(9) returns "" (an empty string) 1831 * </pre></blockquote> 1832 * 1833 * @param beginIndex the beginning index, inclusive. 1834 * @return the specified substring. 1835 * @exception IndexOutOfBoundsException if 1836 * {@code beginIndex} is negative or larger than the 1837 * length of this {@code String} object. 1838 */ 1839 public String substring(int beginIndex) { 1840 if (beginIndex < 0) { 1841 throw new StringIndexOutOfBoundsException(beginIndex); 1842 } 1843 int subLen = length() - beginIndex; 1844 if (subLen < 0) { 1845 throw new StringIndexOutOfBoundsException(subLen); 1846 } 1847 if (beginIndex == 0) { 1848 return this; 1849 } 1850 return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen) 1851 : StringUTF16.newString(value, beginIndex, subLen); 1852 } 1853 1854 /** 1855 * Returns a string that is a substring of this string. The 1856 * substring begins at the specified {@code beginIndex} and 1857 * extends to the character at index {@code endIndex - 1}. 1858 * Thus the length of the substring is {@code endIndex-beginIndex}. 1859 * <p> 1860 * Examples: 1861 * <blockquote><pre> 1862 * "hamburger".substring(4, 8) returns "urge" 1863 * "smiles".substring(1, 5) returns "mile" 1864 * </pre></blockquote> 1865 * 1866 * @param beginIndex the beginning index, inclusive. 1867 * @param endIndex the ending index, exclusive. 1868 * @return the specified substring. 1869 * @exception IndexOutOfBoundsException if the 1870 * {@code beginIndex} is negative, or 1871 * {@code endIndex} is larger than the length of 1872 * this {@code String} object, or 1873 * {@code beginIndex} is larger than 1874 * {@code endIndex}. 1875 */ 1876 public String substring(int beginIndex, int endIndex) { 1877 int length = length(); 1878 checkBoundsBeginEnd(beginIndex, endIndex, length); 1879 int subLen = endIndex - beginIndex; 1880 if (beginIndex == 0 && endIndex == length) { 1881 return this; 1882 } 1883 return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen) 1884 : StringUTF16.newString(value, beginIndex, subLen); 1885 } 1886 1887 /** 1888 * Returns a character sequence that is a subsequence of this sequence. 1889 * 1890 * <p> An invocation of this method of the form 1891 * 1892 * <blockquote><pre> 1893 * str.subSequence(begin, end)</pre></blockquote> 1894 * 1895 * behaves in exactly the same way as the invocation 1896 * 1897 * <blockquote><pre> 1898 * str.substring(begin, end)</pre></blockquote> 1899 * 1900 * @apiNote 1901 * This method is defined so that the {@code String} class can implement 1902 * the {@link CharSequence} interface. 1903 * 1904 * @param beginIndex the begin index, inclusive. 1905 * @param endIndex the end index, exclusive. 1906 * @return the specified subsequence. 1907 * 1908 * @throws IndexOutOfBoundsException 1909 * if {@code beginIndex} or {@code endIndex} is negative, 1910 * if {@code endIndex} is greater than {@code length()}, 1911 * or if {@code beginIndex} is greater than {@code endIndex} 1912 * 1913 * @since 1.4 1914 * @spec JSR-51 1915 */ 1916 public CharSequence subSequence(int beginIndex, int endIndex) { 1917 return this.substring(beginIndex, endIndex); 1918 } 1919 1920 /** 1921 * Concatenates the specified string to the end of this string. 1922 * <p> 1923 * If the length of the argument string is {@code 0}, then this 1924 * {@code String} object is returned. Otherwise, a 1925 * {@code String} object is returned that represents a character 1926 * sequence that is the concatenation of the character sequence 1927 * represented by this {@code String} object and the character 1928 * sequence represented by the argument string.<p> 1929 * Examples: 1930 * <blockquote><pre> 1931 * "cares".concat("s") returns "caress" 1932 * "to".concat("get").concat("her") returns "together" 1933 * </pre></blockquote> 1934 * 1935 * @param str the {@code String} that is concatenated to the end 1936 * of this {@code String}. 1937 * @return a string that represents the concatenation of this object's 1938 * characters followed by the string argument's characters. 1939 */ 1940 public String concat(String str) { 1941 int olen = str.length(); 1942 if (olen == 0) { 1943 return this; 1944 } 1945 if (coder() == str.coder()) { 1946 byte[] val = this.value; 1947 byte[] oval = str.value; 1948 int len = val.length + oval.length; 1949 byte[] buf = Arrays.copyOf(val, len); 1950 System.arraycopy(oval, 0, buf, val.length, oval.length); 1951 return new String(buf, coder); 1952 } 1953 int len = length(); 1954 byte[] buf = StringUTF16.newBytesFor(len + olen); 1955 getBytes(buf, 0, UTF16); 1956 str.getBytes(buf, len, UTF16); 1957 return new String(buf, UTF16); 1958 } 1959 1960 /** 1961 * Returns a string resulting from replacing all occurrences of 1962 * {@code oldChar} in this string with {@code newChar}. 1963 * <p> 1964 * If the character {@code oldChar} does not occur in the 1965 * character sequence represented by this {@code String} object, 1966 * then a reference to this {@code String} object is returned. 1967 * Otherwise, a {@code String} object is returned that 1968 * represents a character sequence identical to the character sequence 1969 * represented by this {@code String} object, except that every 1970 * occurrence of {@code oldChar} is replaced by an occurrence 1971 * of {@code newChar}. 1972 * <p> 1973 * Examples: 1974 * <blockquote><pre> 1975 * "mesquite in your cellar".replace('e', 'o') 1976 * returns "mosquito in your collar" 1977 * "the war of baronets".replace('r', 'y') 1978 * returns "the way of bayonets" 1979 * "sparring with a purple porpoise".replace('p', 't') 1980 * returns "starring with a turtle tortoise" 1981 * "JonL".replace('q', 'x') returns "JonL" (no change) 1982 * </pre></blockquote> 1983 * 1984 * @param oldChar the old character. 1985 * @param newChar the new character. 1986 * @return a string derived from this string by replacing every 1987 * occurrence of {@code oldChar} with {@code newChar}. 1988 */ 1989 public String replace(char oldChar, char newChar) { 1990 if (oldChar != newChar) { 1991 String ret = isLatin1() ? StringLatin1.replace(value, oldChar, newChar) 1992 : StringUTF16.replace(value, oldChar, newChar); 1993 if (ret != null) { 1994 return ret; 1995 } 1996 } 1997 return this; 1998 } 1999 2000 /** 2001 * Tells whether or not this string matches the given <a 2002 * href="../util/regex/Pattern.html#sum">regular expression</a>. 2003 * 2004 * <p> An invocation of this method of the form 2005 * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the 2006 * same result as the expression 2007 * 2008 * <blockquote> 2009 * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence) 2010 * matches(<i>regex</i>, <i>str</i>)} 2011 * </blockquote> 2012 * 2013 * @param regex 2014 * the regular expression to which this string is to be matched 2015 * 2016 * @return {@code true} if, and only if, this string matches the 2017 * given regular expression 2018 * 2019 * @throws PatternSyntaxException 2020 * if the regular expression's syntax is invalid 2021 * 2022 * @see java.util.regex.Pattern 2023 * 2024 * @since 1.4 2025 * @spec JSR-51 2026 */ 2027 public boolean matches(String regex) { 2028 return Pattern.matches(regex, this); 2029 } 2030 2031 /** 2032 * Returns true if and only if this string contains the specified 2033 * sequence of char values. 2034 * 2035 * @param s the sequence to search for 2036 * @return true if this string contains {@code s}, false otherwise 2037 * @since 1.5 2038 */ 2039 public boolean contains(CharSequence s) { 2040 return indexOf(s.toString()) >= 0; 2041 } 2042 2043 /** 2044 * Replaces the first substring of this string that matches the given <a 2045 * href="../util/regex/Pattern.html#sum">regular expression</a> with the 2046 * given replacement. 2047 * 2048 * <p> An invocation of this method of the form 2049 * <i>str</i>{@code .replaceFirst(}<i>regex</i>{@code ,} <i>repl</i>{@code )} 2050 * yields exactly the same result as the expression 2051 * 2052 * <blockquote> 2053 * <code> 2054 * {@link java.util.regex.Pattern}.{@link 2055 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2056 * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link 2057 * java.util.regex.Matcher#replaceFirst replaceFirst}(<i>repl</i>) 2058 * </code> 2059 * </blockquote> 2060 * 2061 *<p> 2062 * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the 2063 * replacement string may cause the results to be different than if it were 2064 * being treated as a literal replacement string; see 2065 * {@link java.util.regex.Matcher#replaceFirst}. 2066 * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special 2067 * meaning of these characters, if desired. 2068 * 2069 * @param regex 2070 * the regular expression to which this string is to be matched 2071 * @param replacement 2072 * the string to be substituted for the first match 2073 * 2074 * @return The resulting {@code String} 2075 * 2076 * @throws PatternSyntaxException 2077 * if the regular expression's syntax is invalid 2078 * 2079 * @see java.util.regex.Pattern 2080 * 2081 * @since 1.4 2082 * @spec JSR-51 2083 */ 2084 public String replaceFirst(String regex, String replacement) { 2085 return Pattern.compile(regex).matcher(this).replaceFirst(replacement); 2086 } 2087 2088 /** 2089 * Replaces each substring of this string that matches the given <a 2090 * href="../util/regex/Pattern.html#sum">regular expression</a> with the 2091 * given replacement. 2092 * 2093 * <p> An invocation of this method of the form 2094 * <i>str</i>{@code .replaceAll(}<i>regex</i>{@code ,} <i>repl</i>{@code )} 2095 * yields exactly the same result as the expression 2096 * 2097 * <blockquote> 2098 * <code> 2099 * {@link java.util.regex.Pattern}.{@link 2100 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2101 * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link 2102 * java.util.regex.Matcher#replaceAll replaceAll}(<i>repl</i>) 2103 * </code> 2104 * </blockquote> 2105 * 2106 *<p> 2107 * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the 2108 * replacement string may cause the results to be different than if it were 2109 * being treated as a literal replacement string; see 2110 * {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}. 2111 * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special 2112 * meaning of these characters, if desired. 2113 * 2114 * @param regex 2115 * the regular expression to which this string is to be matched 2116 * @param replacement 2117 * the string to be substituted for each match 2118 * 2119 * @return The resulting {@code String} 2120 * 2121 * @throws PatternSyntaxException 2122 * if the regular expression's syntax is invalid 2123 * 2124 * @see java.util.regex.Pattern 2125 * 2126 * @since 1.4 2127 * @spec JSR-51 2128 */ 2129 public String replaceAll(String regex, String replacement) { 2130 return Pattern.compile(regex).matcher(this).replaceAll(replacement); 2131 } 2132 2133 /** 2134 * Replaces each substring of this string that matches the literal target 2135 * sequence with the specified literal replacement sequence. The 2136 * replacement proceeds from the beginning of the string to the end, for 2137 * example, replacing "aa" with "b" in the string "aaa" will result in 2138 * "ba" rather than "ab". 2139 * 2140 * @param target The sequence of char values to be replaced 2141 * @param replacement The replacement sequence of char values 2142 * @return The resulting string 2143 * @since 1.5 2144 */ 2145 public String replace(CharSequence target, CharSequence replacement) { 2146 String tgtStr = target.toString(); 2147 String replStr = replacement.toString(); 2148 int j = indexOf(tgtStr); 2149 if (j < 0) { 2150 return this; 2151 } 2152 int tgtLen = tgtStr.length(); 2153 int tgtLen1 = Math.max(tgtLen, 1); 2154 int thisLen = length(); 2155 2156 int newLenHint = thisLen - tgtLen + replStr.length(); 2157 if (newLenHint < 0) { 2158 throw new OutOfMemoryError(); 2159 } 2160 StringBuilder sb = new StringBuilder(newLenHint); 2161 int i = 0; 2162 do { 2163 sb.append(this, i, j).append(replStr); 2164 i = j + tgtLen; 2165 } while (j < thisLen && (j = indexOf(tgtStr, j + tgtLen1)) > 0); 2166 return sb.append(this, i, thisLen).toString(); 2167 } 2168 2169 /** 2170 * Splits this string around matches of the given 2171 * <a href="../util/regex/Pattern.html#sum">regular expression</a>. 2172 * 2173 * <p> The array returned by this method contains each substring of this 2174 * string that is terminated by another substring that matches the given 2175 * expression or is terminated by the end of the string. The substrings in 2176 * the array are in the order in which they occur in this string. If the 2177 * expression does not match any part of the input then the resulting array 2178 * has just one element, namely this string. 2179 * 2180 * <p> When there is a positive-width match at the beginning of this 2181 * string then an empty leading substring is included at the beginning 2182 * of the resulting array. A zero-width match at the beginning however 2183 * never produces such empty leading substring. 2184 * 2185 * <p> The {@code limit} parameter controls the number of times the 2186 * pattern is applied and therefore affects the length of the resulting 2187 * array. 2188 * <ul> 2189 * <li><p> 2190 * If the <i>limit</i> is positive then the pattern will be applied 2191 * at most <i>limit</i> - 1 times, the array's length will be 2192 * no greater than <i>limit</i>, and the array's last entry will contain 2193 * all input beyond the last matched delimiter.</p></li> 2194 * 2195 * <li><p> 2196 * If the <i>limit</i> is zero then the pattern will be applied as 2197 * many times as possible, the array can have any length, and trailing 2198 * empty strings will be discarded.</p></li> 2199 * 2200 * <li><p> 2201 * If the <i>limit</i> is negative then the pattern will be applied 2202 * as many times as possible and the array can have any length.</p></li> 2203 * </ul> 2204 * 2205 * <p> The string {@code "boo:and:foo"}, for example, yields the 2206 * following results with these parameters: 2207 * 2208 * <blockquote><table class="plain"> 2209 * <caption style="display:none">Split example showing regex, limit, and result</caption> 2210 * <thead> 2211 * <tr> 2212 * <th scope="col">Regex</th> 2213 * <th scope="col">Limit</th> 2214 * <th scope="col">Result</th> 2215 * </tr> 2216 * </thead> 2217 * <tbody> 2218 * <tr><th scope="row" rowspan="3" style="font-weight:normal">:</th> 2219 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th> 2220 * <td>{@code { "boo", "and:foo" }}</td></tr> 2221 * <tr><!-- : --> 2222 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th> 2223 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2224 * <tr><!-- : --> 2225 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th> 2226 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2227 * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th> 2228 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th> 2229 * <td>{@code { "b", "", ":and:f", "", "" }}</td></tr> 2230 * <tr><!-- o --> 2231 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th> 2232 * <td>{@code { "b", "", ":and:f", "", "" }}</td></tr> 2233 * <tr><!-- o --> 2234 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th> 2235 * <td>{@code { "b", "", ":and:f" }}</td></tr> 2236 * </tbody> 2237 * </table></blockquote> 2238 * 2239 * <p> An invocation of this method of the form 2240 * <i>str.</i>{@code split(}<i>regex</i>{@code ,} <i>n</i>{@code )} 2241 * yields the same result as the expression 2242 * 2243 * <blockquote> 2244 * <code> 2245 * {@link java.util.regex.Pattern}.{@link 2246 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2247 * java.util.regex.Pattern#split(java.lang.CharSequence,int) split}(<i>str</i>, <i>n</i>) 2248 * </code> 2249 * </blockquote> 2250 * 2251 * 2252 * @param regex 2253 * the delimiting regular expression 2254 * 2255 * @param limit 2256 * the result threshold, as described above 2257 * 2258 * @return the array of strings computed by splitting this string 2259 * around matches of the given regular expression 2260 * 2261 * @throws PatternSyntaxException 2262 * if the regular expression's syntax is invalid 2263 * 2264 * @see java.util.regex.Pattern 2265 * 2266 * @since 1.4 2267 * @spec JSR-51 2268 */ 2269 public String[] split(String regex, int limit) { 2270 /* fastpath if the regex is a 2271 (1)one-char String and this character is not one of the 2272 RegEx's meta characters ".$|()[{^?*+\\", or 2273 (2)two-char String and the first char is the backslash and 2274 the second is not the ascii digit or ascii letter. 2275 */ 2276 char ch = 0; 2277 if (((regex.length() == 1 && 2278 ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) || 2279 (regex.length() == 2 && 2280 regex.charAt(0) == '\\' && 2281 (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 && 2282 ((ch-'a')|('z'-ch)) < 0 && 2283 ((ch-'A')|('Z'-ch)) < 0)) && 2284 (ch < Character.MIN_HIGH_SURROGATE || 2285 ch > Character.MAX_LOW_SURROGATE)) 2286 { 2287 int off = 0; 2288 int next = 0; 2289 boolean limited = limit > 0; 2290 ArrayList<String> list = new ArrayList<>(); 2291 while ((next = indexOf(ch, off)) != -1) { 2292 if (!limited || list.size() < limit - 1) { 2293 list.add(substring(off, next)); 2294 off = next + 1; 2295 } else { // last one 2296 //assert (list.size() == limit - 1); 2297 int last = length(); 2298 list.add(substring(off, last)); 2299 off = last; 2300 break; 2301 } 2302 } 2303 // If no match was found, return this 2304 if (off == 0) 2305 return new String[]{this}; 2306 2307 // Add remaining segment 2308 if (!limited || list.size() < limit) 2309 list.add(substring(off, length())); 2310 2311 // Construct result 2312 int resultSize = list.size(); 2313 if (limit == 0) { 2314 while (resultSize > 0 && list.get(resultSize - 1).length() == 0) { 2315 resultSize--; 2316 } 2317 } 2318 String[] result = new String[resultSize]; 2319 return list.subList(0, resultSize).toArray(result); 2320 } 2321 return Pattern.compile(regex).split(this, limit); 2322 } 2323 2324 /** 2325 * Splits this string around matches of the given <a 2326 * href="../util/regex/Pattern.html#sum">regular expression</a>. 2327 * 2328 * <p> This method works as if by invoking the two-argument {@link 2329 * #split(String, int) split} method with the given expression and a limit 2330 * argument of zero. Trailing empty strings are therefore not included in 2331 * the resulting array. 2332 * 2333 * <p> The string {@code "boo:and:foo"}, for example, yields the following 2334 * results with these expressions: 2335 * 2336 * <blockquote><table class="plain"> 2337 * <caption style="display:none">Split examples showing regex and result</caption> 2338 * <thead> 2339 * <tr> 2340 * <th scope="col">Regex</th> 2341 * <th scope="col">Result</th> 2342 * </tr> 2343 * </thead> 2344 * <tbody> 2345 * <tr><th scope="row" style="text-weight:normal">:</th> 2346 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2347 * <tr><th scope="row" style="text-weight:normal">o</th> 2348 * <td>{@code { "b", "", ":and:f" }}</td></tr> 2349 * </tbody> 2350 * </table></blockquote> 2351 * 2352 * 2353 * @param regex 2354 * the delimiting regular expression 2355 * 2356 * @return the array of strings computed by splitting this string 2357 * around matches of the given regular expression 2358 * 2359 * @throws PatternSyntaxException 2360 * if the regular expression's syntax is invalid 2361 * 2362 * @see java.util.regex.Pattern 2363 * 2364 * @since 1.4 2365 * @spec JSR-51 2366 */ 2367 public String[] split(String regex) { 2368 return split(regex, 0); 2369 } 2370 2371 /** 2372 * Returns a new String composed of copies of the 2373 * {@code CharSequence elements} joined together with a copy of 2374 * the specified {@code delimiter}. 2375 * 2376 * <blockquote>For example, 2377 * <pre>{@code 2378 * String message = String.join("-", "Java", "is", "cool"); 2379 * // message returned is: "Java-is-cool" 2380 * }</pre></blockquote> 2381 * 2382 * Note that if an element is null, then {@code "null"} is added. 2383 * 2384 * @param delimiter the delimiter that separates each element 2385 * @param elements the elements to join together. 2386 * 2387 * @return a new {@code String} that is composed of the {@code elements} 2388 * separated by the {@code delimiter} 2389 * 2390 * @throws NullPointerException If {@code delimiter} or {@code elements} 2391 * is {@code null} 2392 * 2393 * @see java.util.StringJoiner 2394 * @since 1.8 2395 */ 2396 public static String join(CharSequence delimiter, CharSequence... elements) { 2397 Objects.requireNonNull(delimiter); 2398 Objects.requireNonNull(elements); 2399 // Number of elements not likely worth Arrays.stream overhead. 2400 StringJoiner joiner = new StringJoiner(delimiter); 2401 for (CharSequence cs: elements) { 2402 joiner.add(cs); 2403 } 2404 return joiner.toString(); 2405 } 2406 2407 /** 2408 * Returns a new {@code String} composed of copies of the 2409 * {@code CharSequence elements} joined together with a copy of the 2410 * specified {@code delimiter}. 2411 * 2412 * <blockquote>For example, 2413 * <pre>{@code 2414 * List<String> strings = List.of("Java", "is", "cool"); 2415 * String message = String.join(" ", strings); 2416 * //message returned is: "Java is cool" 2417 * 2418 * Set<String> strings = 2419 * new LinkedHashSet<>(List.of("Java", "is", "very", "cool")); 2420 * String message = String.join("-", strings); 2421 * //message returned is: "Java-is-very-cool" 2422 * }</pre></blockquote> 2423 * 2424 * Note that if an individual element is {@code null}, then {@code "null"} is added. 2425 * 2426 * @param delimiter a sequence of characters that is used to separate each 2427 * of the {@code elements} in the resulting {@code String} 2428 * @param elements an {@code Iterable} that will have its {@code elements} 2429 * joined together. 2430 * 2431 * @return a new {@code String} that is composed from the {@code elements} 2432 * argument 2433 * 2434 * @throws NullPointerException If {@code delimiter} or {@code elements} 2435 * is {@code null} 2436 * 2437 * @see #join(CharSequence,CharSequence...) 2438 * @see java.util.StringJoiner 2439 * @since 1.8 2440 */ 2441 public static String join(CharSequence delimiter, 2442 Iterable<? extends CharSequence> elements) { 2443 Objects.requireNonNull(delimiter); 2444 Objects.requireNonNull(elements); 2445 StringJoiner joiner = new StringJoiner(delimiter); 2446 for (CharSequence cs: elements) { 2447 joiner.add(cs); 2448 } 2449 return joiner.toString(); 2450 } 2451 2452 /** 2453 * Converts all of the characters in this {@code String} to lower 2454 * case using the rules of the given {@code Locale}. Case mapping is based 2455 * on the Unicode Standard version specified by the {@link java.lang.Character Character} 2456 * class. Since case mappings are not always 1:1 char mappings, the resulting 2457 * {@code String} may be a different length than the original {@code String}. 2458 * <p> 2459 * Examples of lowercase mappings are in the following table: 2460 * <table class="plain"> 2461 * <caption style="display:none">Lowercase mapping examples showing language code of locale, upper case, lower case, and description</caption> 2462 * <thead> 2463 * <tr> 2464 * <th scope="col">Language Code of Locale</th> 2465 * <th scope="col">Upper Case</th> 2466 * <th scope="col">Lower Case</th> 2467 * <th scope="col">Description</th> 2468 * </tr> 2469 * </thead> 2470 * <tbody> 2471 * <tr> 2472 * <td>tr (Turkish)</td> 2473 * <th scope="row" style="font-weight:normal; text-align:left">\u0130</th> 2474 * <td>\u0069</td> 2475 * <td>capital letter I with dot above -> small letter i</td> 2476 * </tr> 2477 * <tr> 2478 * <td>tr (Turkish)</td> 2479 * <th scope="row" style="font-weight:normal; text-align:left">\u0049</th> 2480 * <td>\u0131</td> 2481 * <td>capital letter I -> small letter dotless i </td> 2482 * </tr> 2483 * <tr> 2484 * <td>(all)</td> 2485 * <th scope="row" style="font-weight:normal; text-align:left">French Fries</th> 2486 * <td>french fries</td> 2487 * <td>lowercased all chars in String</td> 2488 * </tr> 2489 * <tr> 2490 * <td>(all)</td> 2491 * <th scope="row" style="font-weight:normal; text-align:left"> 2492 * ΙΧΘΥΣ</th> 2493 * <td>ιχθυσ</td> 2494 * <td>lowercased all chars in String</td> 2495 * </tr> 2496 * </tbody> 2497 * </table> 2498 * 2499 * @param locale use the case transformation rules for this locale 2500 * @return the {@code String}, converted to lowercase. 2501 * @see java.lang.String#toLowerCase() 2502 * @see java.lang.String#toUpperCase() 2503 * @see java.lang.String#toUpperCase(Locale) 2504 * @since 1.1 2505 */ 2506 public String toLowerCase(Locale locale) { 2507 return isLatin1() ? StringLatin1.toLowerCase(this, value, locale) 2508 : StringUTF16.toLowerCase(this, value, locale); 2509 } 2510 2511 /** 2512 * Converts all of the characters in this {@code String} to lower 2513 * case using the rules of the default locale. This is equivalent to calling 2514 * {@code toLowerCase(Locale.getDefault())}. 2515 * <p> 2516 * <b>Note:</b> This method is locale sensitive, and may produce unexpected 2517 * results if used for strings that are intended to be interpreted locale 2518 * independently. 2519 * Examples are programming language identifiers, protocol keys, and HTML 2520 * tags. 2521 * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale 2522 * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the 2523 * LATIN SMALL LETTER DOTLESS I character. 2524 * To obtain correct results for locale insensitive strings, use 2525 * {@code toLowerCase(Locale.ROOT)}. 2526 * 2527 * @return the {@code String}, converted to lowercase. 2528 * @see java.lang.String#toLowerCase(Locale) 2529 */ 2530 public String toLowerCase() { 2531 return toLowerCase(Locale.getDefault()); 2532 } 2533 2534 /** 2535 * Converts all of the characters in this {@code String} to upper 2536 * case using the rules of the given {@code Locale}. Case mapping is based 2537 * on the Unicode Standard version specified by the {@link java.lang.Character Character} 2538 * class. Since case mappings are not always 1:1 char mappings, the resulting 2539 * {@code String} may be a different length than the original {@code String}. 2540 * <p> 2541 * Examples of locale-sensitive and 1:M case mappings are in the following table. 2542 * 2543 * <table class="plain"> 2544 * <caption style="display:none">Examples of locale-sensitive and 1:M case mappings. Shows Language code of locale, lower case, upper case, and description.</caption> 2545 * <thead> 2546 * <tr> 2547 * <th scope="col">Language Code of Locale</th> 2548 * <th scope="col">Lower Case</th> 2549 * <th scope="col">Upper Case</th> 2550 * <th scope="col">Description</th> 2551 * </tr> 2552 * </thead> 2553 * <tbody> 2554 * <tr> 2555 * <td>tr (Turkish)</td> 2556 * <th scope="row" style="font-weight:normal; text-align:left">\u0069</th> 2557 * <td>\u0130</td> 2558 * <td>small letter i -> capital letter I with dot above</td> 2559 * </tr> 2560 * <tr> 2561 * <td>tr (Turkish)</td> 2562 * <th scope="row" style="font-weight:normal; text-align:left">\u0131</th> 2563 * <td>\u0049</td> 2564 * <td>small letter dotless i -> capital letter I</td> 2565 * </tr> 2566 * <tr> 2567 * <td>(all)</td> 2568 * <th scope="row" style="font-weight:normal; text-align:left">\u00df</th> 2569 * <td>\u0053 \u0053</td> 2570 * <td>small letter sharp s -> two letters: SS</td> 2571 * </tr> 2572 * <tr> 2573 * <td>(all)</td> 2574 * <th scope="row" style="font-weight:normal; text-align:left">Fahrvergnügen</th> 2575 * <td>FAHRVERGNÜGEN</td> 2576 * <td></td> 2577 * </tr> 2578 * </tbody> 2579 * </table> 2580 * @param locale use the case transformation rules for this locale 2581 * @return the {@code String}, converted to uppercase. 2582 * @see java.lang.String#toUpperCase() 2583 * @see java.lang.String#toLowerCase() 2584 * @see java.lang.String#toLowerCase(Locale) 2585 * @since 1.1 2586 */ 2587 public String toUpperCase(Locale locale) { 2588 return isLatin1() ? StringLatin1.toUpperCase(this, value, locale) 2589 : StringUTF16.toUpperCase(this, value, locale); 2590 } 2591 2592 /** 2593 * Converts all of the characters in this {@code String} to upper 2594 * case using the rules of the default locale. This method is equivalent to 2595 * {@code toUpperCase(Locale.getDefault())}. 2596 * <p> 2597 * <b>Note:</b> This method is locale sensitive, and may produce unexpected 2598 * results if used for strings that are intended to be interpreted locale 2599 * independently. 2600 * Examples are programming language identifiers, protocol keys, and HTML 2601 * tags. 2602 * For instance, {@code "title".toUpperCase()} in a Turkish locale 2603 * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the 2604 * LATIN CAPITAL LETTER I WITH DOT ABOVE character. 2605 * To obtain correct results for locale insensitive strings, use 2606 * {@code toUpperCase(Locale.ROOT)}. 2607 * 2608 * @return the {@code String}, converted to uppercase. 2609 * @see java.lang.String#toUpperCase(Locale) 2610 */ 2611 public String toUpperCase() { 2612 return toUpperCase(Locale.getDefault()); 2613 } 2614 2615 /** 2616 * Returns a string whose value is this string, with all leading 2617 * and trailing space removed, where space is defined 2618 * as any character whose codepoint is less than or equal to 2619 * {@code 'U+0020'} (the space character). 2620 * <p> 2621 * If this {@code String} object represents an empty character 2622 * sequence, or the first and last characters of character sequence 2623 * represented by this {@code String} object both have codes 2624 * that are not space (as defined above), then a 2625 * reference to this {@code String} object is returned. 2626 * <p> 2627 * Otherwise, if all characters in this string are space (as 2628 * defined above), then a {@code String} object representing an 2629 * empty string is returned. 2630 * <p> 2631 * Otherwise, let <i>k</i> be the index of the first character in the 2632 * string whose code is not a space (as defined above) and let 2633 * <i>m</i> be the index of the last character in the string whose code 2634 * is not a space (as defined above). A {@code String} 2635 * object is returned, representing the substring of this string that 2636 * begins with the character at index <i>k</i> and ends with the 2637 * character at index <i>m</i>-that is, the result of 2638 * {@code this.substring(k, m + 1)}. 2639 * <p> 2640 * This method may be used to trim space (as defined above) from 2641 * the beginning and end of a string. 2642 * 2643 * @return a string whose value is this string, with all leading 2644 * and trailing space removed, or this string if it 2645 * has no leading or trailing space. 2646 */ 2647 public String trim() { 2648 String ret = isLatin1() ? StringLatin1.trim(value) 2649 : StringUTF16.trim(value); 2650 return ret == null ? this : ret; 2651 } 2652 2653 /** 2654 * Returns a string whose value is this string, with all leading 2655 * and trailing {@link Character#isWhitespace(int) white space} 2656 * removed. 2657 * <p> 2658 * If this {@code String} object represents an empty string, 2659 * or if all code points in this string are 2660 * {@link Character#isWhitespace(int) white space}, then an empty string 2661 * is returned. 2662 * <p> 2663 * Otherwise, returns a substring of this string beginning with the first 2664 * code point that is not a {@link Character#isWhitespace(int) white space} 2665 * up to and including the last code point that is not a 2666 * {@link Character#isWhitespace(int) white space}. 2667 * <p> 2668 * This method may be used to strip 2669 * {@link Character#isWhitespace(int) white space} from 2670 * the beginning and end of a string. 2671 * 2672 * @return a string whose value is this string, with all leading 2673 * and trailing white space removed 2674 * 2675 * @see Character#isWhitespace(int) 2676 * 2677 * @since 11 2678 */ 2679 public String strip() { 2680 String ret = isLatin1() ? StringLatin1.strip(value) 2681 : StringUTF16.strip(value); 2682 return ret == null ? this : ret; 2683 } 2684 2685 /** 2686 * Returns a string whose value is this string, with all leading 2687 * {@link Character#isWhitespace(int) white space} removed. 2688 * <p> 2689 * If this {@code String} object represents an empty string, 2690 * or if all code points in this string are 2691 * {@link Character#isWhitespace(int) white space}, then an empty string 2692 * is returned. 2693 * <p> 2694 * Otherwise, returns a substring of this string beginning with the first 2695 * code point that is not a {@link Character#isWhitespace(int) white space} 2696 * up to to and including the last code point of this string. 2697 * <p> 2698 * This method may be used to trim 2699 * {@link Character#isWhitespace(int) white space} from 2700 * the beginning of a string. 2701 * 2702 * @return a string whose value is this string, with all leading white 2703 * space removed 2704 * 2705 * @see Character#isWhitespace(int) 2706 * 2707 * @since 11 2708 */ 2709 public String stripLeading() { 2710 String ret = isLatin1() ? StringLatin1.stripLeading(value) 2711 : StringUTF16.stripLeading(value); 2712 return ret == null ? this : ret; 2713 } 2714 2715 /** 2716 * Returns a string whose value is this string, with all trailing 2717 * {@link Character#isWhitespace(int) white space} removed. 2718 * <p> 2719 * If this {@code String} object represents an empty string, 2720 * or if all characters in this string are 2721 * {@link Character#isWhitespace(int) white space}, then an empty string 2722 * is returned. 2723 * <p> 2724 * Otherwise, returns a substring of this string beginning with the first 2725 * code point of this string up to and including the last code point 2726 * that is not a {@link Character#isWhitespace(int) white space}. 2727 * <p> 2728 * This method may be used to trim 2729 * {@link Character#isWhitespace(int) white space} from 2730 * the end of a string. 2731 * 2732 * @return a string whose value is this string, with all trailing white 2733 * space removed 2734 * 2735 * @see Character#isWhitespace(int) 2736 * 2737 * @since 11 2738 */ 2739 public String stripTrailing() { 2740 String ret = isLatin1() ? StringLatin1.stripTrailing(value) 2741 : StringUTF16.stripTrailing(value); 2742 return ret == null ? this : ret; 2743 } 2744 2745 /** 2746 * Returns {@code true} if the string is empty or contains only 2747 * {@link Character#isWhitespace(int) white space} codepoints, 2748 * otherwise {@code false}. 2749 * 2750 * @return {@code true} if the string is empty or contains only 2751 * {@link Character#isWhitespace(int) white space} codepoints, 2752 * otherwise {@code false} 2753 * 2754 * @see Character#isWhitespace(int) 2755 * 2756 * @since 11 2757 */ 2758 public boolean isBlank() { 2759 return indexOfNonWhitespace() == length(); 2760 } 2761 2762 private Stream<String> lines(int maxLeading, int maxTrailing) { 2763 return isLatin1() ? StringLatin1.lines(value, maxLeading, maxTrailing) 2764 : StringUTF16.lines(value, maxLeading, maxTrailing); 2765 } 2766 2767 /** 2768 * Returns a stream of lines extracted from this string, 2769 * separated by line terminators. 2770 * <p> 2771 * A <i>line terminator</i> is one of the following: 2772 * a line feed character {@code "\n"} (U+000A), 2773 * a carriage return character {@code "\r"} (U+000D), 2774 * or a carriage return followed immediately by a line feed 2775 * {@code "\r\n"} (U+000D U+000A). 2776 * <p> 2777 * A <i>line</i> is either a sequence of zero or more characters 2778 * followed by a line terminator, or it is a sequence of one or 2779 * more characters followed by the end of the string. A 2780 * line does not include the line terminator. 2781 * <p> 2782 * The stream returned by this method contains the lines from 2783 * this string in the order in which they occur. 2784 * 2785 * @apiNote This definition of <i>line</i> implies that an empty 2786 * string has zero lines and that there is no empty line 2787 * following a line terminator at the end of a string. 2788 * 2789 * @implNote This method provides better performance than 2790 * split("\R") by supplying elements lazily and 2791 * by faster search of new line terminators. 2792 * 2793 * @return the stream of lines extracted from this string 2794 * 2795 * @since 11 2796 */ 2797 public Stream<String> lines() { 2798 return lines(0, 0); 2799 } 2800 2801 /** 2802 * Adjusts the indentation of each line of this string based on the value of 2803 * {@code n}, and normalizes line termination characters. 2804 * <p> 2805 * This string is conceptually separated into lines using 2806 * {@link String#lines()}. Each line is then adjusted as described below 2807 * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting 2808 * lines are then concatenated and returned. 2809 * <p> 2810 * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the 2811 * beginning of each line. {@link String#isBlank() Blank lines} are 2812 * unaffected. 2813 * <p> 2814 * If {@code n < 0} then up to {@code n} 2815 * {@link Character#isWhitespace(int) white space characters} are removed 2816 * from the beginning of each line. If a given line does not contain 2817 * sufficient white space then all leading 2818 * {@link Character#isWhitespace(int) white space characters} are removed. 2819 * Each white space character is treated as a single character. In 2820 * particular, the tab character {@code "\t"} (U+0009) is considered a 2821 * single character; it is not expanded. 2822 * <p> 2823 * If {@code n == 0} then the line remains unchanged. However, line 2824 * terminators are still normalized. 2825 * <p> 2826 * 2827 * @param n number of leading 2828 * {@link Character#isWhitespace(int) white space characters} 2829 * to add or remove 2830 * 2831 * @return string with indentation adjusted and line endings normalized 2832 * 2833 * @see String#lines() 2834 * @see String#isBlank() 2835 * @see Character#isWhitespace(int) 2836 * 2837 * @since 12 2838 */ 2839 public String indent(int n) { 2840 return isEmpty() ? "" : indent(n, false); 2841 } 2842 2843 private String indent(int n, boolean removeBlanks) { 2844 Stream<String> stream = removeBlanks ? lines(Integer.MAX_VALUE, Integer.MAX_VALUE) 2845 : lines(); 2846 if (n > 0) { 2847 final String spaces = " ".repeat(n); 2848 stream = stream.map(s -> s.isBlank() ? s : spaces + s); 2849 } else if (n == Integer.MIN_VALUE) { 2850 stream = stream.map(s -> s.stripLeading()); 2851 } else if (n < 0) { 2852 stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace()))); 2853 } 2854 return stream.collect(Collectors.joining("\n", "", "\n")); 2855 } 2856 2857 private int indexOfNonWhitespace() { 2858 return isLatin1() ? StringLatin1.indexOfNonWhitespace(value) 2859 : StringUTF16.indexOfNonWhitespace(value); 2860 } 2861 2862 private int lastIndexOfNonWhitespace() { 2863 return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value) 2864 : StringUTF16.lastIndexOfNonWhitespace(value); 2865 } 2866 2867 /** 2868 * Removes vertical and horizontal white space margins from around the 2869 * essential body of a multi-line string, while preserving relative 2870 * indentation. 2871 * <p> 2872 * This string is first conceptually separated into lines as if by 2873 * {@link String#lines()}. 2874 * <p> 2875 * Then, the <i>minimum indentation</i> (min) is determined as follows. For 2876 * each non-blank line (as defined by {@link String#isBlank()}), the 2877 * leading {@link Character#isWhitespace(int) white space} characters are 2878 * counted. The <i>min</i> value is the smallest of these counts. 2879 * <p> 2880 * For each non-blank line, <i>min</i> leading white space characters are 2881 * removed. Each white space character is treated as a single character. In 2882 * particular, the tab character {@code "\t"} (U+0009) is considered a 2883 * single character; it is not expanded. 2884 * <p> 2885 * Leading and trailing blank lines, if any, are removed. Trailing spaces are 2886 * preserved. 2887 * <p> 2888 * Each line is suffixed with a line feed character {@code "\n"} (U+000A). 2889 * <p> 2890 * Finally, the lines are concatenated into a single string and returned. 2891 * 2892 * @apiNote 2893 * This method's primary purpose is to shift a block of lines as far as 2894 * possible to the left, while preserving relative indentation. Lines 2895 * that were indented the least will thus have no leading white space. 2896 * 2897 * Example: 2898 * <blockquote><pre> 2899 * ` 2900 * This is the first line 2901 * This is the second line 2902 * `.align(); 2903 * 2904 * returns 2905 * This is the first line 2906 * This is the second line 2907 * </pre></blockquote> 2908 * 2909 * @return string with margins removed and line terminators normalized 2910 * 2911 * @see String#lines() 2912 * @see String#isBlank() 2913 * @see String#indent(int) 2914 * @see Character#isWhitespace(int) 2915 * 2916 * @since 12 2917 */ 2918 public String align() { 2919 return align(0); 2920 } 2921 2922 /** 2923 * Removes vertical and horizontal white space margins from around the 2924 * essential body of a multi-line string, while preserving relative 2925 * indentation and with optional indentation adjustment. 2926 * <p> 2927 * Invoking this method is equivalent to: 2928 * <blockquote> 2929 * {@code this.align().indent(n)} 2930 * </blockquote> 2931 * 2932 * @apiNote 2933 * Examples: 2934 * <blockquote><pre> 2935 * ` 2936 * This is the first line 2937 * This is the second line 2938 * `.align(0); 2939 * 2940 * returns 2941 * This is the first line 2942 * This is the second line 2943 * 2944 * 2945 * ` 2946 * This is the first line 2947 * This is the second line 2948 * `.align(4); 2949 * returns 2950 * This is the first line 2951 * This is the second line 2952 * </pre></blockquote> 2953 * 2954 * @param n number of leading white space characters 2955 * to add or remove 2956 * 2957 * @return string with margins removed, indentation adjusted and 2958 * line terminators normalized 2959 * 2960 * @see String#align() 2961 * 2962 * @since 12 2963 */ 2964 public String align(int n) { 2965 if (isEmpty()) { 2966 return ""; 2967 } 2968 int outdent = lines().filter(not(String::isBlank)) 2969 .mapToInt(String::indexOfNonWhitespace) 2970 .min() 2971 .orElse(0); 2972 return indent(n - outdent, true); 2973 } 2974 2975 /** 2976 * This method allows the application of a function to {@code this} 2977 * string. The function should expect a single String argument 2978 * and produce an {@code R} result. 2979 * 2980 * @param f functional interface to a apply 2981 * 2982 * @param <R> class of the result 2983 * 2984 * @return the result of applying the function to this string 2985 * 2986 * @see java.util.function.Function 2987 * 2988 * @since 12 2989 */ 2990 public <R> R transform(Function<? super String, ? extends R> f) { 2991 return f.apply(this); 2992 } 2993 2994 /** 2995 * This object (which is already a string!) is itself returned. 2996 * 2997 * @return the string itself. 2998 */ 2999 public String toString() { 3000 return this; 3001 } 3002 3003 /** 3004 * Returns a stream of {@code int} zero-extending the {@code char} values 3005 * from this sequence. Any char which maps to a <a 3006 * href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate code 3007 * point</a> is passed through uninterpreted. 3008 * 3009 * @return an IntStream of char values from this sequence 3010 * @since 9 3011 */ 3012 @Override 3013 public IntStream chars() { 3014 return StreamSupport.intStream( 3015 isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE) 3016 : new StringUTF16.CharsSpliterator(value, Spliterator.IMMUTABLE), 3017 false); 3018 } 3019 3020 3021 /** 3022 * Returns a stream of code point values from this sequence. Any surrogate 3023 * pairs encountered in the sequence are combined as if by {@linkplain 3024 * Character#toCodePoint Character.toCodePoint} and the result is passed 3025 * to the stream. Any other code units, including ordinary BMP characters, 3026 * unpaired surrogates, and undefined code units, are zero-extended to 3027 * {@code int} values which are then passed to the stream. 3028 * 3029 * @return an IntStream of Unicode code points from this sequence 3030 * @since 9 3031 */ 3032 @Override 3033 public IntStream codePoints() { 3034 return StreamSupport.intStream( 3035 isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE) 3036 : new StringUTF16.CodePointsSpliterator(value, Spliterator.IMMUTABLE), 3037 false); 3038 } 3039 3040 /** 3041 * Converts this string to a new character array. 3042 * 3043 * @return a newly allocated character array whose length is the length 3044 * of this string and whose contents are initialized to contain 3045 * the character sequence represented by this string. 3046 */ 3047 public char[] toCharArray() { 3048 return isLatin1() ? StringLatin1.toChars(value) 3049 : StringUTF16.toChars(value); 3050 } 3051 3052 /** 3053 * Returns a formatted string using the specified format string and 3054 * arguments. 3055 * 3056 * <p> The locale always used is the one returned by {@link 3057 * java.util.Locale#getDefault(java.util.Locale.Category) 3058 * Locale.getDefault(Locale.Category)} with 3059 * {@link java.util.Locale.Category#FORMAT FORMAT} category specified. 3060 * 3061 * @param format 3062 * A <a href="../util/Formatter.html#syntax">format string</a> 3063 * 3064 * @param args 3065 * Arguments referenced by the format specifiers in the format 3066 * string. If there are more arguments than format specifiers, the 3067 * extra arguments are ignored. The number of arguments is 3068 * variable and may be zero. The maximum number of arguments is 3069 * limited by the maximum dimension of a Java array as defined by 3070 * <cite>The Java™ Virtual Machine Specification</cite>. 3071 * The behaviour on a 3072 * {@code null} argument depends on the <a 3073 * href="../util/Formatter.html#syntax">conversion</a>. 3074 * 3075 * @throws java.util.IllegalFormatException 3076 * If a format string contains an illegal syntax, a format 3077 * specifier that is incompatible with the given arguments, 3078 * insufficient arguments given the format string, or other 3079 * illegal conditions. For specification of all possible 3080 * formatting errors, see the <a 3081 * href="../util/Formatter.html#detail">Details</a> section of the 3082 * formatter class specification. 3083 * 3084 * @return A formatted string 3085 * 3086 * @see java.util.Formatter 3087 * @since 1.5 3088 */ 3089 public static String format(String format, Object... args) { 3090 return new Formatter().format(format, args).toString(); 3091 } 3092 3093 /** 3094 * Returns a formatted string using the specified locale, format string, 3095 * and arguments. 3096 * 3097 * @param l 3098 * The {@linkplain java.util.Locale locale} to apply during 3099 * formatting. If {@code l} is {@code null} then no localization 3100 * is applied. 3101 * 3102 * @param format 3103 * A <a href="../util/Formatter.html#syntax">format string</a> 3104 * 3105 * @param args 3106 * Arguments referenced by the format specifiers in the format 3107 * string. If there are more arguments than format specifiers, the 3108 * extra arguments are ignored. The number of arguments is 3109 * variable and may be zero. The maximum number of arguments is 3110 * limited by the maximum dimension of a Java array as defined by 3111 * <cite>The Java™ Virtual Machine Specification</cite>. 3112 * The behaviour on a 3113 * {@code null} argument depends on the 3114 * <a href="../util/Formatter.html#syntax">conversion</a>. 3115 * 3116 * @throws java.util.IllegalFormatException 3117 * If a format string contains an illegal syntax, a format 3118 * specifier that is incompatible with the given arguments, 3119 * insufficient arguments given the format string, or other 3120 * illegal conditions. For specification of all possible 3121 * formatting errors, see the <a 3122 * href="../util/Formatter.html#detail">Details</a> section of the 3123 * formatter class specification 3124 * 3125 * @return A formatted string 3126 * 3127 * @see java.util.Formatter 3128 * @since 1.5 3129 */ 3130 public static String format(Locale l, String format, Object... args) { 3131 return new Formatter(l).format(format, args).toString(); 3132 } 3133 3134 /** 3135 * Returns the string representation of the {@code Object} argument. 3136 * 3137 * @param obj an {@code Object}. 3138 * @return if the argument is {@code null}, then a string equal to 3139 * {@code "null"}; otherwise, the value of 3140 * {@code obj.toString()} is returned. 3141 * @see java.lang.Object#toString() 3142 */ 3143 public static String valueOf(Object obj) { 3144 return (obj == null) ? "null" : obj.toString(); 3145 } 3146 3147 /** 3148 * Returns the string representation of the {@code char} array 3149 * argument. The contents of the character array are copied; subsequent 3150 * modification of the character array does not affect the returned 3151 * string. 3152 * 3153 * @param data the character array. 3154 * @return a {@code String} that contains the characters of the 3155 * character array. 3156 */ 3157 public static String valueOf(char data[]) { 3158 return new String(data); 3159 } 3160 3161 /** 3162 * Returns the string representation of a specific subarray of the 3163 * {@code char} array argument. 3164 * <p> 3165 * The {@code offset} argument is the index of the first 3166 * character of the subarray. The {@code count} argument 3167 * specifies the length of the subarray. The contents of the subarray 3168 * are copied; subsequent modification of the character array does not 3169 * affect the returned string. 3170 * 3171 * @param data the character array. 3172 * @param offset initial offset of the subarray. 3173 * @param count length of the subarray. 3174 * @return a {@code String} that contains the characters of the 3175 * specified subarray of the character array. 3176 * @exception IndexOutOfBoundsException if {@code offset} is 3177 * negative, or {@code count} is negative, or 3178 * {@code offset+count} is larger than 3179 * {@code data.length}. 3180 */ 3181 public static String valueOf(char data[], int offset, int count) { 3182 return new String(data, offset, count); 3183 } 3184 3185 /** 3186 * Equivalent to {@link #valueOf(char[], int, int)}. 3187 * 3188 * @param data the character array. 3189 * @param offset initial offset of the subarray. 3190 * @param count length of the subarray. 3191 * @return a {@code String} that contains the characters of the 3192 * specified subarray of the character array. 3193 * @exception IndexOutOfBoundsException if {@code offset} is 3194 * negative, or {@code count} is negative, or 3195 * {@code offset+count} is larger than 3196 * {@code data.length}. 3197 */ 3198 public static String copyValueOf(char data[], int offset, int count) { 3199 return new String(data, offset, count); 3200 } 3201 3202 /** 3203 * Equivalent to {@link #valueOf(char[])}. 3204 * 3205 * @param data the character array. 3206 * @return a {@code String} that contains the characters of the 3207 * character array. 3208 */ 3209 public static String copyValueOf(char data[]) { 3210 return new String(data); 3211 } 3212 3213 /** 3214 * Returns the string representation of the {@code boolean} argument. 3215 * 3216 * @param b a {@code boolean}. 3217 * @return if the argument is {@code true}, a string equal to 3218 * {@code "true"} is returned; otherwise, a string equal to 3219 * {@code "false"} is returned. 3220 */ 3221 public static String valueOf(boolean b) { 3222 return b ? "true" : "false"; 3223 } 3224 3225 /** 3226 * Returns the string representation of the {@code char} 3227 * argument. 3228 * 3229 * @param c a {@code char}. 3230 * @return a string of length {@code 1} containing 3231 * as its single character the argument {@code c}. 3232 */ 3233 public static String valueOf(char c) { 3234 if (COMPACT_STRINGS && StringLatin1.canEncode(c)) { 3235 return new String(StringLatin1.toBytes(c), LATIN1); 3236 } 3237 return new String(StringUTF16.toBytes(c), UTF16); 3238 } 3239 3240 /** 3241 * Returns the string representation of the {@code int} argument. 3242 * <p> 3243 * The representation is exactly the one returned by the 3244 * {@code Integer.toString} method of one argument. 3245 * 3246 * @param i an {@code int}. 3247 * @return a string representation of the {@code int} argument. 3248 * @see java.lang.Integer#toString(int, int) 3249 */ 3250 public static String valueOf(int i) { 3251 return Integer.toString(i); 3252 } 3253 3254 /** 3255 * Returns the string representation of the {@code long} argument. 3256 * <p> 3257 * The representation is exactly the one returned by the 3258 * {@code Long.toString} method of one argument. 3259 * 3260 * @param l a {@code long}. 3261 * @return a string representation of the {@code long} argument. 3262 * @see java.lang.Long#toString(long) 3263 */ 3264 public static String valueOf(long l) { 3265 return Long.toString(l); 3266 } 3267 3268 /** 3269 * Returns the string representation of the {@code float} argument. 3270 * <p> 3271 * The representation is exactly the one returned by the 3272 * {@code Float.toString} method of one argument. 3273 * 3274 * @param f a {@code float}. 3275 * @return a string representation of the {@code float} argument. 3276 * @see java.lang.Float#toString(float) 3277 */ 3278 public static String valueOf(float f) { 3279 return Float.toString(f); 3280 } 3281 3282 /** 3283 * Returns the string representation of the {@code double} argument. 3284 * <p> 3285 * The representation is exactly the one returned by the 3286 * {@code Double.toString} method of one argument. 3287 * 3288 * @param d a {@code double}. 3289 * @return a string representation of the {@code double} argument. 3290 * @see java.lang.Double#toString(double) 3291 */ 3292 public static String valueOf(double d) { 3293 return Double.toString(d); 3294 } 3295 3296 /** 3297 * Returns a canonical representation for the string object. 3298 * <p> 3299 * A pool of strings, initially empty, is maintained privately by the 3300 * class {@code String}. 3301 * <p> 3302 * When the intern method is invoked, if the pool already contains a 3303 * string equal to this {@code String} object as determined by 3304 * the {@link #equals(Object)} method, then the string from the pool is 3305 * returned. Otherwise, this {@code String} object is added to the 3306 * pool and a reference to this {@code String} object is returned. 3307 * <p> 3308 * It follows that for any two strings {@code s} and {@code t}, 3309 * {@code s.intern() == t.intern()} is {@code true} 3310 * if and only if {@code s.equals(t)} is {@code true}. 3311 * <p> 3312 * All literal strings and string-valued constant expressions are 3313 * interned. String literals are defined in section 3.10.5 of the 3314 * <cite>The Java™ Language Specification</cite>. 3315 * 3316 * @return a string that has the same contents as this string, but is 3317 * guaranteed to be from a pool of unique strings. 3318 * @jls 3.10.5 String Literals 3319 */ 3320 public native String intern(); 3321 3322 /** 3323 * Returns a string whose value is the concatenation of this 3324 * string repeated {@code count} times. 3325 * <p> 3326 * If this string is empty or count is zero then the empty 3327 * string is returned. 3328 * 3329 * @param count number of times to repeat 3330 * 3331 * @return A string composed of this string repeated 3332 * {@code count} times or the empty string if this 3333 * string is empty or count is zero 3334 * 3335 * @throws IllegalArgumentException if the {@code count} is 3336 * negative. 3337 * 3338 * @since 11 3339 */ 3340 public String repeat(int count) { 3341 if (count < 0) { 3342 throw new IllegalArgumentException("count is negative: " + count); 3343 } 3344 if (count == 1) { 3345 return this; 3346 } 3347 final int len = value.length; 3348 if (len == 0 || count == 0) { 3349 return ""; 3350 } 3351 if (len == 1) { 3352 final byte[] single = new byte[count]; 3353 Arrays.fill(single, value[0]); 3354 return new String(single, coder); 3355 } 3356 if (Integer.MAX_VALUE / count < len) { 3357 throw new OutOfMemoryError("Repeating " + len + " bytes String " + count + 3358 " times will produce a String exceeding maximum size."); 3359 } 3360 final int limit = len * count; 3361 final byte[] multiple = new byte[limit]; 3362 System.arraycopy(value, 0, multiple, 0, len); 3363 int copied = len; 3364 for (; copied < limit - copied; copied <<= 1) { 3365 System.arraycopy(multiple, 0, multiple, copied, copied); 3366 } 3367 System.arraycopy(multiple, 0, multiple, copied, limit - copied); 3368 return new String(multiple, coder); 3369 } 3370 3371 //////////////////////////////////////////////////////////////// 3372 3373 /** 3374 * Copy character bytes from this string into dst starting at dstBegin. 3375 * This method doesn't perform any range checking. 3376 * 3377 * Invoker guarantees: dst is in UTF16 (inflate itself for asb), if two 3378 * coders are different, and dst is big enough (range check) 3379 * 3380 * @param dstBegin the char index, not offset of byte[] 3381 * @param coder the coder of dst[] 3382 */ 3383 void getBytes(byte dst[], int dstBegin, byte coder) { 3384 if (coder() == coder) { 3385 System.arraycopy(value, 0, dst, dstBegin << coder, value.length); 3386 } else { // this.coder == LATIN && coder == UTF16 3387 StringLatin1.inflate(value, 0, dst, dstBegin, value.length); 3388 } 3389 } 3390 3391 /* 3392 * Package private constructor. Trailing Void argument is there for 3393 * disambiguating it against other (public) constructors. 3394 * 3395 * Stores the char[] value into a byte[] that each byte represents 3396 * the8 low-order bits of the corresponding character, if the char[] 3397 * contains only latin1 character. Or a byte[] that stores all 3398 * characters in their byte sequences defined by the {@code StringUTF16}. 3399 */ 3400 String(char[] value, int off, int len, Void sig) { 3401 if (len == 0) { 3402 this.value = "".value; 3403 this.coder = "".coder; 3404 return; 3405 } 3406 if (COMPACT_STRINGS) { 3407 byte[] val = StringUTF16.compress(value, off, len); 3408 if (val != null) { 3409 this.value = val; 3410 this.coder = LATIN1; 3411 return; 3412 } 3413 } 3414 this.coder = UTF16; 3415 this.value = StringUTF16.toBytes(value, off, len); 3416 } 3417 3418 /* 3419 * Package private constructor. Trailing Void argument is there for 3420 * disambiguating it against other (public) constructors. 3421 */ 3422 String(AbstractStringBuilder asb, Void sig) { 3423 byte[] val = asb.getValue(); 3424 int length = asb.length(); 3425 if (asb.isLatin1()) { 3426 this.coder = LATIN1; 3427 this.value = Arrays.copyOfRange(val, 0, length); 3428 } else { 3429 if (COMPACT_STRINGS) { 3430 byte[] buf = StringUTF16.compress(val, 0, length); 3431 if (buf != null) { 3432 this.coder = LATIN1; 3433 this.value = buf; 3434 return; 3435 } 3436 } 3437 this.coder = UTF16; 3438 this.value = Arrays.copyOfRange(val, 0, length << 1); 3439 } 3440 } 3441 3442 /* 3443 * Package private constructor which shares value array for speed. 3444 */ 3445 String(byte[] value, byte coder) { 3446 this.value = value; 3447 this.coder = coder; 3448 } 3449 3450 byte coder() { 3451 return COMPACT_STRINGS ? coder : UTF16; 3452 } 3453 3454 byte[] value() { 3455 return value; 3456 } 3457 3458 private boolean isLatin1() { 3459 return COMPACT_STRINGS && coder == LATIN1; 3460 } 3461 3462 @Native static final byte LATIN1 = 0; 3463 @Native static final byte UTF16 = 1; 3464 3465 /* 3466 * StringIndexOutOfBoundsException if {@code index} is 3467 * negative or greater than or equal to {@code length}. 3468 */ 3469 static void checkIndex(int index, int length) { 3470 if (index < 0 || index >= length) { 3471 throw new StringIndexOutOfBoundsException("index " + index + 3472 ",length " + length); 3473 } 3474 } 3475 3476 /* 3477 * StringIndexOutOfBoundsException if {@code offset} 3478 * is negative or greater than {@code length}. 3479 */ 3480 static void checkOffset(int offset, int length) { 3481 if (offset < 0 || offset > length) { 3482 throw new StringIndexOutOfBoundsException("offset " + offset + 3483 ",length " + length); 3484 } 3485 } 3486 3487 /* 3488 * Check {@code offset}, {@code count} against {@code 0} and {@code length} 3489 * bounds. 3490 * 3491 * @throws StringIndexOutOfBoundsException 3492 * If {@code offset} is negative, {@code count} is negative, 3493 * or {@code offset} is greater than {@code length - count} 3494 */ 3495 static void checkBoundsOffCount(int offset, int count, int length) { 3496 if (offset < 0 || count < 0 || offset > length - count) { 3497 throw new StringIndexOutOfBoundsException( 3498 "offset " + offset + ", count " + count + ", length " + length); 3499 } 3500 } 3501 3502 /* 3503 * Check {@code begin}, {@code end} against {@code 0} and {@code length} 3504 * bounds. 3505 * 3506 * @throws StringIndexOutOfBoundsException 3507 * If {@code begin} is negative, {@code begin} is greater than 3508 * {@code end}, or {@code end} is greater than {@code length}. 3509 */ 3510 static void checkBoundsBeginEnd(int begin, int end, int length) { 3511 if (begin < 0 || begin > end || end > length) { 3512 throw new StringIndexOutOfBoundsException( 3513 "begin " + begin + ", end " + end + ", length " + length); 3514 } 3515 } 3516 3517 /** 3518 * Returns the string representation of the {@code codePoint} 3519 * argument. 3520 * 3521 * @param codePoint a {@code codePoint}. 3522 * @return a string of length {@code 1} or {@code 2} containing 3523 * as its single character the argument {@code codePoint}. 3524 * @throws IllegalArgumentException if the specified 3525 * {@code codePoint} is not a {@linkplain Character#isValidCodePoint 3526 * valid Unicode code point}. 3527 */ 3528 static String valueOfCodePoint(int codePoint) { 3529 if (COMPACT_STRINGS && StringLatin1.canEncode(codePoint)) { 3530 return new String(StringLatin1.toBytes((char)codePoint), LATIN1); 3531 } else if (Character.isBmpCodePoint(codePoint)) { 3532 return new String(StringUTF16.toBytes((char)codePoint), UTF16); 3533 } else if (Character.isSupplementaryCodePoint(codePoint)) { 3534 return new String(StringUTF16.toBytesSupplementary(codePoint), UTF16); 3535 } 3536 3537 throw new IllegalArgumentException( 3538 format("Not a valid Unicode code point: 0x%X", codePoint)); 3539 } 3540 }