1 /* 2 * Copyright (c) 1994, 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.io.ObjectStreamField; 29 import java.io.UnsupportedEncodingException; 30 import java.nio.charset.Charset; 31 import java.nio.ByteBuffer; 32 import java.util.ArrayList; 33 import java.util.Arrays; 34 import java.util.Comparator; 35 import java.util.Formatter; 36 import java.util.Locale; 37 import java.util.Objects; 38 import java.util.StringJoiner; 39 import java.util.regex.Matcher; 40 import java.util.regex.Pattern; 41 import java.util.regex.PatternSyntaxException; 42 43 /** 44 * The {@code String} class represents character strings. All 45 * string literals in Java programs, such as {@code "abc"}, are 46 * implemented as instances of this class. 
 * <p>
 * Strings are constant; their values cannot be changed after they
 * are created. String buffers support mutable strings.
 * Because String objects are immutable they can be shared. For example:
 * <blockquote><pre>
 *     String str = "abc";
 * </pre></blockquote><p>
 * is equivalent to:
 * <blockquote><pre>
 *     char data[] = {'a', 'b', 'c'};
 *     String str = new String(data);
 * </pre></blockquote><p>
 * Here are some more examples of how strings can be used:
 * <blockquote><pre>
 *     System.out.println("abc");
 *     String cde = "cde";
 *     System.out.println("abc" + cde);
 *     String c = "abc".substring(2,3);
 *     String d = cde.substring(1, 2);
 * </pre></blockquote>
 * <p>
 * The class {@code String} includes methods for examining
 * individual characters of the sequence, for comparing strings, for
 * searching strings, for extracting substrings, and for creating a
 * copy of a string with all characters translated to uppercase or to
 * lowercase. Case mapping is based on the Unicode Standard version
 * specified by the {@link java.lang.Character Character} class.
 * <p>
 * The Java language provides special support for the string
 * concatenation operator ( + ), and for conversion of
 * other objects to strings. String concatenation is implemented
 * through the {@code StringBuilder} (or {@code StringBuffer})
 * class and its {@code append} method.
 * String conversions are implemented through the method
 * {@code toString}, defined by {@code Object} and
 * inherited by all classes in Java. For additional information on
 * string concatenation and conversion, see Gosling, Joy, and Steele,
 * <i>The Java Language Specification</i>.
 *
 * <p> Unless otherwise noted, passing a <tt>null</tt> argument to a constructor
 * or method in this class will cause a {@link NullPointerException} to be
 * thrown.
89 * 90 * <p>A {@code String} represents a string in the UTF-16 format 91 * in which <em>supplementary characters</em> are represented by <em>surrogate 92 * pairs</em> (see the section <a href="Character.html#unicode">Unicode 93 * Character Representations</a> in the {@code Character} class for 94 * more information). 95 * Index values refer to {@code char} code units, so a supplementary 96 * character uses two positions in a {@code String}. 97 * <p>The {@code String} class provides methods for dealing with 98 * Unicode code points (i.e., characters), in addition to those for 99 * dealing with Unicode code units (i.e., {@code char} values). 100 * 101 * @author Lee Boynton 102 * @author Arthur van Hoff 103 * @author Martin Buchholz 104 * @author Ulf Zibis 105 * @see java.lang.Object#toString() 106 * @see java.lang.StringBuffer 107 * @see java.lang.StringBuilder 108 * @see java.nio.charset.Charset 109 * @since 1.0 110 */ 111 112 public final class String 113 implements java.io.Serializable, Comparable<String>, CharSequence { 114 /** The value is used for character storage. */ 115 private final char value[]; 116 117 /** Cache the hash code for the string */ 118 private int hash; // Default to 0 119 120 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 121 private static final long serialVersionUID = -6849794470754667710L; 122 123 /** 124 * Class String is special cased within the Serialization Stream Protocol. 125 * 126 * A String instance is written into an ObjectOutputStream according to 127 * <a href="{@docRoot}/../platform/serialization/spec/output.html"> 128 * Object Serialization Specification, Section 6.2, "Stream Elements"</a> 129 */ 130 private static final ObjectStreamField[] serialPersistentFields = 131 new ObjectStreamField[0]; 132 133 /** 134 * Initializes a newly created {@code String} object so that it represents 135 * an empty character sequence. Note that use of this constructor is 136 * unnecessary since Strings are immutable. 
137 */ 138 public String() { 139 this.value = new char[0]; 140 } 141 142 /** 143 * Initializes a newly created {@code String} object so that it represents 144 * the same sequence of characters as the argument; in other words, the 145 * newly created string is a copy of the argument string. Unless an 146 * explicit copy of {@code original} is needed, use of this constructor is 147 * unnecessary since Strings are immutable. 148 * 149 * @param original 150 * A {@code String} 151 */ 152 public String(String original) { 153 this.value = original.value; 154 this.hash = original.hash; 155 } 156 157 /** 158 * Allocates a new {@code String} so that it represents the sequence of 159 * characters currently contained in the character array argument. The 160 * contents of the character array are copied; subsequent modification of 161 * the character array does not affect the newly created string. 162 * 163 * @param value 164 * The initial value of the string 165 */ 166 public String(char value[]) { 167 this.value = Arrays.copyOf(value, value.length); 168 } 169 170 /** 171 * Allocates a new {@code String} that contains characters from a subarray 172 * of the character array argument. The {@code offset} argument is the 173 * index of the first character of the subarray and the {@code count} 174 * argument specifies the length of the subarray. The contents of the 175 * subarray are copied; subsequent modification of the character array does 176 * not affect the newly created string. 
177 * 178 * @param value 179 * Array that is the source of characters 180 * 181 * @param offset 182 * The initial offset 183 * 184 * @param count 185 * The length 186 * 187 * @throws IndexOutOfBoundsException 188 * If the {@code offset} and {@code count} arguments index 189 * characters outside the bounds of the {@code value} array 190 */ 191 public String(char value[], int offset, int count) { 192 if (offset < 0) { 193 throw new StringIndexOutOfBoundsException(offset); 194 } 195 if (count < 0) { 196 throw new StringIndexOutOfBoundsException(count); 197 } 198 // Note: offset or count might be near -1>>>1. 199 if (offset > value.length - count) { 200 throw new StringIndexOutOfBoundsException(offset + count); 201 } 202 this.value = Arrays.copyOfRange(value, offset, offset+count); 203 } 204 205 /** 206 * Allocates a new {@code String} that contains characters from a subarray 207 * of the <a href="Character.html#unicode">Unicode code point</a> array 208 * argument. The {@code offset} argument is the index of the first code 209 * point of the subarray and the {@code count} argument specifies the 210 * length of the subarray. The contents of the subarray are converted to 211 * {@code char}s; subsequent modification of the {@code int} array does not 212 * affect the newly created string. 
213 * 214 * @param codePoints 215 * Array that is the source of Unicode code points 216 * 217 * @param offset 218 * The initial offset 219 * 220 * @param count 221 * The length 222 * 223 * @throws IllegalArgumentException 224 * If any invalid Unicode code point is found in {@code 225 * codePoints} 226 * 227 * @throws IndexOutOfBoundsException 228 * If the {@code offset} and {@code count} arguments index 229 * characters outside the bounds of the {@code codePoints} array 230 * 231 * @since 1.5 232 */ 233 public String(int[] codePoints, int offset, int count) { 234 if (offset < 0) { 235 throw new StringIndexOutOfBoundsException(offset); 236 } 237 if (count < 0) { 238 throw new StringIndexOutOfBoundsException(count); 239 } 240 // Note: offset or count might be near -1>>>1. 241 if (offset > codePoints.length - count) { 242 throw new StringIndexOutOfBoundsException(offset + count); 243 } 244 245 final int end = offset + count; 246 247 // Pass 1: Compute precise size of char[] 248 int n = count; 249 for (int i = offset; i < end; i++) { 250 int c = codePoints[i]; 251 if (Character.isBmpCodePoint(c)) 252 continue; 253 else if (Character.isValidCodePoint(c)) 254 n++; 255 else throw new IllegalArgumentException(Integer.toString(c)); 256 } 257 258 // Pass 2: Allocate and fill in char[] 259 final char[] v = new char[n]; 260 261 for (int i = offset, j = 0; i < end; i++, j++) { 262 int c = codePoints[i]; 263 if (Character.isBmpCodePoint(c)) 264 v[j] = (char)c; 265 else 266 Character.toSurrogates(c, v, j++); 267 } 268 269 this.value = v; 270 } 271 272 /** 273 * Allocates a new {@code String} constructed from a subarray of an array 274 * of 8-bit integer values. 275 * 276 * <p> The {@code offset} argument is the index of the first byte of the 277 * subarray, and the {@code count} argument specifies the length of the 278 * subarray. 279 * 280 * <p> Each {@code byte} in the subarray is converted to a {@code char} as 281 * specified in the method above. 
282 * 283 * @deprecated This method does not properly convert bytes into characters. 284 * As of JDK 1.1, the preferred way to do this is via the 285 * {@code String} constructors that take a {@link 286 * java.nio.charset.Charset}, charset name, or that use the platform's 287 * default charset. 288 * 289 * @param ascii 290 * The bytes to be converted to characters 291 * 292 * @param hibyte 293 * The top 8 bits of each 16-bit Unicode code unit 294 * 295 * @param offset 296 * The initial offset 297 * @param count 298 * The length 299 * 300 * @throws IndexOutOfBoundsException 301 * If the {@code offset} or {@code count} argument is invalid 302 * 303 * @see #String(byte[], int) 304 * @see #String(byte[], int, int, java.lang.String) 305 * @see #String(byte[], int, int, java.nio.charset.Charset) 306 * @see #String(byte[], int, int) 307 * @see #String(byte[], java.lang.String) 308 * @see #String(byte[], java.nio.charset.Charset) 309 * @see #String(byte[]) 310 */ 311 @Deprecated 312 public String(byte ascii[], int hibyte, int offset, int count) { 313 checkBounds(ascii, offset, count); 314 char value[] = new char[count]; 315 316 if (hibyte == 0) { 317 for (int i = count; i-- > 0;) { 318 value[i] = (char)(ascii[i + offset] & 0xff); 319 } 320 } else { 321 hibyte <<= 8; 322 for (int i = count; i-- > 0;) { 323 value[i] = (char)(hibyte | (ascii[i + offset] & 0xff)); 324 } 325 } 326 this.value = value; 327 } 328 329 /** 330 * Allocates a new {@code String} containing characters constructed from 331 * an array of 8-bit integer values. Each character <i>c</i>in the 332 * resulting string is constructed from the corresponding component 333 * <i>b</i> in the byte array such that: 334 * 335 * <blockquote><pre> 336 * <b><i>c</i></b> == (char)(((hibyte & 0xff) << 8) 337 * | (<b><i>b</i></b> & 0xff)) 338 * </pre></blockquote> 339 * 340 * @deprecated This method does not properly convert bytes into 341 * characters. 
As of JDK 1.1, the preferred way to do this is via the 342 * {@code String} constructors that take a {@link 343 * java.nio.charset.Charset}, charset name, or that use the platform's 344 * default charset. 345 * 346 * @param ascii 347 * The bytes to be converted to characters 348 * 349 * @param hibyte 350 * The top 8 bits of each 16-bit Unicode code unit 351 * 352 * @see #String(byte[], int, int, java.lang.String) 353 * @see #String(byte[], int, int, java.nio.charset.Charset) 354 * @see #String(byte[], int, int) 355 * @see #String(byte[], java.lang.String) 356 * @see #String(byte[], java.nio.charset.Charset) 357 * @see #String(byte[]) 358 */ 359 @Deprecated 360 public String(byte ascii[], int hibyte) { 361 this(ascii, hibyte, 0, ascii.length); 362 } 363 364 /* Common private utility method used to bounds check the byte array 365 * and requested offset & length values used by the String(byte[],..) 366 * constructors. 367 */ 368 private static void checkBounds(byte[] bytes, int offset, int length) { 369 if (length < 0) 370 throw new StringIndexOutOfBoundsException(length); 371 if (offset < 0) 372 throw new StringIndexOutOfBoundsException(offset); 373 if (offset > bytes.length - length) 374 throw new StringIndexOutOfBoundsException(offset + length); 375 } 376 377 /** 378 * Constructs a new {@code String} by decoding the specified subarray of 379 * bytes using the specified charset. The length of the new {@code String} 380 * is a function of the charset, and hence may not be equal to the length 381 * of the subarray. 382 * 383 * <p> The behavior of this constructor when the given bytes are not valid 384 * in the given charset is unspecified. The {@link 385 * java.nio.charset.CharsetDecoder} class should be used when more control 386 * over the decoding process is required. 
387 * 388 * @param bytes 389 * The bytes to be decoded into characters 390 * 391 * @param offset 392 * The index of the first byte to decode 393 * 394 * @param length 395 * The number of bytes to decode 396 397 * @param charsetName 398 * The name of a supported {@linkplain java.nio.charset.Charset 399 * charset} 400 * 401 * @throws UnsupportedEncodingException 402 * If the named charset is not supported 403 * 404 * @throws IndexOutOfBoundsException 405 * If the {@code offset} and {@code length} arguments index 406 * characters outside the bounds of the {@code bytes} array 407 * 408 * @since 1.1 409 */ 410 public String(byte bytes[], int offset, int length, String charsetName) 411 throws UnsupportedEncodingException { 412 if (charsetName == null) 413 throw new NullPointerException("charsetName"); 414 checkBounds(bytes, offset, length); 415 this.value = StringCoding.decode(charsetName, bytes, offset, length); 416 } 417 418 /** 419 * Constructs a new {@code String} by decoding the specified subarray of 420 * bytes using the specified {@linkplain java.nio.charset.Charset charset}. 421 * The length of the new {@code String} is a function of the charset, and 422 * hence may not be equal to the length of the subarray. 423 * 424 * <p> This method always replaces malformed-input and unmappable-character 425 * sequences with this charset's default replacement string. The {@link 426 * java.nio.charset.CharsetDecoder} class should be used when more control 427 * over the decoding process is required. 
428 * 429 * @param bytes 430 * The bytes to be decoded into characters 431 * 432 * @param offset 433 * The index of the first byte to decode 434 * 435 * @param length 436 * The number of bytes to decode 437 * 438 * @param charset 439 * The {@linkplain java.nio.charset.Charset charset} to be used to 440 * decode the {@code bytes} 441 * 442 * @throws IndexOutOfBoundsException 443 * If the {@code offset} and {@code length} arguments index 444 * characters outside the bounds of the {@code bytes} array 445 * 446 * @since 1.6 447 */ 448 public String(byte bytes[], int offset, int length, Charset charset) { 449 if (charset == null) 450 throw new NullPointerException("charset"); 451 checkBounds(bytes, offset, length); 452 this.value = StringCoding.decode(charset, bytes, offset, length); 453 } 454 455 /** 456 * Constructs a new {@code String} by decoding the specified 457 * {@linkplain java.nio.ByteBuffer byte buffer} using the specified 458 * {@linkplain java.nio.charset.Charset charset}. 459 * The length of the new {@code String} is a function of the charset, and 460 * hence may not be equal to the remaining number of bytes in the 461 * {@linkplain java.nio.ByteBuffer byte buffer}. 462 * 463 * <p> This method always replaces malformed-input and unmappable-character 464 * sequences with this charset's default replacement string. The {@link 465 * java.nio.charset.CharsetDecoder} class should be used when more control 466 * over the decoding process is required. 467 * 468 * <p> 469 * Bytes are read between <code>position()</code> and <code>limit()</code> 470 * of the {@link java.nio.ByteBuffer}. 471 * After the bytes have been read the <code>position()</code> will be 472 * updated. 
473 * 474 * @param bytes 475 * The bytes to be decoded into characters 476 * 477 * @param charset 478 * The {@linkplain java.nio.charset.Charset charset} to be used to 479 * decode the {@code bytes} 480 * 481 * @since 1.9 482 */ 483 public String(ByteBuffer bytes, Charset charset) { 484 if (charset == null) 485 throw new NullPointerException("charset"); 486 this.value = StringCoding.decode(charset, bytes); 487 } 488 489 /** 490 * Constructs a new {@code String} by decoding the specified array of bytes 491 * using the specified {@linkplain java.nio.charset.Charset charset}. The 492 * length of the new {@code String} is a function of the charset, and hence 493 * may not be equal to the length of the byte array. 494 * 495 * <p> The behavior of this constructor when the given bytes are not valid 496 * in the given charset is unspecified. The {@link 497 * java.nio.charset.CharsetDecoder} class should be used when more control 498 * over the decoding process is required. 499 * 500 * @param bytes 501 * The bytes to be decoded into characters 502 * 503 * @param charsetName 504 * The name of a supported {@linkplain java.nio.charset.Charset 505 * charset} 506 * 507 * @throws UnsupportedEncodingException 508 * If the named charset is not supported 509 * 510 * @since 1.1 511 */ 512 public String(byte bytes[], String charsetName) 513 throws UnsupportedEncodingException { 514 this(bytes, 0, bytes.length, charsetName); 515 } 516 517 /** 518 * Constructs a new {@code String} by decoding the specified array of 519 * bytes using the specified {@linkplain java.nio.charset.Charset charset}. 520 * The length of the new {@code String} is a function of the charset, and 521 * hence may not be equal to the length of the byte array. 522 * 523 * <p> This method always replaces malformed-input and unmappable-character 524 * sequences with this charset's default replacement string. 
The {@link 525 * java.nio.charset.CharsetDecoder} class should be used when more control 526 * over the decoding process is required. 527 * 528 * @param bytes 529 * The bytes to be decoded into characters 530 * 531 * @param charset 532 * The {@linkplain java.nio.charset.Charset charset} to be used to 533 * decode the {@code bytes} 534 * 535 * @since 1.6 536 */ 537 public String(byte bytes[], Charset charset) { 538 this(bytes, 0, bytes.length, charset); 539 } 540 541 /** 542 * Constructs a new {@code String} by decoding the specified subarray of 543 * bytes using the platform's default charset. The length of the new 544 * {@code String} is a function of the charset, and hence may not be equal 545 * to the length of the subarray. 546 * 547 * <p> The behavior of this constructor when the given bytes are not valid 548 * in the default charset is unspecified. The {@link 549 * java.nio.charset.CharsetDecoder} class should be used when more control 550 * over the decoding process is required. 551 * 552 * @param bytes 553 * The bytes to be decoded into characters 554 * 555 * @param offset 556 * The index of the first byte to decode 557 * 558 * @param length 559 * The number of bytes to decode 560 * 561 * @throws IndexOutOfBoundsException 562 * If the {@code offset} and the {@code length} arguments index 563 * characters outside the bounds of the {@code bytes} array 564 * 565 * @since 1.1 566 */ 567 public String(byte bytes[], int offset, int length) { 568 checkBounds(bytes, offset, length); 569 this.value = StringCoding.decode(bytes, offset, length); 570 } 571 572 /** 573 * Constructs a new {@code String} by decoding the specified array of bytes 574 * using the platform's default charset. The length of the new {@code 575 * String} is a function of the charset, and hence may not be equal to the 576 * length of the byte array. 577 * 578 * <p> The behavior of this constructor when the given bytes are not valid 579 * in the default charset is unspecified. 
The {@link 580 * java.nio.charset.CharsetDecoder} class should be used when more control 581 * over the decoding process is required. 582 * 583 * @param bytes 584 * The bytes to be decoded into characters 585 * 586 * @since 1.1 587 */ 588 public String(byte bytes[]) { 589 this(bytes, 0, bytes.length); 590 } 591 592 /** 593 * Allocates a new string that contains the sequence of characters 594 * currently contained in the string buffer argument. The contents of the 595 * string buffer are copied; subsequent modification of the string buffer 596 * does not affect the newly created string. 597 * 598 * @param buffer 599 * A {@code StringBuffer} 600 */ 601 public String(StringBuffer buffer) { 602 synchronized(buffer) { 603 this.value = Arrays.copyOf(buffer.getValue(), buffer.length()); 604 } 605 } 606 607 /** 608 * Allocates a new string that contains the sequence of characters 609 * currently contained in the string builder argument. The contents of the 610 * string builder are copied; subsequent modification of the string builder 611 * does not affect the newly created string. 612 * 613 * <p> This constructor is provided to ease migration to {@code 614 * StringBuilder}. Obtaining a string from a string builder via the {@code 615 * toString} method is likely to run faster and is generally preferred. 616 * 617 * @param builder 618 * A {@code StringBuilder} 619 * 620 * @since 1.5 621 */ 622 public String(StringBuilder builder) { 623 this.value = Arrays.copyOf(builder.getValue(), builder.length()); 624 } 625 626 /* 627 * Package private constructor which shares value array for speed. 628 * this constructor is always expected to be called with share==true. 629 * a separate constructor is needed because we already have a public 630 * String(char[]) constructor that makes a copy of the given char[]. 631 */ 632 String(char[] value, boolean share) { 633 // assert share : "unshared not supported"; 634 this.value = value; 635 } 636 637 /** 638 * Returns the length of this string. 
639 * The length is equal to the number of <a href="Character.html#unicode">Unicode 640 * code units</a> in the string. 641 * 642 * @return the length of the sequence of characters represented by this 643 * object. 644 */ 645 public int length() { 646 return value.length; 647 } 648 649 /** 650 * Returns {@code true} if, and only if, {@link #length()} is {@code 0}. 651 * 652 * @return {@code true} if {@link #length()} is {@code 0}, otherwise 653 * {@code false} 654 * 655 * @since 1.6 656 */ 657 public boolean isEmpty() { 658 return value.length == 0; 659 } 660 661 /** 662 * Returns the {@code char} value at the 663 * specified index. An index ranges from {@code 0} to 664 * {@code length() - 1}. The first {@code char} value of the sequence 665 * is at index {@code 0}, the next at index {@code 1}, 666 * and so on, as for array indexing. 667 * 668 * <p>If the {@code char} value specified by the index is a 669 * <a href="Character.html#unicode">surrogate</a>, the surrogate 670 * value is returned. 671 * 672 * @param index the index of the {@code char} value. 673 * @return the {@code char} value at the specified index of this string. 674 * The first {@code char} value is at index {@code 0}. 675 * @exception IndexOutOfBoundsException if the {@code index} 676 * argument is negative or not less than the length of this 677 * string. 678 */ 679 public char charAt(int index) { 680 if ((index < 0) || (index >= value.length)) { 681 throw new StringIndexOutOfBoundsException(index); 682 } 683 return value[index]; 684 } 685 686 /** 687 * Returns the character (Unicode code point) at the specified 688 * index. The index refers to {@code char} values 689 * (Unicode code units) and ranges from {@code 0} to 690 * {@link #length()}{@code - 1}. 
691 * 692 * <p> If the {@code char} value specified at the given index 693 * is in the high-surrogate range, the following index is less 694 * than the length of this {@code String}, and the 695 * {@code char} value at the following index is in the 696 * low-surrogate range, then the supplementary code point 697 * corresponding to this surrogate pair is returned. Otherwise, 698 * the {@code char} value at the given index is returned. 699 * 700 * @param index the index to the {@code char} values 701 * @return the code point value of the character at the 702 * {@code index} 703 * @exception IndexOutOfBoundsException if the {@code index} 704 * argument is negative or not less than the length of this 705 * string. 706 * @since 1.5 707 */ 708 public int codePointAt(int index) { 709 if ((index < 0) || (index >= value.length)) { 710 throw new StringIndexOutOfBoundsException(index); 711 } 712 return Character.codePointAtImpl(value, index, value.length); 713 } 714 715 /** 716 * Returns the character (Unicode code point) before the specified 717 * index. The index refers to {@code char} values 718 * (Unicode code units) and ranges from {@code 1} to {@link 719 * CharSequence#length() length}. 720 * 721 * <p> If the {@code char} value at {@code (index - 1)} 722 * is in the low-surrogate range, {@code (index - 2)} is not 723 * negative, and the {@code char} value at {@code (index - 724 * 2)} is in the high-surrogate range, then the 725 * supplementary code point value of the surrogate pair is 726 * returned. If the {@code char} value at {@code index - 727 * 1} is an unpaired low-surrogate or a high-surrogate, the 728 * surrogate value is returned. 729 * 730 * @param index the index following the code point that should be returned 731 * @return the Unicode code point value before the given index. 732 * @exception IndexOutOfBoundsException if the {@code index} 733 * argument is less than 1 or greater than the length 734 * of this string. 
735 * @since 1.5 736 */ 737 public int codePointBefore(int index) { 738 int i = index - 1; 739 if ((i < 0) || (i >= value.length)) { 740 throw new StringIndexOutOfBoundsException(index); 741 } 742 return Character.codePointBeforeImpl(value, index, 0); 743 } 744 745 /** 746 * Returns the number of Unicode code points in the specified text 747 * range of this {@code String}. The text range begins at the 748 * specified {@code beginIndex} and extends to the 749 * {@code char} at index {@code endIndex - 1}. Thus the 750 * length (in {@code char}s) of the text range is 751 * {@code endIndex-beginIndex}. Unpaired surrogates within 752 * the text range count as one code point each. 753 * 754 * @param beginIndex the index to the first {@code char} of 755 * the text range. 756 * @param endIndex the index after the last {@code char} of 757 * the text range. 758 * @return the number of Unicode code points in the specified text 759 * range 760 * @exception IndexOutOfBoundsException if the 761 * {@code beginIndex} is negative, or {@code endIndex} 762 * is larger than the length of this {@code String}, or 763 * {@code beginIndex} is larger than {@code endIndex}. 764 * @since 1.5 765 */ 766 public int codePointCount(int beginIndex, int endIndex) { 767 if (beginIndex < 0 || endIndex > value.length || beginIndex > endIndex) { 768 throw new IndexOutOfBoundsException(); 769 } 770 return Character.codePointCountImpl(value, beginIndex, endIndex - beginIndex); 771 } 772 773 /** 774 * Returns the index within this {@code String} that is 775 * offset from the given {@code index} by 776 * {@code codePointOffset} code points. Unpaired surrogates 777 * within the text range given by {@code index} and 778 * {@code codePointOffset} count as one code point each. 
779 * 780 * @param index the index to be offset 781 * @param codePointOffset the offset in code points 782 * @return the index within this {@code String} 783 * @exception IndexOutOfBoundsException if {@code index} 784 * is negative or larger then the length of this 785 * {@code String}, or if {@code codePointOffset} is positive 786 * and the substring starting with {@code index} has fewer 787 * than {@code codePointOffset} code points, 788 * or if {@code codePointOffset} is negative and the substring 789 * before {@code index} has fewer than the absolute value 790 * of {@code codePointOffset} code points. 791 * @since 1.5 792 */ 793 public int offsetByCodePoints(int index, int codePointOffset) { 794 if (index < 0 || index > value.length) { 795 throw new IndexOutOfBoundsException(); 796 } 797 return Character.offsetByCodePointsImpl(value, 0, value.length, 798 index, codePointOffset); 799 } 800 801 /** 802 * Copy characters from this string into dst starting at dstBegin. 803 * This method doesn't perform any range checking. 804 */ 805 void getChars(char dst[], int dstBegin) { 806 System.arraycopy(value, 0, dst, dstBegin, value.length); 807 } 808 809 /** 810 * Copies characters from this string into the destination character 811 * array. 812 * <p> 813 * The first character to be copied is at index {@code srcBegin}; 814 * the last character to be copied is at index {@code srcEnd-1} 815 * (thus the total number of characters to be copied is 816 * {@code srcEnd-srcBegin}). The characters are copied into the 817 * subarray of {@code dst} starting at index {@code dstBegin} 818 * and ending at index: 819 * <blockquote><pre> 820 * dstbegin + (srcEnd-srcBegin) - 1 821 * </pre></blockquote> 822 * 823 * @param srcBegin index of the first character in the string 824 * to copy. 825 * @param srcEnd index after the last character in the string 826 * to copy. 827 * @param dst the destination array. 828 * @param dstBegin the start offset in the destination array. 
829 * @exception IndexOutOfBoundsException If any of the following 830 * is true: 831 * <ul><li>{@code srcBegin} is negative. 832 * <li>{@code srcBegin} is greater than {@code srcEnd} 833 * <li>{@code srcEnd} is greater than the length of this 834 * string 835 * <li>{@code dstBegin} is negative 836 * <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than 837 * {@code dst.length}</ul> 838 */ 839 public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) { 840 if (srcBegin < 0) { 841 throw new StringIndexOutOfBoundsException(srcBegin); 842 } 843 if (srcEnd > value.length) { 844 throw new StringIndexOutOfBoundsException(srcEnd); 845 } 846 if (srcBegin > srcEnd) { 847 throw new StringIndexOutOfBoundsException(srcEnd - srcBegin); 848 } 849 System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin); 850 } 851 852 /** 853 * Copies characters from this string into the destination byte array. Each 854 * byte receives the 8 low-order bits of the corresponding character. The 855 * eight high-order bits of each character are not copied and do not 856 * participate in the transfer in any way. 857 * 858 * <p> The first character to be copied is at index {@code srcBegin}; the 859 * last character to be copied is at index {@code srcEnd-1}. The total 860 * number of characters to be copied is {@code srcEnd-srcBegin}. The 861 * characters, converted to bytes, are copied into the subarray of {@code 862 * dst} starting at index {@code dstBegin} and ending at index: 863 * 864 * <blockquote><pre> 865 * dstbegin + (srcEnd-srcBegin) - 1 866 * </pre></blockquote> 867 * 868 * @deprecated This method does not properly convert characters into 869 * bytes. As of JDK 1.1, the preferred way to do this is via the 870 * {@link #getBytes()} method, which uses the platform's default charset. 
871 * 872 * @param srcBegin 873 * Index of the first character in the string to copy 874 * 875 * @param srcEnd 876 * Index after the last character in the string to copy 877 * 878 * @param dst 879 * The destination array 880 * 881 * @param dstBegin 882 * The start offset in the destination array 883 * 884 * @throws IndexOutOfBoundsException 885 * If any of the following is true: 886 * <ul> 887 * <li> {@code srcBegin} is negative 888 * <li> {@code srcBegin} is greater than {@code srcEnd} 889 * <li> {@code srcEnd} is greater than the length of this String 890 * <li> {@code dstBegin} is negative 891 * <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code 892 * dst.length} 893 * </ul> 894 */ 895 @Deprecated 896 public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) { 897 if (srcBegin < 0) { 898 throw new StringIndexOutOfBoundsException(srcBegin); 899 } 900 if (srcEnd > value.length) { 901 throw new StringIndexOutOfBoundsException(srcEnd); 902 } 903 if (srcBegin > srcEnd) { 904 throw new StringIndexOutOfBoundsException(srcEnd - srcBegin); 905 } 906 Objects.requireNonNull(dst); 907 908 int j = dstBegin; 909 int n = srcEnd; 910 int i = srcBegin; 911 char[] val = value; /* avoid getfield opcode */ 912 913 while (i < n) { 914 dst[j++] = (byte)val[i++]; 915 } 916 } 917 918 /** 919 * Encodes this {@code String} into a sequence of bytes using the named 920 * charset, storing the result into a new byte array. 921 * 922 * <p> The behavior of this method when this string cannot be encoded in 923 * the given charset is unspecified. The {@link 924 * java.nio.charset.CharsetEncoder} class should be used when more control 925 * over the encoding process is required. 
926 * 927 * @param charsetName 928 * The name of a supported {@linkplain java.nio.charset.Charset 929 * charset} 930 * 931 * @return The resultant byte array 932 * 933 * @throws UnsupportedEncodingException 934 * If the named charset is not supported 935 * 936 * @since 1.1 937 */ 938 public byte[] getBytes(String charsetName) 939 throws UnsupportedEncodingException { 940 if (charsetName == null) throw new NullPointerException(); 941 return StringCoding.encode(charsetName, value, 0, value.length); 942 } 943 944 /** 945 * Encodes this {@code String} into a sequence of bytes using the given 946 * {@linkplain java.nio.charset.Charset charset}, storing the result into a 947 * new byte array. 948 * 949 * <p> This method always replaces malformed-input and unmappable-character 950 * sequences with this charset's default replacement byte array. The 951 * {@link java.nio.charset.CharsetEncoder} class should be used when more 952 * control over the encoding process is required. 953 * 954 * @param charset 955 * The {@linkplain java.nio.charset.Charset} to be used to encode 956 * the {@code String} 957 * 958 * @return The resultant byte array 959 * 960 * @since 1.6 961 */ 962 public byte[] getBytes(Charset charset) { 963 Objects.requireNonNull(charset); 964 return StringCoding.encode(charset, value, 0, value.length); 965 } 966 967 /** 968 * Encodes this {@code String} into a sequence of bytes using the given 969 * {@linkplain java.nio.charset.Charset charset}, storing the result into a 970 * byte array that has been passed as an argument. 971 * <p> 972 * The number of bytes written during encoding is a function of the charset 973 * used to perform the encoding and is returned from this method. It may not 974 * be equal to the length of this 975 * String. At most <code>destBuffer.length - destOffset</code> bytes will be written. 
976 * 977 * @param destBuffer 978 * The destination array 979 * 980 * @param destOffset 981 * The start offset in the destination array 982 * 983 * @param charset 984 * The {@linkplain java.nio.charset.Charset} to be used to encode 985 * the {@code String} 986 * 987 * @return the number of bytes written 988 * 989 * @since 1.9 990 */ 991 public int getBytes(byte[] destBuffer, int destOffset, Charset charset) { 992 Objects.requireNonNull(destBuffer); 993 Objects.requireNonNull(charset); 994 return StringCoding.encode(charset, value, 0, value.length, destBuffer, destOffset); 995 } 996 997 /** 998 * Encodes as many characters as possible from this {@code String} into a 999 * sequence of bytes using the given 1000 * {@linkplain java.nio.charset.Charset charset}, storing the result into a 1001 * {@linkplain java.nio.ByteBuffer byte buffer} that has been passed as an argument. 1002 * <p> 1003 * The number of bytes written during encoding is a function of the charset 1004 * used to perform the encoding and is returned from this method. It may not 1005 * be equal to the length of this 1006 * String. At most <code>destBuffer.remaining()</code> bytes will be written. 1007 * <p> 1008 * The buffer's position will be advanced to reflect the characters read and 1009 * the bytes written, but its mark and limit will not be modified. 
1010 * 1011 * @param destBuffer 1012 * The destination {@linkplain java.nio.ByteBuffer} 1013 * 1014 * @param charset 1015 * The {@linkplain java.nio.charset.Charset} to be used to encode 1016 * the {@code String} 1017 * 1018 * @return the number of bytes written 1019 * 1020 * @since 1.9 1021 */ 1022 public int getBytes(ByteBuffer destBuffer, Charset charset) { 1023 Objects.requireNonNull(destBuffer); 1024 Objects.requireNonNull(charset); 1025 return StringCoding.encode(charset, value, 0, value.length, destBuffer); 1026 } 1027 1028 /** 1029 * Encodes this {@code String} into a sequence of bytes using the 1030 * platform's default charset, storing the result into a new byte array. 1031 * 1032 * <p> The behavior of this method when this string cannot be encoded in 1033 * the default charset is unspecified. The {@link 1034 * java.nio.charset.CharsetEncoder} class should be used when more control 1035 * over the encoding process is required. 1036 * 1037 * @return The resultant byte array 1038 * 1039 * @since 1.1 1040 */ 1041 public byte[] getBytes() { 1042 return StringCoding.encode(value, 0, value.length); 1043 } 1044 1045 /** 1046 * Compares this string to the specified object. The result is {@code 1047 * true} if and only if the argument is not {@code null} and is a {@code 1048 * String} object that represents the same sequence of characters as this 1049 * object. 
1050 * 1051 * @param anObject 1052 * The object to compare this {@code String} against 1053 * 1054 * @return {@code true} if the given object represents a {@code String} 1055 * equivalent to this string, {@code false} otherwise 1056 * 1057 * @see #compareTo(String) 1058 * @see #equalsIgnoreCase(String) 1059 */ 1060 public boolean equals(Object anObject) { 1061 if (this == anObject) { 1062 return true; 1063 } 1064 if (anObject instanceof String) { 1065 String anotherString = (String)anObject; 1066 int n = value.length; 1067 if (n == anotherString.value.length) { 1068 char v1[] = value; 1069 char v2[] = anotherString.value; 1070 int i = 0; 1071 while (n-- != 0) { 1072 if (v1[i] != v2[i]) 1073 return false; 1074 i++; 1075 } 1076 return true; 1077 } 1078 } 1079 return false; 1080 } 1081 1082 /** 1083 * Compares this string to the specified {@code StringBuffer}. The result 1084 * is {@code true} if and only if this {@code String} represents the same 1085 * sequence of characters as the specified {@code StringBuffer}. This method 1086 * synchronizes on the {@code StringBuffer}. 1087 * 1088 * @param sb 1089 * The {@code StringBuffer} to compare this {@code String} against 1090 * 1091 * @return {@code true} if this {@code String} represents the same 1092 * sequence of characters as the specified {@code StringBuffer}, 1093 * {@code false} otherwise 1094 * 1095 * @since 1.4 1096 */ 1097 public boolean contentEquals(StringBuffer sb) { 1098 return contentEquals((CharSequence)sb); 1099 } 1100 1101 private boolean nonSyncContentEquals(AbstractStringBuilder sb) { 1102 char v1[] = value; 1103 char v2[] = sb.getValue(); 1104 int n = v1.length; 1105 if (n != sb.length()) { 1106 return false; 1107 } 1108 for (int i = 0; i < n; i++) { 1109 if (v1[i] != v2[i]) { 1110 return false; 1111 } 1112 } 1113 return true; 1114 } 1115 1116 /** 1117 * Compares this string to the specified {@code CharSequence}. 
The 1118 * result is {@code true} if and only if this {@code String} represents the 1119 * same sequence of char values as the specified sequence. Note that if the 1120 * {@code CharSequence} is a {@code StringBuffer} then the method 1121 * synchronizes on it. 1122 * 1123 * @param cs 1124 * The sequence to compare this {@code String} against 1125 * 1126 * @return {@code true} if this {@code String} represents the same 1127 * sequence of char values as the specified sequence, {@code 1128 * false} otherwise 1129 * 1130 * @since 1.5 1131 */ 1132 public boolean contentEquals(CharSequence cs) { 1133 // Argument is a StringBuffer, StringBuilder 1134 if (cs instanceof AbstractStringBuilder) { 1135 if (cs instanceof StringBuffer) { 1136 synchronized(cs) { 1137 return nonSyncContentEquals((AbstractStringBuilder)cs); 1138 } 1139 } else { 1140 return nonSyncContentEquals((AbstractStringBuilder)cs); 1141 } 1142 } 1143 // Argument is a String 1144 if (cs instanceof String) { 1145 return equals(cs); 1146 } 1147 // Argument is a generic CharSequence 1148 char v1[] = value; 1149 int n = v1.length; 1150 if (n != cs.length()) { 1151 return false; 1152 } 1153 for (int i = 0; i < n; i++) { 1154 if (v1[i] != cs.charAt(i)) { 1155 return false; 1156 } 1157 } 1158 return true; 1159 } 1160 1161 /** 1162 * Compares this {@code String} to another {@code String}, ignoring case 1163 * considerations. Two strings are considered equal ignoring case if they 1164 * are of the same length and corresponding characters in the two strings 1165 * are equal ignoring case. 
1166 * 1167 * <p> Two characters {@code c1} and {@code c2} are considered the same 1168 * ignoring case if at least one of the following is true: 1169 * <ul> 1170 * <li> The two characters are the same (as compared by the 1171 * {@code ==} operator) 1172 * <li> Applying the method {@link 1173 * java.lang.Character#toUpperCase(char)} to each character 1174 * produces the same result 1175 * <li> Applying the method {@link 1176 * java.lang.Character#toLowerCase(char)} to each character 1177 * produces the same result 1178 * </ul> 1179 * 1180 * @param anotherString 1181 * The {@code String} to compare this {@code String} against 1182 * 1183 * @return {@code true} if the argument is not {@code null} and it 1184 * represents an equivalent {@code String} ignoring case; {@code 1185 * false} otherwise 1186 * 1187 * @see #equals(Object) 1188 */ 1189 public boolean equalsIgnoreCase(String anotherString) { 1190 return (this == anotherString) ? true 1191 : (anotherString != null) 1192 && (anotherString.value.length == value.length) 1193 && regionMatches(true, 0, anotherString, 0, value.length); 1194 } 1195 1196 /** 1197 * Compares two strings lexicographically. 1198 * The comparison is based on the Unicode value of each character in 1199 * the strings. The character sequence represented by this 1200 * {@code String} object is compared lexicographically to the 1201 * character sequence represented by the argument string. The result is 1202 * a negative integer if this {@code String} object 1203 * lexicographically precedes the argument string. The result is a 1204 * positive integer if this {@code String} object lexicographically 1205 * follows the argument string. The result is zero if the strings 1206 * are equal; {@code compareTo} returns {@code 0} exactly when 1207 * the {@link #equals(Object)} method would return {@code true}. 1208 * <p> 1209 * This is the definition of lexicographic ordering. 
If two strings are 1210 * different, then either they have different characters at some index 1211 * that is a valid index for both strings, or their lengths are different, 1212 * or both. If they have different characters at one or more index 1213 * positions, let <i>k</i> be the smallest such index; then the string 1214 * whose character at position <i>k</i> has the smaller value, as 1215 * determined by using the < operator, lexicographically precedes the 1216 * other string. In this case, {@code compareTo} returns the 1217 * difference of the two character values at position {@code k} in 1218 * the two string -- that is, the value: 1219 * <blockquote><pre> 1220 * this.charAt(k)-anotherString.charAt(k) 1221 * </pre></blockquote> 1222 * If there is no index position at which they differ, then the shorter 1223 * string lexicographically precedes the longer string. In this case, 1224 * {@code compareTo} returns the difference of the lengths of the 1225 * strings -- that is, the value: 1226 * <blockquote><pre> 1227 * this.length()-anotherString.length() 1228 * </pre></blockquote> 1229 * 1230 * @param anotherString the {@code String} to be compared. 1231 * @return the value {@code 0} if the argument string is equal to 1232 * this string; a value less than {@code 0} if this string 1233 * is lexicographically less than the string argument; and a 1234 * value greater than {@code 0} if this string is 1235 * lexicographically greater than the string argument. 
1236 */ 1237 public int compareTo(String anotherString) { 1238 int len1 = value.length; 1239 int len2 = anotherString.value.length; 1240 int lim = Math.min(len1, len2); 1241 char v1[] = value; 1242 char v2[] = anotherString.value; 1243 1244 int k = 0; 1245 while (k < lim) { 1246 char c1 = v1[k]; 1247 char c2 = v2[k]; 1248 if (c1 != c2) { 1249 return c1 - c2; 1250 } 1251 k++; 1252 } 1253 return len1 - len2; 1254 } 1255 1256 /** 1257 * A Comparator that orders {@code String} objects as by 1258 * {@code compareToIgnoreCase}. This comparator is serializable. 1259 * <p> 1260 * Note that this Comparator does <em>not</em> take locale into account, 1261 * and will result in an unsatisfactory ordering for certain locales. 1262 * The java.text package provides <em>Collators</em> to allow 1263 * locale-sensitive ordering. 1264 * 1265 * @see java.text.Collator#compare(String, String) 1266 * @since 1.2 1267 */ 1268 public static final Comparator<String> CASE_INSENSITIVE_ORDER 1269 = new CaseInsensitiveComparator(); 1270 private static class CaseInsensitiveComparator 1271 implements Comparator<String>, java.io.Serializable { 1272 // use serialVersionUID from JDK 1.2.2 for interoperability 1273 private static final long serialVersionUID = 8575799808933029326L; 1274 1275 public int compare(String s1, String s2) { 1276 int n1 = s1.length(); 1277 int n2 = s2.length(); 1278 int min = Math.min(n1, n2); 1279 for (int i = 0; i < min; i++) { 1280 char c1 = s1.charAt(i); 1281 char c2 = s2.charAt(i); 1282 if (c1 != c2) { 1283 c1 = Character.toUpperCase(c1); 1284 c2 = Character.toUpperCase(c2); 1285 if (c1 != c2) { 1286 c1 = Character.toLowerCase(c1); 1287 c2 = Character.toLowerCase(c2); 1288 if (c1 != c2) { 1289 // No overflow because of numeric promotion 1290 return c1 - c2; 1291 } 1292 } 1293 } 1294 } 1295 return n1 - n2; 1296 } 1297 1298 /** Replaces the de-serialized object. 
*/ 1299 private Object readResolve() { return CASE_INSENSITIVE_ORDER; } 1300 } 1301 1302 /** 1303 * Compares two strings lexicographically, ignoring case 1304 * differences. This method returns an integer whose sign is that of 1305 * calling {@code compareTo} with normalized versions of the strings 1306 * where case differences have been eliminated by calling 1307 * {@code Character.toLowerCase(Character.toUpperCase(character))} on 1308 * each character. 1309 * <p> 1310 * Note that this method does <em>not</em> take locale into account, 1311 * and will result in an unsatisfactory ordering for certain locales. 1312 * The java.text package provides <em>collators</em> to allow 1313 * locale-sensitive ordering. 1314 * 1315 * @param str the {@code String} to be compared. 1316 * @return a negative integer, zero, or a positive integer as the 1317 * specified String is greater than, equal to, or less 1318 * than this String, ignoring case considerations. 1319 * @see java.text.Collator#compare(String, String) 1320 * @since 1.2 1321 */ 1322 public int compareToIgnoreCase(String str) { 1323 return CASE_INSENSITIVE_ORDER.compare(this, str); 1324 } 1325 1326 /** 1327 * Tests if two string regions are equal. 1328 * <p> 1329 * A substring of this {@code String} object is compared to a substring 1330 * of the argument other. The result is true if these substrings 1331 * represent identical character sequences. The substring of this 1332 * {@code String} object to be compared begins at index {@code toffset} 1333 * and has length {@code len}. The substring of other to be compared 1334 * begins at index {@code ooffset} and has length {@code len}. The 1335 * result is {@code false} if and only if at least one of the following 1336 * is true: 1337 * <ul><li>{@code toffset} is negative. 1338 * <li>{@code ooffset} is negative. 1339 * <li>{@code toffset+len} is greater than the length of this 1340 * {@code String} object. 
1341 * <li>{@code ooffset+len} is greater than the length of the other 1342 * argument. 1343 * <li>There is some nonnegative integer <i>k</i> less than {@code len} 1344 * such that: 1345 * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + } 1346 * <i>k</i>{@code )} 1347 * </ul> 1348 * 1349 * @param toffset the starting offset of the subregion in this string. 1350 * @param other the string argument. 1351 * @param ooffset the starting offset of the subregion in the string 1352 * argument. 1353 * @param len the number of characters to compare. 1354 * @return {@code true} if the specified subregion of this string 1355 * exactly matches the specified subregion of the string argument; 1356 * {@code false} otherwise. 1357 */ 1358 public boolean regionMatches(int toffset, String other, int ooffset, 1359 int len) { 1360 char ta[] = value; 1361 int to = toffset; 1362 char pa[] = other.value; 1363 int po = ooffset; 1364 // Note: toffset, ooffset, or len might be near -1>>>1. 1365 if ((ooffset < 0) || (toffset < 0) 1366 || (toffset > (long)value.length - len) 1367 || (ooffset > (long)other.value.length - len)) { 1368 return false; 1369 } 1370 while (len-- > 0) { 1371 if (ta[to++] != pa[po++]) { 1372 return false; 1373 } 1374 } 1375 return true; 1376 } 1377 1378 /** 1379 * Tests if two string regions are equal. 1380 * <p> 1381 * A substring of this {@code String} object is compared to a substring 1382 * of the argument {@code other}. The result is {@code true} if these 1383 * substrings represent character sequences that are the same, ignoring 1384 * case if and only if {@code ignoreCase} is true. The substring of 1385 * this {@code String} object to be compared begins at index 1386 * {@code toffset} and has length {@code len}. The substring of 1387 * {@code other} to be compared begins at index {@code ooffset} and 1388 * has length {@code len}. 
The result is {@code false} if and only if 1389 * at least one of the following is true: 1390 * <ul><li>{@code toffset} is negative. 1391 * <li>{@code ooffset} is negative. 1392 * <li>{@code toffset+len} is greater than the length of this 1393 * {@code String} object. 1394 * <li>{@code ooffset+len} is greater than the length of the other 1395 * argument. 1396 * <li>{@code ignoreCase} is {@code false} and there is some nonnegative 1397 * integer <i>k</i> less than {@code len} such that: 1398 * <blockquote><pre> 1399 * this.charAt(toffset+k) != other.charAt(ooffset+k) 1400 * </pre></blockquote> 1401 * <li>{@code ignoreCase} is {@code true} and there is some nonnegative 1402 * integer <i>k</i> less than {@code len} such that: 1403 * <blockquote><pre> 1404 * Character.toLowerCase(this.charAt(toffset+k)) != 1405 Character.toLowerCase(other.charAt(ooffset+k)) 1406 * </pre></blockquote> 1407 * and: 1408 * <blockquote><pre> 1409 * Character.toUpperCase(this.charAt(toffset+k)) != 1410 * Character.toUpperCase(other.charAt(ooffset+k)) 1411 * </pre></blockquote> 1412 * </ul> 1413 * 1414 * @param ignoreCase if {@code true}, ignore case when comparing 1415 * characters. 1416 * @param toffset the starting offset of the subregion in this 1417 * string. 1418 * @param other the string argument. 1419 * @param ooffset the starting offset of the subregion in the string 1420 * argument. 1421 * @param len the number of characters to compare. 1422 * @return {@code true} if the specified subregion of this string 1423 * matches the specified subregion of the string argument; 1424 * {@code false} otherwise. Whether the matching is exact 1425 * or case insensitive depends on the {@code ignoreCase} 1426 * argument. 1427 */ 1428 public boolean regionMatches(boolean ignoreCase, int toffset, 1429 String other, int ooffset, int len) { 1430 char ta[] = value; 1431 int to = toffset; 1432 char pa[] = other.value; 1433 int po = ooffset; 1434 // Note: toffset, ooffset, or len might be near -1>>>1. 
1435 if ((ooffset < 0) || (toffset < 0) 1436 || (toffset > (long)value.length - len) 1437 || (ooffset > (long)other.value.length - len)) { 1438 return false; 1439 } 1440 while (len-- > 0) { 1441 char c1 = ta[to++]; 1442 char c2 = pa[po++]; 1443 if (c1 == c2) { 1444 continue; 1445 } 1446 if (ignoreCase) { 1447 // If characters don't match but case may be ignored, 1448 // try converting both characters to uppercase. 1449 // If the results match, then the comparison scan should 1450 // continue. 1451 char u1 = Character.toUpperCase(c1); 1452 char u2 = Character.toUpperCase(c2); 1453 if (u1 == u2) { 1454 continue; 1455 } 1456 // Unfortunately, conversion to uppercase does not work properly 1457 // for the Georgian alphabet, which has strange rules about case 1458 // conversion. So we need to make one last check before 1459 // exiting. 1460 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 1461 continue; 1462 } 1463 } 1464 return false; 1465 } 1466 return true; 1467 } 1468 1469 /** 1470 * Tests if the substring of this string beginning at the 1471 * specified index starts with the specified prefix. 1472 * 1473 * @param prefix the prefix. 1474 * @param toffset where to begin looking in this string. 1475 * @return {@code true} if the character sequence represented by the 1476 * argument is a prefix of the substring of this object starting 1477 * at index {@code toffset}; {@code false} otherwise. 1478 * The result is {@code false} if {@code toffset} is 1479 * negative or greater than the length of this 1480 * {@code String} object; otherwise the result is the same 1481 * as the result of the expression 1482 * <pre> 1483 * this.substring(toffset).startsWith(prefix) 1484 * </pre> 1485 */ 1486 public boolean startsWith(String prefix, int toffset) { 1487 char ta[] = value; 1488 int to = toffset; 1489 char pa[] = prefix.value; 1490 int po = 0; 1491 int pc = prefix.value.length; 1492 // Note: toffset might be near -1>>>1. 
1493 if ((toffset < 0) || (toffset > value.length - pc)) { 1494 return false; 1495 } 1496 while (--pc >= 0) { 1497 if (ta[to++] != pa[po++]) { 1498 return false; 1499 } 1500 } 1501 return true; 1502 } 1503 1504 /** 1505 * Tests if this string starts with the specified prefix. 1506 * 1507 * @param prefix the prefix. 1508 * @return {@code true} if the character sequence represented by the 1509 * argument is a prefix of the character sequence represented by 1510 * this string; {@code false} otherwise. 1511 * Note also that {@code true} will be returned if the 1512 * argument is an empty string or is equal to this 1513 * {@code String} object as determined by the 1514 * {@link #equals(Object)} method. 1515 * @since 1.0 1516 */ 1517 public boolean startsWith(String prefix) { 1518 return startsWith(prefix, 0); 1519 } 1520 1521 /** 1522 * Tests if this string ends with the specified suffix. 1523 * 1524 * @param suffix the suffix. 1525 * @return {@code true} if the character sequence represented by the 1526 * argument is a suffix of the character sequence represented by 1527 * this object; {@code false} otherwise. Note that the 1528 * result will be {@code true} if the argument is the 1529 * empty string or is equal to this {@code String} object 1530 * as determined by the {@link #equals(Object)} method. 1531 */ 1532 public boolean endsWith(String suffix) { 1533 return startsWith(suffix, value.length - suffix.value.length); 1534 } 1535 1536 /** 1537 * Returns a hash code for this string. The hash code for a 1538 * {@code String} object is computed as 1539 * <blockquote><pre> 1540 * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1] 1541 * </pre></blockquote> 1542 * using {@code int} arithmetic, where {@code s[i]} is the 1543 * <i>i</i>th character of the string, {@code n} is the length of 1544 * the string, and {@code ^} indicates exponentiation. 1545 * (The hash value of the empty string is zero.) 1546 * 1547 * @return a hash code value for this object. 
1548 */ 1549 public int hashCode() { 1550 int h = hash; 1551 if (h == 0) { 1552 for (char v : value) { 1553 h = 31 * h + v; 1554 } 1555 hash = h; 1556 } 1557 return h; 1558 } 1559 1560 /** 1561 * Returns the index within this string of the first occurrence of 1562 * the specified character. If a character with value 1563 * {@code ch} occurs in the character sequence represented by 1564 * this {@code String} object, then the index (in Unicode 1565 * code units) of the first such occurrence is returned. For 1566 * values of {@code ch} in the range from 0 to 0xFFFF 1567 * (inclusive), this is the smallest value <i>k</i> such that: 1568 * <blockquote><pre> 1569 * this.charAt(<i>k</i>) == ch 1570 * </pre></blockquote> 1571 * is true. For other values of {@code ch}, it is the 1572 * smallest value <i>k</i> such that: 1573 * <blockquote><pre> 1574 * this.codePointAt(<i>k</i>) == ch 1575 * </pre></blockquote> 1576 * is true. In either case, if no such character occurs in this 1577 * string, then {@code -1} is returned. 1578 * 1579 * @param ch a character (Unicode code point). 1580 * @return the index of the first occurrence of the character in the 1581 * character sequence represented by this object, or 1582 * {@code -1} if the character does not occur. 1583 */ 1584 public int indexOf(int ch) { 1585 return indexOf(ch, 0); 1586 } 1587 1588 /** 1589 * Returns the index within this string of the first occurrence of the 1590 * specified character, starting the search at the specified index. 1591 * <p> 1592 * If a character with value {@code ch} occurs in the 1593 * character sequence represented by this {@code String} 1594 * object at an index no smaller than {@code fromIndex}, then 1595 * the index of the first such occurrence is returned. 
For values 1596 * of {@code ch} in the range from 0 to 0xFFFF (inclusive), 1597 * this is the smallest value <i>k</i> such that: 1598 * <blockquote><pre> 1599 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> >= fromIndex) 1600 * </pre></blockquote> 1601 * is true. For other values of {@code ch}, it is the 1602 * smallest value <i>k</i> such that: 1603 * <blockquote><pre> 1604 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> >= fromIndex) 1605 * </pre></blockquote> 1606 * is true. In either case, if no such character occurs in this 1607 * string at or after position {@code fromIndex}, then 1608 * {@code -1} is returned. 1609 * 1610 * <p> 1611 * There is no restriction on the value of {@code fromIndex}. If it 1612 * is negative, it has the same effect as if it were zero: this entire 1613 * string may be searched. If it is greater than the length of this 1614 * string, it has the same effect as if it were equal to the length of 1615 * this string: {@code -1} is returned. 1616 * 1617 * <p>All indices are specified in {@code char} values 1618 * (Unicode code units). 1619 * 1620 * @param ch a character (Unicode code point). 1621 * @param fromIndex the index to start the search from. 1622 * @return the index of the first occurrence of the character in the 1623 * character sequence represented by this object that is greater 1624 * than or equal to {@code fromIndex}, or {@code -1} 1625 * if the character does not occur. 1626 */ 1627 public int indexOf(int ch, int fromIndex) { 1628 final int max = value.length; 1629 if (fromIndex < 0) { 1630 fromIndex = 0; 1631 } else if (fromIndex >= max) { 1632 // Note: fromIndex might be near -1>>>1. 
1633 return -1; 1634 } 1635 1636 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 1637 // handle most cases here (ch is a BMP code point or a 1638 // negative value (invalid code point)) 1639 final char[] value = this.value; 1640 for (int i = fromIndex; i < max; i++) { 1641 if (value[i] == ch) { 1642 return i; 1643 } 1644 } 1645 return -1; 1646 } else { 1647 return indexOfSupplementary(ch, fromIndex); 1648 } 1649 } 1650 1651 /** 1652 * Handles (rare) calls of indexOf with a supplementary character. 1653 */ 1654 private int indexOfSupplementary(int ch, int fromIndex) { 1655 if (Character.isValidCodePoint(ch)) { 1656 final char[] value = this.value; 1657 final char hi = Character.highSurrogate(ch); 1658 final char lo = Character.lowSurrogate(ch); 1659 final int max = value.length - 1; 1660 for (int i = fromIndex; i < max; i++) { 1661 if (value[i] == hi && value[i + 1] == lo) { 1662 return i; 1663 } 1664 } 1665 } 1666 return -1; 1667 } 1668 1669 /** 1670 * Returns the index within this string of the last occurrence of 1671 * the specified character. For values of {@code ch} in the 1672 * range from 0 to 0xFFFF (inclusive), the index (in Unicode code 1673 * units) returned is the largest value <i>k</i> such that: 1674 * <blockquote><pre> 1675 * this.charAt(<i>k</i>) == ch 1676 * </pre></blockquote> 1677 * is true. For other values of {@code ch}, it is the 1678 * largest value <i>k</i> such that: 1679 * <blockquote><pre> 1680 * this.codePointAt(<i>k</i>) == ch 1681 * </pre></blockquote> 1682 * is true. In either case, if no such character occurs in this 1683 * string, then {@code -1} is returned. The 1684 * {@code String} is searched backwards starting at the last 1685 * character. 1686 * 1687 * @param ch a character (Unicode code point). 1688 * @return the index of the last occurrence of the character in the 1689 * character sequence represented by this object, or 1690 * {@code -1} if the character does not occur. 
1691 */ 1692 public int lastIndexOf(int ch) { 1693 return lastIndexOf(ch, value.length - 1); 1694 } 1695 1696 /** 1697 * Returns the index within this string of the last occurrence of 1698 * the specified character, searching backward starting at the 1699 * specified index. For values of {@code ch} in the range 1700 * from 0 to 0xFFFF (inclusive), the index returned is the largest 1701 * value <i>k</i> such that: 1702 * <blockquote><pre> 1703 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex) 1704 * </pre></blockquote> 1705 * is true. For other values of {@code ch}, it is the 1706 * largest value <i>k</i> such that: 1707 * <blockquote><pre> 1708 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex) 1709 * </pre></blockquote> 1710 * is true. In either case, if no such character occurs in this 1711 * string at or before position {@code fromIndex}, then 1712 * {@code -1} is returned. 1713 * 1714 * <p>All indices are specified in {@code char} values 1715 * (Unicode code units). 1716 * 1717 * @param ch a character (Unicode code point). 1718 * @param fromIndex the index to start the search from. There is no 1719 * restriction on the value of {@code fromIndex}. If it is 1720 * greater than or equal to the length of this string, it has 1721 * the same effect as if it were equal to one less than the 1722 * length of this string: this entire string may be searched. 1723 * If it is negative, it has the same effect as if it were -1: 1724 * -1 is returned. 1725 * @return the index of the last occurrence of the character in the 1726 * character sequence represented by this object that is less 1727 * than or equal to {@code fromIndex}, or {@code -1} 1728 * if the character does not occur before that point. 
1729 */ 1730 public int lastIndexOf(int ch, int fromIndex) { 1731 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 1732 // handle most cases here (ch is a BMP code point or a 1733 // negative value (invalid code point)) 1734 final char[] value = this.value; 1735 int i = Math.min(fromIndex, value.length - 1); 1736 for (; i >= 0; i--) { 1737 if (value[i] == ch) { 1738 return i; 1739 } 1740 } 1741 return -1; 1742 } else { 1743 return lastIndexOfSupplementary(ch, fromIndex); 1744 } 1745 } 1746 1747 /** 1748 * Handles (rare) calls of lastIndexOf with a supplementary character. 1749 */ 1750 private int lastIndexOfSupplementary(int ch, int fromIndex) { 1751 if (Character.isValidCodePoint(ch)) { 1752 final char[] value = this.value; 1753 char hi = Character.highSurrogate(ch); 1754 char lo = Character.lowSurrogate(ch); 1755 int i = Math.min(fromIndex, value.length - 2); 1756 for (; i >= 0; i--) { 1757 if (value[i] == hi && value[i + 1] == lo) { 1758 return i; 1759 } 1760 } 1761 } 1762 return -1; 1763 } 1764 1765 /** 1766 * Returns the index within this string of the first occurrence of the 1767 * specified substring. 1768 * 1769 * <p>The returned index is the smallest value {@code k} for which: 1770 * <pre>{@code 1771 * this.startsWith(str, k) 1772 * }</pre> 1773 * If no such value of {@code k} exists, then {@code -1} is returned. 1774 * 1775 * @param str the substring to search for. 1776 * @return the index of the first occurrence of the specified substring, 1777 * or {@code -1} if there is no such occurrence. 1778 */ 1779 public int indexOf(String str) { 1780 return indexOf(str, 0); 1781 } 1782 1783 /** 1784 * Returns the index within this string of the first occurrence of the 1785 * specified substring, starting at the specified index. 
1786 * 1787 * <p>The returned index is the smallest value {@code k} for which: 1788 * <pre>{@code 1789 * k >= Math.min(fromIndex, this.length()) && 1790 * this.startsWith(str, k) 1791 * }</pre> 1792 * If no such value of {@code k} exists, then {@code -1} is returned. 1793 * 1794 * @param str the substring to search for. 1795 * @param fromIndex the index from which to start the search. 1796 * @return the index of the first occurrence of the specified substring, 1797 * starting at the specified index, 1798 * or {@code -1} if there is no such occurrence. 1799 */ 1800 public int indexOf(String str, int fromIndex) { 1801 return indexOf(value, 0, value.length, 1802 str.value, 0, str.value.length, fromIndex); 1803 } 1804 1805 /** 1806 * Code shared by String and AbstractStringBuilder to do searches. The 1807 * source is the character array being searched, and the target 1808 * is the string being searched for. 1809 * 1810 * @param source the characters being searched. 1811 * @param sourceOffset offset of the source string. 1812 * @param sourceCount count of the source string. 1813 * @param target the characters being searched for. 1814 * @param fromIndex the index to begin searching from. 1815 */ 1816 static int indexOf(char[] source, int sourceOffset, int sourceCount, 1817 String target, int fromIndex) { 1818 return indexOf(source, sourceOffset, sourceCount, 1819 target.value, 0, target.value.length, 1820 fromIndex); 1821 } 1822 1823 /** 1824 * Code shared by String and StringBuffer to do searches. The 1825 * source is the character array being searched, and the target 1826 * is the string being searched for. 1827 * 1828 * @param source the characters being searched. 1829 * @param sourceOffset offset of the source string. 1830 * @param sourceCount count of the source string. 1831 * @param target the characters being searched for. 1832 * @param targetOffset offset of the target string. 1833 * @param targetCount count of the target string. 
1834 * @param fromIndex the index to begin searching from. 1835 */ 1836 static int indexOf(char[] source, int sourceOffset, int sourceCount, 1837 char[] target, int targetOffset, int targetCount, 1838 int fromIndex) { 1839 if (fromIndex >= sourceCount) { 1840 return (targetCount == 0 ? sourceCount : -1); 1841 } 1842 if (fromIndex < 0) { 1843 fromIndex = 0; 1844 } 1845 if (targetCount == 0) { 1846 return fromIndex; 1847 } 1848 1849 char first = target[targetOffset]; 1850 int max = sourceOffset + (sourceCount - targetCount); 1851 1852 for (int i = sourceOffset + fromIndex; i <= max; i++) { 1853 /* Look for first character. */ 1854 if (source[i] != first) { 1855 while (++i <= max && source[i] != first); 1856 } 1857 1858 /* Found first character, now look at the rest of v2 */ 1859 if (i <= max) { 1860 int j = i + 1; 1861 int end = j + targetCount - 1; 1862 for (int k = targetOffset + 1; j < end && source[j] 1863 == target[k]; j++, k++); 1864 1865 if (j == end) { 1866 /* Found whole string. */ 1867 return i - sourceOffset; 1868 } 1869 } 1870 } 1871 return -1; 1872 } 1873 1874 /** 1875 * Returns the index within this string of the last occurrence of the 1876 * specified substring. The last occurrence of the empty string "" 1877 * is considered to occur at the index value {@code this.length()}. 1878 * 1879 * <p>The returned index is the largest value {@code k} for which: 1880 * <pre>{@code 1881 * this.startsWith(str, k) 1882 * }</pre> 1883 * If no such value of {@code k} exists, then {@code -1} is returned. 1884 * 1885 * @param str the substring to search for. 1886 * @return the index of the last occurrence of the specified substring, 1887 * or {@code -1} if there is no such occurrence. 1888 */ 1889 public int lastIndexOf(String str) { 1890 return lastIndexOf(str, value.length); 1891 } 1892 1893 /** 1894 * Returns the index within this string of the last occurrence of the 1895 * specified substring, searching backward starting at the specified index. 
1896 * 1897 * <p>The returned index is the largest value {@code k} for which: 1898 * <pre>{@code 1899 * k <= Math.min(fromIndex, this.length()) && 1900 * this.startsWith(str, k) 1901 * }</pre> 1902 * If no such value of {@code k} exists, then {@code -1} is returned. 1903 * 1904 * @param str the substring to search for. 1905 * @param fromIndex the index to start the search from. 1906 * @return the index of the last occurrence of the specified substring, 1907 * searching backward from the specified index, 1908 * or {@code -1} if there is no such occurrence. 1909 */ 1910 public int lastIndexOf(String str, int fromIndex) { 1911 return lastIndexOf(value, 0, value.length, 1912 str.value, 0, str.value.length, fromIndex); 1913 } 1914 1915 /** 1916 * Code shared by String and AbstractStringBuilder to do searches. The 1917 * source is the character array being searched, and the target 1918 * is the string being searched for. 1919 * 1920 * @param source the characters being searched. 1921 * @param sourceOffset offset of the source string. 1922 * @param sourceCount count of the source string. 1923 * @param target the characters being searched for. 1924 * @param fromIndex the index to begin searching from. 1925 */ 1926 static int lastIndexOf(char[] source, int sourceOffset, int sourceCount, 1927 String target, int fromIndex) { 1928 return lastIndexOf(source, sourceOffset, sourceCount, 1929 target.value, 0, target.value.length, 1930 fromIndex); 1931 } 1932 1933 /** 1934 * Code shared by String and StringBuffer to do searches. The 1935 * source is the character array being searched, and the target 1936 * is the string being searched for. 1937 * 1938 * @param source the characters being searched. 1939 * @param sourceOffset offset of the source string. 1940 * @param sourceCount count of the source string. 1941 * @param target the characters being searched for. 1942 * @param targetOffset offset of the target string. 1943 * @param targetCount count of the target string. 
1944 * @param fromIndex the index to begin searching from. 1945 */ 1946 static int lastIndexOf(char[] source, int sourceOffset, int sourceCount, 1947 char[] target, int targetOffset, int targetCount, 1948 int fromIndex) { 1949 /* 1950 * Check arguments; return immediately where possible. For 1951 * consistency, don't check for null str. 1952 */ 1953 int rightIndex = sourceCount - targetCount; 1954 if (fromIndex < 0) { 1955 return -1; 1956 } 1957 if (fromIndex > rightIndex) { 1958 fromIndex = rightIndex; 1959 } 1960 /* Empty string always matches. */ 1961 if (targetCount == 0) { 1962 return fromIndex; 1963 } 1964 1965 int strLastIndex = targetOffset + targetCount - 1; 1966 char strLastChar = target[strLastIndex]; 1967 int min = sourceOffset + targetCount - 1; 1968 int i = min + fromIndex; 1969 1970 startSearchForLastChar: 1971 while (true) { 1972 while (i >= min && source[i] != strLastChar) { 1973 i--; 1974 } 1975 if (i < min) { 1976 return -1; 1977 } 1978 int j = i - 1; 1979 int start = j - (targetCount - 1); 1980 int k = strLastIndex - 1; 1981 1982 while (j > start) { 1983 if (source[j--] != target[k--]) { 1984 i--; 1985 continue startSearchForLastChar; 1986 } 1987 } 1988 return start - sourceOffset + 1; 1989 } 1990 } 1991 1992 /** 1993 * Returns a string that is a substring of this string. The 1994 * substring begins with the character at the specified index and 1995 * extends to the end of this string. <p> 1996 * Examples: 1997 * <blockquote><pre> 1998 * "unhappy".substring(2) returns "happy" 1999 * "Harbison".substring(3) returns "bison" 2000 * "emptiness".substring(9) returns "" (an empty string) 2001 * </pre></blockquote> 2002 * 2003 * @param beginIndex the beginning index, inclusive. 2004 * @return the specified substring. 2005 * @exception IndexOutOfBoundsException if 2006 * {@code beginIndex} is negative or larger than the 2007 * length of this {@code String} object. 
2008 */ 2009 public String substring(int beginIndex) { 2010 if (beginIndex < 0) { 2011 throw new StringIndexOutOfBoundsException(beginIndex); 2012 } 2013 int subLen = value.length - beginIndex; 2014 if (subLen < 0) { 2015 throw new StringIndexOutOfBoundsException(subLen); 2016 } 2017 return (beginIndex == 0) ? this : new String(value, beginIndex, subLen); 2018 } 2019 2020 /** 2021 * Returns a string that is a substring of this string. The 2022 * substring begins at the specified {@code beginIndex} and 2023 * extends to the character at index {@code endIndex - 1}. 2024 * Thus the length of the substring is {@code endIndex-beginIndex}. 2025 * <p> 2026 * Examples: 2027 * <blockquote><pre> 2028 * "hamburger".substring(4, 8) returns "urge" 2029 * "smiles".substring(1, 5) returns "mile" 2030 * </pre></blockquote> 2031 * 2032 * @param beginIndex the beginning index, inclusive. 2033 * @param endIndex the ending index, exclusive. 2034 * @return the specified substring. 2035 * @exception IndexOutOfBoundsException if the 2036 * {@code beginIndex} is negative, or 2037 * {@code endIndex} is larger than the length of 2038 * this {@code String} object, or 2039 * {@code beginIndex} is larger than 2040 * {@code endIndex}. 2041 */ 2042 public String substring(int beginIndex, int endIndex) { 2043 if (beginIndex < 0) { 2044 throw new StringIndexOutOfBoundsException(beginIndex); 2045 } 2046 if (endIndex > value.length) { 2047 throw new StringIndexOutOfBoundsException(endIndex); 2048 } 2049 int subLen = endIndex - beginIndex; 2050 if (subLen < 0) { 2051 throw new StringIndexOutOfBoundsException(subLen); 2052 } 2053 return ((beginIndex == 0) && (endIndex == value.length)) ? this 2054 : new String(value, beginIndex, subLen); 2055 } 2056 2057 /** 2058 * Returns a character sequence that is a subsequence of this sequence. 
2059 * 2060 * <p> An invocation of this method of the form 2061 * 2062 * <blockquote><pre> 2063 * str.subSequence(begin, end)</pre></blockquote> 2064 * 2065 * behaves in exactly the same way as the invocation 2066 * 2067 * <blockquote><pre> 2068 * str.substring(begin, end)</pre></blockquote> 2069 * 2070 * @apiNote 2071 * This method is defined so that the {@code String} class can implement 2072 * the {@link CharSequence} interface. 2073 * 2074 * @param beginIndex the begin index, inclusive. 2075 * @param endIndex the end index, exclusive. 2076 * @return the specified subsequence. 2077 * 2078 * @throws IndexOutOfBoundsException 2079 * if {@code beginIndex} or {@code endIndex} is negative, 2080 * if {@code endIndex} is greater than {@code length()}, 2081 * or if {@code beginIndex} is greater than {@code endIndex} 2082 * 2083 * @since 1.4 2084 * @spec JSR-51 2085 */ 2086 public CharSequence subSequence(int beginIndex, int endIndex) { 2087 return this.substring(beginIndex, endIndex); 2088 } 2089 2090 /** 2091 * Concatenates the specified string to the end of this string. 2092 * <p> 2093 * If the length of the argument string is {@code 0}, then this 2094 * {@code String} object is returned. Otherwise, a 2095 * {@code String} object is returned that represents a character 2096 * sequence that is the concatenation of the character sequence 2097 * represented by this {@code String} object and the character 2098 * sequence represented by the argument string.<p> 2099 * Examples: 2100 * <blockquote><pre> 2101 * "cares".concat("s") returns "caress" 2102 * "to".concat("get").concat("her") returns "together" 2103 * </pre></blockquote> 2104 * 2105 * @param str the {@code String} that is concatenated to the end 2106 * of this {@code String}. 2107 * @return a string that represents the concatenation of this object's 2108 * characters followed by the string argument's characters. 
2109 */ 2110 public String concat(String str) { 2111 int otherLen = str.length(); 2112 if (otherLen == 0) { 2113 return this; 2114 } 2115 int len = value.length; 2116 char buf[] = Arrays.copyOf(value, len + otherLen); 2117 str.getChars(buf, len); 2118 return new String(buf, true); 2119 } 2120 2121 /** 2122 * Returns a string resulting from replacing all occurrences of 2123 * {@code oldChar} in this string with {@code newChar}. 2124 * <p> 2125 * If the character {@code oldChar} does not occur in the 2126 * character sequence represented by this {@code String} object, 2127 * then a reference to this {@code String} object is returned. 2128 * Otherwise, a {@code String} object is returned that 2129 * represents a character sequence identical to the character sequence 2130 * represented by this {@code String} object, except that every 2131 * occurrence of {@code oldChar} is replaced by an occurrence 2132 * of {@code newChar}. 2133 * <p> 2134 * Examples: 2135 * <blockquote><pre> 2136 * "mesquite in your cellar".replace('e', 'o') 2137 * returns "mosquito in your collar" 2138 * "the war of baronets".replace('r', 'y') 2139 * returns "the way of bayonets" 2140 * "sparring with a purple porpoise".replace('p', 't') 2141 * returns "starring with a turtle tortoise" 2142 * "JonL".replace('q', 'x') returns "JonL" (no change) 2143 * </pre></blockquote> 2144 * 2145 * @param oldChar the old character. 2146 * @param newChar the new character. 2147 * @return a string derived from this string by replacing every 2148 * occurrence of {@code oldChar} with {@code newChar}. 
2149 */ 2150 public String replace(char oldChar, char newChar) { 2151 if (oldChar != newChar) { 2152 int len = value.length; 2153 int i = -1; 2154 char[] val = value; /* avoid getfield opcode */ 2155 2156 while (++i < len) { 2157 if (val[i] == oldChar) { 2158 break; 2159 } 2160 } 2161 if (i < len) { 2162 char buf[] = new char[len]; 2163 for (int j = 0; j < i; j++) { 2164 buf[j] = val[j]; 2165 } 2166 while (i < len) { 2167 char c = val[i]; 2168 buf[i] = (c == oldChar) ? newChar : c; 2169 i++; 2170 } 2171 return new String(buf, true); 2172 } 2173 } 2174 return this; 2175 } 2176 2177 /** 2178 * Tells whether or not this string matches the given <a 2179 * href="../util/regex/Pattern.html#sum">regular expression</a>. 2180 * 2181 * <p> An invocation of this method of the form 2182 * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the 2183 * same result as the expression 2184 * 2185 * <blockquote> 2186 * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence) 2187 * matches(<i>regex</i>, <i>str</i>)} 2188 * </blockquote> 2189 * 2190 * @param regex 2191 * the regular expression to which this string is to be matched 2192 * 2193 * @return {@code true} if, and only if, this string matches the 2194 * given regular expression 2195 * 2196 * @throws PatternSyntaxException 2197 * if the regular expression's syntax is invalid 2198 * 2199 * @see java.util.regex.Pattern 2200 * 2201 * @since 1.4 2202 * @spec JSR-51 2203 */ 2204 public boolean matches(String regex) { 2205 return Pattern.matches(regex, this); 2206 } 2207 2208 /** 2209 * Returns true if and only if this string contains the specified 2210 * sequence of char values. 
2211 * 2212 * @param s the sequence to search for 2213 * @return true if this string contains {@code s}, false otherwise 2214 * @since 1.5 2215 */ 2216 public boolean contains(CharSequence s) { 2217 return indexOf(s.toString()) >= 0; 2218 } 2219 2220 /** 2221 * Replaces the first substring of this string that matches the given <a 2222 * href="../util/regex/Pattern.html#sum">regular expression</a> with the 2223 * given replacement. 2224 * 2225 * <p> An invocation of this method of the form 2226 * <i>str</i>{@code .replaceFirst(}<i>regex</i>{@code ,} <i>repl</i>{@code )} 2227 * yields exactly the same result as the expression 2228 * 2229 * <blockquote> 2230 * <code> 2231 * {@link java.util.regex.Pattern}.{@link 2232 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2233 * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link 2234 * java.util.regex.Matcher#replaceFirst replaceFirst}(<i>repl</i>) 2235 * </code> 2236 * </blockquote> 2237 * 2238 *<p> 2239 * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the 2240 * replacement string may cause the results to be different than if it were 2241 * being treated as a literal replacement string; see 2242 * {@link java.util.regex.Matcher#replaceFirst}. 2243 * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special 2244 * meaning of these characters, if desired. 
2245 * 2246 * @param regex 2247 * the regular expression to which this string is to be matched 2248 * @param replacement 2249 * the string to be substituted for the first match 2250 * 2251 * @return The resulting {@code String} 2252 * 2253 * @throws PatternSyntaxException 2254 * if the regular expression's syntax is invalid 2255 * 2256 * @see java.util.regex.Pattern 2257 * 2258 * @since 1.4 2259 * @spec JSR-51 2260 */ 2261 public String replaceFirst(String regex, String replacement) { 2262 return Pattern.compile(regex).matcher(this).replaceFirst(replacement); 2263 } 2264 2265 /** 2266 * Replaces each substring of this string that matches the given <a 2267 * href="../util/regex/Pattern.html#sum">regular expression</a> with the 2268 * given replacement. 2269 * 2270 * <p> An invocation of this method of the form 2271 * <i>str</i>{@code .replaceAll(}<i>regex</i>{@code ,} <i>repl</i>{@code )} 2272 * yields exactly the same result as the expression 2273 * 2274 * <blockquote> 2275 * <code> 2276 * {@link java.util.regex.Pattern}.{@link 2277 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2278 * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link 2279 * java.util.regex.Matcher#replaceAll replaceAll}(<i>repl</i>) 2280 * </code> 2281 * </blockquote> 2282 * 2283 *<p> 2284 * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the 2285 * replacement string may cause the results to be different than if it were 2286 * being treated as a literal replacement string; see 2287 * {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}. 2288 * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special 2289 * meaning of these characters, if desired. 
2290 * 2291 * @param regex 2292 * the regular expression to which this string is to be matched 2293 * @param replacement 2294 * the string to be substituted for each match 2295 * 2296 * @return The resulting {@code String} 2297 * 2298 * @throws PatternSyntaxException 2299 * if the regular expression's syntax is invalid 2300 * 2301 * @see java.util.regex.Pattern 2302 * 2303 * @since 1.4 2304 * @spec JSR-51 2305 */ 2306 public String replaceAll(String regex, String replacement) { 2307 return Pattern.compile(regex).matcher(this).replaceAll(replacement); 2308 } 2309 2310 /** 2311 * Replaces each substring of this string that matches the literal target 2312 * sequence with the specified literal replacement sequence. The 2313 * replacement proceeds from the beginning of the string to the end, for 2314 * example, replacing "aa" with "b" in the string "aaa" will result in 2315 * "ba" rather than "ab". 2316 * 2317 * @param target The sequence of char values to be replaced 2318 * @param replacement The replacement sequence of char values 2319 * @return The resulting string 2320 * @since 1.5 2321 */ 2322 public String replace(CharSequence target, CharSequence replacement) { 2323 return Pattern.compile(target.toString(), Pattern.LITERAL).matcher( 2324 this).replaceAll(Matcher.quoteReplacement(replacement.toString())); 2325 } 2326 2327 /** 2328 * Splits this string around matches of the given 2329 * <a href="../util/regex/Pattern.html#sum">regular expression</a>. 2330 * 2331 * <p> The array returned by this method contains each substring of this 2332 * string that is terminated by another substring that matches the given 2333 * expression or is terminated by the end of the string. The substrings in 2334 * the array are in the order in which they occur in this string. If the 2335 * expression does not match any part of the input then the resulting array 2336 * has just one element, namely this string. 
2337 * 2338 * <p> When there is a positive-width match at the beginning of this 2339 * string then an empty leading substring is included at the beginning 2340 * of the resulting array. A zero-width match at the beginning however 2341 * never produces such empty leading substring. 2342 * 2343 * <p> The {@code limit} parameter controls the number of times the 2344 * pattern is applied and therefore affects the length of the resulting 2345 * array. If the limit <i>n</i> is greater than zero then the pattern 2346 * will be applied at most <i>n</i> - 1 times, the array's 2347 * length will be no greater than <i>n</i>, and the array's last entry 2348 * will contain all input beyond the last matched delimiter. If <i>n</i> 2349 * is non-positive then the pattern will be applied as many times as 2350 * possible and the array can have any length. If <i>n</i> is zero then 2351 * the pattern will be applied as many times as possible, the array can 2352 * have any length, and trailing empty strings will be discarded. 
2353 * 2354 * <p> The string {@code "boo:and:foo"}, for example, yields the 2355 * following results with these parameters: 2356 * 2357 * <blockquote><table cellpadding=1 cellspacing=0 summary="Split example showing regex, limit, and result"> 2358 * <tr> 2359 * <th>Regex</th> 2360 * <th>Limit</th> 2361 * <th>Result</th> 2362 * </tr> 2363 * <tr><td align=center>:</td> 2364 * <td align=center>2</td> 2365 * <td>{@code { "boo", "and:foo" }}</td></tr> 2366 * <tr><td align=center>:</td> 2367 * <td align=center>5</td> 2368 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2369 * <tr><td align=center>:</td> 2370 * <td align=center>-2</td> 2371 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2372 * <tr><td align=center>o</td> 2373 * <td align=center>5</td> 2374 * <td>{@code { "b", "", ":and:f", "", "" }}</td></tr> 2375 * <tr><td align=center>o</td> 2376 * <td align=center>-2</td> 2377 * <td>{@code { "b", "", ":and:f", "", "" }}</td></tr> 2378 * <tr><td align=center>o</td> 2379 * <td align=center>0</td> 2380 * <td>{@code { "b", "", ":and:f" }}</td></tr> 2381 * </table></blockquote> 2382 * 2383 * <p> An invocation of this method of the form 2384 * <i>str.</i>{@code split(}<i>regex</i>{@code ,} <i>n</i>{@code )} 2385 * yields the same result as the expression 2386 * 2387 * <blockquote> 2388 * <code> 2389 * {@link java.util.regex.Pattern}.{@link 2390 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2391 * java.util.regex.Pattern#split(java.lang.CharSequence,int) split}(<i>str</i>, <i>n</i>) 2392 * </code> 2393 * </blockquote> 2394 * 2395 * 2396 * @param regex 2397 * the delimiting regular expression 2398 * 2399 * @param limit 2400 * the result threshold, as described above 2401 * 2402 * @return the array of strings computed by splitting this string 2403 * around matches of the given regular expression 2404 * 2405 * @throws PatternSyntaxException 2406 * if the regular expression's syntax is invalid 2407 * 2408 * @see java.util.regex.Pattern 2409 * 2410 * @since 1.4 
2411 * @spec JSR-51 2412 */ 2413 public String[] split(String regex, int limit) { 2414 /* fastpath if the regex is a 2415 (1)one-char String and this character is not one of the 2416 RegEx's meta characters ".$|()[{^?*+\\", or 2417 (2)two-char String and the first char is the backslash and 2418 the second is not the ascii digit or ascii letter. 2419 */ 2420 char ch = 0; 2421 if (((regex.value.length == 1 && 2422 ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) || 2423 (regex.length() == 2 && 2424 regex.charAt(0) == '\\' && 2425 (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 && 2426 ((ch-'a')|('z'-ch)) < 0 && 2427 ((ch-'A')|('Z'-ch)) < 0)) && 2428 (ch < Character.MIN_HIGH_SURROGATE || 2429 ch > Character.MAX_LOW_SURROGATE)) 2430 { 2431 int off = 0; 2432 int next = 0; 2433 boolean limited = limit > 0; 2434 ArrayList<String> list = new ArrayList<>(); 2435 while ((next = indexOf(ch, off)) != -1) { 2436 if (!limited || list.size() < limit - 1) { 2437 list.add(substring(off, next)); 2438 off = next + 1; 2439 } else { // last one 2440 //assert (list.size() == limit - 1); 2441 list.add(substring(off, value.length)); 2442 off = value.length; 2443 break; 2444 } 2445 } 2446 // If no match was found, return this 2447 if (off == 0) 2448 return new String[]{this}; 2449 2450 // Add remaining segment 2451 if (!limited || list.size() < limit) 2452 list.add(substring(off, value.length)); 2453 2454 // Construct result 2455 int resultSize = list.size(); 2456 if (limit == 0) { 2457 while (resultSize > 0 && list.get(resultSize - 1).length() == 0) { 2458 resultSize--; 2459 } 2460 } 2461 String[] result = new String[resultSize]; 2462 return list.subList(0, resultSize).toArray(result); 2463 } 2464 return Pattern.compile(regex).split(this, limit); 2465 } 2466 2467 /** 2468 * Splits this string around matches of the given <a 2469 * href="../util/regex/Pattern.html#sum">regular expression</a>. 
2470 * 2471 * <p> This method works as if by invoking the two-argument {@link 2472 * #split(String, int) split} method with the given expression and a limit 2473 * argument of zero. Trailing empty strings are therefore not included in 2474 * the resulting array. 2475 * 2476 * <p> The string {@code "boo:and:foo"}, for example, yields the following 2477 * results with these expressions: 2478 * 2479 * <blockquote><table cellpadding=1 cellspacing=0 summary="Split examples showing regex and result"> 2480 * <tr> 2481 * <th>Regex</th> 2482 * <th>Result</th> 2483 * </tr> 2484 * <tr><td align=center>:</td> 2485 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2486 * <tr><td align=center>o</td> 2487 * <td>{@code { "b", "", ":and:f" }}</td></tr> 2488 * </table></blockquote> 2489 * 2490 * 2491 * @param regex 2492 * the delimiting regular expression 2493 * 2494 * @return the array of strings computed by splitting this string 2495 * around matches of the given regular expression 2496 * 2497 * @throws PatternSyntaxException 2498 * if the regular expression's syntax is invalid 2499 * 2500 * @see java.util.regex.Pattern 2501 * 2502 * @since 1.4 2503 * @spec JSR-51 2504 */ 2505 public String[] split(String regex) { 2506 return split(regex, 0); 2507 } 2508 2509 /** 2510 * Returns a new String composed of copies of the 2511 * {@code CharSequence elements} joined together with a copy of 2512 * the specified {@code delimiter}. 2513 * 2514 * <blockquote>For example, 2515 * <pre>{@code 2516 * String message = String.join("-", "Java", "is", "cool"); 2517 * // message returned is: "Java-is-cool" 2518 * }</pre></blockquote> 2519 * 2520 * Note that if an element is null, then {@code "null"} is added. 2521 * 2522 * @param delimiter the delimiter that separates each element 2523 * @param elements the elements to join together. 
2524 * 2525 * @return a new {@code String} that is composed of the {@code elements} 2526 * separated by the {@code delimiter} 2527 * 2528 * @throws NullPointerException If {@code delimiter} or {@code elements} 2529 * is {@code null} 2530 * 2531 * @see java.util.StringJoiner 2532 * @since 1.8 2533 */ 2534 public static String join(CharSequence delimiter, CharSequence... elements) { 2535 Objects.requireNonNull(delimiter); 2536 Objects.requireNonNull(elements); 2537 // Number of elements not likely worth Arrays.stream overhead. 2538 StringJoiner joiner = new StringJoiner(delimiter); 2539 for (CharSequence cs: elements) { 2540 joiner.add(cs); 2541 } 2542 return joiner.toString(); 2543 } 2544 2545 /** 2546 * Returns a new {@code String} composed of copies of the 2547 * {@code CharSequence elements} joined together with a copy of the 2548 * specified {@code delimiter}. 2549 * 2550 * <blockquote>For example, 2551 * <pre>{@code 2552 * List<String> strings = new LinkedList<>(); 2553 * strings.add("Java");strings.add("is"); 2554 * strings.add("cool"); 2555 * String message = String.join(" ", strings); 2556 * //message returned is: "Java is cool" 2557 * 2558 * Set<String> strings = new LinkedHashSet<>(); 2559 * strings.add("Java"); strings.add("is"); 2560 * strings.add("very"); strings.add("cool"); 2561 * String message = String.join("-", strings); 2562 * //message returned is: "Java-is-very-cool" 2563 * }</pre></blockquote> 2564 * 2565 * Note that if an individual element is {@code null}, then {@code "null"} is added. 2566 * 2567 * @param delimiter a sequence of characters that is used to separate each 2568 * of the {@code elements} in the resulting {@code String} 2569 * @param elements an {@code Iterable} that will have its {@code elements} 2570 * joined together. 
2571 * 2572 * @return a new {@code String} that is composed from the {@code elements} 2573 * argument 2574 * 2575 * @throws NullPointerException If {@code delimiter} or {@code elements} 2576 * is {@code null} 2577 * 2578 * @see #join(CharSequence,CharSequence...) 2579 * @see java.util.StringJoiner 2580 * @since 1.8 2581 */ 2582 public static String join(CharSequence delimiter, 2583 Iterable<? extends CharSequence> elements) { 2584 Objects.requireNonNull(delimiter); 2585 Objects.requireNonNull(elements); 2586 StringJoiner joiner = new StringJoiner(delimiter); 2587 for (CharSequence cs: elements) { 2588 joiner.add(cs); 2589 } 2590 return joiner.toString(); 2591 } 2592 2593 /** 2594 * Converts all of the characters in this {@code String} to lower 2595 * case using the rules of the given {@code Locale}. Case mapping is based 2596 * on the Unicode Standard version specified by the {@link java.lang.Character Character} 2597 * class. Since case mappings are not always 1:1 char mappings, the resulting 2598 * {@code String} may be a different length than the original {@code String}. 
* <p>
     * Examples of lowercase mappings are in the following table:
     * <table border="1" summary="Lowercase mapping examples showing language code of locale, upper case, lower case, and description">
     * <tr>
     *   <th>Language Code of Locale</th>
     *   <th>Upper Case</th>
     *   <th>Lower Case</th>
     *   <th>Description</th>
     * </tr>
     * <tr>
     *   <td>tr (Turkish)</td>
     *   <td>\u0130</td>
     *   <td>\u0069</td>
     *   <td>capital letter I with dot above -&gt; small letter i</td>
     * </tr>
     * <tr>
     *   <td>tr (Turkish)</td>
     *   <td>\u0049</td>
     *   <td>\u0131</td>
     *   <td>capital letter I -&gt; small letter dotless i </td>
     * </tr>
     * <tr>
     *   <td>(all)</td>
     *   <td>French Fries</td>
     *   <td>french fries</td>
     *   <td>lowercased all chars in String</td>
     * </tr>
     * <tr>
     *   <td>(all)</td>
     *   <td><img src="doc-files/capiota.gif" alt="capiota"><img src="doc-files/capchi.gif" alt="capchi">
     *       <img src="doc-files/captheta.gif" alt="captheta"><img src="doc-files/capupsil.gif" alt="capupsil">
     *       <img src="doc-files/capsigma.gif" alt="capsigma"></td>
     *   <td><img src="doc-files/iota.gif" alt="iota"><img src="doc-files/chi.gif" alt="chi">
     *       <img src="doc-files/theta.gif" alt="theta"><img src="doc-files/upsilon.gif" alt="upsilon">
     *       <img src="doc-files/sigma1.gif" alt="sigma"></td>
     *   <td>lowercased all chars in String</td>
     * </tr>
     * </table>
     *
     * @param locale use the case transformation rules for this locale
     * @return the {@code String}, converted to lowercase.
2640 * @see java.lang.String#toLowerCase() 2641 * @see java.lang.String#toUpperCase() 2642 * @see java.lang.String#toUpperCase(Locale) 2643 * @since 1.1 2644 */ 2645 public String toLowerCase(Locale locale) { 2646 if (locale == null) { 2647 throw new NullPointerException(); 2648 } 2649 int first; 2650 boolean hasSurr = false; 2651 final int len = value.length; 2652 2653 // Now check if there are any characters that need to be changed, or are surrogate 2654 for (first = 0 ; first < len; first++) { 2655 int cp = (int)value[first]; 2656 if (Character.isSurrogate((char)cp)) { 2657 hasSurr = true; 2658 break; 2659 } 2660 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR 2661 break; 2662 } 2663 } 2664 if (first == len) 2665 return this; 2666 char[] result = new char[len]; 2667 System.arraycopy(value, 0, result, 0, first); // Just copy the first few 2668 // lowerCase characters. 2669 String lang = locale.getLanguage(); 2670 if (lang == "tr" || lang == "az" || lang == "lt") { 2671 return toLowerCaseEx(result, first, locale, true); 2672 } 2673 if (hasSurr) { 2674 return toLowerCaseEx(result, first, locale, false); 2675 } 2676 for (int i = first; i < len; i++) { 2677 int cp = (int)value[i]; 2678 if (cp == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 2679 Character.isSurrogate((char)cp)) { 2680 return toLowerCaseEx(result, i, locale, false); 2681 } 2682 if (cp == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 2683 return toLowerCaseEx(result, i, locale, true); 2684 } 2685 cp = Character.toLowerCase(cp); 2686 if (!Character.isBmpCodePoint(cp)) { 2687 return toLowerCaseEx(result, i, locale, false); 2688 } 2689 result[i] = (char)cp; 2690 } 2691 return new String(result, true); 2692 } 2693 2694 private String toLowerCaseEx(char[] result, int first, Locale locale, boolean localeDependent) { 2695 int resultOffset = first; 2696 int srcCount; 2697 for (int i = first; i < value.length; i += srcCount) { 2698 int srcChar = (int)value[i]; 2699 int lowerChar; 
2700 char[] lowerCharArray; 2701 srcCount = 1; 2702 if (Character.isSurrogate((char)srcChar)) { 2703 srcChar = codePointAt(i); 2704 srcCount = Character.charCount(srcChar); 2705 } 2706 if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA 2707 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale); 2708 } else { 2709 lowerChar = Character.toLowerCase(srcChar); 2710 } 2711 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp 2712 result[resultOffset++] = (char)lowerChar; 2713 } else { 2714 if (lowerChar == Character.ERROR) { 2715 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale); 2716 } else if (srcCount == 2) { 2717 resultOffset += Character.toChars(lowerChar, result, resultOffset); 2718 continue; 2719 } else { 2720 lowerCharArray = Character.toChars(lowerChar); 2721 } 2722 /* Grow result if needed */ 2723 int mapLen = lowerCharArray.length; 2724 if (mapLen > srcCount) { 2725 char[] result2 = new char[result.length + mapLen - srcCount]; 2726 System.arraycopy(result, 0, result2, 0, resultOffset); 2727 result = result2; 2728 } 2729 for (int x = 0; x < mapLen; ++x) { 2730 result[resultOffset++] = lowerCharArray[x]; 2731 } 2732 } 2733 } 2734 return new String(result, 0, resultOffset); 2735 } 2736 2737 /** 2738 * Converts all of the characters in this {@code String} to lower 2739 * case using the rules of the default locale. This is equivalent to calling 2740 * {@code toLowerCase(Locale.getDefault())}. 2741 * <p> 2742 * <b>Note:</b> This method is locale sensitive, and may produce unexpected 2743 * results if used for strings that are intended to be interpreted locale 2744 * independently. 2745 * Examples are programming language identifiers, protocol keys, and HTML 2746 * tags. 2747 * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale 2748 * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the 2749 * LATIN SMALL LETTER DOTLESS I character. 
2750 * To obtain correct results for locale insensitive strings, use 2751 * {@code toLowerCase(Locale.ROOT)}. 2752 * 2753 * @return the {@code String}, converted to lowercase. 2754 * @see java.lang.String#toLowerCase(Locale) 2755 */ 2756 public String toLowerCase() { 2757 return toLowerCase(Locale.getDefault()); 2758 } 2759 2760 /** 2761 * Converts all of the characters in this {@code String} to upper 2762 * case using the rules of the given {@code Locale}. Case mapping is based 2763 * on the Unicode Standard version specified by the {@link java.lang.Character Character} 2764 * class. Since case mappings are not always 1:1 char mappings, the resulting 2765 * {@code String} may be a different length than the original {@code String}. 2766 * <p> 2767 * Examples of locale-sensitive and 1:M case mappings are in the following table. 2768 * 2769 * <table border="1" summary="Examples of locale-sensitive and 1:M case mappings. Shows Language code of locale, lower case, upper case, and description."> 2770 * <tr> 2771 * <th>Language Code of Locale</th> 2772 * <th>Lower Case</th> 2773 * <th>Upper Case</th> 2774 * <th>Description</th> 2775 * </tr> 2776 * <tr> 2777 * <td>tr (Turkish)</td> 2778 * <td>\u0069</td> 2779 * <td>\u0130</td> 2780 * <td>small letter i -> capital letter I with dot above</td> 2781 * </tr> 2782 * <tr> 2783 * <td>tr (Turkish)</td> 2784 * <td>\u0131</td> 2785 * <td>\u0049</td> 2786 * <td>small letter dotless i -> capital letter I</td> 2787 * </tr> 2788 * <tr> 2789 * <td>(all)</td> 2790 * <td>\u00df</td> 2791 * <td>\u0053 \u0053</td> 2792 * <td>small letter sharp s -> two letters: SS</td> 2793 * </tr> 2794 * <tr> 2795 * <td>(all)</td> 2796 * <td>Fahrvergnügen</td> 2797 * <td>FAHRVERGNÜGEN</td> 2798 * <td></td> 2799 * </tr> 2800 * </table> 2801 * @param locale use the case transformation rules for this locale 2802 * @return the {@code String}, converted to uppercase. 
2803 * @see java.lang.String#toUpperCase() 2804 * @see java.lang.String#toLowerCase() 2805 * @see java.lang.String#toLowerCase(Locale) 2806 * @since 1.1 2807 */ 2808 public String toUpperCase(Locale locale) { 2809 if (locale == null) { 2810 throw new NullPointerException(); 2811 } 2812 int first; 2813 boolean hasSurr = false; 2814 final int len = value.length; 2815 2816 // Now check if there are any characters that need to be changed, or are surrogate 2817 for (first = 0 ; first < len; first++ ) { 2818 int cp = (int)value[first]; 2819 if (Character.isSurrogate((char)cp)) { 2820 hasSurr = true; 2821 break; 2822 } 2823 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR 2824 break; 2825 } 2826 } 2827 if (first == len) { 2828 return this; 2829 } 2830 char[] result = new char[len]; 2831 System.arraycopy(value, 0, result, 0, first); // Just copy the first few 2832 // upperCase characters. 2833 String lang = locale.getLanguage(); 2834 if (lang == "tr" || lang == "az" || lang == "lt") { 2835 return toUpperCaseEx(result, first, locale, true); 2836 } 2837 if (hasSurr) { 2838 return toUpperCaseEx(result, first, locale, false); 2839 } 2840 for (int i = first; i < len; i++) { 2841 int cp = (int)value[i]; 2842 if (Character.isSurrogate((char)cp)) { 2843 return toUpperCaseEx(result, i, locale, false); 2844 } 2845 cp = Character.toUpperCaseEx(cp); 2846 if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp 2847 return toUpperCaseEx(result, i, locale, false); 2848 } 2849 result[i] = (char)cp; 2850 } 2851 return new String(result, true); 2852 } 2853 2854 private String toUpperCaseEx(char[] result, int first, Locale locale, 2855 boolean localeDependent) { 2856 int resultOffset = first; 2857 int srcCount; 2858 for (int i = first; i < value.length; i += srcCount) { 2859 int srcChar = (int)value[i]; 2860 int upperChar; 2861 char[] upperCharArray; 2862 srcCount = 1; 2863 if (Character.isSurrogate((char)srcChar)) { 2864 srcChar = codePointAt(i); 2865 
srcCount = Character.charCount(srcChar); 2866 } 2867 if (localeDependent) { 2868 upperChar = ConditionalSpecialCasing.toUpperCaseEx(this, i, locale); 2869 } else { 2870 upperChar = Character.toUpperCaseEx(srcChar); 2871 } 2872 if (Character.isBmpCodePoint(upperChar)) { 2873 result[resultOffset++] = (char)upperChar; 2874 } else { 2875 if (upperChar == Character.ERROR) { 2876 if (localeDependent) { 2877 upperCharArray = 2878 ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale); 2879 } else { 2880 upperCharArray = Character.toUpperCaseCharArray(srcChar); 2881 } 2882 } else if (srcCount == 2) { 2883 resultOffset += Character.toChars(upperChar, result, resultOffset); 2884 continue; 2885 } else { 2886 upperCharArray = Character.toChars(upperChar); 2887 } 2888 /* Grow result if needed */ 2889 int mapLen = upperCharArray.length; 2890 if (mapLen > srcCount) { 2891 char[] result2 = new char[result.length + mapLen - srcCount]; 2892 System.arraycopy(result, 0, result2, 0, resultOffset); 2893 result = result2; 2894 } 2895 for (int x = 0; x < mapLen; ++x) { 2896 result[resultOffset++] = upperCharArray[x]; 2897 } 2898 } 2899 } 2900 return new String(result, 0, resultOffset); 2901 } 2902 2903 /** 2904 * Converts all of the characters in this {@code String} to upper 2905 * case using the rules of the default locale. This method is equivalent to 2906 * {@code toUpperCase(Locale.getDefault())}. 2907 * <p> 2908 * <b>Note:</b> This method is locale sensitive, and may produce unexpected 2909 * results if used for strings that are intended to be interpreted locale 2910 * independently. 2911 * Examples are programming language identifiers, protocol keys, and HTML 2912 * tags. 2913 * For instance, {@code "title".toUpperCase()} in a Turkish locale 2914 * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the 2915 * LATIN CAPITAL LETTER I WITH DOT ABOVE character. 2916 * To obtain correct results for locale insensitive strings, use 2917 * {@code toUpperCase(Locale.ROOT)}. 
2918 * 2919 * @return the {@code String}, converted to uppercase. 2920 * @see java.lang.String#toUpperCase(Locale) 2921 */ 2922 public String toUpperCase() { 2923 return toUpperCase(Locale.getDefault()); 2924 } 2925 2926 /** 2927 * Returns a string whose value is this string, with any leading and trailing 2928 * whitespace removed. 2929 * <p> 2930 * If this {@code String} object represents an empty character 2931 * sequence, or the first and last characters of character sequence 2932 * represented by this {@code String} object both have codes 2933 * greater than {@code '\u005Cu0020'} (the space character), then a 2934 * reference to this {@code String} object is returned. 2935 * <p> 2936 * Otherwise, if there is no character with a code greater than 2937 * {@code '\u005Cu0020'} in the string, then a 2938 * {@code String} object representing an empty string is 2939 * returned. 2940 * <p> 2941 * Otherwise, let <i>k</i> be the index of the first character in the 2942 * string whose code is greater than {@code '\u005Cu0020'}, and let 2943 * <i>m</i> be the index of the last character in the string whose code 2944 * is greater than {@code '\u005Cu0020'}. A {@code String} 2945 * object is returned, representing the substring of this string that 2946 * begins with the character at index <i>k</i> and ends with the 2947 * character at index <i>m</i>-that is, the result of 2948 * {@code this.substring(k, m + 1)}. 2949 * <p> 2950 * This method may be used to trim whitespace (as defined above) from 2951 * the beginning and end of a string. 2952 * 2953 * @return A string whose value is this string, with any leading and trailing white 2954 * space removed, or this string if it has no leading or 2955 * trailing white space. 
2956 */ 2957 public String trim() { 2958 int len = value.length; 2959 int st = 0; 2960 char[] val = value; /* avoid getfield opcode */ 2961 2962 while ((st < len) && (val[st] <= ' ')) { 2963 st++; 2964 } 2965 while ((st < len) && (val[len - 1] <= ' ')) { 2966 len--; 2967 } 2968 return ((st > 0) || (len < value.length)) ? substring(st, len) : this; 2969 } 2970 2971 /** 2972 * This object (which is already a string!) is itself returned. 2973 * 2974 * @return the string itself. 2975 */ 2976 public String toString() { 2977 return this; 2978 } 2979 2980 /** 2981 * Converts this string to a new character array. 2982 * 2983 * @return a newly allocated character array whose length is the length 2984 * of this string and whose contents are initialized to contain 2985 * the character sequence represented by this string. 2986 */ 2987 public char[] toCharArray() { 2988 // Cannot use Arrays.copyOf because of class initialization order issues 2989 char result[] = new char[value.length]; 2990 System.arraycopy(value, 0, result, 0, value.length); 2991 return result; 2992 } 2993 2994 /** 2995 * Returns a formatted string using the specified format string and 2996 * arguments. 2997 * 2998 * <p> The locale always used is the one returned by {@link 2999 * java.util.Locale#getDefault() Locale.getDefault()}. 3000 * 3001 * @param format 3002 * A <a href="../util/Formatter.html#syntax">format string</a> 3003 * 3004 * @param args 3005 * Arguments referenced by the format specifiers in the format 3006 * string. If there are more arguments than format specifiers, the 3007 * extra arguments are ignored. The number of arguments is 3008 * variable and may be zero. The maximum number of arguments is 3009 * limited by the maximum dimension of a Java array as defined by 3010 * <cite>The Java™ Virtual Machine Specification</cite>. 3011 * The behaviour on a 3012 * {@code null} argument depends on the <a 3013 * href="../util/Formatter.html#syntax">conversion</a>. 
3014 * 3015 * @throws java.util.IllegalFormatException 3016 * If a format string contains an illegal syntax, a format 3017 * specifier that is incompatible with the given arguments, 3018 * insufficient arguments given the format string, or other 3019 * illegal conditions. For specification of all possible 3020 * formatting errors, see the <a 3021 * href="../util/Formatter.html#detail">Details</a> section of the 3022 * formatter class specification. 3023 * 3024 * @return A formatted string 3025 * 3026 * @see java.util.Formatter 3027 * @since 1.5 3028 */ 3029 public static String format(String format, Object... args) { 3030 return new Formatter().format(format, args).toString(); 3031 } 3032 3033 /** 3034 * Returns a formatted string using the specified locale, format string, 3035 * and arguments. 3036 * 3037 * @param l 3038 * The {@linkplain java.util.Locale locale} to apply during 3039 * formatting. If {@code l} is {@code null} then no localization 3040 * is applied. 3041 * 3042 * @param format 3043 * A <a href="../util/Formatter.html#syntax">format string</a> 3044 * 3045 * @param args 3046 * Arguments referenced by the format specifiers in the format 3047 * string. If there are more arguments than format specifiers, the 3048 * extra arguments are ignored. The number of arguments is 3049 * variable and may be zero. The maximum number of arguments is 3050 * limited by the maximum dimension of a Java array as defined by 3051 * <cite>The Java™ Virtual Machine Specification</cite>. 3052 * The behaviour on a 3053 * {@code null} argument depends on the 3054 * <a href="../util/Formatter.html#syntax">conversion</a>. 3055 * 3056 * @throws java.util.IllegalFormatException 3057 * If a format string contains an illegal syntax, a format 3058 * specifier that is incompatible with the given arguments, 3059 * insufficient arguments given the format string, or other 3060 * illegal conditions. 
For specification of all possible 3061 * formatting errors, see the <a 3062 * href="../util/Formatter.html#detail">Details</a> section of the 3063 * formatter class specification 3064 * 3065 * @return A formatted string 3066 * 3067 * @see java.util.Formatter 3068 * @since 1.5 3069 */ 3070 public static String format(Locale l, String format, Object... args) { 3071 return new Formatter(l).format(format, args).toString(); 3072 } 3073 3074 /** 3075 * Returns the string representation of the {@code Object} argument. 3076 * 3077 * @param obj an {@code Object}. 3078 * @return if the argument is {@code null}, then a string equal to 3079 * {@code "null"}; otherwise, the value of 3080 * {@code obj.toString()} is returned. 3081 * @see java.lang.Object#toString() 3082 */ 3083 public static String valueOf(Object obj) { 3084 return (obj == null) ? "null" : obj.toString(); 3085 } 3086 3087 /** 3088 * Returns the string representation of the {@code char} array 3089 * argument. The contents of the character array are copied; subsequent 3090 * modification of the character array does not affect the returned 3091 * string. 3092 * 3093 * @param data the character array. 3094 * @return a {@code String} that contains the characters of the 3095 * character array. 3096 */ 3097 public static String valueOf(char data[]) { 3098 return new String(data); 3099 } 3100 3101 /** 3102 * Returns the string representation of a specific subarray of the 3103 * {@code char} array argument. 3104 * <p> 3105 * The {@code offset} argument is the index of the first 3106 * character of the subarray. The {@code count} argument 3107 * specifies the length of the subarray. The contents of the subarray 3108 * are copied; subsequent modification of the character array does not 3109 * affect the returned string. 3110 * 3111 * @param data the character array. 3112 * @param offset initial offset of the subarray. 3113 * @param count length of the subarray. 
3114 * @return a {@code String} that contains the characters of the 3115 * specified subarray of the character array. 3116 * @exception IndexOutOfBoundsException if {@code offset} is 3117 * negative, or {@code count} is negative, or 3118 * {@code offset+count} is larger than 3119 * {@code data.length}. 3120 */ 3121 public static String valueOf(char data[], int offset, int count) { 3122 return new String(data, offset, count); 3123 } 3124 3125 /** 3126 * Equivalent to {@link #valueOf(char[], int, int)}. 3127 * 3128 * @param data the character array. 3129 * @param offset initial offset of the subarray. 3130 * @param count length of the subarray. 3131 * @return a {@code String} that contains the characters of the 3132 * specified subarray of the character array. 3133 * @exception IndexOutOfBoundsException if {@code offset} is 3134 * negative, or {@code count} is negative, or 3135 * {@code offset+count} is larger than 3136 * {@code data.length}. 3137 */ 3138 public static String copyValueOf(char data[], int offset, int count) { 3139 return new String(data, offset, count); 3140 } 3141 3142 /** 3143 * Equivalent to {@link #valueOf(char[])}. 3144 * 3145 * @param data the character array. 3146 * @return a {@code String} that contains the characters of the 3147 * character array. 3148 */ 3149 public static String copyValueOf(char data[]) { 3150 return new String(data); 3151 } 3152 3153 /** 3154 * Returns the string representation of the {@code boolean} argument. 3155 * 3156 * @param b a {@code boolean}. 3157 * @return if the argument is {@code true}, a string equal to 3158 * {@code "true"} is returned; otherwise, a string equal to 3159 * {@code "false"} is returned. 3160 */ 3161 public static String valueOf(boolean b) { 3162 return b ? "true" : "false"; 3163 } 3164 3165 /** 3166 * Returns the string representation of the {@code char} 3167 * argument. 3168 * 3169 * @param c a {@code char}. 
3170 * @return a string of length {@code 1} containing 3171 * as its single character the argument {@code c}. 3172 */ 3173 public static String valueOf(char c) { 3174 char data[] = {c}; 3175 return new String(data, true); 3176 } 3177 3178 /** 3179 * Returns the string representation of the {@code int} argument. 3180 * <p> 3181 * The representation is exactly the one returned by the 3182 * {@code Integer.toString} method of one argument. 3183 * 3184 * @param i an {@code int}. 3185 * @return a string representation of the {@code int} argument. 3186 * @see java.lang.Integer#toString(int, int) 3187 */ 3188 public static String valueOf(int i) { 3189 return Integer.toString(i); 3190 } 3191 3192 /** 3193 * Returns the string representation of the {@code long} argument. 3194 * <p> 3195 * The representation is exactly the one returned by the 3196 * {@code Long.toString} method of one argument. 3197 * 3198 * @param l a {@code long}. 3199 * @return a string representation of the {@code long} argument. 3200 * @see java.lang.Long#toString(long) 3201 */ 3202 public static String valueOf(long l) { 3203 return Long.toString(l); 3204 } 3205 3206 /** 3207 * Returns the string representation of the {@code float} argument. 3208 * <p> 3209 * The representation is exactly the one returned by the 3210 * {@code Float.toString} method of one argument. 3211 * 3212 * @param f a {@code float}. 3213 * @return a string representation of the {@code float} argument. 3214 * @see java.lang.Float#toString(float) 3215 */ 3216 public static String valueOf(float f) { 3217 return Float.toString(f); 3218 } 3219 3220 /** 3221 * Returns the string representation of the {@code double} argument. 3222 * <p> 3223 * The representation is exactly the one returned by the 3224 * {@code Double.toString} method of one argument. 3225 * 3226 * @param d a {@code double}. 3227 * @return a string representation of the {@code double} argument. 
* @see     java.lang.Double#toString(double)
     */
    public static String valueOf(double d) {
        final String text = Double.toString(d);
        return text;
    }

    /**
     * Returns a canonical representation for the string object.
     * <p>
     * A pool of strings, initially empty, is maintained privately by the
     * class {@code String}.
     * <p>
     * When the intern method is invoked, if the pool already contains a
     * string equal to this {@code String} object as determined by
     * the {@link #equals(Object)} method, then the string from the pool is
     * returned. Otherwise, this {@code String} object is added to the
     * pool and a reference to this {@code String} object is returned.
     * <p>
     * It follows that for any two strings {@code s} and {@code t},
     * {@code s.intern() == t.intern()} is {@code true}
     * if and only if {@code s.equals(t)} is {@code true}.
     * <p>
     * All literal strings and string-valued constant expressions are
     * interned. String literals are defined in section 3.10.5 of
     * <cite>The Java™ Language Specification</cite>.
     *
     * @return  a string that has the same contents as this string, but is
     *          guaranteed to be from a pool of unique strings.
     */
    public native String intern();
}