1 /* 2 * Copyright (c) 1994, 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.io.ObjectStreamField; 29 import java.io.UnsupportedEncodingException; 30 import java.nio.charset.Charset; 31 import java.nio.ByteBuffer; 32 import java.util.ArrayList; 33 import java.util.Arrays; 34 import java.util.Comparator; 35 import java.util.Formatter; 36 import java.util.Locale; 37 import java.util.Objects; 38 import java.util.StringJoiner; 39 import java.util.regex.Matcher; 40 import java.util.regex.Pattern; 41 import java.util.regex.PatternSyntaxException; 42 43 /** 44 * The {@code String} class represents character strings. All 45 * string literals in Java programs, such as {@code "abc"}, are 46 * implemented as instances of this class. 
 * <p>
 * Strings are constant; their values cannot be changed after they
 * are created. String buffers support mutable strings.
 * Because String objects are immutable they can be shared. For example:
 * <blockquote><pre>
 *     String str = "abc";
 * </pre></blockquote><p>
 * is equivalent to:
 * <blockquote><pre>
 *     char data[] = {'a', 'b', 'c'};
 *     String str = new String(data);
 * </pre></blockquote><p>
 * Here are some more examples of how strings can be used:
 * <blockquote><pre>
 *     System.out.println("abc");
 *     String cde = "cde";
 *     System.out.println("abc" + cde);
 *     String c = "abc".substring(2,3);
 *     String d = cde.substring(1, 2);
 * </pre></blockquote>
 * <p>
 * The class {@code String} includes methods for examining
 * individual characters of the sequence, for comparing strings, for
 * searching strings, for extracting substrings, and for creating a
 * copy of a string with all characters translated to uppercase or to
 * lowercase. Case mapping is based on the Unicode Standard version
 * specified by the {@link java.lang.Character Character} class.
 * <p>
 * The Java language provides special support for the string
 * concatenation operator ( + ), and for conversion of
 * other objects to strings. String concatenation is implemented
 * through the {@code StringBuilder} (or {@code StringBuffer})
 * class and its {@code append} method.
 * String conversions are implemented through the method
 * {@code toString}, defined by {@code Object} and
 * inherited by all classes in Java. For additional information on
 * string concatenation and conversion, see Gosling, Joy, and Steele,
 * <i>The Java Language Specification</i>.
 *
 * <p> Unless otherwise noted, passing a {@code null} argument to a constructor
 * or method in this class will cause a {@link NullPointerException} to be
 * thrown.
89 * 90 * <p>A {@code String} represents a string in the UTF-16 format 91 * in which <em>supplementary characters</em> are represented by <em>surrogate 92 * pairs</em> (see the section <a href="Character.html#unicode">Unicode 93 * Character Representations</a> in the {@code Character} class for 94 * more information). 95 * Index values refer to {@code char} code units, so a supplementary 96 * character uses two positions in a {@code String}. 97 * <p>The {@code String} class provides methods for dealing with 98 * Unicode code points (i.e., characters), in addition to those for 99 * dealing with Unicode code units (i.e., {@code char} values). 100 * 101 * @author Lee Boynton 102 * @author Arthur van Hoff 103 * @author Martin Buchholz 104 * @author Ulf Zibis 105 * @see java.lang.Object#toString() 106 * @see java.lang.StringBuffer 107 * @see java.lang.StringBuilder 108 * @see java.nio.charset.Charset 109 * @since 1.0 110 */ 111 112 public final class String 113 implements java.io.Serializable, Comparable<String>, CharSequence { 114 /** The value is used for character storage. */ 115 private final char value[]; 116 117 /** Cache the hash code for the string */ 118 private int hash; // Default to 0 119 120 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 121 private static final long serialVersionUID = -6849794470754667710L; 122 123 /** 124 * Class String is special cased within the Serialization Stream Protocol. 125 * 126 * A String instance is written into an ObjectOutputStream according to 127 * <a href="{@docRoot}/../platform/serialization/spec/output.html"> 128 * Object Serialization Specification, Section 6.2, "Stream Elements"</a> 129 */ 130 private static final ObjectStreamField[] serialPersistentFields = 131 new ObjectStreamField[0]; 132 133 /** 134 * Initializes a newly created {@code String} object so that it represents 135 * an empty character sequence. Note that use of this constructor is 136 * unnecessary since Strings are immutable. 
137 */ 138 public String() { 139 this.value = new char[0]; 140 } 141 142 /** 143 * Initializes a newly created {@code String} object so that it represents 144 * the same sequence of characters as the argument; in other words, the 145 * newly created string is a copy of the argument string. Unless an 146 * explicit copy of {@code original} is needed, use of this constructor is 147 * unnecessary since Strings are immutable. 148 * 149 * @param original 150 * A {@code String} 151 */ 152 public String(String original) { 153 this.value = original.value; 154 this.hash = original.hash; 155 } 156 157 /** 158 * Allocates a new {@code String} so that it represents the sequence of 159 * characters currently contained in the character array argument. The 160 * contents of the character array are copied; subsequent modification of 161 * the character array does not affect the newly created string. 162 * 163 * @param value 164 * The initial value of the string 165 */ 166 public String(char value[]) { 167 this.value = Arrays.copyOf(value, value.length); 168 } 169 170 /** 171 * Allocates a new {@code String} that contains characters from a subarray 172 * of the character array argument. The {@code offset} argument is the 173 * index of the first character of the subarray and the {@code count} 174 * argument specifies the length of the subarray. The contents of the 175 * subarray are copied; subsequent modification of the character array does 176 * not affect the newly created string. 
177 * 178 * @param value 179 * Array that is the source of characters 180 * 181 * @param offset 182 * The initial offset 183 * 184 * @param count 185 * The length 186 * 187 * @throws IndexOutOfBoundsException 188 * If the {@code offset} and {@code count} arguments index 189 * characters outside the bounds of the {@code value} array 190 */ 191 public String(char value[], int offset, int count) { 192 if (offset < 0) { 193 throw new StringIndexOutOfBoundsException(offset); 194 } 195 if (count < 0) { 196 throw new StringIndexOutOfBoundsException(count); 197 } 198 // Note: offset or count might be near -1>>>1. 199 if (offset > value.length - count) { 200 throw new StringIndexOutOfBoundsException(offset + count); 201 } 202 this.value = Arrays.copyOfRange(value, offset, offset+count); 203 } 204 205 /** 206 * Allocates a new {@code String} that contains characters from a subarray 207 * of the <a href="Character.html#unicode">Unicode code point</a> array 208 * argument. The {@code offset} argument is the index of the first code 209 * point of the subarray and the {@code count} argument specifies the 210 * length of the subarray. The contents of the subarray are converted to 211 * {@code char}s; subsequent modification of the {@code int} array does not 212 * affect the newly created string. 
213 * 214 * @param codePoints 215 * Array that is the source of Unicode code points 216 * 217 * @param offset 218 * The initial offset 219 * 220 * @param count 221 * The length 222 * 223 * @throws IllegalArgumentException 224 * If any invalid Unicode code point is found in {@code 225 * codePoints} 226 * 227 * @throws IndexOutOfBoundsException 228 * If the {@code offset} and {@code count} arguments index 229 * characters outside the bounds of the {@code codePoints} array 230 * 231 * @since 1.5 232 */ 233 public String(int[] codePoints, int offset, int count) { 234 if (offset < 0) { 235 throw new StringIndexOutOfBoundsException(offset); 236 } 237 if (count < 0) { 238 throw new StringIndexOutOfBoundsException(count); 239 } 240 // Note: offset or count might be near -1>>>1. 241 if (offset > codePoints.length - count) { 242 throw new StringIndexOutOfBoundsException(offset + count); 243 } 244 245 final int end = offset + count; 246 247 // Pass 1: Compute precise size of char[] 248 int n = count; 249 for (int i = offset; i < end; i++) { 250 int c = codePoints[i]; 251 if (Character.isBmpCodePoint(c)) 252 continue; 253 else if (Character.isValidCodePoint(c)) 254 n++; 255 else throw new IllegalArgumentException(Integer.toString(c)); 256 } 257 258 // Pass 2: Allocate and fill in char[] 259 final char[] v = new char[n]; 260 261 for (int i = offset, j = 0; i < end; i++, j++) { 262 int c = codePoints[i]; 263 if (Character.isBmpCodePoint(c)) 264 v[j] = (char)c; 265 else 266 Character.toSurrogates(c, v, j++); 267 } 268 269 this.value = v; 270 } 271 272 /** 273 * Allocates a new {@code String} constructed from a subarray of an array 274 * of 8-bit integer values. 275 * 276 * <p> The {@code offset} argument is the index of the first byte of the 277 * subarray, and the {@code count} argument specifies the length of the 278 * subarray. 279 * 280 * <p> Each {@code byte} in the subarray is converted to a {@code char} as 281 * specified in the method above. 
282 * 283 * @deprecated This method does not properly convert bytes into characters. 284 * As of JDK 1.1, the preferred way to do this is via the 285 * {@code String} constructors that take a {@link 286 * java.nio.charset.Charset}, charset name, or that use the platform's 287 * default charset. 288 * 289 * @param ascii 290 * The bytes to be converted to characters 291 * 292 * @param hibyte 293 * The top 8 bits of each 16-bit Unicode code unit 294 * 295 * @param offset 296 * The initial offset 297 * @param count 298 * The length 299 * 300 * @throws IndexOutOfBoundsException 301 * If the {@code offset} or {@code count} argument is invalid 302 * 303 * @see #String(byte[], int) 304 * @see #String(byte[], int, int, java.lang.String) 305 * @see #String(byte[], int, int, java.nio.charset.Charset) 306 * @see #String(byte[], int, int) 307 * @see #String(byte[], java.lang.String) 308 * @see #String(byte[], java.nio.charset.Charset) 309 * @see #String(byte[]) 310 */ 311 @Deprecated 312 public String(byte ascii[], int hibyte, int offset, int count) { 313 checkBounds(ascii, offset, count); 314 char value[] = new char[count]; 315 316 if (hibyte == 0) { 317 for (int i = count; i-- > 0;) { 318 value[i] = (char)(ascii[i + offset] & 0xff); 319 } 320 } else { 321 hibyte <<= 8; 322 for (int i = count; i-- > 0;) { 323 value[i] = (char)(hibyte | (ascii[i + offset] & 0xff)); 324 } 325 } 326 this.value = value; 327 } 328 329 /** 330 * Allocates a new {@code String} containing characters constructed from 331 * an array of 8-bit integer values. Each character <i>c</i>in the 332 * resulting string is constructed from the corresponding component 333 * <i>b</i> in the byte array such that: 334 * 335 * <blockquote><pre> 336 * <b><i>c</i></b> == (char)(((hibyte & 0xff) << 8) 337 * | (<b><i>b</i></b> & 0xff)) 338 * </pre></blockquote> 339 * 340 * @deprecated This method does not properly convert bytes into 341 * characters. 
As of JDK 1.1, the preferred way to do this is via the 342 * {@code String} constructors that take a {@link 343 * java.nio.charset.Charset}, charset name, or that use the platform's 344 * default charset. 345 * 346 * @param ascii 347 * The bytes to be converted to characters 348 * 349 * @param hibyte 350 * The top 8 bits of each 16-bit Unicode code unit 351 * 352 * @see #String(byte[], int, int, java.lang.String) 353 * @see #String(byte[], int, int, java.nio.charset.Charset) 354 * @see #String(byte[], int, int) 355 * @see #String(byte[], java.lang.String) 356 * @see #String(byte[], java.nio.charset.Charset) 357 * @see #String(byte[]) 358 */ 359 @Deprecated 360 public String(byte ascii[], int hibyte) { 361 this(ascii, hibyte, 0, ascii.length); 362 } 363 364 private static void checkBounds(byte[] bytes, int offset, int length) { 365 checkBounds(bytes.length, offset, length); 366 } 367 368 private static void checkBounds(ByteBuffer bytes, int offset, int length) { 369 checkBounds(bytes.capacity(), offset, length); 370 } 371 372 /* Common private utility method used to bounds check the byte array 373 * and requested offset & length values used by the String(byte[],..) 374 * constructors. 375 */ 376 private static void checkBounds(int lengthOfBuffer, int offset, int length) { 377 if (length < 0) 378 throw new StringIndexOutOfBoundsException(length); 379 if (offset < 0) 380 throw new StringIndexOutOfBoundsException(offset); 381 if (offset > lengthOfBuffer - length) 382 throw new StringIndexOutOfBoundsException(offset + length); 383 } 384 385 /** 386 * Constructs a new {@code String} by decoding the specified subarray of 387 * bytes using the specified charset. The length of the new {@code String} 388 * is a function of the charset, and hence may not be equal to the length 389 * of the subarray. 390 * 391 * <p> The behavior of this constructor when the given bytes are not valid 392 * in the given charset is unspecified. 
The {@link 393 * java.nio.charset.CharsetDecoder} class should be used when more control 394 * over the decoding process is required. 395 * 396 * @param bytes 397 * The bytes to be decoded into characters 398 * 399 * @param offset 400 * The index of the first byte to decode 401 * 402 * @param length 403 * The number of bytes to decode 404 405 * @param charsetName 406 * The name of a supported {@linkplain java.nio.charset.Charset 407 * charset} 408 * 409 * @throws UnsupportedEncodingException 410 * If the named charset is not supported 411 * 412 * @throws IndexOutOfBoundsException 413 * If the {@code offset} and {@code length} arguments index 414 * characters outside the bounds of the {@code bytes} array 415 * 416 * @since 1.1 417 */ 418 public String(byte bytes[], int offset, int length, String charsetName) 419 throws UnsupportedEncodingException { 420 if (charsetName == null) 421 throw new NullPointerException("charsetName"); 422 checkBounds(bytes, offset, length); 423 this.value = StringCoding.decode(charsetName, bytes, offset, length); 424 } 425 426 /** 427 * Constructs a new {@code String} by decoding the specified subarray of 428 * bytes using the specified {@linkplain java.nio.charset.Charset charset}. 429 * The length of the new {@code String} is a function of the charset, and 430 * hence may not be equal to the length of the subarray. 431 * 432 * <p> This method always replaces malformed-input and unmappable-character 433 * sequences with this charset's default replacement string. The {@link 434 * java.nio.charset.CharsetDecoder} class should be used when more control 435 * over the decoding process is required. 
436 * 437 * @param bytes 438 * The bytes to be decoded into characters 439 * 440 * @param offset 441 * The index of the first byte to decode 442 * 443 * @param length 444 * The number of bytes to decode 445 * 446 * @param charset 447 * The {@linkplain java.nio.charset.Charset charset} to be used to 448 * decode the {@code bytes} 449 * 450 * @throws IndexOutOfBoundsException 451 * If the {@code offset} and {@code length} arguments index 452 * characters outside the bounds of the {@code bytes} array 453 * 454 * @since 1.6 455 */ 456 public String(byte bytes[], int offset, int length, Charset charset) { 457 if (charset == null) 458 throw new NullPointerException("charset"); 459 checkBounds(bytes, offset, length); 460 this.value = StringCoding.decode(charset, bytes, offset, length); 461 } 462 463 /** 464 * Constructs a new {@code String} by decoding the specified 465 * {@linkplain java.nio.ByteBuffer byte buffer} using the specified 466 * {@linkplain java.nio.charset.Charset charset}. 467 * The length of the new {@code String} is a function of the charset, and 468 * hence may not be equal to the length of the subarray. 469 * 470 * <p> This method always replaces malformed-input and unmappable-character 471 * sequences with this charset's default replacement string. The {@link 472 * java.nio.charset.CharsetDecoder} class should be used when more control 473 * over the decoding process is required. 
474 * 475 * @param bytes 476 * The bytes to be decoded into characters 477 * 478 * @param offset 479 * The index of the first byte to decode 480 * 481 * @param length 482 * The number of bytes to decode 483 * 484 * @param charset 485 * The {@linkplain java.nio.charset.Charset charset} to be used to 486 * decode the {@code bytes} 487 * 488 * @throws IndexOutOfBoundsException 489 * If the {@code offset} and {@code length} arguments index 490 * characters outside the bounds of the {@code bytes} array 491 * 492 * @since 1.9 493 */ 494 public String(ByteBuffer bytes, int offset, int length, Charset charset) { 495 if (charset == null) 496 throw new NullPointerException("charset"); 497 checkBounds(bytes, offset, length); 498 this.value = StringCoding.decode(charset, bytes, offset, length); 499 } 500 501 /** 502 * Constructs a new {@code String} by decoding the specified array of bytes 503 * using the specified {@linkplain java.nio.charset.Charset charset}. The 504 * length of the new {@code String} is a function of the charset, and hence 505 * may not be equal to the length of the byte array. 506 * 507 * <p> The behavior of this constructor when the given bytes are not valid 508 * in the given charset is unspecified. The {@link 509 * java.nio.charset.CharsetDecoder} class should be used when more control 510 * over the decoding process is required. 511 * 512 * @param bytes 513 * The bytes to be decoded into characters 514 * 515 * @param charsetName 516 * The name of a supported {@linkplain java.nio.charset.Charset 517 * charset} 518 * 519 * @throws UnsupportedEncodingException 520 * If the named charset is not supported 521 * 522 * @since 1.1 523 */ 524 public String(byte bytes[], String charsetName) 525 throws UnsupportedEncodingException { 526 this(bytes, 0, bytes.length, charsetName); 527 } 528 529 /** 530 * Constructs a new {@code String} by decoding the specified array of 531 * bytes using the specified {@linkplain java.nio.charset.Charset charset}. 
532 * The length of the new {@code String} is a function of the charset, and 533 * hence may not be equal to the length of the byte array. 534 * 535 * <p> This method always replaces malformed-input and unmappable-character 536 * sequences with this charset's default replacement string. The {@link 537 * java.nio.charset.CharsetDecoder} class should be used when more control 538 * over the decoding process is required. 539 * 540 * @param bytes 541 * The bytes to be decoded into characters 542 * 543 * @param charset 544 * The {@linkplain java.nio.charset.Charset charset} to be used to 545 * decode the {@code bytes} 546 * 547 * @since 1.6 548 */ 549 public String(byte bytes[], Charset charset) { 550 this(bytes, 0, bytes.length, charset); 551 } 552 553 /** 554 * Constructs a new {@code String} by decoding the specified subarray of 555 * bytes using the platform's default charset. The length of the new 556 * {@code String} is a function of the charset, and hence may not be equal 557 * to the length of the subarray. 558 * 559 * <p> The behavior of this constructor when the given bytes are not valid 560 * in the default charset is unspecified. The {@link 561 * java.nio.charset.CharsetDecoder} class should be used when more control 562 * over the decoding process is required. 563 * 564 * @param bytes 565 * The bytes to be decoded into characters 566 * 567 * @param offset 568 * The index of the first byte to decode 569 * 570 * @param length 571 * The number of bytes to decode 572 * 573 * @throws IndexOutOfBoundsException 574 * If the {@code offset} and the {@code length} arguments index 575 * characters outside the bounds of the {@code bytes} array 576 * 577 * @since 1.1 578 */ 579 public String(byte bytes[], int offset, int length) { 580 checkBounds(bytes, offset, length); 581 this.value = StringCoding.decode(bytes, offset, length); 582 } 583 584 /** 585 * Constructs a new {@code String} by decoding the specified array of bytes 586 * using the platform's default charset. 
The length of the new {@code 587 * String} is a function of the charset, and hence may not be equal to the 588 * length of the byte array. 589 * 590 * <p> The behavior of this constructor when the given bytes are not valid 591 * in the default charset is unspecified. The {@link 592 * java.nio.charset.CharsetDecoder} class should be used when more control 593 * over the decoding process is required. 594 * 595 * @param bytes 596 * The bytes to be decoded into characters 597 * 598 * @since 1.1 599 */ 600 public String(byte bytes[]) { 601 this(bytes, 0, bytes.length); 602 } 603 604 /** 605 * Allocates a new string that contains the sequence of characters 606 * currently contained in the string buffer argument. The contents of the 607 * string buffer are copied; subsequent modification of the string buffer 608 * does not affect the newly created string. 609 * 610 * @param buffer 611 * A {@code StringBuffer} 612 */ 613 public String(StringBuffer buffer) { 614 synchronized(buffer) { 615 this.value = Arrays.copyOf(buffer.getValue(), buffer.length()); 616 } 617 } 618 619 /** 620 * Allocates a new string that contains the sequence of characters 621 * currently contained in the string builder argument. The contents of the 622 * string builder are copied; subsequent modification of the string builder 623 * does not affect the newly created string. 624 * 625 * <p> This constructor is provided to ease migration to {@code 626 * StringBuilder}. Obtaining a string from a string builder via the {@code 627 * toString} method is likely to run faster and is generally preferred. 628 * 629 * @param builder 630 * A {@code StringBuilder} 631 * 632 * @since 1.5 633 */ 634 public String(StringBuilder builder) { 635 this.value = Arrays.copyOf(builder.getValue(), builder.length()); 636 } 637 638 /* 639 * Package private constructor which shares value array for speed. 640 * this constructor is always expected to be called with share==true. 
641 * a separate constructor is needed because we already have a public 642 * String(char[]) constructor that makes a copy of the given char[]. 643 */ 644 String(char[] value, boolean share) { 645 // assert share : "unshared not supported"; 646 this.value = value; 647 } 648 649 /** 650 * Returns the length of this string. 651 * The length is equal to the number of <a href="Character.html#unicode">Unicode 652 * code units</a> in the string. 653 * 654 * @return the length of the sequence of characters represented by this 655 * object. 656 */ 657 public int length() { 658 return value.length; 659 } 660 661 /** 662 * Returns {@code true} if, and only if, {@link #length()} is {@code 0}. 663 * 664 * @return {@code true} if {@link #length()} is {@code 0}, otherwise 665 * {@code false} 666 * 667 * @since 1.6 668 */ 669 public boolean isEmpty() { 670 return value.length == 0; 671 } 672 673 /** 674 * Returns the {@code char} value at the 675 * specified index. An index ranges from {@code 0} to 676 * {@code length() - 1}. The first {@code char} value of the sequence 677 * is at index {@code 0}, the next at index {@code 1}, 678 * and so on, as for array indexing. 679 * 680 * <p>If the {@code char} value specified by the index is a 681 * <a href="Character.html#unicode">surrogate</a>, the surrogate 682 * value is returned. 683 * 684 * @param index the index of the {@code char} value. 685 * @return the {@code char} value at the specified index of this string. 686 * The first {@code char} value is at index {@code 0}. 687 * @exception IndexOutOfBoundsException if the {@code index} 688 * argument is negative or not less than the length of this 689 * string. 690 */ 691 public char charAt(int index) { 692 if ((index < 0) || (index >= value.length)) { 693 throw new StringIndexOutOfBoundsException(index); 694 } 695 return value[index]; 696 } 697 698 /** 699 * Returns the character (Unicode code point) at the specified 700 * index. 
The index refers to {@code char} values 701 * (Unicode code units) and ranges from {@code 0} to 702 * {@link #length()}{@code - 1}. 703 * 704 * <p> If the {@code char} value specified at the given index 705 * is in the high-surrogate range, the following index is less 706 * than the length of this {@code String}, and the 707 * {@code char} value at the following index is in the 708 * low-surrogate range, then the supplementary code point 709 * corresponding to this surrogate pair is returned. Otherwise, 710 * the {@code char} value at the given index is returned. 711 * 712 * @param index the index to the {@code char} values 713 * @return the code point value of the character at the 714 * {@code index} 715 * @exception IndexOutOfBoundsException if the {@code index} 716 * argument is negative or not less than the length of this 717 * string. 718 * @since 1.5 719 */ 720 public int codePointAt(int index) { 721 if ((index < 0) || (index >= value.length)) { 722 throw new StringIndexOutOfBoundsException(index); 723 } 724 return Character.codePointAtImpl(value, index, value.length); 725 } 726 727 /** 728 * Returns the character (Unicode code point) before the specified 729 * index. The index refers to {@code char} values 730 * (Unicode code units) and ranges from {@code 1} to {@link 731 * CharSequence#length() length}. 732 * 733 * <p> If the {@code char} value at {@code (index - 1)} 734 * is in the low-surrogate range, {@code (index - 2)} is not 735 * negative, and the {@code char} value at {@code (index - 736 * 2)} is in the high-surrogate range, then the 737 * supplementary code point value of the surrogate pair is 738 * returned. If the {@code char} value at {@code index - 739 * 1} is an unpaired low-surrogate or a high-surrogate, the 740 * surrogate value is returned. 741 * 742 * @param index the index following the code point that should be returned 743 * @return the Unicode code point value before the given index. 
744 * @exception IndexOutOfBoundsException if the {@code index} 745 * argument is less than 1 or greater than the length 746 * of this string. 747 * @since 1.5 748 */ 749 public int codePointBefore(int index) { 750 int i = index - 1; 751 if ((i < 0) || (i >= value.length)) { 752 throw new StringIndexOutOfBoundsException(index); 753 } 754 return Character.codePointBeforeImpl(value, index, 0); 755 } 756 757 /** 758 * Returns the number of Unicode code points in the specified text 759 * range of this {@code String}. The text range begins at the 760 * specified {@code beginIndex} and extends to the 761 * {@code char} at index {@code endIndex - 1}. Thus the 762 * length (in {@code char}s) of the text range is 763 * {@code endIndex-beginIndex}. Unpaired surrogates within 764 * the text range count as one code point each. 765 * 766 * @param beginIndex the index to the first {@code char} of 767 * the text range. 768 * @param endIndex the index after the last {@code char} of 769 * the text range. 770 * @return the number of Unicode code points in the specified text 771 * range 772 * @exception IndexOutOfBoundsException if the 773 * {@code beginIndex} is negative, or {@code endIndex} 774 * is larger than the length of this {@code String}, or 775 * {@code beginIndex} is larger than {@code endIndex}. 776 * @since 1.5 777 */ 778 public int codePointCount(int beginIndex, int endIndex) { 779 if (beginIndex < 0 || endIndex > value.length || beginIndex > endIndex) { 780 throw new IndexOutOfBoundsException(); 781 } 782 return Character.codePointCountImpl(value, beginIndex, endIndex - beginIndex); 783 } 784 785 /** 786 * Returns the index within this {@code String} that is 787 * offset from the given {@code index} by 788 * {@code codePointOffset} code points. Unpaired surrogates 789 * within the text range given by {@code index} and 790 * {@code codePointOffset} count as one code point each. 
791 * 792 * @param index the index to be offset 793 * @param codePointOffset the offset in code points 794 * @return the index within this {@code String} 795 * @exception IndexOutOfBoundsException if {@code index} 796 * is negative or larger then the length of this 797 * {@code String}, or if {@code codePointOffset} is positive 798 * and the substring starting with {@code index} has fewer 799 * than {@code codePointOffset} code points, 800 * or if {@code codePointOffset} is negative and the substring 801 * before {@code index} has fewer than the absolute value 802 * of {@code codePointOffset} code points. 803 * @since 1.5 804 */ 805 public int offsetByCodePoints(int index, int codePointOffset) { 806 if (index < 0 || index > value.length) { 807 throw new IndexOutOfBoundsException(); 808 } 809 return Character.offsetByCodePointsImpl(value, 0, value.length, 810 index, codePointOffset); 811 } 812 813 /** 814 * Copy characters from this string into dst starting at dstBegin. 815 * This method doesn't perform any range checking. 816 */ 817 void getChars(char dst[], int dstBegin) { 818 System.arraycopy(value, 0, dst, dstBegin, value.length); 819 } 820 821 /** 822 * Copies characters from this string into the destination character 823 * array. 824 * <p> 825 * The first character to be copied is at index {@code srcBegin}; 826 * the last character to be copied is at index {@code srcEnd-1} 827 * (thus the total number of characters to be copied is 828 * {@code srcEnd-srcBegin}). The characters are copied into the 829 * subarray of {@code dst} starting at index {@code dstBegin} 830 * and ending at index: 831 * <blockquote><pre> 832 * dstbegin + (srcEnd-srcBegin) - 1 833 * </pre></blockquote> 834 * 835 * @param srcBegin index of the first character in the string 836 * to copy. 837 * @param srcEnd index after the last character in the string 838 * to copy. 839 * @param dst the destination array. 840 * @param dstBegin the start offset in the destination array. 
841 * @exception IndexOutOfBoundsException If any of the following 842 * is true: 843 * <ul><li>{@code srcBegin} is negative. 844 * <li>{@code srcBegin} is greater than {@code srcEnd} 845 * <li>{@code srcEnd} is greater than the length of this 846 * string 847 * <li>{@code dstBegin} is negative 848 * <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than 849 * {@code dst.length}</ul> 850 */ 851 public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) { 852 if (srcBegin < 0) { 853 throw new StringIndexOutOfBoundsException(srcBegin); 854 } 855 if (srcEnd > value.length) { 856 throw new StringIndexOutOfBoundsException(srcEnd); 857 } 858 if (srcBegin > srcEnd) { 859 throw new StringIndexOutOfBoundsException(srcEnd - srcBegin); 860 } 861 System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin); 862 } 863 864 /** 865 * Copies characters from this string into the destination byte array. Each 866 * byte receives the 8 low-order bits of the corresponding character. The 867 * eight high-order bits of each character are not copied and do not 868 * participate in the transfer in any way. 869 * 870 * <p> The first character to be copied is at index {@code srcBegin}; the 871 * last character to be copied is at index {@code srcEnd-1}. The total 872 * number of characters to be copied is {@code srcEnd-srcBegin}. The 873 * characters, converted to bytes, are copied into the subarray of {@code 874 * dst} starting at index {@code dstBegin} and ending at index: 875 * 876 * <blockquote><pre> 877 * dstbegin + (srcEnd-srcBegin) - 1 878 * </pre></blockquote> 879 * 880 * @deprecated This method does not properly convert characters into 881 * bytes. As of JDK 1.1, the preferred way to do this is via the 882 * {@link #getBytes()} method, which uses the platform's default charset. 
883 * 884 * @param srcBegin 885 * Index of the first character in the string to copy 886 * 887 * @param srcEnd 888 * Index after the last character in the string to copy 889 * 890 * @param dst 891 * The destination array 892 * 893 * @param dstBegin 894 * The start offset in the destination array 895 * 896 * @throws IndexOutOfBoundsException 897 * If any of the following is true: 898 * <ul> 899 * <li> {@code srcBegin} is negative 900 * <li> {@code srcBegin} is greater than {@code srcEnd} 901 * <li> {@code srcEnd} is greater than the length of this String 902 * <li> {@code dstBegin} is negative 903 * <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code 904 * dst.length} 905 * </ul> 906 */ 907 @Deprecated 908 public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) { 909 if (srcBegin < 0) { 910 throw new StringIndexOutOfBoundsException(srcBegin); 911 } 912 if (srcEnd > value.length) { 913 throw new StringIndexOutOfBoundsException(srcEnd); 914 } 915 if (srcBegin > srcEnd) { 916 throw new StringIndexOutOfBoundsException(srcEnd - srcBegin); 917 } 918 Objects.requireNonNull(dst); 919 920 int j = dstBegin; 921 int n = srcEnd; 922 int i = srcBegin; 923 char[] val = value; /* avoid getfield opcode */ 924 925 while (i < n) { 926 dst[j++] = (byte)val[i++]; 927 } 928 } 929 930 /** 931 * Encodes this {@code String} into a sequence of bytes using the named 932 * charset, storing the result into a new byte array. 933 * 934 * <p> The behavior of this method when this string cannot be encoded in 935 * the given charset is unspecified. The {@link 936 * java.nio.charset.CharsetEncoder} class should be used when more control 937 * over the encoding process is required. 
938 * 939 * @param charsetName 940 * The name of a supported {@linkplain java.nio.charset.Charset 941 * charset} 942 * 943 * @return The resultant byte array 944 * 945 * @throws UnsupportedEncodingException 946 * If the named charset is not supported 947 * 948 * @since 1.1 949 */ 950 public byte[] getBytes(String charsetName) 951 throws UnsupportedEncodingException { 952 if (charsetName == null) throw new NullPointerException(); 953 return StringCoding.encode(charsetName, value, 0, value.length); 954 } 955 956 /** 957 * Encodes this {@code String} into a sequence of bytes using the given 958 * {@linkplain java.nio.charset.Charset charset}, storing the result into a 959 * new byte array. 960 * 961 * <p> This method always replaces malformed-input and unmappable-character 962 * sequences with this charset's default replacement byte array. The 963 * {@link java.nio.charset.CharsetEncoder} class should be used when more 964 * control over the encoding process is required. 965 * 966 * @param charset 967 * The {@linkplain java.nio.charset.Charset} to be used to encode 968 * the {@code String} 969 * 970 * @return The resultant byte array 971 * 972 * @since 1.6 973 */ 974 public byte[] getBytes(Charset charset) { 975 Objects.requireNonNull(charset); 976 return StringCoding.encode(charset, value, 0, value.length); 977 } 978 979 /** 980 * <p>Encodes this {@code String} into a sequence of bytes using the given 981 * {@linkplain java.nio.charset.Charset charset}, storing the result into a 982 * byte array that has been passed as an argument. 
983 * 984 * @param destBuffer 985 * The destination array 986 * 987 * @param destOffset 988 * The start offset in the destination array 989 * 990 * @param charset 991 * The {@linkplain java.nio.charset.Charset} to be used to encode 992 * the {@code String} 993 * 994 * @return the number of bytes copied 995 * 996 * @since 1.9 997 */ 998 public int getBytes(final byte[] destBuffer, final int destOffset, final Charset charset) { 999 Objects.requireNonNull(destBuffer); 1000 Objects.requireNonNull(charset); 1001 return StringCoding.encode(charset, value, 0, value.length, destBuffer, destOffset); 1002 } 1003 1004 /** 1005 * <p>Encodes this {@code String} into a sequence of bytes using the given 1006 * {@linkplain java.nio.charset.Charset charset}, storing the result into a 1007 * {@linkplain java.nio.ByteBuffer byte buffer} that has been passed as an argument. 1008 * 1009 * @param destBuffer 1010 * The destination {@linkplain java.nio.ByteBuffer} 1011 * 1012 * @param destOffset 1013 * The start offset in the destination array 1014 * 1015 * @param charset 1016 * The {@linkplain java.nio.charset.Charset} to be used to encode 1017 * the {@code String} 1018 * 1019 * @return the number of bytes copied 1020 * 1021 * @since 1.9 1022 */ 1023 public int getBytes(final ByteBuffer destBuffer, final int destOffset, final Charset charset) { 1024 Objects.requireNonNull(destBuffer); 1025 Objects.requireNonNull(charset); 1026 return StringCoding.encode(charset, value, 0, value.length, destBuffer, destOffset); 1027 } 1028 1029 /** 1030 * Encodes this {@code String} into a sequence of bytes using the 1031 * platform's default charset, storing the result into a new byte array. 1032 * 1033 * <p> The behavior of this method when this string cannot be encoded in 1034 * the default charset is unspecified. The {@link 1035 * java.nio.charset.CharsetEncoder} class should be used when more control 1036 * over the encoding process is required. 
1037 * 1038 * @return The resultant byte array 1039 * 1040 * @since 1.1 1041 */ 1042 public byte[] getBytes() { 1043 return StringCoding.encode(value, 0, value.length); 1044 } 1045 1046 /** 1047 * Compares this string to the specified object. The result is {@code 1048 * true} if and only if the argument is not {@code null} and is a {@code 1049 * String} object that represents the same sequence of characters as this 1050 * object. 1051 * 1052 * @param anObject 1053 * The object to compare this {@code String} against 1054 * 1055 * @return {@code true} if the given object represents a {@code String} 1056 * equivalent to this string, {@code false} otherwise 1057 * 1058 * @see #compareTo(String) 1059 * @see #equalsIgnoreCase(String) 1060 */ 1061 public boolean equals(Object anObject) { 1062 if (this == anObject) { 1063 return true; 1064 } 1065 if (anObject instanceof String) { 1066 String anotherString = (String)anObject; 1067 int n = value.length; 1068 if (n == anotherString.value.length) { 1069 char v1[] = value; 1070 char v2[] = anotherString.value; 1071 int i = 0; 1072 while (n-- != 0) { 1073 if (v1[i] != v2[i]) 1074 return false; 1075 i++; 1076 } 1077 return true; 1078 } 1079 } 1080 return false; 1081 } 1082 1083 /** 1084 * Compares this string to the specified {@code StringBuffer}. The result 1085 * is {@code true} if and only if this {@code String} represents the same 1086 * sequence of characters as the specified {@code StringBuffer}. This method 1087 * synchronizes on the {@code StringBuffer}. 
1088 * 1089 * @param sb 1090 * The {@code StringBuffer} to compare this {@code String} against 1091 * 1092 * @return {@code true} if this {@code String} represents the same 1093 * sequence of characters as the specified {@code StringBuffer}, 1094 * {@code false} otherwise 1095 * 1096 * @since 1.4 1097 */ 1098 public boolean contentEquals(StringBuffer sb) { 1099 return contentEquals((CharSequence)sb); 1100 } 1101 1102 private boolean nonSyncContentEquals(AbstractStringBuilder sb) { 1103 char v1[] = value; 1104 char v2[] = sb.getValue(); 1105 int n = v1.length; 1106 if (n != sb.length()) { 1107 return false; 1108 } 1109 for (int i = 0; i < n; i++) { 1110 if (v1[i] != v2[i]) { 1111 return false; 1112 } 1113 } 1114 return true; 1115 } 1116 1117 /** 1118 * Compares this string to the specified {@code CharSequence}. The 1119 * result is {@code true} if and only if this {@code String} represents the 1120 * same sequence of char values as the specified sequence. Note that if the 1121 * {@code CharSequence} is a {@code StringBuffer} then the method 1122 * synchronizes on it. 
1123 * 1124 * @param cs 1125 * The sequence to compare this {@code String} against 1126 * 1127 * @return {@code true} if this {@code String} represents the same 1128 * sequence of char values as the specified sequence, {@code 1129 * false} otherwise 1130 * 1131 * @since 1.5 1132 */ 1133 public boolean contentEquals(CharSequence cs) { 1134 // Argument is a StringBuffer, StringBuilder 1135 if (cs instanceof AbstractStringBuilder) { 1136 if (cs instanceof StringBuffer) { 1137 synchronized(cs) { 1138 return nonSyncContentEquals((AbstractStringBuilder)cs); 1139 } 1140 } else { 1141 return nonSyncContentEquals((AbstractStringBuilder)cs); 1142 } 1143 } 1144 // Argument is a String 1145 if (cs.equals(this)) 1146 return true; 1147 // Argument is a generic CharSequence 1148 char v1[] = value; 1149 int n = v1.length; 1150 if (n != cs.length()) { 1151 return false; 1152 } 1153 for (int i = 0; i < n; i++) { 1154 if (v1[i] != cs.charAt(i)) { 1155 return false; 1156 } 1157 } 1158 return true; 1159 } 1160 1161 /** 1162 * Compares this {@code String} to another {@code String}, ignoring case 1163 * considerations. Two strings are considered equal ignoring case if they 1164 * are of the same length and corresponding characters in the two strings 1165 * are equal ignoring case. 
1166 * 1167 * <p> Two characters {@code c1} and {@code c2} are considered the same 1168 * ignoring case if at least one of the following is true: 1169 * <ul> 1170 * <li> The two characters are the same (as compared by the 1171 * {@code ==} operator) 1172 * <li> Applying the method {@link 1173 * java.lang.Character#toUpperCase(char)} to each character 1174 * produces the same result 1175 * <li> Applying the method {@link 1176 * java.lang.Character#toLowerCase(char)} to each character 1177 * produces the same result 1178 * </ul> 1179 * 1180 * @param anotherString 1181 * The {@code String} to compare this {@code String} against 1182 * 1183 * @return {@code true} if the argument is not {@code null} and it 1184 * represents an equivalent {@code String} ignoring case; {@code 1185 * false} otherwise 1186 * 1187 * @see #equals(Object) 1188 */ 1189 public boolean equalsIgnoreCase(String anotherString) { 1190 return (this == anotherString) ? true 1191 : (anotherString != null) 1192 && (anotherString.value.length == value.length) 1193 && regionMatches(true, 0, anotherString, 0, value.length); 1194 } 1195 1196 /** 1197 * Compares two strings lexicographically. 1198 * The comparison is based on the Unicode value of each character in 1199 * the strings. The character sequence represented by this 1200 * {@code String} object is compared lexicographically to the 1201 * character sequence represented by the argument string. The result is 1202 * a negative integer if this {@code String} object 1203 * lexicographically precedes the argument string. The result is a 1204 * positive integer if this {@code String} object lexicographically 1205 * follows the argument string. The result is zero if the strings 1206 * are equal; {@code compareTo} returns {@code 0} exactly when 1207 * the {@link #equals(Object)} method would return {@code true}. 1208 * <p> 1209 * This is the definition of lexicographic ordering. 
If two strings are
     * different, then either they have different characters at some index
     * that is a valid index for both strings, or their lengths are different,
     * or both. If they have different characters at one or more index
     * positions, let <i>k</i> be the smallest such index; then the string
     * whose character at position <i>k</i> has the smaller value, as
     * determined by using the {@code <} operator, lexicographically precedes the
     * other string. In this case, {@code compareTo} returns the
     * difference of the two character values at position {@code k} in
     * the two strings -- that is, the value:
     * <blockquote><pre>
     * this.charAt(k)-anotherString.charAt(k)
     * </pre></blockquote>
     * If there is no index position at which they differ, then the shorter
     * string lexicographically precedes the longer string. In this case,
     * {@code compareTo} returns the difference of the lengths of the
     * strings -- that is, the value:
     * <blockquote><pre>
     * this.length()-anotherString.length()
     * </pre></blockquote>
     *
     * @param   anotherString   the {@code String} to be compared.
     * @return  the value {@code 0} if the argument string is equal to
     *          this string; a value less than {@code 0} if this string
     *          is lexicographically less than the string argument; and a
     *          value greater than {@code 0} if this string is
     *          lexicographically greater than the string argument.
1236 */ 1237 public int compareTo(String anotherString) { 1238 int len1 = value.length; 1239 int len2 = anotherString.value.length; 1240 int lim = Math.min(len1, len2); 1241 char v1[] = value; 1242 char v2[] = anotherString.value; 1243 1244 int k = 0; 1245 while (k < lim) { 1246 char c1 = v1[k]; 1247 char c2 = v2[k]; 1248 if (c1 != c2) { 1249 return c1 - c2; 1250 } 1251 k++; 1252 } 1253 return len1 - len2; 1254 } 1255 1256 /** 1257 * A Comparator that orders {@code String} objects as by 1258 * {@code compareToIgnoreCase}. This comparator is serializable. 1259 * <p> 1260 * Note that this Comparator does <em>not</em> take locale into account, 1261 * and will result in an unsatisfactory ordering for certain locales. 1262 * The java.text package provides <em>Collators</em> to allow 1263 * locale-sensitive ordering. 1264 * 1265 * @see java.text.Collator#compare(String, String) 1266 * @since 1.2 1267 */ 1268 public static final Comparator<String> CASE_INSENSITIVE_ORDER 1269 = new CaseInsensitiveComparator(); 1270 private static class CaseInsensitiveComparator 1271 implements Comparator<String>, java.io.Serializable { 1272 // use serialVersionUID from JDK 1.2.2 for interoperability 1273 private static final long serialVersionUID = 8575799808933029326L; 1274 1275 public int compare(String s1, String s2) { 1276 int n1 = s1.length(); 1277 int n2 = s2.length(); 1278 int min = Math.min(n1, n2); 1279 for (int i = 0; i < min; i++) { 1280 char c1 = s1.charAt(i); 1281 char c2 = s2.charAt(i); 1282 if (c1 != c2) { 1283 c1 = Character.toUpperCase(c1); 1284 c2 = Character.toUpperCase(c2); 1285 if (c1 != c2) { 1286 c1 = Character.toLowerCase(c1); 1287 c2 = Character.toLowerCase(c2); 1288 if (c1 != c2) { 1289 // No overflow because of numeric promotion 1290 return c1 - c2; 1291 } 1292 } 1293 } 1294 } 1295 return n1 - n2; 1296 } 1297 1298 /** Replaces the de-serialized object. 
*/ 1299 private Object readResolve() { return CASE_INSENSITIVE_ORDER; } 1300 } 1301 1302 /** 1303 * Compares two strings lexicographically, ignoring case 1304 * differences. This method returns an integer whose sign is that of 1305 * calling {@code compareTo} with normalized versions of the strings 1306 * where case differences have been eliminated by calling 1307 * {@code Character.toLowerCase(Character.toUpperCase(character))} on 1308 * each character. 1309 * <p> 1310 * Note that this method does <em>not</em> take locale into account, 1311 * and will result in an unsatisfactory ordering for certain locales. 1312 * The java.text package provides <em>collators</em> to allow 1313 * locale-sensitive ordering. 1314 * 1315 * @param str the {@code String} to be compared. 1316 * @return a negative integer, zero, or a positive integer as the 1317 * specified String is greater than, equal to, or less 1318 * than this String, ignoring case considerations. 1319 * @see java.text.Collator#compare(String, String) 1320 * @since 1.2 1321 */ 1322 public int compareToIgnoreCase(String str) { 1323 return CASE_INSENSITIVE_ORDER.compare(this, str); 1324 } 1325 1326 /** 1327 * Tests if two string regions are equal. 1328 * <p> 1329 * A substring of this {@code String} object is compared to a substring 1330 * of the argument other. The result is true if these substrings 1331 * represent identical character sequences. The substring of this 1332 * {@code String} object to be compared begins at index {@code toffset} 1333 * and has length {@code len}. The substring of other to be compared 1334 * begins at index {@code ooffset} and has length {@code len}. The 1335 * result is {@code false} if and only if at least one of the following 1336 * is true: 1337 * <ul><li>{@code toffset} is negative. 1338 * <li>{@code ooffset} is negative. 1339 * <li>{@code toffset+len} is greater than the length of this 1340 * {@code String} object. 
1341 * <li>{@code ooffset+len} is greater than the length of the other 1342 * argument. 1343 * <li>There is some nonnegative integer <i>k</i> less than {@code len} 1344 * such that: 1345 * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + } 1346 * <i>k</i>{@code )} 1347 * </ul> 1348 * 1349 * @param toffset the starting offset of the subregion in this string. 1350 * @param other the string argument. 1351 * @param ooffset the starting offset of the subregion in the string 1352 * argument. 1353 * @param len the number of characters to compare. 1354 * @return {@code true} if the specified subregion of this string 1355 * exactly matches the specified subregion of the string argument; 1356 * {@code false} otherwise. 1357 */ 1358 public boolean regionMatches(int toffset, String other, int ooffset, 1359 int len) { 1360 char ta[] = value; 1361 int to = toffset; 1362 char pa[] = other.value; 1363 int po = ooffset; 1364 // Note: toffset, ooffset, or len might be near -1>>>1. 1365 if ((ooffset < 0) || (toffset < 0) 1366 || (toffset > (long)value.length - len) 1367 || (ooffset > (long)other.value.length - len)) { 1368 return false; 1369 } 1370 while (len-- > 0) { 1371 if (ta[to++] != pa[po++]) { 1372 return false; 1373 } 1374 } 1375 return true; 1376 } 1377 1378 /** 1379 * Tests if two string regions are equal. 1380 * <p> 1381 * A substring of this {@code String} object is compared to a substring 1382 * of the argument {@code other}. The result is {@code true} if these 1383 * substrings represent character sequences that are the same, ignoring 1384 * case if and only if {@code ignoreCase} is true. The substring of 1385 * this {@code String} object to be compared begins at index 1386 * {@code toffset} and has length {@code len}. The substring of 1387 * {@code other} to be compared begins at index {@code ooffset} and 1388 * has length {@code len}. 
The result is {@code false} if and only if 1389 * at least one of the following is true: 1390 * <ul><li>{@code toffset} is negative. 1391 * <li>{@code ooffset} is negative. 1392 * <li>{@code toffset+len} is greater than the length of this 1393 * {@code String} object. 1394 * <li>{@code ooffset+len} is greater than the length of the other 1395 * argument. 1396 * <li>{@code ignoreCase} is {@code false} and there is some nonnegative 1397 * integer <i>k</i> less than {@code len} such that: 1398 * <blockquote><pre> 1399 * this.charAt(toffset+k) != other.charAt(ooffset+k) 1400 * </pre></blockquote> 1401 * <li>{@code ignoreCase} is {@code true} and there is some nonnegative 1402 * integer <i>k</i> less than {@code len} such that: 1403 * <blockquote><pre> 1404 * Character.toLowerCase(this.charAt(toffset+k)) != 1405 Character.toLowerCase(other.charAt(ooffset+k)) 1406 * </pre></blockquote> 1407 * and: 1408 * <blockquote><pre> 1409 * Character.toUpperCase(this.charAt(toffset+k)) != 1410 * Character.toUpperCase(other.charAt(ooffset+k)) 1411 * </pre></blockquote> 1412 * </ul> 1413 * 1414 * @param ignoreCase if {@code true}, ignore case when comparing 1415 * characters. 1416 * @param toffset the starting offset of the subregion in this 1417 * string. 1418 * @param other the string argument. 1419 * @param ooffset the starting offset of the subregion in the string 1420 * argument. 1421 * @param len the number of characters to compare. 1422 * @return {@code true} if the specified subregion of this string 1423 * matches the specified subregion of the string argument; 1424 * {@code false} otherwise. Whether the matching is exact 1425 * or case insensitive depends on the {@code ignoreCase} 1426 * argument. 1427 */ 1428 public boolean regionMatches(boolean ignoreCase, int toffset, 1429 String other, int ooffset, int len) { 1430 char ta[] = value; 1431 int to = toffset; 1432 char pa[] = other.value; 1433 int po = ooffset; 1434 // Note: toffset, ooffset, or len might be near -1>>>1. 
1435 if ((ooffset < 0) || (toffset < 0) 1436 || (toffset > (long)value.length - len) 1437 || (ooffset > (long)other.value.length - len)) { 1438 return false; 1439 } 1440 while (len-- > 0) { 1441 char c1 = ta[to++]; 1442 char c2 = pa[po++]; 1443 if (c1 == c2) { 1444 continue; 1445 } 1446 if (ignoreCase) { 1447 // If characters don't match but case may be ignored, 1448 // try converting both characters to uppercase. 1449 // If the results match, then the comparison scan should 1450 // continue. 1451 char u1 = Character.toUpperCase(c1); 1452 char u2 = Character.toUpperCase(c2); 1453 if (u1 == u2) { 1454 continue; 1455 } 1456 // Unfortunately, conversion to uppercase does not work properly 1457 // for the Georgian alphabet, which has strange rules about case 1458 // conversion. So we need to make one last check before 1459 // exiting. 1460 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 1461 continue; 1462 } 1463 } 1464 return false; 1465 } 1466 return true; 1467 } 1468 1469 /** 1470 * Tests if the substring of this string beginning at the 1471 * specified index starts with the specified prefix. 1472 * 1473 * @param prefix the prefix. 1474 * @param toffset where to begin looking in this string. 1475 * @return {@code true} if the character sequence represented by the 1476 * argument is a prefix of the substring of this object starting 1477 * at index {@code toffset}; {@code false} otherwise. 1478 * The result is {@code false} if {@code toffset} is 1479 * negative or greater than the length of this 1480 * {@code String} object; otherwise the result is the same 1481 * as the result of the expression 1482 * <pre> 1483 * this.substring(toffset).startsWith(prefix) 1484 * </pre> 1485 */ 1486 public boolean startsWith(String prefix, int toffset) { 1487 char ta[] = value; 1488 int to = toffset; 1489 char pa[] = prefix.value; 1490 int po = 0; 1491 int pc = prefix.value.length; 1492 // Note: toffset might be near -1>>>1. 
1493 if ((toffset < 0) || (toffset > value.length - pc)) { 1494 return false; 1495 } 1496 while (--pc >= 0) { 1497 if (ta[to++] != pa[po++]) { 1498 return false; 1499 } 1500 } 1501 return true; 1502 } 1503 1504 /** 1505 * Tests if this string starts with the specified prefix. 1506 * 1507 * @param prefix the prefix. 1508 * @return {@code true} if the character sequence represented by the 1509 * argument is a prefix of the character sequence represented by 1510 * this string; {@code false} otherwise. 1511 * Note also that {@code true} will be returned if the 1512 * argument is an empty string or is equal to this 1513 * {@code String} object as determined by the 1514 * {@link #equals(Object)} method. 1515 * @since 1.0 1516 */ 1517 public boolean startsWith(String prefix) { 1518 return startsWith(prefix, 0); 1519 } 1520 1521 /** 1522 * Tests if this string ends with the specified suffix. 1523 * 1524 * @param suffix the suffix. 1525 * @return {@code true} if the character sequence represented by the 1526 * argument is a suffix of the character sequence represented by 1527 * this object; {@code false} otherwise. Note that the 1528 * result will be {@code true} if the argument is the 1529 * empty string or is equal to this {@code String} object 1530 * as determined by the {@link #equals(Object)} method. 1531 */ 1532 public boolean endsWith(String suffix) { 1533 return startsWith(suffix, value.length - suffix.value.length); 1534 } 1535 1536 /** 1537 * Returns a hash code for this string. The hash code for a 1538 * {@code String} object is computed as 1539 * <blockquote><pre> 1540 * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1] 1541 * </pre></blockquote> 1542 * using {@code int} arithmetic, where {@code s[i]} is the 1543 * <i>i</i>th character of the string, {@code n} is the length of 1544 * the string, and {@code ^} indicates exponentiation. 1545 * (The hash value of the empty string is zero.) 1546 * 1547 * @return a hash code value for this object. 
1548 */ 1549 public int hashCode() { 1550 int h = hash; 1551 if (h == 0 && value.length > 0) { 1552 char val[] = value; 1553 1554 for (int i = 0; i < value.length; i++) { 1555 h = 31 * h + val[i]; 1556 } 1557 hash = h; 1558 } 1559 return h; 1560 } 1561 1562 /** 1563 * Returns the index within this string of the first occurrence of 1564 * the specified character. If a character with value 1565 * {@code ch} occurs in the character sequence represented by 1566 * this {@code String} object, then the index (in Unicode 1567 * code units) of the first such occurrence is returned. For 1568 * values of {@code ch} in the range from 0 to 0xFFFF 1569 * (inclusive), this is the smallest value <i>k</i> such that: 1570 * <blockquote><pre> 1571 * this.charAt(<i>k</i>) == ch 1572 * </pre></blockquote> 1573 * is true. For other values of {@code ch}, it is the 1574 * smallest value <i>k</i> such that: 1575 * <blockquote><pre> 1576 * this.codePointAt(<i>k</i>) == ch 1577 * </pre></blockquote> 1578 * is true. In either case, if no such character occurs in this 1579 * string, then {@code -1} is returned. 1580 * 1581 * @param ch a character (Unicode code point). 1582 * @return the index of the first occurrence of the character in the 1583 * character sequence represented by this object, or 1584 * {@code -1} if the character does not occur. 1585 */ 1586 public int indexOf(int ch) { 1587 return indexOf(ch, 0); 1588 } 1589 1590 /** 1591 * Returns the index within this string of the first occurrence of the 1592 * specified character, starting the search at the specified index. 1593 * <p> 1594 * If a character with value {@code ch} occurs in the 1595 * character sequence represented by this {@code String} 1596 * object at an index no smaller than {@code fromIndex}, then 1597 * the index of the first such occurrence is returned. 
For values 1598 * of {@code ch} in the range from 0 to 0xFFFF (inclusive), 1599 * this is the smallest value <i>k</i> such that: 1600 * <blockquote><pre> 1601 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> >= fromIndex) 1602 * </pre></blockquote> 1603 * is true. For other values of {@code ch}, it is the 1604 * smallest value <i>k</i> such that: 1605 * <blockquote><pre> 1606 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> >= fromIndex) 1607 * </pre></blockquote> 1608 * is true. In either case, if no such character occurs in this 1609 * string at or after position {@code fromIndex}, then 1610 * {@code -1} is returned. 1611 * 1612 * <p> 1613 * There is no restriction on the value of {@code fromIndex}. If it 1614 * is negative, it has the same effect as if it were zero: this entire 1615 * string may be searched. If it is greater than the length of this 1616 * string, it has the same effect as if it were equal to the length of 1617 * this string: {@code -1} is returned. 1618 * 1619 * <p>All indices are specified in {@code char} values 1620 * (Unicode code units). 1621 * 1622 * @param ch a character (Unicode code point). 1623 * @param fromIndex the index to start the search from. 1624 * @return the index of the first occurrence of the character in the 1625 * character sequence represented by this object that is greater 1626 * than or equal to {@code fromIndex}, or {@code -1} 1627 * if the character does not occur. 1628 */ 1629 public int indexOf(int ch, int fromIndex) { 1630 final int max = value.length; 1631 if (fromIndex < 0) { 1632 fromIndex = 0; 1633 } else if (fromIndex >= max) { 1634 // Note: fromIndex might be near -1>>>1. 
1635 return -1; 1636 } 1637 1638 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 1639 // handle most cases here (ch is a BMP code point or a 1640 // negative value (invalid code point)) 1641 final char[] value = this.value; 1642 for (int i = fromIndex; i < max; i++) { 1643 if (value[i] == ch) { 1644 return i; 1645 } 1646 } 1647 return -1; 1648 } else { 1649 return indexOfSupplementary(ch, fromIndex); 1650 } 1651 } 1652 1653 /** 1654 * Handles (rare) calls of indexOf with a supplementary character. 1655 */ 1656 private int indexOfSupplementary(int ch, int fromIndex) { 1657 if (Character.isValidCodePoint(ch)) { 1658 final char[] value = this.value; 1659 final char hi = Character.highSurrogate(ch); 1660 final char lo = Character.lowSurrogate(ch); 1661 final int max = value.length - 1; 1662 for (int i = fromIndex; i < max; i++) { 1663 if (value[i] == hi && value[i + 1] == lo) { 1664 return i; 1665 } 1666 } 1667 } 1668 return -1; 1669 } 1670 1671 /** 1672 * Returns the index within this string of the last occurrence of 1673 * the specified character. For values of {@code ch} in the 1674 * range from 0 to 0xFFFF (inclusive), the index (in Unicode code 1675 * units) returned is the largest value <i>k</i> such that: 1676 * <blockquote><pre> 1677 * this.charAt(<i>k</i>) == ch 1678 * </pre></blockquote> 1679 * is true. For other values of {@code ch}, it is the 1680 * largest value <i>k</i> such that: 1681 * <blockquote><pre> 1682 * this.codePointAt(<i>k</i>) == ch 1683 * </pre></blockquote> 1684 * is true. In either case, if no such character occurs in this 1685 * string, then {@code -1} is returned. The 1686 * {@code String} is searched backwards starting at the last 1687 * character. 1688 * 1689 * @param ch a character (Unicode code point). 1690 * @return the index of the last occurrence of the character in the 1691 * character sequence represented by this object, or 1692 * {@code -1} if the character does not occur. 
1693 */ 1694 public int lastIndexOf(int ch) { 1695 return lastIndexOf(ch, value.length - 1); 1696 } 1697 1698 /** 1699 * Returns the index within this string of the last occurrence of 1700 * the specified character, searching backward starting at the 1701 * specified index. For values of {@code ch} in the range 1702 * from 0 to 0xFFFF (inclusive), the index returned is the largest 1703 * value <i>k</i> such that: 1704 * <blockquote><pre> 1705 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex) 1706 * </pre></blockquote> 1707 * is true. For other values of {@code ch}, it is the 1708 * largest value <i>k</i> such that: 1709 * <blockquote><pre> 1710 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex) 1711 * </pre></blockquote> 1712 * is true. In either case, if no such character occurs in this 1713 * string at or before position {@code fromIndex}, then 1714 * {@code -1} is returned. 1715 * 1716 * <p>All indices are specified in {@code char} values 1717 * (Unicode code units). 1718 * 1719 * @param ch a character (Unicode code point). 1720 * @param fromIndex the index to start the search from. There is no 1721 * restriction on the value of {@code fromIndex}. If it is 1722 * greater than or equal to the length of this string, it has 1723 * the same effect as if it were equal to one less than the 1724 * length of this string: this entire string may be searched. 1725 * If it is negative, it has the same effect as if it were -1: 1726 * -1 is returned. 1727 * @return the index of the last occurrence of the character in the 1728 * character sequence represented by this object that is less 1729 * than or equal to {@code fromIndex}, or {@code -1} 1730 * if the character does not occur before that point. 
1731 */ 1732 public int lastIndexOf(int ch, int fromIndex) { 1733 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 1734 // handle most cases here (ch is a BMP code point or a 1735 // negative value (invalid code point)) 1736 final char[] value = this.value; 1737 int i = Math.min(fromIndex, value.length - 1); 1738 for (; i >= 0; i--) { 1739 if (value[i] == ch) { 1740 return i; 1741 } 1742 } 1743 return -1; 1744 } else { 1745 return lastIndexOfSupplementary(ch, fromIndex); 1746 } 1747 } 1748 1749 /** 1750 * Handles (rare) calls of lastIndexOf with a supplementary character. 1751 */ 1752 private int lastIndexOfSupplementary(int ch, int fromIndex) { 1753 if (Character.isValidCodePoint(ch)) { 1754 final char[] value = this.value; 1755 char hi = Character.highSurrogate(ch); 1756 char lo = Character.lowSurrogate(ch); 1757 int i = Math.min(fromIndex, value.length - 2); 1758 for (; i >= 0; i--) { 1759 if (value[i] == hi && value[i + 1] == lo) { 1760 return i; 1761 } 1762 } 1763 } 1764 return -1; 1765 } 1766 1767 /** 1768 * Returns the index within this string of the first occurrence of the 1769 * specified substring. 1770 * 1771 * <p>The returned index is the smallest value {@code k} for which: 1772 * <pre>{@code 1773 * this.startsWith(str, k) 1774 * }</pre> 1775 * If no such value of {@code k} exists, then {@code -1} is returned. 1776 * 1777 * @param str the substring to search for. 1778 * @return the index of the first occurrence of the specified substring, 1779 * or {@code -1} if there is no such occurrence. 1780 */ 1781 public int indexOf(String str) { 1782 return indexOf(str, 0); 1783 } 1784 1785 /** 1786 * Returns the index within this string of the first occurrence of the 1787 * specified substring, starting at the specified index. 
1788 * 1789 * <p>The returned index is the smallest value {@code k} for which: 1790 * <pre>{@code 1791 * k >= Math.min(fromIndex, this.length()) && 1792 * this.startsWith(str, k) 1793 * }</pre> 1794 * If no such value of {@code k} exists, then {@code -1} is returned. 1795 * 1796 * @param str the substring to search for. 1797 * @param fromIndex the index from which to start the search. 1798 * @return the index of the first occurrence of the specified substring, 1799 * starting at the specified index, 1800 * or {@code -1} if there is no such occurrence. 1801 */ 1802 public int indexOf(String str, int fromIndex) { 1803 return indexOf(value, 0, value.length, 1804 str.value, 0, str.value.length, fromIndex); 1805 } 1806 1807 /** 1808 * Code shared by String and AbstractStringBuilder to do searches. The 1809 * source is the character array being searched, and the target 1810 * is the string being searched for. 1811 * 1812 * @param source the characters being searched. 1813 * @param sourceOffset offset of the source string. 1814 * @param sourceCount count of the source string. 1815 * @param target the characters being searched for. 1816 * @param fromIndex the index to begin searching from. 1817 */ 1818 static int indexOf(char[] source, int sourceOffset, int sourceCount, 1819 String target, int fromIndex) { 1820 return indexOf(source, sourceOffset, sourceCount, 1821 target.value, 0, target.value.length, 1822 fromIndex); 1823 } 1824 1825 /** 1826 * Code shared by String and StringBuffer to do searches. The 1827 * source is the character array being searched, and the target 1828 * is the string being searched for. 1829 * 1830 * @param source the characters being searched. 1831 * @param sourceOffset offset of the source string. 1832 * @param sourceCount count of the source string. 1833 * @param target the characters being searched for. 1834 * @param targetOffset offset of the target string. 1835 * @param targetCount count of the target string. 
1836 * @param fromIndex the index to begin searching from. 1837 */ 1838 static int indexOf(char[] source, int sourceOffset, int sourceCount, 1839 char[] target, int targetOffset, int targetCount, 1840 int fromIndex) { 1841 if (fromIndex >= sourceCount) { 1842 return (targetCount == 0 ? sourceCount : -1); 1843 } 1844 if (fromIndex < 0) { 1845 fromIndex = 0; 1846 } 1847 if (targetCount == 0) { 1848 return fromIndex; 1849 } 1850 1851 char first = target[targetOffset]; 1852 int max = sourceOffset + (sourceCount - targetCount); 1853 1854 for (int i = sourceOffset + fromIndex; i <= max; i++) { 1855 /* Look for first character. */ 1856 if (source[i] != first) { 1857 while (++i <= max && source[i] != first); 1858 } 1859 1860 /* Found first character, now look at the rest of v2 */ 1861 if (i <= max) { 1862 int j = i + 1; 1863 int end = j + targetCount - 1; 1864 for (int k = targetOffset + 1; j < end && source[j] 1865 == target[k]; j++, k++); 1866 1867 if (j == end) { 1868 /* Found whole string. */ 1869 return i - sourceOffset; 1870 } 1871 } 1872 } 1873 return -1; 1874 } 1875 1876 /** 1877 * Returns the index within this string of the last occurrence of the 1878 * specified substring. The last occurrence of the empty string "" 1879 * is considered to occur at the index value {@code this.length()}. 1880 * 1881 * <p>The returned index is the largest value {@code k} for which: 1882 * <pre>{@code 1883 * this.startsWith(str, k) 1884 * }</pre> 1885 * If no such value of {@code k} exists, then {@code -1} is returned. 1886 * 1887 * @param str the substring to search for. 1888 * @return the index of the last occurrence of the specified substring, 1889 * or {@code -1} if there is no such occurrence. 1890 */ 1891 public int lastIndexOf(String str) { 1892 return lastIndexOf(str, value.length); 1893 } 1894 1895 /** 1896 * Returns the index within this string of the last occurrence of the 1897 * specified substring, searching backward starting at the specified index. 
1898 * 1899 * <p>The returned index is the largest value {@code k} for which: 1900 * <pre>{@code 1901 * k <= Math.min(fromIndex, this.length()) && 1902 * this.startsWith(str, k) 1903 * }</pre> 1904 * If no such value of {@code k} exists, then {@code -1} is returned. 1905 * 1906 * @param str the substring to search for. 1907 * @param fromIndex the index to start the search from. 1908 * @return the index of the last occurrence of the specified substring, 1909 * searching backward from the specified index, 1910 * or {@code -1} if there is no such occurrence. 1911 */ 1912 public int lastIndexOf(String str, int fromIndex) { 1913 return lastIndexOf(value, 0, value.length, 1914 str.value, 0, str.value.length, fromIndex); 1915 } 1916 1917 /** 1918 * Code shared by String and AbstractStringBuilder to do searches. The 1919 * source is the character array being searched, and the target 1920 * is the string being searched for. 1921 * 1922 * @param source the characters being searched. 1923 * @param sourceOffset offset of the source string. 1924 * @param sourceCount count of the source string. 1925 * @param target the characters being searched for. 1926 * @param fromIndex the index to begin searching from. 1927 */ 1928 static int lastIndexOf(char[] source, int sourceOffset, int sourceCount, 1929 String target, int fromIndex) { 1930 return lastIndexOf(source, sourceOffset, sourceCount, 1931 target.value, 0, target.value.length, 1932 fromIndex); 1933 } 1934 1935 /** 1936 * Code shared by String and StringBuffer to do searches. The 1937 * source is the character array being searched, and the target 1938 * is the string being searched for. 1939 * 1940 * @param source the characters being searched. 1941 * @param sourceOffset offset of the source string. 1942 * @param sourceCount count of the source string. 1943 * @param target the characters being searched for. 1944 * @param targetOffset offset of the target string. 1945 * @param targetCount count of the target string. 
* @param   fromIndex    the index to begin searching from.
     * @return  the index, relative to {@code sourceOffset}, of the last
     *          match that starts at or before {@code fromIndex};
     *          the (clamped) {@code fromIndex} itself when
     *          {@code targetCount} is zero; or {@code -1} if
     *          {@code fromIndex} is negative or nothing matches.
     */
    static int lastIndexOf(char[] source, int sourceOffset, int sourceCount,
            char[] target, int targetOffset, int targetCount,
            int fromIndex) {
        /*
         * Check arguments; return immediately where possible. For
         * consistency, don't check for null str.
         */
        // Largest start position (relative to sourceOffset) at which the
        // whole target still fits inside the source region.
        int rightIndex = sourceCount - targetCount;
        if (fromIndex < 0) {
            return -1;
        }
        if (fromIndex > rightIndex) {
            fromIndex = rightIndex;
        }
        /* Empty string always matches. */
        if (targetCount == 0) {
            return fromIndex;
        }

        // The search is anchored on the LAST char of the target.
        int strLastIndex = targetOffset + targetCount - 1;
        char strLastChar = target[strLastIndex];
        // Smallest source index at which the target's last char can sit.
        int min = sourceOffset + targetCount - 1;
        // Source index of the target's last char for a match starting at fromIndex.
        int i = min + fromIndex;

    startSearchForLastChar:
        while (true) {
            // Walk backward to the next candidate position of the last char.
            while (i >= min && source[i] != strLastChar) {
                i--;
            }
            if (i < min) {
                return -1;
            }
            // Compare the remaining target chars right-to-left.
            // 'start' is one position BEFORE the candidate match's first char.
            int j = i - 1;
            int start = j - (targetCount - 1);
            int k = strLastIndex - 1;

            while (j > start) {
                if (source[j--] != target[k--]) {
                    // Mismatch: step the anchor back by one and rescan.
                    i--;
                    continue startSearchForLastChar;
                }
            }
            // start + 1 is the match's first char; make it relative to sourceOffset.
            return start - sourceOffset + 1;
        }
    }

    /**
     * Returns a string that is a substring of this string. The
     * substring begins with the character at the specified index and
     * extends to the end of this string. <p>
     * Examples:
     * <blockquote><pre>
     * "unhappy".substring(2) returns "happy"
     * "Harbison".substring(3) returns "bison"
     * "emptiness".substring(9) returns "" (an empty string)
     * </pre></blockquote>
     *
     * @param      beginIndex   the beginning index, inclusive.
     * @return     the specified substring.
     * @exception  IndexOutOfBoundsException  if
     *             {@code beginIndex} is negative or larger than the
     *             length of this {@code String} object.
2010 */ 2011 public String substring(int beginIndex) { 2012 if (beginIndex < 0) { 2013 throw new StringIndexOutOfBoundsException(beginIndex); 2014 } 2015 int subLen = value.length - beginIndex; 2016 if (subLen < 0) { 2017 throw new StringIndexOutOfBoundsException(subLen); 2018 } 2019 return (beginIndex == 0) ? this : new String(value, beginIndex, subLen); 2020 } 2021 2022 /** 2023 * Returns a string that is a substring of this string. The 2024 * substring begins at the specified {@code beginIndex} and 2025 * extends to the character at index {@code endIndex - 1}. 2026 * Thus the length of the substring is {@code endIndex-beginIndex}. 2027 * <p> 2028 * Examples: 2029 * <blockquote><pre> 2030 * "hamburger".substring(4, 8) returns "urge" 2031 * "smiles".substring(1, 5) returns "mile" 2032 * </pre></blockquote> 2033 * 2034 * @param beginIndex the beginning index, inclusive. 2035 * @param endIndex the ending index, exclusive. 2036 * @return the specified substring. 2037 * @exception IndexOutOfBoundsException if the 2038 * {@code beginIndex} is negative, or 2039 * {@code endIndex} is larger than the length of 2040 * this {@code String} object, or 2041 * {@code beginIndex} is larger than 2042 * {@code endIndex}. 2043 */ 2044 public String substring(int beginIndex, int endIndex) { 2045 if (beginIndex < 0) { 2046 throw new StringIndexOutOfBoundsException(beginIndex); 2047 } 2048 if (endIndex > value.length) { 2049 throw new StringIndexOutOfBoundsException(endIndex); 2050 } 2051 int subLen = endIndex - beginIndex; 2052 if (subLen < 0) { 2053 throw new StringIndexOutOfBoundsException(subLen); 2054 } 2055 return ((beginIndex == 0) && (endIndex == value.length)) ? this 2056 : new String(value, beginIndex, subLen); 2057 } 2058 2059 /** 2060 * Returns a character sequence that is a subsequence of this sequence. 
2061 * 2062 * <p> An invocation of this method of the form 2063 * 2064 * <blockquote><pre> 2065 * str.subSequence(begin, end)</pre></blockquote> 2066 * 2067 * behaves in exactly the same way as the invocation 2068 * 2069 * <blockquote><pre> 2070 * str.substring(begin, end)</pre></blockquote> 2071 * 2072 * @apiNote 2073 * This method is defined so that the {@code String} class can implement 2074 * the {@link CharSequence} interface. 2075 * 2076 * @param beginIndex the begin index, inclusive. 2077 * @param endIndex the end index, exclusive. 2078 * @return the specified subsequence. 2079 * 2080 * @throws IndexOutOfBoundsException 2081 * if {@code beginIndex} or {@code endIndex} is negative, 2082 * if {@code endIndex} is greater than {@code length()}, 2083 * or if {@code beginIndex} is greater than {@code endIndex} 2084 * 2085 * @since 1.4 2086 * @spec JSR-51 2087 */ 2088 public CharSequence subSequence(int beginIndex, int endIndex) { 2089 return this.substring(beginIndex, endIndex); 2090 } 2091 2092 /** 2093 * Concatenates the specified string to the end of this string. 2094 * <p> 2095 * If the length of the argument string is {@code 0}, then this 2096 * {@code String} object is returned. Otherwise, a 2097 * {@code String} object is returned that represents a character 2098 * sequence that is the concatenation of the character sequence 2099 * represented by this {@code String} object and the character 2100 * sequence represented by the argument string.<p> 2101 * Examples: 2102 * <blockquote><pre> 2103 * "cares".concat("s") returns "caress" 2104 * "to".concat("get").concat("her") returns "together" 2105 * </pre></blockquote> 2106 * 2107 * @param str the {@code String} that is concatenated to the end 2108 * of this {@code String}. 2109 * @return a string that represents the concatenation of this object's 2110 * characters followed by the string argument's characters. 
2111 */ 2112 public String concat(String str) { 2113 int otherLen = str.length(); 2114 if (otherLen == 0) { 2115 return this; 2116 } 2117 int len = value.length; 2118 char buf[] = Arrays.copyOf(value, len + otherLen); 2119 str.getChars(buf, len); 2120 return new String(buf, true); 2121 } 2122 2123 /** 2124 * Returns a string resulting from replacing all occurrences of 2125 * {@code oldChar} in this string with {@code newChar}. 2126 * <p> 2127 * If the character {@code oldChar} does not occur in the 2128 * character sequence represented by this {@code String} object, 2129 * then a reference to this {@code String} object is returned. 2130 * Otherwise, a {@code String} object is returned that 2131 * represents a character sequence identical to the character sequence 2132 * represented by this {@code String} object, except that every 2133 * occurrence of {@code oldChar} is replaced by an occurrence 2134 * of {@code newChar}. 2135 * <p> 2136 * Examples: 2137 * <blockquote><pre> 2138 * "mesquite in your cellar".replace('e', 'o') 2139 * returns "mosquito in your collar" 2140 * "the war of baronets".replace('r', 'y') 2141 * returns "the way of bayonets" 2142 * "sparring with a purple porpoise".replace('p', 't') 2143 * returns "starring with a turtle tortoise" 2144 * "JonL".replace('q', 'x') returns "JonL" (no change) 2145 * </pre></blockquote> 2146 * 2147 * @param oldChar the old character. 2148 * @param newChar the new character. 2149 * @return a string derived from this string by replacing every 2150 * occurrence of {@code oldChar} with {@code newChar}. 
2151 */ 2152 public String replace(char oldChar, char newChar) { 2153 if (oldChar != newChar) { 2154 int len = value.length; 2155 int i = -1; 2156 char[] val = value; /* avoid getfield opcode */ 2157 2158 while (++i < len) { 2159 if (val[i] == oldChar) { 2160 break; 2161 } 2162 } 2163 if (i < len) { 2164 char buf[] = new char[len]; 2165 for (int j = 0; j < i; j++) { 2166 buf[j] = val[j]; 2167 } 2168 while (i < len) { 2169 char c = val[i]; 2170 buf[i] = (c == oldChar) ? newChar : c; 2171 i++; 2172 } 2173 return new String(buf, true); 2174 } 2175 } 2176 return this; 2177 } 2178 2179 /** 2180 * Tells whether or not this string matches the given <a 2181 * href="../util/regex/Pattern.html#sum">regular expression</a>. 2182 * 2183 * <p> An invocation of this method of the form 2184 * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the 2185 * same result as the expression 2186 * 2187 * <blockquote> 2188 * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence) 2189 * matches(<i>regex</i>, <i>str</i>)} 2190 * </blockquote> 2191 * 2192 * @param regex 2193 * the regular expression to which this string is to be matched 2194 * 2195 * @return {@code true} if, and only if, this string matches the 2196 * given regular expression 2197 * 2198 * @throws PatternSyntaxException 2199 * if the regular expression's syntax is invalid 2200 * 2201 * @see java.util.regex.Pattern 2202 * 2203 * @since 1.4 2204 * @spec JSR-51 2205 */ 2206 public boolean matches(String regex) { 2207 return Pattern.matches(regex, this); 2208 } 2209 2210 /** 2211 * Returns true if and only if this string contains the specified 2212 * sequence of char values. 
2213 * 2214 * @param s the sequence to search for 2215 * @return true if this string contains {@code s}, false otherwise 2216 * @since 1.5 2217 */ 2218 public boolean contains(CharSequence s) { 2219 return indexOf(s.toString()) > -1; 2220 } 2221 2222 /** 2223 * Replaces the first substring of this string that matches the given <a 2224 * href="../util/regex/Pattern.html#sum">regular expression</a> with the 2225 * given replacement. 2226 * 2227 * <p> An invocation of this method of the form 2228 * <i>str</i>{@code .replaceFirst(}<i>regex</i>{@code ,} <i>repl</i>{@code )} 2229 * yields exactly the same result as the expression 2230 * 2231 * <blockquote> 2232 * <code> 2233 * {@link java.util.regex.Pattern}.{@link 2234 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2235 * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link 2236 * java.util.regex.Matcher#replaceFirst replaceFirst}(<i>repl</i>) 2237 * </code> 2238 * </blockquote> 2239 * 2240 *<p> 2241 * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the 2242 * replacement string may cause the results to be different than if it were 2243 * being treated as a literal replacement string; see 2244 * {@link java.util.regex.Matcher#replaceFirst}. 2245 * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special 2246 * meaning of these characters, if desired. 
2247 * 2248 * @param regex 2249 * the regular expression to which this string is to be matched 2250 * @param replacement 2251 * the string to be substituted for the first match 2252 * 2253 * @return The resulting {@code String} 2254 * 2255 * @throws PatternSyntaxException 2256 * if the regular expression's syntax is invalid 2257 * 2258 * @see java.util.regex.Pattern 2259 * 2260 * @since 1.4 2261 * @spec JSR-51 2262 */ 2263 public String replaceFirst(String regex, String replacement) { 2264 return Pattern.compile(regex).matcher(this).replaceFirst(replacement); 2265 } 2266 2267 /** 2268 * Replaces each substring of this string that matches the given <a 2269 * href="../util/regex/Pattern.html#sum">regular expression</a> with the 2270 * given replacement. 2271 * 2272 * <p> An invocation of this method of the form 2273 * <i>str</i>{@code .replaceAll(}<i>regex</i>{@code ,} <i>repl</i>{@code )} 2274 * yields exactly the same result as the expression 2275 * 2276 * <blockquote> 2277 * <code> 2278 * {@link java.util.regex.Pattern}.{@link 2279 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2280 * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link 2281 * java.util.regex.Matcher#replaceAll replaceAll}(<i>repl</i>) 2282 * </code> 2283 * </blockquote> 2284 * 2285 *<p> 2286 * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the 2287 * replacement string may cause the results to be different than if it were 2288 * being treated as a literal replacement string; see 2289 * {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}. 2290 * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special 2291 * meaning of these characters, if desired. 
2292 * 2293 * @param regex 2294 * the regular expression to which this string is to be matched 2295 * @param replacement 2296 * the string to be substituted for each match 2297 * 2298 * @return The resulting {@code String} 2299 * 2300 * @throws PatternSyntaxException 2301 * if the regular expression's syntax is invalid 2302 * 2303 * @see java.util.regex.Pattern 2304 * 2305 * @since 1.4 2306 * @spec JSR-51 2307 */ 2308 public String replaceAll(String regex, String replacement) { 2309 return Pattern.compile(regex).matcher(this).replaceAll(replacement); 2310 } 2311 2312 /** 2313 * Replaces each substring of this string that matches the literal target 2314 * sequence with the specified literal replacement sequence. The 2315 * replacement proceeds from the beginning of the string to the end, for 2316 * example, replacing "aa" with "b" in the string "aaa" will result in 2317 * "ba" rather than "ab". 2318 * 2319 * @param target The sequence of char values to be replaced 2320 * @param replacement The replacement sequence of char values 2321 * @return The resulting string 2322 * @since 1.5 2323 */ 2324 public String replace(CharSequence target, CharSequence replacement) { 2325 return Pattern.compile(target.toString(), Pattern.LITERAL).matcher( 2326 this).replaceAll(Matcher.quoteReplacement(replacement.toString())); 2327 } 2328 2329 /** 2330 * Splits this string around matches of the given 2331 * <a href="../util/regex/Pattern.html#sum">regular expression</a>. 2332 * 2333 * <p> The array returned by this method contains each substring of this 2334 * string that is terminated by another substring that matches the given 2335 * expression or is terminated by the end of the string. The substrings in 2336 * the array are in the order in which they occur in this string. If the 2337 * expression does not match any part of the input then the resulting array 2338 * has just one element, namely this string. 
2339 * 2340 * <p> When there is a positive-width match at the beginning of this 2341 * string then an empty leading substring is included at the beginning 2342 * of the resulting array. A zero-width match at the beginning however 2343 * never produces such empty leading substring. 2344 * 2345 * <p> The {@code limit} parameter controls the number of times the 2346 * pattern is applied and therefore affects the length of the resulting 2347 * array. If the limit <i>n</i> is greater than zero then the pattern 2348 * will be applied at most <i>n</i> - 1 times, the array's 2349 * length will be no greater than <i>n</i>, and the array's last entry 2350 * will contain all input beyond the last matched delimiter. If <i>n</i> 2351 * is non-positive then the pattern will be applied as many times as 2352 * possible and the array can have any length. If <i>n</i> is zero then 2353 * the pattern will be applied as many times as possible, the array can 2354 * have any length, and trailing empty strings will be discarded. 
2355 * 2356 * <p> The string {@code "boo:and:foo"}, for example, yields the 2357 * following results with these parameters: 2358 * 2359 * <blockquote><table cellpadding=1 cellspacing=0 summary="Split example showing regex, limit, and result"> 2360 * <tr> 2361 * <th>Regex</th> 2362 * <th>Limit</th> 2363 * <th>Result</th> 2364 * </tr> 2365 * <tr><td align=center>:</td> 2366 * <td align=center>2</td> 2367 * <td>{@code { "boo", "and:foo" }}</td></tr> 2368 * <tr><td align=center>:</td> 2369 * <td align=center>5</td> 2370 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2371 * <tr><td align=center>:</td> 2372 * <td align=center>-2</td> 2373 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2374 * <tr><td align=center>o</td> 2375 * <td align=center>5</td> 2376 * <td>{@code { "b", "", ":and:f", "", "" }}</td></tr> 2377 * <tr><td align=center>o</td> 2378 * <td align=center>-2</td> 2379 * <td>{@code { "b", "", ":and:f", "", "" }}</td></tr> 2380 * <tr><td align=center>o</td> 2381 * <td align=center>0</td> 2382 * <td>{@code { "b", "", ":and:f" }}</td></tr> 2383 * </table></blockquote> 2384 * 2385 * <p> An invocation of this method of the form 2386 * <i>str.</i>{@code split(}<i>regex</i>{@code ,} <i>n</i>{@code )} 2387 * yields the same result as the expression 2388 * 2389 * <blockquote> 2390 * <code> 2391 * {@link java.util.regex.Pattern}.{@link 2392 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2393 * java.util.regex.Pattern#split(java.lang.CharSequence,int) split}(<i>str</i>, <i>n</i>) 2394 * </code> 2395 * </blockquote> 2396 * 2397 * 2398 * @param regex 2399 * the delimiting regular expression 2400 * 2401 * @param limit 2402 * the result threshold, as described above 2403 * 2404 * @return the array of strings computed by splitting this string 2405 * around matches of the given regular expression 2406 * 2407 * @throws PatternSyntaxException 2408 * if the regular expression's syntax is invalid 2409 * 2410 * @see java.util.regex.Pattern 2411 * 2412 * @since 1.4 
* @spec JSR-51
     */
    public String[] split(String regex, int limit) {
        /* fastpath if the regex is a
         (1)one-char String and this character is not one of the
            RegEx's meta characters ".$|()[{^?*+\\", or
         (2)two-char String and the first char is the backslash and
            the second is not the ascii digit or ascii letter.
         In both cases the delimiter char ends up in 'ch' (it is assigned
         inside the condition), and the fastpath is taken only if 'ch'
         lies outside the surrogate range.
         */
        char ch = 0;
        // Range-test idiom used below: ((c - lo) | (hi - c)) < 0 holds exactly
        // when c < lo or c > hi, because then one of the two differences is
        // negative and the bitwise OR carries its sign bit.
        if (((regex.value.length == 1 &&
             ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
             (regex.length() == 2 &&
              regex.charAt(0) == '\\' &&
              (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
              ((ch-'a')|('z'-ch)) < 0 &&
              ((ch-'A')|('Z'-ch)) < 0)) &&
            (ch < Character.MIN_HIGH_SURROGATE ||
             ch > Character.MAX_LOW_SURROGATE))
        {
            // 'off' is the start of the current piece, 'next' the index of
            // the next delimiter found at or after 'off'.
            int off = 0;
            int next = 0;
            // Only a positive limit caps the number of pieces.
            boolean limited = limit > 0;
            ArrayList<String> list = new ArrayList<>();
            while ((next = indexOf(ch, off)) != -1) {
                if (!limited || list.size() < limit - 1) {
                    list.add(substring(off, next));
                    off = next + 1;
                } else {    // last one
                    //assert (list.size() == limit - 1);
                    // Limit reached: the rest of the string, delimiters
                    // included, becomes the final piece.
                    list.add(substring(off, value.length));
                    off = value.length;
                    break;
                }
            }
            // If no match was found, return this
            // (off is advanced past 0 as soon as one delimiter is consumed)
            if (off == 0)
                return new String[]{this};

            // Add remaining segment
            if (!limited || list.size() < limit)
                list.add(substring(off, value.length));

            // Construct result
            int resultSize = list.size();
            if (limit == 0) {
                // A zero limit drops trailing empty strings.
                while (resultSize > 0 && list.get(resultSize - 1).length() == 0) {
                    resultSize--;
                }
            }
            String[] result = new String[resultSize];
            return list.subList(0, resultSize).toArray(result);
        }
        // General case: let the regex engine do the splitting.
        return Pattern.compile(regex).split(this, limit);
    }

    /**
     * Splits this string around matches of the given <a
     * href="../util/regex/Pattern.html#sum">regular expression</a>.
2472 * 2473 * <p> This method works as if by invoking the two-argument {@link 2474 * #split(String, int) split} method with the given expression and a limit 2475 * argument of zero. Trailing empty strings are therefore not included in 2476 * the resulting array. 2477 * 2478 * <p> The string {@code "boo:and:foo"}, for example, yields the following 2479 * results with these expressions: 2480 * 2481 * <blockquote><table cellpadding=1 cellspacing=0 summary="Split examples showing regex and result"> 2482 * <tr> 2483 * <th>Regex</th> 2484 * <th>Result</th> 2485 * </tr> 2486 * <tr><td align=center>:</td> 2487 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2488 * <tr><td align=center>o</td> 2489 * <td>{@code { "b", "", ":and:f" }}</td></tr> 2490 * </table></blockquote> 2491 * 2492 * 2493 * @param regex 2494 * the delimiting regular expression 2495 * 2496 * @return the array of strings computed by splitting this string 2497 * around matches of the given regular expression 2498 * 2499 * @throws PatternSyntaxException 2500 * if the regular expression's syntax is invalid 2501 * 2502 * @see java.util.regex.Pattern 2503 * 2504 * @since 1.4 2505 * @spec JSR-51 2506 */ 2507 public String[] split(String regex) { 2508 return split(regex, 0); 2509 } 2510 2511 /** 2512 * Returns a new String composed of copies of the 2513 * {@code CharSequence elements} joined together with a copy of 2514 * the specified {@code delimiter}. 2515 * 2516 * <blockquote>For example, 2517 * <pre>{@code 2518 * String message = String.join("-", "Java", "is", "cool"); 2519 * // message returned is: "Java-is-cool" 2520 * }</pre></blockquote> 2521 * 2522 * Note that if an element is null, then {@code "null"} is added. 2523 * 2524 * @param delimiter the delimiter that separates each element 2525 * @param elements the elements to join together. 
2526 * 2527 * @return a new {@code String} that is composed of the {@code elements} 2528 * separated by the {@code delimiter} 2529 * 2530 * @throws NullPointerException If {@code delimiter} or {@code elements} 2531 * is {@code null} 2532 * 2533 * @see java.util.StringJoiner 2534 * @since 1.8 2535 */ 2536 public static String join(CharSequence delimiter, CharSequence... elements) { 2537 Objects.requireNonNull(delimiter); 2538 Objects.requireNonNull(elements); 2539 // Number of elements not likely worth Arrays.stream overhead. 2540 StringJoiner joiner = new StringJoiner(delimiter); 2541 for (CharSequence cs: elements) { 2542 joiner.add(cs); 2543 } 2544 return joiner.toString(); 2545 } 2546 2547 /** 2548 * Returns a new {@code String} composed of copies of the 2549 * {@code CharSequence elements} joined together with a copy of the 2550 * specified {@code delimiter}. 2551 * 2552 * <blockquote>For example, 2553 * <pre>{@code 2554 * List<String> strings = new LinkedList<>(); 2555 * strings.add("Java");strings.add("is"); 2556 * strings.add("cool"); 2557 * String message = String.join(" ", strings); 2558 * //message returned is: "Java is cool" 2559 * 2560 * Set<String> strings = new LinkedHashSet<>(); 2561 * strings.add("Java"); strings.add("is"); 2562 * strings.add("very"); strings.add("cool"); 2563 * String message = String.join("-", strings); 2564 * //message returned is: "Java-is-very-cool" 2565 * }</pre></blockquote> 2566 * 2567 * Note that if an individual element is {@code null}, then {@code "null"} is added. 2568 * 2569 * @param delimiter a sequence of characters that is used to separate each 2570 * of the {@code elements} in the resulting {@code String} 2571 * @param elements an {@code Iterable} that will have its {@code elements} 2572 * joined together. 
2573 * 2574 * @return a new {@code String} that is composed from the {@code elements} 2575 * argument 2576 * 2577 * @throws NullPointerException If {@code delimiter} or {@code elements} 2578 * is {@code null} 2579 * 2580 * @see #join(CharSequence,CharSequence...) 2581 * @see java.util.StringJoiner 2582 * @since 1.8 2583 */ 2584 public static String join(CharSequence delimiter, 2585 Iterable<? extends CharSequence> elements) { 2586 Objects.requireNonNull(delimiter); 2587 Objects.requireNonNull(elements); 2588 StringJoiner joiner = new StringJoiner(delimiter); 2589 for (CharSequence cs: elements) { 2590 joiner.add(cs); 2591 } 2592 return joiner.toString(); 2593 } 2594 2595 /** 2596 * Converts all of the characters in this {@code String} to lower 2597 * case using the rules of the given {@code Locale}. Case mapping is based 2598 * on the Unicode Standard version specified by the {@link java.lang.Character Character} 2599 * class. Since case mappings are not always 1:1 char mappings, the resulting 2600 * {@code String} may be a different length than the original {@code String}. 
2601 * <p> 2602 * Examples of lowercase mappings are in the following table: 2603 * <table border="1" summary="Lowercase mapping examples showing language code of locale, upper case, lower case, and description"> 2604 * <tr> 2605 * <th>Language Code of Locale</th> 2606 * <th>Upper Case</th> 2607 * <th>Lower Case</th> 2608 * <th>Description</th> 2609 * </tr> 2610 * <tr> 2611 * <td>tr (Turkish)</td> 2612 * <td>\u0130</td> 2613 * <td>\u0069</td> 2614 * <td>capital letter I with dot above -> small letter i</td> 2615 * </tr> 2616 * <tr> 2617 * <td>tr (Turkish)</td> 2618 * <td>\u0049</td> 2619 * <td>\u0131</td> 2620 * <td>capital letter I -> small letter dotless i </td> 2621 * </tr> 2622 * <tr> 2623 * <td>(all)</td> 2624 * <td>French Fries</td> 2625 * <td>french fries</td> 2626 * <td>lowercased all chars in String</td> 2627 * </tr> 2628 * <tr> 2629 * <td>(all)</td> 2630 * <td><img src="doc-files/capiota.gif" alt="capiota"><img src="doc-files/capchi.gif" alt="capchi"> 2631 * <img src="doc-files/captheta.gif" alt="captheta"><img src="doc-files/capupsil.gif" alt="capupsil"> 2632 * <img src="doc-files/capsigma.gif" alt="capsigma"></td> 2633 * <td><img src="doc-files/iota.gif" alt="iota"><img src="doc-files/chi.gif" alt="chi"> 2634 * <img src="doc-files/theta.gif" alt="theta"><img src="doc-files/upsilon.gif" alt="upsilon"> 2635 * <img src="doc-files/sigma1.gif" alt="sigma"></td> 2636 * <td>lowercased all chars in String</td> 2637 * </tr> 2638 * </table> 2639 * 2640 * @param locale use the case transformation rules for this locale 2641 * @return the {@code String}, converted to lowercase. 
2642 * @see java.lang.String#toLowerCase() 2643 * @see java.lang.String#toUpperCase() 2644 * @see java.lang.String#toUpperCase(Locale) 2645 * @since 1.1 2646 */ 2647 public String toLowerCase(Locale locale) { 2648 if (locale == null) { 2649 throw new NullPointerException(); 2650 } 2651 int first; 2652 boolean hasSurr = false; 2653 final int len = value.length; 2654 2655 // Now check if there are any characters that need to be changed, or are surrogate 2656 for (first = 0 ; first < len; first++) { 2657 int cp = (int)value[first]; 2658 if (Character.isSurrogate((char)cp)) { 2659 hasSurr = true; 2660 break; 2661 } 2662 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR 2663 break; 2664 } 2665 } 2666 if (first == len) 2667 return this; 2668 char[] result = new char[len]; 2669 System.arraycopy(value, 0, result, 0, first); // Just copy the first few 2670 // lowerCase characters. 2671 String lang = locale.getLanguage(); 2672 if (lang == "tr" || lang == "az" || lang == "lt") { 2673 return toLowerCaseEx(result, first, locale, true); 2674 } 2675 if (hasSurr) { 2676 return toLowerCaseEx(result, first, locale, false); 2677 } 2678 for (int i = first; i < len; i++) { 2679 int cp = (int)value[i]; 2680 if (cp == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 2681 Character.isSurrogate((char)cp)) { 2682 return toLowerCaseEx(result, i, locale, false); 2683 } 2684 if (cp == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 2685 return toLowerCaseEx(result, i, locale, true); 2686 } 2687 cp = Character.toLowerCase(cp); 2688 if (!Character.isBmpCodePoint(cp)) { 2689 return toLowerCaseEx(result, i, locale, false); 2690 } 2691 result[i] = (char)cp; 2692 } 2693 return new String(result, true); 2694 } 2695 2696 private String toLowerCaseEx(char[] result, int first, Locale locale, boolean localeDependent) { 2697 int resultOffset = first; 2698 int srcCount; 2699 for (int i = first; i < value.length; i += srcCount) { 2700 int srcChar = (int)value[i]; 2701 int lowerChar; 
2702 char[] lowerCharArray; 2703 srcCount = 1; 2704 if (Character.isSurrogate((char)srcChar)) { 2705 srcChar = codePointAt(i); 2706 srcCount = Character.charCount(srcChar); 2707 } 2708 if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA 2709 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale); 2710 } else { 2711 lowerChar = Character.toLowerCase(srcChar); 2712 } 2713 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp 2714 result[resultOffset++] = (char)lowerChar; 2715 } else { 2716 if (lowerChar == Character.ERROR) { 2717 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale); 2718 } else if (srcCount == 2) { 2719 resultOffset += Character.toChars(lowerChar, result, resultOffset); 2720 continue; 2721 } else { 2722 lowerCharArray = Character.toChars(lowerChar); 2723 } 2724 /* Grow result if needed */ 2725 int mapLen = lowerCharArray.length; 2726 if (mapLen > srcCount) { 2727 char[] result2 = new char[result.length + mapLen - srcCount]; 2728 System.arraycopy(result, 0, result2, 0, resultOffset); 2729 result = result2; 2730 } 2731 for (int x = 0; x < mapLen; ++x) { 2732 result[resultOffset++] = lowerCharArray[x]; 2733 } 2734 } 2735 } 2736 return new String(result, 0, resultOffset); 2737 } 2738 2739 /** 2740 * Converts all of the characters in this {@code String} to lower 2741 * case using the rules of the default locale. This is equivalent to calling 2742 * {@code toLowerCase(Locale.getDefault())}. 2743 * <p> 2744 * <b>Note:</b> This method is locale sensitive, and may produce unexpected 2745 * results if used for strings that are intended to be interpreted locale 2746 * independently. 2747 * Examples are programming language identifiers, protocol keys, and HTML 2748 * tags. 2749 * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale 2750 * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the 2751 * LATIN SMALL LETTER DOTLESS I character. 
2752 * To obtain correct results for locale insensitive strings, use 2753 * {@code toLowerCase(Locale.ROOT)}. 2754 * 2755 * @return the {@code String}, converted to lowercase. 2756 * @see java.lang.String#toLowerCase(Locale) 2757 */ 2758 public String toLowerCase() { 2759 return toLowerCase(Locale.getDefault()); 2760 } 2761 2762 /** 2763 * Converts all of the characters in this {@code String} to upper 2764 * case using the rules of the given {@code Locale}. Case mapping is based 2765 * on the Unicode Standard version specified by the {@link java.lang.Character Character} 2766 * class. Since case mappings are not always 1:1 char mappings, the resulting 2767 * {@code String} may be a different length than the original {@code String}. 2768 * <p> 2769 * Examples of locale-sensitive and 1:M case mappings are in the following table. 2770 * 2771 * <table border="1" summary="Examples of locale-sensitive and 1:M case mappings. Shows Language code of locale, lower case, upper case, and description."> 2772 * <tr> 2773 * <th>Language Code of Locale</th> 2774 * <th>Lower Case</th> 2775 * <th>Upper Case</th> 2776 * <th>Description</th> 2777 * </tr> 2778 * <tr> 2779 * <td>tr (Turkish)</td> 2780 * <td>\u0069</td> 2781 * <td>\u0130</td> 2782 * <td>small letter i -> capital letter I with dot above</td> 2783 * </tr> 2784 * <tr> 2785 * <td>tr (Turkish)</td> 2786 * <td>\u0131</td> 2787 * <td>\u0049</td> 2788 * <td>small letter dotless i -> capital letter I</td> 2789 * </tr> 2790 * <tr> 2791 * <td>(all)</td> 2792 * <td>\u00df</td> 2793 * <td>\u0053 \u0053</td> 2794 * <td>small letter sharp s -> two letters: SS</td> 2795 * </tr> 2796 * <tr> 2797 * <td>(all)</td> 2798 * <td>Fahrvergnügen</td> 2799 * <td>FAHRVERGNÜGEN</td> 2800 * <td></td> 2801 * </tr> 2802 * </table> 2803 * @param locale use the case transformation rules for this locale 2804 * @return the {@code String}, converted to uppercase. 
2805 * @see java.lang.String#toUpperCase() 2806 * @see java.lang.String#toLowerCase() 2807 * @see java.lang.String#toLowerCase(Locale) 2808 * @since 1.1 2809 */ 2810 public String toUpperCase(Locale locale) { 2811 if (locale == null) { 2812 throw new NullPointerException(); 2813 } 2814 int first; 2815 boolean hasSurr = false; 2816 final int len = value.length; 2817 2818 // Now check if there are any characters that need to be changed, or are surrogate 2819 for (first = 0 ; first < len; first++ ) { 2820 int cp = (int)value[first]; 2821 if (Character.isSurrogate((char)cp)) { 2822 hasSurr = true; 2823 break; 2824 } 2825 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR 2826 break; 2827 } 2828 } 2829 if (first == len) { 2830 return this; 2831 } 2832 char[] result = new char[len]; 2833 System.arraycopy(value, 0, result, 0, first); // Just copy the first few 2834 // upperCase characters. 2835 String lang = locale.getLanguage(); 2836 if (lang == "tr" || lang == "az" || lang == "lt") { 2837 return toUpperCaseEx(result, first, locale, true); 2838 } 2839 if (hasSurr) { 2840 return toUpperCaseEx(result, first, locale, false); 2841 } 2842 for (int i = first; i < len; i++) { 2843 int cp = (int)value[i]; 2844 if (Character.isSurrogate((char)cp)) { 2845 return toUpperCaseEx(result, i, locale, false); 2846 } 2847 cp = Character.toUpperCaseEx(cp); 2848 if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp 2849 return toUpperCaseEx(result, i, locale, false); 2850 } 2851 result[i] = (char)cp; 2852 } 2853 return new String(result, true); 2854 } 2855 2856 private String toUpperCaseEx(char[] result, int first, Locale locale, 2857 boolean localeDependent) { 2858 int resultOffset = first; 2859 int srcCount; 2860 for (int i = first; i < value.length; i += srcCount) { 2861 int srcChar = (int)value[i]; 2862 int upperChar; 2863 char[] upperCharArray; 2864 srcCount = 1; 2865 if (Character.isSurrogate((char)srcChar)) { 2866 srcChar = codePointAt(i); 2867 
srcCount = Character.charCount(srcChar); 2868 } 2869 if (localeDependent) { 2870 upperChar = ConditionalSpecialCasing.toUpperCaseEx(this, i, locale); 2871 } else { 2872 upperChar = Character.toUpperCaseEx(srcChar); 2873 } 2874 if (Character.isBmpCodePoint(upperChar)) { 2875 result[resultOffset++] = (char)upperChar; 2876 } else { 2877 if (upperChar == Character.ERROR) { 2878 if (localeDependent) { 2879 upperCharArray = 2880 ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale); 2881 } else { 2882 upperCharArray = Character.toUpperCaseCharArray(srcChar); 2883 } 2884 } else if (srcCount == 2) { 2885 resultOffset += Character.toChars(upperChar, result, resultOffset); 2886 continue; 2887 } else { 2888 upperCharArray = Character.toChars(upperChar); 2889 } 2890 /* Grow result if needed */ 2891 int mapLen = upperCharArray.length; 2892 if (mapLen > srcCount) { 2893 char[] result2 = new char[result.length + mapLen - srcCount]; 2894 System.arraycopy(result, 0, result2, 0, resultOffset); 2895 result = result2; 2896 } 2897 for (int x = 0; x < mapLen; ++x) { 2898 result[resultOffset++] = upperCharArray[x]; 2899 } 2900 } 2901 } 2902 return new String(result, 0, resultOffset); 2903 } 2904 2905 /** 2906 * Converts all of the characters in this {@code String} to upper 2907 * case using the rules of the default locale. This method is equivalent to 2908 * {@code toUpperCase(Locale.getDefault())}. 2909 * <p> 2910 * <b>Note:</b> This method is locale sensitive, and may produce unexpected 2911 * results if used for strings that are intended to be interpreted locale 2912 * independently. 2913 * Examples are programming language identifiers, protocol keys, and HTML 2914 * tags. 2915 * For instance, {@code "title".toUpperCase()} in a Turkish locale 2916 * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the 2917 * LATIN CAPITAL LETTER I WITH DOT ABOVE character. 2918 * To obtain correct results for locale insensitive strings, use 2919 * {@code toUpperCase(Locale.ROOT)}. 
2920 * 2921 * @return the {@code String}, converted to uppercase. 2922 * @see java.lang.String#toUpperCase(Locale) 2923 */ 2924 public String toUpperCase() { 2925 return toUpperCase(Locale.getDefault()); 2926 } 2927 2928 /** 2929 * Returns a string whose value is this string, with any leading and trailing 2930 * whitespace removed. 2931 * <p> 2932 * If this {@code String} object represents an empty character 2933 * sequence, or the first and last characters of character sequence 2934 * represented by this {@code String} object both have codes 2935 * greater than {@code '\u005Cu0020'} (the space character), then a 2936 * reference to this {@code String} object is returned. 2937 * <p> 2938 * Otherwise, if there is no character with a code greater than 2939 * {@code '\u005Cu0020'} in the string, then a 2940 * {@code String} object representing an empty string is 2941 * returned. 2942 * <p> 2943 * Otherwise, let <i>k</i> be the index of the first character in the 2944 * string whose code is greater than {@code '\u005Cu0020'}, and let 2945 * <i>m</i> be the index of the last character in the string whose code 2946 * is greater than {@code '\u005Cu0020'}. A {@code String} 2947 * object is returned, representing the substring of this string that 2948 * begins with the character at index <i>k</i> and ends with the 2949 * character at index <i>m</i>-that is, the result of 2950 * {@code this.substring(k, m + 1)}. 2951 * <p> 2952 * This method may be used to trim whitespace (as defined above) from 2953 * the beginning and end of a string. 2954 * 2955 * @return A string whose value is this string, with any leading and trailing white 2956 * space removed, or this string if it has no leading or 2957 * trailing white space. 
2958 */ 2959 public String trim() { 2960 int len = value.length; 2961 int st = 0; 2962 char[] val = value; /* avoid getfield opcode */ 2963 2964 while ((st < len) && (val[st] <= ' ')) { 2965 st++; 2966 } 2967 while ((st < len) && (val[len - 1] <= ' ')) { 2968 len--; 2969 } 2970 return ((st > 0) || (len < value.length)) ? substring(st, len) : this; 2971 } 2972 2973 /** 2974 * This object (which is already a string!) is itself returned. 2975 * 2976 * @return the string itself. 2977 */ 2978 public String toString() { 2979 return this; 2980 } 2981 2982 /** 2983 * Converts this string to a new character array. 2984 * 2985 * @return a newly allocated character array whose length is the length 2986 * of this string and whose contents are initialized to contain 2987 * the character sequence represented by this string. 2988 */ 2989 public char[] toCharArray() { 2990 // Cannot use Arrays.copyOf because of class initialization order issues 2991 char result[] = new char[value.length]; 2992 System.arraycopy(value, 0, result, 0, value.length); 2993 return result; 2994 } 2995 2996 /** 2997 * Returns a formatted string using the specified format string and 2998 * arguments. 2999 * 3000 * <p> The locale always used is the one returned by {@link 3001 * java.util.Locale#getDefault() Locale.getDefault()}. 3002 * 3003 * @param format 3004 * A <a href="../util/Formatter.html#syntax">format string</a> 3005 * 3006 * @param args 3007 * Arguments referenced by the format specifiers in the format 3008 * string. If there are more arguments than format specifiers, the 3009 * extra arguments are ignored. The number of arguments is 3010 * variable and may be zero. The maximum number of arguments is 3011 * limited by the maximum dimension of a Java array as defined by 3012 * <cite>The Java™ Virtual Machine Specification</cite>. 3013 * The behaviour on a 3014 * {@code null} argument depends on the <a 3015 * href="../util/Formatter.html#syntax">conversion</a>. 
3016 * 3017 * @throws java.util.IllegalFormatException 3018 * If a format string contains an illegal syntax, a format 3019 * specifier that is incompatible with the given arguments, 3020 * insufficient arguments given the format string, or other 3021 * illegal conditions. For specification of all possible 3022 * formatting errors, see the <a 3023 * href="../util/Formatter.html#detail">Details</a> section of the 3024 * formatter class specification. 3025 * 3026 * @return A formatted string 3027 * 3028 * @see java.util.Formatter 3029 * @since 1.5 3030 */ 3031 public static String format(String format, Object... args) { 3032 return new Formatter().format(format, args).toString(); 3033 } 3034 3035 /** 3036 * Returns a formatted string using the specified locale, format string, 3037 * and arguments. 3038 * 3039 * @param l 3040 * The {@linkplain java.util.Locale locale} to apply during 3041 * formatting. If {@code l} is {@code null} then no localization 3042 * is applied. 3043 * 3044 * @param format 3045 * A <a href="../util/Formatter.html#syntax">format string</a> 3046 * 3047 * @param args 3048 * Arguments referenced by the format specifiers in the format 3049 * string. If there are more arguments than format specifiers, the 3050 * extra arguments are ignored. The number of arguments is 3051 * variable and may be zero. The maximum number of arguments is 3052 * limited by the maximum dimension of a Java array as defined by 3053 * <cite>The Java™ Virtual Machine Specification</cite>. 3054 * The behaviour on a 3055 * {@code null} argument depends on the 3056 * <a href="../util/Formatter.html#syntax">conversion</a>. 3057 * 3058 * @throws java.util.IllegalFormatException 3059 * If a format string contains an illegal syntax, a format 3060 * specifier that is incompatible with the given arguments, 3061 * insufficient arguments given the format string, or other 3062 * illegal conditions. 
For specification of all possible 3063 * formatting errors, see the <a 3064 * href="../util/Formatter.html#detail">Details</a> section of the 3065 * formatter class specification 3066 * 3067 * @return A formatted string 3068 * 3069 * @see java.util.Formatter 3070 * @since 1.5 3071 */ 3072 public static String format(Locale l, String format, Object... args) { 3073 return new Formatter(l).format(format, args).toString(); 3074 } 3075 3076 /** 3077 * Returns the string representation of the {@code Object} argument. 3078 * 3079 * @param obj an {@code Object}. 3080 * @return if the argument is {@code null}, then a string equal to 3081 * {@code "null"}; otherwise, the value of 3082 * {@code obj.toString()} is returned. 3083 * @see java.lang.Object#toString() 3084 */ 3085 public static String valueOf(Object obj) { 3086 return (obj == null) ? "null" : obj.toString(); 3087 } 3088 3089 /** 3090 * Returns the string representation of the {@code char} array 3091 * argument. The contents of the character array are copied; subsequent 3092 * modification of the character array does not affect the returned 3093 * string. 3094 * 3095 * @param data the character array. 3096 * @return a {@code String} that contains the characters of the 3097 * character array. 3098 */ 3099 public static String valueOf(char data[]) { 3100 return new String(data); 3101 } 3102 3103 /** 3104 * Returns the string representation of a specific subarray of the 3105 * {@code char} array argument. 3106 * <p> 3107 * The {@code offset} argument is the index of the first 3108 * character of the subarray. The {@code count} argument 3109 * specifies the length of the subarray. The contents of the subarray 3110 * are copied; subsequent modification of the character array does not 3111 * affect the returned string. 3112 * 3113 * @param data the character array. 3114 * @param offset initial offset of the subarray. 3115 * @param count length of the subarray. 
3116 * @return a {@code String} that contains the characters of the 3117 * specified subarray of the character array. 3118 * @exception IndexOutOfBoundsException if {@code offset} is 3119 * negative, or {@code count} is negative, or 3120 * {@code offset+count} is larger than 3121 * {@code data.length}. 3122 */ 3123 public static String valueOf(char data[], int offset, int count) { 3124 return new String(data, offset, count); 3125 } 3126 3127 /** 3128 * Equivalent to {@link #valueOf(char[], int, int)}. 3129 * 3130 * @param data the character array. 3131 * @param offset initial offset of the subarray. 3132 * @param count length of the subarray. 3133 * @return a {@code String} that contains the characters of the 3134 * specified subarray of the character array. 3135 * @exception IndexOutOfBoundsException if {@code offset} is 3136 * negative, or {@code count} is negative, or 3137 * {@code offset+count} is larger than 3138 * {@code data.length}. 3139 */ 3140 public static String copyValueOf(char data[], int offset, int count) { 3141 return new String(data, offset, count); 3142 } 3143 3144 /** 3145 * Equivalent to {@link #valueOf(char[])}. 3146 * 3147 * @param data the character array. 3148 * @return a {@code String} that contains the characters of the 3149 * character array. 3150 */ 3151 public static String copyValueOf(char data[]) { 3152 return new String(data); 3153 } 3154 3155 /** 3156 * Returns the string representation of the {@code boolean} argument. 3157 * 3158 * @param b a {@code boolean}. 3159 * @return if the argument is {@code true}, a string equal to 3160 * {@code "true"} is returned; otherwise, a string equal to 3161 * {@code "false"} is returned. 3162 */ 3163 public static String valueOf(boolean b) { 3164 return b ? "true" : "false"; 3165 } 3166 3167 /** 3168 * Returns the string representation of the {@code char} 3169 * argument. 3170 * 3171 * @param c a {@code char}. 
3172 * @return a string of length {@code 1} containing 3173 * as its single character the argument {@code c}. 3174 */ 3175 public static String valueOf(char c) { 3176 char data[] = {c}; 3177 return new String(data, true); 3178 } 3179 3180 /** 3181 * Returns the string representation of the {@code int} argument. 3182 * <p> 3183 * The representation is exactly the one returned by the 3184 * {@code Integer.toString} method of one argument. 3185 * 3186 * @param i an {@code int}. 3187 * @return a string representation of the {@code int} argument. 3188 * @see java.lang.Integer#toString(int, int) 3189 */ 3190 public static String valueOf(int i) { 3191 return Integer.toString(i); 3192 } 3193 3194 /** 3195 * Returns the string representation of the {@code long} argument. 3196 * <p> 3197 * The representation is exactly the one returned by the 3198 * {@code Long.toString} method of one argument. 3199 * 3200 * @param l a {@code long}. 3201 * @return a string representation of the {@code long} argument. 3202 * @see java.lang.Long#toString(long) 3203 */ 3204 public static String valueOf(long l) { 3205 return Long.toString(l); 3206 } 3207 3208 /** 3209 * Returns the string representation of the {@code float} argument. 3210 * <p> 3211 * The representation is exactly the one returned by the 3212 * {@code Float.toString} method of one argument. 3213 * 3214 * @param f a {@code float}. 3215 * @return a string representation of the {@code float} argument. 3216 * @see java.lang.Float#toString(float) 3217 */ 3218 public static String valueOf(float f) { 3219 return Float.toString(f); 3220 } 3221 3222 /** 3223 * Returns the string representation of the {@code double} argument. 3224 * <p> 3225 * The representation is exactly the one returned by the 3226 * {@code Double.toString} method of one argument. 3227 * 3228 * @param d a {@code double}. 3229 * @return a string representation of the {@code double} argument. 
* @see     java.lang.Double#toString(double)
     */
    public static String valueOf(double d) {
        final String text = Double.toString(d);
        return text;
    }

    /**
     * Returns a canonical representation for the string object.
     * <p>
     * A pool of strings, initially empty, is maintained privately by the
     * class {@code String}.
     * <p>
     * When the intern method is invoked, if the pool already contains a
     * string equal to this {@code String} object as determined by
     * the {@link #equals(Object)} method, then the string from the pool is
     * returned. Otherwise, this {@code String} object is added to the
     * pool and a reference to this {@code String} object is returned.
     * <p>
     * It follows that for any two strings {@code s} and {@code t},
     * {@code s.intern() == t.intern()} is {@code true}
     * if and only if {@code s.equals(t)} is {@code true}.
     * <p>
     * All literal strings and string-valued constant expressions are
     * interned. String literals are defined in section 3.10.5 of the
     * <cite>The Java&trade; Language Specification</cite>.
     *
     * @return  a string that has the same contents as this string, but is
     *          guaranteed to be from a pool of unique strings.
     */
    public native String intern();
}