1 /* 2 * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Locale; 30 import java.util.Spliterator; 31 import java.util.function.IntConsumer; 32 import jdk.internal.HotSpotIntrinsicCandidate; 33 34 import static java.lang.String.UTF16; 35 import static java.lang.String.LATIN1; 36 import static java.lang.String.checkIndex; 37 import static java.lang.String.checkOffset; 38 import static java.lang.String.checkBoundsOffCount; 39 40 final class StringUTF16 { 41 42 public static byte[] newBytesFor(int len) { 43 if (len < 0) { 44 throw new NegativeArraySizeException(); 45 } 46 if (len > MAX_LENGTH) { 47 throw new OutOfMemoryError("UTF16 String size is " + len + 48 ", should be less than " + MAX_LENGTH); 49 } 50 return new byte[len << 1]; 51 } 52 53 @HotSpotIntrinsicCandidate 54 public static void putChar(byte[] val, int index, int c) { 55 index <<= 1; 56 val[index++] = (byte)(c >> HI_BYTE_SHIFT); 57 val[index] = (byte)(c >> LO_BYTE_SHIFT); 58 } 59 60 @HotSpotIntrinsicCandidate 61 public static char getChar(byte[] val, int index) { 62 index <<= 1; 63 return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) | 64 ((val[index] & 0xff) << LO_BYTE_SHIFT)); 65 } 66 67 public static char charAt(byte[] value, int index) { 68 if (index < 0 || index >= value.length >> 1) { 69 throw new StringIndexOutOfBoundsException(index); 70 } 71 return getChar(value, index); 72 } 73 74 public static int length(byte[] value) { 75 return value.length >> 1; 76 } 77 78 public static int codePointAt(byte[] value, int index, int end) { 79 char c1 = getChar(value, index); 80 if (Character.isHighSurrogate(c1) && ++index < end) { 81 char c2 = getChar(value, index); 82 if (Character.isLowSurrogate(c2)) { 83 return Character.toCodePoint(c1, c2); 84 } 85 } 86 return c1; 87 } 88 89 public static int codePointBefore(byte[] value, int index) { 90 char c2 = getChar(value, --index); 91 if (Character.isLowSurrogate(c2) && index > 0) { 92 char c1 = getChar(value, --index); 93 if (Character.isHighSurrogate(c1)) { 94 return Character.toCodePoint(c1, c2); 95 } 96 } 97 return c2; 98 } 99 100 public static int codePointCount(byte[] value, int beginIndex, int endIndex) { 101 int count = endIndex - beginIndex; 102 for (int i = beginIndex; i < endIndex; ) { 103 if (Character.isHighSurrogate(getChar(value, i++)) && 104 i < endIndex && 105 Character.isLowSurrogate(getChar(value, i))) { 106 count--; 107 i++; 108 } 109 } 110 return count; 111 } 112 113 public static char[] toChars(byte[] value) { 114 char[] dst = new char[value.length >> 1]; 115 getChars(value, 0, dst.length, dst, 0); 116 return dst; 117 } 118 119 @HotSpotIntrinsicCandidate 120 public static byte[] toBytes(char[] value, int off, int len) { 121 byte[] val = newBytesFor(len); 122 for (int i = 0; i < len; i++) { 123 putChar(val, i, value[off]); 124 off++; 125 } 126 return val; 127 } 128 129 public static byte[] compress(char[] val, int off, int len) { 130 byte[] ret = new byte[len]; 131 if (compress(val, off, ret, 0, len) == len) { 132 return ret; 133 } 134 return null; 135 } 136 137 public static byte[] compress(byte[] val, int off, int len) { 138 byte[] ret = new byte[len]; 139 if (compress(val, off, ret, 0, len) == len) { 140 return ret; 141 } 142 return null; 143 } 145 // compressedCopy char[] -> byte[] 146 @HotSpotIntrinsicCandidate 147 public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) { 148 for (int i = 0; i < len; i++) { 149 char c = src[srcOff]; 150 if (c > 0xFF) { 151 len = 0; 152 break; 153 } 154 dst[dstOff] = (byte)c; 155 srcOff++; 156 dstOff++; 157 } 158 return len; 159 } 160 161 // compressedCopy byte[] -> byte[] 162 @HotSpotIntrinsicCandidate 163 public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 164 // We need a range check here because 'getChar' has no checks 165 checkBoundsOffCount(srcOff << 1, len << 1, src.length); 166 for (int i = 0; i < len; i++) { 167 char c = getChar(src, srcOff); 168 if (c > 0xFF) { 169 len = 0; 170 break; 171 } 172 dst[dstOff] = (byte)c; 173 srcOff++; 174 dstOff++; 175 } 176 return len; 177 } 178 179 public static byte[] toBytes(int[] val, int index, int len) { 180 final int end = index + len; 181 // Pass 1: Compute precise size of char[] 182 int n = len; 183 for (int i = index; i < end; i++) { 184 int cp = val[i]; 185 if (Character.isBmpCodePoint(cp)) 186 continue; 187 else if (Character.isValidCodePoint(cp)) 188 n++; 189 else throw new IllegalArgumentException(Integer.toString(cp)); 190 } 191 // Pass 2: Allocate and fill in <high, low> pair 192 byte[] buf = newBytesFor(n); 193 for (int i = index, j = 0; i < end; i++, j++) { 194 int cp = val[i]; 195 if (Character.isBmpCodePoint(cp)) { 196 putChar(buf, j, cp); 197 } else { 198 putChar(buf, j++, Character.highSurrogate(cp)); 199 putChar(buf, j, Character.lowSurrogate(cp)); 200 } 201 } 202 return buf; 203 } 204 205 public static byte[] toBytes(char c) { 206 byte[] result = new byte[2]; 207 putChar(result, 0, c); 208 return result; 209 } 210 211 @HotSpotIntrinsicCandidate 212 public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) { 213 // We need a range check here because 'getChar' has no checks 214 if (srcBegin < srcEnd) { 215 checkBoundsOffCount(srcBegin << 1, (srcEnd - srcBegin) << 1, value.length); 216 } 217 for (int i = srcBegin; i < srcEnd; i++) { 218 dst[dstBegin++] = getChar(value, i); 219 } 220 } 221 222 /* @see java.lang.String.getBytes(int, int, byte[], int) */ 223 public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) { 224 srcBegin <<= 1; 225 srcEnd <<= 1; 226 for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) { 227 dst[dstBegin++] = value[i]; 228 } 229 } 230 231 @HotSpotIntrinsicCandidate 232 public static boolean equals(byte[] value, byte[] other) { 233 if (value.length == other.length) { 234 int len = value.length >> 1; 235 for (int i = 0; i < len; i++) { 236 if (getChar(value, i) != getChar(other, i)) { 237 return false; 238 } 239 } 240 return true; 241 } 242 return false; 243 } 244 245 @HotSpotIntrinsicCandidate 246 public static int compareTo(byte[] value, byte[] other) { 247 int len1 = length(value); 248 int len2 = length(other); 249 int lim = Math.min(len1, len2); 250 for (int k = 0; k < lim; k++) { 251 char c1 = getChar(value, k); 252 char c2 = getChar(other, k); 253 if (c1 != c2) { 254 return c1 - c2; 255 } 256 } 257 return len1 - len2; 258 } 259 260 @HotSpotIntrinsicCandidate 261 public static int compareToLatin1(byte[] value, byte[] other) { 262 return -StringLatin1.compareToUTF16(other, value); 263 } 264 265 public static int compareToCI(byte[] value, byte[] other) { 266 int len1 = length(value); 267 int len2 = length(other); 268 int lim = Math.min(len1, len2); 269 for (int k = 0; k < lim; k++) { 270 char c1 = getChar(value, k); 271 char c2 = getChar(other, k); 272 if (c1 != c2) { 273 c1 = Character.toUpperCase(c1); 274 c2 = Character.toUpperCase(c2); 275 if (c1 != c2) { 276 c1 = Character.toLowerCase(c1); 277 c2 = Character.toLowerCase(c2); 278 if (c1 != c2) { 279 return c1 - c2; 280 } 281 } 282 } 283 } 284 return len1 - len2; 285 } 286 287 public static int compareToCI_Latin1(byte[] value, byte[] other) { 288 return -StringLatin1.compareToCI_UTF16(other, value); 289 } 290 291 public static int hashCode(byte[] value) { 292 int h = 0; 293 int length = value.length >> 1; 294 for (int i = 0; i < length; i++) { 295 h = 31 * h + getChar(value, i); 296 } 297 return h; 298 } 299 300 public static int indexOf(byte[] value, int ch, int fromIndex) { 301 int max = value.length >> 1; 302 if (fromIndex < 0) { 303 fromIndex = 0; 304 } else if (fromIndex >= max) { 305 // Note: fromIndex might be near -1>>>1. 306 return -1; 307 } 308 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 309 // handle most cases here (ch is a BMP code point or a 310 // negative value (invalid code point)) 311 return indexOfChar(value, ch, fromIndex, max); 312 } else { 313 return indexOfSupplementary(value, ch, fromIndex, max); 314 } 315 } 316 317 @HotSpotIntrinsicCandidate 318 public static int indexOf(byte[] value, byte[] str) { 319 if (str.length == 0) { 320 return 0; 321 } 322 if (value.length == 0) { 323 return -1; 324 } 325 return indexOf(value, length(value), str, length(str), 0); 326 } 327 328 @HotSpotIntrinsicCandidate 329 public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 330 char first = getChar(str, 0); 331 int max = (valueCount - strCount); 332 for (int i = fromIndex; i <= max; i++) { 333 // Look for first character. 334 if (getChar(value, i) != first) { 335 while (++i <= max && getChar(value, i) != first); 336 } 337 // Found first character, now look at the rest of value 338 if (i <= max) { 339 int j = i + 1; 340 int end = j + strCount - 1; 341 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++); 342 if (j == end) { 343 // Found whole string. 344 return i; 345 } 346 } 347 } 348 return -1; 349 } 350 351 /** 352 * Handles indexOf Latin1 substring in UTF16 string. 353 */ 354 @HotSpotIntrinsicCandidate 355 public static int indexOfLatin1(byte[] value, byte[] str) { 356 if (str.length == 0) { 357 return 0; 358 } 359 if (value.length == 0) { 360 return -1; 361 } 362 return indexOfLatin1(value, length(value), str, str.length, 0); 363 } 364 365 @HotSpotIntrinsicCandidate 366 public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { 367 char first = (char)(tgt[0] & 0xff); 368 int max = (srcCount - tgtCount); 369 for (int i = fromIndex; i <= max; i++) { 370 // Look for first character. 371 if (getChar(src, i) != first) { 372 while (++i <= max && getChar(src, i) != first); 373 } 374 // Found first character, now look at the rest of v2 375 if (i <= max) { 376 int j = i + 1; 377 int end = j + tgtCount - 1; 378 for (int k = 1; 379 j < end && getChar(src, j) == (tgt[k] & 0xff); 380 j++, k++); 381 if (j == end) { 382 // Found whole string. 383 return i; 384 } 385 } 386 } 387 return -1; 388 } 389 390 @HotSpotIntrinsicCandidate 391 private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { 392 for (int i = fromIndex; i < max; i++) { 393 if (getChar(value, i) == ch) { 394 return i; 395 } 396 } 397 return -1; 398 } 399 400 /** 401 * Handles (rare) calls of indexOf with a supplementary character. 402 */ 403 private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) { 404 if (Character.isValidCodePoint(ch)) { 405 final char hi = Character.highSurrogate(ch); 406 final char lo = Character.lowSurrogate(ch); 407 for (int i = fromIndex; i < max - 1; i++) { 408 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) { 409 return i; 410 } 411 } 412 } 413 return -1; 414 } 415 416 public static int lastIndexOf(byte[] src, int srcCount, 417 byte[] tgt, int tgtCount, int fromIndex) { 418 int min = tgtCount - 1; 419 int i = min + fromIndex; 420 int strLastIndex = tgtCount - 1; 421 char strLastChar = getChar(tgt, strLastIndex); 422 423 startSearchForLastChar: 424 while (true) { 425 while (i >= min && getChar(src, i) != strLastChar) { 426 i--; 427 } 428 if (i < min) { 429 return -1; 430 } 431 int j = i - 1; 432 int start = j - strLastIndex; 433 int k = strLastIndex - 1; 434 while (j > start) { 435 if (getChar(src, j--) != getChar(tgt, k--)) { 436 i--; 437 continue startSearchForLastChar; 438 } 439 } 440 return start + 1; 441 } 442 } 443 444 public static int lastIndexOf(byte[] value, int ch, int fromIndex) { 445 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 446 // handle most cases here (ch is a BMP code point or a 447 // negative value (invalid code point)) 448 int i = Math.min(fromIndex, (value.length >> 1) - 1); 449 for (; i >= 0; i--) { 450 if (getChar(value, i) == ch) { 451 return i; 452 } 453 } 454 return -1; 455 } else { 456 return lastIndexOfSupplementary(value, ch, fromIndex); 457 } 458 } 459 460 /** 461 * Handles (rare) calls of lastIndexOf with a supplementary character. 462 */ 463 private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) { 464 if (Character.isValidCodePoint(ch)) { 465 char hi = Character.highSurrogate(ch); 466 char lo = Character.lowSurrogate(ch); 467 int i = Math.min(fromIndex, (value.length >> 1) - 2); 468 for (; i >= 0; i--) { 469 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) { 470 return i; 471 } 472 } 473 } 474 return -1; 475 } 476 477 public static String replace(byte[] value, char oldChar, char newChar) { 478 int len = value.length >> 1; 479 int i = -1; 480 while (++i < len) { 481 if (getChar(value, i) == oldChar) { 482 break; 483 } 484 } 485 if (i < len) { 486 byte buf[] = new byte[value.length]; 487 for (int j = 0; j < i; j++) { 488 putChar(buf, j, getChar(value, j)); // TBD:arraycopy? 489 } 490 while (i < len) { 491 char c = getChar(value, i); 492 putChar(buf, i, c == oldChar ? newChar : c); 493 i++; 494 } 495 // Check if we should try to compress to latin1 496 if (String.COMPACT_STRINGS && 497 !StringLatin1.canEncode(oldChar) && 498 StringLatin1.canEncode(newChar)) { 499 byte[] val = compress(buf, 0, len); 500 if (val != null) { 501 return new String(val, LATIN1); 502 } 503 } 504 return new String(buf, UTF16); 505 } 506 return null; 507 } 508 509 public static boolean regionMatchesCI(byte[] value, int toffset, 510 byte[] other, int ooffset, int len) { 511 int last = toffset + len; 512 while (toffset < last) { 513 char c1 = getChar(value, toffset++); 514 char c2 = getChar(other, ooffset++); 515 if (c1 == c2) { 516 continue; 517 } 518 // try converting both characters to uppercase. 519 // If the results match, then the comparison scan should 520 // continue. 521 char u1 = Character.toUpperCase(c1); 522 char u2 = Character.toUpperCase(c2); 523 if (u1 == u2) { 524 continue; 525 } 526 // Unfortunately, conversion to uppercase does not work properly 527 // for the Georgian alphabet, which has strange rules about case 528 // conversion. So we need to make one last check before 529 // exiting. 530 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 531 continue; 532 } 533 return false; 534 } 535 return true; 536 } 537 538 public static boolean regionMatchesCI_Latin1(byte[] value, int toffset, 539 byte[] other, int ooffset, 540 int len) { 541 return StringLatin1.regionMatchesCI_UTF16(other, ooffset, value, toffset, len); 542 } 543 544 public static String toLowerCase(String str, byte[] value, Locale locale) { 545 if (locale == null) { 546 throw new NullPointerException(); 547 } 548 int first; 549 boolean hasSurr = false; 550 final int len = value.length >> 1; 551 552 // Now check if there are any characters that need to be changed, or are surrogate 553 for (first = 0 ; first < len; first++) { 554 int cp = (int)getChar(value, first); 555 if (Character.isSurrogate((char)cp)) { 556 hasSurr = true; 557 break; 558 } 559 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR 560 break; 561 } 562 } 563 if (first == len) 564 return str; 565 byte[] result = new byte[value.length]; 566 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 567 // lowerCase characters. 568 String lang = locale.getLanguage(); 569 if (lang == "tr" || lang == "az" || lang == "lt") { 570 return toLowerCaseEx(str, value, result, first, locale, true); 571 } 572 if (hasSurr) { 573 return toLowerCaseEx(str, value, result, first, locale, false); 574 } 575 int bits = 0; 576 for (int i = first; i < len; i++) { 577 int cp = (int)getChar(value, i); 578 if (cp == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 579 Character.isSurrogate((char)cp)) { 580 return toLowerCaseEx(str, value, result, i, locale, false); 581 } 582 if (cp == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 583 return toLowerCaseEx(str, value, result, i, locale, true); 584 } 585 cp = Character.toLowerCase(cp); 586 if (!Character.isBmpCodePoint(cp)) { 587 return toLowerCaseEx(str, value, result, i, locale, false); 588 } 589 bits |= cp; 590 putChar(result, i, cp); 591 } 592 if (bits > 0xFF) { 593 return new String(result, UTF16); 594 } else { 595 return newString(result, 0, len); 596 } 597 } 598 599 private static String toLowerCaseEx(String str, byte[] value, 600 byte[] result, int first, Locale locale, 601 boolean localeDependent) { 602 int resultOffset = first; 603 int length = value.length >> 1; 604 int srcCount; 605 for (int i = first; i < length; i += srcCount) { 606 int srcChar = getChar(value, i); 607 int lowerChar; 608 char[] lowerCharArray; 609 srcCount = 1; 610 if (Character.isSurrogate((char)srcChar)) { 611 srcChar = codePointAt(value, i, length); 612 srcCount = Character.charCount(srcChar); 613 } 614 if (localeDependent || 615 srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 616 srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 617 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale); 618 } else { 619 lowerChar = Character.toLowerCase(srcChar); 620 } 621 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp 622 putChar(result, resultOffset++, lowerChar); 623 } else { 624 if (lowerChar == Character.ERROR) { 625 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale); 626 } else { 627 lowerCharArray = Character.toChars(lowerChar); 628 } 629 /* Grow result if needed */ 630 int mapLen = lowerCharArray.length; 631 if (mapLen > srcCount) { 632 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 633 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 634 result = result2; 635 } 636 for (int x = 0; x < mapLen; ++x) { 637 putChar(result, resultOffset++, lowerCharArray[x]); 638 } 639 } 640 } 641 return newString(result, 0, resultOffset); 642 } 643 644 public static String toUpperCase(String str, byte[] value, Locale locale) { 645 if (locale == null) { 646 throw new NullPointerException(); 647 } 648 int first; 649 boolean hasSurr = false; 650 final int len = value.length >> 1; 651 652 // Now check if there are any characters that need to be changed, or are surrogate 653 for (first = 0 ; first < len; first++) { 654 int cp = (int)getChar(value, first); 655 if (Character.isSurrogate((char)cp)) { 656 hasSurr = true; 657 break; 658 } 659 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR 660 break; 661 } 662 } 663 if (first == len) { 664 return str; 665 } 666 byte[] result = new byte[value.length]; 667 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 668 // upperCase characters. 669 String lang = locale.getLanguage(); 670 if (lang == "tr" || lang == "az" || lang == "lt") { 671 return toUpperCaseEx(str, value, result, first, locale, true); 672 } 673 if (hasSurr) { 674 return toUpperCaseEx(str, value, result, first, locale, false); 675 } 676 int bits = 0; 677 for (int i = first; i < len; i++) { 678 int cp = (int)getChar(value, i); 679 if (Character.isSurrogate((char)cp)) { 680 return toUpperCaseEx(str, value, result, i, locale, false); 681 } 682 cp = Character.toUpperCaseEx(cp); 683 if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp 684 return toUpperCaseEx(str, value, result, i, locale, false); 685 } 686 bits |= cp; 687 putChar(result, i, cp); 688 } 689 if (bits > 0xFF) { 690 return new String(result, UTF16); 691 } else { 692 return newString(result, 0, len); 693 } 694 } 695 696 private static String toUpperCaseEx(String str, byte[] value, 697 byte[] result, int first, 698 Locale locale, boolean localeDependent) 699 { 700 int resultOffset = first; 701 int length = value.length >> 1; 702 int srcCount; 703 for (int i = first; i < length; i += srcCount) { 704 int srcChar = getChar(value, i); 705 int upperChar; 706 char[] upperCharArray; 707 srcCount = 1; 708 if (Character.isSurrogate((char)srcChar)) { 709 srcChar = codePointAt(value, i, length); 710 srcCount = Character.charCount(srcChar); 711 } 712 if (localeDependent) { 713 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale); 714 } else { 715 upperChar = Character.toUpperCaseEx(srcChar); 716 } 717 if (Character.isBmpCodePoint(upperChar)) { 718 putChar(result, resultOffset++, upperChar); 719 } else { 720 if (upperChar == Character.ERROR) { 721 if (localeDependent) { 722 upperCharArray = 723 ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale); 724 } else { 725 upperCharArray = Character.toUpperCaseCharArray(srcChar); 726 } 727 } else { 728 upperCharArray = Character.toChars(upperChar); 729 } 730 /* Grow result if needed */ 731 int mapLen = upperCharArray.length; 732 if (mapLen > srcCount) { 733 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 734 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 735 result = result2; 736 } 737 for (int x = 0; x < mapLen; ++x) { 738 putChar(result, resultOffset++, upperCharArray[x]); 739 } 740 } 741 } 742 return newString(result, 0, resultOffset); 743 } 744 745 public static String trim(byte[] value) { 746 int length = value.length >> 1; 747 int len = length; 748 int st = 0; 749 while (st < len && getChar(value, st) <= ' ') { 750 st++; 751 } 752 while (st < len && getChar(value, len - 1) <= ' ') { 753 len--; 754 } 755 return ((st > 0) || (len < length )) ? 756 new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) : 757 null; 758 } 759 760 public static void putChars(byte[] val, int index, char[] str, int off, int end) { 761 while (off < end) { 762 putChar(val, index++, str[off++]); 763 } 764 } 765 766 public static String newString(byte[] val, int index, int len) { 767 if (String.COMPACT_STRINGS) { 768 byte[] buf = compress(val, index, len); 769 if (buf != null) { 770 return new String(buf, LATIN1); 771 } 772 } 773 int last = index + len; 774 return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16); 775 } 776 777 public static void fillNull(byte[] val, int index, int end) { 778 Arrays.fill(val, index << 1, end << 1, (byte)0); 779 } 780 781 static class CharsSpliterator implements Spliterator.OfInt { 782 private final byte[] array; 910 i++; 911 cp = Character.toCodePoint(c1, c2); 912 } 913 } 914 action.accept(cp); 915 return i; 916 } 917 918 @Override 919 public long estimateSize() { return (long)(fence - index); } 920 921 @Override 922 public int characteristics() { 923 return cs; 924 } 925 } 926 927 //////////////////////////////////////////////////////////////// 928 929 public static void putCharSB(byte[] val, int index, int c) { 930 checkIndex(index, val.length >> 1); 931 putChar(val, index, c); 932 } 933 934 public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) { 935 checkOffset(index + end - off, val.length >> 1); 936 putChars(val, index, ca, off, end); 937 } 938 939 public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) { 940 checkOffset(index + end - off, val.length >> 1); 941 for (int i = off; i < end; i++) { 942 putChar(val, index++, s.charAt(i)); 943 } 944 } 945 946 public static int codePointAtSB(byte[] val, int index, int end) { 947 checkOffset(end, val.length >> 1); 948 return codePointAt(val, index, end); 949 } 950 951 public static int codePointBeforeSB(byte[] val, int index) { 952 checkOffset(index, val.length >> 1); 953 return codePointBefore(val, index); 954 } 955 956 public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) { 957 checkOffset(endIndex, val.length >> 1); 958 return codePointCount(val, beginIndex, endIndex); 959 } 960 961 //////////////////////////////////////////////////////////////// 962 963 private static native boolean isBigEndian(); 964 965 static final int HI_BYTE_SHIFT; 966 static final int LO_BYTE_SHIFT; 967 static { 968 if (isBigEndian()) { 969 HI_BYTE_SHIFT = 8; 970 LO_BYTE_SHIFT = 0; 971 } else { 972 HI_BYTE_SHIFT = 0; 973 LO_BYTE_SHIFT = 8; 974 } 975 } 976 977 static final int MAX_LENGTH = Integer.MAX_VALUE >> 1; 978 } | 1 /* 2 * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Locale; 30 import java.util.Spliterator; 31 import java.util.function.IntConsumer; 32 import jdk.internal.HotSpotIntrinsicCandidate; 33 import jdk.internal.vm.annotation.ForceInline; 34 import jdk.internal.vm.annotation.DontInline; 35 36 import static java.lang.String.UTF16; 37 import static java.lang.String.LATIN1; 38 //import static java.lang.String.checkIndex; 39 //import static java.lang.String.checkOffset; 40 //import static java.lang.String.checkBoundsOffCount; 41 42 final class StringUTF16 { 43 44 public static byte[] newBytesFor(int len) { 45 if (len < 0) { 46 throw new NegativeArraySizeException(); 47 } 48 if (len > MAX_LENGTH) { 49 throw new OutOfMemoryError("UTF16 String size is " + len + 50 ", should be less than " + MAX_LENGTH); 51 } 52 return new byte[len << 1]; 53 } 54 55 @HotSpotIntrinsicCandidate 56 // should be private but then javac generates an accessor method 57 // intrinsic performs no bounds checks 58 static void putCharUnsafe(byte[] val, int index, int c) { 59 index <<= 1; 60 val[index++] = (byte)(c >> HI_BYTE_SHIFT); 61 val[index] = (byte)(c >> LO_BYTE_SHIFT); 62 } 63 64 @HotSpotIntrinsicCandidate 65 // should be private but then javac generates an accessor method 66 // intrinsic performs no bounds checks 67 static char getCharUnsafe(byte[] val, int index) { 68 index <<= 1; 69 return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) | 70 ((val[index] & 0xff) << LO_BYTE_SHIFT)); 71 } 72 73 public static int length(byte[] value) { 74 return value.length >> 1; 75 } 76 77 private static int codePointAt(byte[] value, int index, int end, boolean checked) { 78 assert index < end; 79 if (checked) { 80 checkIndex(index, value); 81 } 82 char c1 = Trusted.getChar(value, index); 83 if (Character.isHighSurrogate(c1) && ++index < end) { 84 if (checked) { 85 checkIndex(index, value); 86 } 87 char c2 = Trusted.getChar(value, index); 88 if (Character.isLowSurrogate(c2)) { 89 return Character.toCodePoint(c1, c2); 90 } 91 } 92 return c1; 93 } 94 95 public static int codePointAt(byte[] value, int index, int end) { 96 return codePointAt(value, index, end, false /* unchecked */); 97 } 98 99 private static int codePointBefore(byte[] value, int index, boolean checked) { 100 --index; 101 if (checked) { 102 checkIndex(index, value); 103 } 104 char c2 = Trusted.getChar(value, index); 105 if (Character.isLowSurrogate(c2) && index > 0) { 106 --index; 107 if (checked) { 108 checkIndex(index, value); 109 } 110 char c1 = Trusted.getChar(value, index); 111 if (Character.isHighSurrogate(c1)) { 112 return Character.toCodePoint(c1, c2); 113 } 114 } 115 return c2; 116 } 117 118 public static int codePointBefore(byte[] value, int index) { 119 return codePointBefore(value, index, false /* unchecked */); 120 } 121 122 private static int codePointCount(byte[] value, int beginIndex, int endIndex, boolean checked) { 123 assert beginIndex <= endIndex; 124 int count = endIndex - beginIndex; 125 int i = beginIndex; 126 if (checked && i < endIndex) { 127 checkBoundsBeginEnd(i, endIndex, value); 128 } 129 for (; i < endIndex - 1; ) { 130 if (Character.isHighSurrogate(Trusted.getChar(value, i++)) && 131 Character.isLowSurrogate(Trusted.getChar(value, i))) { 132 count--; 133 i++; 134 } 135 } 136 return count; 137 } 138 139 public static int codePointCount(byte[] value, int beginIndex, int endIndex) { 140 return codePointCount(value, beginIndex, endIndex, false /* unchecked */); 141 } 142 143 public static char[] toChars(byte[] value) { 144 char[] dst = new char[value.length >> 1]; 145 getChars(value, 0, dst.length, dst, 0); 146 return dst; 147 } 148 149 @HotSpotIntrinsicCandidate 150 public static byte[] toBytes(char[] value, int off, int len) { 151 byte[] val = newBytesFor(len); 152 for (int i = 0; i < len; i++) { 153 Trusted.putChar(val, i, value[off]); 154 off++; 155 } 156 return val; 157 } 158 159 public static byte[] compress(char[] val, int off, int len) { 160 byte[] ret = new byte[len]; 161 if (compress(val, off, ret, 0, len) == len) { 162 return ret; 163 } 164 return null; 165 } 166 167 public static byte[] compress(byte[] val, int off, int len) { 168 byte[] ret = new byte[len]; 169 if (compress(val, off, ret, 0, len) == len) { 170 return ret; 171 } 172 return null; 173 } 175 // compressedCopy char[] -> byte[] 176 @HotSpotIntrinsicCandidate 177 public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) { 178 for (int i = 0; i < len; i++) { 179 char c = src[srcOff]; 180 if (c > 0xFF) { 181 len = 0; 182 break; 183 } 184 dst[dstOff] = (byte)c; 185 srcOff++; 186 dstOff++; 187 } 188 return len; 189 } 190 191 // compressedCopy byte[] -> byte[] 192 @HotSpotIntrinsicCandidate 193 public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 194 // We need a range check here because 'getChar' has no checks 195 checkBoundsOffCount(srcOff, len, src); 196 for (int i = 0; i < len; i++) { 197 char c = Trusted.getChar(src, srcOff); 198 if (c > 0xFF) { 199 len = 0; 200 break; 201 } 202 dst[dstOff] = (byte)c; 203 srcOff++; 204 dstOff++; 205 } 206 return len; 207 } 208 209 public static byte[] toBytes(int[] val, int index, int len) { 210 final int end = index + len; 211 // Pass 1: Compute precise size of char[] 212 int n = len; 213 for (int i = index; i < end; i++) { 214 int cp = val[i]; 215 if (Character.isBmpCodePoint(cp)) 216 continue; 217 else if (Character.isValidCodePoint(cp)) 218 n++; 219 else throw new IllegalArgumentException(Integer.toString(cp)); 220 } 221 // Pass 2: Allocate and fill in <high, low> pair 222 byte[] buf = newBytesFor(n); 223 for (int i = index, j = 0; i < end; i++, j++) { 224 int cp = val[i]; 225 if (Character.isBmpCodePoint(cp)) { 226 Trusted.putChar(buf, j, cp); 227 } else { 228 Trusted.putChar(buf, j++, Character.highSurrogate(cp)); 229 Trusted.putChar(buf, j, Character.lowSurrogate(cp)); 230 } 231 } 232 return buf; 233 } 234 235 public static byte[] toBytes(char c) { 236 byte[] result = new byte[2]; 237 Trusted.putChar(result, 0, c); 238 return result; 239 } 240 241 @HotSpotIntrinsicCandidate 242 public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) { 243 // We need a range check here because 'getChar' has no checks 244 if (srcBegin < srcEnd) { 245 checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value); 246 } 247 for (int i = srcBegin; i < srcEnd; i++) { 248 dst[dstBegin++] = Trusted.getChar(value, i); 249 } 250 } 251 252 /* @see java.lang.String.getBytes(int, int, byte[], int) */ 253 public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) { 254 srcBegin <<= 1; 255 srcEnd <<= 1; 256 for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) { 257 dst[dstBegin++] = value[i]; 258 } 259 } 260 261 @HotSpotIntrinsicCandidate 262 public static boolean equals(byte[] value, byte[] other) { 263 if (value.length == other.length) { 264 int len = value.length >> 1; 265 for (int i = 0; i < len; i++) { 266 if (Trusted.getChar(value, i) != Trusted.getChar(other, i)) { 267 return false; 268 } 269 } 270 return true; 271 } 272 return false; 273 } 274 275 @HotSpotIntrinsicCandidate 276 public static int compareTo(byte[] value, byte[] other) { 277 int len1 = length(value); 278 int len2 = length(other); 279 int lim = Math.min(len1, len2); 280 for (int k = 0; k < lim; k++) { 281 char c1 = Trusted.getChar(value, k); 282 char c2 = Trusted.getChar(other, k); 283 if (c1 != c2) { 284 return c1 - c2; 285 } 286 } 287 return len1 - len2; 288 } 289 290 @HotSpotIntrinsicCandidate 291 public static int compareToLatin1(byte[] value, byte[] other) { 292 return -StringLatin1.compareToUTF16(other, value); 293 } 294 295 public static int compareToCI(byte[] value, byte[] other) { 296 int len1 = length(value); 297 int len2 = length(other); 298 int lim = Math.min(len1, len2); 299 for (int k = 0; k < lim; k++) { 300 char c1 = Trusted.getChar(value, k); 301 char c2 = Trusted.getChar(other, k); 302 if (c1 != c2) { 303 c1 = Character.toUpperCase(c1); 304 c2 = Character.toUpperCase(c2); 305 if (c1 != c2) { 306 c1 = Character.toLowerCase(c1); 307 c2 = Character.toLowerCase(c2); 308 if (c1 != c2) { 309 return c1 - c2; 310 } 311 } 312 } 313 } 314 return len1 - len2; 315 } 316 317 public static int compareToCI_Latin1(byte[] value, byte[] other) { 318 return -StringLatin1.compareToCI_UTF16(other, value); 319 } 320 321 public static int hashCode(byte[] value) { 322 int h = 0; 323 int length = value.length >> 1; 324 for (int i = 0; i < length; i++) { 325 h = 31 * h + Trusted.getChar(value, i); 326 } 327 return h; 328 } 329 330 public static int indexOf(byte[] value, int ch, int fromIndex) { 331 int max = value.length >> 1; 332 if (fromIndex < 0) { 333 fromIndex = 0; 334 } else if (fromIndex >= max) { 335 // Note: fromIndex might be near -1>>>1. 336 return -1; 337 } 338 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 339 // handle most cases here (ch is a BMP code point or a 340 // negative value (invalid code point)) 341 return indexOfChar(value, ch, fromIndex, max); 342 } else { 343 return indexOfSupplementary(value, ch, fromIndex, max); 344 } 345 } 346 347 @HotSpotIntrinsicCandidate 348 public static int indexOf(byte[] value, byte[] str) { 349 if (str.length == 0) { 350 return 0; 351 } 352 if (value.length < str.length) { 353 return -1; 354 } 355 return indexOfUnsafe(value, length(value), str, length(str), 0); 356 } 357 358 @HotSpotIntrinsicCandidate 359 public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 360 checkBoundsBeginEnd(fromIndex, valueCount, value); 361 checkBoundsBeginEnd(0, strCount, str); 362 return indexOfUnsafe(value, valueCount, str, strCount, fromIndex); 363 } 364 365 366 private static int indexOfUnsafe(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 367 assert fromIndex >= 0; 368 assert strCount > 0; 369 assert strCount <= length(str); 370 assert valueCount >= strCount; 371 char first = Trusted.getChar(str, 0); 372 int max = (valueCount - strCount); 373 for (int i = fromIndex; i <= max; i++) { 374 // Look for first character. 375 if (Trusted.getChar(value, i) != first) { 376 while (++i <= max && Trusted.getChar(value, i) != first); 377 } 378 // Found first character, now look at the rest of value 379 if (i <= max) { 380 int j = i + 1; 381 int end = j + strCount - 1; 382 for (int k = 1; j < end && Trusted.getChar(value, j) == Trusted.getChar(str, k); j++, k++); 383 if (j == end) { 384 // Found whole string. 385 return i; 386 } 387 } 388 } 389 return -1; 390 } 391 392 393 /** 394 * Handles indexOf Latin1 substring in UTF16 string. 395 */ 396 @HotSpotIntrinsicCandidate 397 public static int indexOfLatin1(byte[] value, byte[] str) { 398 if (str.length == 0) { 399 return 0; 400 } 401 if (length(value) < str.length) { 402 return -1; 403 } 404 return indexOfLatin1Unsafe(value, length(value), str, str.length, 0); 405 } 406 407 @HotSpotIntrinsicCandidate 408 public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { 409 checkBoundsBeginEnd(fromIndex, srcCount, src); 410 String.checkBoundsBeginEnd(0, tgtCount, tgt.length); 411 return indexOfLatin1Unsafe(src, srcCount, tgt, tgtCount, fromIndex); 412 } 413 414 public static int indexOfLatin1Unsafe(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { 415 assert fromIndex >= 0; 416 assert tgtCount > 0; 417 assert tgtCount <= tgt.length; 418 assert srcCount >= tgtCount; 419 char first = (char)(tgt[0] & 0xff); 420 int max = (srcCount - tgtCount); 421 for (int i = fromIndex; i <= max; i++) { 422 // Look for first character. 423 if (Trusted.getChar(src, i) != first) { 424 while (++i <= max && Trusted.getChar(src, i) != first); 425 } 426 // Found first character, now look at the rest of v2 427 if (i <= max) { 428 int j = i + 1; 429 int end = j + tgtCount - 1; 430 for (int k = 1; 431 j < end && Trusted.getChar(src, j) == (tgt[k] & 0xff); 432 j++, k++); 433 if (j == end) { 434 // Found whole string. 435 return i; 436 } 437 } 438 } 439 return -1; 440 } 441 442 @HotSpotIntrinsicCandidate 443 private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { 444 checkBoundsBeginEnd(fromIndex, max, value); 445 return indexOfCharUnsafe(value, ch, fromIndex, max); 446 } 447 448 private static int indexOfCharUnsafe(byte[] value, int ch, int fromIndex, int max) { 449 for (int i = fromIndex; i < max; i++) { 450 if (Trusted.getChar(value, i) == ch) { 451 return i; 452 } 453 } 454 return -1; 455 } 456 457 /** 458 * Handles (rare) calls of indexOf with a supplementary character. 459 */ 460 private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) { 461 if (Character.isValidCodePoint(ch)) { 462 final char hi = Character.highSurrogate(ch); 463 final char lo = Character.lowSurrogate(ch); 464 checkBoundsBeginEnd(fromIndex, max, value); 465 for (int i = fromIndex; i < max - 1; i++) { 466 if (Trusted.getChar(value, i) == hi && Trusted.getChar(value, i + 1 ) == lo) { 467 return i; 468 } 469 } 470 } 471 return -1; 472 } 473 474 // srcCoder == UTF16 && tgtCoder == UTF16 475 public static int lastIndexOf(byte[] src, int srcCount, 476 byte[] tgt, int tgtCount, int fromIndex) { 477 assert fromIndex >= 0; 478 assert tgtCount > 0; 479 assert tgtCount <= length(tgt); 480 int min = tgtCount - 1; 481 int i = min + fromIndex; 482 int strLastIndex = tgtCount - 1; 483 484 checkIndex(strLastIndex, tgt); 485 char strLastChar = Trusted.getChar(tgt, strLastIndex); 486 487 checkIndex(i, src); 488 489 startSearchForLastChar: 490 while (true) { 491 while (i >= min && Trusted.getChar(src, i) != strLastChar) { 492 i--; 493 } 494 if (i < min) { 495 return -1; 496 } 497 int j = i - 1; 498 int start = j - strLastIndex; 499 int k = strLastIndex - 1; 500 while (j > start) { 501 if (Trusted.getChar(src, j--) != Trusted.getChar(tgt, k--)) { 502 i--; 503 continue startSearchForLastChar; 504 } 505 } 506 return start + 1; 507 } 508 } 509 510 public static int lastIndexOf(byte[] value, int ch, int fromIndex) { 511 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 512 // handle most cases here (ch is a BMP code point or a 513 // negative value (invalid code point)) 514 int i = Math.min(fromIndex, (value.length >> 1) - 1); 515 for (; i >= 0; i--) { 516 if (Trusted.getChar(value, i) == ch) { 517 return i; 518 } 519 } 520 return -1; 521 } else { 522 return lastIndexOfSupplementary(value, ch, fromIndex); 523 } 524 } 525 526 /** 527 * Handles (rare) calls of lastIndexOf with a supplementary character. 528 */ 529 private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) { 530 if (Character.isValidCodePoint(ch)) { 531 char hi = Character.highSurrogate(ch); 532 char lo = Character.lowSurrogate(ch); 533 int i = Math.min(fromIndex, (value.length >> 1) - 2); 534 for (; i >= 0; i--) { 535 if (Trusted.getChar(value, i) == hi && Trusted.getChar(value, i + 1) == lo) { 536 return i; 537 } 538 } 539 } 540 return -1; 541 } 542 543 public static String replace(byte[] value, char oldChar, char newChar) { 544 int len = value.length >> 1; 545 int i = -1; 546 while (++i < len) { 547 if (Trusted.getChar(value, i) == oldChar) { 548 break; 549 } 550 } 551 if (i < len) { 552 byte buf[] = new byte[value.length]; 553 for (int j = 0; j < i; j++) { 554 Trusted.putChar(buf, j, Trusted.getChar(value, j)); // TBD:arraycopy? 555 } 556 while (i < len) { 557 char c = Trusted.getChar(value, i); 558 Trusted.putChar(buf, i, c == oldChar ? newChar : c); 559 i++; 560 } 561 // Check if we should try to compress to latin1 562 if (String.COMPACT_STRINGS && 563 !StringLatin1.canEncode(oldChar) && 564 StringLatin1.canEncode(newChar)) { 565 byte[] val = compress(buf, 0, len); 566 if (val != null) { 567 return new String(val, LATIN1); 568 } 569 } 570 return new String(buf, UTF16); 571 } 572 return null; 573 } 574 575 public static boolean regionMatchesCI(byte[] value, int toffset, 576 byte[] other, int ooffset, int len) { 577 int last = toffset + len; 578 assert toffset >= 0 && ooffset >= 0; 579 assert ooffset + len <= length(other); 580 assert last <= length(value); 581 while (toffset < last) { 582 char c1 = Trusted.getChar(value, toffset++); 583 char c2 = Trusted.getChar(other, ooffset++); 584 if (c1 == c2) { 585 continue; 586 } 587 // try converting both characters to uppercase. 588 // If the results match, then the comparison scan should 589 // continue. 590 char u1 = Character.toUpperCase(c1); 591 char u2 = Character.toUpperCase(c2); 592 if (u1 == u2) { 593 continue; 594 } 595 // Unfortunately, conversion to uppercase does not work properly 596 // for the Georgian alphabet, which has strange rules about case 597 // conversion. So we need to make one last check before 598 // exiting. 599 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 600 continue; 601 } 602 return false; 603 } 604 return true; 605 } 606 607 public static boolean regionMatchesCI_Latin1(byte[] value, int toffset, 608 byte[] other, int ooffset, 609 int len) { 610 return StringLatin1.regionMatchesCI_UTF16(other, ooffset, value, toffset, len); 611 } 612 613 public static String toLowerCase(String str, byte[] value, Locale locale) { 614 if (locale == null) { 615 throw new NullPointerException(); 616 } 617 int first; 618 boolean hasSurr = false; 619 final int len = value.length >> 1; 620 621 // Now check if there are any characters that need to be changed, or are surrogate 622 for (first = 0 ; first < len; first++) { 623 int cp = (int)Trusted.getChar(value, first); 624 if (Character.isSurrogate((char)cp)) { 625 hasSurr = true; 626 break; 627 } 628 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR 629 break; 630 } 631 } 632 if (first == len) 633 return str; 634 byte[] result = new byte[value.length]; 635 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 636 // lowerCase characters. 637 String lang = locale.getLanguage(); 638 if (lang == "tr" || lang == "az" || lang == "lt") { 639 return toLowerCaseEx(str, value, result, first, locale, true); 640 } 641 if (hasSurr) { 642 return toLowerCaseEx(str, value, result, first, locale, false); 643 } 644 int bits = 0; 645 for (int i = first; i < len; i++) { 646 int cp = (int)Trusted.getChar(value, i); 647 if (cp == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 648 Character.isSurrogate((char)cp)) { 649 return toLowerCaseEx(str, value, result, i, locale, false); 650 } 651 if (cp == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 652 return toLowerCaseEx(str, value, result, i, locale, true); 653 } 654 cp = Character.toLowerCase(cp); 655 if (!Character.isBmpCodePoint(cp)) { 656 return toLowerCaseEx(str, value, result, i, locale, false); 657 } 658 bits |= cp; 659 Trusted.putChar(result, i, cp); 660 } 661 if (bits > 0xFF) { 662 return new String(result, UTF16); 663 } else { 664 return newString(result, 0, len); 665 } 666 } 667 668 private static String toLowerCaseEx(String str, byte[] value, 669 byte[] result, int first, Locale locale, 670 boolean localeDependent) { 671 assert(result.length == value.length); 672 assert(first >= 0); 673 int resultOffset = first; 674 int length = value.length >> 1; 675 int srcCount; 676 for (int i = first; i < length; i += srcCount) { 677 int srcChar = Trusted.getChar(value, i); 678 int lowerChar; 679 char[] lowerCharArray; 680 srcCount = 1; 681 if (Character.isSurrogate((char)srcChar)) { 682 srcChar = codePointAt(value, i, length); 683 srcCount = Character.charCount(srcChar); 684 } 685 if (localeDependent || 686 srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 687 srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 688 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale); 689 } else { 690 lowerChar = Character.toLowerCase(srcChar); 691 } 692 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp 693 Trusted.putChar(result, resultOffset++, lowerChar); 694 } else { 695 if (lowerChar == Character.ERROR) { 696 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale); 697 } else { 698 lowerCharArray = Character.toChars(lowerChar); 699 } 700 /* Grow result if needed */ 701 int mapLen = lowerCharArray.length; 702 if (mapLen > srcCount) { 703 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 704 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 705 result = result2; 706 } 707 assert resultOffset >= 0; 708 assert resultOffset + mapLen <= length(result); 709 for (int x = 0; x < mapLen; ++x) { 710 Trusted.putChar(result, resultOffset++, lowerCharArray[x]); 711 } 712 } 713 } 714 return newString(result, 0, resultOffset); 715 } 716 717 public static String toUpperCase(String str, byte[] value, Locale locale) { 718 if (locale == null) { 719 throw new NullPointerException(); 720 } 721 int first; 722 boolean hasSurr = false; 723 final int len = value.length >> 1; 724 725 // Now check if there are any characters that need to be changed, or are surrogate 726 for (first = 0 ; first < len; first++) { 727 int cp = (int)Trusted.getChar(value, first); 728 if (Character.isSurrogate((char)cp)) { 729 hasSurr = true; 730 break; 731 } 732 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR 733 break; 734 } 735 } 736 if (first == len) { 737 return str; 738 } 739 byte[] result = new byte[value.length]; 740 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 741 // upperCase characters. 742 String lang = locale.getLanguage(); 743 if (lang == "tr" || lang == "az" || lang == "lt") { 744 return toUpperCaseEx(str, value, result, first, locale, true); 745 } 746 if (hasSurr) { 747 return toUpperCaseEx(str, value, result, first, locale, false); 748 } 749 int bits = 0; 750 for (int i = first; i < len; i++) { 751 int cp = (int)Trusted.getChar(value, i); 752 if (Character.isSurrogate((char)cp)) { 753 return toUpperCaseEx(str, value, result, i, locale, false); 754 } 755 cp = Character.toUpperCaseEx(cp); 756 if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp 757 return toUpperCaseEx(str, value, result, i, locale, false); 758 } 759 bits |= cp; 760 Trusted.putChar(result, i, cp); 761 } 762 if (bits > 0xFF) { 763 return new String(result, UTF16); 764 } else { 765 return newString(result, 0, len); 766 } 767 } 768 769 private static String toUpperCaseEx(String str, byte[] value, 770 byte[] result, int first, 771 Locale locale, boolean localeDependent) 772 { 773 assert(result.length == value.length); 774 assert(first >= 0); 775 int resultOffset = first; 776 int length = value.length >> 1; 777 int srcCount; 778 for (int i = first; i < length; i += srcCount) { 779 int srcChar = Trusted.getChar(value, i); 780 int upperChar; 781 char[] upperCharArray; 782 srcCount = 1; 783 if (Character.isSurrogate((char)srcChar)) { 784 srcChar = codePointAt(value, i, length); 785 srcCount = Character.charCount(srcChar); 786 } 787 if (localeDependent) { 788 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale); 789 } else { 790 upperChar = Character.toUpperCaseEx(srcChar); 791 } 792 if (Character.isBmpCodePoint(upperChar)) { 793 Trusted.putChar(result, resultOffset++, upperChar); 794 } else { 795 if (upperChar == Character.ERROR) { 796 if (localeDependent) { 797 upperCharArray = 798 ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale); 799 } else { 800 upperCharArray = Character.toUpperCaseCharArray(srcChar); 801 } 802 } else { 803 upperCharArray = Character.toChars(upperChar); 804 } 805 /* Grow result if needed */ 806 int mapLen = upperCharArray.length; 807 if (mapLen > srcCount) { 808 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 809 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 810 result = result2; 811 } 812 assert resultOffset >= 0; 813 assert resultOffset + mapLen <= length(result); 814 for (int x = 0; x < mapLen; ++x) { 815 Trusted.putChar(result, resultOffset++, upperCharArray[x]); 816 } 817 } 818 } 819 return newString(result, 0, resultOffset); 820 } 821 822 public static String trim(byte[] value) { 823 int length = value.length >> 1; 824 int len = length; 825 int st = 0; 826 while (st < len && Trusted.getChar(value, st) <= ' ') { 827 st++; 828 } 829 while (st < len && Trusted.getChar(value, len - 1) <= ' ') { 830 len--; 831 } 832 return ((st > 0) || (len < length )) ? 833 new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) : 834 null; 835 } 836 837 private static void putChars(byte[] val, int index, char[] str, int off, int end) { 838 while (off < end) { 839 Trusted.putChar(val, index++, str[off++]); 840 } 841 } 842 843 public static String newString(byte[] val, int index, int len) { 844 if (String.COMPACT_STRINGS) { 845 byte[] buf = compress(val, index, len); 846 if (buf != null) { 847 return new String(buf, LATIN1); 848 } 849 } 850 int last = index + len; 851 return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16); 852 } 853 854 public static void fillNull(byte[] val, int index, int end) { 855 Arrays.fill(val, index << 1, end << 1, (byte)0); 856 } 857 858 static class CharsSpliterator implements Spliterator.OfInt { 859 private final byte[] array; 987 i++; 988 cp = Character.toCodePoint(c1, c2); 989 } 990 } 991 action.accept(cp); 992 return i; 993 } 994 995 @Override 996 public long estimateSize() { return (long)(fence - index); } 997 998 @Override 999 public int characteristics() { 1000 return cs; 1001 } 1002 } 1003 1004 //////////////////////////////////////////////////////////////// 1005 1006 public static void putCharSB(byte[] val, int index, int c) { 1007 checkIndex(index, val); 1008 Trusted.putChar(val, index, c); 1009 } 1010 1011 public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) { 1012 checkBoundsBeginEnd(index, index + end - off, val); 1013 putChars(val, index, ca, off, end); 1014 } 1015 1016 public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) { 1017 checkBoundsBeginEnd(index, index + end - off, val); 1018 for (int i = off; i < end; i++) { 1019 Trusted.putChar(val, index++, s.charAt(i)); 1020 } 1021 } 1022 1023 public static int codePointAtSB(byte[] val, int index, int end) { 1024 return codePointAt(val, index, end, true /* checked */); 1025 } 1026 1027 public static int codePointBeforeSB(byte[] val, int index) { 1028 return codePointBefore(val, index, true /* checked */); 1029 } 1030 1031 public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) { 1032 return codePointCount(val, beginIndex, endIndex, true /* checked */); 1033 } 1034 1035 public static int getChars(int i, int begin, int end, byte[] value) { 1036 checkBoundsBeginEnd(begin, end, value); 1037 int pos = Trusted.getChars(i, end, value); 1038 assert begin == pos; 1039 return pos; 1040 } 1041 1042 public static int getChars(long l, int begin, int end, byte[] value) { 1043 checkBoundsBeginEnd(begin, end, value); 1044 int pos = Trusted.getChars(l, end, value); 1045 assert begin == pos; 1046 return pos; 1047 } 1048 1049 public static boolean contentEquals(byte[] v1, byte[] v2, int len) { 1050 checkBoundsOffCount(0, len, v2); 1051 for (int i = 0; i < len; i++) { 1052 if ((char)(v1[i] & 0xff) != Trusted.getChar(v2, i)) { 1053 return false; 1054 } 1055 } 1056 return true; 1057 } 1058 1059 public static boolean contentEquals(byte[] value, CharSequence cs, int len) { 1060 checkOffset(len, value); 1061 for (int i = 0; i < len; i++) { 1062 if (Trusted.getChar(value, i) != cs.charAt(i)) { 1063 return false; 1064 } 1065 } 1066 return true; 1067 } 1068 1069 public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) { 1070 int end = i + 4; 1071 checkBoundsBeginEnd(i, end, value); 1072 Trusted.putChar(value, i++, c1); 1073 Trusted.putChar(value, i++, c2); 1074 Trusted.putChar(value, i++, c3); 1075 Trusted.putChar(value, i++, c4); 1076 assert(i == end); 1077 return end; 1078 } 1079 1080 public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) { 1081 int end = i + 5; 1082 checkBoundsBeginEnd(i, end, value); 1083 Trusted.putChar(value, i++, c1); 1084 Trusted.putChar(value, i++, c2); 1085 Trusted.putChar(value, i++, c3); 1086 Trusted.putChar(value, i++, c4); 1087 Trusted.putChar(value, i++, c5); 1088 assert(i == end); 1089 return end; 1090 } 1091 1092 public static char charAt(byte[] value, int index) { 1093 checkIndex(index, value); 1094 return Trusted.getChar(value, index); 1095 } 1096 1097 public static void reverse(byte[] val, int count) { 1098 checkOffset(count, val); 1099 int n = count - 1; 1100 boolean hasSurrogates = false; 1101 for (int j = (n-1) >> 1; j >= 0; j--) { 1102 int k = n - j; 1103 char cj = Trusted.getChar(val, j); 1104 char ck = Trusted.getChar(val, k); 1105 Trusted.putChar(val, j, ck); 1106 Trusted.putChar(val, k, cj); 1107 if (Character.isSurrogate(cj) || 1108 Character.isSurrogate(ck)) { 1109 hasSurrogates = true; 1110 } 1111 } 1112 if (hasSurrogates) { 1113 reverseAllValidSurrogatePairs(val, count); 1114 } 1115 } 1116 1117 /** Outlined helper method for reverse() */ 1118 private static void reverseAllValidSurrogatePairs(byte[] val, int count) { 1119 for (int i = 0; i < count - 1; i++) { 1120 char c2 = Trusted.getChar(val, i); 1121 if (Character.isLowSurrogate(c2)) { 1122 char c1 = Trusted.getChar(val, i + 1); 1123 if (Character.isHighSurrogate(c1)) { 1124 Trusted.putChar(val, i++, c1); 1125 Trusted.putChar(val, i, c2); 1126 } 1127 } 1128 } 1129 } 1130 1131 // inflatedCopy byte[] -> byte[] 1132 public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 1133 // We need a range check here because 'putChar' has no checks 1134 checkBoundsOffCount(dstOff, len, dst); 1135 for (int i = 0; i < len; i++) { 1136 Trusted.putChar(dst, dstOff++, src[srcOff++] & 0xff); 1137 } 1138 } 1139 1140 // srcCoder == UTF16 && tgtCoder == LATIN1 1141 public static int lastIndexOfLatin1(byte[] src, int srcCount, 1142 byte[] tgt, int tgtCount, int fromIndex) { 1143 assert fromIndex >= 0; 1144 assert tgtCount > 0; 1145 assert tgtCount <= tgt.length; 1146 int min = tgtCount - 1; 1147 int i = min + fromIndex; 1148 int strLastIndex = tgtCount - 1; 1149 1150 char strLastChar = (char)(tgt[strLastIndex] & 0xff); 1151 1152 checkIndex(i, src); 1153 1154 startSearchForLastChar: 1155 while (true) { 1156 while (i >= min && Trusted.getChar(src, i) != strLastChar) { 1157 i--; 1158 } 1159 if (i < min) { 1160 return -1; 1161 } 1162 int j = i - 1; 1163 int start = j - strLastIndex; 1164 int k = strLastIndex - 1; 1165 while (j > start) { 1166 if (Trusted.getChar(src, j--) != (tgt[k--] & 0xff)) { 1167 i--; 1168 continue startSearchForLastChar; 1169 } 1170 } 1171 return start + 1; 1172 } 1173 } 1174 1175 //////////////////////////////////////////////////////////////// 1176 1177 private static native boolean isBigEndian(); 1178 1179 static final int HI_BYTE_SHIFT; 1180 static final int LO_BYTE_SHIFT; 1181 static { 1182 if (isBigEndian()) { 1183 HI_BYTE_SHIFT = 8; 1184 LO_BYTE_SHIFT = 0; 1185 } else { 1186 HI_BYTE_SHIFT = 0; 1187 LO_BYTE_SHIFT = 8; 1188 } 1189 } 1190 1191 static final int MAX_LENGTH = Integer.MAX_VALUE >> 1; 1192 1193 @DontInline 1194 static char throwAssertionError(Throwable e) { 1195 throw new AssertionError("Trusted caller missed bounds check", e); 1196 } 1197 1198 @DontInline 1199 static AssertionError assertionError(Throwable e) { 1200 return new AssertionError("Trusted caller missed bounds check", e); 1201 } 1202 1203 // Used by trusted callers. Assumes all necessary bounds checks have 1204 // been done by the caller. 1205 static class Trusted { 1206 1207 @ForceInline 1208 static char getChar(byte[] val, int index) { 1209 assert index >= 0 && index < length(val) : "Trusted caller missed bounds check"; 1210 if (!String.DEBUG_INTRINSICS) { 1211 return getCharUnsafe(val, index); 1212 } 1213 try { 1214 return getCharUnsafe(val, index); 1215 } catch (IndexOutOfBoundsException e) { 1216 return throwAssertionError(e); 1217 } 1218 } 1219 1220 @ForceInline 1221 static void putChar(byte[] val, int index, int c) { 1222 assert index >= 0 && index < length(val) : "Trusted caller missed bounds check"; 1223 if (!String.DEBUG_INTRINSICS) { 1224 putCharUnsafe(val, index, c); 1225 return; 1226 } 1227 try { 1228 putCharUnsafe(val, index, c); 1229 } catch (IndexOutOfBoundsException e) { 1230 throwAssertionError(e); 1231 } 1232 } 1233 1234 /** 1235 * This is a variant of {@link Integer#getChars(int, int, byte[])}, but for 1236 * UTF-16 coder. 1237 * 1238 * @param i value to convert 1239 * @param index next index, after the least significant digit 1240 * @param buf target buffer, UTF16-coded. 1241 * @return index of the most significant digit or minus sign, if present 1242 */ 1243 static int getChars(int i, int index, byte[] buf) { 1244 int q, r; 1245 int charPos = index; 1246 1247 boolean negative = (i < 0); 1248 if (!negative) { 1249 i = -i; 1250 } 1251 1252 // Get 2 digits/iteration using ints 1253 while (i <= -100) { 1254 q = i / 100; 1255 r = (q * 100) - i; 1256 i = q; 1257 Trusted.putChar(buf, --charPos, Integer.DigitOnes[r]); 1258 Trusted.putChar(buf, --charPos, Integer.DigitTens[r]); 1259 } 1260 1261 // We know there are at most two digits left at this point. 1262 q = i / 10; 1263 r = (q * 10) - i; 1264 Trusted.putChar(buf, --charPos, '0' + r); 1265 1266 // Whatever left is the remaining digit. 1267 if (q < 0) { 1268 Trusted.putChar(buf, --charPos, '0' - q); 1269 } 1270 1271 if (negative) { 1272 Trusted.putChar(buf, --charPos, '-'); 1273 } 1274 return charPos; 1275 } 1276 1277 /** 1278 * This is a variant of {@link Long#getChars(long, int, byte[])}, but for 1279 * UTF-16 coder. 1280 * 1281 * @param i value to convert 1282 * @param index next index, after the least significant digit 1283 * @param buf target buffer, UTF16-coded. 1284 * @return index of the most significant digit or minus sign, if present 1285 */ 1286 static int getChars(long i, int index, byte[] buf) { 1287 long q; 1288 int r; 1289 int charPos = index; 1290 1291 boolean negative = (i < 0); 1292 if (!negative) { 1293 i = -i; 1294 } 1295 1296 // Get 2 digits/iteration using longs until quotient fits into an int 1297 while (i <= Integer.MIN_VALUE) { 1298 q = i / 100; 1299 r = (int)((q * 100) - i); 1300 i = q; 1301 Trusted.putChar(buf, --charPos, Integer.DigitOnes[r]); 1302 Trusted.putChar(buf, --charPos, Integer.DigitTens[r]); 1303 } 1304 1305 // Get 2 digits/iteration using ints 1306 int q2; 1307 int i2 = (int)i; 1308 while (i2 <= -100) { 1309 q2 = i2 / 100; 1310 r = (q2 * 100) - i2; 1311 i2 = q2; 1312 Trusted.putChar(buf, --charPos, Integer.DigitOnes[r]); 1313 Trusted.putChar(buf, --charPos, Integer.DigitTens[r]); 1314 } 1315 1316 // We know there are at most two digits left at this point. 1317 q2 = i2 / 10; 1318 r = (q2 * 10) - i2; 1319 Trusted.putChar(buf, --charPos, '0' + r); 1320 1321 // Whatever left is the remaining digit. 1322 if (q2 < 0) { 1323 Trusted.putChar(buf, --charPos, '0' - q2); 1324 } 1325 1326 if (negative) { 1327 Trusted.putChar(buf, --charPos, '-'); 1328 } 1329 return charPos; 1330 } 1331 1332 } 1333 1334 public static void checkIndex(int off, byte[] val) { 1335 String.checkIndex(off, length(val)); 1336 } 1337 1338 public static void checkOffset(int off, byte[] val) { 1339 String.checkOffset(off, length(val)); 1340 } 1341 1342 public static void checkBoundsBeginEnd(int begin, int end, byte[] val) { 1343 String.checkBoundsBeginEnd(begin, end, length(val)); 1344 } 1345 1346 public static void checkBoundsOffCount(int offset, int count, byte[] val) { 1347 String.checkBoundsOffCount(offset, count, length(val)); 1348 } 1349 1350 } |