1 /* 2 * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Locale; 30 import java.util.Spliterator; 31 import java.util.function.IntConsumer; 32 import jdk.internal.HotSpotIntrinsicCandidate; 33 import jdk.internal.vm.annotation.ForceInline; 34 import jdk.internal.vm.annotation.DontInline; 35 36 import static java.lang.String.UTF16; 37 import static java.lang.String.LATIN1; 38 39 final class StringUTF16 { 40 41 public static byte[] newBytesFor(int len) { 42 if (len < 0) { 43 throw new NegativeArraySizeException(); 44 } 45 if (len > MAX_LENGTH) { 46 throw new OutOfMemoryError("UTF16 String size is " + len + 47 ", should be less than " + MAX_LENGTH); 48 } 49 return new byte[len << 1]; 50 } 51 52 @HotSpotIntrinsicCandidate 53 // intrinsic performs no bounds checks 54 static void putChar(byte[] val, int index, int c) { 55 assert index >= 0 && index < length(val) : "Trusted caller missed bounds check"; 56 index <<= 1; 57 val[index++] = (byte)(c >> HI_BYTE_SHIFT); 58 val[index] = (byte)(c >> LO_BYTE_SHIFT); 59 } 60 61 @HotSpotIntrinsicCandidate 62 // intrinsic performs no bounds checks 63 static char getChar(byte[] val, int index) { 64 assert index >= 0 && index < length(val) : "Trusted caller missed bounds check"; 65 index <<= 1; 66 return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) | 67 ((val[index] & 0xff) << LO_BYTE_SHIFT)); 68 } 69 70 public static int length(byte[] value) { 71 return value.length >> 1; 72 } 73 74 private static int codePointAt(byte[] value, int index, int end, boolean checked) { 75 assert index < end; 76 if (checked) { 77 checkIndex(index, value); 78 } 79 char c1 = getChar(value, index); 80 if (Character.isHighSurrogate(c1) && ++index < end) { 81 if (checked) { 82 checkIndex(index, value); 83 } 84 char c2 = getChar(value, index); 85 if (Character.isLowSurrogate(c2)) { 86 return Character.toCodePoint(c1, c2); 87 } 88 } 89 return c1; 90 } 91 92 public static int codePointAt(byte[] value, int index, int end) { 93 return codePointAt(value, index, end, false /* unchecked */); 94 } 95 96 private static int codePointBefore(byte[] value, int index, boolean checked) { 97 --index; 98 if (checked) { 99 checkIndex(index, value); 100 } 101 char c2 = getChar(value, index); 102 if (Character.isLowSurrogate(c2) && index > 0) { 103 --index; 104 if (checked) { 105 checkIndex(index, value); 106 } 107 char c1 = getChar(value, index); 108 if (Character.isHighSurrogate(c1)) { 109 return Character.toCodePoint(c1, c2); 110 } 111 } 112 return c2; 113 } 114 115 public static int codePointBefore(byte[] value, int index) { 116 return codePointBefore(value, index, false /* unchecked */); 117 } 118 119 private static int codePointCount(byte[] value, int beginIndex, int endIndex, boolean checked) { 120 assert beginIndex <= endIndex; 121 int count = endIndex - beginIndex; 122 int i = beginIndex; 123 if (checked && i < endIndex) { 124 checkBoundsBeginEnd(i, endIndex, value); 125 } 126 for (; i < endIndex - 1; ) { 127 if (Character.isHighSurrogate(getChar(value, i++)) && 128 Character.isLowSurrogate(getChar(value, i))) { 129 count--; 130 i++; 131 } 132 } 133 return count; 134 } 135 136 public static int codePointCount(byte[] value, int beginIndex, int endIndex) { 137 return codePointCount(value, beginIndex, endIndex, false /* unchecked */); 138 } 139 140 public static char[] toChars(byte[] value) { 141 char[] dst = new char[value.length >> 1]; 142 getChars(value, 0, dst.length, dst, 0); 143 return dst; 144 } 145 146 @HotSpotIntrinsicCandidate 147 public static byte[] toBytes(char[] value, int off, int len) { 148 byte[] val = newBytesFor(len); 149 for (int i = 0; i < len; i++) { 150 putChar(val, i, value[off]); 151 off++; 152 } 153 return val; 154 } 155 156 public static byte[] compress(char[] val, int off, int len) { 157 byte[] ret = new byte[len]; 158 if (compress(val, off, ret, 0, len) == len) { 159 return ret; 160 } 161 return null; 162 } 163 164 public static byte[] compress(byte[] val, int off, int len) { 165 byte[] ret = new byte[len]; 166 if (compress(val, off, ret, 0, len) == len) { 167 return ret; 168 } 169 return null; 170 } 171 172 // compressedCopy char[] -> byte[] 173 @HotSpotIntrinsicCandidate 174 public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) { 175 for (int i = 0; i < len; i++) { 176 char c = src[srcOff]; 177 if (c > 0xFF) { 178 len = 0; 179 break; 180 } 181 dst[dstOff] = (byte)c; 182 srcOff++; 183 dstOff++; 184 } 185 return len; 186 } 187 188 // compressedCopy byte[] -> byte[] 189 @HotSpotIntrinsicCandidate 190 public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 191 // We need a range check here because 'getChar' has no checks 192 checkBoundsOffCount(srcOff, len, src); 193 for (int i = 0; i < len; i++) { 194 char c = getChar(src, srcOff); 195 if (c > 0xFF) { 196 len = 0; 197 break; 198 } 199 dst[dstOff] = (byte)c; 200 srcOff++; 201 dstOff++; 202 } 203 return len; 204 } 205 206 public static byte[] toBytes(int[] val, int index, int len) { 207 final int end = index + len; 208 // Pass 1: Compute precise size of char[] 209 int n = len; 210 for (int i = index; i < end; i++) { 211 int cp = val[i]; 212 if (Character.isBmpCodePoint(cp)) 213 continue; 214 else if (Character.isValidCodePoint(cp)) 215 n++; 216 else throw new IllegalArgumentException(Integer.toString(cp)); 217 } 218 // Pass 2: Allocate and fill in <high, low> pair 219 byte[] buf = newBytesFor(n); 220 for (int i = index, j = 0; i < end; i++, j++) { 221 int cp = val[i]; 222 if (Character.isBmpCodePoint(cp)) { 223 putChar(buf, j, cp); 224 } else { 225 putChar(buf, j++, Character.highSurrogate(cp)); 226 putChar(buf, j, Character.lowSurrogate(cp)); 227 } 228 } 229 return buf; 230 } 231 232 public static byte[] toBytes(char c) { 233 byte[] result = new byte[2]; 234 putChar(result, 0, c); 235 return result; 236 } 237 238 @HotSpotIntrinsicCandidate 239 public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) { 240 // We need a range check here because 'getChar' has no checks 241 if (srcBegin < srcEnd) { 242 checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value); 243 } 244 for (int i = srcBegin; i < srcEnd; i++) { 245 dst[dstBegin++] = getChar(value, i); 246 } 247 } 248 249 /* @see java.lang.String.getBytes(int, int, byte[], int) */ 250 public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) { 251 srcBegin <<= 1; 252 srcEnd <<= 1; 253 for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) { 254 dst[dstBegin++] = value[i]; 255 } 256 } 257 258 @HotSpotIntrinsicCandidate 259 public static boolean equals(byte[] value, byte[] other) { 260 if (value.length == other.length) { 261 int len = value.length >> 1; 262 for (int i = 0; i < len; i++) { 263 if (getChar(value, i) != getChar(other, i)) { 264 return false; 265 } 266 } 267 return true; 268 } 269 return false; 270 } 271 272 @HotSpotIntrinsicCandidate 273 public static int compareTo(byte[] value, byte[] other) { 274 int len1 = length(value); 275 int len2 = length(other); 276 return compareValues(value, other, len1, len2); 277 } 278 279 /* 280 * Checks the boundary and then compares the byte arrays. 281 */ 282 public static int compareTo(byte[] value, byte[] other, int len1, int len2) { 283 checkOffset(len1, value); 284 checkOffset(len2, other); 285 286 return compareValues(value, other, len1, len2); 287 } 288 289 private static int compareValues(byte[] value, byte[] other, int len1, int len2) { 290 int lim = Math.min(len1, len2); 291 for (int k = 0; k < lim; k++) { 292 char c1 = getChar(value, k); 293 char c2 = getChar(other, k); 294 if (c1 != c2) { 295 return c1 - c2; 296 } 297 } 298 return len1 - len2; 299 } 300 301 @HotSpotIntrinsicCandidate 302 public static int compareToLatin1(byte[] value, byte[] other) { 303 return -StringLatin1.compareToUTF16(other, value); 304 } 305 306 public static int compareToLatin1(byte[] value, byte[] other, int len1, int len2) { 307 return -StringLatin1.compareToUTF16(other, value, len2, len1); 308 } 309 310 public static int compareToCI(byte[] value, byte[] other) { 311 int len1 = length(value); 312 int len2 = length(other); 313 int lim = Math.min(len1, len2); 314 for (int k = 0; k < lim; k++) { 315 char c1 = getChar(value, k); 316 char c2 = getChar(other, k); 317 if (c1 != c2) { 318 c1 = Character.toUpperCase(c1); 319 c2 = Character.toUpperCase(c2); 320 if (c1 != c2) { 321 c1 = Character.toLowerCase(c1); 322 c2 = Character.toLowerCase(c2); 323 if (c1 != c2) { 324 return c1 - c2; 325 } 326 } 327 } 328 } 329 return len1 - len2; 330 } 331 332 public static int compareToCI_Latin1(byte[] value, byte[] other) { 333 return -StringLatin1.compareToCI_UTF16(other, value); 334 } 335 336 public static int hashCode(byte[] value) { 337 int h = 0; 338 int length = value.length >> 1; 339 for (int i = 0; i < length; i++) { 340 h = 31 * h + getChar(value, i); 341 } 342 return h; 343 } 344 345 public static int indexOf(byte[] value, int ch, int fromIndex) { 346 int max = value.length >> 1; 347 if (fromIndex < 0) { 348 fromIndex = 0; 349 } else if (fromIndex >= max) { 350 // Note: fromIndex might be near -1>>>1. 351 return -1; 352 } 353 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 354 // handle most cases here (ch is a BMP code point or a 355 // negative value (invalid code point)) 356 return indexOfChar(value, ch, fromIndex, max); 357 } else { 358 return indexOfSupplementary(value, ch, fromIndex, max); 359 } 360 } 361 362 @HotSpotIntrinsicCandidate 363 public static int indexOf(byte[] value, byte[] str) { 364 if (str.length == 0) { 365 return 0; 366 } 367 if (value.length < str.length) { 368 return -1; 369 } 370 return indexOfUnsafe(value, length(value), str, length(str), 0); 371 } 372 373 @HotSpotIntrinsicCandidate 374 public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 375 checkBoundsBeginEnd(fromIndex, valueCount, value); 376 checkBoundsBeginEnd(0, strCount, str); 377 return indexOfUnsafe(value, valueCount, str, strCount, fromIndex); 378 } 379 380 381 private static int indexOfUnsafe(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 382 assert fromIndex >= 0; 383 assert strCount > 0; 384 assert strCount <= length(str); 385 assert valueCount >= strCount; 386 char first = getChar(str, 0); 387 int max = (valueCount - strCount); 388 for (int i = fromIndex; i <= max; i++) { 389 // Look for first character. 390 if (getChar(value, i) != first) { 391 while (++i <= max && getChar(value, i) != first); 392 } 393 // Found first character, now look at the rest of value 394 if (i <= max) { 395 int j = i + 1; 396 int end = j + strCount - 1; 397 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++); 398 if (j == end) { 399 // Found whole string. 400 return i; 401 } 402 } 403 } 404 return -1; 405 } 406 407 408 /** 409 * Handles indexOf Latin1 substring in UTF16 string. 410 */ 411 @HotSpotIntrinsicCandidate 412 public static int indexOfLatin1(byte[] value, byte[] str) { 413 if (str.length == 0) { 414 return 0; 415 } 416 if (length(value) < str.length) { 417 return -1; 418 } 419 return indexOfLatin1Unsafe(value, length(value), str, str.length, 0); 420 } 421 422 @HotSpotIntrinsicCandidate 423 public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { 424 checkBoundsBeginEnd(fromIndex, srcCount, src); 425 String.checkBoundsBeginEnd(0, tgtCount, tgt.length); 426 return indexOfLatin1Unsafe(src, srcCount, tgt, tgtCount, fromIndex); 427 } 428 429 public static int indexOfLatin1Unsafe(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { 430 assert fromIndex >= 0; 431 assert tgtCount > 0; 432 assert tgtCount <= tgt.length; 433 assert srcCount >= tgtCount; 434 char first = (char)(tgt[0] & 0xff); 435 int max = (srcCount - tgtCount); 436 for (int i = fromIndex; i <= max; i++) { 437 // Look for first character. 438 if (getChar(src, i) != first) { 439 while (++i <= max && getChar(src, i) != first); 440 } 441 // Found first character, now look at the rest of v2 442 if (i <= max) { 443 int j = i + 1; 444 int end = j + tgtCount - 1; 445 for (int k = 1; 446 j < end && getChar(src, j) == (tgt[k] & 0xff); 447 j++, k++); 448 if (j == end) { 449 // Found whole string. 450 return i; 451 } 452 } 453 } 454 return -1; 455 } 456 457 @HotSpotIntrinsicCandidate 458 private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { 459 checkBoundsBeginEnd(fromIndex, max, value); 460 return indexOfCharUnsafe(value, ch, fromIndex, max); 461 } 462 463 private static int indexOfCharUnsafe(byte[] value, int ch, int fromIndex, int max) { 464 for (int i = fromIndex; i < max; i++) { 465 if (getChar(value, i) == ch) { 466 return i; 467 } 468 } 469 return -1; 470 } 471 472 /** 473 * Handles (rare) calls of indexOf with a supplementary character. 474 */ 475 private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) { 476 if (Character.isValidCodePoint(ch)) { 477 final char hi = Character.highSurrogate(ch); 478 final char lo = Character.lowSurrogate(ch); 479 checkBoundsBeginEnd(fromIndex, max, value); 480 for (int i = fromIndex; i < max - 1; i++) { 481 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) { 482 return i; 483 } 484 } 485 } 486 return -1; 487 } 488 489 // srcCoder == UTF16 && tgtCoder == UTF16 490 public static int lastIndexOf(byte[] src, int srcCount, 491 byte[] tgt, int tgtCount, int fromIndex) { 492 assert fromIndex >= 0; 493 assert tgtCount > 0; 494 assert tgtCount <= length(tgt); 495 int min = tgtCount - 1; 496 int i = min + fromIndex; 497 int strLastIndex = tgtCount - 1; 498 499 checkIndex(strLastIndex, tgt); 500 char strLastChar = getChar(tgt, strLastIndex); 501 502 checkIndex(i, src); 503 504 startSearchForLastChar: 505 while (true) { 506 while (i >= min && getChar(src, i) != strLastChar) { 507 i--; 508 } 509 if (i < min) { 510 return -1; 511 } 512 int j = i - 1; 513 int start = j - strLastIndex; 514 int k = strLastIndex - 1; 515 while (j > start) { 516 if (getChar(src, j--) != getChar(tgt, k--)) { 517 i--; 518 continue startSearchForLastChar; 519 } 520 } 521 return start + 1; 522 } 523 } 524 525 public static int lastIndexOf(byte[] value, int ch, int fromIndex) { 526 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 527 // handle most cases here (ch is a BMP code point or a 528 // negative value (invalid code point)) 529 int i = Math.min(fromIndex, (value.length >> 1) - 1); 530 for (; i >= 0; i--) { 531 if (getChar(value, i) == ch) { 532 return i; 533 } 534 } 535 return -1; 536 } else { 537 return lastIndexOfSupplementary(value, ch, fromIndex); 538 } 539 } 540 541 /** 542 * Handles (rare) calls of lastIndexOf with a supplementary character. 543 */ 544 private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) { 545 if (Character.isValidCodePoint(ch)) { 546 char hi = Character.highSurrogate(ch); 547 char lo = Character.lowSurrogate(ch); 548 int i = Math.min(fromIndex, (value.length >> 1) - 2); 549 for (; i >= 0; i--) { 550 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) { 551 return i; 552 } 553 } 554 } 555 return -1; 556 } 557 558 public static String replace(byte[] value, char oldChar, char newChar) { 559 int len = value.length >> 1; 560 int i = -1; 561 while (++i < len) { 562 if (getChar(value, i) == oldChar) { 563 break; 564 } 565 } 566 if (i < len) { 567 byte buf[] = new byte[value.length]; 568 for (int j = 0; j < i; j++) { 569 putChar(buf, j, getChar(value, j)); // TBD:arraycopy? 570 } 571 while (i < len) { 572 char c = getChar(value, i); 573 putChar(buf, i, c == oldChar ? newChar : c); 574 i++; 575 } 576 // Check if we should try to compress to latin1 577 if (String.COMPACT_STRINGS && 578 !StringLatin1.canEncode(oldChar) && 579 StringLatin1.canEncode(newChar)) { 580 byte[] val = compress(buf, 0, len); 581 if (val != null) { 582 return new String(val, LATIN1); 583 } 584 } 585 return new String(buf, UTF16); 586 } 587 return null; 588 } 589 590 public static boolean regionMatchesCI(byte[] value, int toffset, 591 byte[] other, int ooffset, int len) { 592 int last = toffset + len; 593 assert toffset >= 0 && ooffset >= 0; 594 assert ooffset + len <= length(other); 595 assert last <= length(value); 596 while (toffset < last) { 597 char c1 = getChar(value, toffset++); 598 char c2 = getChar(other, ooffset++); 599 if (c1 == c2) { 600 continue; 601 } 602 // try converting both characters to uppercase. 603 // If the results match, then the comparison scan should 604 // continue. 605 char u1 = Character.toUpperCase(c1); 606 char u2 = Character.toUpperCase(c2); 607 if (u1 == u2) { 608 continue; 609 } 610 // Unfortunately, conversion to uppercase does not work properly 611 // for the Georgian alphabet, which has strange rules about case 612 // conversion. So we need to make one last check before 613 // exiting. 614 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 615 continue; 616 } 617 return false; 618 } 619 return true; 620 } 621 622 public static boolean regionMatchesCI_Latin1(byte[] value, int toffset, 623 byte[] other, int ooffset, 624 int len) { 625 return StringLatin1.regionMatchesCI_UTF16(other, ooffset, value, toffset, len); 626 } 627 628 public static String toLowerCase(String str, byte[] value, Locale locale) { 629 if (locale == null) { 630 throw new NullPointerException(); 631 } 632 int first; 633 boolean hasSurr = false; 634 final int len = value.length >> 1; 635 636 // Now check if there are any characters that need to be changed, or are surrogate 637 for (first = 0 ; first < len; first++) { 638 int cp = (int)getChar(value, first); 639 if (Character.isSurrogate((char)cp)) { 640 hasSurr = true; 641 break; 642 } 643 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR 644 break; 645 } 646 } 647 if (first == len) 648 return str; 649 byte[] result = new byte[value.length]; 650 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 651 // lowerCase characters. 652 String lang = locale.getLanguage(); 653 if (lang == "tr" || lang == "az" || lang == "lt") { 654 return toLowerCaseEx(str, value, result, first, locale, true); 655 } 656 if (hasSurr) { 657 return toLowerCaseEx(str, value, result, first, locale, false); 658 } 659 int bits = 0; 660 for (int i = first; i < len; i++) { 661 int cp = (int)getChar(value, i); 662 if (cp == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 663 Character.isSurrogate((char)cp)) { 664 return toLowerCaseEx(str, value, result, i, locale, false); 665 } 666 if (cp == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 667 return toLowerCaseEx(str, value, result, i, locale, true); 668 } 669 cp = Character.toLowerCase(cp); 670 if (!Character.isBmpCodePoint(cp)) { 671 return toLowerCaseEx(str, value, result, i, locale, false); 672 } 673 bits |= cp; 674 putChar(result, i, cp); 675 } 676 if (bits > 0xFF) { 677 return new String(result, UTF16); 678 } else { 679 return newString(result, 0, len); 680 } 681 } 682 683 private static String toLowerCaseEx(String str, byte[] value, 684 byte[] result, int first, Locale locale, 685 boolean localeDependent) { 686 assert(result.length == value.length); 687 assert(first >= 0); 688 int resultOffset = first; 689 int length = value.length >> 1; 690 int srcCount; 691 for (int i = first; i < length; i += srcCount) { 692 int srcChar = getChar(value, i); 693 int lowerChar; 694 char[] lowerCharArray; 695 srcCount = 1; 696 if (Character.isSurrogate((char)srcChar)) { 697 srcChar = codePointAt(value, i, length); 698 srcCount = Character.charCount(srcChar); 699 } 700 if (localeDependent || 701 srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 702 srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 703 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale); 704 } else { 705 lowerChar = Character.toLowerCase(srcChar); 706 } 707 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp 708 putChar(result, resultOffset++, lowerChar); 709 } else { 710 if (lowerChar == Character.ERROR) { 711 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale); 712 } else { 713 lowerCharArray = Character.toChars(lowerChar); 714 } 715 /* Grow result if needed */ 716 int mapLen = lowerCharArray.length; 717 if (mapLen > srcCount) { 718 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 719 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 720 result = result2; 721 } 722 assert resultOffset >= 0; 723 assert resultOffset + mapLen <= length(result); 724 for (int x = 0; x < mapLen; ++x) { 725 putChar(result, resultOffset++, lowerCharArray[x]); 726 } 727 } 728 } 729 return newString(result, 0, resultOffset); 730 } 731 732 public static String toUpperCase(String str, byte[] value, Locale locale) { 733 if (locale == null) { 734 throw new NullPointerException(); 735 } 736 int first; 737 boolean hasSurr = false; 738 final int len = value.length >> 1; 739 740 // Now check if there are any characters that need to be changed, or are surrogate 741 for (first = 0 ; first < len; first++) { 742 int cp = (int)getChar(value, first); 743 if (Character.isSurrogate((char)cp)) { 744 hasSurr = true; 745 break; 746 } 747 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR 748 break; 749 } 750 } 751 if (first == len) { 752 return str; 753 } 754 byte[] result = new byte[value.length]; 755 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 756 // upperCase characters. 757 String lang = locale.getLanguage(); 758 if (lang == "tr" || lang == "az" || lang == "lt") { 759 return toUpperCaseEx(str, value, result, first, locale, true); 760 } 761 if (hasSurr) { 762 return toUpperCaseEx(str, value, result, first, locale, false); 763 } 764 int bits = 0; 765 for (int i = first; i < len; i++) { 766 int cp = (int)getChar(value, i); 767 if (Character.isSurrogate((char)cp)) { 768 return toUpperCaseEx(str, value, result, i, locale, false); 769 } 770 cp = Character.toUpperCaseEx(cp); 771 if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp 772 return toUpperCaseEx(str, value, result, i, locale, false); 773 } 774 bits |= cp; 775 putChar(result, i, cp); 776 } 777 if (bits > 0xFF) { 778 return new String(result, UTF16); 779 } else { 780 return newString(result, 0, len); 781 } 782 } 783 784 private static String toUpperCaseEx(String str, byte[] value, 785 byte[] result, int first, 786 Locale locale, boolean localeDependent) 787 { 788 assert(result.length == value.length); 789 assert(first >= 0); 790 int resultOffset = first; 791 int length = value.length >> 1; 792 int srcCount; 793 for (int i = first; i < length; i += srcCount) { 794 int srcChar = getChar(value, i); 795 int upperChar; 796 char[] upperCharArray; 797 srcCount = 1; 798 if (Character.isSurrogate((char)srcChar)) { 799 srcChar = codePointAt(value, i, length); 800 srcCount = Character.charCount(srcChar); 801 } 802 if (localeDependent) { 803 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale); 804 } else { 805 upperChar = Character.toUpperCaseEx(srcChar); 806 } 807 if (Character.isBmpCodePoint(upperChar)) { 808 putChar(result, resultOffset++, upperChar); 809 } else { 810 if (upperChar == Character.ERROR) { 811 if (localeDependent) { 812 upperCharArray = 813 ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale); 814 } else { 815 upperCharArray = Character.toUpperCaseCharArray(srcChar); 816 } 817 } else { 818 upperCharArray = Character.toChars(upperChar); 819 } 820 /* Grow result if needed */ 821 int mapLen = upperCharArray.length; 822 if (mapLen > srcCount) { 823 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 824 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 825 result = result2; 826 } 827 assert resultOffset >= 0; 828 assert resultOffset + mapLen <= length(result); 829 for (int x = 0; x < mapLen; ++x) { 830 putChar(result, resultOffset++, upperCharArray[x]); 831 } 832 } 833 } 834 return newString(result, 0, resultOffset); 835 } 836 837 public static String trim(byte[] value) { 838 int length = value.length >> 1; 839 int len = length; 840 int st = 0; 841 while (st < len && getChar(value, st) <= ' ') { 842 st++; 843 } 844 while (st < len && getChar(value, len - 1) <= ' ') { 845 len--; 846 } 847 return ((st > 0) || (len < length )) ? 848 new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) : 849 null; 850 } 851 852 private static void putChars(byte[] val, int index, char[] str, int off, int end) { 853 while (off < end) { 854 putChar(val, index++, str[off++]); 855 } 856 } 857 858 public static String newString(byte[] val, int index, int len) { 859 if (String.COMPACT_STRINGS) { 860 byte[] buf = compress(val, index, len); 861 if (buf != null) { 862 return new String(buf, LATIN1); 863 } 864 } 865 int last = index + len; 866 return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16); 867 } 868 869 public static void fillNull(byte[] val, int index, int end) { 870 Arrays.fill(val, index << 1, end << 1, (byte)0); 871 } 872 873 static class CharsSpliterator implements Spliterator.OfInt { 874 private final byte[] array; 875 private int index; // current index, modified on advance/split 876 private final int fence; // one past last index 877 private final int cs; 878 879 CharsSpliterator(byte[] array, int acs) { 880 this(array, 0, array.length >> 1, acs); 881 } 882 883 CharsSpliterator(byte[] array, int origin, int fence, int acs) { 884 this.array = array; 885 this.index = origin; 886 this.fence = fence; 887 this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED 888 | Spliterator.SUBSIZED; 889 } 890 891 @Override 892 public OfInt trySplit() { 893 int lo = index, mid = (lo + fence) >>> 1; 894 return (lo >= mid) 895 ? null 896 : new CharsSpliterator(array, lo, index = mid, cs); 897 } 898 899 @Override 900 public void forEachRemaining(IntConsumer action) { 901 byte[] a; int i, hi; // hoist accesses and checks from loop 902 if (action == null) 903 throw new NullPointerException(); 904 if (((a = array).length >> 1) >= (hi = fence) && 905 (i = index) >= 0 && i < (index = hi)) { 906 do { 907 action.accept(charAt(a, i)); 908 } while (++i < hi); 909 } 910 } 911 912 @Override 913 public boolean tryAdvance(IntConsumer action) { 914 if (action == null) 915 throw new NullPointerException(); 916 int i = index; 917 if (i >= 0 && i < fence) { 918 action.accept(charAt(array, i)); 919 index++; 920 return true; 921 } 922 return false; 923 } 924 925 @Override 926 public long estimateSize() { return (long)(fence - index); } 927 928 @Override 929 public int characteristics() { 930 return cs; 931 } 932 } 933 934 static class CodePointsSpliterator implements Spliterator.OfInt { 935 private final byte[] array; 936 private int index; // current index, modified on advance/split 937 private final int fence; // one past last index 938 private final int cs; 939 940 CodePointsSpliterator(byte[] array, int acs) { 941 this(array, 0, array.length >> 1, acs); 942 } 943 944 CodePointsSpliterator(byte[] array, int origin, int fence, int acs) { 945 this.array = array; 946 this.index = origin; 947 this.fence = fence; 948 this.cs = acs | Spliterator.ORDERED; 949 } 950 951 @Override 952 public OfInt trySplit() { 953 int lo = index, mid = (lo + fence) >>> 1; 954 if (lo >= mid) 955 return null; 956 957 int midOneLess; 958 // If the mid-point intersects a surrogate pair 959 if (Character.isLowSurrogate(charAt(array, mid)) && 960 Character.isHighSurrogate(charAt(array, midOneLess = (mid -1)))) { 961 // If there is only one pair it cannot be split 962 if (lo >= midOneLess) 963 return null; 964 // Shift the mid-point to align with the surrogate pair 965 return new CodePointsSpliterator(array, lo, index = midOneLess, cs); 966 } 967 return new CodePointsSpliterator(array, lo, index = mid, cs); 968 } 969 970 @Override 971 public void forEachRemaining(IntConsumer action) { 972 byte[] a; int i, hi; // hoist accesses and checks from loop 973 if (action == null) 974 throw new NullPointerException(); 975 if (((a = array).length >> 1) >= (hi = fence) && 976 (i = index) >= 0 && i < (index = hi)) { 977 do { 978 i = advance(a, i, hi, action); 979 } while (i < hi); 980 } 981 } 982 983 @Override 984 public boolean tryAdvance(IntConsumer action) { 985 if (action == null) 986 throw new NullPointerException(); 987 if (index >= 0 && index < fence) { 988 index = advance(array, index, fence, action); 989 return true; 990 } 991 return false; 992 } 993 994 // Advance one code point from the index, i, and return the next 995 // index to advance from 996 private static int advance(byte[] a, int i, int hi, IntConsumer action) { 997 char c1 = charAt(a, i++); 998 int cp = c1; 999 if (Character.isHighSurrogate(c1) && i < hi) { 1000 char c2 = charAt(a, i); 1001 if (Character.isLowSurrogate(c2)) { 1002 i++; 1003 cp = Character.toCodePoint(c1, c2); 1004 } 1005 } 1006 action.accept(cp); 1007 return i; 1008 } 1009 1010 @Override 1011 public long estimateSize() { return (long)(fence - index); } 1012 1013 @Override 1014 public int characteristics() { 1015 return cs; 1016 } 1017 } 1018 1019 //////////////////////////////////////////////////////////////// 1020 1021 public static void putCharSB(byte[] val, int index, int c) { 1022 checkIndex(index, val); 1023 putChar(val, index, c); 1024 } 1025 1026 public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) { 1027 checkBoundsBeginEnd(index, index + end - off, val); 1028 putChars(val, index, ca, off, end); 1029 } 1030 1031 public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) { 1032 checkBoundsBeginEnd(index, index + end - off, val); 1033 for (int i = off; i < end; i++) { 1034 putChar(val, index++, s.charAt(i)); 1035 } 1036 } 1037 1038 public static int codePointAtSB(byte[] val, int index, int end) { 1039 return codePointAt(val, index, end, true /* checked */); 1040 } 1041 1042 public static int codePointBeforeSB(byte[] val, int index) { 1043 return codePointBefore(val, index, true /* checked */); 1044 } 1045 1046 public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) { 1047 return codePointCount(val, beginIndex, endIndex, true /* checked */); 1048 } 1049 1050 public static int getChars(int i, int begin, int end, byte[] value) { 1051 checkBoundsBeginEnd(begin, end, value); 1052 int pos = getChars(i, end, value); 1053 assert begin == pos; 1054 return pos; 1055 } 1056 1057 public static int getChars(long l, int begin, int end, byte[] value) { 1058 checkBoundsBeginEnd(begin, end, value); 1059 int pos = getChars(l, end, value); 1060 assert begin == pos; 1061 return pos; 1062 } 1063 1064 public static boolean contentEquals(byte[] v1, byte[] v2, int len) { 1065 checkBoundsOffCount(0, len, v2); 1066 for (int i = 0; i < len; i++) { 1067 if ((char)(v1[i] & 0xff) != getChar(v2, i)) { 1068 return false; 1069 } 1070 } 1071 return true; 1072 } 1073 1074 public static boolean contentEquals(byte[] value, CharSequence cs, int len) { 1075 checkOffset(len, value); 1076 for (int i = 0; i < len; i++) { 1077 if (getChar(value, i) != cs.charAt(i)) { 1078 return false; 1079 } 1080 } 1081 return true; 1082 } 1083 1084 public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) { 1085 int end = i + 4; 1086 checkBoundsBeginEnd(i, end, value); 1087 putChar(value, i++, c1); 1088 putChar(value, i++, c2); 1089 putChar(value, i++, c3); 1090 putChar(value, i++, c4); 1091 assert(i == end); 1092 return end; 1093 } 1094 1095 public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) { 1096 int end = i + 5; 1097 checkBoundsBeginEnd(i, end, value); 1098 putChar(value, i++, c1); 1099 putChar(value, i++, c2); 1100 putChar(value, i++, c3); 1101 putChar(value, i++, c4); 1102 putChar(value, i++, c5); 1103 assert(i == end); 1104 return end; 1105 } 1106 1107 public static char charAt(byte[] value, int index) { 1108 checkIndex(index, value); 1109 return getChar(value, index); 1110 } 1111 1112 public static void reverse(byte[] val, int count) { 1113 checkOffset(count, val); 1114 int n = count - 1; 1115 boolean hasSurrogates = false; 1116 for (int j = (n-1) >> 1; j >= 0; j--) { 1117 int k = n - j; 1118 char cj = getChar(val, j); 1119 char ck = getChar(val, k); 1120 putChar(val, j, ck); 1121 putChar(val, k, cj); 1122 if (Character.isSurrogate(cj) || 1123 Character.isSurrogate(ck)) { 1124 hasSurrogates = true; 1125 } 1126 } 1127 if (hasSurrogates) { 1128 reverseAllValidSurrogatePairs(val, count); 1129 } 1130 } 1131 1132 /** Outlined helper method for reverse() */ 1133 private static void reverseAllValidSurrogatePairs(byte[] val, int count) { 1134 for (int i = 0; i < count - 1; i++) { 1135 char c2 = getChar(val, i); 1136 if (Character.isLowSurrogate(c2)) { 1137 char c1 = getChar(val, i + 1); 1138 if (Character.isHighSurrogate(c1)) { 1139 putChar(val, i++, c1); 1140 putChar(val, i, c2); 1141 } 1142 } 1143 } 1144 } 1145 1146 // inflatedCopy byte[] -> byte[] 1147 public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 1148 // We need a range check here because 'putChar' has no checks 1149 checkBoundsOffCount(dstOff, len, dst); 1150 for (int i = 0; i < len; i++) { 1151 putChar(dst, dstOff++, src[srcOff++] & 0xff); 1152 } 1153 } 1154 1155 // srcCoder == UTF16 && tgtCoder == LATIN1 1156 public static int lastIndexOfLatin1(byte[] src, int srcCount, 1157 byte[] tgt, int tgtCount, int fromIndex) { 1158 assert fromIndex >= 0; 1159 assert tgtCount > 0; 1160 assert tgtCount <= tgt.length; 1161 int min = tgtCount - 1; 1162 int i = min + fromIndex; 1163 int strLastIndex = tgtCount - 1; 1164 1165 char strLastChar = (char)(tgt[strLastIndex] & 0xff); 1166 1167 checkIndex(i, src); 1168 1169 startSearchForLastChar: 1170 while (true) { 1171 while (i >= min && getChar(src, i) != strLastChar) { 1172 i--; 1173 } 1174 if (i < min) { 1175 return -1; 1176 } 1177 int j = i - 1; 1178 int start = j - strLastIndex; 1179 int k = strLastIndex - 1; 1180 while (j > start) { 1181 if (getChar(src, j--) != (tgt[k--] & 0xff)) { 1182 i--; 1183 continue startSearchForLastChar; 1184 } 1185 } 1186 return start + 1; 1187 } 1188 } 1189 1190 //////////////////////////////////////////////////////////////// 1191 1192 private static native boolean isBigEndian(); 1193 1194 static final int HI_BYTE_SHIFT; 1195 static final int LO_BYTE_SHIFT; 1196 static { 1197 if (isBigEndian()) { 1198 HI_BYTE_SHIFT = 8; 1199 LO_BYTE_SHIFT = 0; 1200 } else { 1201 HI_BYTE_SHIFT = 0; 1202 LO_BYTE_SHIFT = 8; 1203 } 1204 } 1205 1206 static final int MAX_LENGTH = Integer.MAX_VALUE >> 1; 1207 1208 // Used by trusted callers. Assumes all necessary bounds checks have 1209 // been done by the caller. 1210 1211 /** 1212 * This is a variant of {@link Integer#getChars(int, int, byte[])}, but for 1213 * UTF-16 coder. 1214 * 1215 * @param i value to convert 1216 * @param index next index, after the least significant digit 1217 * @param buf target buffer, UTF16-coded. 1218 * @return index of the most significant digit or minus sign, if present 1219 */ 1220 static int getChars(int i, int index, byte[] buf) { 1221 int q, r; 1222 int charPos = index; 1223 1224 boolean negative = (i < 0); 1225 if (!negative) { 1226 i = -i; 1227 } 1228 1229 // Get 2 digits/iteration using ints 1230 while (i <= -100) { 1231 q = i / 100; 1232 r = (q * 100) - i; 1233 i = q; 1234 putChar(buf, --charPos, Integer.DigitOnes[r]); 1235 putChar(buf, --charPos, Integer.DigitTens[r]); 1236 } 1237 1238 // We know there are at most two digits left at this point. 1239 q = i / 10; 1240 r = (q * 10) - i; 1241 putChar(buf, --charPos, '0' + r); 1242 1243 // Whatever left is the remaining digit. 1244 if (q < 0) { 1245 putChar(buf, --charPos, '0' - q); 1246 } 1247 1248 if (negative) { 1249 putChar(buf, --charPos, '-'); 1250 } 1251 return charPos; 1252 } 1253 1254 /** 1255 * This is a variant of {@link Long#getChars(long, int, byte[])}, but for 1256 * UTF-16 coder. 1257 * 1258 * @param i value to convert 1259 * @param index next index, after the least significant digit 1260 * @param buf target buffer, UTF16-coded. 1261 * @return index of the most significant digit or minus sign, if present 1262 */ 1263 static int getChars(long i, int index, byte[] buf) { 1264 long q; 1265 int r; 1266 int charPos = index; 1267 1268 boolean negative = (i < 0); 1269 if (!negative) { 1270 i = -i; 1271 } 1272 1273 // Get 2 digits/iteration using longs until quotient fits into an int 1274 while (i <= Integer.MIN_VALUE) { 1275 q = i / 100; 1276 r = (int)((q * 100) - i); 1277 i = q; 1278 putChar(buf, --charPos, Integer.DigitOnes[r]); 1279 putChar(buf, --charPos, Integer.DigitTens[r]); 1280 } 1281 1282 // Get 2 digits/iteration using ints 1283 int q2; 1284 int i2 = (int)i; 1285 while (i2 <= -100) { 1286 q2 = i2 / 100; 1287 r = (q2 * 100) - i2; 1288 i2 = q2; 1289 putChar(buf, --charPos, Integer.DigitOnes[r]); 1290 putChar(buf, --charPos, Integer.DigitTens[r]); 1291 } 1292 1293 // We know there are at most two digits left at this point. 1294 q2 = i2 / 10; 1295 r = (q2 * 10) - i2; 1296 putChar(buf, --charPos, '0' + r); 1297 1298 // Whatever left is the remaining digit. 1299 if (q2 < 0) { 1300 putChar(buf, --charPos, '0' - q2); 1301 } 1302 1303 if (negative) { 1304 putChar(buf, --charPos, '-'); 1305 } 1306 return charPos; 1307 } 1308 // End of trusted methods. 1309 1310 public static void checkIndex(int off, byte[] val) { 1311 String.checkIndex(off, length(val)); 1312 } 1313 1314 public static void checkOffset(int off, byte[] val) { 1315 String.checkOffset(off, length(val)); 1316 } 1317 1318 public static void checkBoundsBeginEnd(int begin, int end, byte[] val) { 1319 String.checkBoundsBeginEnd(begin, end, length(val)); 1320 } 1321 1322 public static void checkBoundsOffCount(int offset, int count, byte[] val) { 1323 String.checkBoundsOffCount(offset, count, length(val)); 1324 } 1325 1326 }