1 /* 2 * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Locale; 30 import java.util.Spliterator; 31 import java.util.function.IntConsumer; 32 import jdk.internal.HotSpotIntrinsicCandidate; 33 import jdk.internal.vm.annotation.ForceInline; 34 import jdk.internal.vm.annotation.DontInline; 35 36 import static java.lang.String.UTF16; 37 import static java.lang.String.LATIN1; 38 39 final class StringUTF16 { 40 41 public static byte[] newBytesFor(int len) { 42 if (len < 0) { 43 throw new NegativeArraySizeException(); 44 } 45 if (len > MAX_LENGTH) { 46 throw new OutOfMemoryError("UTF16 String size is " + len + 47 ", should be less than " + MAX_LENGTH); 48 } 49 return new byte[len << 1]; 50 } 51 52 @HotSpotIntrinsicCandidate 53 // intrinsic performs no bounds checks 54 static void putChar(byte[] val, int index, int c) { 55 assert index >= 0 && index < length(val) : "Trusted caller missed bounds check"; 56 index <<= 1; 57 val[index++] = (byte)(c >> HI_BYTE_SHIFT); 58 val[index] = (byte)(c >> LO_BYTE_SHIFT); 59 } 60 61 @HotSpotIntrinsicCandidate 62 // intrinsic performs no bounds checks 63 static char getChar(byte[] val, int index) { 64 assert index >= 0 && index < length(val) : "Trusted caller missed bounds check"; 65 index <<= 1; 66 return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) | 67 ((val[index] & 0xff) << LO_BYTE_SHIFT)); 68 } 69 70 public static int length(byte[] value) { 71 return value.length >> 1; 72 } 73 74 private static int codePointAt(byte[] value, int index, int end, boolean checked) { 75 assert index < end; 76 if (checked) { 77 checkIndex(index, value); 78 } 79 char c1 = getChar(value, index); 80 if (Character.isHighSurrogate(c1) && ++index < end) { 81 if (checked) { 82 checkIndex(index, value); 83 } 84 char c2 = getChar(value, index); 85 if (Character.isLowSurrogate(c2)) { 86 return Character.toCodePoint(c1, c2); 87 } 88 } 89 return c1; 90 } 91 92 public static int codePointAt(byte[] value, int index, int end) { 93 return codePointAt(value, index, end, false /* unchecked */); 94 } 95 96 private static int codePointBefore(byte[] value, int index, boolean checked) { 97 --index; 98 if (checked) { 99 checkIndex(index, value); 100 } 101 char c2 = getChar(value, index); 102 if (Character.isLowSurrogate(c2) && index > 0) { 103 --index; 104 if (checked) { 105 checkIndex(index, value); 106 } 107 char c1 = getChar(value, index); 108 if (Character.isHighSurrogate(c1)) { 109 return Character.toCodePoint(c1, c2); 110 } 111 } 112 return c2; 113 } 114 115 public static int codePointBefore(byte[] value, int index) { 116 return codePointBefore(value, index, false /* unchecked */); 117 } 118 119 private static int codePointCount(byte[] value, int beginIndex, int endIndex, boolean checked) { 120 assert beginIndex <= endIndex; 121 int count = endIndex - beginIndex; 122 int i = beginIndex; 123 if (checked && i < endIndex) { 124 checkBoundsBeginEnd(i, endIndex, value); 125 } 126 for (; i < endIndex - 1; ) { 127 if (Character.isHighSurrogate(getChar(value, i++)) && 128 Character.isLowSurrogate(getChar(value, i))) { 129 count--; 130 i++; 131 } 132 } 133 return count; 134 } 135 136 public static int codePointCount(byte[] value, int beginIndex, int endIndex) { 137 return codePointCount(value, beginIndex, endIndex, false /* unchecked */); 138 } 139 140 public static char[] toChars(byte[] value) { 141 char[] dst = new char[value.length >> 1]; 142 getChars(value, 0, dst.length, dst, 0); 143 return dst; 144 } 145 146 @HotSpotIntrinsicCandidate 147 public static byte[] toBytes(char[] value, int off, int len) { 148 byte[] val = newBytesFor(len); 149 for (int i = 0; i < len; i++) { 150 putChar(val, i, value[off]); 151 off++; 152 } 153 return val; 154 } 155 156 public static byte[] compress(char[] val, int off, int len) { 157 byte[] ret = new byte[len]; 158 if (compress(val, off, ret, 0, len) == len) { 159 return ret; 160 } 161 return null; 162 } 163 164 public static byte[] compress(byte[] val, int off, int len) { 165 byte[] ret = new byte[len]; 166 if (compress(val, off, ret, 0, len) == len) { 167 return ret; 168 } 169 return null; 170 } 171 172 // compressedCopy char[] -> byte[] 173 @HotSpotIntrinsicCandidate 174 public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) { 175 for (int i = 0; i < len; i++) { 176 char c = src[srcOff]; 177 if (c > 0xFF) { 178 len = 0; 179 break; 180 } 181 dst[dstOff] = (byte)c; 182 srcOff++; 183 dstOff++; 184 } 185 return len; 186 } 187 188 // compressedCopy byte[] -> byte[] 189 @HotSpotIntrinsicCandidate 190 public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 191 // We need a range check here because 'getChar' has no checks 192 checkBoundsOffCount(srcOff, len, src); 193 for (int i = 0; i < len; i++) { 194 char c = getChar(src, srcOff); 195 if (c > 0xFF) { 196 len = 0; 197 break; 198 } 199 dst[dstOff] = (byte)c; 200 srcOff++; 201 dstOff++; 202 } 203 return len; 204 } 205 206 public static byte[] toBytes(int[] val, int index, int len) { 207 final int end = index + len; 208 // Pass 1: Compute precise size of char[] 209 int n = len; 210 for (int i = index; i < end; i++) { 211 int cp = val[i]; 212 if (Character.isBmpCodePoint(cp)) 213 continue; 214 else if (Character.isValidCodePoint(cp)) 215 n++; 216 else throw new IllegalArgumentException(Integer.toString(cp)); 217 } 218 // Pass 2: Allocate and fill in <high, low> pair 219 byte[] buf = newBytesFor(n); 220 for (int i = index, j = 0; i < end; i++, j++) { 221 int cp = val[i]; 222 if (Character.isBmpCodePoint(cp)) { 223 putChar(buf, j, cp); 224 } else { 225 putChar(buf, j++, Character.highSurrogate(cp)); 226 putChar(buf, j, Character.lowSurrogate(cp)); 227 } 228 } 229 return buf; 230 } 231 232 public static byte[] toBytes(char c) { 233 byte[] result = new byte[2]; 234 putChar(result, 0, c); 235 return result; 236 } 237 238 @HotSpotIntrinsicCandidate 239 public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) { 240 // We need a range check here because 'getChar' has no checks 241 if (srcBegin < srcEnd) { 242 checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value); 243 } 244 for (int i = srcBegin; i < srcEnd; i++) { 245 dst[dstBegin++] = getChar(value, i); 246 } 247 } 248 249 /* @see java.lang.String.getBytes(int, int, byte[], int) */ 250 public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) { 251 srcBegin <<= 1; 252 srcEnd <<= 1; 253 for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) { 254 dst[dstBegin++] = value[i]; 255 } 256 } 257 258 @HotSpotIntrinsicCandidate 259 public static boolean equals(byte[] value, byte[] other) { 260 if (value.length == other.length) { 261 int len = value.length >> 1; 262 for (int i = 0; i < len; i++) { 263 if (getChar(value, i) != getChar(other, i)) { 264 return false; 265 } 266 } 267 return true; 268 } 269 return false; 270 } 271 272 @HotSpotIntrinsicCandidate 273 public static int compareTo(byte[] value, byte[] other) { 274 int len1 = length(value); 275 int len2 = length(other); 276 int lim = Math.min(len1, len2); 277 for (int k = 0; k < lim; k++) { 278 char c1 = getChar(value, k); 279 char c2 = getChar(other, k); 280 if (c1 != c2) { 281 return c1 - c2; 282 } 283 } 284 return len1 - len2; 285 } 286 287 @HotSpotIntrinsicCandidate 288 public static int compareToLatin1(byte[] value, byte[] other) { 289 return -StringLatin1.compareToUTF16(other, value); 290 } 291 292 public static int compareToCI(byte[] value, byte[] other) { 293 int len1 = length(value); 294 int len2 = length(other); 295 int lim = Math.min(len1, len2); 296 for (int k = 0; k < lim; k++) { 297 char c1 = getChar(value, k); 298 char c2 = getChar(other, k); 299 if (c1 != c2) { 300 c1 = Character.toUpperCase(c1); 301 c2 = Character.toUpperCase(c2); 302 if (c1 != c2) { 303 c1 = Character.toLowerCase(c1); 304 c2 = Character.toLowerCase(c2); 305 if (c1 != c2) { 306 return c1 - c2; 307 } 308 } 309 } 310 } 311 return len1 - len2; 312 } 313 314 public static int compareToCI_Latin1(byte[] value, byte[] other) { 315 return -StringLatin1.compareToCI_UTF16(other, value); 316 } 317 318 public static int hashCode(byte[] value) { 319 int h = 0; 320 int length = value.length >> 1; 321 for (int i = 0; i < length; i++) { 322 h = 31 * h + getChar(value, i); 323 } 324 return h; 325 } 326 327 public static int indexOf(byte[] value, int ch, int fromIndex) { 328 int max = value.length >> 1; 329 if (fromIndex < 0) { 330 fromIndex = 0; 331 } else if (fromIndex >= max) { 332 // Note: fromIndex might be near -1>>>1. 333 return -1; 334 } 335 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 336 // handle most cases here (ch is a BMP code point or a 337 // negative value (invalid code point)) 338 return indexOfChar(value, ch, fromIndex, max); 339 } else { 340 return indexOfSupplementary(value, ch, fromIndex, max); 341 } 342 } 343 344 @HotSpotIntrinsicCandidate 345 public static int indexOf(byte[] value, byte[] str) { 346 if (str.length == 0) { 347 return 0; 348 } 349 if (value.length < str.length) { 350 return -1; 351 } 352 return indexOfUnsafe(value, length(value), str, length(str), 0); 353 } 354 355 @HotSpotIntrinsicCandidate 356 public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 357 checkBoundsBeginEnd(fromIndex, valueCount, value); 358 checkBoundsBeginEnd(0, strCount, str); 359 return indexOfUnsafe(value, valueCount, str, strCount, fromIndex); 360 } 361 362 363 private static int indexOfUnsafe(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 364 assert fromIndex >= 0; 365 assert strCount > 0; 366 assert strCount <= length(str); 367 assert valueCount >= strCount; 368 char first = getChar(str, 0); 369 int max = (valueCount - strCount); 370 for (int i = fromIndex; i <= max; i++) { 371 // Look for first character. 372 if (getChar(value, i) != first) { 373 while (++i <= max && getChar(value, i) != first); 374 } 375 // Found first character, now look at the rest of value 376 if (i <= max) { 377 int j = i + 1; 378 int end = j + strCount - 1; 379 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++); 380 if (j == end) { 381 // Found whole string. 382 return i; 383 } 384 } 385 } 386 return -1; 387 } 388 389 390 /** 391 * Handles indexOf Latin1 substring in UTF16 string. 392 */ 393 @HotSpotIntrinsicCandidate 394 public static int indexOfLatin1(byte[] value, byte[] str) { 395 if (str.length == 0) { 396 return 0; 397 } 398 if (length(value) < str.length) { 399 return -1; 400 } 401 return indexOfLatin1Unsafe(value, length(value), str, str.length, 0); 402 } 403 404 @HotSpotIntrinsicCandidate 405 public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { 406 checkBoundsBeginEnd(fromIndex, srcCount, src); 407 String.checkBoundsBeginEnd(0, tgtCount, tgt.length); 408 return indexOfLatin1Unsafe(src, srcCount, tgt, tgtCount, fromIndex); 409 } 410 411 public static int indexOfLatin1Unsafe(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { 412 assert fromIndex >= 0; 413 assert tgtCount > 0; 414 assert tgtCount <= tgt.length; 415 assert srcCount >= tgtCount; 416 char first = (char)(tgt[0] & 0xff); 417 int max = (srcCount - tgtCount); 418 for (int i = fromIndex; i <= max; i++) { 419 // Look for first character. 420 if (getChar(src, i) != first) { 421 while (++i <= max && getChar(src, i) != first); 422 } 423 // Found first character, now look at the rest of v2 424 if (i <= max) { 425 int j = i + 1; 426 int end = j + tgtCount - 1; 427 for (int k = 1; 428 j < end && getChar(src, j) == (tgt[k] & 0xff); 429 j++, k++); 430 if (j == end) { 431 // Found whole string. 432 return i; 433 } 434 } 435 } 436 return -1; 437 } 438 439 @HotSpotIntrinsicCandidate 440 private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { 441 checkBoundsBeginEnd(fromIndex, max, value); 442 return indexOfCharUnsafe(value, ch, fromIndex, max); 443 } 444 445 private static int indexOfCharUnsafe(byte[] value, int ch, int fromIndex, int max) { 446 for (int i = fromIndex; i < max; i++) { 447 if (getChar(value, i) == ch) { 448 return i; 449 } 450 } 451 return -1; 452 } 453 454 /** 455 * Handles (rare) calls of indexOf with a supplementary character. 456 */ 457 private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) { 458 if (Character.isValidCodePoint(ch)) { 459 final char hi = Character.highSurrogate(ch); 460 final char lo = Character.lowSurrogate(ch); 461 checkBoundsBeginEnd(fromIndex, max, value); 462 for (int i = fromIndex; i < max - 1; i++) { 463 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) { 464 return i; 465 } 466 } 467 } 468 return -1; 469 } 470 471 // srcCoder == UTF16 && tgtCoder == UTF16 472 public static int lastIndexOf(byte[] src, int srcCount, 473 byte[] tgt, int tgtCount, int fromIndex) { 474 assert fromIndex >= 0; 475 assert tgtCount > 0; 476 assert tgtCount <= length(tgt); 477 int min = tgtCount - 1; 478 int i = min + fromIndex; 479 int strLastIndex = tgtCount - 1; 480 481 checkIndex(strLastIndex, tgt); 482 char strLastChar = getChar(tgt, strLastIndex); 483 484 checkIndex(i, src); 485 486 startSearchForLastChar: 487 while (true) { 488 while (i >= min && getChar(src, i) != strLastChar) { 489 i--; 490 } 491 if (i < min) { 492 return -1; 493 } 494 int j = i - 1; 495 int start = j - strLastIndex; 496 int k = strLastIndex - 1; 497 while (j > start) { 498 if (getChar(src, j--) != getChar(tgt, k--)) { 499 i--; 500 continue startSearchForLastChar; 501 } 502 } 503 return start + 1; 504 } 505 } 506 507 public static int lastIndexOf(byte[] value, int ch, int fromIndex) { 508 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 509 // handle most cases here (ch is a BMP code point or a 510 // negative value (invalid code point)) 511 int i = Math.min(fromIndex, (value.length >> 1) - 1); 512 for (; i >= 0; i--) { 513 if (getChar(value, i) == ch) { 514 return i; 515 } 516 } 517 return -1; 518 } else { 519 return lastIndexOfSupplementary(value, ch, fromIndex); 520 } 521 } 522 523 /** 524 * Handles (rare) calls of lastIndexOf with a supplementary character. 525 */ 526 private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) { 527 if (Character.isValidCodePoint(ch)) { 528 char hi = Character.highSurrogate(ch); 529 char lo = Character.lowSurrogate(ch); 530 int i = Math.min(fromIndex, (value.length >> 1) - 2); 531 for (; i >= 0; i--) { 532 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) { 533 return i; 534 } 535 } 536 } 537 return -1; 538 } 539 540 public static String replace(byte[] value, char oldChar, char newChar) { 541 int len = value.length >> 1; 542 int i = -1; 543 while (++i < len) { 544 if (getChar(value, i) == oldChar) { 545 break; 546 } 547 } 548 if (i < len) { 549 byte buf[] = new byte[value.length]; 550 for (int j = 0; j < i; j++) { 551 putChar(buf, j, getChar(value, j)); // TBD:arraycopy? 552 } 553 while (i < len) { 554 char c = getChar(value, i); 555 putChar(buf, i, c == oldChar ? newChar : c); 556 i++; 557 } 558 // Check if we should try to compress to latin1 559 if (String.COMPACT_STRINGS && 560 !StringLatin1.canEncode(oldChar) && 561 StringLatin1.canEncode(newChar)) { 562 byte[] val = compress(buf, 0, len); 563 if (val != null) { 564 return new String(val, LATIN1); 565 } 566 } 567 return new String(buf, UTF16); 568 } 569 return null; 570 } 571 572 public static boolean regionMatchesCI(byte[] value, int toffset, 573 byte[] other, int ooffset, int len) { 574 int last = toffset + len; 575 assert toffset >= 0 && ooffset >= 0; 576 assert ooffset + len <= length(other); 577 assert last <= length(value); 578 while (toffset < last) { 579 char c1 = getChar(value, toffset++); 580 char c2 = getChar(other, ooffset++); 581 if (c1 == c2) { 582 continue; 583 } 584 // try converting both characters to uppercase. 585 // If the results match, then the comparison scan should 586 // continue. 587 char u1 = Character.toUpperCase(c1); 588 char u2 = Character.toUpperCase(c2); 589 if (u1 == u2) { 590 continue; 591 } 592 // Unfortunately, conversion to uppercase does not work properly 593 // for the Georgian alphabet, which has strange rules about case 594 // conversion. So we need to make one last check before 595 // exiting. 596 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 597 continue; 598 } 599 return false; 600 } 601 return true; 602 } 603 604 public static boolean regionMatchesCI_Latin1(byte[] value, int toffset, 605 byte[] other, int ooffset, 606 int len) { 607 return StringLatin1.regionMatchesCI_UTF16(other, ooffset, value, toffset, len); 608 } 609 610 public static String toLowerCase(String str, byte[] value, Locale locale) { 611 if (locale == null) { 612 throw new NullPointerException(); 613 } 614 int first; 615 boolean hasSurr = false; 616 final int len = value.length >> 1; 617 618 // Now check if there are any characters that need to be changed, or are surrogate 619 for (first = 0 ; first < len; first++) { 620 int cp = (int)getChar(value, first); 621 if (Character.isSurrogate((char)cp)) { 622 hasSurr = true; 623 break; 624 } 625 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR 626 break; 627 } 628 } 629 if (first == len) 630 return str; 631 byte[] result = new byte[value.length]; 632 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 633 // lowerCase characters. 634 String lang = locale.getLanguage(); 635 if (lang == "tr" || lang == "az" || lang == "lt") { 636 return toLowerCaseEx(str, value, result, first, locale, true); 637 } 638 if (hasSurr) { 639 return toLowerCaseEx(str, value, result, first, locale, false); 640 } 641 int bits = 0; 642 for (int i = first; i < len; i++) { 643 int cp = (int)getChar(value, i); 644 if (cp == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 645 Character.isSurrogate((char)cp)) { 646 return toLowerCaseEx(str, value, result, i, locale, false); 647 } 648 if (cp == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 649 return toLowerCaseEx(str, value, result, i, locale, true); 650 } 651 cp = Character.toLowerCase(cp); 652 if (!Character.isBmpCodePoint(cp)) { 653 return toLowerCaseEx(str, value, result, i, locale, false); 654 } 655 bits |= cp; 656 putChar(result, i, cp); 657 } 658 if (bits > 0xFF) { 659 return new String(result, UTF16); 660 } else { 661 return newString(result, 0, len); 662 } 663 } 664 665 private static String toLowerCaseEx(String str, byte[] value, 666 byte[] result, int first, Locale locale, 667 boolean localeDependent) { 668 assert(result.length == value.length); 669 assert(first >= 0); 670 int resultOffset = first; 671 int length = value.length >> 1; 672 int srcCount; 673 for (int i = first; i < length; i += srcCount) { 674 int srcChar = getChar(value, i); 675 int lowerChar; 676 char[] lowerCharArray; 677 srcCount = 1; 678 if (Character.isSurrogate((char)srcChar)) { 679 srcChar = codePointAt(value, i, length); 680 srcCount = Character.charCount(srcChar); 681 } 682 if (localeDependent || 683 srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 684 srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 685 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale); 686 } else { 687 lowerChar = Character.toLowerCase(srcChar); 688 } 689 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp 690 putChar(result, resultOffset++, lowerChar); 691 } else { 692 if (lowerChar == Character.ERROR) { 693 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale); 694 } else { 695 lowerCharArray = Character.toChars(lowerChar); 696 } 697 /* Grow result if needed */ 698 int mapLen = lowerCharArray.length; 699 if (mapLen > srcCount) { 700 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 701 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 702 result = result2; 703 } 704 assert resultOffset >= 0; 705 assert resultOffset + mapLen <= length(result); 706 for (int x = 0; x < mapLen; ++x) { 707 putChar(result, resultOffset++, lowerCharArray[x]); 708 } 709 } 710 } 711 return newString(result, 0, resultOffset); 712 } 713 714 public static String toUpperCase(String str, byte[] value, Locale locale) { 715 if (locale == null) { 716 throw new NullPointerException(); 717 } 718 int first; 719 boolean hasSurr = false; 720 final int len = value.length >> 1; 721 722 // Now check if there are any characters that need to be changed, or are surrogate 723 for (first = 0 ; first < len; first++) { 724 int cp = (int)getChar(value, first); 725 if (Character.isSurrogate((char)cp)) { 726 hasSurr = true; 727 break; 728 } 729 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR 730 break; 731 } 732 } 733 if (first == len) { 734 return str; 735 } 736 byte[] result = new byte[value.length]; 737 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 738 // upperCase characters. 739 String lang = locale.getLanguage(); 740 if (lang == "tr" || lang == "az" || lang == "lt") { 741 return toUpperCaseEx(str, value, result, first, locale, true); 742 } 743 if (hasSurr) { 744 return toUpperCaseEx(str, value, result, first, locale, false); 745 } 746 int bits = 0; 747 for (int i = first; i < len; i++) { 748 int cp = (int)getChar(value, i); 749 if (Character.isSurrogate((char)cp)) { 750 return toUpperCaseEx(str, value, result, i, locale, false); 751 } 752 cp = Character.toUpperCaseEx(cp); 753 if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp 754 return toUpperCaseEx(str, value, result, i, locale, false); 755 } 756 bits |= cp; 757 putChar(result, i, cp); 758 } 759 if (bits > 0xFF) { 760 return new String(result, UTF16); 761 } else { 762 return newString(result, 0, len); 763 } 764 } 765 766 private static String toUpperCaseEx(String str, byte[] value, 767 byte[] result, int first, 768 Locale locale, boolean localeDependent) 769 { 770 assert(result.length == value.length); 771 assert(first >= 0); 772 int resultOffset = first; 773 int length = value.length >> 1; 774 int srcCount; 775 for (int i = first; i < length; i += srcCount) { 776 int srcChar = getChar(value, i); 777 int upperChar; 778 char[] upperCharArray; 779 srcCount = 1; 780 if (Character.isSurrogate((char)srcChar)) { 781 srcChar = codePointAt(value, i, length); 782 srcCount = Character.charCount(srcChar); 783 } 784 if (localeDependent) { 785 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale); 786 } else { 787 upperChar = Character.toUpperCaseEx(srcChar); 788 } 789 if (Character.isBmpCodePoint(upperChar)) { 790 putChar(result, resultOffset++, upperChar); 791 } else { 792 if (upperChar == Character.ERROR) { 793 if (localeDependent) { 794 upperCharArray = 795 ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale); 796 } else { 797 upperCharArray = Character.toUpperCaseCharArray(srcChar); 798 } 799 } else { 800 upperCharArray = Character.toChars(upperChar); 801 } 802 /* Grow result if needed */ 803 int mapLen = upperCharArray.length; 804 if (mapLen > srcCount) { 805 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 806 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 807 result = result2; 808 } 809 assert resultOffset >= 0; 810 assert resultOffset + mapLen <= length(result); 811 for (int x = 0; x < mapLen; ++x) { 812 putChar(result, resultOffset++, upperCharArray[x]); 813 } 814 } 815 } 816 return newString(result, 0, resultOffset); 817 } 818 819 public static String trim(byte[] value) { 820 int length = value.length >> 1; 821 int len = length; 822 int st = 0; 823 while (st < len && getChar(value, st) <= ' ') { 824 st++; 825 } 826 while (st < len && getChar(value, len - 1) <= ' ') { 827 len--; 828 } 829 return ((st > 0) || (len < length )) ? 830 new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) : 831 null; 832 } 833 834 private static void putChars(byte[] val, int index, char[] str, int off, int end) { 835 while (off < end) { 836 putChar(val, index++, str[off++]); 837 } 838 } 839 840 public static String newString(byte[] val, int index, int len) { 841 if (String.COMPACT_STRINGS) { 842 byte[] buf = compress(val, index, len); 843 if (buf != null) { 844 return new String(buf, LATIN1); 845 } 846 } 847 int last = index + len; 848 return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16); 849 } 850 851 public static void fillNull(byte[] val, int index, int end) { 852 Arrays.fill(val, index << 1, end << 1, (byte)0); 853 } 854 855 static class CharsSpliterator implements Spliterator.OfInt { 856 private final byte[] array; 857 private int index; // current index, modified on advance/split 858 private final int fence; // one past last index 859 private final int cs; 860 861 CharsSpliterator(byte[] array, int acs) { 862 this(array, 0, array.length >> 1, acs); 863 } 864 865 CharsSpliterator(byte[] array, int origin, int fence, int acs) { 866 this.array = array; 867 this.index = origin; 868 this.fence = fence; 869 this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED 870 | Spliterator.SUBSIZED; 871 } 872 873 @Override 874 public OfInt trySplit() { 875 int lo = index, mid = (lo + fence) >>> 1; 876 return (lo >= mid) 877 ? null 878 : new CharsSpliterator(array, lo, index = mid, cs); 879 } 880 881 @Override 882 public void forEachRemaining(IntConsumer action) { 883 byte[] a; int i, hi; // hoist accesses and checks from loop 884 if (action == null) 885 throw new NullPointerException(); 886 if (((a = array).length >> 1) >= (hi = fence) && 887 (i = index) >= 0 && i < (index = hi)) { 888 do { 889 action.accept(charAt(a, i)); 890 } while (++i < hi); 891 } 892 } 893 894 @Override 895 public boolean tryAdvance(IntConsumer action) { 896 if (action == null) 897 throw new NullPointerException(); 898 int i = index; 899 if (i >= 0 && i < fence) { 900 action.accept(charAt(array, i)); 901 index++; 902 return true; 903 } 904 return false; 905 } 906 907 @Override 908 public long estimateSize() { return (long)(fence - index); } 909 910 @Override 911 public int characteristics() { 912 return cs; 913 } 914 } 915 916 static class CodePointsSpliterator implements Spliterator.OfInt { 917 private final byte[] array; 918 private int index; // current index, modified on advance/split 919 private final int fence; // one past last index 920 private final int cs; 921 922 CodePointsSpliterator(byte[] array, int acs) { 923 this(array, 0, array.length >> 1, acs); 924 } 925 926 CodePointsSpliterator(byte[] array, int origin, int fence, int acs) { 927 this.array = array; 928 this.index = origin; 929 this.fence = fence; 930 this.cs = acs | Spliterator.ORDERED; 931 } 932 933 @Override 934 public OfInt trySplit() { 935 int lo = index, mid = (lo + fence) >>> 1; 936 if (lo >= mid) 937 return null; 938 939 int midOneLess; 940 // If the mid-point intersects a surrogate pair 941 if (Character.isLowSurrogate(charAt(array, mid)) && 942 Character.isHighSurrogate(charAt(array, midOneLess = (mid -1)))) { 943 // If there is only one pair it cannot be split 944 if (lo >= midOneLess) 945 return null; 946 // Shift the mid-point to align with the surrogate pair 947 return new CodePointsSpliterator(array, lo, index = midOneLess, cs); 948 } 949 return new CodePointsSpliterator(array, lo, index = mid, cs); 950 } 951 952 @Override 953 public void forEachRemaining(IntConsumer action) { 954 byte[] a; int i, hi; // hoist accesses and checks from loop 955 if (action == null) 956 throw new NullPointerException(); 957 if (((a = array).length >> 1) >= (hi = fence) && 958 (i = index) >= 0 && i < (index = hi)) { 959 do { 960 i = advance(a, i, hi, action); 961 } while (i < hi); 962 } 963 } 964 965 @Override 966 public boolean tryAdvance(IntConsumer action) { 967 if (action == null) 968 throw new NullPointerException(); 969 if (index >= 0 && index < fence) { 970 index = advance(array, index, fence, action); 971 return true; 972 } 973 return false; 974 } 975 976 // Advance one code point from the index, i, and return the next 977 // index to advance from 978 private static int advance(byte[] a, int i, int hi, IntConsumer action) { 979 char c1 = charAt(a, i++); 980 int cp = c1; 981 if (Character.isHighSurrogate(c1) && i < hi) { 982 char c2 = charAt(a, i); 983 if (Character.isLowSurrogate(c2)) { 984 i++; 985 cp = Character.toCodePoint(c1, c2); 986 } 987 } 988 action.accept(cp); 989 return i; 990 } 991 992 @Override 993 public long estimateSize() { return (long)(fence - index); } 994 995 @Override 996 public int characteristics() { 997 return cs; 998 } 999 } 1000 1001 //////////////////////////////////////////////////////////////// 1002 1003 public static void putCharSB(byte[] val, int index, int c) { 1004 checkIndex(index, val); 1005 putChar(val, index, c); 1006 } 1007 1008 public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) { 1009 checkBoundsBeginEnd(index, index + end - off, val); 1010 putChars(val, index, ca, off, end); 1011 } 1012 1013 public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) { 1014 checkBoundsBeginEnd(index, index + end - off, val); 1015 for (int i = off; i < end; i++) { 1016 putChar(val, index++, s.charAt(i)); 1017 } 1018 } 1019 1020 public static int codePointAtSB(byte[] val, int index, int end) { 1021 return codePointAt(val, index, end, true /* checked */); 1022 } 1023 1024 public static int codePointBeforeSB(byte[] val, int index) { 1025 return codePointBefore(val, index, true /* checked */); 1026 } 1027 1028 public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) { 1029 return codePointCount(val, beginIndex, endIndex, true /* checked */); 1030 } 1031 1032 public static int getChars(int i, int begin, int end, byte[] value) { 1033 checkBoundsBeginEnd(begin, end, value); 1034 int pos = getChars(i, end, value); 1035 assert begin == pos; 1036 return pos; 1037 } 1038 1039 public static int getChars(long l, int begin, int end, byte[] value) { 1040 checkBoundsBeginEnd(begin, end, value); 1041 int pos = getChars(l, end, value); 1042 assert begin == pos; 1043 return pos; 1044 } 1045 1046 public static boolean contentEquals(byte[] v1, byte[] v2, int len) { 1047 checkBoundsOffCount(0, len, v2); 1048 for (int i = 0; i < len; i++) { 1049 if ((char)(v1[i] & 0xff) != getChar(v2, i)) { 1050 return false; 1051 } 1052 } 1053 return true; 1054 } 1055 1056 public static boolean contentEquals(byte[] value, CharSequence cs, int len) { 1057 checkOffset(len, value); 1058 for (int i = 0; i < len; i++) { 1059 if (getChar(value, i) != cs.charAt(i)) { 1060 return false; 1061 } 1062 } 1063 return true; 1064 } 1065 1066 public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) { 1067 int end = i + 4; 1068 checkBoundsBeginEnd(i, end, value); 1069 putChar(value, i++, c1); 1070 putChar(value, i++, c2); 1071 putChar(value, i++, c3); 1072 putChar(value, i++, c4); 1073 assert(i == end); 1074 return end; 1075 } 1076 1077 public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) { 1078 int end = i + 5; 1079 checkBoundsBeginEnd(i, end, value); 1080 putChar(value, i++, c1); 1081 putChar(value, i++, c2); 1082 putChar(value, i++, c3); 1083 putChar(value, i++, c4); 1084 putChar(value, i++, c5); 1085 assert(i == end); 1086 return end; 1087 } 1088 1089 public static char charAt(byte[] value, int index) { 1090 checkIndex(index, value); 1091 return getChar(value, index); 1092 } 1093 1094 public static void reverse(byte[] val, int count) { 1095 checkOffset(count, val); 1096 int n = count - 1; 1097 boolean hasSurrogates = false; 1098 for (int j = (n-1) >> 1; j >= 0; j--) { 1099 int k = n - j; 1100 char cj = getChar(val, j); 1101 char ck = getChar(val, k); 1102 putChar(val, j, ck); 1103 putChar(val, k, cj); 1104 if (Character.isSurrogate(cj) || 1105 Character.isSurrogate(ck)) { 1106 hasSurrogates = true; 1107 } 1108 } 1109 if (hasSurrogates) { 1110 reverseAllValidSurrogatePairs(val, count); 1111 } 1112 } 1113 1114 /** Outlined helper method for reverse() */ 1115 private static void reverseAllValidSurrogatePairs(byte[] val, int count) { 1116 for (int i = 0; i < count - 1; i++) { 1117 char c2 = getChar(val, i); 1118 if (Character.isLowSurrogate(c2)) { 1119 char c1 = getChar(val, i + 1); 1120 if (Character.isHighSurrogate(c1)) { 1121 putChar(val, i++, c1); 1122 putChar(val, i, c2); 1123 } 1124 } 1125 } 1126 } 1127 1128 // inflatedCopy byte[] -> byte[] 1129 public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 1130 // We need a range check here because 'putChar' has no checks 1131 checkBoundsOffCount(dstOff, len, dst); 1132 for (int i = 0; i < len; i++) { 1133 putChar(dst, dstOff++, src[srcOff++] & 0xff); 1134 } 1135 } 1136 1137 // srcCoder == UTF16 && tgtCoder == LATIN1 1138 public static int lastIndexOfLatin1(byte[] src, int srcCount, 1139 byte[] tgt, int tgtCount, int fromIndex) { 1140 assert fromIndex >= 0; 1141 assert tgtCount > 0; 1142 assert tgtCount <= tgt.length; 1143 int min = tgtCount - 1; 1144 int i = min + fromIndex; 1145 int strLastIndex = tgtCount - 1; 1146 1147 char strLastChar = (char)(tgt[strLastIndex] & 0xff); 1148 1149 checkIndex(i, src); 1150 1151 startSearchForLastChar: 1152 while (true) { 1153 while (i >= min && getChar(src, i) != strLastChar) { 1154 i--; 1155 } 1156 if (i < min) { 1157 return -1; 1158 } 1159 int j = i - 1; 1160 int start = j - strLastIndex; 1161 int k = strLastIndex - 1; 1162 while (j > start) { 1163 if (getChar(src, j--) != (tgt[k--] & 0xff)) { 1164 i--; 1165 continue startSearchForLastChar; 1166 } 1167 } 1168 return start + 1; 1169 } 1170 } 1171 1172 //////////////////////////////////////////////////////////////// 1173 1174 private static native boolean isBigEndian(); 1175 1176 static final int HI_BYTE_SHIFT; 1177 static final int LO_BYTE_SHIFT; 1178 static { 1179 if (isBigEndian()) { 1180 HI_BYTE_SHIFT = 8; 1181 LO_BYTE_SHIFT = 0; 1182 } else { 1183 HI_BYTE_SHIFT = 0; 1184 LO_BYTE_SHIFT = 8; 1185 } 1186 } 1187 1188 static final int MAX_LENGTH = Integer.MAX_VALUE >> 1; 1189 1190 // Used by trusted callers. Assumes all necessary bounds checks have 1191 // been done by the caller. 1192 1193 /** 1194 * This is a variant of {@link Integer#getChars(int, int, byte[])}, but for 1195 * UTF-16 coder. 1196 * 1197 * @param i value to convert 1198 * @param index next index, after the least significant digit 1199 * @param buf target buffer, UTF16-coded. 1200 * @return index of the most significant digit or minus sign, if present 1201 */ 1202 static int getChars(int i, int index, byte[] buf) { 1203 int q, r; 1204 int charPos = index; 1205 1206 boolean negative = (i < 0); 1207 if (!negative) { 1208 i = -i; 1209 } 1210 1211 // Get 2 digits/iteration using ints 1212 while (i <= -100) { 1213 q = i / 100; 1214 r = (q * 100) - i; 1215 i = q; 1216 putChar(buf, --charPos, Integer.DigitOnes[r]); 1217 putChar(buf, --charPos, Integer.DigitTens[r]); 1218 } 1219 1220 // We know there are at most two digits left at this point. 1221 q = i / 10; 1222 r = (q * 10) - i; 1223 putChar(buf, --charPos, '0' + r); 1224 1225 // Whatever left is the remaining digit. 1226 if (q < 0) { 1227 putChar(buf, --charPos, '0' - q); 1228 } 1229 1230 if (negative) { 1231 putChar(buf, --charPos, '-'); 1232 } 1233 return charPos; 1234 } 1235 1236 /** 1237 * This is a variant of {@link Long#getChars(long, int, byte[])}, but for 1238 * UTF-16 coder. 1239 * 1240 * @param i value to convert 1241 * @param index next index, after the least significant digit 1242 * @param buf target buffer, UTF16-coded. 1243 * @return index of the most significant digit or minus sign, if present 1244 */ 1245 static int getChars(long i, int index, byte[] buf) { 1246 long q; 1247 int r; 1248 int charPos = index; 1249 1250 boolean negative = (i < 0); 1251 if (!negative) { 1252 i = -i; 1253 } 1254 1255 // Get 2 digits/iteration using longs until quotient fits into an int 1256 while (i <= Integer.MIN_VALUE) { 1257 q = i / 100; 1258 r = (int)((q * 100) - i); 1259 i = q; 1260 putChar(buf, --charPos, Integer.DigitOnes[r]); 1261 putChar(buf, --charPos, Integer.DigitTens[r]); 1262 } 1263 1264 // Get 2 digits/iteration using ints 1265 int q2; 1266 int i2 = (int)i; 1267 while (i2 <= -100) { 1268 q2 = i2 / 100; 1269 r = (q2 * 100) - i2; 1270 i2 = q2; 1271 putChar(buf, --charPos, Integer.DigitOnes[r]); 1272 putChar(buf, --charPos, Integer.DigitTens[r]); 1273 } 1274 1275 // We know there are at most two digits left at this point. 1276 q2 = i2 / 10; 1277 r = (q2 * 10) - i2; 1278 putChar(buf, --charPos, '0' + r); 1279 1280 // Whatever left is the remaining digit. 1281 if (q2 < 0) { 1282 putChar(buf, --charPos, '0' - q2); 1283 } 1284 1285 if (negative) { 1286 putChar(buf, --charPos, '-'); 1287 } 1288 return charPos; 1289 } 1290 // End of trusted methods. 1291 1292 public static void checkIndex(int off, byte[] val) { 1293 String.checkIndex(off, length(val)); 1294 } 1295 1296 public static void checkOffset(int off, byte[] val) { 1297 String.checkOffset(off, length(val)); 1298 } 1299 1300 public static void checkBoundsBeginEnd(int begin, int end, byte[] val) { 1301 String.checkBoundsBeginEnd(begin, end, length(val)); 1302 } 1303 1304 public static void checkBoundsOffCount(int offset, int count, byte[] val) { 1305 String.checkBoundsOffCount(offset, count, length(val)); 1306 } 1307 1308 }