1 /* 2 * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Locale; 30 import java.util.Spliterator; 31 import java.util.function.IntConsumer; 32 import jdk.internal.HotSpotIntrinsicCandidate; 33 import jdk.internal.vm.annotation.ForceInline; 34 import jdk.internal.vm.annotation.DontInline; 35 36 import static java.lang.String.UTF16; 37 import static java.lang.String.LATIN1; 38 39 final class StringUTF16 { 40 41 public static byte[] newBytesFor(int len) { 42 if (len < 0) { 43 throw new NegativeArraySizeException(); 44 } 45 if (len > MAX_LENGTH) { 46 throw new OutOfMemoryError("UTF16 String size is " + len + 47 ", should be less than " + MAX_LENGTH); 48 } 49 return new byte[len << 1]; 50 } 51 52 @HotSpotIntrinsicCandidate 53 // intrinsic performs no bounds checks 54 static void putChar(byte[] val, int index, int c) { 55 assert index >= 0 && index < length(val) : "Trusted caller missed bounds check"; 56 index <<= 1; 57 val[index++] = (byte)(c >> HI_BYTE_SHIFT); 58 val[index] = (byte)(c >> LO_BYTE_SHIFT); 59 } 60 61 @HotSpotIntrinsicCandidate 62 // intrinsic performs no bounds checks 63 static char getChar(byte[] val, int index) { 64 assert index >= 0 && index < length(val) : "Trusted caller missed bounds check"; 65 index <<= 1; 66 return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) | 67 ((val[index] & 0xff) << LO_BYTE_SHIFT)); 68 } 69 70 public static int length(byte[] value) { 71 return value.length >> 1; 72 } 73 74 private static int codePointAt(byte[] value, int index, int end, boolean checked) { 75 assert index < end; 76 if (checked) { 77 checkIndex(index, value); 78 } 79 char c1 = getChar(value, index); 80 if (Character.isHighSurrogate(c1) && ++index < end) { 81 if (checked) { 82 checkIndex(index, value); 83 } 84 char c2 = getChar(value, index); 85 if (Character.isLowSurrogate(c2)) { 86 return Character.toCodePoint(c1, c2); 87 } 88 } 89 return c1; 90 } 91 92 public static int codePointAt(byte[] value, int index, int end) { 93 return codePointAt(value, index, end, false /* unchecked */); 94 } 95 96 private static int codePointBefore(byte[] value, int index, boolean checked) { 97 --index; 98 if (checked) { 99 checkIndex(index, value); 100 } 101 char c2 = getChar(value, index); 102 if (Character.isLowSurrogate(c2) && index > 0) { 103 --index; 104 if (checked) { 105 checkIndex(index, value); 106 } 107 char c1 = getChar(value, index); 108 if (Character.isHighSurrogate(c1)) { 109 return Character.toCodePoint(c1, c2); 110 } 111 } 112 return c2; 113 } 114 115 public static int codePointBefore(byte[] value, int index) { 116 return codePointBefore(value, index, false /* unchecked */); 117 } 118 119 private static int codePointCount(byte[] value, int beginIndex, int endIndex, boolean checked) { 120 assert beginIndex <= endIndex; 121 int count = endIndex - beginIndex; 122 int i = beginIndex; 123 if (checked && i < endIndex) { 124 checkBoundsBeginEnd(i, endIndex, value); 125 } 126 for (; i < endIndex - 1; ) { 127 if (Character.isHighSurrogate(getChar(value, i++)) && 128 Character.isLowSurrogate(getChar(value, i))) { 129 count--; 130 i++; 131 } 132 } 133 return count; 134 } 135 136 public static int codePointCount(byte[] value, int beginIndex, int endIndex) { 137 return codePointCount(value, beginIndex, endIndex, false /* unchecked */); 138 } 139 140 public static char[] toChars(byte[] value) { 141 char[] dst = new char[value.length >> 1]; 142 getChars(value, 0, dst.length, dst, 0); 143 return dst; 144 } 145 146 @HotSpotIntrinsicCandidate 147 public static byte[] toBytes(char[] value, int off, int len) { 148 byte[] val = newBytesFor(len); 149 for (int i = 0; i < len; i++) { 150 putChar(val, i, value[off]); 151 off++; 152 } 153 return val; 154 } 155 156 public static byte[] compress(char[] val, int off, int len) { 157 byte[] ret = new byte[len]; 158 if (compress(val, off, ret, 0, len) == len) { 159 return ret; 160 } 161 return null; 162 } 163 164 public static byte[] compress(byte[] val, int off, int len) { 165 byte[] ret = new byte[len]; 166 if (compress(val, off, ret, 0, len) == len) { 167 return ret; 168 } 169 return null; 170 } 171 172 // compressedCopy char[] -> byte[] 173 @HotSpotIntrinsicCandidate 174 public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) { 175 for (int i = 0; i < len; i++) { 176 char c = src[srcOff]; 177 if (c > 0xFF) { 178 len = 0; 179 break; 180 } 181 dst[dstOff] = (byte)c; 182 srcOff++; 183 dstOff++; 184 } 185 return len; 186 } 187 188 // compressedCopy byte[] -> byte[] 189 @HotSpotIntrinsicCandidate 190 public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 191 // We need a range check here because 'getChar' has no checks 192 checkBoundsOffCount(srcOff, len, src); 193 for (int i = 0; i < len; i++) { 194 char c = getChar(src, srcOff); 195 if (c > 0xFF) { 196 len = 0; 197 break; 198 } 199 dst[dstOff] = (byte)c; 200 srcOff++; 201 dstOff++; 202 } 203 return len; 204 } 205 206 public static byte[] toBytes(int[] val, int index, int len) { 207 final int end = index + len; 208 // Pass 1: Compute precise size of char[] 209 int n = len; 210 for (int i = index; i < end; i++) { 211 int cp = val[i]; 212 if (Character.isBmpCodePoint(cp)) 213 continue; 214 else if (Character.isValidCodePoint(cp)) 215 n++; 216 else throw new IllegalArgumentException(Integer.toString(cp)); 217 } 218 // Pass 2: Allocate and fill in <high, low> pair 219 byte[] buf = newBytesFor(n); 220 for (int i = index, j = 0; i < end; i++, j++) { 221 int cp = val[i]; 222 if (Character.isBmpCodePoint(cp)) { 223 putChar(buf, j, cp); 224 } else { 225 putChar(buf, j++, Character.highSurrogate(cp)); 226 putChar(buf, j, Character.lowSurrogate(cp)); 227 } 228 } 229 return buf; 230 } 231 232 public static byte[] toBytes(char c) { 233 byte[] result = new byte[2]; 234 putChar(result, 0, c); 235 return result; 236 } 237 238 static byte[] toBytesSupplementary(int cp) { 239 byte[] result = new byte[4]; 240 putChar(result, 0, Character.highSurrogate(cp)); 241 putChar(result, 1, Character.lowSurrogate(cp)); 242 return result; 243 } 244 245 @HotSpotIntrinsicCandidate 246 public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) { 247 // We need a range check here because 'getChar' has no checks 248 if (srcBegin < srcEnd) { 249 checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value); 250 } 251 for (int i = srcBegin; i < srcEnd; i++) { 252 dst[dstBegin++] = getChar(value, i); 253 } 254 } 255 256 /* @see java.lang.String.getBytes(int, int, byte[], int) */ 257 public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) { 258 srcBegin <<= 1; 259 srcEnd <<= 1; 260 for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) { 261 dst[dstBegin++] = value[i]; 262 } 263 } 264 265 @HotSpotIntrinsicCandidate 266 public static boolean equals(byte[] value, byte[] other) { 267 if (value.length == other.length) { 268 int len = value.length >> 1; 269 for (int i = 0; i < len; i++) { 270 if (getChar(value, i) != getChar(other, i)) { 271 return false; 272 } 273 } 274 return true; 275 } 276 return false; 277 } 278 279 @HotSpotIntrinsicCandidate 280 public static int compareTo(byte[] value, byte[] other) { 281 int len1 = length(value); 282 int len2 = length(other); 283 return compareValues(value, other, len1, len2); 284 } 285 286 /* 287 * Checks the boundary and then compares the byte arrays. 288 */ 289 public static int compareTo(byte[] value, byte[] other, int len1, int len2) { 290 checkOffset(len1, value); 291 checkOffset(len2, other); 292 293 return compareValues(value, other, len1, len2); 294 } 295 296 private static int compareValues(byte[] value, byte[] other, int len1, int len2) { 297 int lim = Math.min(len1, len2); 298 for (int k = 0; k < lim; k++) { 299 char c1 = getChar(value, k); 300 char c2 = getChar(other, k); 301 if (c1 != c2) { 302 return c1 - c2; 303 } 304 } 305 return len1 - len2; 306 } 307 308 @HotSpotIntrinsicCandidate 309 public static int compareToLatin1(byte[] value, byte[] other) { 310 return -StringLatin1.compareToUTF16(other, value); 311 } 312 313 public static int compareToLatin1(byte[] value, byte[] other, int len1, int len2) { 314 return -StringLatin1.compareToUTF16(other, value, len2, len1); 315 } 316 317 public static int compareToCI(byte[] value, byte[] other) { 318 int len1 = length(value); 319 int len2 = length(other); 320 int lim = Math.min(len1, len2); 321 for (int k = 0; k < lim; k++) { 322 char c1 = getChar(value, k); 323 char c2 = getChar(other, k); 324 if (c1 != c2) { 325 c1 = Character.toUpperCase(c1); 326 c2 = Character.toUpperCase(c2); 327 if (c1 != c2) { 328 c1 = Character.toLowerCase(c1); 329 c2 = Character.toLowerCase(c2); 330 if (c1 != c2) { 331 return c1 - c2; 332 } 333 } 334 } 335 } 336 return len1 - len2; 337 } 338 339 public static int compareToCI_Latin1(byte[] value, byte[] other) { 340 return -StringLatin1.compareToCI_UTF16(other, value); 341 } 342 343 public static int hashCode(byte[] value) { 344 int h = 0; 345 int length = value.length >> 1; 346 for (int i = 0; i < length; i++) { 347 h = 31 * h + getChar(value, i); 348 } 349 return h; 350 } 351 352 public static int indexOf(byte[] value, int ch, int fromIndex) { 353 int max = value.length >> 1; 354 if (fromIndex < 0) { 355 fromIndex = 0; 356 } else if (fromIndex >= max) { 357 // Note: fromIndex might be near -1>>>1. 358 return -1; 359 } 360 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 361 // handle most cases here (ch is a BMP code point or a 362 // negative value (invalid code point)) 363 return indexOfChar(value, ch, fromIndex, max); 364 } else { 365 return indexOfSupplementary(value, ch, fromIndex, max); 366 } 367 } 368 369 @HotSpotIntrinsicCandidate 370 public static int indexOf(byte[] value, byte[] str) { 371 if (str.length == 0) { 372 return 0; 373 } 374 if (value.length < str.length) { 375 return -1; 376 } 377 return indexOfUnsafe(value, length(value), str, length(str), 0); 378 } 379 380 @HotSpotIntrinsicCandidate 381 public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 382 checkBoundsBeginEnd(fromIndex, valueCount, value); 383 checkBoundsBeginEnd(0, strCount, str); 384 return indexOfUnsafe(value, valueCount, str, strCount, fromIndex); 385 } 386 387 388 private static int indexOfUnsafe(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 389 assert fromIndex >= 0; 390 assert strCount > 0; 391 assert strCount <= length(str); 392 assert valueCount >= strCount; 393 char first = getChar(str, 0); 394 int max = (valueCount - strCount); 395 for (int i = fromIndex; i <= max; i++) { 396 // Look for first character. 397 if (getChar(value, i) != first) { 398 while (++i <= max && getChar(value, i) != first); 399 } 400 // Found first character, now look at the rest of value 401 if (i <= max) { 402 int j = i + 1; 403 int end = j + strCount - 1; 404 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++); 405 if (j == end) { 406 // Found whole string. 407 return i; 408 } 409 } 410 } 411 return -1; 412 } 413 414 415 /** 416 * Handles indexOf Latin1 substring in UTF16 string. 417 */ 418 @HotSpotIntrinsicCandidate 419 public static int indexOfLatin1(byte[] value, byte[] str) { 420 if (str.length == 0) { 421 return 0; 422 } 423 if (length(value) < str.length) { 424 return -1; 425 } 426 return indexOfLatin1Unsafe(value, length(value), str, str.length, 0); 427 } 428 429 @HotSpotIntrinsicCandidate 430 public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { 431 checkBoundsBeginEnd(fromIndex, srcCount, src); 432 String.checkBoundsBeginEnd(0, tgtCount, tgt.length); 433 return indexOfLatin1Unsafe(src, srcCount, tgt, tgtCount, fromIndex); 434 } 435 436 public static int indexOfLatin1Unsafe(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { 437 assert fromIndex >= 0; 438 assert tgtCount > 0; 439 assert tgtCount <= tgt.length; 440 assert srcCount >= tgtCount; 441 char first = (char)(tgt[0] & 0xff); 442 int max = (srcCount - tgtCount); 443 for (int i = fromIndex; i <= max; i++) { 444 // Look for first character. 445 if (getChar(src, i) != first) { 446 while (++i <= max && getChar(src, i) != first); 447 } 448 // Found first character, now look at the rest of v2 449 if (i <= max) { 450 int j = i + 1; 451 int end = j + tgtCount - 1; 452 for (int k = 1; 453 j < end && getChar(src, j) == (tgt[k] & 0xff); 454 j++, k++); 455 if (j == end) { 456 // Found whole string. 457 return i; 458 } 459 } 460 } 461 return -1; 462 } 463 464 @HotSpotIntrinsicCandidate 465 private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { 466 checkBoundsBeginEnd(fromIndex, max, value); 467 return indexOfCharUnsafe(value, ch, fromIndex, max); 468 } 469 470 private static int indexOfCharUnsafe(byte[] value, int ch, int fromIndex, int max) { 471 for (int i = fromIndex; i < max; i++) { 472 if (getChar(value, i) == ch) { 473 return i; 474 } 475 } 476 return -1; 477 } 478 479 /** 480 * Handles (rare) calls of indexOf with a supplementary character. 481 */ 482 private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) { 483 if (Character.isValidCodePoint(ch)) { 484 final char hi = Character.highSurrogate(ch); 485 final char lo = Character.lowSurrogate(ch); 486 checkBoundsBeginEnd(fromIndex, max, value); 487 for (int i = fromIndex; i < max - 1; i++) { 488 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) { 489 return i; 490 } 491 } 492 } 493 return -1; 494 } 495 496 // srcCoder == UTF16 && tgtCoder == UTF16 497 public static int lastIndexOf(byte[] src, int srcCount, 498 byte[] tgt, int tgtCount, int fromIndex) { 499 assert fromIndex >= 0; 500 assert tgtCount > 0; 501 assert tgtCount <= length(tgt); 502 int min = tgtCount - 1; 503 int i = min + fromIndex; 504 int strLastIndex = tgtCount - 1; 505 506 checkIndex(strLastIndex, tgt); 507 char strLastChar = getChar(tgt, strLastIndex); 508 509 checkIndex(i, src); 510 511 startSearchForLastChar: 512 while (true) { 513 while (i >= min && getChar(src, i) != strLastChar) { 514 i--; 515 } 516 if (i < min) { 517 return -1; 518 } 519 int j = i - 1; 520 int start = j - strLastIndex; 521 int k = strLastIndex - 1; 522 while (j > start) { 523 if (getChar(src, j--) != getChar(tgt, k--)) { 524 i--; 525 continue startSearchForLastChar; 526 } 527 } 528 return start + 1; 529 } 530 } 531 532 public static int lastIndexOf(byte[] value, int ch, int fromIndex) { 533 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 534 // handle most cases here (ch is a BMP code point or a 535 // negative value (invalid code point)) 536 int i = Math.min(fromIndex, (value.length >> 1) - 1); 537 for (; i >= 0; i--) { 538 if (getChar(value, i) == ch) { 539 return i; 540 } 541 } 542 return -1; 543 } else { 544 return lastIndexOfSupplementary(value, ch, fromIndex); 545 } 546 } 547 548 /** 549 * Handles (rare) calls of lastIndexOf with a supplementary character. 550 */ 551 private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) { 552 if (Character.isValidCodePoint(ch)) { 553 char hi = Character.highSurrogate(ch); 554 char lo = Character.lowSurrogate(ch); 555 int i = Math.min(fromIndex, (value.length >> 1) - 2); 556 for (; i >= 0; i--) { 557 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) { 558 return i; 559 } 560 } 561 } 562 return -1; 563 } 564 565 public static String replace(byte[] value, char oldChar, char newChar) { 566 int len = value.length >> 1; 567 int i = -1; 568 while (++i < len) { 569 if (getChar(value, i) == oldChar) { 570 break; 571 } 572 } 573 if (i < len) { 574 byte buf[] = new byte[value.length]; 575 for (int j = 0; j < i; j++) { 576 putChar(buf, j, getChar(value, j)); // TBD:arraycopy? 577 } 578 while (i < len) { 579 char c = getChar(value, i); 580 putChar(buf, i, c == oldChar ? newChar : c); 581 i++; 582 } 583 // Check if we should try to compress to latin1 584 if (String.COMPACT_STRINGS && 585 !StringLatin1.canEncode(oldChar) && 586 StringLatin1.canEncode(newChar)) { 587 byte[] val = compress(buf, 0, len); 588 if (val != null) { 589 return new String(val, LATIN1); 590 } 591 } 592 return new String(buf, UTF16); 593 } 594 return null; 595 } 596 597 public static boolean regionMatchesCI(byte[] value, int toffset, 598 byte[] other, int ooffset, int len) { 599 int last = toffset + len; 600 assert toffset >= 0 && ooffset >= 0; 601 assert ooffset + len <= length(other); 602 assert last <= length(value); 603 while (toffset < last) { 604 char c1 = getChar(value, toffset++); 605 char c2 = getChar(other, ooffset++); 606 if (c1 == c2) { 607 continue; 608 } 609 // try converting both characters to uppercase. 610 // If the results match, then the comparison scan should 611 // continue. 612 char u1 = Character.toUpperCase(c1); 613 char u2 = Character.toUpperCase(c2); 614 if (u1 == u2) { 615 continue; 616 } 617 // Unfortunately, conversion to uppercase does not work properly 618 // for the Georgian alphabet, which has strange rules about case 619 // conversion. So we need to make one last check before 620 // exiting. 621 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 622 continue; 623 } 624 return false; 625 } 626 return true; 627 } 628 629 public static boolean regionMatchesCI_Latin1(byte[] value, int toffset, 630 byte[] other, int ooffset, 631 int len) { 632 return StringLatin1.regionMatchesCI_UTF16(other, ooffset, value, toffset, len); 633 } 634 635 public static String toLowerCase(String str, byte[] value, Locale locale) { 636 if (locale == null) { 637 throw new NullPointerException(); 638 } 639 int first; 640 boolean hasSurr = false; 641 final int len = value.length >> 1; 642 643 // Now check if there are any characters that need to be changed, or are surrogate 644 for (first = 0 ; first < len; first++) { 645 int cp = (int)getChar(value, first); 646 if (Character.isSurrogate((char)cp)) { 647 hasSurr = true; 648 break; 649 } 650 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR 651 break; 652 } 653 } 654 if (first == len) 655 return str; 656 byte[] result = new byte[value.length]; 657 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 658 // lowerCase characters. 659 String lang = locale.getLanguage(); 660 if (lang == "tr" || lang == "az" || lang == "lt") { 661 return toLowerCaseEx(str, value, result, first, locale, true); 662 } 663 if (hasSurr) { 664 return toLowerCaseEx(str, value, result, first, locale, false); 665 } 666 int bits = 0; 667 for (int i = first; i < len; i++) { 668 int cp = (int)getChar(value, i); 669 if (cp == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 670 Character.isSurrogate((char)cp)) { 671 return toLowerCaseEx(str, value, result, i, locale, false); 672 } 673 if (cp == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 674 return toLowerCaseEx(str, value, result, i, locale, true); 675 } 676 cp = Character.toLowerCase(cp); 677 if (!Character.isBmpCodePoint(cp)) { 678 return toLowerCaseEx(str, value, result, i, locale, false); 679 } 680 bits |= cp; 681 putChar(result, i, cp); 682 } 683 if (bits > 0xFF) { 684 return new String(result, UTF16); 685 } else { 686 return newString(result, 0, len); 687 } 688 } 689 690 private static String toLowerCaseEx(String str, byte[] value, 691 byte[] result, int first, Locale locale, 692 boolean localeDependent) { 693 assert(result.length == value.length); 694 assert(first >= 0); 695 int resultOffset = first; 696 int length = value.length >> 1; 697 int srcCount; 698 for (int i = first; i < length; i += srcCount) { 699 int srcChar = getChar(value, i); 700 int lowerChar; 701 char[] lowerCharArray; 702 srcCount = 1; 703 if (Character.isSurrogate((char)srcChar)) { 704 srcChar = codePointAt(value, i, length); 705 srcCount = Character.charCount(srcChar); 706 } 707 if (localeDependent || 708 srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 709 srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 710 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale); 711 } else { 712 lowerChar = Character.toLowerCase(srcChar); 713 } 714 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp 715 putChar(result, resultOffset++, lowerChar); 716 } else { 717 if (lowerChar == Character.ERROR) { 718 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale); 719 } else { 720 lowerCharArray = Character.toChars(lowerChar); 721 } 722 /* Grow result if needed */ 723 int mapLen = lowerCharArray.length; 724 if (mapLen > srcCount) { 725 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 726 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 727 result = result2; 728 } 729 assert resultOffset >= 0; 730 assert resultOffset + mapLen <= length(result); 731 for (int x = 0; x < mapLen; ++x) { 732 putChar(result, resultOffset++, lowerCharArray[x]); 733 } 734 } 735 } 736 return newString(result, 0, resultOffset); 737 } 738 739 public static String toUpperCase(String str, byte[] value, Locale locale) { 740 if (locale == null) { 741 throw new NullPointerException(); 742 } 743 int first; 744 boolean hasSurr = false; 745 final int len = value.length >> 1; 746 747 // Now check if there are any characters that need to be changed, or are surrogate 748 for (first = 0 ; first < len; first++) { 749 int cp = (int)getChar(value, first); 750 if (Character.isSurrogate((char)cp)) { 751 hasSurr = true; 752 break; 753 } 754 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR 755 break; 756 } 757 } 758 if (first == len) { 759 return str; 760 } 761 byte[] result = new byte[value.length]; 762 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 763 // upperCase characters. 764 String lang = locale.getLanguage(); 765 if (lang == "tr" || lang == "az" || lang == "lt") { 766 return toUpperCaseEx(str, value, result, first, locale, true); 767 } 768 if (hasSurr) { 769 return toUpperCaseEx(str, value, result, first, locale, false); 770 } 771 int bits = 0; 772 for (int i = first; i < len; i++) { 773 int cp = (int)getChar(value, i); 774 if (Character.isSurrogate((char)cp)) { 775 return toUpperCaseEx(str, value, result, i, locale, false); 776 } 777 cp = Character.toUpperCaseEx(cp); 778 if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp 779 return toUpperCaseEx(str, value, result, i, locale, false); 780 } 781 bits |= cp; 782 putChar(result, i, cp); 783 } 784 if (bits > 0xFF) { 785 return new String(result, UTF16); 786 } else { 787 return newString(result, 0, len); 788 } 789 } 790 791 private static String toUpperCaseEx(String str, byte[] value, 792 byte[] result, int first, 793 Locale locale, boolean localeDependent) 794 { 795 assert(result.length == value.length); 796 assert(first >= 0); 797 int resultOffset = first; 798 int length = value.length >> 1; 799 int srcCount; 800 for (int i = first; i < length; i += srcCount) { 801 int srcChar = getChar(value, i); 802 int upperChar; 803 char[] upperCharArray; 804 srcCount = 1; 805 if (Character.isSurrogate((char)srcChar)) { 806 srcChar = codePointAt(value, i, length); 807 srcCount = Character.charCount(srcChar); 808 } 809 if (localeDependent) { 810 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale); 811 } else { 812 upperChar = Character.toUpperCaseEx(srcChar); 813 } 814 if (Character.isBmpCodePoint(upperChar)) { 815 putChar(result, resultOffset++, upperChar); 816 } else { 817 if (upperChar == Character.ERROR) { 818 if (localeDependent) { 819 upperCharArray = 820 ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale); 821 } else { 822 upperCharArray = Character.toUpperCaseCharArray(srcChar); 823 } 824 } else { 825 upperCharArray = Character.toChars(upperChar); 826 } 827 /* Grow result if needed */ 828 int mapLen = upperCharArray.length; 829 if (mapLen > srcCount) { 830 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 831 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 832 result = result2; 833 } 834 assert resultOffset >= 0; 835 assert resultOffset + mapLen <= length(result); 836 for (int x = 0; x < mapLen; ++x) { 837 putChar(result, resultOffset++, upperCharArray[x]); 838 } 839 } 840 } 841 return newString(result, 0, resultOffset); 842 } 843 844 public static String trim(byte[] value) { 845 int length = value.length >> 1; 846 int len = length; 847 int st = 0; 848 while (st < len && getChar(value, st) <= ' ') { 849 st++; 850 } 851 while (st < len && getChar(value, len - 1) <= ' ') { 852 len--; 853 } 854 return ((st > 0) || (len < length )) ? 855 new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) : 856 null; 857 } 858 859 private static void putChars(byte[] val, int index, char[] str, int off, int end) { 860 while (off < end) { 861 putChar(val, index++, str[off++]); 862 } 863 } 864 865 public static String newString(byte[] val, int index, int len) { 866 if (String.COMPACT_STRINGS) { 867 byte[] buf = compress(val, index, len); 868 if (buf != null) { 869 return new String(buf, LATIN1); 870 } 871 } 872 int last = index + len; 873 return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16); 874 } 875 876 public static void fillNull(byte[] val, int index, int end) { 877 Arrays.fill(val, index << 1, end << 1, (byte)0); 878 } 879 880 static class CharsSpliterator implements Spliterator.OfInt { 881 private final byte[] array; 882 private int index; // current index, modified on advance/split 883 private final int fence; // one past last index 884 private final int cs; 885 886 CharsSpliterator(byte[] array, int acs) { 887 this(array, 0, array.length >> 1, acs); 888 } 889 890 CharsSpliterator(byte[] array, int origin, int fence, int acs) { 891 this.array = array; 892 this.index = origin; 893 this.fence = fence; 894 this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED 895 | Spliterator.SUBSIZED; 896 } 897 898 @Override 899 public OfInt trySplit() { 900 int lo = index, mid = (lo + fence) >>> 1; 901 return (lo >= mid) 902 ? null 903 : new CharsSpliterator(array, lo, index = mid, cs); 904 } 905 906 @Override 907 public void forEachRemaining(IntConsumer action) { 908 byte[] a; int i, hi; // hoist accesses and checks from loop 909 if (action == null) 910 throw new NullPointerException(); 911 if (((a = array).length >> 1) >= (hi = fence) && 912 (i = index) >= 0 && i < (index = hi)) { 913 do { 914 action.accept(charAt(a, i)); 915 } while (++i < hi); 916 } 917 } 918 919 @Override 920 public boolean tryAdvance(IntConsumer action) { 921 if (action == null) 922 throw new NullPointerException(); 923 int i = index; 924 if (i >= 0 && i < fence) { 925 action.accept(charAt(array, i)); 926 index++; 927 return true; 928 } 929 return false; 930 } 931 932 @Override 933 public long estimateSize() { return (long)(fence - index); } 934 935 @Override 936 public int characteristics() { 937 return cs; 938 } 939 } 940 941 static class CodePointsSpliterator implements Spliterator.OfInt { 942 private final byte[] array; 943 private int index; // current index, modified on advance/split 944 private final int fence; // one past last index 945 private final int cs; 946 947 CodePointsSpliterator(byte[] array, int acs) { 948 this(array, 0, array.length >> 1, acs); 949 } 950 951 CodePointsSpliterator(byte[] array, int origin, int fence, int acs) { 952 this.array = array; 953 this.index = origin; 954 this.fence = fence; 955 this.cs = acs | Spliterator.ORDERED; 956 } 957 958 @Override 959 public OfInt trySplit() { 960 int lo = index, mid = (lo + fence) >>> 1; 961 if (lo >= mid) 962 return null; 963 964 int midOneLess; 965 // If the mid-point intersects a surrogate pair 966 if (Character.isLowSurrogate(charAt(array, mid)) && 967 Character.isHighSurrogate(charAt(array, midOneLess = (mid -1)))) { 968 // If there is only one pair it cannot be split 969 if (lo >= midOneLess) 970 return null; 971 // Shift the mid-point to align with the surrogate pair 972 return new CodePointsSpliterator(array, lo, index = midOneLess, cs); 973 } 974 return new CodePointsSpliterator(array, lo, index = mid, cs); 975 } 976 977 @Override 978 public void forEachRemaining(IntConsumer action) { 979 byte[] a; int i, hi; // hoist accesses and checks from loop 980 if (action == null) 981 throw new NullPointerException(); 982 if (((a = array).length >> 1) >= (hi = fence) && 983 (i = index) >= 0 && i < (index = hi)) { 984 do { 985 i = advance(a, i, hi, action); 986 } while (i < hi); 987 } 988 } 989 990 @Override 991 public boolean tryAdvance(IntConsumer action) { 992 if (action == null) 993 throw new NullPointerException(); 994 if (index >= 0 && index < fence) { 995 index = advance(array, index, fence, action); 996 return true; 997 } 998 return false; 999 } 1000 1001 // Advance one code point from the index, i, and return the next 1002 // index to advance from 1003 private static int advance(byte[] a, int i, int hi, IntConsumer action) { 1004 char c1 = charAt(a, i++); 1005 int cp = c1; 1006 if (Character.isHighSurrogate(c1) && i < hi) { 1007 char c2 = charAt(a, i); 1008 if (Character.isLowSurrogate(c2)) { 1009 i++; 1010 cp = Character.toCodePoint(c1, c2); 1011 } 1012 } 1013 action.accept(cp); 1014 return i; 1015 } 1016 1017 @Override 1018 public long estimateSize() { return (long)(fence - index); } 1019 1020 @Override 1021 public int characteristics() { 1022 return cs; 1023 } 1024 } 1025 1026 //////////////////////////////////////////////////////////////// 1027 1028 public static void putCharSB(byte[] val, int index, int c) { 1029 checkIndex(index, val); 1030 putChar(val, index, c); 1031 } 1032 1033 public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) { 1034 checkBoundsBeginEnd(index, index + end - off, val); 1035 putChars(val, index, ca, off, end); 1036 } 1037 1038 public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) { 1039 checkBoundsBeginEnd(index, index + end - off, val); 1040 for (int i = off; i < end; i++) { 1041 putChar(val, index++, s.charAt(i)); 1042 } 1043 } 1044 1045 public static int codePointAtSB(byte[] val, int index, int end) { 1046 return codePointAt(val, index, end, true /* checked */); 1047 } 1048 1049 public static int codePointBeforeSB(byte[] val, int index) { 1050 return codePointBefore(val, index, true /* checked */); 1051 } 1052 1053 public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) { 1054 return codePointCount(val, beginIndex, endIndex, true /* checked */); 1055 } 1056 1057 public static int getChars(int i, int begin, int end, byte[] value) { 1058 checkBoundsBeginEnd(begin, end, value); 1059 int pos = getChars(i, end, value); 1060 assert begin == pos; 1061 return pos; 1062 } 1063 1064 public static int getChars(long l, int begin, int end, byte[] value) { 1065 checkBoundsBeginEnd(begin, end, value); 1066 int pos = getChars(l, end, value); 1067 assert begin == pos; 1068 return pos; 1069 } 1070 1071 public static boolean contentEquals(byte[] v1, byte[] v2, int len) { 1072 checkBoundsOffCount(0, len, v2); 1073 for (int i = 0; i < len; i++) { 1074 if ((char)(v1[i] & 0xff) != getChar(v2, i)) { 1075 return false; 1076 } 1077 } 1078 return true; 1079 } 1080 1081 public static boolean contentEquals(byte[] value, CharSequence cs, int len) { 1082 checkOffset(len, value); 1083 for (int i = 0; i < len; i++) { 1084 if (getChar(value, i) != cs.charAt(i)) { 1085 return false; 1086 } 1087 } 1088 return true; 1089 } 1090 1091 public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) { 1092 int end = i + 4; 1093 checkBoundsBeginEnd(i, end, value); 1094 putChar(value, i++, c1); 1095 putChar(value, i++, c2); 1096 putChar(value, i++, c3); 1097 putChar(value, i++, c4); 1098 assert(i == end); 1099 return end; 1100 } 1101 1102 public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) { 1103 int end = i + 5; 1104 checkBoundsBeginEnd(i, end, value); 1105 putChar(value, i++, c1); 1106 putChar(value, i++, c2); 1107 putChar(value, i++, c3); 1108 putChar(value, i++, c4); 1109 putChar(value, i++, c5); 1110 assert(i == end); 1111 return end; 1112 } 1113 1114 public static char charAt(byte[] value, int index) { 1115 checkIndex(index, value); 1116 return getChar(value, index); 1117 } 1118 1119 public static void reverse(byte[] val, int count) { 1120 checkOffset(count, val); 1121 int n = count - 1; 1122 boolean hasSurrogates = false; 1123 for (int j = (n-1) >> 1; j >= 0; j--) { 1124 int k = n - j; 1125 char cj = getChar(val, j); 1126 char ck = getChar(val, k); 1127 putChar(val, j, ck); 1128 putChar(val, k, cj); 1129 if (Character.isSurrogate(cj) || 1130 Character.isSurrogate(ck)) { 1131 hasSurrogates = true; 1132 } 1133 } 1134 if (hasSurrogates) { 1135 reverseAllValidSurrogatePairs(val, count); 1136 } 1137 } 1138 1139 /** Outlined helper method for reverse() */ 1140 private static void reverseAllValidSurrogatePairs(byte[] val, int count) { 1141 for (int i = 0; i < count - 1; i++) { 1142 char c2 = getChar(val, i); 1143 if (Character.isLowSurrogate(c2)) { 1144 char c1 = getChar(val, i + 1); 1145 if (Character.isHighSurrogate(c1)) { 1146 putChar(val, i++, c1); 1147 putChar(val, i, c2); 1148 } 1149 } 1150 } 1151 } 1152 1153 // inflatedCopy byte[] -> byte[] 1154 public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 1155 // We need a range check here because 'putChar' has no checks 1156 checkBoundsOffCount(dstOff, len, dst); 1157 for (int i = 0; i < len; i++) { 1158 putChar(dst, dstOff++, src[srcOff++] & 0xff); 1159 } 1160 } 1161 1162 // srcCoder == UTF16 && tgtCoder == LATIN1 1163 public static int lastIndexOfLatin1(byte[] src, int srcCount, 1164 byte[] tgt, int tgtCount, int fromIndex) { 1165 assert fromIndex >= 0; 1166 assert tgtCount > 0; 1167 assert tgtCount <= tgt.length; 1168 int min = tgtCount - 1; 1169 int i = min + fromIndex; 1170 int strLastIndex = tgtCount - 1; 1171 1172 char strLastChar = (char)(tgt[strLastIndex] & 0xff); 1173 1174 checkIndex(i, src); 1175 1176 startSearchForLastChar: 1177 while (true) { 1178 while (i >= min && getChar(src, i) != strLastChar) { 1179 i--; 1180 } 1181 if (i < min) { 1182 return -1; 1183 } 1184 int j = i - 1; 1185 int start = j - strLastIndex; 1186 int k = strLastIndex - 1; 1187 while (j > start) { 1188 if (getChar(src, j--) != (tgt[k--] & 0xff)) { 1189 i--; 1190 continue startSearchForLastChar; 1191 } 1192 } 1193 return start + 1; 1194 } 1195 } 1196 1197 //////////////////////////////////////////////////////////////// 1198 1199 private static native boolean isBigEndian(); 1200 1201 static final int HI_BYTE_SHIFT; 1202 static final int LO_BYTE_SHIFT; 1203 static { 1204 if (isBigEndian()) { 1205 HI_BYTE_SHIFT = 8; 1206 LO_BYTE_SHIFT = 0; 1207 } else { 1208 HI_BYTE_SHIFT = 0; 1209 LO_BYTE_SHIFT = 8; 1210 } 1211 } 1212 1213 static final int MAX_LENGTH = Integer.MAX_VALUE >> 1; 1214 1215 // Used by trusted callers. Assumes all necessary bounds checks have 1216 // been done by the caller. 1217 1218 /** 1219 * This is a variant of {@link Integer#getChars(int, int, byte[])}, but for 1220 * UTF-16 coder. 1221 * 1222 * @param i value to convert 1223 * @param index next index, after the least significant digit 1224 * @param buf target buffer, UTF16-coded. 1225 * @return index of the most significant digit or minus sign, if present 1226 */ 1227 static int getChars(int i, int index, byte[] buf) { 1228 int q, r; 1229 int charPos = index; 1230 1231 boolean negative = (i < 0); 1232 if (!negative) { 1233 i = -i; 1234 } 1235 1236 // Get 2 digits/iteration using ints 1237 while (i <= -100) { 1238 q = i / 100; 1239 r = (q * 100) - i; 1240 i = q; 1241 putChar(buf, --charPos, Integer.DigitOnes[r]); 1242 putChar(buf, --charPos, Integer.DigitTens[r]); 1243 } 1244 1245 // We know there are at most two digits left at this point. 1246 q = i / 10; 1247 r = (q * 10) - i; 1248 putChar(buf, --charPos, '0' + r); 1249 1250 // Whatever left is the remaining digit. 1251 if (q < 0) { 1252 putChar(buf, --charPos, '0' - q); 1253 } 1254 1255 if (negative) { 1256 putChar(buf, --charPos, '-'); 1257 } 1258 return charPos; 1259 } 1260 1261 /** 1262 * This is a variant of {@link Long#getChars(long, int, byte[])}, but for 1263 * UTF-16 coder. 1264 * 1265 * @param i value to convert 1266 * @param index next index, after the least significant digit 1267 * @param buf target buffer, UTF16-coded. 1268 * @return index of the most significant digit or minus sign, if present 1269 */ 1270 static int getChars(long i, int index, byte[] buf) { 1271 long q; 1272 int r; 1273 int charPos = index; 1274 1275 boolean negative = (i < 0); 1276 if (!negative) { 1277 i = -i; 1278 } 1279 1280 // Get 2 digits/iteration using longs until quotient fits into an int 1281 while (i <= Integer.MIN_VALUE) { 1282 q = i / 100; 1283 r = (int)((q * 100) - i); 1284 i = q; 1285 putChar(buf, --charPos, Integer.DigitOnes[r]); 1286 putChar(buf, --charPos, Integer.DigitTens[r]); 1287 } 1288 1289 // Get 2 digits/iteration using ints 1290 int q2; 1291 int i2 = (int)i; 1292 while (i2 <= -100) { 1293 q2 = i2 / 100; 1294 r = (q2 * 100) - i2; 1295 i2 = q2; 1296 putChar(buf, --charPos, Integer.DigitOnes[r]); 1297 putChar(buf, --charPos, Integer.DigitTens[r]); 1298 } 1299 1300 // We know there are at most two digits left at this point. 1301 q2 = i2 / 10; 1302 r = (q2 * 10) - i2; 1303 putChar(buf, --charPos, '0' + r); 1304 1305 // Whatever left is the remaining digit. 1306 if (q2 < 0) { 1307 putChar(buf, --charPos, '0' - q2); 1308 } 1309 1310 if (negative) { 1311 putChar(buf, --charPos, '-'); 1312 } 1313 return charPos; 1314 } 1315 // End of trusted methods. 1316 1317 public static void checkIndex(int off, byte[] val) { 1318 String.checkIndex(off, length(val)); 1319 } 1320 1321 public static void checkOffset(int off, byte[] val) { 1322 String.checkOffset(off, length(val)); 1323 } 1324 1325 public static void checkBoundsBeginEnd(int begin, int end, byte[] val) { 1326 String.checkBoundsBeginEnd(begin, end, length(val)); 1327 } 1328 1329 public static void checkBoundsOffCount(int offset, int count, byte[] val) { 1330 String.checkBoundsOffCount(offset, count, length(val)); 1331 } 1332 1333 }