1 /* 2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Locale; 30 import java.util.Spliterator; 31 import java.util.function.IntConsumer; 32 import jdk.internal.HotSpotIntrinsicCandidate; 33 34 import static java.lang.String.UTF16; 35 import static java.lang.String.LATIN1; 36 import static java.lang.String.checkIndex; 37 import static java.lang.String.checkOffset; 38 import static java.lang.String.checkBoundsOffCount; 39 40 final class StringUTF16 { 41 42 public static byte[] newBytesFor(int len) { 43 if (len < 0) { 44 throw new NegativeArraySizeException(); 45 } 46 if (len > MAX_LENGTH) { 47 throw new OutOfMemoryError("UTF16 String size is " + len + 48 ", should be less than " + MAX_LENGTH); 49 } 50 return new byte[len << 1]; 51 } 52 53 @HotSpotIntrinsicCandidate 54 public static void putChar(byte[] val, int index, int c) { 55 index <<= 1; 56 val[index++] = (byte)(c >> HI_BYTE_SHIFT); 57 val[index] = (byte)(c >> LO_BYTE_SHIFT); 58 } 59 60 @HotSpotIntrinsicCandidate 61 public static char getChar(byte[] val, int index) { 62 index <<= 1; 63 return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) | 64 ((val[index] & 0xff) << LO_BYTE_SHIFT)); 65 } 66 67 public static char charAt(byte[] value, int index) { 68 if (index < 0 || index >= value.length >> 1) { 69 throw new StringIndexOutOfBoundsException(index); 70 } 71 return getChar(value, index); 72 } 73 74 public static int length(byte[] value) { 75 return value.length >> 1; 76 } 77 78 public static int codePointAt(byte[] value, int index, int end) { 79 char c1 = getChar(value, index); 80 if (Character.isHighSurrogate(c1) && ++index < end) { 81 char c2 = getChar(value, index); 82 if (Character.isLowSurrogate(c2)) { 83 return Character.toCodePoint(c1, c2); 84 } 85 } 86 return c1; 87 } 88 89 public static int codePointBefore(byte[] value, int index) { 90 char c2 = getChar(value, --index); 91 if (Character.isLowSurrogate(c2) && index > 0) { 92 char c1 = getChar(value, --index); 93 if (Character.isHighSurrogate(c1)) { 94 return Character.toCodePoint(c1, c2); 95 } 96 } 97 return c2; 98 } 99 100 public static int codePointCount(byte[] value, int beginIndex, int endIndex) { 101 int count = endIndex - beginIndex; 102 for (int i = beginIndex; i < endIndex; ) { 103 if (Character.isHighSurrogate(getChar(value, i++)) && 104 i < endIndex && 105 Character.isLowSurrogate(getChar(value, i))) { 106 count--; 107 i++; 108 } 109 } 110 return count; 111 } 112 113 public static char[] toChars(byte[] value) { 114 char[] dst = new char[value.length >> 1]; 115 getChars(value, 0, dst.length, dst, 0); 116 return dst; 117 } 118 119 @HotSpotIntrinsicCandidate 120 public static byte[] toBytes(char[] value, int off, int len) { 121 byte[] val = newBytesFor(len); 122 for (int i = 0; i < len; i++) { 123 putChar(val, i, value[off]); 124 off++; 125 } 126 return val; 127 } 128 129 public static byte[] compress(char[] val, int off, int len) { 130 byte[] ret = new byte[len]; 131 if (compress(val, off, ret, 0, len) == len) { 132 return ret; 133 } 134 return null; 135 } 136 137 public static byte[] compress(byte[] val, int off, int len) { 138 byte[] ret = new byte[len]; 139 if (compress(val, off, ret, 0, len) == len) { 140 return ret; 141 } 142 return null; 143 } 144 145 // compressedCopy char[] -> byte[] 146 @HotSpotIntrinsicCandidate 147 private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) { 148 for (int i = 0; i < len; i++) { 149 char c = src[srcOff]; 150 if (c > 0xFF) { 151 len = 0; 152 break; 153 } 154 dst[dstOff] = (byte)c; 155 srcOff++; 156 dstOff++; 157 } 158 return len; 159 } 160 161 // compressedCopy byte[] -> byte[] 162 @HotSpotIntrinsicCandidate 163 public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 164 // We need a range check here because 'getChar' has no checks 165 checkBoundsOffCount(srcOff, len, src.length); 166 for (int i = 0; i < len; i++) { 167 char c = getChar(src, srcOff); 168 if (c > 0xFF) { 169 len = 0; 170 break; 171 } 172 dst[dstOff] = (byte)c; 173 srcOff++; 174 dstOff++; 175 } 176 return len; 177 } 178 179 public static byte[] toBytes(int[] val, int index, int len) { 180 final int end = index + len; 181 // Pass 1: Compute precise size of char[] 182 int n = len; 183 for (int i = index; i < end; i++) { 184 int cp = val[i]; 185 if (Character.isBmpCodePoint(cp)) 186 continue; 187 else if (Character.isValidCodePoint(cp)) 188 n++; 189 else throw new IllegalArgumentException(Integer.toString(cp)); 190 } 191 // Pass 2: Allocate and fill in <high, low> pair 192 byte[] buf = newBytesFor(n); 193 for (int i = index, j = 0; i < end; i++, j++) { 194 int cp = val[i]; 195 if (Character.isBmpCodePoint(cp)) { 196 putChar(buf, j, cp); 197 } else { 198 putChar(buf, j++, Character.highSurrogate(cp)); 199 putChar(buf, j, Character.lowSurrogate(cp)); 200 } 201 } 202 return buf; 203 } 204 205 public static byte[] toBytes(char c) { 206 byte[] result = new byte[2]; 207 putChar(result, 0, c); 208 return result; 209 } 210 211 @HotSpotIntrinsicCandidate 212 public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) { 213 // We need a range check here because 'getChar' has no checks 214 checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value.length); 215 for (int i = srcBegin; i < srcEnd; i++) { 216 dst[dstBegin++] = getChar(value, i); 217 } 218 } 219 220 /* @see java.lang.String.getBytes(int, int, byte[], int) */ 221 public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) { 222 srcBegin <<= 1; 223 srcEnd <<= 1; 224 for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) { 225 dst[dstBegin++] = value[i]; 226 } 227 } 228 229 @HotSpotIntrinsicCandidate 230 public static boolean equals(byte[] value, byte[] other) { 231 if (value.length == other.length) { 232 int len = value.length >> 1; 233 for (int i = 0; i < len; i++) { 234 if (getChar(value, i) != getChar(other, i)) { 235 return false; 236 } 237 } 238 return true; 239 } 240 return false; 241 } 242 243 @HotSpotIntrinsicCandidate 244 public static int compareTo(byte[] value, byte[] other) { 245 int len1 = length(value); 246 int len2 = length(other); 247 int lim = Math.min(len1, len2); 248 for (int k = 0; k < lim; k++) { 249 char c1 = getChar(value, k); 250 char c2 = getChar(other, k); 251 if (c1 != c2) { 252 return c1 - c2; 253 } 254 } 255 return len1 - len2; 256 } 257 258 @HotSpotIntrinsicCandidate 259 public static int compareToLatin1(byte[] value, byte[] other) { 260 int len1 = length(value); 261 int len2 = StringLatin1.length(other); 262 int lim = Math.min(len1, len2); 263 for (int k = 0; k < lim; k++) { 264 char c1 = getChar(value, k); 265 char c2 = StringLatin1.getChar(other, k); 266 if (c1 != c2) { 267 return c1 - c2; 268 } 269 } 270 return len1 - len2; 271 } 272 273 public static int compareToCI(byte[] value, byte[] other) { 274 int len1 = length(value); 275 int len2 = length(other); 276 int lim = Math.min(len1, len2); 277 for (int k = 0; k < lim; k++) { 278 char c1 = getChar(value, k); 279 char c2 = getChar(other, k); 280 if (c1 != c2) { 281 c1 = Character.toUpperCase(c1); 282 c2 = Character.toUpperCase(c2); 283 if (c1 != c2) { 284 c1 = Character.toLowerCase(c1); 285 c2 = Character.toLowerCase(c2); 286 if (c1 != c2) { 287 return c1 - c2; 288 } 289 } 290 } 291 } 292 return len1 - len2; 293 } 294 295 public static int compareToCI_Latin1(byte[] value, byte[] other) { 296 int len1 = length(value); 297 int len2 = StringLatin1.length(other); 298 int lim = Math.min(len1, len2); 299 for (int k = 0; k < lim; k++) { 300 char c1 = getChar(value, k); 301 char c2 = StringLatin1.getChar(other, k); 302 if (c1 != c2) { 303 c1 = Character.toUpperCase(c1); 304 c2 = Character.toUpperCase(c2); 305 if (c1 != c2) { 306 c1 = Character.toLowerCase(c1); 307 c2 = Character.toLowerCase(c2); 308 if (c1 != c2) { 309 return c1 - c2; 310 } 311 } 312 } 313 } 314 return len1 - len2; 315 } 316 317 public static int hashCode(byte[] value) { 318 int h = 0; 319 int length = value.length >> 1; 320 for (int i = 0; i < length; i++) { 321 h = 31 * h + getChar(value, i); 322 } 323 return h; 324 } 325 326 public static int indexOf(byte[] value, int ch, int fromIndex) { 327 int max = value.length >> 1; 328 if (fromIndex < 0) { 329 fromIndex = 0; 330 } else if (fromIndex >= max) { 331 // Note: fromIndex might be near -1>>>1. 332 return -1; 333 } 334 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 335 // handle most cases here (ch is a BMP code point or a 336 // negative value (invalid code point)) 337 return indexOfChar(value, ch, fromIndex, max); 338 } else { 339 return indexOfSupplementary(value, ch, fromIndex, max); 340 } 341 } 342 343 @HotSpotIntrinsicCandidate 344 public static int indexOf(byte[] value, byte[] str) { 345 if (str.length == 0) { 346 return 0; 347 } 348 if (value.length == 0) { 349 return -1; 350 } 351 return indexOf(value, length(value), str, length(str), 0); 352 } 353 354 @HotSpotIntrinsicCandidate 355 public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 356 char first = getChar(str, 0); 357 int max = (valueCount - strCount); 358 for (int i = fromIndex; i <= max; i++) { 359 // Look for first character. 360 if (getChar(value, i) != first) { 361 while (++i <= max && getChar(value, i) != first); 362 } 363 // Found first character, now look at the rest of value 364 if (i <= max) { 365 int j = i + 1; 366 int end = j + strCount - 1; 367 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++); 368 if (j == end) { 369 // Found whole string. 370 return i; 371 } 372 } 373 } 374 return -1; 375 } 376 377 /** 378 * Handles indexOf Latin1 substring in UTF16 string. 379 */ 380 @HotSpotIntrinsicCandidate 381 public static int indexOfLatin1(byte[] value, byte[] str) { 382 if (str.length == 0) { 383 return 0; 384 } 385 if (value.length == 0) { 386 return -1; 387 } 388 return indexOfLatin1(value, length(value), str, str.length, 0); 389 } 390 391 @HotSpotIntrinsicCandidate 392 public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { 393 char first = (char)(tgt[0] & 0xff); 394 int max = (srcCount - tgtCount); 395 for (int i = fromIndex; i <= max; i++) { 396 // Look for first character. 397 if (getChar(src, i) != first) { 398 while (++i <= max && getChar(src, i) != first); 399 } 400 // Found first character, now look at the rest of v2 401 if (i <= max) { 402 int j = i + 1; 403 int end = j + tgtCount - 1; 404 for (int k = 1; 405 j < end && getChar(src, j) == (tgt[k] & 0xff); 406 j++, k++); 407 if (j == end) { 408 // Found whole string. 409 return i; 410 } 411 } 412 } 413 return -1; 414 } 415 416 @HotSpotIntrinsicCandidate 417 private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { 418 for (int i = fromIndex; i < max; i++) { 419 if (getChar(value, i) == ch) { 420 return i; 421 } 422 } 423 return -1; 424 } 425 426 /** 427 * Handles (rare) calls of indexOf with a supplementary character. 428 */ 429 private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) { 430 if (Character.isValidCodePoint(ch)) { 431 final char hi = Character.highSurrogate(ch); 432 final char lo = Character.lowSurrogate(ch); 433 for (int i = fromIndex; i < max - 1; i++) { 434 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) { 435 return i; 436 } 437 } 438 } 439 return -1; 440 } 441 442 public static int lastIndexOf(byte[] src, int srcCount, 443 byte[] tgt, int tgtCount, int fromIndex) { 444 int min = tgtCount - 1; 445 int i = min + fromIndex; 446 int strLastIndex = tgtCount - 1; 447 char strLastChar = getChar(tgt, strLastIndex); 448 449 startSearchForLastChar: 450 while (true) { 451 while (i >= min && getChar(src, i) != strLastChar) { 452 i--; 453 } 454 if (i < min) { 455 return -1; 456 } 457 int j = i - 1; 458 int start = j - strLastIndex; 459 int k = strLastIndex - 1; 460 while (j > start) { 461 if (getChar(src, j--) != getChar(tgt, k--)) { 462 i--; 463 continue startSearchForLastChar; 464 } 465 } 466 return start + 1; 467 } 468 } 469 470 public static int lastIndexOf(byte[] value, int ch, int fromIndex) { 471 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 472 // handle most cases here (ch is a BMP code point or a 473 // negative value (invalid code point)) 474 int i = Math.min(fromIndex, (value.length >> 1) - 1); 475 for (; i >= 0; i--) { 476 if (getChar(value, i) == ch) { 477 return i; 478 } 479 } 480 return -1; 481 } else { 482 return lastIndexOfSupplementary(value, ch, fromIndex); 483 } 484 } 485 486 /** 487 * Handles (rare) calls of lastIndexOf with a supplementary character. 488 */ 489 private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) { 490 if (Character.isValidCodePoint(ch)) { 491 char hi = Character.highSurrogate(ch); 492 char lo = Character.lowSurrogate(ch); 493 int i = Math.min(fromIndex, (value.length >> 1) - 2); 494 for (; i >= 0; i--) { 495 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) { 496 return i; 497 } 498 } 499 } 500 return -1; 501 } 502 503 public static String replace(byte[] value, char oldChar, char newChar) { 504 int len = value.length >> 1; 505 int i = -1; 506 while (++i < len) { 507 if (getChar(value, i) == oldChar) { 508 break; 509 } 510 } 511 if (i < len) { 512 byte buf[] = new byte[value.length]; 513 for (int j = 0; j < i; j++) { 514 putChar(buf, j, getChar(value, j)); // TBD:arraycopy? 515 } 516 while (i < len) { 517 char c = getChar(value, i); 518 putChar(buf, i, c == oldChar ? newChar : c); 519 i++; 520 } 521 // Check if we should try to compress to latin1 522 if (String.COMPACT_STRINGS && 523 !StringLatin1.canEncode(oldChar) && 524 StringLatin1.canEncode(newChar)) { 525 byte[] val = compress(buf, 0, len); 526 if (val != null) { 527 return new String(val, LATIN1); 528 } 529 } 530 return new String(buf, UTF16); 531 } 532 return null; 533 } 534 535 public static boolean regionMatchesCI(byte[] value, int toffset, 536 byte[] other, int ooffset, int len) { 537 int last = toffset + len; 538 while (toffset < last) { 539 char c1 = getChar(value, toffset++); 540 char c2 = getChar(other, ooffset++); 541 if (c1 == c2) { 542 continue; 543 } 544 // try converting both characters to uppercase. 545 // If the results match, then the comparison scan should 546 // continue. 547 char u1 = Character.toUpperCase(c1); 548 char u2 = Character.toUpperCase(c2); 549 if (u1 == u2) { 550 continue; 551 } 552 // Unfortunately, conversion to uppercase does not work properly 553 // for the Georgian alphabet, which has strange rules about case 554 // conversion. So we need to make one last check before 555 // exiting. 556 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 557 continue; 558 } 559 return false; 560 } 561 return true; 562 } 563 564 public static boolean regionMatchesCI_Latin1(byte[] value, int toffset, 565 byte[] other, int ooffset, 566 int len) { 567 int last = toffset + len; 568 while (toffset < last) { 569 char c1 = getChar(value, toffset++); 570 char c2 = (char)(other[ooffset++] & 0xff); 571 if (c1 == c2) { 572 continue; 573 } 574 char u1 = Character.toUpperCase(c1); 575 char u2 = Character.toUpperCase(c2); 576 if (u1 == u2) { 577 continue; 578 } 579 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 580 continue; 581 } 582 return false; 583 } 584 return true; 585 } 586 587 public static String toLowerCase(String str, byte[] value, Locale locale) { 588 if (locale == null) { 589 throw new NullPointerException(); 590 } 591 int first; 592 boolean hasSurr = false; 593 final int len = value.length >> 1; 594 595 // Now check if there are any characters that need to be changed, or are surrogate 596 for (first = 0 ; first < len; first++) { 597 int cp = (int)getChar(value, first); 598 if (Character.isSurrogate((char)cp)) { 599 hasSurr = true; 600 break; 601 } 602 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR 603 break; 604 } 605 } 606 if (first == len) 607 return str; 608 byte[] result = new byte[value.length]; 609 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 610 // lowerCase characters. 611 String lang = locale.getLanguage(); 612 if (lang == "tr" || lang == "az" || lang == "lt") { 613 return toLowerCaseEx(str, value, result, first, locale, true); 614 } 615 if (hasSurr) { 616 return toLowerCaseEx(str, value, result, first, locale, false); 617 } 618 int bits = 0; 619 for (int i = first; i < len; i++) { 620 int cp = (int)getChar(value, i); 621 if (cp == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 622 Character.isSurrogate((char)cp)) { 623 return toLowerCaseEx(str, value, result, i, locale, false); 624 } 625 if (cp == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 626 return toLowerCaseEx(str, value, result, i, locale, true); 627 } 628 cp = Character.toLowerCase(cp); 629 if (!Character.isBmpCodePoint(cp)) { 630 return toLowerCaseEx(str, value, result, i, locale, false); 631 } 632 bits |= cp; 633 putChar(result, i, cp); 634 } 635 if (bits > 0xFF) { 636 return new String(result, UTF16); 637 } else { 638 return newString(result, 0, len); 639 } 640 } 641 642 private static String toLowerCaseEx(String str, byte[] value, 643 byte[] result, int first, Locale locale, 644 boolean localeDependent) { 645 int resultOffset = first; 646 int length = value.length >> 1; 647 int srcCount; 648 for (int i = first; i < length; i += srcCount) { 649 int srcChar = getChar(value, i); 650 int lowerChar; 651 char[] lowerCharArray; 652 srcCount = 1; 653 if (Character.isSurrogate((char)srcChar)) { 654 srcChar = codePointAt(value, i, length); 655 srcCount = Character.charCount(srcChar); 656 } 657 if (localeDependent || 658 srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 659 srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 660 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale); 661 } else { 662 lowerChar = Character.toLowerCase(srcChar); 663 } 664 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp 665 putChar(result, resultOffset++, lowerChar); 666 } else { 667 if (lowerChar == Character.ERROR) { 668 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale); 669 } else { 670 lowerCharArray = Character.toChars(lowerChar); 671 } 672 /* Grow result if needed */ 673 int mapLen = lowerCharArray.length; 674 if (mapLen > srcCount) { 675 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 676 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 677 result = result2; 678 } 679 for (int x = 0; x < mapLen; ++x) { 680 putChar(result, resultOffset++, lowerCharArray[x]); 681 } 682 } 683 } 684 return newString(result, 0, resultOffset); 685 } 686 687 public static String toUpperCase(String str, byte[] value, Locale locale) { 688 if (locale == null) { 689 throw new NullPointerException(); 690 } 691 int first; 692 boolean hasSurr = false; 693 final int len = value.length >> 1; 694 695 // Now check if there are any characters that need to be changed, or are surrogate 696 for (first = 0 ; first < len; first++) { 697 int cp = (int)getChar(value, first); 698 if (Character.isSurrogate((char)cp)) { 699 hasSurr = true; 700 break; 701 } 702 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR 703 break; 704 } 705 } 706 if (first == len) { 707 return str; 708 } 709 byte[] result = new byte[value.length]; 710 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 711 // upperCase characters. 712 String lang = locale.getLanguage(); 713 if (lang == "tr" || lang == "az" || lang == "lt") { 714 return toUpperCaseEx(str, value, result, first, locale, true); 715 } 716 if (hasSurr) { 717 return toUpperCaseEx(str, value, result, first, locale, false); 718 } 719 int bits = 0; 720 for (int i = first; i < len; i++) { 721 int cp = (int)getChar(value, i); 722 if (Character.isSurrogate((char)cp)) { 723 return toUpperCaseEx(str, value, result, i, locale, false); 724 } 725 cp = Character.toUpperCaseEx(cp); 726 if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp 727 return toUpperCaseEx(str, value, result, i, locale, false); 728 } 729 bits |= cp; 730 putChar(result, i, cp); 731 } 732 if (bits > 0xFF) { 733 return new String(result, UTF16); 734 } else { 735 return newString(result, 0, len); 736 } 737 } 738 739 private static String toUpperCaseEx(String str, byte[] value, 740 byte[] result, int first, 741 Locale locale, boolean localeDependent) 742 { 743 int resultOffset = first; 744 int length = value.length >> 1; 745 int srcCount; 746 for (int i = first; i < length; i += srcCount) { 747 int srcChar = getChar(value, i); 748 int upperChar; 749 char[] upperCharArray; 750 srcCount = 1; 751 if (Character.isSurrogate((char)srcChar)) { 752 srcChar = codePointAt(value, i, length); 753 srcCount = Character.charCount(srcChar); 754 } 755 if (localeDependent) { 756 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale); 757 } else { 758 upperChar = Character.toUpperCaseEx(srcChar); 759 } 760 if (Character.isBmpCodePoint(upperChar)) { 761 putChar(result, resultOffset++, upperChar); 762 } else { 763 if (upperChar == Character.ERROR) { 764 if (localeDependent) { 765 upperCharArray = 766 ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale); 767 } else { 768 upperCharArray = Character.toUpperCaseCharArray(srcChar); 769 } 770 } else { 771 upperCharArray = Character.toChars(upperChar); 772 } 773 /* Grow result if needed */ 774 int mapLen = upperCharArray.length; 775 if (mapLen > srcCount) { 776 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 777 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 778 result = result2; 779 } 780 for (int x = 0; x < mapLen; ++x) { 781 putChar(result, resultOffset++, upperCharArray[x]); 782 } 783 } 784 } 785 return newString(result, 0, resultOffset); 786 } 787 788 public static String trim(byte[] value) { 789 int length = value.length >> 1; 790 int len = length; 791 int st = 0; 792 while (st < len && getChar(value, st) <= ' ') { 793 st++; 794 } 795 while (st < len && getChar(value, len - 1) <= ' ') { 796 len--; 797 } 798 return ((st > 0) || (len < length )) ? 799 new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) : 800 null; 801 } 802 803 public static void putChars(byte[] val, int index, char[] str, int off, int end) { 804 while (off < end) { 805 putChar(val, index++, str[off++]); 806 } 807 } 808 809 public static String newString(byte[] val, int index, int len) { 810 if (String.COMPACT_STRINGS) { 811 byte[] buf = compress(val, index, len); 812 if (buf != null) { 813 return new String(buf, LATIN1); 814 } 815 } 816 int last = index + len; 817 return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16); 818 } 819 820 public static void fillNull(byte[] val, int index, int end) { 821 Arrays.fill(val, index << 1, end << 1, (byte)0); 822 } 823 824 static class CharsSpliterator implements Spliterator.OfInt { 825 private final byte[] array; 826 private int index; // current index, modified on advance/split 827 private final int fence; // one past last index 828 private final int cs; 829 830 CharsSpliterator(byte[] array, int acs) { 831 this(array, 0, array.length >> 1, acs); 832 } 833 834 CharsSpliterator(byte[] array, int origin, int fence, int acs) { 835 this.array = array; 836 this.index = origin; 837 this.fence = fence; 838 this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED 839 | Spliterator.SUBSIZED; 840 } 841 842 @Override 843 public OfInt trySplit() { 844 int lo = index, mid = (lo + fence) >>> 1; 845 return (lo >= mid) 846 ? null 847 : new CharsSpliterator(array, lo, index = mid, cs); 848 } 849 850 @Override 851 public void forEachRemaining(IntConsumer action) { 852 byte[] a; int i, hi; // hoist accesses and checks from loop 853 if (action == null) 854 throw new NullPointerException(); 855 if (((a = array).length >> 1) >= (hi = fence) && 856 (i = index) >= 0 && i < (index = hi)) { 857 do { action.accept(getChar(a, i)); } while (++i < hi); 858 } 859 } 860 861 @Override 862 public boolean tryAdvance(IntConsumer action) { 863 if (action == null) 864 throw new NullPointerException(); 865 if (index >= 0 && index < fence) { 866 action.accept(getChar(array, index++)); 867 return true; 868 } 869 return false; 870 } 871 872 @Override 873 public long estimateSize() { return (long)(fence - index); } 874 875 @Override 876 public int characteristics() { 877 return cs; 878 } 879 } 880 881 static class CodePointsSpliterator implements Spliterator.OfInt { 882 private final byte[] array; 883 private int index; // current index, modified on advance/split 884 private final int fence; // one past last index 885 private final int cs; 886 887 CodePointsSpliterator(byte[] array, int acs) { 888 this(array, 0, array.length >> 1, acs); 889 } 890 891 CodePointsSpliterator(byte[] array, int origin, int fence, int acs) { 892 this.array = array; 893 this.index = origin; 894 this.fence = fence; 895 this.cs = acs | Spliterator.ORDERED; 896 } 897 898 @Override 899 public OfInt trySplit() { 900 int lo = index, mid = (lo + fence) >>> 1; 901 if (lo >= mid) 902 return null; 903 904 int midOneLess; 905 // If the mid-point intersects a surrogate pair 906 if (Character.isLowSurrogate(getChar(array, mid)) && 907 Character.isHighSurrogate(getChar(array, midOneLess = (mid -1)))) { 908 // If there is only one pair it cannot be split 909 if (lo >= midOneLess) 910 return null; 911 // Shift the mid-point to align with the surrogate pair 912 return new CodePointsSpliterator(array, lo, index = midOneLess, cs); 913 } 914 return new CodePointsSpliterator(array, lo, index = mid, cs); 915 } 916 917 @Override 918 public void forEachRemaining(IntConsumer action) { 919 byte[] a; int i, hi; // hoist accesses and checks from loop 920 if (action == null) 921 throw new NullPointerException(); 922 if (((a = array).length >> 1) >= (hi = fence) && 923 (i = index) >= 0 && i < (index = hi)) { 924 do { 925 i = advance(a, i, hi, action); 926 } while (i < hi); 927 } 928 } 929 930 @Override 931 public boolean tryAdvance(IntConsumer action) { 932 if (action == null) 933 throw new NullPointerException(); 934 if (index >= 0 && index < fence) { 935 index = advance(array, index, fence, action); 936 return true; 937 } 938 return false; 939 } 940 941 // Advance one code point from the index, i, and return the next 942 // index to advance from 943 private static int advance(byte[] a, int i, int hi, IntConsumer action) { 944 char c1 = getChar(a, i++); 945 int cp = c1; 946 if (Character.isHighSurrogate(c1) && i < hi) { 947 char c2 = getChar(a, i); 948 if (Character.isLowSurrogate(c2)) { 949 i++; 950 cp = Character.toCodePoint(c1, c2); 951 } 952 } 953 action.accept(cp); 954 return i; 955 } 956 957 @Override 958 public long estimateSize() { return (long)(fence - index); } 959 960 @Override 961 public int characteristics() { 962 return cs; 963 } 964 } 965 966 //////////////////////////////////////////////////////////////// 967 968 public static void putCharSB(byte[] val, int index, int c) { 969 checkIndex(index, val.length >> 1); 970 putChar(val, index, c); 971 } 972 973 public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) { 974 checkOffset(index + end - off, val.length >> 1); 975 putChars(val, index, ca, off, end); 976 } 977 978 public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) { 979 checkOffset(index + end - off, val.length >> 1); 980 for (int i = off; i < end; i++) { 981 putChar(val, index++, s.charAt(i)); 982 } 983 } 984 985 public static int codePointAtSB(byte[] val, int index, int end) { 986 checkOffset(end, val.length >> 1); 987 return codePointAt(val, index, end); 988 } 989 990 public static int codePointBeforeSB(byte[] val, int index) { 991 checkOffset(index, val.length >> 1); 992 return codePointBefore(val, index); 993 } 994 995 public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) { 996 checkOffset(endIndex, val.length >> 1); 997 return codePointCount(val, beginIndex, endIndex); 998 } 999 1000 //////////////////////////////////////////////////////////////// 1001 1002 private static native boolean isBigEndian(); 1003 1004 static final int HI_BYTE_SHIFT; 1005 static final int LO_BYTE_SHIFT; 1006 static { 1007 if (isBigEndian()) { 1008 HI_BYTE_SHIFT = 8; 1009 LO_BYTE_SHIFT = 0; 1010 } else { 1011 HI_BYTE_SHIFT = 0; 1012 LO_BYTE_SHIFT = 8; 1013 } 1014 } 1015 1016 static final int MAX_LENGTH = Integer.MAX_VALUE >> 1; 1017 }