1 /* 2 * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Locale; 30 import java.util.Spliterator; 31 import java.util.function.Consumer; 32 import java.util.function.IntConsumer; 33 import java.util.stream.Stream; 34 import java.util.stream.StreamSupport; 35 import jdk.internal.HotSpotIntrinsicCandidate; 36 import jdk.internal.util.ArraysSupport; 37 import jdk.internal.vm.annotation.ForceInline; 38 import jdk.internal.vm.annotation.DontInline; 39 40 import static java.lang.String.UTF16; 41 import static java.lang.String.LATIN1; 42 43 final class StringUTF16 { 44 45 public static byte[] newBytesFor(int len) { 46 if (len < 0) { 47 throw new NegativeArraySizeException(); 48 } 49 if (len > MAX_LENGTH) { 50 throw new OutOfMemoryError("UTF16 String size is " + len + 51 ", should be less than " + MAX_LENGTH); 52 } 53 return new byte[len << 1]; 54 } 55 56 @HotSpotIntrinsicCandidate 57 // intrinsic performs no bounds checks 58 static void putChar(byte[] val, int index, int c) { 59 assert index >= 0 && index < length(val) : "Trusted caller missed bounds check"; 60 index <<= 1; 61 val[index++] = (byte)(c >> HI_BYTE_SHIFT); 62 val[index] = (byte)(c >> LO_BYTE_SHIFT); 63 } 64 65 @HotSpotIntrinsicCandidate 66 // intrinsic performs no bounds checks 67 static char getChar(byte[] val, int index) { 68 assert index >= 0 && index < length(val) : "Trusted caller missed bounds check"; 69 index <<= 1; 70 return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) | 71 ((val[index] & 0xff) << LO_BYTE_SHIFT)); 72 } 73 74 public static int length(byte[] value) { 75 return value.length >> 1; 76 } 77 78 private static int codePointAt(byte[] value, int index, int end, boolean checked) { 79 assert index < end; 80 if (checked) { 81 checkIndex(index, value); 82 } 83 char c1 = getChar(value, index); 84 if (Character.isHighSurrogate(c1) && ++index < end) { 85 if (checked) { 86 checkIndex(index, value); 87 } 88 char c2 = getChar(value, index); 89 if (Character.isLowSurrogate(c2)) { 90 return Character.toCodePoint(c1, c2); 91 } 92 } 93 return c1; 94 } 95 96 public static int codePointAt(byte[] value, int index, int end) { 97 return codePointAt(value, index, end, false /* unchecked */); 98 } 99 100 private static int codePointBefore(byte[] value, int index, boolean checked) { 101 --index; 102 if (checked) { 103 checkIndex(index, value); 104 } 105 char c2 = getChar(value, index); 106 if (Character.isLowSurrogate(c2) && index > 0) { 107 --index; 108 if (checked) { 109 checkIndex(index, value); 110 } 111 char c1 = getChar(value, index); 112 if (Character.isHighSurrogate(c1)) { 113 return Character.toCodePoint(c1, c2); 114 } 115 } 116 return c2; 117 } 118 119 public static int codePointBefore(byte[] value, int index) { 120 return codePointBefore(value, index, false /* unchecked */); 121 } 122 123 private static int codePointCount(byte[] value, int beginIndex, int endIndex, boolean checked) { 124 assert beginIndex <= endIndex; 125 int count = endIndex - beginIndex; 126 int i = beginIndex; 127 if (checked && i < endIndex) { 128 checkBoundsBeginEnd(i, endIndex, value); 129 } 130 for (; i < endIndex - 1; ) { 131 if (Character.isHighSurrogate(getChar(value, i++)) && 132 Character.isLowSurrogate(getChar(value, i))) { 133 count--; 134 i++; 135 } 136 } 137 return count; 138 } 139 140 public static int codePointCount(byte[] value, int beginIndex, int endIndex) { 141 return codePointCount(value, beginIndex, endIndex, false /* unchecked */); 142 } 143 144 public static char[] toChars(byte[] value) { 145 char[] dst = new char[value.length >> 1]; 146 getChars(value, 0, dst.length, dst, 0); 147 return dst; 148 } 149 150 @HotSpotIntrinsicCandidate 151 public static byte[] toBytes(char[] value, int off, int len) { 152 byte[] val = newBytesFor(len); 153 for (int i = 0; i < len; i++) { 154 putChar(val, i, value[off]); 155 off++; 156 } 157 return val; 158 } 159 160 public static byte[] compress(char[] val, int off, int len) { 161 byte[] ret = new byte[len]; 162 if (compress(val, off, ret, 0, len) == len) { 163 return ret; 164 } 165 return null; 166 } 167 168 public static byte[] compress(byte[] val, int off, int len) { 169 byte[] ret = new byte[len]; 170 if (compress(val, off, ret, 0, len) == len) { 171 return ret; 172 } 173 return null; 174 } 175 176 // compressedCopy char[] -> byte[] 177 @HotSpotIntrinsicCandidate 178 public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) { 179 for (int i = 0; i < len; i++) { 180 char c = src[srcOff]; 181 if (c > 0xFF) { 182 len = 0; 183 break; 184 } 185 dst[dstOff] = (byte)c; 186 srcOff++; 187 dstOff++; 188 } 189 return len; 190 } 191 192 // compressedCopy byte[] -> byte[] 193 @HotSpotIntrinsicCandidate 194 public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 195 // We need a range check here because 'getChar' has no checks 196 checkBoundsOffCount(srcOff, len, src); 197 for (int i = 0; i < len; i++) { 198 char c = getChar(src, srcOff); 199 if (c > 0xFF) { 200 len = 0; 201 break; 202 } 203 dst[dstOff] = (byte)c; 204 srcOff++; 205 dstOff++; 206 } 207 return len; 208 } 209 210 public static byte[] toBytes(int[] val, int index, int len) { 211 final int end = index + len; 212 // Pass 1: Compute precise size of char[] 213 int n = len; 214 for (int i = index; i < end; i++) { 215 int cp = val[i]; 216 if (Character.isBmpCodePoint(cp)) 217 continue; 218 else if (Character.isValidCodePoint(cp)) 219 n++; 220 else throw new IllegalArgumentException(Integer.toString(cp)); 221 } 222 // Pass 2: Allocate and fill in <high, low> pair 223 byte[] buf = newBytesFor(n); 224 for (int i = index, j = 0; i < end; i++, j++) { 225 int cp = val[i]; 226 if (Character.isBmpCodePoint(cp)) { 227 putChar(buf, j, cp); 228 } else { 229 putChar(buf, j++, Character.highSurrogate(cp)); 230 putChar(buf, j, Character.lowSurrogate(cp)); 231 } 232 } 233 return buf; 234 } 235 236 public static byte[] toBytes(char c) { 237 byte[] result = new byte[2]; 238 putChar(result, 0, c); 239 return result; 240 } 241 242 static byte[] toBytesSupplementary(int cp) { 243 byte[] result = new byte[4]; 244 putChar(result, 0, Character.highSurrogate(cp)); 245 putChar(result, 1, Character.lowSurrogate(cp)); 246 return result; 247 } 248 249 @HotSpotIntrinsicCandidate 250 public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) { 251 // We need a range check here because 'getChar' has no checks 252 if (srcBegin < srcEnd) { 253 checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value); 254 } 255 for (int i = srcBegin; i < srcEnd; i++) { 256 dst[dstBegin++] = getChar(value, i); 257 } 258 } 259 260 /* @see java.lang.String.getBytes(int, int, byte[], int) */ 261 public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) { 262 srcBegin <<= 1; 263 srcEnd <<= 1; 264 for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) { 265 dst[dstBegin++] = value[i]; 266 } 267 } 268 269 @HotSpotIntrinsicCandidate 270 public static boolean equals(byte[] value, byte[] other) { 271 if (value.length == other.length) { 272 int len = value.length >> 1; 273 for (int i = 0; i < len; i++) { 274 if (getChar(value, i) != getChar(other, i)) { 275 return false; 276 } 277 } 278 return true; 279 } 280 return false; 281 } 282 283 @HotSpotIntrinsicCandidate 284 public static int compareTo(byte[] value, byte[] other) { 285 int len1 = length(value); 286 int len2 = length(other); 287 return compareValues(value, other, len1, len2); 288 } 289 290 /* 291 * Checks the boundary and then compares the byte arrays. 292 */ 293 public static int compareTo(byte[] value, byte[] other, int len1, int len2) { 294 checkOffset(len1, value); 295 checkOffset(len2, other); 296 297 return compareValues(value, other, len1, len2); 298 } 299 300 private static int compareValues(byte[] value, byte[] other, int len1, int len2) { 301 int lim = Math.min(len1, len2); 302 for (int k = 0; k < lim; k++) { 303 char c1 = getChar(value, k); 304 char c2 = getChar(other, k); 305 if (c1 != c2) { 306 return c1 - c2; 307 } 308 } 309 return len1 - len2; 310 } 311 312 @HotSpotIntrinsicCandidate 313 public static int compareToLatin1(byte[] value, byte[] other) { 314 return -StringLatin1.compareToUTF16(other, value); 315 } 316 317 public static int compareToLatin1(byte[] value, byte[] other, int len1, int len2) { 318 return -StringLatin1.compareToUTF16(other, value, len2, len1); 319 } 320 321 public static int compareToCI(byte[] value, byte[] other) { 322 int len1 = length(value); 323 int len2 = length(other); 324 int lim = Math.min(len1, len2); 325 for (int k = 0; k < lim; k++) { 326 char c1 = getChar(value, k); 327 char c2 = getChar(other, k); 328 if (c1 != c2) { 329 c1 = Character.toUpperCase(c1); 330 c2 = Character.toUpperCase(c2); 331 if (c1 != c2) { 332 c1 = Character.toLowerCase(c1); 333 c2 = Character.toLowerCase(c2); 334 if (c1 != c2) { 335 return c1 - c2; 336 } 337 } 338 } 339 } 340 return len1 - len2; 341 } 342 343 public static int compareToCI_Latin1(byte[] value, byte[] other) { 344 return -StringLatin1.compareToCI_UTF16(other, value); 345 } 346 347 public static int hashCode(byte[] value) { 348 int h = 0; 349 int length = value.length >> 1; 350 for (int i = 0; i < length; i++) { 351 h = 31 * h + getChar(value, i); 352 } 353 return h; 354 } 355 356 public static int indexOf(byte[] value, int ch, int fromIndex) { 357 int max = value.length >> 1; 358 if (fromIndex < 0) { 359 fromIndex = 0; 360 } else if (fromIndex >= max) { 361 // Note: fromIndex might be near -1>>>1. 362 return -1; 363 } 364 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 365 // handle most cases here (ch is a BMP code point or a 366 // negative value (invalid code point)) 367 return indexOfChar(value, ch, fromIndex, max); 368 } else { 369 return indexOfSupplementary(value, ch, fromIndex, max); 370 } 371 } 372 373 @HotSpotIntrinsicCandidate 374 public static int indexOf(byte[] value, byte[] str) { 375 if (str.length == 0) { 376 return 0; 377 } 378 if (value.length < str.length) { 379 return -1; 380 } 381 return indexOfUnsafe(value, length(value), str, length(str), 0); 382 } 383 384 @HotSpotIntrinsicCandidate 385 public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 386 checkBoundsBeginEnd(fromIndex, valueCount, value); 387 checkBoundsBeginEnd(0, strCount, str); 388 return indexOfUnsafe(value, valueCount, str, strCount, fromIndex); 389 } 390 391 392 private static int indexOfUnsafe(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 393 assert fromIndex >= 0; 394 assert strCount > 0; 395 assert strCount <= length(str); 396 assert valueCount >= strCount; 397 char first = getChar(str, 0); 398 int max = (valueCount - strCount); 399 for (int i = fromIndex; i <= max; i++) { 400 // Look for first character. 401 if (getChar(value, i) != first) { 402 while (++i <= max && getChar(value, i) != first); 403 } 404 // Found first character, now look at the rest of value 405 if (i <= max) { 406 int j = i + 1; 407 int end = j + strCount - 1; 408 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++); 409 if (j == end) { 410 // Found whole string. 411 return i; 412 } 413 } 414 } 415 return -1; 416 } 417 418 419 /** 420 * Handles indexOf Latin1 substring in UTF16 string. 421 */ 422 @HotSpotIntrinsicCandidate 423 public static int indexOfLatin1(byte[] value, byte[] str) { 424 if (str.length == 0) { 425 return 0; 426 } 427 if (length(value) < str.length) { 428 return -1; 429 } 430 return indexOfLatin1Unsafe(value, length(value), str, str.length, 0); 431 } 432 433 @HotSpotIntrinsicCandidate 434 public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { 435 checkBoundsBeginEnd(fromIndex, srcCount, src); 436 String.checkBoundsBeginEnd(0, tgtCount, tgt.length); 437 return indexOfLatin1Unsafe(src, srcCount, tgt, tgtCount, fromIndex); 438 } 439 440 public static int indexOfLatin1Unsafe(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) { 441 assert fromIndex >= 0; 442 assert tgtCount > 0; 443 assert tgtCount <= tgt.length; 444 assert srcCount >= tgtCount; 445 char first = (char)(tgt[0] & 0xff); 446 int max = (srcCount - tgtCount); 447 for (int i = fromIndex; i <= max; i++) { 448 // Look for first character. 449 if (getChar(src, i) != first) { 450 while (++i <= max && getChar(src, i) != first); 451 } 452 // Found first character, now look at the rest of v2 453 if (i <= max) { 454 int j = i + 1; 455 int end = j + tgtCount - 1; 456 for (int k = 1; 457 j < end && getChar(src, j) == (tgt[k] & 0xff); 458 j++, k++); 459 if (j == end) { 460 // Found whole string. 461 return i; 462 } 463 } 464 } 465 return -1; 466 } 467 468 @HotSpotIntrinsicCandidate 469 private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { 470 checkBoundsBeginEnd(fromIndex, max, value); 471 return indexOfCharUnsafe(value, ch, fromIndex, max); 472 } 473 474 private static int indexOfCharUnsafe(byte[] value, int ch, int fromIndex, int max) { 475 for (int i = fromIndex; i < max; i++) { 476 if (getChar(value, i) == ch) { 477 return i; 478 } 479 } 480 return -1; 481 } 482 483 /** 484 * Handles (rare) calls of indexOf with a supplementary character. 485 */ 486 private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) { 487 if (Character.isValidCodePoint(ch)) { 488 final char hi = Character.highSurrogate(ch); 489 final char lo = Character.lowSurrogate(ch); 490 checkBoundsBeginEnd(fromIndex, max, value); 491 for (int i = fromIndex; i < max - 1; i++) { 492 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) { 493 return i; 494 } 495 } 496 } 497 return -1; 498 } 499 500 // srcCoder == UTF16 && tgtCoder == UTF16 501 public static int lastIndexOf(byte[] src, int srcCount, 502 byte[] tgt, int tgtCount, int fromIndex) { 503 assert fromIndex >= 0; 504 assert tgtCount > 0; 505 assert tgtCount <= length(tgt); 506 int min = tgtCount - 1; 507 int i = min + fromIndex; 508 int strLastIndex = tgtCount - 1; 509 510 checkIndex(strLastIndex, tgt); 511 char strLastChar = getChar(tgt, strLastIndex); 512 513 checkIndex(i, src); 514 515 startSearchForLastChar: 516 while (true) { 517 while (i >= min && getChar(src, i) != strLastChar) { 518 i--; 519 } 520 if (i < min) { 521 return -1; 522 } 523 int j = i - 1; 524 int start = j - strLastIndex; 525 int k = strLastIndex - 1; 526 while (j > start) { 527 if (getChar(src, j--) != getChar(tgt, k--)) { 528 i--; 529 continue startSearchForLastChar; 530 } 531 } 532 return start + 1; 533 } 534 } 535 536 public static int lastIndexOf(byte[] value, int ch, int fromIndex) { 537 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 538 // handle most cases here (ch is a BMP code point or a 539 // negative value (invalid code point)) 540 int i = Math.min(fromIndex, (value.length >> 1) - 1); 541 for (; i >= 0; i--) { 542 if (getChar(value, i) == ch) { 543 return i; 544 } 545 } 546 return -1; 547 } else { 548 return lastIndexOfSupplementary(value, ch, fromIndex); 549 } 550 } 551 552 /** 553 * Handles (rare) calls of lastIndexOf with a supplementary character. 554 */ 555 private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) { 556 if (Character.isValidCodePoint(ch)) { 557 char hi = Character.highSurrogate(ch); 558 char lo = Character.lowSurrogate(ch); 559 int i = Math.min(fromIndex, (value.length >> 1) - 2); 560 for (; i >= 0; i--) { 561 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) { 562 return i; 563 } 564 } 565 } 566 return -1; 567 } 568 569 public static String replace(byte[] value, char oldChar, char newChar) { 570 int len = value.length >> 1; 571 int i = -1; 572 while (++i < len) { 573 if (getChar(value, i) == oldChar) { 574 break; 575 } 576 } 577 if (i < len) { 578 byte[] buf = new byte[value.length]; 579 for (int j = 0; j < i; j++) { 580 putChar(buf, j, getChar(value, j)); // TBD:arraycopy? 581 } 582 while (i < len) { 583 char c = getChar(value, i); 584 putChar(buf, i, c == oldChar ? newChar : c); 585 i++; 586 } 587 // Check if we should try to compress to latin1 588 if (String.COMPACT_STRINGS && 589 !StringLatin1.canEncode(oldChar) && 590 StringLatin1.canEncode(newChar)) { 591 byte[] val = compress(buf, 0, len); 592 if (val != null) { 593 return new String(val, LATIN1); 594 } 595 } 596 return new String(buf, UTF16); 597 } 598 return null; 599 } 600 601 public static String replace(byte[] value, int valLen, boolean valLat1, 602 byte[] targ, int targLen, boolean targLat1, 603 byte[] repl, int replLen, boolean replLat1) 604 { 605 assert targLen > 0; 606 assert !valLat1 || !targLat1 || !replLat1; 607 608 // Possible combinations of the arguments/result encodings: 609 // +---+--------+--------+--------+-----------------------+ 610 // | # | VALUE | TARGET | REPL | RESULT | 611 // +===+========+========+========+=======================+ 612 // | 1 | Latin1 | Latin1 | UTF16 | null or UTF16 | 613 // +---+--------+--------+--------+-----------------------+ 614 // | 2 | Latin1 | UTF16 | Latin1 | null | 615 // +---+--------+--------+--------+-----------------------+ 616 // | 3 | Latin1 | UTF16 | UTF16 | null | 617 // +---+--------+--------+--------+-----------------------+ 618 // | 4 | UTF16 | Latin1 | Latin1 | null or UTF16 | 619 // +---+--------+--------+--------+-----------------------+ 620 // | 5 | UTF16 | Latin1 | UTF16 | null or UTF16 | 621 // +---+--------+--------+--------+-----------------------+ 622 // | 6 | UTF16 | UTF16 | Latin1 | null, Latin1 or UTF16 | 623 // +---+--------+--------+--------+-----------------------+ 624 // | 7 | UTF16 | UTF16 | UTF16 | null or UTF16 | 625 // +---+--------+--------+--------+-----------------------+ 626 627 if (String.COMPACT_STRINGS && valLat1 && !targLat1) { 628 // combinations 2 or 3 629 return null; // for string to return this; 630 } 631 632 int i = (String.COMPACT_STRINGS && valLat1) 633 ? StringLatin1.indexOf(value, targ) : 634 (String.COMPACT_STRINGS && targLat1) 635 ? indexOfLatin1(value, targ) 636 : indexOf(value, targ); 637 if (i < 0) { 638 return null; // for string to return this; 639 } 640 641 // find and store indices of substrings to replace 642 int j, p = 0; 643 int[] pos = new int[16]; 644 pos[0] = i; 645 i += targLen; 646 while ((j = ((String.COMPACT_STRINGS && valLat1) 647 ? StringLatin1.indexOf(value, valLen, targ, targLen, i) : 648 (String.COMPACT_STRINGS && targLat1) 649 ? indexOfLatin1(value, valLen, targ, targLen, i) 650 : indexOf(value, valLen, targ, targLen, i))) > 0) 651 { 652 if (++p == pos.length) { 653 pos = Arrays.copyOf(pos, ArraysSupport.calcLength(p, 1, p >> 1)); 654 } 655 pos[p] = j; 656 i = j + targLen; 657 } 658 659 int resultLen; 660 try { 661 resultLen = Math.addExact(valLen, 662 Math.multiplyExact(++p, replLen - targLen)); 663 } catch (ArithmeticException ignored) { 664 throw new OutOfMemoryError(); 665 } 666 if (resultLen == 0) { 667 return ""; 668 } 669 670 byte[] result = newBytesFor(resultLen); 671 int posFrom = 0, posTo = 0; 672 for (int q = 0; q < p; ++q) { 673 int nextPos = pos[q]; 674 if (String.COMPACT_STRINGS && valLat1) { 675 while (posFrom < nextPos) { 676 char c = (char)(value[posFrom++] & 0xff); 677 putChar(result, posTo++, c); 678 } 679 } else { 680 while (posFrom < nextPos) { 681 putChar(result, posTo++, getChar(value, posFrom++)); 682 } 683 } 684 posFrom += targLen; 685 if (String.COMPACT_STRINGS && replLat1) { 686 for (int k = 0; k < replLen; ++k) { 687 char c = (char)(repl[k] & 0xff); 688 putChar(result, posTo++, c); 689 } 690 } else { 691 for (int k = 0; k < replLen; ++k) { 692 putChar(result, posTo++, getChar(repl, k)); 693 } 694 } 695 } 696 if (String.COMPACT_STRINGS && valLat1) { 697 while (posFrom < valLen) { 698 char c = (char)(value[posFrom++] & 0xff); 699 putChar(result, posTo++, c); 700 } 701 } else { 702 while (posFrom < valLen) { 703 putChar(result, posTo++, getChar(value, posFrom++)); 704 } 705 } 706 707 if (String.COMPACT_STRINGS && replLat1 && !targLat1) { 708 // combination 6 709 byte[] lat1Result = compress(result, 0, resultLen); 710 if (lat1Result != null) { 711 return new String(lat1Result, LATIN1); 712 } 713 } 714 return new String(result, UTF16); 715 } 716 717 public static boolean regionMatchesCI(byte[] value, int toffset, 718 byte[] other, int ooffset, int len) { 719 int last = toffset + len; 720 assert toffset >= 0 && ooffset >= 0; 721 assert ooffset + len <= length(other); 722 assert last <= length(value); 723 while (toffset < last) { 724 char c1 = getChar(value, toffset++); 725 char c2 = getChar(other, ooffset++); 726 if (c1 == c2) { 727 continue; 728 } 729 // try converting both characters to uppercase. 730 // If the results match, then the comparison scan should 731 // continue. 732 char u1 = Character.toUpperCase(c1); 733 char u2 = Character.toUpperCase(c2); 734 if (u1 == u2) { 735 continue; 736 } 737 // Unfortunately, conversion to uppercase does not work properly 738 // for the Georgian alphabet, which has strange rules about case 739 // conversion. So we need to make one last check before 740 // exiting. 741 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 742 continue; 743 } 744 return false; 745 } 746 return true; 747 } 748 749 public static boolean regionMatchesCI_Latin1(byte[] value, int toffset, 750 byte[] other, int ooffset, 751 int len) { 752 return StringLatin1.regionMatchesCI_UTF16(other, ooffset, value, toffset, len); 753 } 754 755 public static String toLowerCase(String str, byte[] value, Locale locale) { 756 if (locale == null) { 757 throw new NullPointerException(); 758 } 759 int first; 760 boolean hasSurr = false; 761 final int len = value.length >> 1; 762 763 // Now check if there are any characters that need to be changed, or are surrogate 764 for (first = 0 ; first < len; first++) { 765 int cp = (int)getChar(value, first); 766 if (Character.isSurrogate((char)cp)) { 767 hasSurr = true; 768 break; 769 } 770 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR 771 break; 772 } 773 } 774 if (first == len) 775 return str; 776 byte[] result = new byte[value.length]; 777 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 778 // lowerCase characters. 779 String lang = locale.getLanguage(); 780 if (lang == "tr" || lang == "az" || lang == "lt") { 781 return toLowerCaseEx(str, value, result, first, locale, true); 782 } 783 if (hasSurr) { 784 return toLowerCaseEx(str, value, result, first, locale, false); 785 } 786 int bits = 0; 787 for (int i = first; i < len; i++) { 788 int cp = (int)getChar(value, i); 789 if (cp == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 790 Character.isSurrogate((char)cp)) { 791 return toLowerCaseEx(str, value, result, i, locale, false); 792 } 793 if (cp == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 794 return toLowerCaseEx(str, value, result, i, locale, true); 795 } 796 cp = Character.toLowerCase(cp); 797 if (!Character.isBmpCodePoint(cp)) { 798 return toLowerCaseEx(str, value, result, i, locale, false); 799 } 800 bits |= cp; 801 putChar(result, i, cp); 802 } 803 if (bits > 0xFF) { 804 return new String(result, UTF16); 805 } else { 806 return newString(result, 0, len); 807 } 808 } 809 810 private static String toLowerCaseEx(String str, byte[] value, 811 byte[] result, int first, Locale locale, 812 boolean localeDependent) { 813 assert(result.length == value.length); 814 assert(first >= 0); 815 int resultOffset = first; 816 int length = value.length >> 1; 817 int srcCount; 818 for (int i = first; i < length; i += srcCount) { 819 int srcChar = getChar(value, i); 820 int lowerChar; 821 char[] lowerCharArray; 822 srcCount = 1; 823 if (Character.isSurrogate((char)srcChar)) { 824 srcChar = codePointAt(value, i, length); 825 srcCount = Character.charCount(srcChar); 826 } 827 if (localeDependent || 828 srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA 829 srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE 830 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale); 831 } else { 832 lowerChar = Character.toLowerCase(srcChar); 833 } 834 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp 835 putChar(result, resultOffset++, lowerChar); 836 } else { 837 if (lowerChar == Character.ERROR) { 838 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale); 839 } else { 840 lowerCharArray = Character.toChars(lowerChar); 841 } 842 /* Grow result if needed */ 843 int mapLen = lowerCharArray.length; 844 if (mapLen > srcCount) { 845 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 846 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 847 result = result2; 848 } 849 assert resultOffset >= 0; 850 assert resultOffset + mapLen <= length(result); 851 for (int x = 0; x < mapLen; ++x) { 852 putChar(result, resultOffset++, lowerCharArray[x]); 853 } 854 } 855 } 856 return newString(result, 0, resultOffset); 857 } 858 859 public static String toUpperCase(String str, byte[] value, Locale locale) { 860 if (locale == null) { 861 throw new NullPointerException(); 862 } 863 int first; 864 boolean hasSurr = false; 865 final int len = value.length >> 1; 866 867 // Now check if there are any characters that need to be changed, or are surrogate 868 for (first = 0 ; first < len; first++) { 869 int cp = (int)getChar(value, first); 870 if (Character.isSurrogate((char)cp)) { 871 hasSurr = true; 872 break; 873 } 874 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR 875 break; 876 } 877 } 878 if (first == len) { 879 return str; 880 } 881 byte[] result = new byte[value.length]; 882 System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few 883 // upperCase characters. 884 String lang = locale.getLanguage(); 885 if (lang == "tr" || lang == "az" || lang == "lt") { 886 return toUpperCaseEx(str, value, result, first, locale, true); 887 } 888 if (hasSurr) { 889 return toUpperCaseEx(str, value, result, first, locale, false); 890 } 891 int bits = 0; 892 for (int i = first; i < len; i++) { 893 int cp = (int)getChar(value, i); 894 if (Character.isSurrogate((char)cp)) { 895 return toUpperCaseEx(str, value, result, i, locale, false); 896 } 897 cp = Character.toUpperCaseEx(cp); 898 if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp 899 return toUpperCaseEx(str, value, result, i, locale, false); 900 } 901 bits |= cp; 902 putChar(result, i, cp); 903 } 904 if (bits > 0xFF) { 905 return new String(result, UTF16); 906 } else { 907 return newString(result, 0, len); 908 } 909 } 910 911 private static String toUpperCaseEx(String str, byte[] value, 912 byte[] result, int first, 913 Locale locale, boolean localeDependent) 914 { 915 assert(result.length == value.length); 916 assert(first >= 0); 917 int resultOffset = first; 918 int length = value.length >> 1; 919 int srcCount; 920 for (int i = first; i < length; i += srcCount) { 921 int srcChar = getChar(value, i); 922 int upperChar; 923 char[] upperCharArray; 924 srcCount = 1; 925 if (Character.isSurrogate((char)srcChar)) { 926 srcChar = codePointAt(value, i, length); 927 srcCount = Character.charCount(srcChar); 928 } 929 if (localeDependent) { 930 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale); 931 } else { 932 upperChar = Character.toUpperCaseEx(srcChar); 933 } 934 if (Character.isBmpCodePoint(upperChar)) { 935 putChar(result, resultOffset++, upperChar); 936 } else { 937 if (upperChar == Character.ERROR) { 938 if (localeDependent) { 939 upperCharArray = 940 ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale); 941 } else { 942 upperCharArray = Character.toUpperCaseCharArray(srcChar); 943 } 944 } else { 945 upperCharArray = Character.toChars(upperChar); 946 } 947 /* Grow result if needed */ 948 int mapLen = upperCharArray.length; 949 if (mapLen > srcCount) { 950 byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount); 951 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 952 result = result2; 953 } 954 assert resultOffset >= 0; 955 assert resultOffset + mapLen <= length(result); 956 for (int x = 0; x < mapLen; ++x) { 957 putChar(result, resultOffset++, upperCharArray[x]); 958 } 959 } 960 } 961 return newString(result, 0, resultOffset); 962 } 963 964 public static String trim(byte[] value) { 965 int length = value.length >> 1; 966 int len = length; 967 int st = 0; 968 while (st < len && getChar(value, st) <= ' ') { 969 st++; 970 } 971 while (st < len && getChar(value, len - 1) <= ' ') { 972 len--; 973 } 974 return ((st > 0) || (len < length )) ? 975 new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) : 976 null; 977 } 978 979 public static int indexOfNonWhitespace(byte[] value) { 980 int length = value.length >> 1; 981 int left = 0; 982 while (left < length) { 983 int codepoint = codePointAt(value, left, length); 984 if (codepoint != ' ' && codepoint != '\t' && !Character.isWhitespace(codepoint)) { 985 break; 986 } 987 left += Character.charCount(codepoint); 988 } 989 return left; 990 } 991 992 public static int lastIndexOfNonWhitespace(byte[] value) { 993 int length = value.length >>> 1; 994 int right = length; 995 while (0 < right) { 996 int codepoint = codePointBefore(value, right); 997 if (codepoint != ' ' && codepoint != '\t' && !Character.isWhitespace(codepoint)) { 998 break; 999 } 1000 right -= Character.charCount(codepoint); 1001 } 1002 return right; 1003 } 1004 1005 public static String strip(byte[] value) { 1006 int length = value.length >>> 1; 1007 int left = indexOfNonWhitespace(value); 1008 if (left == length) { 1009 return ""; 1010 } 1011 int right = lastIndexOfNonWhitespace(value); 1012 boolean ifChanged = (left > 0) || (right < length); 1013 return ifChanged ? newString(value, left, right - left) : null; 1014 } 1015 1016 public static String stripLeading(byte[] value) { 1017 int length = value.length >>> 1; 1018 int left = indexOfNonWhitespace(value); 1019 if (left == length) { 1020 return ""; 1021 } 1022 return (left != 0) ? newString(value, left, length - left) : null; 1023 } 1024 1025 public static String stripTrailing(byte[] value) { 1026 int length = value.length >>> 1; 1027 int right = lastIndexOfNonWhitespace(value); 1028 if (right == 0) { 1029 return ""; 1030 } 1031 return (right != length) ? newString(value, 0, right) : null; 1032 } 1033 1034 private final static class LinesSpliterator implements Spliterator<String> { 1035 private byte[] value; 1036 private int index; // current index, modified on advance/split 1037 private final int fence; // one past last index 1038 1039 private LinesSpliterator(byte[] value, int start, int length) { 1040 this.value = value; 1041 this.index = start; 1042 this.fence = start + length; 1043 } 1044 1045 private int indexOfLineSeparator(int start) { 1046 for (int current = start; current < fence; current++) { 1047 char ch = getChar(value, current); 1048 if (ch == '\n' || ch == '\r') { 1049 return current; 1050 } 1051 } 1052 return fence; 1053 } 1054 1055 private int skipLineSeparator(int start) { 1056 if (start < fence) { 1057 if (getChar(value, start) == '\r') { 1058 int next = start + 1; 1059 if (next < fence && getChar(value, next) == '\n') { 1060 return next + 1; 1061 } 1062 } 1063 return start + 1; 1064 } 1065 return fence; 1066 } 1067 1068 private String next() { 1069 int start = index; 1070 int end = indexOfLineSeparator(start); 1071 index = skipLineSeparator(end); 1072 return newString(value, start, end - start); 1073 } 1074 1075 @Override 1076 public boolean tryAdvance(Consumer<? super String> action) { 1077 if (action == null) { 1078 throw new NullPointerException("tryAdvance action missing"); 1079 } 1080 if (index != fence) { 1081 action.accept(next()); 1082 return true; 1083 } 1084 return false; 1085 } 1086 1087 @Override 1088 public void forEachRemaining(Consumer<? super String> action) { 1089 if (action == null) { 1090 throw new NullPointerException("forEachRemaining action missing"); 1091 } 1092 while (index != fence) { 1093 action.accept(next()); 1094 } 1095 } 1096 1097 @Override 1098 public Spliterator<String> trySplit() { 1099 int half = (fence + index) >>> 1; 1100 int mid = skipLineSeparator(indexOfLineSeparator(half)); 1101 if (mid < fence) { 1102 int start = index; 1103 index = mid; 1104 return new LinesSpliterator(value, start, mid - start); 1105 } 1106 return null; 1107 } 1108 1109 @Override 1110 public long estimateSize() { 1111 return fence - index + 1; 1112 } 1113 1114 @Override 1115 public int characteristics() { 1116 return Spliterator.ORDERED | Spliterator.IMMUTABLE | Spliterator.NONNULL; 1117 } 1118 1119 static LinesSpliterator spliterator(byte[] value) { 1120 return new LinesSpliterator(value, 0, value.length >>> 1); 1121 } 1122 1123 static LinesSpliterator spliterator(byte[] value, int leading, int trailing) { 1124 int length = value.length >>> 1; 1125 int left = 0; 1126 int index; 1127 for (int l = 0; l < leading; l++) { 1128 index = skipBlankForward(value, left, length); 1129 if (index == left) { 1130 break; 1131 } 1132 left = index; 1133 } 1134 int right = length; 1135 for (int t = 0; t < trailing; t++) { 1136 index = skipBlankBackward(value, left, right); 1137 if (index == right) { 1138 break; 1139 } 1140 right = index; 1141 } 1142 return new LinesSpliterator(value, left, right - left); 1143 } 1144 1145 private static int skipBlankForward(byte[] value, int start, int length) { 1146 int index = start; 1147 while (index < length) { 1148 char ch = getChar(value, index++); 1149 if (ch == '\n') { 1150 return index; 1151 } 1152 if (ch == '\r') { 1153 if (index < length && getChar(value, index) == '\n') { 1154 return index + 1; 1155 } 1156 return index; 1157 } 1158 if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { 1159 return start; 1160 } 1161 } 1162 return length; 1163 } 1164 1165 private static int skipBlankBackward(byte[] value, int start, int fence) { 1166 int index = fence; 1167 if (start < index && getChar(value, index - 1) == '\n') { 1168 index--; 1169 } 1170 if (start < index && getChar(value, index - 1) == '\r') { 1171 index--; 1172 } 1173 while (start < index) { 1174 char ch = getChar(value, --index); 1175 if (ch == '\r' || ch == '\n') { 1176 return index + 1; 1177 } 1178 if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) { 1179 return fence; 1180 } 1181 } 1182 return start; 1183 } 1184 } 1185 1186 static Stream<String> lines(byte[] value, int leading, int trailing) { 1187 if (leading == 0 && trailing == 0) { 1188 return StreamSupport.stream(LinesSpliterator.spliterator(value), false); 1189 } else { 1190 return StreamSupport.stream(LinesSpliterator.spliterator(value, leading, trailing), false); 1191 } 1192 } 1193 1194 private static void putChars(byte[] val, int index, char[] str, int off, int end) { 1195 while (off < end) { 1196 putChar(val, index++, str[off++]); 1197 } 1198 } 1199 1200 public static String newString(byte[] val, int index, int len) { 1201 if (String.COMPACT_STRINGS) { 1202 byte[] buf = compress(val, index, len); 1203 if (buf != null) { 1204 return new String(buf, LATIN1); 1205 } 1206 } 1207 int last = index + len; 1208 return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16); 1209 } 1210 1211 public static void fillNull(byte[] val, int index, int end) { 1212 Arrays.fill(val, index << 1, end << 1, (byte)0); 1213 } 1214 1215 static class CharsSpliterator implements Spliterator.OfInt { 1216 private final byte[] array; 1217 private int index; // current index, modified on advance/split 1218 private final int fence; // one past last index 1219 private final int cs; 1220 1221 CharsSpliterator(byte[] array, int acs) { 1222 this(array, 0, array.length >> 1, acs); 1223 } 1224 1225 CharsSpliterator(byte[] array, int origin, int fence, int acs) { 1226 this.array = array; 1227 this.index = origin; 1228 this.fence = fence; 1229 this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED 1230 | Spliterator.SUBSIZED; 1231 } 1232 1233 @Override 1234 public OfInt trySplit() { 1235 int lo = index, mid = (lo + fence) >>> 1; 1236 return (lo >= mid) 1237 ? null 1238 : new CharsSpliterator(array, lo, index = mid, cs); 1239 } 1240 1241 @Override 1242 public void forEachRemaining(IntConsumer action) { 1243 byte[] a; int i, hi; // hoist accesses and checks from loop 1244 if (action == null) 1245 throw new NullPointerException(); 1246 if (((a = array).length >> 1) >= (hi = fence) && 1247 (i = index) >= 0 && i < (index = hi)) { 1248 do { 1249 action.accept(charAt(a, i)); 1250 } while (++i < hi); 1251 } 1252 } 1253 1254 @Override 1255 public boolean tryAdvance(IntConsumer action) { 1256 if (action == null) 1257 throw new NullPointerException(); 1258 int i = index; 1259 if (i >= 0 && i < fence) { 1260 action.accept(charAt(array, i)); 1261 index++; 1262 return true; 1263 } 1264 return false; 1265 } 1266 1267 @Override 1268 public long estimateSize() { return (long)(fence - index); } 1269 1270 @Override 1271 public int characteristics() { 1272 return cs; 1273 } 1274 } 1275 1276 static class CodePointsSpliterator implements Spliterator.OfInt { 1277 private final byte[] array; 1278 private int index; // current index, modified on advance/split 1279 private final int fence; // one past last index 1280 private final int cs; 1281 1282 CodePointsSpliterator(byte[] array, int acs) { 1283 this(array, 0, array.length >> 1, acs); 1284 } 1285 1286 CodePointsSpliterator(byte[] array, int origin, int fence, int acs) { 1287 this.array = array; 1288 this.index = origin; 1289 this.fence = fence; 1290 this.cs = acs | Spliterator.ORDERED; 1291 } 1292 1293 @Override 1294 public OfInt trySplit() { 1295 int lo = index, mid = (lo + fence) >>> 1; 1296 if (lo >= mid) 1297 return null; 1298 1299 int midOneLess; 1300 // If the mid-point intersects a surrogate pair 1301 if (Character.isLowSurrogate(charAt(array, mid)) && 1302 Character.isHighSurrogate(charAt(array, midOneLess = (mid -1)))) { 1303 // If there is only one pair it cannot be split 1304 if (lo >= midOneLess) 1305 return null; 1306 // Shift the mid-point to align with the surrogate pair 1307 return new CodePointsSpliterator(array, lo, index = midOneLess, cs); 1308 } 1309 return new CodePointsSpliterator(array, lo, index = mid, cs); 1310 } 1311 1312 @Override 1313 public void forEachRemaining(IntConsumer action) { 1314 byte[] a; int i, hi; // hoist accesses and checks from loop 1315 if (action == null) 1316 throw new NullPointerException(); 1317 if (((a = array).length >> 1) >= (hi = fence) && 1318 (i = index) >= 0 && i < (index = hi)) { 1319 do { 1320 i = advance(a, i, hi, action); 1321 } while (i < hi); 1322 } 1323 } 1324 1325 @Override 1326 public boolean tryAdvance(IntConsumer action) { 1327 if (action == null) 1328 throw new NullPointerException(); 1329 if (index >= 0 && index < fence) { 1330 index = advance(array, index, fence, action); 1331 return true; 1332 } 1333 return false; 1334 } 1335 1336 // Advance one code point from the index, i, and return the next 1337 // index to advance from 1338 private static int advance(byte[] a, int i, int hi, IntConsumer action) { 1339 char c1 = charAt(a, i++); 1340 int cp = c1; 1341 if (Character.isHighSurrogate(c1) && i < hi) { 1342 char c2 = charAt(a, i); 1343 if (Character.isLowSurrogate(c2)) { 1344 i++; 1345 cp = Character.toCodePoint(c1, c2); 1346 } 1347 } 1348 action.accept(cp); 1349 return i; 1350 } 1351 1352 @Override 1353 public long estimateSize() { return (long)(fence - index); } 1354 1355 @Override 1356 public int characteristics() { 1357 return cs; 1358 } 1359 } 1360 1361 //////////////////////////////////////////////////////////////// 1362 1363 public static void putCharSB(byte[] val, int index, int c) { 1364 checkIndex(index, val); 1365 putChar(val, index, c); 1366 } 1367 1368 public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) { 1369 checkBoundsBeginEnd(index, index + end - off, val); 1370 putChars(val, index, ca, off, end); 1371 } 1372 1373 public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) { 1374 checkBoundsBeginEnd(index, index + end - off, val); 1375 for (int i = off; i < end; i++) { 1376 putChar(val, index++, s.charAt(i)); 1377 } 1378 } 1379 1380 public static int codePointAtSB(byte[] val, int index, int end) { 1381 return codePointAt(val, index, end, true /* checked */); 1382 } 1383 1384 public static int codePointBeforeSB(byte[] val, int index) { 1385 return codePointBefore(val, index, true /* checked */); 1386 } 1387 1388 public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) { 1389 return codePointCount(val, beginIndex, endIndex, true /* checked */); 1390 } 1391 1392 public static int getChars(int i, int begin, int end, byte[] value) { 1393 checkBoundsBeginEnd(begin, end, value); 1394 int pos = getChars(i, end, value); 1395 assert begin == pos; 1396 return pos; 1397 } 1398 1399 public static int getChars(long l, int begin, int end, byte[] value) { 1400 checkBoundsBeginEnd(begin, end, value); 1401 int pos = getChars(l, end, value); 1402 assert begin == pos; 1403 return pos; 1404 } 1405 1406 public static boolean contentEquals(byte[] v1, byte[] v2, int len) { 1407 checkBoundsOffCount(0, len, v2); 1408 for (int i = 0; i < len; i++) { 1409 if ((char)(v1[i] & 0xff) != getChar(v2, i)) { 1410 return false; 1411 } 1412 } 1413 return true; 1414 } 1415 1416 public static boolean contentEquals(byte[] value, CharSequence cs, int len) { 1417 checkOffset(len, value); 1418 for (int i = 0; i < len; i++) { 1419 if (getChar(value, i) != cs.charAt(i)) { 1420 return false; 1421 } 1422 } 1423 return true; 1424 } 1425 1426 public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) { 1427 int end = i + 4; 1428 checkBoundsBeginEnd(i, end, value); 1429 putChar(value, i++, c1); 1430 putChar(value, i++, c2); 1431 putChar(value, i++, c3); 1432 putChar(value, i++, c4); 1433 assert(i == end); 1434 return end; 1435 } 1436 1437 public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) { 1438 int end = i + 5; 1439 checkBoundsBeginEnd(i, end, value); 1440 putChar(value, i++, c1); 1441 putChar(value, i++, c2); 1442 putChar(value, i++, c3); 1443 putChar(value, i++, c4); 1444 putChar(value, i++, c5); 1445 assert(i == end); 1446 return end; 1447 } 1448 1449 public static char charAt(byte[] value, int index) { 1450 checkIndex(index, value); 1451 return getChar(value, index); 1452 } 1453 1454 public static void reverse(byte[] val, int count) { 1455 checkOffset(count, val); 1456 int n = count - 1; 1457 boolean hasSurrogates = false; 1458 for (int j = (n-1) >> 1; j >= 0; j--) { 1459 int k = n - j; 1460 char cj = getChar(val, j); 1461 char ck = getChar(val, k); 1462 putChar(val, j, ck); 1463 putChar(val, k, cj); 1464 if (Character.isSurrogate(cj) || 1465 Character.isSurrogate(ck)) { 1466 hasSurrogates = true; 1467 } 1468 } 1469 if (hasSurrogates) { 1470 reverseAllValidSurrogatePairs(val, count); 1471 } 1472 } 1473 1474 /** Outlined helper method for reverse() */ 1475 private static void reverseAllValidSurrogatePairs(byte[] val, int count) { 1476 for (int i = 0; i < count - 1; i++) { 1477 char c2 = getChar(val, i); 1478 if (Character.isLowSurrogate(c2)) { 1479 char c1 = getChar(val, i + 1); 1480 if (Character.isHighSurrogate(c1)) { 1481 putChar(val, i++, c1); 1482 putChar(val, i, c2); 1483 } 1484 } 1485 } 1486 } 1487 1488 // inflatedCopy byte[] -> byte[] 1489 public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 1490 // We need a range check here because 'putChar' has no checks 1491 checkBoundsOffCount(dstOff, len, dst); 1492 for (int i = 0; i < len; i++) { 1493 putChar(dst, dstOff++, src[srcOff++] & 0xff); 1494 } 1495 } 1496 1497 // srcCoder == UTF16 && tgtCoder == LATIN1 1498 public static int lastIndexOfLatin1(byte[] src, int srcCount, 1499 byte[] tgt, int tgtCount, int fromIndex) { 1500 assert fromIndex >= 0; 1501 assert tgtCount > 0; 1502 assert tgtCount <= tgt.length; 1503 int min = tgtCount - 1; 1504 int i = min + fromIndex; 1505 int strLastIndex = tgtCount - 1; 1506 1507 char strLastChar = (char)(tgt[strLastIndex] & 0xff); 1508 1509 checkIndex(i, src); 1510 1511 startSearchForLastChar: 1512 while (true) { 1513 while (i >= min && getChar(src, i) != strLastChar) { 1514 i--; 1515 } 1516 if (i < min) { 1517 return -1; 1518 } 1519 int j = i - 1; 1520 int start = j - strLastIndex; 1521 int k = strLastIndex - 1; 1522 while (j > start) { 1523 if (getChar(src, j--) != (tgt[k--] & 0xff)) { 1524 i--; 1525 continue startSearchForLastChar; 1526 } 1527 } 1528 return start + 1; 1529 } 1530 } 1531 1532 //////////////////////////////////////////////////////////////// 1533 1534 private static native boolean isBigEndian(); 1535 1536 static final int HI_BYTE_SHIFT; 1537 static final int LO_BYTE_SHIFT; 1538 static { 1539 if (isBigEndian()) { 1540 HI_BYTE_SHIFT = 8; 1541 LO_BYTE_SHIFT = 0; 1542 } else { 1543 HI_BYTE_SHIFT = 0; 1544 LO_BYTE_SHIFT = 8; 1545 } 1546 } 1547 1548 static final int MAX_LENGTH = Integer.MAX_VALUE >> 1; 1549 1550 // Used by trusted callers. Assumes all necessary bounds checks have 1551 // been done by the caller. 1552 1553 /** 1554 * This is a variant of {@link Integer#getChars(int, int, byte[])}, but for 1555 * UTF-16 coder. 1556 * 1557 * @param i value to convert 1558 * @param index next index, after the least significant digit 1559 * @param buf target buffer, UTF16-coded. 1560 * @return index of the most significant digit or minus sign, if present 1561 */ 1562 static int getChars(int i, int index, byte[] buf) { 1563 int q, r; 1564 int charPos = index; 1565 1566 boolean negative = (i < 0); 1567 if (!negative) { 1568 i = -i; 1569 } 1570 1571 // Get 2 digits/iteration using ints 1572 while (i <= -100) { 1573 q = i / 100; 1574 r = (q * 100) - i; 1575 i = q; 1576 putChar(buf, --charPos, Integer.DigitOnes[r]); 1577 putChar(buf, --charPos, Integer.DigitTens[r]); 1578 } 1579 1580 // We know there are at most two digits left at this point. 1581 q = i / 10; 1582 r = (q * 10) - i; 1583 putChar(buf, --charPos, '0' + r); 1584 1585 // Whatever left is the remaining digit. 1586 if (q < 0) { 1587 putChar(buf, --charPos, '0' - q); 1588 } 1589 1590 if (negative) { 1591 putChar(buf, --charPos, '-'); 1592 } 1593 return charPos; 1594 } 1595 1596 /** 1597 * This is a variant of {@link Long#getChars(long, int, byte[])}, but for 1598 * UTF-16 coder. 1599 * 1600 * @param i value to convert 1601 * @param index next index, after the least significant digit 1602 * @param buf target buffer, UTF16-coded. 1603 * @return index of the most significant digit or minus sign, if present 1604 */ 1605 static int getChars(long i, int index, byte[] buf) { 1606 long q; 1607 int r; 1608 int charPos = index; 1609 1610 boolean negative = (i < 0); 1611 if (!negative) { 1612 i = -i; 1613 } 1614 1615 // Get 2 digits/iteration using longs until quotient fits into an int 1616 while (i <= Integer.MIN_VALUE) { 1617 q = i / 100; 1618 r = (int)((q * 100) - i); 1619 i = q; 1620 putChar(buf, --charPos, Integer.DigitOnes[r]); 1621 putChar(buf, --charPos, Integer.DigitTens[r]); 1622 } 1623 1624 // Get 2 digits/iteration using ints 1625 int q2; 1626 int i2 = (int)i; 1627 while (i2 <= -100) { 1628 q2 = i2 / 100; 1629 r = (q2 * 100) - i2; 1630 i2 = q2; 1631 putChar(buf, --charPos, Integer.DigitOnes[r]); 1632 putChar(buf, --charPos, Integer.DigitTens[r]); 1633 } 1634 1635 // We know there are at most two digits left at this point. 1636 q2 = i2 / 10; 1637 r = (q2 * 10) - i2; 1638 putChar(buf, --charPos, '0' + r); 1639 1640 // Whatever left is the remaining digit. 1641 if (q2 < 0) { 1642 putChar(buf, --charPos, '0' - q2); 1643 } 1644 1645 if (negative) { 1646 putChar(buf, --charPos, '-'); 1647 } 1648 return charPos; 1649 } 1650 // End of trusted methods. 1651 1652 public static void checkIndex(int off, byte[] val) { 1653 String.checkIndex(off, length(val)); 1654 } 1655 1656 public static void checkOffset(int off, byte[] val) { 1657 String.checkOffset(off, length(val)); 1658 } 1659 1660 public static void checkBoundsBeginEnd(int begin, int end, byte[] val) { 1661 String.checkBoundsBeginEnd(begin, end, length(val)); 1662 } 1663 1664 public static void checkBoundsOffCount(int offset, int count, byte[] val) { 1665 String.checkBoundsOffCount(offset, count, length(val)); 1666 } 1667 1668 }