1 /*
   2  * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Spliterator;
  31 import java.util.function.IntConsumer;
  32 import jdk.internal.HotSpotIntrinsicCandidate;
  33 import jdk.internal.vm.annotation.ForceInline;
  34 import jdk.internal.vm.annotation.DontInline;
  35 
  36 import static java.lang.String.UTF16;
  37 import static java.lang.String.LATIN1;
  38 
  39 final class StringUTF16 {
  40 
  41     public static byte[] newBytesFor(int len) {
  42         if (len < 0) {
  43             throw new NegativeArraySizeException();
  44         }
  45         if (len > MAX_LENGTH) {
  46             throw new OutOfMemoryError("UTF16 String size is " + len +
  47                                        ", should be less than " + MAX_LENGTH);
  48         }
  49         return new byte[len << 1];
  50     }
  51 
  52     @HotSpotIntrinsicCandidate
  53     // intrinsic performs no bounds checks
  54     static void putChar(byte[] val, int index, int c) {
  55         assert index >= 0 && index < length(val) : "Trusted caller missed bounds check";
  56         index <<= 1;
  57         val[index++] = (byte)(c >> HI_BYTE_SHIFT);
  58         val[index]   = (byte)(c >> LO_BYTE_SHIFT);
  59     }
  60 
  61     @HotSpotIntrinsicCandidate
  62     // intrinsic performs no bounds checks
  63     static char getChar(byte[] val, int index) {
  64         assert index >= 0 && index < length(val) : "Trusted caller missed bounds check";
  65         index <<= 1;
  66         return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) |
  67                       ((val[index]   & 0xff) << LO_BYTE_SHIFT));
  68     }
  69 
  70     public static int length(byte[] value) {
  71         return value.length >> 1;
  72     }
  73 
  74     private static int codePointAt(byte[] value, int index, int end, boolean checked) {
  75         assert index < end;
  76         if (checked) {
  77             checkIndex(index, value);
  78         }
  79         char c1 = getChar(value, index);
  80         if (Character.isHighSurrogate(c1) && ++index < end) {
  81             if (checked) {
  82                 checkIndex(index, value);
  83             }
  84             char c2 = getChar(value, index);
  85             if (Character.isLowSurrogate(c2)) {
  86                return Character.toCodePoint(c1, c2);
  87             }
  88         }
  89         return c1;
  90     }
  91 
  92     public static int codePointAt(byte[] value, int index, int end) {
  93        return codePointAt(value, index, end, false /* unchecked */);
  94     }
  95 
  96     private static int codePointBefore(byte[] value, int index, boolean checked) {
  97         --index;
  98         if (checked) {
  99             checkIndex(index, value);
 100         }
 101         char c2 = getChar(value, index);
 102         if (Character.isLowSurrogate(c2) && index > 0) {
 103             --index;
 104             if (checked) {
 105                 checkIndex(index, value);
 106             }
 107             char c1 = getChar(value, index);
 108             if (Character.isHighSurrogate(c1)) {
 109                return Character.toCodePoint(c1, c2);
 110             }
 111         }
 112         return c2;
 113     }
 114 
 115     public static int codePointBefore(byte[] value, int index) {
 116         return codePointBefore(value, index, false /* unchecked */);
 117     }
 118 
 119     private static int codePointCount(byte[] value, int beginIndex, int endIndex, boolean checked) {
 120         assert beginIndex <= endIndex;
 121         int count = endIndex - beginIndex;
 122         int i = beginIndex;
 123         if (checked && i < endIndex) {
 124             checkBoundsBeginEnd(i, endIndex, value);
 125         }
 126         for (; i < endIndex - 1; ) {
 127             if (Character.isHighSurrogate(getChar(value, i++)) &&
 128                 Character.isLowSurrogate(getChar(value, i))) {
 129                 count--;
 130                 i++;
 131             }
 132         }
 133         return count;
 134     }
 135 
 136     public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
 137         return codePointCount(value, beginIndex, endIndex, false /* unchecked */);
 138     }
 139 
 140     public static char[] toChars(byte[] value) {
 141         char[] dst = new char[value.length >> 1];
 142         getChars(value, 0, dst.length, dst, 0);
 143         return dst;
 144     }
 145 
 146     @HotSpotIntrinsicCandidate
 147     public static byte[] toBytes(char[] value, int off, int len) {
 148         byte[] val = newBytesFor(len);
 149         for (int i = 0; i < len; i++) {
 150             putChar(val, i, value[off]);
 151             off++;
 152         }
 153         return val;
 154     }
 155 
 156     public static byte[] compress(char[] val, int off, int len) {
 157         byte[] ret = new byte[len];
 158         if (compress(val, off, ret, 0, len) == len) {
 159             return ret;
 160         }
 161         return null;
 162     }
 163 
 164     public static byte[] compress(byte[] val, int off, int len) {
 165         byte[] ret = new byte[len];
 166         if (compress(val, off, ret, 0, len) == len) {
 167             return ret;
 168         }
 169         return null;
 170     }
 171 
 172     // compressedCopy char[] -> byte[]
 173     @HotSpotIntrinsicCandidate
 174     public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
 175         for (int i = 0; i < len; i++) {
 176             char c = src[srcOff];
 177             if (c > 0xFF) {
 178                 len = 0;
 179                 break;
 180             }
 181             dst[dstOff] = (byte)c;
 182             srcOff++;
 183             dstOff++;
 184         }
 185         return len;
 186     }
 187 
 188     // compressedCopy byte[] -> byte[]
 189     @HotSpotIntrinsicCandidate
 190     public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
 191         // We need a range check here because 'getChar' has no checks
 192         checkBoundsOffCount(srcOff, len, src);
 193         for (int i = 0; i < len; i++) {
 194             char c = getChar(src, srcOff);
 195             if (c > 0xFF) {
 196                 len = 0;
 197                 break;
 198             }
 199             dst[dstOff] = (byte)c;
 200             srcOff++;
 201             dstOff++;
 202         }
 203         return len;
 204     }
 205 
 206     public static byte[] toBytes(int[] val, int index, int len) {
 207         final int end = index + len;
 208         // Pass 1: Compute precise size of char[]
 209         int n = len;
 210         for (int i = index; i < end; i++) {
 211             int cp = val[i];
 212             if (Character.isBmpCodePoint(cp))
 213                 continue;
 214             else if (Character.isValidCodePoint(cp))
 215                 n++;
 216             else throw new IllegalArgumentException(Integer.toString(cp));
 217         }
 218         // Pass 2: Allocate and fill in <high, low> pair
 219         byte[] buf = newBytesFor(n);
 220         for (int i = index, j = 0; i < end; i++, j++) {
 221             int cp = val[i];
 222             if (Character.isBmpCodePoint(cp)) {
 223                 putChar(buf, j, cp);
 224             } else {
 225                 putChar(buf, j++, Character.highSurrogate(cp));
 226                 putChar(buf, j, Character.lowSurrogate(cp));
 227             }
 228         }
 229         return buf;
 230     }
 231 
 232     public static byte[] toBytes(char c) {
 233         byte[] result = new byte[2];
 234         putChar(result, 0, c);
 235         return result;
 236     }
 237 
 238     @HotSpotIntrinsicCandidate
 239     public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
 240         // We need a range check here because 'getChar' has no checks
 241         if (srcBegin < srcEnd) {
 242             checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value);
 243         }
 244         for (int i = srcBegin; i < srcEnd; i++) {
 245             dst[dstBegin++] = getChar(value, i);
 246         }
 247     }
 248 
 249     /* @see java.lang.String.getBytes(int, int, byte[], int) */
 250     public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
 251         srcBegin <<= 1;
 252         srcEnd <<= 1;
 253         for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) {
 254             dst[dstBegin++] = value[i];
 255         }
 256     }
 257 
 258     @HotSpotIntrinsicCandidate
 259     public static boolean equals(byte[] value, byte[] other) {
 260         if (value.length == other.length) {
 261             int len = value.length >> 1;
 262             for (int i = 0; i < len; i++) {
 263                 if (getChar(value, i) != getChar(other, i)) {
 264                     return false;
 265                 }
 266             }
 267             return true;
 268         }
 269         return false;
 270     }
 271 
 272     @HotSpotIntrinsicCandidate
 273     public static int compareTo(byte[] value, byte[] other) {
 274         int len1 = length(value);
 275         int len2 = length(other);
 276         return compareValues(value, other, len1, len2);
 277     }
 278 
 279     /*
 280      * Checks the boundary and then compares the byte arrays.
 281      */
 282     public static int compareTo(byte[] value, byte[] other, int len1, int len2) {
 283         checkOffset(len1, value);
 284         checkOffset(len2, other);
 285 
 286         return compareValues(value, other, len1, len2);
 287     }
 288 
 289     private static int compareValues(byte[] value, byte[] other, int len1, int len2) {
 290         int lim = Math.min(len1, len2);
 291         for (int k = 0; k < lim; k++) {
 292             char c1 = getChar(value, k);
 293             char c2 = getChar(other, k);
 294             if (c1 != c2) {
 295                 return c1 - c2;
 296             }
 297         }
 298         return len1 - len2;
 299     }
 300 
 301     @HotSpotIntrinsicCandidate
 302     public static int compareToLatin1(byte[] value, byte[] other) {
 303         return -StringLatin1.compareToUTF16(other, value);
 304     }
 305 
 306     public static int compareToLatin1(byte[] value, byte[] other, int len1, int len2) {
 307         return -StringLatin1.compareToUTF16(other, value, len2, len1);
 308     }
 309 
 310     public static int compareToCI(byte[] value, byte[] other) {
 311         int len1 = length(value);
 312         int len2 = length(other);
 313         int lim = Math.min(len1, len2);
 314         for (int k = 0; k < lim; k++) {
 315             char c1 = getChar(value, k);
 316             char c2 = getChar(other, k);
 317             if (c1 != c2) {
 318                 c1 = Character.toUpperCase(c1);
 319                 c2 = Character.toUpperCase(c2);
 320                 if (c1 != c2) {
 321                     c1 = Character.toLowerCase(c1);
 322                     c2 = Character.toLowerCase(c2);
 323                     if (c1 != c2) {
 324                         return c1 - c2;
 325                     }
 326                 }
 327             }
 328         }
 329         return len1 - len2;
 330     }
 331 
 332     public static int compareToCI_Latin1(byte[] value, byte[] other) {
 333         return -StringLatin1.compareToCI_UTF16(other, value);
 334     }
 335 
 336     public static int hashCode(byte[] value) {
 337         int h = 0;
 338         int length = value.length >> 1;
 339         for (int i = 0; i < length; i++) {
 340             h = 31 * h + getChar(value, i);
 341         }
 342         return h;
 343     }
 344 
 345     public static int indexOf(byte[] value, int ch, int fromIndex) {
 346         int max = value.length >> 1;
 347         if (fromIndex < 0) {
 348             fromIndex = 0;
 349         } else if (fromIndex >= max) {
 350             // Note: fromIndex might be near -1>>>1.
 351             return -1;
 352         }
 353         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 354             // handle most cases here (ch is a BMP code point or a
 355             // negative value (invalid code point))
 356             return indexOfChar(value, ch, fromIndex, max);
 357         } else {
 358             return indexOfSupplementary(value, ch, fromIndex, max);
 359         }
 360     }
 361 
 362     @HotSpotIntrinsicCandidate
 363     public static int indexOf(byte[] value, byte[] str) {
 364         if (str.length == 0) {
 365             return 0;
 366         }
 367         if (value.length < str.length) {
 368             return -1;
 369         }
 370         return indexOfUnsafe(value, length(value), str, length(str), 0);
 371     }
 372 
 373     @HotSpotIntrinsicCandidate
 374     public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 375         checkBoundsBeginEnd(fromIndex, valueCount, value);
 376         checkBoundsBeginEnd(0, strCount, str);
 377         return indexOfUnsafe(value, valueCount, str, strCount, fromIndex);
 378     }
 379 
 380 
 381     private static int indexOfUnsafe(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 382         assert fromIndex >= 0;
 383         assert strCount > 0;
 384         assert strCount <= length(str);
 385         assert valueCount >= strCount;
 386         char first = getChar(str, 0);
 387         int max = (valueCount - strCount);
 388         for (int i = fromIndex; i <= max; i++) {
 389             // Look for first character.
 390             if (getChar(value, i) != first) {
 391                 while (++i <= max && getChar(value, i) != first);
 392             }
 393             // Found first character, now look at the rest of value
 394             if (i <= max) {
 395                 int j = i + 1;
 396                 int end = j + strCount - 1;
 397                 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++);
 398                 if (j == end) {
 399                     // Found whole string.
 400                     return i;
 401                 }
 402             }
 403         }
 404         return -1;
 405     }
 406 
 407 
 408     /**
 409      * Handles indexOf Latin1 substring in UTF16 string.
 410      */
 411     @HotSpotIntrinsicCandidate
 412     public static int indexOfLatin1(byte[] value, byte[] str) {
 413         if (str.length == 0) {
 414             return 0;
 415         }
 416         if (length(value) < str.length) {
 417             return -1;
 418         }
 419         return indexOfLatin1Unsafe(value, length(value), str, str.length, 0);
 420     }
 421 
 422     @HotSpotIntrinsicCandidate
 423     public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
 424         checkBoundsBeginEnd(fromIndex, srcCount, src);
 425         String.checkBoundsBeginEnd(0, tgtCount, tgt.length);
 426         return indexOfLatin1Unsafe(src, srcCount, tgt, tgtCount, fromIndex);
 427     }
 428 
 429     public static int indexOfLatin1Unsafe(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
 430         assert fromIndex >= 0;
 431         assert tgtCount > 0;
 432         assert tgtCount <= tgt.length;
 433         assert srcCount >= tgtCount;
 434         char first = (char)(tgt[0] & 0xff);
 435         int max = (srcCount - tgtCount);
 436         for (int i = fromIndex; i <= max; i++) {
 437             // Look for first character.
 438             if (getChar(src, i) != first) {
 439                 while (++i <= max && getChar(src, i) != first);
 440             }
 441             // Found first character, now look at the rest of v2
 442             if (i <= max) {
 443                 int j = i + 1;
 444                 int end = j + tgtCount - 1;
 445                 for (int k = 1;
 446                      j < end && getChar(src, j) == (tgt[k] & 0xff);
 447                      j++, k++);
 448                 if (j == end) {
 449                     // Found whole string.
 450                     return i;
 451                 }
 452             }
 453         }
 454         return -1;
 455     }
 456 
 457     @HotSpotIntrinsicCandidate
 458     private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
 459         checkBoundsBeginEnd(fromIndex, max, value);
 460         return indexOfCharUnsafe(value, ch, fromIndex, max);
 461     }
 462 
 463     private static int indexOfCharUnsafe(byte[] value, int ch, int fromIndex, int max) {
 464         for (int i = fromIndex; i < max; i++) {
 465             if (getChar(value, i) == ch) {
 466                 return i;
 467             }
 468         }
 469         return -1;
 470     }
 471 
 472     /**
 473      * Handles (rare) calls of indexOf with a supplementary character.
 474      */
 475     private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) {
 476         if (Character.isValidCodePoint(ch)) {
 477             final char hi = Character.highSurrogate(ch);
 478             final char lo = Character.lowSurrogate(ch);
 479             checkBoundsBeginEnd(fromIndex, max, value);
 480             for (int i = fromIndex; i < max - 1; i++) {
 481                 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) {
 482                     return i;
 483                 }
 484             }
 485         }
 486         return -1;
 487     }
 488 
 489     // srcCoder == UTF16 && tgtCoder == UTF16
 490     public static int lastIndexOf(byte[] src, int srcCount,
 491                                   byte[] tgt, int tgtCount, int fromIndex) {
 492         assert fromIndex >= 0;
 493         assert tgtCount > 0;
 494         assert tgtCount <= length(tgt);
 495         int min = tgtCount - 1;
 496         int i = min + fromIndex;
 497         int strLastIndex = tgtCount - 1;
 498 
 499         checkIndex(strLastIndex, tgt);
 500         char strLastChar = getChar(tgt, strLastIndex);
 501 
 502         checkIndex(i, src);
 503 
 504     startSearchForLastChar:
 505         while (true) {
 506             while (i >= min && getChar(src, i) != strLastChar) {
 507                 i--;
 508             }
 509             if (i < min) {
 510                 return -1;
 511             }
 512             int j = i - 1;
 513             int start = j - strLastIndex;
 514             int k = strLastIndex - 1;
 515             while (j > start) {
 516                 if (getChar(src, j--) != getChar(tgt, k--)) {
 517                     i--;
 518                     continue startSearchForLastChar;
 519                 }
 520             }
 521             return start + 1;
 522         }
 523     }
 524 
 525     public static int lastIndexOf(byte[] value, int ch, int fromIndex) {
 526         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 527             // handle most cases here (ch is a BMP code point or a
 528             // negative value (invalid code point))
 529             int i = Math.min(fromIndex, (value.length >> 1) - 1);
 530             for (; i >= 0; i--) {
 531                 if (getChar(value, i) == ch) {
 532                     return i;
 533                 }
 534             }
 535             return -1;
 536         } else {
 537             return lastIndexOfSupplementary(value, ch, fromIndex);
 538         }
 539     }
 540 
 541     /**
 542      * Handles (rare) calls of lastIndexOf with a supplementary character.
 543      */
 544     private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) {
 545         if (Character.isValidCodePoint(ch)) {
 546             char hi = Character.highSurrogate(ch);
 547             char lo = Character.lowSurrogate(ch);
 548             int i = Math.min(fromIndex, (value.length >> 1) - 2);
 549             for (; i >= 0; i--) {
 550                 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) {
 551                     return i;
 552                 }
 553             }
 554         }
 555         return -1;
 556     }
 557 
 558     public static String replace(byte[] value, char oldChar, char newChar) {
 559         int len = value.length >> 1;
 560         int i = -1;
 561         while (++i < len) {
 562             if (getChar(value, i) == oldChar) {
 563                 break;
 564             }
 565         }
 566         if (i < len) {
 567             byte buf[] = new byte[value.length];
 568             for (int j = 0; j < i; j++) {
 569                 putChar(buf, j, getChar(value, j)); // TBD:arraycopy?
 570             }
 571             while (i < len) {
 572                 char c = getChar(value, i);
 573                 putChar(buf, i, c == oldChar ? newChar : c);
 574                 i++;
 575            }
 576            // Check if we should try to compress to latin1
 577            if (String.COMPACT_STRINGS &&
 578                !StringLatin1.canEncode(oldChar) &&
 579                StringLatin1.canEncode(newChar)) {
 580                byte[] val = compress(buf, 0, len);
 581                if (val != null) {
 582                    return new String(val, LATIN1);
 583                }
 584            }
 585            return new String(buf, UTF16);
 586         }
 587         return null;
 588     }
 589 
 590     public static boolean regionMatchesCI(byte[] value, int toffset,
 591                                           byte[] other, int ooffset, int len) {
 592         int last = toffset + len;
 593         assert toffset >= 0 && ooffset >= 0;
 594         assert ooffset + len <= length(other);
 595         assert last <= length(value);
 596         while (toffset < last) {
 597             char c1 = getChar(value, toffset++);
 598             char c2 = getChar(other, ooffset++);
 599             if (c1 == c2) {
 600                 continue;
 601             }
 602             // try converting both characters to uppercase.
 603             // If the results match, then the comparison scan should
 604             // continue.
 605             char u1 = Character.toUpperCase(c1);
 606             char u2 = Character.toUpperCase(c2);
 607             if (u1 == u2) {
 608                 continue;
 609             }
 610             // Unfortunately, conversion to uppercase does not work properly
 611             // for the Georgian alphabet, which has strange rules about case
 612             // conversion.  So we need to make one last check before
 613             // exiting.
 614             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 615                 continue;
 616             }
 617             return false;
 618         }
 619         return true;
 620     }
 621 
 622     public static boolean regionMatchesCI_Latin1(byte[] value, int toffset,
 623                                                  byte[] other, int ooffset,
 624                                                  int len) {
 625         return StringLatin1.regionMatchesCI_UTF16(other, ooffset, value, toffset, len);
 626     }
 627 
 628     public static String toLowerCase(String str, byte[] value, Locale locale) {
 629         if (locale == null) {
 630             throw new NullPointerException();
 631         }
 632         int first;
 633         boolean hasSurr = false;
 634         final int len = value.length >> 1;
 635 
 636         // Now check if there are any characters that need to be changed, or are surrogate
 637         for (first = 0 ; first < len; first++) {
 638             int cp = (int)getChar(value, first);
 639             if (Character.isSurrogate((char)cp)) {
 640                 hasSurr = true;
 641                 break;
 642             }
 643             if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
 644                 break;
 645             }
 646         }
 647         if (first == len)
 648             return str;
 649         byte[] result = new byte[value.length];
 650         System.arraycopy(value, 0, result, 0, first << 1);  // Just copy the first few
 651                                                             // lowerCase characters.
 652         String lang = locale.getLanguage();
 653         if (lang == "tr" || lang == "az" || lang == "lt") {
 654             return toLowerCaseEx(str, value, result, first, locale, true);
 655         }
 656         if (hasSurr) {
 657             return toLowerCaseEx(str, value, result, first, locale, false);
 658         }
 659         int bits = 0;
 660         for (int i = first; i < len; i++) {
 661             int cp = (int)getChar(value, i);
 662             if (cp == '\u03A3' ||                       // GREEK CAPITAL LETTER SIGMA
 663                 Character.isSurrogate((char)cp)) {
 664                 return toLowerCaseEx(str, value, result, i, locale, false);
 665             }
 666             if (cp == '\u0130') {                       // LATIN CAPITAL LETTER I WITH DOT ABOVE
 667                 return toLowerCaseEx(str, value, result, i, locale, true);
 668             }
 669             cp = Character.toLowerCase(cp);
 670             if (!Character.isBmpCodePoint(cp)) {
 671                 return toLowerCaseEx(str, value, result, i, locale, false);
 672             }
 673             bits |= cp;
 674             putChar(result, i, cp);
 675         }
 676         if (bits > 0xFF) {
 677             return new String(result, UTF16);
 678         } else {
 679             return newString(result, 0, len);
 680         }
 681     }
 682 
 683     private static String toLowerCaseEx(String str, byte[] value,
 684                                         byte[] result, int first, Locale locale,
 685                                         boolean localeDependent) {
 686         assert(result.length == value.length);
 687         assert(first >= 0);
 688         int resultOffset = first;
 689         int length = value.length >> 1;
 690         int srcCount;
 691         for (int i = first; i < length; i += srcCount) {
 692             int srcChar = getChar(value, i);
 693             int lowerChar;
 694             char[] lowerCharArray;
 695             srcCount = 1;
 696             if (Character.isSurrogate((char)srcChar)) {
 697                 srcChar = codePointAt(value, i, length);
 698                 srcCount = Character.charCount(srcChar);
 699             }
 700             if (localeDependent ||
 701                 srcChar == '\u03A3' ||  // GREEK CAPITAL LETTER SIGMA
 702                 srcChar == '\u0130') {  // LATIN CAPITAL LETTER I WITH DOT ABOVE
 703                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
 704             } else {
 705                 lowerChar = Character.toLowerCase(srcChar);
 706             }
 707             if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
 708                 putChar(result, resultOffset++, lowerChar);
 709             } else {
 710                 if (lowerChar == Character.ERROR) {
 711                     lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
 712                 } else {
 713                     lowerCharArray = Character.toChars(lowerChar);
 714                 }
 715                 /* Grow result if needed */
 716                 int mapLen = lowerCharArray.length;
 717                 if (mapLen > srcCount) {
 718                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 719                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 720                     result = result2;
 721                 }
 722                 assert resultOffset >= 0;
 723                 assert resultOffset + mapLen <= length(result);
 724                 for (int x = 0; x < mapLen; ++x) {
 725                     putChar(result, resultOffset++, lowerCharArray[x]);
 726                 }
 727             }
 728         }
 729         return newString(result, 0, resultOffset);
 730     }
 731 
 732     public static String toUpperCase(String str, byte[] value, Locale locale) {
 733         if (locale == null) {
 734             throw new NullPointerException();
 735         }
 736         int first;
 737         boolean hasSurr = false;
 738         final int len = value.length >> 1;
 739 
 740         // Now check if there are any characters that need to be changed, or are surrogate
 741         for (first = 0 ; first < len; first++) {
 742             int cp = (int)getChar(value, first);
 743             if (Character.isSurrogate((char)cp)) {
 744                 hasSurr = true;
 745                 break;
 746             }
 747             if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
 748                 break;
 749             }
 750         }
 751         if (first == len) {
 752             return str;
 753         }
 754         byte[] result = new byte[value.length];
 755         System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few
 756                                                            // upperCase characters.
 757         String lang = locale.getLanguage();
 758         if (lang == "tr" || lang == "az" || lang == "lt") {
 759             return toUpperCaseEx(str, value, result, first, locale, true);
 760         }
 761         if (hasSurr) {
 762             return toUpperCaseEx(str, value, result, first, locale, false);
 763         }
 764         int bits = 0;
 765         for (int i = first; i < len; i++) {
 766             int cp = (int)getChar(value, i);
 767             if (Character.isSurrogate((char)cp)) {
 768                 return toUpperCaseEx(str, value, result, i, locale, false);
 769             }
 770             cp = Character.toUpperCaseEx(cp);
 771             if (!Character.isBmpCodePoint(cp)) {    // Character.ERROR is not bmp
 772                 return toUpperCaseEx(str, value, result, i, locale, false);
 773             }
 774             bits |= cp;
 775             putChar(result, i, cp);
 776         }
 777         if (bits > 0xFF) {
 778             return new String(result, UTF16);
 779         } else {
 780             return newString(result, 0, len);
 781         }
 782     }
 783 
 784     private static String toUpperCaseEx(String str, byte[] value,
 785                                         byte[] result, int first,
 786                                         Locale locale, boolean localeDependent)
 787     {
 788         assert(result.length == value.length);
 789         assert(first >= 0);
 790         int resultOffset = first;
 791         int length = value.length >> 1;
 792         int srcCount;
 793         for (int i = first; i < length; i += srcCount) {
 794             int srcChar = getChar(value, i);
 795             int upperChar;
 796             char[] upperCharArray;
 797             srcCount = 1;
 798             if (Character.isSurrogate((char)srcChar)) {
 799                 srcChar = codePointAt(value, i, length);
 800                 srcCount = Character.charCount(srcChar);
 801             }
 802             if (localeDependent) {
 803                 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
 804             } else {
 805                 upperChar = Character.toUpperCaseEx(srcChar);
 806             }
 807             if (Character.isBmpCodePoint(upperChar)) {
 808                 putChar(result, resultOffset++, upperChar);
 809             } else {
 810                 if (upperChar == Character.ERROR) {
 811                     if (localeDependent) {
 812                         upperCharArray =
 813                             ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
 814                     } else {
 815                         upperCharArray = Character.toUpperCaseCharArray(srcChar);
 816                     }
 817                 } else {
 818                     upperCharArray = Character.toChars(upperChar);
 819                 }
 820                 /* Grow result if needed */
 821                 int mapLen = upperCharArray.length;
 822                 if (mapLen > srcCount) {
 823                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 824                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 825                     result = result2;
 826                 }
 827                 assert resultOffset >= 0;
 828                 assert resultOffset + mapLen <= length(result);
 829                 for (int x = 0; x < mapLen; ++x) {
 830                     putChar(result, resultOffset++, upperCharArray[x]);
 831                 }
 832             }
 833         }
 834         return newString(result, 0, resultOffset);
 835     }
 836 
 837     public static String trim(byte[] value) {
 838         int length = value.length >> 1;
 839         int len = length;
 840         int st = 0;
 841         while (st < len && getChar(value, st) <= ' ') {
 842             st++;
 843         }
 844         while (st < len && getChar(value, len - 1) <= ' ') {
 845             len--;
 846         }
 847         return ((st > 0) || (len < length )) ?
 848             new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) :
 849             null;
 850     }
 851 
 852     private static void putChars(byte[] val, int index, char[] str, int off, int end) {
 853         while (off < end) {
 854             putChar(val, index++, str[off++]);
 855         }
 856     }
 857 
 858     public static String newString(byte[] val, int index, int len) {
 859         if (String.COMPACT_STRINGS) {
 860             byte[] buf = compress(val, index, len);
 861             if (buf != null) {
 862                 return new String(buf, LATIN1);
 863             }
 864         }
 865         int last = index + len;
 866         return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16);
 867     }
 868 
 869     public static void fillNull(byte[] val, int index, int end) {
 870         Arrays.fill(val, index << 1, end << 1, (byte)0);
 871     }
 872 
 873     static class CharsSpliterator implements Spliterator.OfInt {
 874         private final byte[] array;
 875         private int index;        // current index, modified on advance/split
 876         private final int fence;  // one past last index
 877         private final int cs;
 878 
 879         CharsSpliterator(byte[] array, int acs) {
 880             this(array, 0, array.length >> 1, acs);
 881         }
 882 
 883         CharsSpliterator(byte[] array, int origin, int fence, int acs) {
 884             this.array = array;
 885             this.index = origin;
 886             this.fence = fence;
 887             this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
 888                       | Spliterator.SUBSIZED;
 889         }
 890 
 891         @Override
 892         public OfInt trySplit() {
 893             int lo = index, mid = (lo + fence) >>> 1;
 894             return (lo >= mid)
 895                    ? null
 896                    : new CharsSpliterator(array, lo, index = mid, cs);
 897         }
 898 
 899         @Override
 900         public void forEachRemaining(IntConsumer action) {
 901             byte[] a; int i, hi; // hoist accesses and checks from loop
 902             if (action == null)
 903                 throw new NullPointerException();
 904             if (((a = array).length >> 1) >= (hi = fence) &&
 905                 (i = index) >= 0 && i < (index = hi)) {
 906                 do {
 907                     action.accept(charAt(a, i));
 908                 } while (++i < hi);
 909             }
 910         }
 911 
 912         @Override
 913         public boolean tryAdvance(IntConsumer action) {
 914             if (action == null)
 915                 throw new NullPointerException();
 916             int i = index;
 917             if (i >= 0 && i < fence) {
 918                 action.accept(charAt(array, i));
 919                 index++;
 920                 return true;
 921             }
 922             return false;
 923         }
 924 
 925         @Override
 926         public long estimateSize() { return (long)(fence - index); }
 927 
 928         @Override
 929         public int characteristics() {
 930             return cs;
 931         }
 932     }
 933 
 934     static class CodePointsSpliterator implements Spliterator.OfInt {
 935         private final byte[] array;
 936         private int index;        // current index, modified on advance/split
 937         private final int fence;  // one past last index
 938         private final int cs;
 939 
 940         CodePointsSpliterator(byte[] array, int acs) {
 941             this(array, 0, array.length >> 1, acs);
 942         }
 943 
 944         CodePointsSpliterator(byte[] array, int origin, int fence, int acs) {
 945             this.array = array;
 946             this.index = origin;
 947             this.fence = fence;
 948             this.cs = acs | Spliterator.ORDERED;
 949         }
 950 
 951         @Override
 952         public OfInt trySplit() {
 953             int lo = index, mid = (lo + fence) >>> 1;
 954             if (lo >= mid)
 955                 return null;
 956 
 957             int midOneLess;
 958             // If the mid-point intersects a surrogate pair
 959             if (Character.isLowSurrogate(charAt(array, mid)) &&
 960                 Character.isHighSurrogate(charAt(array, midOneLess = (mid -1)))) {
 961                 // If there is only one pair it cannot be split
 962                 if (lo >= midOneLess)
 963                     return null;
 964                 // Shift the mid-point to align with the surrogate pair
 965                 return new CodePointsSpliterator(array, lo, index = midOneLess, cs);
 966             }
 967             return new CodePointsSpliterator(array, lo, index = mid, cs);
 968         }
 969 
 970         @Override
 971         public void forEachRemaining(IntConsumer action) {
 972             byte[] a; int i, hi; // hoist accesses and checks from loop
 973             if (action == null)
 974                 throw new NullPointerException();
 975             if (((a = array).length >> 1) >= (hi = fence) &&
 976                 (i = index) >= 0 && i < (index = hi)) {
 977                 do {
 978                     i = advance(a, i, hi, action);
 979                 } while (i < hi);
 980             }
 981         }
 982 
 983         @Override
 984         public boolean tryAdvance(IntConsumer action) {
 985             if (action == null)
 986                 throw new NullPointerException();
 987             if (index >= 0 && index < fence) {
 988                 index = advance(array, index, fence, action);
 989                 return true;
 990             }
 991             return false;
 992         }
 993 
 994         // Advance one code point from the index, i, and return the next
 995         // index to advance from
 996         private static int advance(byte[] a, int i, int hi, IntConsumer action) {
 997             char c1 = charAt(a, i++);
 998             int cp = c1;
 999             if (Character.isHighSurrogate(c1) && i < hi) {
1000                 char c2 = charAt(a, i);
1001                 if (Character.isLowSurrogate(c2)) {
1002                     i++;
1003                     cp = Character.toCodePoint(c1, c2);
1004                 }
1005             }
1006             action.accept(cp);
1007             return i;
1008         }
1009 
1010         @Override
1011         public long estimateSize() { return (long)(fence - index); }
1012 
1013         @Override
1014         public int characteristics() {
1015             return cs;
1016         }
1017     }
1018 
1019     ////////////////////////////////////////////////////////////////
1020 
1021     public static void putCharSB(byte[] val, int index, int c) {
1022         checkIndex(index, val);
1023         putChar(val, index, c);
1024     }
1025 
1026     public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) {
1027         checkBoundsBeginEnd(index, index + end - off, val);
1028         putChars(val, index, ca, off, end);
1029     }
1030 
1031     public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) {
1032         checkBoundsBeginEnd(index, index + end - off, val);
1033         for (int i = off; i < end; i++) {
1034             putChar(val, index++, s.charAt(i));
1035         }
1036     }
1037 
1038     public static int codePointAtSB(byte[] val, int index, int end) {
1039         return codePointAt(val, index, end, true /* checked */);
1040     }
1041 
1042     public static int codePointBeforeSB(byte[] val, int index) {
1043         return codePointBefore(val, index, true /* checked */);
1044     }
1045 
1046     public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) {
1047         return codePointCount(val, beginIndex, endIndex, true /* checked */);
1048     }
1049 
1050     public static int getChars(int i, int begin, int end, byte[] value) {
1051         checkBoundsBeginEnd(begin, end, value);
1052         int pos = getChars(i, end, value);
1053         assert begin == pos;
1054         return pos;
1055     }
1056 
1057     public static int getChars(long l, int begin, int end, byte[] value) {
1058         checkBoundsBeginEnd(begin, end, value);
1059         int pos = getChars(l, end, value);
1060         assert begin == pos;
1061         return pos;
1062     }
1063 
1064     public static boolean contentEquals(byte[] v1, byte[] v2, int len) {
1065         checkBoundsOffCount(0, len, v2);
1066         for (int i = 0; i < len; i++) {
1067             if ((char)(v1[i] & 0xff) != getChar(v2, i)) {
1068                 return false;
1069             }
1070         }
1071         return true;
1072     }
1073 
1074     public static boolean contentEquals(byte[] value, CharSequence cs, int len) {
1075         checkOffset(len, value);
1076         for (int i = 0; i < len; i++) {
1077             if (getChar(value, i) != cs.charAt(i)) {
1078                 return false;
1079             }
1080         }
1081         return true;
1082     }
1083 
1084     public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) {
1085         int end = i + 4;
1086         checkBoundsBeginEnd(i, end, value);
1087         putChar(value, i++, c1);
1088         putChar(value, i++, c2);
1089         putChar(value, i++, c3);
1090         putChar(value, i++, c4);
1091         assert(i == end);
1092         return end;
1093     }
1094 
1095     public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) {
1096         int end = i + 5;
1097         checkBoundsBeginEnd(i, end, value);
1098         putChar(value, i++, c1);
1099         putChar(value, i++, c2);
1100         putChar(value, i++, c3);
1101         putChar(value, i++, c4);
1102         putChar(value, i++, c5);
1103         assert(i == end);
1104         return end;
1105     }
1106 
1107     public static char charAt(byte[] value, int index) {
1108         checkIndex(index, value);
1109         return getChar(value, index);
1110     }
1111 
1112     public static void reverse(byte[] val, int count) {
1113         checkOffset(count, val);
1114         int n = count - 1;
1115         boolean hasSurrogates = false;
1116         for (int j = (n-1) >> 1; j >= 0; j--) {
1117             int k = n - j;
1118             char cj = getChar(val, j);
1119             char ck = getChar(val, k);
1120             putChar(val, j, ck);
1121             putChar(val, k, cj);
1122             if (Character.isSurrogate(cj) ||
1123                 Character.isSurrogate(ck)) {
1124                 hasSurrogates = true;
1125             }
1126         }
1127         if (hasSurrogates) {
1128             reverseAllValidSurrogatePairs(val, count);
1129         }
1130     }
1131 
1132     /** Outlined helper method for reverse() */
1133     private static void reverseAllValidSurrogatePairs(byte[] val, int count) {
1134         for (int i = 0; i < count - 1; i++) {
1135             char c2 = getChar(val, i);
1136             if (Character.isLowSurrogate(c2)) {
1137                 char c1 = getChar(val, i + 1);
1138                 if (Character.isHighSurrogate(c1)) {
1139                     putChar(val, i++, c1);
1140                     putChar(val, i, c2);
1141                 }
1142             }
1143         }
1144     }
1145 
1146     // inflatedCopy byte[] -> byte[]
1147     public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
1148         // We need a range check here because 'putChar' has no checks
1149         checkBoundsOffCount(dstOff, len, dst);
1150         for (int i = 0; i < len; i++) {
1151             putChar(dst, dstOff++, src[srcOff++] & 0xff);
1152         }
1153     }
1154 
1155     // srcCoder == UTF16 && tgtCoder == LATIN1
1156     public static int lastIndexOfLatin1(byte[] src, int srcCount,
1157                                         byte[] tgt, int tgtCount, int fromIndex) {
1158         assert fromIndex >= 0;
1159         assert tgtCount > 0;
1160         assert tgtCount <= tgt.length;
1161         int min = tgtCount - 1;
1162         int i = min + fromIndex;
1163         int strLastIndex = tgtCount - 1;
1164 
1165         char strLastChar = (char)(tgt[strLastIndex] & 0xff);
1166 
1167         checkIndex(i, src);
1168 
1169     startSearchForLastChar:
1170         while (true) {
1171             while (i >= min && getChar(src, i) != strLastChar) {
1172                 i--;
1173             }
1174             if (i < min) {
1175                 return -1;
1176             }
1177             int j = i - 1;
1178             int start = j - strLastIndex;
1179             int k = strLastIndex - 1;
1180             while (j > start) {
1181                 if (getChar(src, j--) != (tgt[k--] & 0xff)) {
1182                     i--;
1183                     continue startSearchForLastChar;
1184                 }
1185             }
1186             return start + 1;
1187         }
1188     }
1189 
1190     ////////////////////////////////////////////////////////////////
1191 
1192     private static native boolean isBigEndian();
1193 
1194     static final int HI_BYTE_SHIFT;
1195     static final int LO_BYTE_SHIFT;
1196     static {
1197         if (isBigEndian()) {
1198             HI_BYTE_SHIFT = 8;
1199             LO_BYTE_SHIFT = 0;
1200         } else {
1201             HI_BYTE_SHIFT = 0;
1202             LO_BYTE_SHIFT = 8;
1203         }
1204     }
1205 
1206     static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;
1207 
1208     // Used by trusted callers.  Assumes all necessary bounds checks have
1209     // been done by the caller.
1210 
1211     /**
1212      * This is a variant of {@link Integer#getChars(int, int, byte[])}, but for
1213      * UTF-16 coder.
1214      *
1215      * @param i     value to convert
1216      * @param index next index, after the least significant digit
1217      * @param buf   target buffer, UTF16-coded.
1218      * @return index of the most significant digit or minus sign, if present
1219      */
1220     static int getChars(int i, int index, byte[] buf) {
1221         int q, r;
1222         int charPos = index;
1223 
1224         boolean negative = (i < 0);
1225         if (!negative) {
1226             i = -i;
1227         }
1228 
1229         // Get 2 digits/iteration using ints
1230         while (i <= -100) {
1231             q = i / 100;
1232             r = (q * 100) - i;
1233             i = q;
1234             putChar(buf, --charPos, Integer.DigitOnes[r]);
1235             putChar(buf, --charPos, Integer.DigitTens[r]);
1236         }
1237 
1238         // We know there are at most two digits left at this point.
1239         q = i / 10;
1240         r = (q * 10) - i;
1241         putChar(buf, --charPos, '0' + r);
1242 
1243         // Whatever left is the remaining digit.
1244         if (q < 0) {
1245             putChar(buf, --charPos, '0' - q);
1246         }
1247 
1248         if (negative) {
1249             putChar(buf, --charPos, '-');
1250         }
1251         return charPos;
1252     }
1253 
1254     /**
1255      * This is a variant of {@link Long#getChars(long, int, byte[])}, but for
1256      * UTF-16 coder.
1257      *
1258      * @param i     value to convert
1259      * @param index next index, after the least significant digit
1260      * @param buf   target buffer, UTF16-coded.
1261      * @return index of the most significant digit or minus sign, if present
1262      */
1263     static int getChars(long i, int index, byte[] buf) {
1264         long q;
1265         int r;
1266         int charPos = index;
1267 
1268         boolean negative = (i < 0);
1269         if (!negative) {
1270             i = -i;
1271         }
1272 
1273         // Get 2 digits/iteration using longs until quotient fits into an int
1274         while (i <= Integer.MIN_VALUE) {
1275             q = i / 100;
1276             r = (int)((q * 100) - i);
1277             i = q;
1278             putChar(buf, --charPos, Integer.DigitOnes[r]);
1279             putChar(buf, --charPos, Integer.DigitTens[r]);
1280         }
1281 
1282         // Get 2 digits/iteration using ints
1283         int q2;
1284         int i2 = (int)i;
1285         while (i2 <= -100) {
1286             q2 = i2 / 100;
1287             r  = (q2 * 100) - i2;
1288             i2 = q2;
1289             putChar(buf, --charPos, Integer.DigitOnes[r]);
1290             putChar(buf, --charPos, Integer.DigitTens[r]);
1291         }
1292 
1293         // We know there are at most two digits left at this point.
1294         q2 = i2 / 10;
1295         r  = (q2 * 10) - i2;
1296         putChar(buf, --charPos, '0' + r);
1297 
1298         // Whatever left is the remaining digit.
1299         if (q2 < 0) {
1300             putChar(buf, --charPos, '0' - q2);
1301         }
1302 
1303         if (negative) {
1304             putChar(buf, --charPos, '-');
1305         }
1306         return charPos;
1307     }
1308     // End of trusted methods.
1309 
1310     public static void checkIndex(int off, byte[] val) {
1311         String.checkIndex(off, length(val));
1312     }
1313 
1314     public static void checkOffset(int off, byte[] val) {
1315         String.checkOffset(off, length(val));
1316     }
1317 
1318     public static void checkBoundsBeginEnd(int begin, int end, byte[] val) {
1319         String.checkBoundsBeginEnd(begin, end, length(val));
1320     }
1321 
1322     public static void checkBoundsOffCount(int offset, int count, byte[] val) {
1323         String.checkBoundsOffCount(offset, count, length(val));
1324     }
1325 
1326 }