New src/java.base/share/classes/java/lang/StringUTF16.java

   1 /*
   2  * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Spliterator;
  31 import java.util.function.IntConsumer;
  32 import jdk.internal.HotSpotIntrinsicCandidate;
  33 import jdk.internal.vm.annotation.ForceInline;
  34 import jdk.internal.vm.annotation.DontInline;
  35 
  36 import static java.lang.String.UTF16;
  37 import static java.lang.String.LATIN1;
  38 
  39 final class StringUTF16 {
  40 
  41     public static byte[] newBytesFor(int len) {
  42         if (len < 0) {
  43             throw new NegativeArraySizeException();
  44         }
  45         if (len > MAX_LENGTH) {
  46             throw new OutOfMemoryError("UTF16 String size is " + len +
  47                                        ", should be less than " + MAX_LENGTH);
  48         }
  49         return new byte[len << 1];
  50     }
  51 
  52     @HotSpotIntrinsicCandidate
  53     // intrinsic performs no bounds checks
  54     static void putChar(byte[] val, int index, int c) {
  55         assert index >= 0 && index < length(val) : "Trusted caller missed bounds check";
  56         index <<= 1;
  57         val[index++] = (byte)(c >> HI_BYTE_SHIFT);
  58         val[index]   = (byte)(c >> LO_BYTE_SHIFT);
  59     }
  60 
  61     @HotSpotIntrinsicCandidate
  62     // intrinsic performs no bounds checks
  63     static char getChar(byte[] val, int index) {
  64         assert index >= 0 && index < length(val) : "Trusted caller missed bounds check";
  65         index <<= 1;
  66         return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) |
  67                       ((val[index]   & 0xff) << LO_BYTE_SHIFT));
  68     }
  69 
  70     public static int length(byte[] value) {
  71         return value.length >> 1;
  72     }
  73 
  74     private static int codePointAt(byte[] value, int index, int end, boolean checked) {
  75         assert index < end;
  76         if (checked) {
  77             checkIndex(index, value);
  78         }
  79         char c1 = getChar(value, index);
  80         if (Character.isHighSurrogate(c1) && ++index < end) {
  81             if (checked) {
  82                 checkIndex(index, value);
  83             }
  84             char c2 = getChar(value, index);
  85             if (Character.isLowSurrogate(c2)) {
  86                return Character.toCodePoint(c1, c2);
  87             }
  88         }
  89         return c1;
  90     }
  91 
  92     public static int codePointAt(byte[] value, int index, int end) {
  93        return codePointAt(value, index, end, false /* unchecked */);
  94     }
  95 
  96     private static int codePointBefore(byte[] value, int index, boolean checked) {
  97         --index;
  98         if (checked) {
  99             checkIndex(index, value);
 100         }
 101         char c2 = getChar(value, index);
 102         if (Character.isLowSurrogate(c2) && index > 0) {
 103             --index;
 104             if (checked) {
 105                 checkIndex(index, value);
 106             }
 107             char c1 = getChar(value, index);
 108             if (Character.isHighSurrogate(c1)) {
 109                return Character.toCodePoint(c1, c2);
 110             }
 111         }
 112         return c2;
 113     }
 114 
 115     public static int codePointBefore(byte[] value, int index) {
 116         return codePointBefore(value, index, false /* unchecked */);
 117     }
 118 
 119     private static int codePointCount(byte[] value, int beginIndex, int endIndex, boolean checked) {
 120         assert beginIndex <= endIndex;
 121         int count = endIndex - beginIndex;
 122         int i = beginIndex;
 123         if (checked && i < endIndex) {
 124             checkBoundsBeginEnd(i, endIndex, value);
 125         }
 126         for (; i < endIndex - 1; ) {
 127             if (Character.isHighSurrogate(getChar(value, i++)) &&
 128                 Character.isLowSurrogate(getChar(value, i))) {
 129                 count--;
 130                 i++;
 131             }
 132         }
 133         return count;
 134     }
 135 
 136     public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
 137         return codePointCount(value, beginIndex, endIndex, false /* unchecked */);
 138     }
 139 
 140     public static char[] toChars(byte[] value) {
 141         char[] dst = new char[value.length >> 1];
 142         getChars(value, 0, dst.length, dst, 0);
 143         return dst;
 144     }
 145 
 146     @HotSpotIntrinsicCandidate
 147     public static byte[] toBytes(char[] value, int off, int len) {
 148         byte[] val = newBytesFor(len);
 149         for (int i = 0; i < len; i++) {
 150             putChar(val, i, value[off]);
 151             off++;
 152         }
 153         return val;
 154     }
 155 
 156     public static byte[] compress(char[] val, int off, int len) {
 157         byte[] ret = new byte[len];
 158         if (compress(val, off, ret, 0, len) == len) {
 159             return ret;
 160         }
 161         return null;
 162     }
 163 
 164     public static byte[] compress(byte[] val, int off, int len) {
 165         byte[] ret = new byte[len];
 166         if (compress(val, off, ret, 0, len) == len) {
 167             return ret;
 168         }
 169         return null;
 170     }
 171 
 172     // compressedCopy char[] -> byte[]
 173     @HotSpotIntrinsicCandidate
 174     public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
 175         for (int i = 0; i < len; i++) {
 176             char c = src[srcOff];
 177             if (c > 0xFF) {
 178                 len = 0;
 179                 break;
 180             }
 181             dst[dstOff] = (byte)c;
 182             srcOff++;
 183             dstOff++;
 184         }
 185         return len;
 186     }
 187 
 188     // compressedCopy byte[] -> byte[]
 189     @HotSpotIntrinsicCandidate
 190     public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
 191         // We need a range check here because 'getChar' has no checks
 192         checkBoundsOffCount(srcOff, len, src);
 193         for (int i = 0; i < len; i++) {
 194             char c = getChar(src, srcOff);
 195             if (c > 0xFF) {
 196                 len = 0;
 197                 break;
 198             }
 199             dst[dstOff] = (byte)c;
 200             srcOff++;
 201             dstOff++;
 202         }
 203         return len;
 204     }
 205 
 206     public static byte[] toBytes(int[] val, int index, int len) {
 207         final int end = index + len;
 208         // Pass 1: Compute precise size of char[]
 209         int n = len;
 210         for (int i = index; i < end; i++) {
 211             int cp = val[i];
 212             if (Character.isBmpCodePoint(cp))
 213                 continue;
 214             else if (Character.isValidCodePoint(cp))
 215                 n++;
 216             else throw new IllegalArgumentException(Integer.toString(cp));
 217         }
 218         // Pass 2: Allocate and fill in <high, low> pair
 219         byte[] buf = newBytesFor(n);
 220         for (int i = index, j = 0; i < end; i++, j++) {
 221             int cp = val[i];
 222             if (Character.isBmpCodePoint(cp)) {
 223                 putChar(buf, j, cp);
 224             } else {
 225                 putChar(buf, j++, Character.highSurrogate(cp));
 226                 putChar(buf, j, Character.lowSurrogate(cp));
 227             }
 228         }
 229         return buf;
 230     }
 231 
 232     public static byte[] toBytes(char c) {
 233         byte[] result = new byte[2];
 234         putChar(result, 0, c);
 235         return result;
 236     }
 237 
 238     static byte[] toBytesSupplementary(int cp) {
 239         byte[] result = new byte[4];
 240         putChar(result, 0, Character.highSurrogate(cp));
 241         putChar(result, 1, Character.lowSurrogate(cp));
 242         return result;
 243     }
 244 
 245     @HotSpotIntrinsicCandidate
 246     public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
 247         // We need a range check here because 'getChar' has no checks
 248         if (srcBegin < srcEnd) {
 249             checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value);
 250         }
 251         for (int i = srcBegin; i < srcEnd; i++) {
 252             dst[dstBegin++] = getChar(value, i);
 253         }
 254     }
 255 
 256     /* @see java.lang.String.getBytes(int, int, byte[], int) */
 257     public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
 258         srcBegin <<= 1;
 259         srcEnd <<= 1;
 260         for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) {
 261             dst[dstBegin++] = value[i];
 262         }
 263     }
 264 
 265     @HotSpotIntrinsicCandidate
 266     public static boolean equals(byte[] value, byte[] other) {
 267         if (value.length == other.length) {
 268             int len = value.length >> 1;
 269             for (int i = 0; i < len; i++) {
 270                 if (getChar(value, i) != getChar(other, i)) {
 271                     return false;
 272                 }
 273             }
 274             return true;
 275         }
 276         return false;
 277     }
 278 
 279     @HotSpotIntrinsicCandidate
 280     public static int compareTo(byte[] value, byte[] other) {
 281         int len1 = length(value);
 282         int len2 = length(other);
 283         return compareValues(value, other, len1, len2);
 284     }
 285 
 286     /*
 287      * Checks the boundary and then compares the byte arrays.
 288      */
 289     public static int compareTo(byte[] value, byte[] other, int len1, int len2) {
 290         checkOffset(len1, value);
 291         checkOffset(len2, other);
 292 
 293         return compareValues(value, other, len1, len2);
 294     }
 295 
 296     private static int compareValues(byte[] value, byte[] other, int len1, int len2) {
 297         int lim = Math.min(len1, len2);
 298         for (int k = 0; k < lim; k++) {
 299             char c1 = getChar(value, k);
 300             char c2 = getChar(other, k);
 301             if (c1 != c2) {
 302                 return c1 - c2;
 303             }
 304         }
 305         return len1 - len2;
 306     }
 307 
 308     @HotSpotIntrinsicCandidate
 309     public static int compareToLatin1(byte[] value, byte[] other) {
 310         return -StringLatin1.compareToUTF16(other, value);
 311     }
 312 
 313     public static int compareToLatin1(byte[] value, byte[] other, int len1, int len2) {
 314         return -StringLatin1.compareToUTF16(other, value, len2, len1);
 315     }
 316 
 317     public static int compareToCI(byte[] value, byte[] other) {
 318         int len1 = length(value);
 319         int len2 = length(other);
 320         int lim = Math.min(len1, len2);
 321         for (int k = 0; k < lim; k++) {
 322             char c1 = getChar(value, k);
 323             char c2 = getChar(other, k);
 324             if (c1 != c2) {
 325                 c1 = Character.toUpperCase(c1);
 326                 c2 = Character.toUpperCase(c2);
 327                 if (c1 != c2) {
 328                     c1 = Character.toLowerCase(c1);
 329                     c2 = Character.toLowerCase(c2);
 330                     if (c1 != c2) {
 331                         return c1 - c2;
 332                     }
 333                 }
 334             }
 335         }
 336         return len1 - len2;
 337     }
 338 
 339     public static int compareToCI_Latin1(byte[] value, byte[] other) {
 340         return -StringLatin1.compareToCI_UTF16(other, value);
 341     }
 342 
 343     public static int hashCode(byte[] value) {
 344         int h = 0;
 345         int length = value.length >> 1;
 346         for (int i = 0; i < length; i++) {
 347             h = 31 * h + getChar(value, i);
 348         }
 349         return h;
 350     }
 351 
 352     public static int indexOf(byte[] value, int ch, int fromIndex) {
 353         int max = value.length >> 1;
 354         if (fromIndex < 0) {
 355             fromIndex = 0;
 356         } else if (fromIndex >= max) {
 357             // Note: fromIndex might be near -1>>>1.
 358             return -1;
 359         }
 360         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 361             // handle most cases here (ch is a BMP code point or a
 362             // negative value (invalid code point))
 363             return indexOfChar(value, ch, fromIndex, max);
 364         } else {
 365             return indexOfSupplementary(value, ch, fromIndex, max);
 366         }
 367     }
 368 
 369     @HotSpotIntrinsicCandidate
 370     public static int indexOf(byte[] value, byte[] str) {
 371         if (str.length == 0) {
 372             return 0;
 373         }
 374         if (value.length < str.length) {
 375             return -1;
 376         }
 377         return indexOfUnsafe(value, length(value), str, length(str), 0);
 378     }
 379 
 380     @HotSpotIntrinsicCandidate
 381     public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 382         checkBoundsBeginEnd(fromIndex, valueCount, value);
 383         checkBoundsBeginEnd(0, strCount, str);
 384         return indexOfUnsafe(value, valueCount, str, strCount, fromIndex);
 385     }
 386 
 387 
    /**
     * Searches the first {@code valueCount} chars of {@code value} for the
     * first {@code strCount} chars of {@code str}, starting at char position
     * {@code fromIndex}. Performs no bounds checks of its own; the
     * preconditions are only expressed as assertions.
     *
     * @return the char index of the first match, or -1 if there is none
     */
    private static int indexOfUnsafe(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
        assert fromIndex >= 0;
        assert strCount > 0;
        assert strCount <= length(str);
        assert valueCount >= strCount;
        char first = getChar(str, 0);
        // Last start position at which the whole of str still fits.
        int max = (valueCount - strCount);
        for (int i = fromIndex; i <= max; i++) {
            // Look for first character.
            if (getChar(value, i) != first) {
                // Empty-body loop: advances i to the next candidate or past max.
                while (++i <= max && getChar(value, i) != first);
            }
            // Found first character, now look at the rest of value
            if (i <= max) {
                int j = i + 1;
                int end = j + strCount - 1;
                // Empty-body loop: j stops at the first mismatch, or at end on a full match.
                for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++);
                if (j == end) {
                    // Found whole string.
                    return i;
                }
            }
        }
        return -1;
    }
 413 
 414 
 415     /**
 416      * Handles indexOf Latin1 substring in UTF16 string.
 417      */
 418     @HotSpotIntrinsicCandidate
 419     public static int indexOfLatin1(byte[] value, byte[] str) {
 420         if (str.length == 0) {
 421             return 0;
 422         }
 423         if (length(value) < str.length) {
 424             return -1;
 425         }
 426         return indexOfLatin1Unsafe(value, length(value), str, str.length, 0);
 427     }
 428 
 429     @HotSpotIntrinsicCandidate
 430     public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
 431         checkBoundsBeginEnd(fromIndex, srcCount, src);
 432         String.checkBoundsBeginEnd(0, tgtCount, tgt.length);
 433         return indexOfLatin1Unsafe(src, srcCount, tgt, tgtCount, fromIndex);
 434     }
 435 
    /**
     * Searches the first {@code srcCount} chars of the UTF16 array
     * {@code src} for the first {@code tgtCount} bytes of {@code tgt},
     * starting at char position {@code fromIndex}. Each byte of {@code tgt}
     * is compared as an unsigned value against one char of {@code src}.
     * Performs no bounds checks of its own; the preconditions are only
     * expressed as assertions.
     *
     * @return the char index of the first match, or -1 if there is none
     */
    public static int indexOfLatin1Unsafe(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
        assert fromIndex >= 0;
        assert tgtCount > 0;
        assert tgtCount <= tgt.length;
        assert srcCount >= tgtCount;
        // Widen the first target byte as unsigned so it compares against a char.
        char first = (char)(tgt[0] & 0xff);
        // Last start position at which the whole target still fits.
        int max = (srcCount - tgtCount);
        for (int i = fromIndex; i <= max; i++) {
            // Look for first character.
            if (getChar(src, i) != first) {
                // Empty-body loop: advances i to the next candidate or past max.
                while (++i <= max && getChar(src, i) != first);
            }
            // Found first character, now look at the rest of v2
            if (i <= max) {
                int j = i + 1;
                int end = j + tgtCount - 1;
                // Empty-body loop: j stops at the first mismatch, or at end on a full match.
                for (int k = 1;
                     j < end && getChar(src, j) == (tgt[k] & 0xff);
                     j++, k++);
                if (j == end) {
                    // Found whole string.
                    return i;
                }
            }
        }
        return -1;
    }
 463 
 464     @HotSpotIntrinsicCandidate
 465     private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
 466         checkBoundsBeginEnd(fromIndex, max, value);
 467         return indexOfCharUnsafe(value, ch, fromIndex, max);
 468     }
 469 
 470     private static int indexOfCharUnsafe(byte[] value, int ch, int fromIndex, int max) {
 471         for (int i = fromIndex; i < max; i++) {
 472             if (getChar(value, i) == ch) {
 473                 return i;
 474             }
 475         }
 476         return -1;
 477     }
 478 
 479     /**
 480      * Handles (rare) calls of indexOf with a supplementary character.
 481      */
 482     private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) {
 483         if (Character.isValidCodePoint(ch)) {
 484             final char hi = Character.highSurrogate(ch);
 485             final char lo = Character.lowSurrogate(ch);
 486             checkBoundsBeginEnd(fromIndex, max, value);
 487             for (int i = fromIndex; i < max - 1; i++) {
 488                 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) {
 489                     return i;
 490                 }
 491             }
 492         }
 493         return -1;
 494     }
 495 
    // srcCoder == UTF16 && tgtCoder == UTF16
    //
    // Searches src backwards for the first tgtCount chars of tgt. The scan
    // looks for the last char of the target first, beginning at char position
    // (tgtCount - 1) + fromIndex, and then verifies the preceding chars right
    // to left. Returns the char index where the match starts, or -1.
    // Note: srcCount is not read by this method.
    public static int lastIndexOf(byte[] src, int srcCount,
                                  byte[] tgt, int tgtCount, int fromIndex) {
        assert fromIndex >= 0;
        assert tgtCount > 0;
        assert tgtCount <= length(tgt);
        // Smallest position at which the target's last char can sit.
        int min = tgtCount - 1;
        // Position in src currently aligned with the target's last char.
        int i = min + fromIndex;
        int strLastIndex = tgtCount - 1;

        checkIndex(strLastIndex, tgt);
        char strLastChar = getChar(tgt, strLastIndex);

        // Only the starting position is checked; the scan below only moves downwards.
        checkIndex(i, src);

    startSearchForLastChar:
        while (true) {
            // Step left until src[i] equals the target's last char.
            while (i >= min && getChar(src, i) != strLastChar) {
                i--;
            }
            if (i < min) {
                return -1;
            }
            // Verify the remaining tgtCount - 1 chars, right to left.
            int j = i - 1;
            int start = j - strLastIndex;
            int k = strLastIndex - 1;
            while (j > start) {
                if (getChar(src, j--) != getChar(tgt, k--)) {
                    // Mismatch: retry with the candidate one position to the left.
                    i--;
                    continue startSearchForLastChar;
                }
            }
            // start is one before the first matched char.
            return start + 1;
        }
    }
 531 
 532     public static int lastIndexOf(byte[] value, int ch, int fromIndex) {
 533         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 534             // handle most cases here (ch is a BMP code point or a
 535             // negative value (invalid code point))
 536             int i = Math.min(fromIndex, (value.length >> 1) - 1);
 537             for (; i >= 0; i--) {
 538                 if (getChar(value, i) == ch) {
 539                     return i;
 540                 }
 541             }
 542             return -1;
 543         } else {
 544             return lastIndexOfSupplementary(value, ch, fromIndex);
 545         }
 546     }
 547 
 548     /**
 549      * Handles (rare) calls of lastIndexOf with a supplementary character.
 550      */
 551     private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) {
 552         if (Character.isValidCodePoint(ch)) {
 553             char hi = Character.highSurrogate(ch);
 554             char lo = Character.lowSurrogate(ch);
 555             int i = Math.min(fromIndex, (value.length >> 1) - 2);
 556             for (; i >= 0; i--) {
 557                 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) {
 558                     return i;
 559                 }
 560             }
 561         }
 562         return -1;
 563     }
 564 
 565     public static String replace(byte[] value, char oldChar, char newChar) {
 566         int len = value.length >> 1;
 567         int i = -1;
 568         while (++i < len) {
 569             if (getChar(value, i) == oldChar) {
 570                 break;
 571             }
 572         }
 573         if (i < len) {
 574             byte buf[] = new byte[value.length];
 575             for (int j = 0; j < i; j++) {
 576                 putChar(buf, j, getChar(value, j)); // TBD:arraycopy?
 577             }
 578             while (i < len) {
 579                 char c = getChar(value, i);
 580                 putChar(buf, i, c == oldChar ? newChar : c);
 581                 i++;
 582            }
 583            // Check if we should try to compress to latin1
 584            if (String.COMPACT_STRINGS &&
 585                !StringLatin1.canEncode(oldChar) &&
 586                StringLatin1.canEncode(newChar)) {
 587                byte[] val = compress(buf, 0, len);
 588                if (val != null) {
 589                    return new String(val, LATIN1);
 590                }
 591            }
 592            return new String(buf, UTF16);
 593         }
 594         return null;
 595     }
 596 
 597     public static boolean regionMatchesCI(byte[] value, int toffset,
 598                                           byte[] other, int ooffset, int len) {
 599         int last = toffset + len;
 600         assert toffset >= 0 && ooffset >= 0;
 601         assert ooffset + len <= length(other);
 602         assert last <= length(value);
 603         while (toffset < last) {
 604             char c1 = getChar(value, toffset++);
 605             char c2 = getChar(other, ooffset++);
 606             if (c1 == c2) {
 607                 continue;
 608             }
 609             // try converting both characters to uppercase.
 610             // If the results match, then the comparison scan should
 611             // continue.
 612             char u1 = Character.toUpperCase(c1);
 613             char u2 = Character.toUpperCase(c2);
 614             if (u1 == u2) {
 615                 continue;
 616             }
 617             // Unfortunately, conversion to uppercase does not work properly
 618             // for the Georgian alphabet, which has strange rules about case
 619             // conversion.  So we need to make one last check before
 620             // exiting.
 621             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 622                 continue;
 623             }
 624             return false;
 625         }
 626         return true;
 627     }
 628 
 629     public static boolean regionMatchesCI_Latin1(byte[] value, int toffset,
 630                                                  byte[] other, int ooffset,
 631                                                  int len) {
 632         return StringLatin1.regionMatchesCI_UTF16(other, ooffset, value, toffset, len);
 633     }
 634 
    /**
     * Lower-cases the UTF16 content {@code value} of {@code str}.
     *
     * The method first scans for the first char that is a surrogate or that
     * changes under {@code Character.toLowerCase}; if there is none,
     * {@code str} itself is returned. Otherwise the unchanged prefix is
     * copied and the rest is converted by a fast loop that handles simple
     * BMP-to-BMP mappings. Anything the fast loop cannot handle is handed to
     * {@code toLowerCaseEx}, continuing from the current position.
     *
     * @param str    the string whose content is {@code value}
     * @param value  UTF16 byte array to convert
     * @param locale locale supplying the language code
     * @return {@code str} if nothing needs changing, otherwise a new string
     * @throws NullPointerException if {@code locale} is null
     */
    public static String toLowerCase(String str, byte[] value, Locale locale) {
        if (locale == null) {
            throw new NullPointerException();
        }
        int first;
        boolean hasSurr = false;
        final int len = value.length >> 1;

        // Now check if there are any characters that need to be changed, or are surrogate
        for (first = 0 ; first < len; first++) {
            int cp = (int)getChar(value, first);
            if (Character.isSurrogate((char)cp)) {
                hasSurr = true;
                break;
            }
            if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
                break;
            }
        }
        if (first == len)
            return str;
        byte[] result = new byte[value.length];
        System.arraycopy(value, 0, result, 0, first << 1);  // Just copy the first few
                                                            // lowerCase characters.
        String lang = locale.getLanguage();
        // NOTE(review): reference comparison on strings; presumably relies on
        // Locale returning interned language codes - confirm before changing.
        if (lang == "tr" || lang == "az" || lang == "lt") {
            return toLowerCaseEx(str, value, result, first, locale, true);
        }
        if (hasSurr) {
            return toLowerCaseEx(str, value, result, first, locale, false);
        }
        // OR of every char written by the fast loop below (the copied prefix
        // is not included); used to choose how the result string is built.
        int bits = 0;
        for (int i = first; i < len; i++) {
            int cp = (int)getChar(value, i);
            if (cp == '\u03A3' ||                       // GREEK CAPITAL LETTER SIGMA
                Character.isSurrogate((char)cp)) {
                return toLowerCaseEx(str, value, result, i, locale, false);
            }
            if (cp == '\u0130') {                       // LATIN CAPITAL LETTER I WITH DOT ABOVE
                return toLowerCaseEx(str, value, result, i, locale, true);
            }
            cp = Character.toLowerCase(cp);
            if (!Character.isBmpCodePoint(cp)) {
                return toLowerCaseEx(str, value, result, i, locale, false);
            }
            bits |= cp;
            putChar(result, i, cp);
        }
        if (bits > 0xFF) {
            return new String(result, UTF16);
        } else {
            // presumably newString may pick a more compact coder - confirm
            return newString(result, 0, len);
        }
    }
 689 
    /**
     * Slow path of {@code toLowerCase}: converts {@code value} from char
     * position {@code first} onwards, one code point at a time, writing into
     * {@code result} (whose first {@code first} chars were filled by the
     * caller). Handles surrogate pairs, mappings that produce more chars
     * than they consume, and locale-dependent or context-dependent mappings
     * via {@code ConditionalSpecialCasing}.
     *
     * @param localeDependent when true, every code point is mapped through
     *                        {@code ConditionalSpecialCasing.toLowerCaseEx}
     * @return the string built by {@code newString} from the first
     *         {@code resultOffset} chars of the (possibly regrown) result
     */
    private static String toLowerCaseEx(String str, byte[] value,
                                        byte[] result, int first, Locale locale,
                                        boolean localeDependent) {
        assert(result.length == value.length);
        assert(first >= 0);
        // Next char position to write in result; may run ahead of i when a
        // mapping expands.
        int resultOffset = first;
        int length = value.length >> 1;
        // Number of source chars consumed by the current code point (1 or 2).
        int srcCount;
        for (int i = first; i < length; i += srcCount) {
            int srcChar = getChar(value, i);
            int lowerChar;
            char[] lowerCharArray;
            srcCount = 1;
            if (Character.isSurrogate((char)srcChar)) {
                srcChar = codePointAt(value, i, length);
                srcCount = Character.charCount(srcChar);
            }
            if (localeDependent ||
                srcChar == '\u03A3' ||  // GREEK CAPITAL LETTER SIGMA
                srcChar == '\u0130') {  // LATIN CAPITAL LETTER I WITH DOT ABOVE
                lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
            } else {
                lowerChar = Character.toLowerCase(srcChar);
            }
            if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
                putChar(result, resultOffset++, lowerChar);
            } else {
                // Either a multi-char mapping (signalled by Character.ERROR)
                // or a supplementary code point needing a surrogate pair.
                if (lowerChar == Character.ERROR) {
                    lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
                } else {
                    lowerCharArray = Character.toChars(lowerChar);
                }
                /* Grow result if needed */
                int mapLen = lowerCharArray.length;
                if (mapLen > srcCount) {
                    // Enlarge by exactly the extra chars and keep what was written so far.
                    byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
                    System.arraycopy(result, 0, result2, 0, resultOffset << 1);
                    result = result2;
                }
                assert resultOffset >= 0;
                assert resultOffset + mapLen <= length(result);
                for (int x = 0; x < mapLen; ++x) {
                    putChar(result, resultOffset++, lowerCharArray[x]);
                }
            }
        }
        return newString(result, 0, resultOffset);
    }
 738 
    /**
     * Upper-cases the UTF16 content {@code value} of {@code str}.
     *
     * The method first scans for the first char that is a surrogate or that
     * changes under {@code Character.toUpperCaseEx}; if there is none,
     * {@code str} itself is returned. Otherwise the unchanged prefix is
     * copied and the rest is converted by a fast loop that handles simple
     * BMP-to-BMP mappings. Anything the fast loop cannot handle is handed to
     * {@code toUpperCaseEx}, continuing from the current position.
     *
     * @param str    the string whose content is {@code value}
     * @param value  UTF16 byte array to convert
     * @param locale locale supplying the language code
     * @return {@code str} if nothing needs changing, otherwise a new string
     * @throws NullPointerException if {@code locale} is null
     */
    public static String toUpperCase(String str, byte[] value, Locale locale) {
        if (locale == null) {
            throw new NullPointerException();
        }
        int first;
        boolean hasSurr = false;
        final int len = value.length >> 1;

        // Now check if there are any characters that need to be changed, or are surrogate
        for (first = 0 ; first < len; first++) {
            int cp = (int)getChar(value, first);
            if (Character.isSurrogate((char)cp)) {
                hasSurr = true;
                break;
            }
            if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
                break;
            }
        }
        if (first == len) {
            return str;
        }
        byte[] result = new byte[value.length];
        System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few
                                                           // upperCase characters.
        String lang = locale.getLanguage();
        // NOTE(review): reference comparison on strings; presumably relies on
        // Locale returning interned language codes - confirm before changing.
        if (lang == "tr" || lang == "az" || lang == "lt") {
            return toUpperCaseEx(str, value, result, first, locale, true);
        }
        if (hasSurr) {
            return toUpperCaseEx(str, value, result, first, locale, false);
        }
        // OR of every char written by the fast loop below (the copied prefix
        // is not included); used to choose how the result string is built.
        int bits = 0;
        for (int i = first; i < len; i++) {
            int cp = (int)getChar(value, i);
            if (Character.isSurrogate((char)cp)) {
                return toUpperCaseEx(str, value, result, i, locale, false);
            }
            cp = Character.toUpperCaseEx(cp);
            if (!Character.isBmpCodePoint(cp)) {    // Character.ERROR is not bmp
                return toUpperCaseEx(str, value, result, i, locale, false);
            }
            bits |= cp;
            putChar(result, i, cp);
        }
        if (bits > 0xFF) {
            return new String(result, UTF16);
        } else {
            // presumably newString may pick a more compact coder - confirm
            return newString(result, 0, len);
        }
    }
 790 
 791     private static String toUpperCaseEx(String str, byte[] value,
 792                                         byte[] result, int first,
 793                                         Locale locale, boolean localeDependent)
 794     {
 795         assert(result.length == value.length);
 796         assert(first >= 0);
 797         int resultOffset = first;
 798         int length = value.length >> 1;
 799         int srcCount;
 800         for (int i = first; i < length; i += srcCount) {
 801             int srcChar = getChar(value, i);
 802             int upperChar;
 803             char[] upperCharArray;
 804             srcCount = 1;
 805             if (Character.isSurrogate((char)srcChar)) {
 806                 srcChar = codePointAt(value, i, length);
 807                 srcCount = Character.charCount(srcChar);
 808             }
 809             if (localeDependent) {
 810                 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
 811             } else {
 812                 upperChar = Character.toUpperCaseEx(srcChar);
 813             }
 814             if (Character.isBmpCodePoint(upperChar)) {
 815                 putChar(result, resultOffset++, upperChar);
 816             } else {
 817                 if (upperChar == Character.ERROR) {
 818                     if (localeDependent) {
 819                         upperCharArray =
 820                             ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
 821                     } else {
 822                         upperCharArray = Character.toUpperCaseCharArray(srcChar);
 823                     }
 824                 } else {
 825                     upperCharArray = Character.toChars(upperChar);
 826                 }
 827                 /* Grow result if needed */
 828                 int mapLen = upperCharArray.length;
 829                 if (mapLen > srcCount) {
 830                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 831                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 832                     result = result2;
 833                 }
 834                 assert resultOffset >= 0;
 835                 assert resultOffset + mapLen <= length(result);
 836                 for (int x = 0; x < mapLen; ++x) {
 837                     putChar(result, resultOffset++, upperCharArray[x]);
 838                 }
 839             }
 840         }
 841         return newString(result, 0, resultOffset);
 842     }
 843 
 844     public static String trim(byte[] value) {
 845         int length = value.length >> 1;
 846         int len = length;
 847         int st = 0;
 848         while (st < len && getChar(value, st) <= ' ') {
 849             st++;
 850         }
 851         while (st < len && getChar(value, len - 1) <= ' ') {
 852             len--;
 853         }
 854         return ((st > 0) || (len < length )) ?
 855             new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) :
 856             null;
 857     }
 858 
 859     private static void putChars(byte[] val, int index, char[] str, int off, int end) {
 860         while (off < end) {
 861             putChar(val, index++, str[off++]);
 862         }
 863     }
 864 
 865     public static String newString(byte[] val, int index, int len) {
 866         if (String.COMPACT_STRINGS) {
 867             byte[] buf = compress(val, index, len);
 868             if (buf != null) {
 869                 return new String(buf, LATIN1);
 870             }
 871         }
 872         int last = index + len;
 873         return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16);
 874     }
 875 
 876     public static void fillNull(byte[] val, int index, int end) {
 877         Arrays.fill(val, index << 1, end << 1, (byte)0);
 878     }
 879 
 880     static class CharsSpliterator implements Spliterator.OfInt {
 881         private final byte[] array;
 882         private int index;        // current index, modified on advance/split
 883         private final int fence;  // one past last index
 884         private final int cs;
 885 
 886         CharsSpliterator(byte[] array, int acs) {
 887             this(array, 0, array.length >> 1, acs);
 888         }
 889 
 890         CharsSpliterator(byte[] array, int origin, int fence, int acs) {
 891             this.array = array;
 892             this.index = origin;
 893             this.fence = fence;
 894             this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
 895                       | Spliterator.SUBSIZED;
 896         }
 897 
 898         @Override
 899         public OfInt trySplit() {
 900             int lo = index, mid = (lo + fence) >>> 1;
 901             return (lo >= mid)
 902                    ? null
 903                    : new CharsSpliterator(array, lo, index = mid, cs);
 904         }
 905 
 906         @Override
 907         public void forEachRemaining(IntConsumer action) {
 908             byte[] a; int i, hi; // hoist accesses and checks from loop
 909             if (action == null)
 910                 throw new NullPointerException();
 911             if (((a = array).length >> 1) >= (hi = fence) &&
 912                 (i = index) >= 0 && i < (index = hi)) {
 913                 do {
 914                     action.accept(charAt(a, i));
 915                 } while (++i < hi);
 916             }
 917         }
 918 
 919         @Override
 920         public boolean tryAdvance(IntConsumer action) {
 921             if (action == null)
 922                 throw new NullPointerException();
 923             int i = index;
 924             if (i >= 0 && i < fence) {
 925                 action.accept(charAt(array, i));
 926                 index++;
 927                 return true;
 928             }
 929             return false;
 930         }
 931 
 932         @Override
 933         public long estimateSize() { return (long)(fence - index); }
 934 
 935         @Override
 936         public int characteristics() {
 937             return cs;
 938         }
 939     }
 940 
 941     static class CodePointsSpliterator implements Spliterator.OfInt {
 942         private final byte[] array;
 943         private int index;        // current index, modified on advance/split
 944         private final int fence;  // one past last index
 945         private final int cs;
 946 
 947         CodePointsSpliterator(byte[] array, int acs) {
 948             this(array, 0, array.length >> 1, acs);
 949         }
 950 
 951         CodePointsSpliterator(byte[] array, int origin, int fence, int acs) {
 952             this.array = array;
 953             this.index = origin;
 954             this.fence = fence;
 955             this.cs = acs | Spliterator.ORDERED;
 956         }
 957 
 958         @Override
 959         public OfInt trySplit() {
 960             int lo = index, mid = (lo + fence) >>> 1;
 961             if (lo >= mid)
 962                 return null;
 963 
 964             int midOneLess;
 965             // If the mid-point intersects a surrogate pair
 966             if (Character.isLowSurrogate(charAt(array, mid)) &&
 967                 Character.isHighSurrogate(charAt(array, midOneLess = (mid -1)))) {
 968                 // If there is only one pair it cannot be split
 969                 if (lo >= midOneLess)
 970                     return null;
 971                 // Shift the mid-point to align with the surrogate pair
 972                 return new CodePointsSpliterator(array, lo, index = midOneLess, cs);
 973             }
 974             return new CodePointsSpliterator(array, lo, index = mid, cs);
 975         }
 976 
 977         @Override
 978         public void forEachRemaining(IntConsumer action) {
 979             byte[] a; int i, hi; // hoist accesses and checks from loop
 980             if (action == null)
 981                 throw new NullPointerException();
 982             if (((a = array).length >> 1) >= (hi = fence) &&
 983                 (i = index) >= 0 && i < (index = hi)) {
 984                 do {
 985                     i = advance(a, i, hi, action);
 986                 } while (i < hi);
 987             }
 988         }
 989 
 990         @Override
 991         public boolean tryAdvance(IntConsumer action) {
 992             if (action == null)
 993                 throw new NullPointerException();
 994             if (index >= 0 && index < fence) {
 995                 index = advance(array, index, fence, action);
 996                 return true;
 997             }
 998             return false;
 999         }
1000 
1001         // Advance one code point from the index, i, and return the next
1002         // index to advance from
1003         private static int advance(byte[] a, int i, int hi, IntConsumer action) {
1004             char c1 = charAt(a, i++);
1005             int cp = c1;
1006             if (Character.isHighSurrogate(c1) && i < hi) {
1007                 char c2 = charAt(a, i);
1008                 if (Character.isLowSurrogate(c2)) {
1009                     i++;
1010                     cp = Character.toCodePoint(c1, c2);
1011                 }
1012             }
1013             action.accept(cp);
1014             return i;
1015         }
1016 
1017         @Override
1018         public long estimateSize() { return (long)(fence - index); }
1019 
1020         @Override
1021         public int characteristics() {
1022             return cs;
1023         }
1024     }
1025 
1026     ////////////////////////////////////////////////////////////////
1027 
1028     public static void putCharSB(byte[] val, int index, int c) {
1029         checkIndex(index, val);
1030         putChar(val, index, c);
1031     }
1032 
1033     public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) {
1034         checkBoundsBeginEnd(index, index + end - off, val);
1035         putChars(val, index, ca, off, end);
1036     }
1037 
1038     public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) {
1039         checkBoundsBeginEnd(index, index + end - off, val);
1040         for (int i = off; i < end; i++) {
1041             putChar(val, index++, s.charAt(i));
1042         }
1043     }
1044 
1045     public static int codePointAtSB(byte[] val, int index, int end) {
1046         return codePointAt(val, index, end, true /* checked */);
1047     }
1048 
1049     public static int codePointBeforeSB(byte[] val, int index) {
1050         return codePointBefore(val, index, true /* checked */);
1051     }
1052 
1053     public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) {
1054         return codePointCount(val, beginIndex, endIndex, true /* checked */);
1055     }
1056 
1057     public static int getChars(int i, int begin, int end, byte[] value) {
1058         checkBoundsBeginEnd(begin, end, value);
1059         int pos = getChars(i, end, value);
1060         assert begin == pos;
1061         return pos;
1062     }
1063 
1064     public static int getChars(long l, int begin, int end, byte[] value) {
1065         checkBoundsBeginEnd(begin, end, value);
1066         int pos = getChars(l, end, value);
1067         assert begin == pos;
1068         return pos;
1069     }
1070 
1071     public static boolean contentEquals(byte[] v1, byte[] v2, int len) {
1072         checkBoundsOffCount(0, len, v2);
1073         for (int i = 0; i < len; i++) {
1074             if ((char)(v1[i] & 0xff) != getChar(v2, i)) {
1075                 return false;
1076             }
1077         }
1078         return true;
1079     }
1080 
1081     public static boolean contentEquals(byte[] value, CharSequence cs, int len) {
1082         checkOffset(len, value);
1083         for (int i = 0; i < len; i++) {
1084             if (getChar(value, i) != cs.charAt(i)) {
1085                 return false;
1086             }
1087         }
1088         return true;
1089     }
1090 
1091     public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) {
1092         int end = i + 4;
1093         checkBoundsBeginEnd(i, end, value);
1094         putChar(value, i++, c1);
1095         putChar(value, i++, c2);
1096         putChar(value, i++, c3);
1097         putChar(value, i++, c4);
1098         assert(i == end);
1099         return end;
1100     }
1101 
1102     public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) {
1103         int end = i + 5;
1104         checkBoundsBeginEnd(i, end, value);
1105         putChar(value, i++, c1);
1106         putChar(value, i++, c2);
1107         putChar(value, i++, c3);
1108         putChar(value, i++, c4);
1109         putChar(value, i++, c5);
1110         assert(i == end);
1111         return end;
1112     }
1113 
1114     public static char charAt(byte[] value, int index) {
1115         checkIndex(index, value);
1116         return getChar(value, index);
1117     }
1118 
1119     public static void reverse(byte[] val, int count) {
1120         checkOffset(count, val);
1121         int n = count - 1;
1122         boolean hasSurrogates = false;
1123         for (int j = (n-1) >> 1; j >= 0; j--) {
1124             int k = n - j;
1125             char cj = getChar(val, j);
1126             char ck = getChar(val, k);
1127             putChar(val, j, ck);
1128             putChar(val, k, cj);
1129             if (Character.isSurrogate(cj) ||
1130                 Character.isSurrogate(ck)) {
1131                 hasSurrogates = true;
1132             }
1133         }
1134         if (hasSurrogates) {
1135             reverseAllValidSurrogatePairs(val, count);
1136         }
1137     }
1138 
1139     /** Outlined helper method for reverse() */
1140     private static void reverseAllValidSurrogatePairs(byte[] val, int count) {
1141         for (int i = 0; i < count - 1; i++) {
1142             char c2 = getChar(val, i);
1143             if (Character.isLowSurrogate(c2)) {
1144                 char c1 = getChar(val, i + 1);
1145                 if (Character.isHighSurrogate(c1)) {
1146                     putChar(val, i++, c1);
1147                     putChar(val, i, c2);
1148                 }
1149             }
1150         }
1151     }
1152 
1153     // inflatedCopy byte[] -> byte[]
1154     public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
1155         // We need a range check here because 'putChar' has no checks
1156         checkBoundsOffCount(dstOff, len, dst);
1157         for (int i = 0; i < len; i++) {
1158             putChar(dst, dstOff++, src[srcOff++] & 0xff);
1159         }
1160     }
1161 
1162     // srcCoder == UTF16 && tgtCoder == LATIN1
1163     public static int lastIndexOfLatin1(byte[] src, int srcCount,
1164                                         byte[] tgt, int tgtCount, int fromIndex) {
1165         assert fromIndex >= 0;
1166         assert tgtCount > 0;
1167         assert tgtCount <= tgt.length;
1168         int min = tgtCount - 1;
1169         int i = min + fromIndex;
1170         int strLastIndex = tgtCount - 1;
1171 
1172         char strLastChar = (char)(tgt[strLastIndex] & 0xff);
1173 
1174         checkIndex(i, src);
1175 
1176     startSearchForLastChar:
1177         while (true) {
1178             while (i >= min && getChar(src, i) != strLastChar) {
1179                 i--;
1180             }
1181             if (i < min) {
1182                 return -1;
1183             }
1184             int j = i - 1;
1185             int start = j - strLastIndex;
1186             int k = strLastIndex - 1;
1187             while (j > start) {
1188                 if (getChar(src, j--) != (tgt[k--] & 0xff)) {
1189                     i--;
1190                     continue startSearchForLastChar;
1191                 }
1192             }
1193             return start + 1;
1194         }
1195     }
1196 
1197     ////////////////////////////////////////////////////////////////
1198 
1199     private static native boolean isBigEndian();
1200 
1201     static final int HI_BYTE_SHIFT;
1202     static final int LO_BYTE_SHIFT;
1203     static {
1204         if (isBigEndian()) {
1205             HI_BYTE_SHIFT = 8;
1206             LO_BYTE_SHIFT = 0;
1207         } else {
1208             HI_BYTE_SHIFT = 0;
1209             LO_BYTE_SHIFT = 8;
1210         }
1211     }
1212 
1213     static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;
1214 
1215     // Used by trusted callers.  Assumes all necessary bounds checks have
1216     // been done by the caller.
1217 
1218     /**
1219      * This is a variant of {@link Integer#getChars(int, int, byte[])}, but for
1220      * UTF-16 coder.
1221      *
1222      * @param i     value to convert
1223      * @param index next index, after the least significant digit
1224      * @param buf   target buffer, UTF16-coded.
1225      * @return index of the most significant digit or minus sign, if present
1226      */
1227     static int getChars(int i, int index, byte[] buf) {
1228         int q, r;
1229         int charPos = index;
1230 
1231         boolean negative = (i < 0);
1232         if (!negative) {
1233             i = -i;
1234         }
1235 
1236         // Get 2 digits/iteration using ints
1237         while (i <= -100) {
1238             q = i / 100;
1239             r = (q * 100) - i;
1240             i = q;
1241             putChar(buf, --charPos, Integer.DigitOnes[r]);
1242             putChar(buf, --charPos, Integer.DigitTens[r]);
1243         }
1244 
1245         // We know there are at most two digits left at this point.
1246         q = i / 10;
1247         r = (q * 10) - i;
1248         putChar(buf, --charPos, '0' + r);
1249 
1250         // Whatever left is the remaining digit.
1251         if (q < 0) {
1252             putChar(buf, --charPos, '0' - q);
1253         }
1254 
1255         if (negative) {
1256             putChar(buf, --charPos, '-');
1257         }
1258         return charPos;
1259     }
1260 
1261     /**
1262      * This is a variant of {@link Long#getChars(long, int, byte[])}, but for
1263      * UTF-16 coder.
1264      *
1265      * @param i     value to convert
1266      * @param index next index, after the least significant digit
1267      * @param buf   target buffer, UTF16-coded.
1268      * @return index of the most significant digit or minus sign, if present
1269      */
1270     static int getChars(long i, int index, byte[] buf) {
1271         long q;
1272         int r;
1273         int charPos = index;
1274 
1275         boolean negative = (i < 0);
1276         if (!negative) {
1277             i = -i;
1278         }
1279 
1280         // Get 2 digits/iteration using longs until quotient fits into an int
1281         while (i <= Integer.MIN_VALUE) {
1282             q = i / 100;
1283             r = (int)((q * 100) - i);
1284             i = q;
1285             putChar(buf, --charPos, Integer.DigitOnes[r]);
1286             putChar(buf, --charPos, Integer.DigitTens[r]);
1287         }
1288 
1289         // Get 2 digits/iteration using ints
1290         int q2;
1291         int i2 = (int)i;
1292         while (i2 <= -100) {
1293             q2 = i2 / 100;
1294             r  = (q2 * 100) - i2;
1295             i2 = q2;
1296             putChar(buf, --charPos, Integer.DigitOnes[r]);
1297             putChar(buf, --charPos, Integer.DigitTens[r]);
1298         }
1299 
1300         // We know there are at most two digits left at this point.
1301         q2 = i2 / 10;
1302         r  = (q2 * 10) - i2;
1303         putChar(buf, --charPos, '0' + r);
1304 
1305         // Whatever left is the remaining digit.
1306         if (q2 < 0) {
1307             putChar(buf, --charPos, '0' - q2);
1308         }
1309 
1310         if (negative) {
1311             putChar(buf, --charPos, '-');
1312         }
1313         return charPos;
1314     }
1315     // End of trusted methods.
1316 
1317     public static void checkIndex(int off, byte[] val) {
1318         String.checkIndex(off, length(val));
1319     }
1320 
1321     public static void checkOffset(int off, byte[] val) {
1322         String.checkOffset(off, length(val));
1323     }
1324 
1325     public static void checkBoundsBeginEnd(int begin, int end, byte[] val) {
1326         String.checkBoundsBeginEnd(begin, end, length(val));
1327     }
1328 
1329     public static void checkBoundsOffCount(int offset, int count, byte[] val) {
1330         String.checkBoundsOffCount(offset, count, length(val));
1331     }
1332 
1333 }