Old src/java.base/share/classes/java/lang/StringUTF16.java

   1 /*
   2  * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Spliterator;
  31 import java.util.function.IntConsumer;
  32 import jdk.internal.HotSpotIntrinsicCandidate;
  33 import jdk.internal.vm.annotation.ForceInline;
  34 import jdk.internal.vm.annotation.DontInline;
  35 
  36 import static java.lang.String.UTF16;
  37 import static java.lang.String.LATIN1;
  38 
  39 final class StringUTF16 {
  40 
  41     public static byte[] newBytesFor(int len) {
  42         if (len < 0) {
  43             throw new NegativeArraySizeException();
  44         }
  45         if (len > MAX_LENGTH) {
  46             throw new OutOfMemoryError("UTF16 String size is " + len +
  47                                        ", should be less than " + MAX_LENGTH);
  48         }
  49         return new byte[len << 1];
  50     }
  51 
  52     @HotSpotIntrinsicCandidate
  53     // intrinsic performs no bounds checks
  54     static void putChar(byte[] val, int index, int c) {
  55         assert index >= 0 && index < length(val) : "Trusted caller missed bounds check";
  56         index <<= 1;
  57         val[index++] = (byte)(c >> HI_BYTE_SHIFT);
  58         val[index]   = (byte)(c >> LO_BYTE_SHIFT);
  59     }
  60 
  61     @HotSpotIntrinsicCandidate
  62     // intrinsic performs no bounds checks
  63     static char getChar(byte[] val, int index) {
  64         assert index >= 0 && index < length(val) : "Trusted caller missed bounds check";
  65         index <<= 1;
  66         return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) |
  67                       ((val[index]   & 0xff) << LO_BYTE_SHIFT));
  68     }
  69 
  70     public static int length(byte[] value) {
  71         return value.length >> 1;
  72     }
  73 
  74     private static int codePointAt(byte[] value, int index, int end, boolean checked) {
  75         assert index < end;
  76         if (checked) {
  77             checkIndex(index, value);
  78         }
  79         char c1 = getChar(value, index);
  80         if (Character.isHighSurrogate(c1) && ++index < end) {
  81             if (checked) {
  82                 checkIndex(index, value);
  83             }
  84             char c2 = getChar(value, index);
  85             if (Character.isLowSurrogate(c2)) {
  86                return Character.toCodePoint(c1, c2);
  87             }
  88         }
  89         return c1;
  90     }
  91 
  92     public static int codePointAt(byte[] value, int index, int end) {
  93        return codePointAt(value, index, end, false /* unchecked */);
  94     }
  95 
  96     private static int codePointBefore(byte[] value, int index, boolean checked) {
  97         --index;
  98         if (checked) {
  99             checkIndex(index, value);
 100         }
 101         char c2 = getChar(value, index);
 102         if (Character.isLowSurrogate(c2) && index > 0) {
 103             --index;
 104             if (checked) {
 105                 checkIndex(index, value);
 106             }
 107             char c1 = getChar(value, index);
 108             if (Character.isHighSurrogate(c1)) {
 109                return Character.toCodePoint(c1, c2);
 110             }
 111         }
 112         return c2;
 113     }
 114 
 115     public static int codePointBefore(byte[] value, int index) {
 116         return codePointBefore(value, index, false /* unchecked */);
 117     }
 118 
 119     private static int codePointCount(byte[] value, int beginIndex, int endIndex, boolean checked) {
 120         assert beginIndex <= endIndex;
 121         int count = endIndex - beginIndex;
 122         int i = beginIndex;
 123         if (checked && i < endIndex) {
 124             checkBoundsBeginEnd(i, endIndex, value);
 125         }
 126         for (; i < endIndex - 1; ) {
 127             if (Character.isHighSurrogate(getChar(value, i++)) &&
 128                 Character.isLowSurrogate(getChar(value, i))) {
 129                 count--;
 130                 i++;
 131             }
 132         }
 133         return count;
 134     }
 135 
 136     public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
 137         return codePointCount(value, beginIndex, endIndex, false /* unchecked */);
 138     }
 139 
 140     public static char[] toChars(byte[] value) {
 141         char[] dst = new char[value.length >> 1];
 142         getChars(value, 0, dst.length, dst, 0);
 143         return dst;
 144     }
 145 
 146     @HotSpotIntrinsicCandidate
 147     public static byte[] toBytes(char[] value, int off, int len) {
 148         byte[] val = newBytesFor(len);
 149         for (int i = 0; i < len; i++) {
 150             putChar(val, i, value[off]);
 151             off++;
 152         }
 153         return val;
 154     }
 155 
 156     public static byte[] compress(char[] val, int off, int len) {
 157         byte[] ret = new byte[len];
 158         if (compress(val, off, ret, 0, len) == len) {
 159             return ret;
 160         }
 161         return null;
 162     }
 163 
 164     public static byte[] compress(byte[] val, int off, int len) {
 165         byte[] ret = new byte[len];
 166         if (compress(val, off, ret, 0, len) == len) {
 167             return ret;
 168         }
 169         return null;
 170     }
 171 
 172     // compressedCopy char[] -> byte[]
 173     @HotSpotIntrinsicCandidate
 174     public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
 175         for (int i = 0; i < len; i++) {
 176             char c = src[srcOff];
 177             if (c > 0xFF) {
 178                 len = 0;
 179                 break;
 180             }
 181             dst[dstOff] = (byte)c;
 182             srcOff++;
 183             dstOff++;
 184         }
 185         return len;
 186     }
 187 
 188     // compressedCopy byte[] -> byte[]
 189     @HotSpotIntrinsicCandidate
 190     public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
 191         // We need a range check here because 'getChar' has no checks
 192         checkBoundsOffCount(srcOff, len, src);
 193         for (int i = 0; i < len; i++) {
 194             char c = getChar(src, srcOff);
 195             if (c > 0xFF) {
 196                 len = 0;
 197                 break;
 198             }
 199             dst[dstOff] = (byte)c;
 200             srcOff++;
 201             dstOff++;
 202         }
 203         return len;
 204     }
 205 
 206     public static byte[] toBytes(int[] val, int index, int len) {
 207         final int end = index + len;
 208         // Pass 1: Compute precise size of char[]
 209         int n = len;
 210         for (int i = index; i < end; i++) {
 211             int cp = val[i];
 212             if (Character.isBmpCodePoint(cp))
 213                 continue;
 214             else if (Character.isValidCodePoint(cp))
 215                 n++;
 216             else throw new IllegalArgumentException(Integer.toString(cp));
 217         }
 218         // Pass 2: Allocate and fill in <high, low> pair
 219         byte[] buf = newBytesFor(n);
 220         for (int i = index, j = 0; i < end; i++, j++) {
 221             int cp = val[i];
 222             if (Character.isBmpCodePoint(cp)) {
 223                 putChar(buf, j, cp);
 224             } else {
 225                 putChar(buf, j++, Character.highSurrogate(cp));
 226                 putChar(buf, j, Character.lowSurrogate(cp));
 227             }
 228         }
 229         return buf;
 230     }
 231 
 232     public static byte[] toBytes(char c) {
 233         byte[] result = new byte[2];
 234         putChar(result, 0, c);
 235         return result;
 236     }
 237 
 238     @HotSpotIntrinsicCandidate
 239     public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
 240         // We need a range check here because 'getChar' has no checks
 241         if (srcBegin < srcEnd) {
 242             checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value);
 243         }
 244         for (int i = srcBegin; i < srcEnd; i++) {
 245             dst[dstBegin++] = getChar(value, i);
 246         }
 247     }
 248 
 249     /* @see java.lang.String.getBytes(int, int, byte[], int) */
 250     public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
 251         srcBegin <<= 1;
 252         srcEnd <<= 1;
 253         for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) {
 254             dst[dstBegin++] = value[i];
 255         }
 256     }
 257 
 258     @HotSpotIntrinsicCandidate
 259     public static boolean equals(byte[] value, byte[] other) {
 260         if (value.length == other.length) {
 261             int len = value.length >> 1;
 262             for (int i = 0; i < len; i++) {
 263                 if (getChar(value, i) != getChar(other, i)) {
 264                     return false;
 265                 }
 266             }
 267             return true;
 268         }
 269         return false;
 270     }
 271 
 272     @HotSpotIntrinsicCandidate
 273     public static int compareTo(byte[] value, byte[] other) {
 274         int len1 = length(value);
 275         int len2 = length(other);
 276         int lim = Math.min(len1, len2);
 277         for (int k = 0; k < lim; k++) {
 278             char c1 = getChar(value, k);
 279             char c2 = getChar(other, k);
 280             if (c1 != c2) {
 281                 return c1 - c2;
 282             }
 283         }
 284         return len1 - len2;
 285     }
 286 
 287     @HotSpotIntrinsicCandidate
 288     public static int compareToLatin1(byte[] value, byte[] other) {
 289         return -StringLatin1.compareToUTF16(other, value);
 290     }
 291 
 292     public static int compareToCI(byte[] value, byte[] other) {
 293         int len1 = length(value);
 294         int len2 = length(other);
 295         int lim = Math.min(len1, len2);
 296         for (int k = 0; k < lim; k++) {
 297             char c1 = getChar(value, k);
 298             char c2 = getChar(other, k);
 299             if (c1 != c2) {
 300                 c1 = Character.toUpperCase(c1);
 301                 c2 = Character.toUpperCase(c2);
 302                 if (c1 != c2) {
 303                     c1 = Character.toLowerCase(c1);
 304                     c2 = Character.toLowerCase(c2);
 305                     if (c1 != c2) {
 306                         return c1 - c2;
 307                     }
 308                 }
 309             }
 310         }
 311         return len1 - len2;
 312     }
 313 
 314     public static int compareToCI_Latin1(byte[] value, byte[] other) {
 315         return -StringLatin1.compareToCI_UTF16(other, value);
 316     }
 317 
 318     public static int hashCode(byte[] value) {
 319         int h = 0;
 320         int length = value.length >> 1;
 321         for (int i = 0; i < length; i++) {
 322             h = 31 * h + getChar(value, i);
 323         }
 324         return h;
 325     }
 326 
 327     public static int indexOf(byte[] value, int ch, int fromIndex) {
 328         int max = value.length >> 1;
 329         if (fromIndex < 0) {
 330             fromIndex = 0;
 331         } else if (fromIndex >= max) {
 332             // Note: fromIndex might be near -1>>>1.
 333             return -1;
 334         }
 335         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 336             // handle most cases here (ch is a BMP code point or a
 337             // negative value (invalid code point))
 338             return indexOfChar(value, ch, fromIndex, max);
 339         } else {
 340             return indexOfSupplementary(value, ch, fromIndex, max);
 341         }
 342     }
 343 
 344     @HotSpotIntrinsicCandidate
 345     public static int indexOf(byte[] value, byte[] str) {
 346         if (str.length == 0) {
 347             return 0;
 348         }
 349         if (value.length < str.length) {
 350             return -1;
 351         }
 352         return indexOfUnsafe(value, length(value), str, length(str), 0);
 353     }
 354 
 355     @HotSpotIntrinsicCandidate
 356     public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 357         checkBoundsBeginEnd(fromIndex, valueCount, value);
 358         checkBoundsBeginEnd(0, strCount, str);
 359         return indexOfUnsafe(value, valueCount, str, strCount, fromIndex);
 360     }
 361 
 362 
 363     private static int indexOfUnsafe(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 364         assert fromIndex >= 0;
 365         assert strCount > 0;
 366         assert strCount <= length(str);
 367         assert valueCount >= strCount;
 368         char first = getChar(str, 0);
 369         int max = (valueCount - strCount);
 370         for (int i = fromIndex; i <= max; i++) {
 371             // Look for first character.
 372             if (getChar(value, i) != first) {
 373                 while (++i <= max && getChar(value, i) != first);
 374             }
 375             // Found first character, now look at the rest of value
 376             if (i <= max) {
 377                 int j = i + 1;
 378                 int end = j + strCount - 1;
 379                 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++);
 380                 if (j == end) {
 381                     // Found whole string.
 382                     return i;
 383                 }
 384             }
 385         }
 386         return -1;
 387     }
 388 
 389 
 390     /**
 391      * Handles indexOf Latin1 substring in UTF16 string.
 392      */
 393     @HotSpotIntrinsicCandidate
 394     public static int indexOfLatin1(byte[] value, byte[] str) {
 395         if (str.length == 0) {
 396             return 0;
 397         }
 398         if (length(value) < str.length) {
 399             return -1;
 400         }
 401         return indexOfLatin1Unsafe(value, length(value), str, str.length, 0);
 402     }
 403 
 404     @HotSpotIntrinsicCandidate
 405     public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
 406         checkBoundsBeginEnd(fromIndex, srcCount, src);
 407         String.checkBoundsBeginEnd(0, tgtCount, tgt.length);
 408         return indexOfLatin1Unsafe(src, srcCount, tgt, tgtCount, fromIndex);
 409     }
 410 
 411     public static int indexOfLatin1Unsafe(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
 412         assert fromIndex >= 0;
 413         assert tgtCount > 0;
 414         assert tgtCount <= tgt.length;
 415         assert srcCount >= tgtCount;
 416         char first = (char)(tgt[0] & 0xff);
 417         int max = (srcCount - tgtCount);
 418         for (int i = fromIndex; i <= max; i++) {
 419             // Look for first character.
 420             if (getChar(src, i) != first) {
 421                 while (++i <= max && getChar(src, i) != first);
 422             }
 423             // Found first character, now look at the rest of v2
 424             if (i <= max) {
 425                 int j = i + 1;
 426                 int end = j + tgtCount - 1;
 427                 for (int k = 1;
 428                      j < end && getChar(src, j) == (tgt[k] & 0xff);
 429                      j++, k++);
 430                 if (j == end) {
 431                     // Found whole string.
 432                     return i;
 433                 }
 434             }
 435         }
 436         return -1;
 437     }
 438 
 439     @HotSpotIntrinsicCandidate
 440     private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
 441         checkBoundsBeginEnd(fromIndex, max, value);
 442         return indexOfCharUnsafe(value, ch, fromIndex, max);
 443     }
 444 
 445     private static int indexOfCharUnsafe(byte[] value, int ch, int fromIndex, int max) {
 446         for (int i = fromIndex; i < max; i++) {
 447             if (getChar(value, i) == ch) {
 448                 return i;
 449             }
 450         }
 451         return -1;
 452     }
 453 
 454     /**
 455      * Handles (rare) calls of indexOf with a supplementary character.
 456      */
 457     private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) {
 458         if (Character.isValidCodePoint(ch)) {
 459             final char hi = Character.highSurrogate(ch);
 460             final char lo = Character.lowSurrogate(ch);
 461             checkBoundsBeginEnd(fromIndex, max, value);
 462             for (int i = fromIndex; i < max - 1; i++) {
 463                 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) {
 464                     return i;
 465                 }
 466             }
 467         }
 468         return -1;
 469     }
 470 
 471     // srcCoder == UTF16 && tgtCoder == UTF16
 472     public static int lastIndexOf(byte[] src, int srcCount,
 473                                   byte[] tgt, int tgtCount, int fromIndex) {
 474         assert fromIndex >= 0;
 475         assert tgtCount > 0;
 476         assert tgtCount <= length(tgt);
 477         int min = tgtCount - 1;
 478         int i = min + fromIndex;
 479         int strLastIndex = tgtCount - 1;
 480 
 481         checkIndex(strLastIndex, tgt);
 482         char strLastChar = getChar(tgt, strLastIndex);
 483 
 484         checkIndex(i, src);
 485 
 486     startSearchForLastChar:
 487         while (true) {
 488             while (i >= min && getChar(src, i) != strLastChar) {
 489                 i--;
 490             }
 491             if (i < min) {
 492                 return -1;
 493             }
 494             int j = i - 1;
 495             int start = j - strLastIndex;
 496             int k = strLastIndex - 1;
 497             while (j > start) {
 498                 if (getChar(src, j--) != getChar(tgt, k--)) {
 499                     i--;
 500                     continue startSearchForLastChar;
 501                 }
 502             }
 503             return start + 1;
 504         }
 505     }
 506 
 507     public static int lastIndexOf(byte[] value, int ch, int fromIndex) {
 508         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 509             // handle most cases here (ch is a BMP code point or a
 510             // negative value (invalid code point))
 511             int i = Math.min(fromIndex, (value.length >> 1) - 1);
 512             for (; i >= 0; i--) {
 513                 if (getChar(value, i) == ch) {
 514                     return i;
 515                 }
 516             }
 517             return -1;
 518         } else {
 519             return lastIndexOfSupplementary(value, ch, fromIndex);
 520         }
 521     }
 522 
 523     /**
 524      * Handles (rare) calls of lastIndexOf with a supplementary character.
 525      */
 526     private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) {
 527         if (Character.isValidCodePoint(ch)) {
 528             char hi = Character.highSurrogate(ch);
 529             char lo = Character.lowSurrogate(ch);
 530             int i = Math.min(fromIndex, (value.length >> 1) - 2);
 531             for (; i >= 0; i--) {
 532                 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) {
 533                     return i;
 534                 }
 535             }
 536         }
 537         return -1;
 538     }
 539 
 540     public static String replace(byte[] value, char oldChar, char newChar) {
 541         int len = value.length >> 1;
 542         int i = -1;
 543         while (++i < len) {
 544             if (getChar(value, i) == oldChar) {
 545                 break;
 546             }
 547         }
 548         if (i < len) {
 549             byte buf[] = new byte[value.length];
 550             for (int j = 0; j < i; j++) {
 551                 putChar(buf, j, getChar(value, j)); // TBD:arraycopy?
 552             }
 553             while (i < len) {
 554                 char c = getChar(value, i);
 555                 putChar(buf, i, c == oldChar ? newChar : c);
 556                 i++;
 557            }
 558            // Check if we should try to compress to latin1
 559            if (String.COMPACT_STRINGS &&
 560                !StringLatin1.canEncode(oldChar) &&
 561                StringLatin1.canEncode(newChar)) {
 562                byte[] val = compress(buf, 0, len);
 563                if (val != null) {
 564                    return new String(val, LATIN1);
 565                }
 566            }
 567            return new String(buf, UTF16);
 568         }
 569         return null;
 570     }
 571 
 572     public static boolean regionMatchesCI(byte[] value, int toffset,
 573                                           byte[] other, int ooffset, int len) {
 574         int last = toffset + len;
 575         assert toffset >= 0 && ooffset >= 0;
 576         assert ooffset + len <= length(other);
 577         assert last <= length(value);
 578         while (toffset < last) {
 579             char c1 = getChar(value, toffset++);
 580             char c2 = getChar(other, ooffset++);
 581             if (c1 == c2) {
 582                 continue;
 583             }
 584             // try converting both characters to uppercase.
 585             // If the results match, then the comparison scan should
 586             // continue.
 587             char u1 = Character.toUpperCase(c1);
 588             char u2 = Character.toUpperCase(c2);
 589             if (u1 == u2) {
 590                 continue;
 591             }
 592             // Unfortunately, conversion to uppercase does not work properly
 593             // for the Georgian alphabet, which has strange rules about case
 594             // conversion.  So we need to make one last check before
 595             // exiting.
 596             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 597                 continue;
 598             }
 599             return false;
 600         }
 601         return true;
 602     }
 603 
 604     public static boolean regionMatchesCI_Latin1(byte[] value, int toffset,
 605                                                  byte[] other, int ooffset,
 606                                                  int len) {
 607         return StringLatin1.regionMatchesCI_UTF16(other, ooffset, value, toffset, len);
 608     }
 609 
 610     public static String toLowerCase(String str, byte[] value, Locale locale) {
 611         if (locale == null) {
 612             throw new NullPointerException();
 613         }
 614         int first;
 615         boolean hasSurr = false;
 616         final int len = value.length >> 1;
 617 
 618         // Now check if there are any characters that need to be changed, or are surrogate
 619         for (first = 0 ; first < len; first++) {
 620             int cp = (int)getChar(value, first);
 621             if (Character.isSurrogate((char)cp)) {
 622                 hasSurr = true;
 623                 break;
 624             }
 625             if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
 626                 break;
 627             }
 628         }
 629         if (first == len)
 630             return str;
 631         byte[] result = new byte[value.length];
 632         System.arraycopy(value, 0, result, 0, first << 1);  // Just copy the first few
 633                                                             // lowerCase characters.
 634         String lang = locale.getLanguage();
 635         if (lang == "tr" || lang == "az" || lang == "lt") {
 636             return toLowerCaseEx(str, value, result, first, locale, true);
 637         }
 638         if (hasSurr) {
 639             return toLowerCaseEx(str, value, result, first, locale, false);
 640         }
 641         int bits = 0;
 642         for (int i = first; i < len; i++) {
 643             int cp = (int)getChar(value, i);
 644             if (cp == '\u03A3' ||                       // GREEK CAPITAL LETTER SIGMA
 645                 Character.isSurrogate((char)cp)) {
 646                 return toLowerCaseEx(str, value, result, i, locale, false);
 647             }
 648             if (cp == '\u0130') {                       // LATIN CAPITAL LETTER I WITH DOT ABOVE
 649                 return toLowerCaseEx(str, value, result, i, locale, true);
 650             }
 651             cp = Character.toLowerCase(cp);
 652             if (!Character.isBmpCodePoint(cp)) {
 653                 return toLowerCaseEx(str, value, result, i, locale, false);
 654             }
 655             bits |= cp;
 656             putChar(result, i, cp);
 657         }
 658         if (bits > 0xFF) {
 659             return new String(result, UTF16);
 660         } else {
 661             return newString(result, 0, len);
 662         }
 663     }
 664 
    /**
     * Slow path of {@code toLowerCase}: converts the chars of {@code value}
     * from char index {@code first} onward one code point at a time, handling
     * surrogate pairs, locale-dependent mappings and mappings that produce
     * more than one char.
     *
     * @param str             the original string, passed to
     *                        {@code ConditionalSpecialCasing} for context
     * @param value           UTF16 byte array of {@code str}
     * @param result          output buffer of the same length as {@code value};
     *                        chars before {@code first} are expected to be
     *                        filled in by the caller
     * @param first           char index at which conversion starts
     * @param locale          locale whose case rules apply
     * @param localeDependent when true, every code point is mapped through
     *                        {@code ConditionalSpecialCasing}
     * @return a string built from the first {@code resultOffset} chars of the
     *         (possibly regrown) result buffer
     */
    private static String toLowerCaseEx(String str, byte[] value,
                                        byte[] result, int first, Locale locale,
                                        boolean localeDependent) {
        assert(result.length == value.length);
        assert(first >= 0);
        // Write position in result; can run ahead of the read position i
        // when a mapping expands to more chars than it consumed.
        int resultOffset = first;
        int length = value.length >> 1;
        // Number of source chars consumed by the current code point (1 or 2).
        int srcCount;
        for (int i = first; i < length; i += srcCount) {
            int srcChar = getChar(value, i);
            int lowerChar;
            char[] lowerCharArray;
            srcCount = 1;
            if (Character.isSurrogate((char)srcChar)) {
                // Re-read as a full code point; srcCount becomes 2 for a valid pair.
                srcChar = codePointAt(value, i, length);
                srcCount = Character.charCount(srcChar);
            }
            if (localeDependent ||
                srcChar == '\u03A3' ||  // GREEK CAPITAL LETTER SIGMA
                srcChar == '\u0130') {  // LATIN CAPITAL LETTER I WITH DOT ABOVE
                lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
            } else {
                lowerChar = Character.toLowerCase(srcChar);
            }
            if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
                putChar(result, resultOffset++, lowerChar);
            } else {
                // Either a multi-char mapping (signalled by Character.ERROR)
                // or a supplementary code point that needs a surrogate pair.
                if (lowerChar == Character.ERROR) {
                    lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
                } else {
                    lowerCharArray = Character.toChars(lowerChar);
                }
                /* Grow result if needed */
                int mapLen = lowerCharArray.length;
                if (mapLen > srcCount) {
                    // Enlarge by the number of extra chars, keeping what was written so far.
                    byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
                    System.arraycopy(result, 0, result2, 0, resultOffset << 1);
                    result = result2;
                }
                assert resultOffset >= 0;
                assert resultOffset + mapLen <= length(result);
                for (int x = 0; x < mapLen; ++x) {
                    putChar(result, resultOffset++, lowerCharArray[x]);
                }
            }
        }
        return newString(result, 0, resultOffset);
    }
 713 
 714     public static String toUpperCase(String str, byte[] value, Locale locale) {
 715         if (locale == null) {
 716             throw new NullPointerException();
 717         }
 718         int first;
 719         boolean hasSurr = false;
 720         final int len = value.length >> 1;
 721 
 722         // Now check if there are any characters that need to be changed, or are surrogate
 723         for (first = 0 ; first < len; first++) {
 724             int cp = (int)getChar(value, first);
 725             if (Character.isSurrogate((char)cp)) {
 726                 hasSurr = true;
 727                 break;
 728             }
 729             if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
 730                 break;
 731             }
 732         }
 733         if (first == len) {
 734             return str;
 735         }
 736         byte[] result = new byte[value.length];
 737         System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few
 738                                                            // upperCase characters.
 739         String lang = locale.getLanguage();
 740         if (lang == "tr" || lang == "az" || lang == "lt") {
 741             return toUpperCaseEx(str, value, result, first, locale, true);
 742         }
 743         if (hasSurr) {
 744             return toUpperCaseEx(str, value, result, first, locale, false);
 745         }
 746         int bits = 0;
 747         for (int i = first; i < len; i++) {
 748             int cp = (int)getChar(value, i);
 749             if (Character.isSurrogate((char)cp)) {
 750                 return toUpperCaseEx(str, value, result, i, locale, false);
 751             }
 752             cp = Character.toUpperCaseEx(cp);
 753             if (!Character.isBmpCodePoint(cp)) {    // Character.ERROR is not bmp
 754                 return toUpperCaseEx(str, value, result, i, locale, false);
 755             }
 756             bits |= cp;
 757             putChar(result, i, cp);
 758         }
 759         if (bits > 0xFF) {
 760             return new String(result, UTF16);
 761         } else {
 762             return newString(result, 0, len);
 763         }
 764     }
 765 
    /**
     * Slow path of {@code toUpperCase}: converts the chars of {@code value}
     * from char index {@code first} onward one code point at a time, handling
     * surrogate pairs, locale-dependent mappings and mappings that produce
     * more than one char.
     *
     * @param str             the original string, passed to
     *                        {@code ConditionalSpecialCasing} for context
     * @param value           UTF16 byte array of {@code str}
     * @param result          output buffer of the same length as {@code value};
     *                        chars before {@code first} are expected to be
     *                        filled in by the caller
     * @param first           char index at which conversion starts
     * @param locale          locale whose case rules apply
     * @param localeDependent when true, every code point is mapped through
     *                        {@code ConditionalSpecialCasing}
     * @return a string built from the first {@code resultOffset} chars of the
     *         (possibly regrown) result buffer
     */
    private static String toUpperCaseEx(String str, byte[] value,
                                        byte[] result, int first,
                                        Locale locale, boolean localeDependent)
    {
        assert(result.length == value.length);
        assert(first >= 0);
        // Write position in result; can run ahead of the read position i
        // when a mapping expands to more chars than it consumed.
        int resultOffset = first;
        int length = value.length >> 1;
        // Number of source chars consumed by the current code point (1 or 2).
        int srcCount;
        for (int i = first; i < length; i += srcCount) {
            int srcChar = getChar(value, i);
            int upperChar;
            char[] upperCharArray;
            srcCount = 1;
            if (Character.isSurrogate((char)srcChar)) {
                // Re-read as a full code point; srcCount becomes 2 for a valid pair.
                srcChar = codePointAt(value, i, length);
                srcCount = Character.charCount(srcChar);
            }
            if (localeDependent) {
                upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
            } else {
                upperChar = Character.toUpperCaseEx(srcChar);
            }
            if (Character.isBmpCodePoint(upperChar)) {
                putChar(result, resultOffset++, upperChar);
            } else {
                // Either a multi-char mapping (signalled by Character.ERROR)
                // or a supplementary code point that needs a surrogate pair.
                if (upperChar == Character.ERROR) {
                    if (localeDependent) {
                        upperCharArray =
                            ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
                    } else {
                        upperCharArray = Character.toUpperCaseCharArray(srcChar);
                    }
                } else {
                    upperCharArray = Character.toChars(upperChar);
                }
                /* Grow result if needed */
                int mapLen = upperCharArray.length;
                if (mapLen > srcCount) {
                    // Enlarge by the number of extra chars, keeping what was written so far.
                    byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
                    System.arraycopy(result, 0, result2, 0, resultOffset << 1);
                    result = result2;
                }
                assert resultOffset >= 0;
                assert resultOffset + mapLen <= length(result);
                for (int x = 0; x < mapLen; ++x) {
                    putChar(result, resultOffset++, upperCharArray[x]);
                }
            }
        }
        return newString(result, 0, resultOffset);
    }
 818 
 819     public static String trim(byte[] value) {
 820         int length = value.length >> 1;
 821         int len = length;
 822         int st = 0;
 823         while (st < len && getChar(value, st) <= ' ') {
 824             st++;
 825         }
 826         while (st < len && getChar(value, len - 1) <= ' ') {
 827             len--;
 828         }
 829         return ((st > 0) || (len < length )) ?
 830             new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) :
 831             null;
 832     }
 833 
 834     private static void putChars(byte[] val, int index, char[] str, int off, int end) {
 835         while (off < end) {
 836             putChar(val, index++, str[off++]);
 837         }
 838     }
 839 
 840     public static String newString(byte[] val, int index, int len) {
 841         if (String.COMPACT_STRINGS) {
 842             byte[] buf = compress(val, index, len);
 843             if (buf != null) {
 844                 return new String(buf, LATIN1);
 845             }
 846         }
 847         int last = index + len;
 848         return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16);
 849     }
 850 
 851     public static void fillNull(byte[] val, int index, int end) {
 852         Arrays.fill(val, index << 1, end << 1, (byte)0);
 853     }
 854 
 855     static class CharsSpliterator implements Spliterator.OfInt {
 856         private final byte[] array;
 857         private int index;        // current index, modified on advance/split
 858         private final int fence;  // one past last index
 859         private final int cs;
 860 
 861         CharsSpliterator(byte[] array, int acs) {
 862             this(array, 0, array.length >> 1, acs);
 863         }
 864 
 865         CharsSpliterator(byte[] array, int origin, int fence, int acs) {
 866             this.array = array;
 867             this.index = origin;
 868             this.fence = fence;
 869             this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
 870                       | Spliterator.SUBSIZED;
 871         }
 872 
 873         @Override
 874         public OfInt trySplit() {
 875             int lo = index, mid = (lo + fence) >>> 1;
 876             return (lo >= mid)
 877                    ? null
 878                    : new CharsSpliterator(array, lo, index = mid, cs);
 879         }
 880 
 881         @Override
 882         public void forEachRemaining(IntConsumer action) {
 883             byte[] a; int i, hi; // hoist accesses and checks from loop
 884             if (action == null)
 885                 throw new NullPointerException();
 886             if (((a = array).length >> 1) >= (hi = fence) &&
 887                 (i = index) >= 0 && i < (index = hi)) {
 888                 do {
 889                     action.accept(charAt(a, i));
 890                 } while (++i < hi);
 891             }
 892         }
 893 
 894         @Override
 895         public boolean tryAdvance(IntConsumer action) {
 896             if (action == null)
 897                 throw new NullPointerException();
 898             int i = index;
 899             if (i >= 0 && i < fence) {
 900                 action.accept(charAt(array, i));
 901                 index++;
 902                 return true;
 903             }
 904             return false;
 905         }
 906 
 907         @Override
 908         public long estimateSize() { return (long)(fence - index); }
 909 
 910         @Override
 911         public int characteristics() {
 912             return cs;
 913         }
 914     }
 915 
 916     static class CodePointsSpliterator implements Spliterator.OfInt {
 917         private final byte[] array;
 918         private int index;        // current index, modified on advance/split
 919         private final int fence;  // one past last index
 920         private final int cs;
 921 
 922         CodePointsSpliterator(byte[] array, int acs) {
 923             this(array, 0, array.length >> 1, acs);
 924         }
 925 
 926         CodePointsSpliterator(byte[] array, int origin, int fence, int acs) {
 927             this.array = array;
 928             this.index = origin;
 929             this.fence = fence;
 930             this.cs = acs | Spliterator.ORDERED;
 931         }
 932 
 933         @Override
 934         public OfInt trySplit() {
 935             int lo = index, mid = (lo + fence) >>> 1;
 936             if (lo >= mid)
 937                 return null;
 938 
 939             int midOneLess;
 940             // If the mid-point intersects a surrogate pair
 941             if (Character.isLowSurrogate(charAt(array, mid)) &&
 942                 Character.isHighSurrogate(charAt(array, midOneLess = (mid -1)))) {
 943                 // If there is only one pair it cannot be split
 944                 if (lo >= midOneLess)
 945                     return null;
 946                 // Shift the mid-point to align with the surrogate pair
 947                 return new CodePointsSpliterator(array, lo, index = midOneLess, cs);
 948             }
 949             return new CodePointsSpliterator(array, lo, index = mid, cs);
 950         }
 951 
 952         @Override
 953         public void forEachRemaining(IntConsumer action) {
 954             byte[] a; int i, hi; // hoist accesses and checks from loop
 955             if (action == null)
 956                 throw new NullPointerException();
 957             if (((a = array).length >> 1) >= (hi = fence) &&
 958                 (i = index) >= 0 && i < (index = hi)) {
 959                 do {
 960                     i = advance(a, i, hi, action);
 961                 } while (i < hi);
 962             }
 963         }
 964 
 965         @Override
 966         public boolean tryAdvance(IntConsumer action) {
 967             if (action == null)
 968                 throw new NullPointerException();
 969             if (index >= 0 && index < fence) {
 970                 index = advance(array, index, fence, action);
 971                 return true;
 972             }
 973             return false;
 974         }
 975 
 976         // Advance one code point from the index, i, and return the next
 977         // index to advance from
 978         private static int advance(byte[] a, int i, int hi, IntConsumer action) {
 979             char c1 = charAt(a, i++);
 980             int cp = c1;
 981             if (Character.isHighSurrogate(c1) && i < hi) {
 982                 char c2 = charAt(a, i);
 983                 if (Character.isLowSurrogate(c2)) {
 984                     i++;
 985                     cp = Character.toCodePoint(c1, c2);
 986                 }
 987             }
 988             action.accept(cp);
 989             return i;
 990         }
 991 
 992         @Override
 993         public long estimateSize() { return (long)(fence - index); }
 994 
 995         @Override
 996         public int characteristics() {
 997             return cs;
 998         }
 999     }
1000 
1001     ////////////////////////////////////////////////////////////////
1002 
1003     public static void putCharSB(byte[] val, int index, int c) {
1004         checkIndex(index, val);
1005         putChar(val, index, c);
1006     }
1007 
1008     public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) {
1009         checkBoundsBeginEnd(index, index + end - off, val);
1010         putChars(val, index, ca, off, end);
1011     }
1012 
1013     public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) {
1014         checkBoundsBeginEnd(index, index + end - off, val);
1015         for (int i = off; i < end; i++) {
1016             putChar(val, index++, s.charAt(i));
1017         }
1018     }
1019 
1020     public static int codePointAtSB(byte[] val, int index, int end) {
1021         return codePointAt(val, index, end, true /* checked */);
1022     }
1023 
1024     public static int codePointBeforeSB(byte[] val, int index) {
1025         return codePointBefore(val, index, true /* checked */);
1026     }
1027 
1028     public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) {
1029         return codePointCount(val, beginIndex, endIndex, true /* checked */);
1030     }
1031 
1032     public static int getChars(int i, int begin, int end, byte[] value) {
1033         checkBoundsBeginEnd(begin, end, value);
1034         int pos = getChars(i, end, value);
1035         assert begin == pos;
1036         return pos;
1037     }
1038 
1039     public static int getChars(long l, int begin, int end, byte[] value) {
1040         checkBoundsBeginEnd(begin, end, value);
1041         int pos = getChars(l, end, value);
1042         assert begin == pos;
1043         return pos;
1044     }
1045 
1046     public static boolean contentEquals(byte[] v1, byte[] v2, int len) {
1047         checkBoundsOffCount(0, len, v2);
1048         for (int i = 0; i < len; i++) {
1049             if ((char)(v1[i] & 0xff) != getChar(v2, i)) {
1050                 return false;
1051             }
1052         }
1053         return true;
1054     }
1055 
1056     public static boolean contentEquals(byte[] value, CharSequence cs, int len) {
1057         checkOffset(len, value);
1058         for (int i = 0; i < len; i++) {
1059             if (getChar(value, i) != cs.charAt(i)) {
1060                 return false;
1061             }
1062         }
1063         return true;
1064     }
1065 
1066     public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4) {
1067         int end = i + 4;
1068         checkBoundsBeginEnd(i, end, value);
1069         putChar(value, i++, c1);
1070         putChar(value, i++, c2);
1071         putChar(value, i++, c3);
1072         putChar(value, i++, c4);
1073         assert(i == end);
1074         return end;
1075     }
1076 
1077     public static int putCharsAt(byte[] value, int i, char c1, char c2, char c3, char c4, char c5) {
1078         int end = i + 5;
1079         checkBoundsBeginEnd(i, end, value);
1080         putChar(value, i++, c1);
1081         putChar(value, i++, c2);
1082         putChar(value, i++, c3);
1083         putChar(value, i++, c4);
1084         putChar(value, i++, c5);
1085         assert(i == end);
1086         return end;
1087     }
1088 
1089     public static char charAt(byte[] value, int index) {
1090         checkIndex(index, value);
1091         return getChar(value, index);
1092     }
1093 
1094     public static void reverse(byte[] val, int count) {
1095         checkOffset(count, val);
1096         int n = count - 1;
1097         boolean hasSurrogates = false;
1098         for (int j = (n-1) >> 1; j >= 0; j--) {
1099             int k = n - j;
1100             char cj = getChar(val, j);
1101             char ck = getChar(val, k);
1102             putChar(val, j, ck);
1103             putChar(val, k, cj);
1104             if (Character.isSurrogate(cj) ||
1105                 Character.isSurrogate(ck)) {
1106                 hasSurrogates = true;
1107             }
1108         }
1109         if (hasSurrogates) {
1110             reverseAllValidSurrogatePairs(val, count);
1111         }
1112     }
1113 
1114     /** Outlined helper method for reverse() */
1115     private static void reverseAllValidSurrogatePairs(byte[] val, int count) {
1116         for (int i = 0; i < count - 1; i++) {
1117             char c2 = getChar(val, i);
1118             if (Character.isLowSurrogate(c2)) {
1119                 char c1 = getChar(val, i + 1);
1120                 if (Character.isHighSurrogate(c1)) {
1121                     putChar(val, i++, c1);
1122                     putChar(val, i, c2);
1123                 }
1124             }
1125         }
1126     }
1127 
1128     // inflatedCopy byte[] -> byte[]
1129     public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
1130         // We need a range check here because 'putChar' has no checks
1131         checkBoundsOffCount(dstOff, len, dst);
1132         for (int i = 0; i < len; i++) {
1133             putChar(dst, dstOff++, src[srcOff++] & 0xff);
1134         }
1135     }
1136 
1137     // srcCoder == UTF16 && tgtCoder == LATIN1
1138     public static int lastIndexOfLatin1(byte[] src, int srcCount,
1139                                         byte[] tgt, int tgtCount, int fromIndex) {
1140         assert fromIndex >= 0;
1141         assert tgtCount > 0;
1142         assert tgtCount <= tgt.length;
1143         int min = tgtCount - 1;
1144         int i = min + fromIndex;
1145         int strLastIndex = tgtCount - 1;
1146 
1147         char strLastChar = (char)(tgt[strLastIndex] & 0xff);
1148 
1149         checkIndex(i, src);
1150 
1151     startSearchForLastChar:
1152         while (true) {
1153             while (i >= min && getChar(src, i) != strLastChar) {
1154                 i--;
1155             }
1156             if (i < min) {
1157                 return -1;
1158             }
1159             int j = i - 1;
1160             int start = j - strLastIndex;
1161             int k = strLastIndex - 1;
1162             while (j > start) {
1163                 if (getChar(src, j--) != (tgt[k--] & 0xff)) {
1164                     i--;
1165                     continue startSearchForLastChar;
1166                 }
1167             }
1168             return start + 1;
1169         }
1170     }
1171 
1172     ////////////////////////////////////////////////////////////////
1173 
1174     private static native boolean isBigEndian();
1175 
1176     static final int HI_BYTE_SHIFT;
1177     static final int LO_BYTE_SHIFT;
1178     static {
1179         if (isBigEndian()) {
1180             HI_BYTE_SHIFT = 8;
1181             LO_BYTE_SHIFT = 0;
1182         } else {
1183             HI_BYTE_SHIFT = 0;
1184             LO_BYTE_SHIFT = 8;
1185         }
1186     }
1187 
1188     static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;
1189 
1190     // Used by trusted callers.  Assumes all necessary bounds checks have
1191     // been done by the caller.
1192 
1193     /**
1194      * This is a variant of {@link Integer#getChars(int, int, byte[])}, but for
1195      * UTF-16 coder.
1196      *
1197      * @param i     value to convert
1198      * @param index next index, after the least significant digit
1199      * @param buf   target buffer, UTF16-coded.
1200      * @return index of the most significant digit or minus sign, if present
1201      */
1202     static int getChars(int i, int index, byte[] buf) {
1203         int q, r;
1204         int charPos = index;
1205 
1206         boolean negative = (i < 0);
1207         if (!negative) {
1208             i = -i;
1209         }
1210 
1211         // Get 2 digits/iteration using ints
1212         while (i <= -100) {
1213             q = i / 100;
1214             r = (q * 100) - i;
1215             i = q;
1216             putChar(buf, --charPos, Integer.DigitOnes[r]);
1217             putChar(buf, --charPos, Integer.DigitTens[r]);
1218         }
1219 
1220         // We know there are at most two digits left at this point.
1221         q = i / 10;
1222         r = (q * 10) - i;
1223         putChar(buf, --charPos, '0' + r);
1224 
1225         // Whatever left is the remaining digit.
1226         if (q < 0) {
1227             putChar(buf, --charPos, '0' - q);
1228         }
1229 
1230         if (negative) {
1231             putChar(buf, --charPos, '-');
1232         }
1233         return charPos;
1234     }
1235 
1236     /**
1237      * This is a variant of {@link Long#getChars(long, int, byte[])}, but for
1238      * UTF-16 coder.
1239      *
1240      * @param i     value to convert
1241      * @param index next index, after the least significant digit
1242      * @param buf   target buffer, UTF16-coded.
1243      * @return index of the most significant digit or minus sign, if present
1244      */
1245     static int getChars(long i, int index, byte[] buf) {
1246         long q;
1247         int r;
1248         int charPos = index;
1249 
1250         boolean negative = (i < 0);
1251         if (!negative) {
1252             i = -i;
1253         }
1254 
1255         // Get 2 digits/iteration using longs until quotient fits into an int
1256         while (i <= Integer.MIN_VALUE) {
1257             q = i / 100;
1258             r = (int)((q * 100) - i);
1259             i = q;
1260             putChar(buf, --charPos, Integer.DigitOnes[r]);
1261             putChar(buf, --charPos, Integer.DigitTens[r]);
1262         }
1263 
1264         // Get 2 digits/iteration using ints
1265         int q2;
1266         int i2 = (int)i;
1267         while (i2 <= -100) {
1268             q2 = i2 / 100;
1269             r  = (q2 * 100) - i2;
1270             i2 = q2;
1271             putChar(buf, --charPos, Integer.DigitOnes[r]);
1272             putChar(buf, --charPos, Integer.DigitTens[r]);
1273         }
1274 
1275         // We know there are at most two digits left at this point.
1276         q2 = i2 / 10;
1277         r  = (q2 * 10) - i2;
1278         putChar(buf, --charPos, '0' + r);
1279 
1280         // Whatever left is the remaining digit.
1281         if (q2 < 0) {
1282             putChar(buf, --charPos, '0' - q2);
1283         }
1284 
1285         if (negative) {
1286             putChar(buf, --charPos, '-');
1287         }
1288         return charPos;
1289     }
1290     // End of trusted methods.
1291 
1292     public static void checkIndex(int off, byte[] val) {
1293         String.checkIndex(off, length(val));
1294     }
1295 
1296     public static void checkOffset(int off, byte[] val) {
1297         String.checkOffset(off, length(val));
1298     }
1299 
1300     public static void checkBoundsBeginEnd(int begin, int end, byte[] val) {
1301         String.checkBoundsBeginEnd(begin, end, length(val));
1302     }
1303 
1304     public static void checkBoundsOffCount(int offset, int count, byte[] val) {
1305         String.checkBoundsOffCount(offset, count, length(val));
1306     }
1307 
1308 }