Old src/java.base/share/classes/java/lang/StringUTF16.java

   1 /*
   2  * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Spliterator;
  31 import java.util.function.IntConsumer;
  32 import jdk.internal.HotSpotIntrinsicCandidate;
  33 
  34 import static java.lang.String.UTF16;
  35 import static java.lang.String.LATIN1;
  36 import static java.lang.String.checkIndex;
  37 import static java.lang.String.checkOffset;
  38 import static java.lang.String.checkBoundsOffCount;
  39 import static java.lang.String.checkBoundsBeginEnd;
  40 
  41 final class StringUTF16 {
  42 
  43     public static byte[] newBytesFor(int len) {
  44         if (len < 0) {
  45             throw new NegativeArraySizeException();
  46         }
  47         if (len > MAX_LENGTH) {
  48             throw new OutOfMemoryError("UTF16 String size is " + len +
  49                                        ", should be less than " + MAX_LENGTH);
  50         }
  51         return new byte[len << 1];
  52     }
  53 
  54     @HotSpotIntrinsicCandidate
  55     public static void putChar(byte[] val, int index, int c) {
  56         index <<= 1;
  57         val[index++] = (byte)(c >> HI_BYTE_SHIFT);
  58         val[index]   = (byte)(c >> LO_BYTE_SHIFT);
  59     }
  60 
  61     @HotSpotIntrinsicCandidate
  62     public static char getChar(byte[] val, int index) {
  63         index <<= 1;
  64         return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) |
  65                       ((val[index]   & 0xff) << LO_BYTE_SHIFT));
  66     }
  67 
  68     public static char charAt(byte[] value, int index) {
  69         if (index < 0 || index >= value.length >> 1) {
  70             throw new StringIndexOutOfBoundsException(index);
  71         }
  72         return getChar(value, index);
  73     }
  74 
  75     public static int length(byte[] value) {
  76         return value.length >> 1;
  77     }
  78 
  79     public static int codePointAt(byte[] value, int index, int end) {
  80         char c1 = getChar(value, index);
  81         if (Character.isHighSurrogate(c1) && ++index < end) {
  82             char c2 = getChar(value, index);
  83             if (Character.isLowSurrogate(c2)) {
  84                return Character.toCodePoint(c1, c2);
  85             }
  86         }
  87         return c1;
  88     }
  89 
  90     public static int codePointBefore(byte[] value, int index) {
  91         char c2 = getChar(value, --index);
  92         if (Character.isLowSurrogate(c2) && index > 0) {
  93             char c1 = getChar(value, --index);
  94             if (Character.isHighSurrogate(c1)) {
  95                return Character.toCodePoint(c1, c2);
  96             }
  97         }
  98         return c2;
  99     }
 100 
 101     public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
 102         int count = endIndex - beginIndex;
 103         for (int i = beginIndex; i < endIndex; ) {
 104             if (Character.isHighSurrogate(getChar(value, i++)) &&
 105                 i < endIndex &&
 106                 Character.isLowSurrogate(getChar(value, i))) {
 107                 count--;
 108                 i++;
 109             }
 110         }
 111         return count;
 112     }
 113 
 114     public static char[] toChars(byte[] value) {
 115         char[] dst = new char[value.length >> 1];
 116         getChars(value, 0, dst.length, dst, 0);
 117         return dst;
 118     }
 119 
 120     @HotSpotIntrinsicCandidate
 121     public static byte[] toBytes(char[] value, int off, int len) {
 122         byte[] val = newBytesFor(len);
 123         for (int i = 0; i < len; i++) {
 124             putChar(val, i, value[off]);
 125             off++;
 126         }
 127         return val;
 128     }
 129 
 130     public static byte[] compress(char[] val, int off, int len) {
 131         byte[] ret = new byte[len];
 132         if (compress(val, off, ret, 0, len) == len) {
 133             return ret;
 134         }
 135         return null;
 136     }
 137 
 138     public static byte[] compress(byte[] val, int off, int len) {
 139         byte[] ret = new byte[len];
 140         if (compress(val, off, ret, 0, len) == len) {
 141             return ret;
 142         }
 143         return null;
 144     }
 145 
 146     // compressedCopy char[] -> byte[]
 147     @HotSpotIntrinsicCandidate
 148     public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
 149         for (int i = 0; i < len; i++) {
 150             char c = src[srcOff];
 151             if (c > 0xFF) {
 152                 len = 0;
 153                 break;
 154             }
 155             dst[dstOff] = (byte)c;
 156             srcOff++;
 157             dstOff++;
 158         }
 159         return len;
 160     }
 161 
 162     // compressedCopy byte[] -> byte[]
 163     @HotSpotIntrinsicCandidate
 164     public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
 165         // We need a range check here because 'getChar' has no checks
 166         checkBoundsOffCount(srcOff << 1, len << 1, src.length);
 167         for (int i = 0; i < len; i++) {
 168             char c = getChar(src, srcOff);
 169             if (c > 0xFF) {
 170                 len = 0;
 171                 break;
 172             }
 173             dst[dstOff] = (byte)c;
 174             srcOff++;
 175             dstOff++;
 176         }
 177         return len;
 178     }
 179 
 180     public static byte[] toBytes(int[] val, int index, int len) {
 181         final int end = index + len;
 182         // Pass 1: Compute precise size of char[]
 183         int n = len;
 184         for (int i = index; i < end; i++) {
 185             int cp = val[i];
 186             if (Character.isBmpCodePoint(cp))
 187                 continue;
 188             else if (Character.isValidCodePoint(cp))
 189                 n++;
 190             else throw new IllegalArgumentException(Integer.toString(cp));
 191         }
 192         // Pass 2: Allocate and fill in <high, low> pair
 193         byte[] buf = newBytesFor(n);
 194         for (int i = index, j = 0; i < end; i++, j++) {
 195             int cp = val[i];
 196             if (Character.isBmpCodePoint(cp)) {
 197                 putChar(buf, j, cp);
 198             } else {
 199                 putChar(buf, j++, Character.highSurrogate(cp));
 200                 putChar(buf, j, Character.lowSurrogate(cp));
 201             }
 202         }
 203         return buf;
 204     }
 205 
 206     public static byte[] toBytes(char c) {
 207         byte[] result = new byte[2];
 208         putChar(result, 0, c);
 209         return result;
 210     }
 211 
 212     @HotSpotIntrinsicCandidate
 213     public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
 214         // We need a range check here because 'getChar' has no checks
 215         if (srcBegin < srcEnd) {
 216             checkBoundsOffCount(srcBegin << 1, (srcEnd - srcBegin) << 1, value.length);
 217         }
 218         for (int i = srcBegin; i < srcEnd; i++) {
 219             dst[dstBegin++] = getChar(value, i);
 220         }
 221     }
 222 
 223     /* @see java.lang.String.getBytes(int, int, byte[], int) */
 224     public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
 225         srcBegin <<= 1;
 226         srcEnd <<= 1;
 227         for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) {
 228             dst[dstBegin++] = value[i];
 229         }
 230     }
 231 
 232     @HotSpotIntrinsicCandidate
 233     public static boolean equals(byte[] value, byte[] other) {
 234         if (value.length == other.length) {
 235             int len = value.length >> 1;
 236             for (int i = 0; i < len; i++) {
 237                 if (getChar(value, i) != getChar(other, i)) {
 238                     return false;
 239                 }
 240             }
 241             return true;
 242         }
 243         return false;
 244     }
 245 
 246     @HotSpotIntrinsicCandidate
 247     public static int compareTo(byte[] value, byte[] other) {
 248         int len1 = length(value);
 249         int len2 = length(other);
 250         int lim = Math.min(len1, len2);
 251         for (int k = 0; k < lim; k++) {
 252             char c1 = getChar(value, k);
 253             char c2 = getChar(other, k);
 254             if (c1 != c2) {
 255                 return c1 - c2;
 256             }
 257         }
 258         return len1 - len2;
 259     }
 260 
 261     @HotSpotIntrinsicCandidate
 262     public static int compareToLatin1(byte[] value, byte[] other) {
 263         return -StringLatin1.compareToUTF16(other, value);
 264     }
 265 
 266     public static int compareToCI(byte[] value, byte[] other) {
 267         int len1 = length(value);
 268         int len2 = length(other);
 269         int lim = Math.min(len1, len2);
 270         for (int k = 0; k < lim; k++) {
 271             char c1 = getChar(value, k);
 272             char c2 = getChar(other, k);
 273             if (c1 != c2) {
 274                 c1 = Character.toUpperCase(c1);
 275                 c2 = Character.toUpperCase(c2);
 276                 if (c1 != c2) {
 277                     c1 = Character.toLowerCase(c1);
 278                     c2 = Character.toLowerCase(c2);
 279                     if (c1 != c2) {
 280                         return c1 - c2;
 281                     }
 282                 }
 283             }
 284         }
 285         return len1 - len2;
 286     }
 287 
 288     public static int compareToCI_Latin1(byte[] value, byte[] other) {
 289         return -StringLatin1.compareToCI_UTF16(other, value);
 290     }
 291 
 292     public static int hashCode(byte[] value) {
 293         int h = 0;
 294         int length = value.length >> 1;
 295         for (int i = 0; i < length; i++) {
 296             h = 31 * h + getChar(value, i);
 297         }
 298         return h;
 299     }
 300 
 301     public static int indexOf(byte[] value, int ch, int fromIndex) {
 302         int max = value.length >> 1;
 303         if (fromIndex < 0) {
 304             fromIndex = 0;
 305         } else if (fromIndex >= max) {
 306             // Note: fromIndex might be near -1>>>1.
 307             return -1;
 308         }
 309         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 310             // handle most cases here (ch is a BMP code point or a
 311             // negative value (invalid code point))
 312             return indexOfChar(value, ch, fromIndex, max);
 313         } else {
 314             return indexOfSupplementary(value, ch, fromIndex, max);
 315         }
 316     }
 317 
 318     @HotSpotIntrinsicCandidate
 319     public static int indexOf(byte[] value, byte[] str) {
 320         if (str.length == 0) {
 321             return 0;
 322         }
 323         if (value.length == 0) {
 324             return -1;
 325         }
 326         return indexOf(value, length(value), str, length(str), 0);
 327     }
 328 
 329     @HotSpotIntrinsicCandidate
 330     public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 331         char first = getChar(str, 0);
 332         int max = (valueCount - strCount);
 333         for (int i = fromIndex; i <= max; i++) {
 334             // Look for first character.
 335             if (getChar(value, i) != first) {
 336                 while (++i <= max && getChar(value, i) != first);
 337             }
 338             // Found first character, now look at the rest of value
 339             if (i <= max) {
 340                 int j = i + 1;
 341                 int end = j + strCount - 1;
 342                 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++);
 343                 if (j == end) {
 344                     // Found whole string.
 345                     return i;
 346                 }
 347             }
 348         }
 349         return -1;
 350     }
 351 
 352     /**
 353      * Handles indexOf Latin1 substring in UTF16 string.
 354      */
 355     @HotSpotIntrinsicCandidate
 356     public static int indexOfLatin1(byte[] value, byte[] str) {
 357         if (str.length == 0) {
 358             return 0;
 359         }
 360         if (value.length == 0) {
 361             return -1;
 362         }
 363         return indexOfLatin1(value, length(value), str, str.length, 0);
 364     }
 365 
 366     @HotSpotIntrinsicCandidate
 367     public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
 368         char first = (char)(tgt[0] & 0xff);
 369         int max = (srcCount - tgtCount);
 370         for (int i = fromIndex; i <= max; i++) {
 371             // Look for first character.
 372             if (getChar(src, i) != first) {
 373                 while (++i <= max && getChar(src, i) != first);
 374             }
 375             // Found first character, now look at the rest of v2
 376             if (i <= max) {
 377                 int j = i + 1;
 378                 int end = j + tgtCount - 1;
 379                 for (int k = 1;
 380                      j < end && getChar(src, j) == (tgt[k] & 0xff);
 381                      j++, k++);
 382                 if (j == end) {
 383                     // Found whole string.
 384                     return i;
 385                 }
 386             }
 387         }
 388         return -1;
 389     }
 390 
 391     @HotSpotIntrinsicCandidate
 392     private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
 393         for (int i = fromIndex; i < max; i++) {
 394             if (getChar(value, i) == ch) {
 395                 return i;
 396             }
 397         }
 398         return -1;
 399     }
 400 
 401     /**
 402      * Handles (rare) calls of indexOf with a supplementary character.
 403      */
 404     private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) {
 405         if (Character.isValidCodePoint(ch)) {
 406             final char hi = Character.highSurrogate(ch);
 407             final char lo = Character.lowSurrogate(ch);
 408             for (int i = fromIndex; i < max - 1; i++) {
 409                 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) {
 410                     return i;
 411                 }
 412             }
 413         }
 414         return -1;
 415     }
 416 
 417     // srcCoder == UTF16 && tgtCoder == UTF16
 418     public static int lastIndexOf(byte[] src, int srcCount,
 419                                   byte[] tgt, int tgtCount, int fromIndex) {
 420         int min = tgtCount - 1;
 421         int i = min + fromIndex;
 422         int strLastIndex = tgtCount - 1;
 423         char strLastChar = getChar(tgt, strLastIndex);
 424 
 425     startSearchForLastChar:
 426         while (true) {
 427             while (i >= min && getChar(src, i) != strLastChar) {
 428                 i--;
 429             }
 430             if (i < min) {
 431                 return -1;
 432             }
 433             int j = i - 1;
 434             int start = j - strLastIndex;
 435             int k = strLastIndex - 1;
 436             while (j > start) {
 437                 if (getChar(src, j--) != getChar(tgt, k--)) {
 438                     i--;
 439                     continue startSearchForLastChar;
 440                 }
 441             }
 442             return start + 1;
 443         }
 444     }
 445 
 446     public static int lastIndexOf(byte[] value, int ch, int fromIndex) {
 447         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 448             // handle most cases here (ch is a BMP code point or a
 449             // negative value (invalid code point))
 450             int i = Math.min(fromIndex, (value.length >> 1) - 1);
 451             for (; i >= 0; i--) {
 452                 if (getChar(value, i) == ch) {
 453                     return i;
 454                 }
 455             }
 456             return -1;
 457         } else {
 458             return lastIndexOfSupplementary(value, ch, fromIndex);
 459         }
 460     }
 461 
 462     /**
 463      * Handles (rare) calls of lastIndexOf with a supplementary character.
 464      */
 465     private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) {
 466         if (Character.isValidCodePoint(ch)) {
 467             char hi = Character.highSurrogate(ch);
 468             char lo = Character.lowSurrogate(ch);
 469             int i = Math.min(fromIndex, (value.length >> 1) - 2);
 470             for (; i >= 0; i--) {
 471                 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) {
 472                     return i;
 473                 }
 474             }
 475         }
 476         return -1;
 477     }
 478 
 479     public static String replace(byte[] value, char oldChar, char newChar) {
 480         int len = value.length >> 1;
 481         int i = -1;
 482         while (++i < len) {
 483             if (getChar(value, i) == oldChar) {
 484                 break;
 485             }
 486         }
 487         if (i < len) {
 488             byte buf[] = new byte[value.length];
 489             for (int j = 0; j < i; j++) {
 490                 putChar(buf, j, getChar(value, j)); // TBD:arraycopy?
 491             }
 492             while (i < len) {
 493                 char c = getChar(value, i);
 494                 putChar(buf, i, c == oldChar ? newChar : c);
 495                 i++;
 496            }
 497            // Check if we should try to compress to latin1
 498            if (String.COMPACT_STRINGS &&
 499                !StringLatin1.canEncode(oldChar) &&
 500                StringLatin1.canEncode(newChar)) {
 501                byte[] val = compress(buf, 0, len);
 502                if (val != null) {
 503                    return new String(val, LATIN1);
 504                }
 505            }
 506            return new String(buf, UTF16);
 507         }
 508         return null;
 509     }
 510 
 511     public static boolean regionMatchesCI(byte[] value, int toffset,
 512                                           byte[] other, int ooffset, int len) {
 513         int last = toffset + len;
 514         while (toffset < last) {
 515             char c1 = getChar(value, toffset++);
 516             char c2 = getChar(other, ooffset++);
 517             if (c1 == c2) {
 518                 continue;
 519             }
 520             // try converting both characters to uppercase.
 521             // If the results match, then the comparison scan should
 522             // continue.
 523             char u1 = Character.toUpperCase(c1);
 524             char u2 = Character.toUpperCase(c2);
 525             if (u1 == u2) {
 526                 continue;
 527             }
 528             // Unfortunately, conversion to uppercase does not work properly
 529             // for the Georgian alphabet, which has strange rules about case
 530             // conversion.  So we need to make one last check before
 531             // exiting.
 532             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 533                 continue;
 534             }
 535             return false;
 536         }
 537         return true;
 538     }
 539 
 540     public static boolean regionMatchesCI_Latin1(byte[] value, int toffset,
 541                                                  byte[] other, int ooffset,
 542                                                  int len) {
 543         return StringLatin1.regionMatchesCI_UTF16(other, ooffset, value, toffset, len);
 544     }
 545 
 546     public static String toLowerCase(String str, byte[] value, Locale locale) {
 547         if (locale == null) {
 548             throw new NullPointerException();
 549         }
 550         int first;
 551         boolean hasSurr = false;
 552         final int len = value.length >> 1;
 553 
 554         // Now check if there are any characters that need to be changed, or are surrogate
 555         for (first = 0 ; first < len; first++) {
 556             int cp = (int)getChar(value, first);
 557             if (Character.isSurrogate((char)cp)) {
 558                 hasSurr = true;
 559                 break;
 560             }
 561             if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
 562                 break;
 563             }
 564         }
 565         if (first == len)
 566             return str;
 567         byte[] result = new byte[value.length];
 568         System.arraycopy(value, 0, result, 0, first << 1);  // Just copy the first few
 569                                                             // lowerCase characters.
 570         String lang = locale.getLanguage();
 571         if (lang == "tr" || lang == "az" || lang == "lt") {
 572             return toLowerCaseEx(str, value, result, first, locale, true);
 573         }
 574         if (hasSurr) {
 575             return toLowerCaseEx(str, value, result, first, locale, false);
 576         }
 577         int bits = 0;
 578         for (int i = first; i < len; i++) {
 579             int cp = (int)getChar(value, i);
 580             if (cp == '\u03A3' ||                       // GREEK CAPITAL LETTER SIGMA
 581                 Character.isSurrogate((char)cp)) {
 582                 return toLowerCaseEx(str, value, result, i, locale, false);
 583             }
 584             if (cp == '\u0130') {                       // LATIN CAPITAL LETTER I WITH DOT ABOVE
 585                 return toLowerCaseEx(str, value, result, i, locale, true);
 586             }
 587             cp = Character.toLowerCase(cp);
 588             if (!Character.isBmpCodePoint(cp)) {
 589                 return toLowerCaseEx(str, value, result, i, locale, false);
 590             }
 591             bits |= cp;
 592             putChar(result, i, cp);
 593         }
 594         if (bits > 0xFF) {
 595             return new String(result, UTF16);
 596         } else {
 597             return newString(result, 0, len);
 598         }
 599     }
 600 
 601     private static String toLowerCaseEx(String str, byte[] value,
 602                                         byte[] result, int first, Locale locale,
 603                                         boolean localeDependent) {
 604         int resultOffset = first;
 605         int length = value.length >> 1;
 606         int srcCount;
 607         for (int i = first; i < length; i += srcCount) {
 608             int srcChar = getChar(value, i);
 609             int lowerChar;
 610             char[] lowerCharArray;
 611             srcCount = 1;
 612             if (Character.isSurrogate((char)srcChar)) {
 613                 srcChar = codePointAt(value, i, length);
 614                 srcCount = Character.charCount(srcChar);
 615             }
 616             if (localeDependent ||
 617                 srcChar == '\u03A3' ||  // GREEK CAPITAL LETTER SIGMA
 618                 srcChar == '\u0130') {  // LATIN CAPITAL LETTER I WITH DOT ABOVE
 619                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
 620             } else {
 621                 lowerChar = Character.toLowerCase(srcChar);
 622             }
 623             if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
 624                 putChar(result, resultOffset++, lowerChar);
 625             } else {
 626                 if (lowerChar == Character.ERROR) {
 627                     lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
 628                 } else {
 629                     lowerCharArray = Character.toChars(lowerChar);
 630                 }
 631                 /* Grow result if needed */
 632                 int mapLen = lowerCharArray.length;
 633                 if (mapLen > srcCount) {
 634                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 635                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 636                     result = result2;
 637                 }
 638                 for (int x = 0; x < mapLen; ++x) {
 639                     putChar(result, resultOffset++, lowerCharArray[x]);
 640                 }
 641             }
 642         }
 643         return newString(result, 0, resultOffset);
 644     }
 645 
 646     public static String toUpperCase(String str, byte[] value, Locale locale) {
 647         if (locale == null) {
 648             throw new NullPointerException();
 649         }
 650         int first;
 651         boolean hasSurr = false;
 652         final int len = value.length >> 1;
 653 
 654         // Now check if there are any characters that need to be changed, or are surrogate
 655         for (first = 0 ; first < len; first++) {
 656             int cp = (int)getChar(value, first);
 657             if (Character.isSurrogate((char)cp)) {
 658                 hasSurr = true;
 659                 break;
 660             }
 661             if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
 662                 break;
 663             }
 664         }
 665         if (first == len) {
 666             return str;
 667         }
 668         byte[] result = new byte[value.length];
 669         System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few
 670                                                            // upperCase characters.
 671         String lang = locale.getLanguage();
 672         if (lang == "tr" || lang == "az" || lang == "lt") {
 673             return toUpperCaseEx(str, value, result, first, locale, true);
 674         }
 675         if (hasSurr) {
 676             return toUpperCaseEx(str, value, result, first, locale, false);
 677         }
 678         int bits = 0;
 679         for (int i = first; i < len; i++) {
 680             int cp = (int)getChar(value, i);
 681             if (Character.isSurrogate((char)cp)) {
 682                 return toUpperCaseEx(str, value, result, i, locale, false);
 683             }
 684             cp = Character.toUpperCaseEx(cp);
 685             if (!Character.isBmpCodePoint(cp)) {    // Character.ERROR is not bmp
 686                 return toUpperCaseEx(str, value, result, i, locale, false);
 687             }
 688             bits |= cp;
 689             putChar(result, i, cp);
 690         }
 691         if (bits > 0xFF) {
 692             return new String(result, UTF16);
 693         } else {
 694             return newString(result, 0, len);
 695         }
 696     }
 697 
 698     private static String toUpperCaseEx(String str, byte[] value,
 699                                         byte[] result, int first,
 700                                         Locale locale, boolean localeDependent)
 701     {
 702         int resultOffset = first;
 703         int length = value.length >> 1;
 704         int srcCount;
 705         for (int i = first; i < length; i += srcCount) {
 706             int srcChar = getChar(value, i);
 707             int upperChar;
 708             char[] upperCharArray;
 709             srcCount = 1;
 710             if (Character.isSurrogate((char)srcChar)) {
 711                 srcChar = codePointAt(value, i, length);
 712                 srcCount = Character.charCount(srcChar);
 713             }
 714             if (localeDependent) {
 715                 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
 716             } else {
 717                 upperChar = Character.toUpperCaseEx(srcChar);
 718             }
 719             if (Character.isBmpCodePoint(upperChar)) {
 720                 putChar(result, resultOffset++, upperChar);
 721             } else {
 722                 if (upperChar == Character.ERROR) {
 723                     if (localeDependent) {
 724                         upperCharArray =
 725                             ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
 726                     } else {
 727                         upperCharArray = Character.toUpperCaseCharArray(srcChar);
 728                     }
 729                 } else {
 730                     upperCharArray = Character.toChars(upperChar);
 731                 }
 732                 /* Grow result if needed */
 733                 int mapLen = upperCharArray.length;
 734                 if (mapLen > srcCount) {
 735                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 736                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 737                     result = result2;
 738                  }
 739                  for (int x = 0; x < mapLen; ++x) {
 740                     putChar(result, resultOffset++, upperCharArray[x]);
 741                  }
 742             }
 743         }
 744         return newString(result, 0, resultOffset);
 745     }
 746 
 747     public static String trim(byte[] value) {
 748         int length = value.length >> 1;
 749         int len = length;
 750         int st = 0;
 751         while (st < len && getChar(value, st) <= ' ') {
 752             st++;
 753         }
 754         while (st < len && getChar(value, len - 1) <= ' ') {
 755             len--;
 756         }
 757         return ((st > 0) || (len < length )) ?
 758             new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) :
 759             null;
 760     }
 761 
 762     public static void putChars(byte[] val, int index, char[] str, int off, int end) {
 763         while (off < end) {
 764             putChar(val, index++, str[off++]);
 765         }
 766     }
 767 
 768     public static String newString(byte[] val, int index, int len) {
 769         if (String.COMPACT_STRINGS) {
 770             byte[] buf = compress(val, index, len);
 771             if (buf != null) {
 772                 return new String(buf, LATIN1);
 773             }
 774         }
 775         int last = index + len;
 776         return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16);
 777     }
 778 
 779     public static void fillNull(byte[] val, int index, int end) {
 780         Arrays.fill(val, index << 1, end << 1, (byte)0);
 781     }
 782 
 783     static class CharsSpliterator implements Spliterator.OfInt {
 784         private final byte[] array;
 785         private int index;        // current index, modified on advance/split
 786         private final int fence;  // one past last index
 787         private final int cs;
 788 
 789         CharsSpliterator(byte[] array, int acs) {
 790             this(array, 0, array.length >> 1, acs);
 791         }
 792 
 793         CharsSpliterator(byte[] array, int origin, int fence, int acs) {
 794             this.array = array;
 795             this.index = origin;
 796             this.fence = fence;
 797             this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
 798                       | Spliterator.SUBSIZED;
 799         }
 800 
 801         @Override
 802         public OfInt trySplit() {
 803             int lo = index, mid = (lo + fence) >>> 1;
 804             return (lo >= mid)
 805                    ? null
 806                    : new CharsSpliterator(array, lo, index = mid, cs);
 807         }
 808 
 809         @Override
 810         public void forEachRemaining(IntConsumer action) {
 811             byte[] a; int i, hi; // hoist accesses and checks from loop
 812             if (action == null)
 813                 throw new NullPointerException();
 814             if (((a = array).length >> 1) >= (hi = fence) &&
 815                 (i = index) >= 0 && i < (index = hi)) {
 816                 do {
 817                     action.accept(charAt(a, i));
 818                 } while (++i < hi);
 819             }
 820         }
 821 
 822         @Override
 823         public boolean tryAdvance(IntConsumer action) {
 824             if (action == null)
 825                 throw new NullPointerException();
 826             int i = index;
 827             if (i >= 0 && i < fence) {
 828                 action.accept(charAt(array, i));
 829                 index++;
 830                 return true;
 831             }
 832             return false;
 833         }
 834 
 835         @Override
 836         public long estimateSize() { return (long)(fence - index); }
 837 
 838         @Override
 839         public int characteristics() {
 840             return cs;
 841         }
 842     }
 843 
 844     static class CodePointsSpliterator implements Spliterator.OfInt {
 845         private final byte[] array;
 846         private int index;        // current index, modified on advance/split
 847         private final int fence;  // one past last index
 848         private final int cs;
 849 
 850         CodePointsSpliterator(byte[] array, int acs) {
 851             this(array, 0, array.length >> 1, acs);
 852         }
 853 
 854         CodePointsSpliterator(byte[] array, int origin, int fence, int acs) {
 855             this.array = array;
 856             this.index = origin;
 857             this.fence = fence;
 858             this.cs = acs | Spliterator.ORDERED;
 859         }
 860 
 861         @Override
 862         public OfInt trySplit() {
 863             int lo = index, mid = (lo + fence) >>> 1;
 864             if (lo >= mid)
 865                 return null;
 866 
 867             int midOneLess;
 868             // If the mid-point intersects a surrogate pair
 869             if (Character.isLowSurrogate(charAt(array, mid)) &&
 870                 Character.isHighSurrogate(charAt(array, midOneLess = (mid -1)))) {
 871                 // If there is only one pair it cannot be split
 872                 if (lo >= midOneLess)
 873                     return null;
 874                 // Shift the mid-point to align with the surrogate pair
 875                 return new CodePointsSpliterator(array, lo, index = midOneLess, cs);
 876             }
 877             return new CodePointsSpliterator(array, lo, index = mid, cs);
 878         }
 879 
 880         @Override
 881         public void forEachRemaining(IntConsumer action) {
 882             byte[] a; int i, hi; // hoist accesses and checks from loop
 883             if (action == null)
 884                 throw new NullPointerException();
 885             if (((a = array).length >> 1) >= (hi = fence) &&
 886                 (i = index) >= 0 && i < (index = hi)) {
 887                 do {
 888                     i = advance(a, i, hi, action);
 889                 } while (i < hi);
 890             }
 891         }
 892 
 893         @Override
 894         public boolean tryAdvance(IntConsumer action) {
 895             if (action == null)
 896                 throw new NullPointerException();
 897             if (index >= 0 && index < fence) {
 898                 index = advance(array, index, fence, action);
 899                 return true;
 900             }
 901             return false;
 902         }
 903 
 904         // Advance one code point from the index, i, and return the next
 905         // index to advance from
 906         private static int advance(byte[] a, int i, int hi, IntConsumer action) {
 907             char c1 = charAt(a, i++);
 908             int cp = c1;
 909             if (Character.isHighSurrogate(c1) && i < hi) {
 910                 char c2 = charAt(a, i);
 911                 if (Character.isLowSurrogate(c2)) {
 912                     i++;
 913                     cp = Character.toCodePoint(c1, c2);
 914                 }
 915             }
 916             action.accept(cp);
 917             return i;
 918         }
 919 
 920         @Override
 921         public long estimateSize() { return (long)(fence - index); }
 922 
 923         @Override
 924         public int characteristics() {
 925             return cs;
 926         }
 927     }
 928 
 929     ////////////////////////////////////////////////////////////////
 930 
 931     public static void putCharSB(byte[] val, int index, int c) {
 932         checkIndex(index, val.length >> 1);
 933         putChar(val, index, c);
 934     }
 935 
 936     public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) {
 937         checkBoundsOffCount(index, index + end - off, val.length >> 1);
 938         putChars(val, index, ca, off, end);
 939     }
 940 
 941     public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) {
 942         checkBoundsOffCount(index, end - off, val.length >> 1);
 943         for (int i = off; i < end; i++) {
 944             putChar(val, index++, s.charAt(i));
 945         }
 946     }
 947 
 948     public static int codePointAtSB(byte[] val, int index, int end) {
 949         checkOffset(end, val.length >> 1);
 950         return codePointAt(val, index, end);
 951     }
 952 
 953     public static int codePointBeforeSB(byte[] val, int index) {
 954         checkOffset(index, val.length >> 1);
 955         return codePointBefore(val, index);
 956     }
 957 
 958     public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) {
 959         checkBoundsBeginEnd(beginIndex, endIndex, val.length >> 1);
 960         return codePointCount(val, beginIndex, endIndex);
 961     }
 962 
 963     ////////////////////////////////////////////////////////////////
 964 
 965     private static native boolean isBigEndian();
 966 
 967     static final int HI_BYTE_SHIFT;
 968     static final int LO_BYTE_SHIFT;
 969     static {
 970         if (isBigEndian()) {
 971             HI_BYTE_SHIFT = 8;
 972             LO_BYTE_SHIFT = 0;
 973         } else {
 974             HI_BYTE_SHIFT = 0;
 975             LO_BYTE_SHIFT = 8;
 976         }
 977     }
 978 
 979     static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;
 980 }