Old src/java.base/share/classes/java/lang/StringUTF16.java

   1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Spliterator;
  31 import java.util.function.IntConsumer;
  32 import jdk.internal.HotSpotIntrinsicCandidate;
  33 
  34 import static java.lang.String.UTF16;
  35 import static java.lang.String.LATIN1;
  36 import static java.lang.String.checkIndex;
  37 import static java.lang.String.checkOffset;
  38 import static java.lang.String.checkBoundsOffCount;
  39 
  40 final class StringUTF16 {
  41 
  42     public static byte[] newBytesFor(int len) {
  43         if (len < 0) {
  44             throw new NegativeArraySizeException();
  45         }
  46         if (len > MAX_LENGTH) {
  47             throw new OutOfMemoryError("UTF16 String size is " + len +
  48                                        ", should be less than " + MAX_LENGTH);
  49         }
  50         return new byte[len << 1];
  51     }
  52 
  53     @HotSpotIntrinsicCandidate
  54     public static void putChar(byte[] val, int index, int c) {
  55         index <<= 1;
  56         val[index++] = (byte)(c >> HI_BYTE_SHIFT);
  57         val[index]   = (byte)(c >> LO_BYTE_SHIFT);
  58     }
  59 
  60     @HotSpotIntrinsicCandidate
  61     public static char getChar(byte[] val, int index) {
  62         index <<= 1;
  63         return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) |
  64                       ((val[index]   & 0xff) << LO_BYTE_SHIFT));
  65     }
  66 
  67     public static char charAt(byte[] value, int index) {
  68         if (index < 0 || index >= value.length >> 1) {
  69             throw new StringIndexOutOfBoundsException(index);
  70         }
  71         return getChar(value, index);
  72     }
  73 
  74     public static int length(byte[] value) {
  75         return value.length >> 1;
  76     }
  77 
  78     public static int codePointAt(byte[] value, int index, int end) {
  79         char c1 = getChar(value, index);
  80         if (Character.isHighSurrogate(c1) && ++index < end) {
  81             char c2 = getChar(value, index);
  82             if (Character.isLowSurrogate(c2)) {
  83                return Character.toCodePoint(c1, c2);
  84             }
  85         }
  86         return c1;
  87     }
  88 
  89     public static int codePointBefore(byte[] value, int index) {
  90         char c2 = getChar(value, --index);
  91         if (Character.isLowSurrogate(c2) && index > 0) {
  92             char c1 = getChar(value, --index);
  93             if (Character.isHighSurrogate(c1)) {
  94                return Character.toCodePoint(c1, c2);
  95             }
  96         }
  97         return c2;
  98     }
  99 
 100     public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
 101         int count = endIndex - beginIndex;
 102         for (int i = beginIndex; i < endIndex; ) {
 103             if (Character.isHighSurrogate(getChar(value, i++)) &&
 104                 i < endIndex &&
 105                 Character.isLowSurrogate(getChar(value, i))) {
 106                 count--;
 107                 i++;
 108             }
 109         }
 110         return count;
 111     }
 112 
 113     public static char[] toChars(byte[] value) {
 114         char[] dst = new char[value.length >> 1];
 115         getChars(value, 0, dst.length, dst, 0);
 116         return dst;
 117     }
 118 
 119     @HotSpotIntrinsicCandidate
 120     public static byte[] toBytes(char[] value, int off, int len) {
 121         byte[] val = newBytesFor(len);
 122         for (int i = 0; i < len; i++) {
 123             putChar(val, i, value[off++]);
 124         }
 125         return val;
 126     }
 127 
 128     public static byte[] compress(char[] val, int off, int len) {
 129         byte[] ret = new byte[len];
 130         if (compress(val, off, ret, 0, len) == len) {
 131             return ret;
 132         }
 133         return null;
 134     }
 135 
 136     public static byte[] compress(byte[] val, int off, int len) {
 137         byte[] ret = new byte[len];
 138         if (compress(val, off, ret, 0, len) == len) {
 139             return ret;
 140         }
 141         return null;
 142     }
 143 
 144     // compressedCopy char[] -> byte[]
 145     @HotSpotIntrinsicCandidate
 146     private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
 147         for (int i = 0; i < len; i++) {
 148             int c = src[srcOff++];
 149             if (c >>> 8 != 0) {
 150                 return 0;
 151             }
 152             dst[dstOff++] = (byte)c;
 153         }
 154         return len;
 155     }
 156 
 157     // compressedCopy byte[] -> byte[]
 158     @HotSpotIntrinsicCandidate
 159     public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
 160         // We need a range check here because 'getChar' has no checks
 161         checkBoundsOffCount(srcOff, len, src.length);
 162         for (int i = 0; i < len; i++) {
 163             int c = getChar(src, srcOff++);
 164             if (c >>> 8 != 0) {
 165                 return 0;
 166             }
 167             dst[dstOff++] = (byte)c;
 168         }
 169         return len;
 170     }
 171 
 172     public static byte[] toBytes(int[] val, int index, int len) {
 173         final int end = index + len;
 174         // Pass 1: Compute precise size of char[]
 175         int n = len;
 176         for (int i = index; i < end; i++) {
 177             int cp = val[i];
 178             if (Character.isBmpCodePoint(cp))
 179                 continue;
 180             else if (Character.isValidCodePoint(cp))
 181                 n++;
 182             else throw new IllegalArgumentException(Integer.toString(cp));
 183         }
 184         // Pass 2: Allocate and fill in <high, low> pair
 185         byte[] buf = newBytesFor(n);
 186         for (int i = index, j = 0; i < end; i++, j++) {
 187             int cp = val[i];
 188             if (Character.isBmpCodePoint(cp)) {
 189                 putChar(buf, j, cp);
 190             } else {
 191                 putChar(buf, j++, Character.highSurrogate(cp));
 192                 putChar(buf, j, Character.lowSurrogate(cp));
 193             }
 194         }
 195         return buf;
 196     }
 197 
 198     public static byte[] toBytes(char c) {
 199         byte[] result = new byte[2];
 200         putChar(result, 0, c);
 201         return result;
 202     }
 203 
 204     @HotSpotIntrinsicCandidate
 205     public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
 206         // We need a range check here because 'getChar' has no checks
 207         checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value.length);
 208         for (int i = srcBegin; i < srcEnd; i++) {
 209             dst[dstBegin++] = getChar(value, i);
 210         }
 211     }
 212 
 213     /* @see java.lang.String.getBytes(int, int, byte[], int) */
 214     public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
 215         srcBegin <<= 1;
 216         srcEnd <<= 1;
 217         for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) {
 218             dst[dstBegin++] = value[i];
 219         }
 220     }
 221 
 222     @HotSpotIntrinsicCandidate
 223     public static boolean equals(byte[] value, byte[] other) {
 224         if (value.length == other.length) {
 225             int len = value.length >> 1;
 226             for (int i = 0; i < len; i++) {
 227                 if (getChar(value, i) != getChar(other, i)) {
 228                     return false;
 229                 }
 230             }
 231             return true;
 232         }
 233         return false;
 234     }
 235 
 236     @HotSpotIntrinsicCandidate
 237     public static int compareTo(byte[] value, byte[] other) {
 238         int len1 = length(value);
 239         int len2 = length(other);
 240         int lim = Math.min(len1, len2);
 241         for (int k = 0; k < lim; k++) {
 242             char c1 = getChar(value, k);
 243             char c2 = getChar(other, k);
 244             if (c1 != c2) {
 245                 return c1 - c2;
 246             }
 247         }
 248         return len1 - len2;
 249     }
 250 
 251     @HotSpotIntrinsicCandidate
 252     public static int compareToLatin1(byte[] value, byte[] other) {
 253         int len1 = length(value);
 254         int len2 = StringLatin1.length(other);
 255         int lim = Math.min(len1, len2);
 256         for (int k = 0; k < lim; k++) {
 257             char c1 = getChar(value, k);
 258             char c2 = StringLatin1.getChar(other, k);
 259             if (c1 != c2) {
 260                 return c1 - c2;
 261             }
 262         }
 263         return len1 - len2;
 264     }
 265 
 266     public static int hashCode(byte[] value) {
 267         int h = 0;
 268         int length = value.length >> 1;
 269         for (int i = 0; i < length; i++) {
 270             h = 31 * h + getChar(value, i);
 271         }
 272         return h;
 273     }
 274 
 275     public static int indexOf(byte[] value, int ch, int fromIndex) {
 276         int max = value.length >> 1;
 277         if (fromIndex < 0) {
 278             fromIndex = 0;
 279         } else if (fromIndex >= max) {
 280             // Note: fromIndex might be near -1>>>1.
 281             return -1;
 282         }
 283         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 284             // handle most cases here (ch is a BMP code point or a
 285             // negative value (invalid code point))
 286             return indexOfChar(value, ch, fromIndex, max);
 287         } else {
 288             return indexOfSupplementary(value, ch, fromIndex, max);
 289         }
 290     }
 291 
 292     @HotSpotIntrinsicCandidate
 293     public static int indexOf(byte[] value, byte[] str) {
 294         if (str.length == 0) {
 295             return 0;
 296         }
 297         if (value.length == 0) {
 298             return -1;
 299         }
 300         return indexOf(value, length(value), str, length(str), 0);
 301     }
 302 
 303     @HotSpotIntrinsicCandidate
 304     public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 305         char first = getChar(str, 0);
 306         int max = (valueCount - strCount);
 307         for (int i = fromIndex; i <= max; i++) {
 308             // Look for first character.
 309             if (getChar(value, i) != first) {
 310                 while (++i <= max && getChar(value, i) != first);
 311             }
 312             // Found first character, now look at the rest of value
 313             if (i <= max) {
 314                 int j = i + 1;
 315                 int end = j + strCount - 1;
 316                 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++);
 317                 if (j == end) {
 318                     // Found whole string.
 319                     return i;
 320                 }
 321             }
 322         }
 323         return -1;
 324     }
 325 
 326     /**
 327      * Handles indexOf Latin1 substring in UTF16 string.
 328      */
 329     @HotSpotIntrinsicCandidate
 330     public static int indexOfLatin1(byte[] value, byte[] str) {
 331         if (str.length == 0) {
 332             return 0;
 333         }
 334         if (value.length == 0) {
 335             return -1;
 336         }
 337         return indexOfLatin1(value, length(value), str, str.length, 0);
 338     }
 339 
 340     @HotSpotIntrinsicCandidate
 341     public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
 342         char first = (char)(tgt[0] & 0xff);
 343         int max = (srcCount - tgtCount);
 344         for (int i = fromIndex; i <= max; i++) {
 345             // Look for first character.
 346             if (getChar(src, i) != first) {
 347                 while (++i <= max && getChar(src, i) != first);
 348             }
 349             // Found first character, now look at the rest of v2
 350             if (i <= max) {
 351                 int j = i + 1;
 352                 int end = j + tgtCount - 1;
 353                 for (int k = 1;
 354                      j < end && getChar(src, j) == (tgt[k] & 0xff);
 355                      j++, k++);
 356                 if (j == end) {
 357                     // Found whole string.
 358                     return i;
 359                 }
 360             }
 361         }
 362         return -1;
 363     }
 364 
 365     @HotSpotIntrinsicCandidate
 366     private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
 367         for (int i = fromIndex; i < max; i++) {
 368             if (getChar(value, i) == ch) {
 369                 return i;
 370             }
 371         }
 372         return -1;
 373     }
 374 
 375     /**
 376      * Handles (rare) calls of indexOf with a supplementary character.
 377      */
 378     private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) {
 379         if (Character.isValidCodePoint(ch)) {
 380             final char hi = Character.highSurrogate(ch);
 381             final char lo = Character.lowSurrogate(ch);
 382             for (int i = fromIndex; i < max - 1; i++) {
 383                 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) {
 384                     return i;
 385                 }
 386             }
 387         }
 388         return -1;
 389     }
 390 
 391     public static int lastIndexOf(byte[] src, int srcCount,
 392                                   byte[] tgt, int tgtCount, int fromIndex) {
 393         int min = tgtCount - 1;
 394         int i = min + fromIndex;
 395         int strLastIndex = tgtCount - 1;
 396         char strLastChar = getChar(tgt, strLastIndex);
 397 
 398     startSearchForLastChar:
 399         while (true) {
 400             while (i >= min && getChar(src, i) != strLastChar) {
 401                 i--;
 402             }
 403             if (i < min) {
 404                 return -1;
 405             }
 406             int j = i - 1;
 407             int start = j - strLastIndex;
 408             int k = strLastIndex - 1;
 409             while (j > start) {
 410                 if (getChar(src, j--) != getChar(tgt, k--)) {
 411                     i--;
 412                     continue startSearchForLastChar;
 413                 }
 414             }
 415             return start + 1;
 416         }
 417     }
 418 
 419     public static int lastIndexOf(byte[] value, int ch, int fromIndex) {
 420         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 421             // handle most cases here (ch is a BMP code point or a
 422             // negative value (invalid code point))
 423             int i = Math.min(fromIndex, (value.length >> 1) - 1);
 424             for (; i >= 0; i--) {
 425                 if (getChar(value, i) == ch) {
 426                     return i;
 427                 }
 428             }
 429             return -1;
 430         } else {
 431             return lastIndexOfSupplementary(value, ch, fromIndex);
 432         }
 433     }
 434 
 435     /**
 436      * Handles (rare) calls of lastIndexOf with a supplementary character.
 437      */
 438     private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) {
 439         if (Character.isValidCodePoint(ch)) {
 440             char hi = Character.highSurrogate(ch);
 441             char lo = Character.lowSurrogate(ch);
 442             int i = Math.min(fromIndex, (value.length >> 1) - 2);
 443             for (; i >= 0; i--) {
 444                 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) {
 445                     return i;
 446                 }
 447             }
 448         }
 449         return -1;
 450     }
 451 
 452     public static String replace(byte[] value, char oldChar, char newChar) {
 453         int len = value.length >> 1;
 454         int i = -1;
 455         while (++i < len) {
 456             if (getChar(value, i) == oldChar) {
 457                 break;
 458             }
 459         }
 460         if (i < len) {
 461             byte buf[] = new byte[value.length];
 462             for (int j = 0; j < i; j++) {
 463                 putChar(buf, j, getChar(value, j)); // TBD:arraycopy?
 464             }
 465             while (i < len) {
 466                 char c = getChar(value, i);
 467                 putChar(buf, i, c == oldChar ? newChar : c);
 468                 i++;
 469            }
 470            // Check if we should try to compress to latin1
 471            if (String.COMPACT_STRINGS &&
 472                !StringLatin1.canEncode(oldChar) &&
 473                StringLatin1.canEncode(newChar)) {
 474                byte[] val = compress(buf, 0, len);
 475                if (val != null) {
 476                    return new String(val, LATIN1);
 477                }
 478            }
 479            return new String(buf, UTF16);
 480         }
 481         return null;
 482     }
 483 
 484     public static boolean regionMatchesCI(byte[] value, int toffset,
 485                                           byte[] other, int ooffset, int len) {
 486         int last = toffset + len;
 487         while (toffset < last) {
 488             char c1 = getChar(value, toffset++);
 489             char c2 = getChar(other, ooffset++);
 490             if (c1 == c2) {
 491                 continue;
 492             }
 493             // try converting both characters to uppercase.
 494             // If the results match, then the comparison scan should
 495             // continue.
 496             char u1 = Character.toUpperCase(c1);
 497             char u2 = Character.toUpperCase(c2);
 498             if (u1 == u2) {
 499                 continue;
 500             }
 501             // Unfortunately, conversion to uppercase does not work properly
 502             // for the Georgian alphabet, which has strange rules about case
 503             // conversion.  So we need to make one last check before
 504             // exiting.
 505             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 506                 continue;
 507             }
 508             return false;
 509         }
 510         return true;
 511     }
 512 
 513     public static boolean regionMatchesCI_Latin1(byte[] value, int toffset,
 514                                                  byte[] other, int ooffset,
 515                                                  int len) {
 516         int last = toffset + len;
 517         while (toffset < last) {
 518             char c1 = getChar(value, toffset++);
 519             char c2 = (char)(other[ooffset++] & 0xff);
 520             if (c1 == c2) {
 521                 continue;
 522             }
 523             char u1 = Character.toUpperCase(c1);
 524             char u2 = Character.toUpperCase(c2);
 525             if (u1 == u2) {
 526                 continue;
 527             }
 528             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 529                 continue;
 530             }
 531             return false;
 532         }
 533         return true;
 534     }
 535 
 536     public static String toLowerCase(String str, byte[] value, Locale locale) {
 537         if (locale == null) {
 538             throw new NullPointerException();
 539         }
 540         int first;
 541         boolean hasSurr = false;
 542         final int len = value.length >> 1;
 543 
 544         // Now check if there are any characters that need to be changed, or are surrogate
 545         for (first = 0 ; first < len; first++) {
 546             int cp = (int)getChar(value, first);
 547             if (Character.isSurrogate((char)cp)) {
 548                 hasSurr = true;
 549                 break;
 550             }
 551             if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
 552                 break;
 553             }
 554         }
 555         if (first == len)
 556             return str;
 557         byte[] result = new byte[value.length];
 558         System.arraycopy(value, 0, result, 0, first << 1);  // Just copy the first few
 559                                                             // lowerCase characters.
 560         String lang = locale.getLanguage();
 561         if (lang == "tr" || lang == "az" || lang == "lt") {
 562             return toLowerCaseEx(str, value, result, first, locale, true);
 563         }
 564         if (hasSurr) {
 565             return toLowerCaseEx(str, value, result, first, locale, false);
 566         }
 567         int bits = 0;
 568         for (int i = first; i < len; i++) {
 569             int cp = (int)getChar(value, i);
 570             if (cp == '\u03A3' ||                       // GREEK CAPITAL LETTER SIGMA
 571                 Character.isSurrogate((char)cp)) {
 572                 return toLowerCaseEx(str, value, result, i, locale, false);
 573             }
 574             if (cp == '\u0130') {                       // LATIN CAPITAL LETTER I WITH DOT ABOVE
 575                 return toLowerCaseEx(str, value, result, i, locale, true);
 576             }
 577             cp = Character.toLowerCase(cp);
 578             if (!Character.isBmpCodePoint(cp)) {
 579                 return toLowerCaseEx(str, value, result, i, locale, false);
 580             }
 581             bits |= cp;
 582             putChar(result, i, cp);
 583         }
 584         if (bits >>> 8 != 0) {
 585             return new String(result, UTF16);
 586         } else {
 587             return newString(result, 0, len);
 588         }
 589     }
 590 
 591     private static String toLowerCaseEx(String str, byte[] value,
 592                                         byte[] result, int first, Locale locale,
 593                                         boolean localeDependent) {
 594         int resultOffset = first;
 595         int length = value.length >> 1;
 596         int srcCount;
 597         for (int i = first; i < length; i += srcCount) {
 598             int srcChar = getChar(value, i);
 599             int lowerChar;
 600             char[] lowerCharArray;
 601             srcCount = 1;
 602             if (Character.isSurrogate((char)srcChar)) {
 603                 srcChar = codePointAt(value, i, length);
 604                 srcCount = Character.charCount(srcChar);
 605             }
 606             if (localeDependent ||
 607                 srcChar == '\u03A3' ||  // GREEK CAPITAL LETTER SIGMA
 608                 srcChar == '\u0130') {  // LATIN CAPITAL LETTER I WITH DOT ABOVE
 609                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
 610             } else {
 611                 lowerChar = Character.toLowerCase(srcChar);
 612             }
 613             if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
 614                 putChar(result, resultOffset++, lowerChar);
 615             } else {
 616                 if (lowerChar == Character.ERROR) {
 617                     lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
 618                 } else {
 619                     lowerCharArray = Character.toChars(lowerChar);
 620                 }
 621                 /* Grow result if needed */
 622                 int mapLen = lowerCharArray.length;
 623                 if (mapLen > srcCount) {
 624                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 625                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 626                     result = result2;
 627                 }
 628                 for (int x = 0; x < mapLen; ++x) {
 629                     putChar(result, resultOffset++, lowerCharArray[x]);
 630                 }
 631             }
 632         }
 633         return newString(result, 0, resultOffset);
 634     }
 635 
 636     public static String toUpperCase(String str, byte[] value, Locale locale) {
 637         if (locale == null) {
 638             throw new NullPointerException();
 639         }
 640         int first;
 641         boolean hasSurr = false;
 642         final int len = value.length >> 1;
 643 
 644         // Now check if there are any characters that need to be changed, or are surrogate
 645         for (first = 0 ; first < len; first++) {
 646             int cp = (int)getChar(value, first);
 647             if (Character.isSurrogate((char)cp)) {
 648                 hasSurr = true;
 649                 break;
 650             }
 651             if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
 652                 break;
 653             }
 654         }
 655         if (first == len) {
 656             return str;
 657         }
 658         byte[] result = new byte[value.length];
 659         System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few
 660                                                            // upperCase characters.
 661         String lang = locale.getLanguage();
 662         if (lang == "tr" || lang == "az" || lang == "lt") {
 663             return toUpperCaseEx(str, value, result, first, locale, true);
 664         }
 665         if (hasSurr) {
 666             return toUpperCaseEx(str, value, result, first, locale, false);
 667         }
 668         int bits = 0;
 669         for (int i = first; i < len; i++) {
 670             int cp = (int)getChar(value, i);
 671             if (Character.isSurrogate((char)cp)) {
 672                 return toUpperCaseEx(str, value, result, i, locale, false);
 673             }
 674             cp = Character.toUpperCaseEx(cp);
 675             if (!Character.isBmpCodePoint(cp)) {    // Character.ERROR is not bmp
 676                 return toUpperCaseEx(str, value, result, i, locale, false);
 677             }
 678             bits |= cp;
 679             putChar(result, i, cp);
 680         }
 681         if (bits >>> 8 != 0) {
 682             return new String(result, UTF16);
 683         } else {
 684             return newString(result, 0, len);
 685         }
 686     }
 687 
 688     private static String toUpperCaseEx(String str, byte[] value,
 689                                         byte[] result, int first,
 690                                         Locale locale, boolean localeDependent)
 691     {
 692         int resultOffset = first;
 693         int length = value.length >> 1;
 694         int srcCount;
 695         for (int i = first; i < length; i += srcCount) {
 696             int srcChar = getChar(value, i);
 697             int upperChar;
 698             char[] upperCharArray;
 699             srcCount = 1;
 700             if (Character.isSurrogate((char)srcChar)) {
 701                 srcChar = codePointAt(value, i, length);
 702                 srcCount = Character.charCount(srcChar);
 703             }
 704             if (localeDependent) {
 705                 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
 706             } else {
 707                 upperChar = Character.toUpperCaseEx(srcChar);
 708             }
 709             if (Character.isBmpCodePoint(upperChar)) {
 710                 putChar(result, resultOffset++, upperChar);
 711             } else {
 712                 if (upperChar == Character.ERROR) {
 713                     if (localeDependent) {
 714                         upperCharArray =
 715                             ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
 716                     } else {
 717                         upperCharArray = Character.toUpperCaseCharArray(srcChar);
 718                     }
 719                 } else {
 720                     upperCharArray = Character.toChars(upperChar);
 721                 }
 722                 /* Grow result if needed */
 723                 int mapLen = upperCharArray.length;
 724                 if (mapLen > srcCount) {
 725                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 726                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 727                     result = result2;
 728                  }
 729                  for (int x = 0; x < mapLen; ++x) {
 730                     putChar(result, resultOffset++, upperCharArray[x]);
 731                  }
 732             }
 733         }
 734         return newString(result, 0, resultOffset);
 735     }
 736 
 737     public static String trim(byte[] value) {
 738         int length = value.length >> 1;
 739         int len = length;
 740         int st = 0;
 741         while (st < len && getChar(value, st) <= ' ') {
 742             st++;
 743         }
 744         while (st < len && getChar(value, len - 1) <= ' ') {
 745             len--;
 746         }
 747         return ((st > 0) || (len < length )) ?
 748             new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) :
 749             null;
 750     }
 751 
 752     public static void putChars(byte[] val, int index, char[] str, int off, int end) {
 753         while (off < end) {
 754             putChar(val, index++, str[off++]);
 755         }
 756     }
 757 
 758     public static String newString(byte[] val, int index, int len) {
 759         if (String.COMPACT_STRINGS) {
 760             byte[] buf = compress(val, index, len);
 761             if (buf != null) {
 762                 return new String(buf, LATIN1);
 763             }
 764         }
 765         int last = index + len;
 766         return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16);
 767     }
 768 
 769     public static void fillNull(byte[] val, int index, int end) {
 770         Arrays.fill(val, index << 1, end << 1, (byte)0);
 771     }
 772 
 773     static class CharsSpliterator implements Spliterator.OfInt {
 774         private final byte[] array;
 775         private int index;        // current index, modified on advance/split
 776         private final int fence;  // one past last index
 777         private final int cs;
 778 
 779         CharsSpliterator(byte[] array, int acs) {
 780             this(array, 0, array.length >> 1, acs);
 781         }
 782 
 783         CharsSpliterator(byte[] array, int origin, int fence, int acs) {
 784             this.array = array;
 785             this.index = origin;
 786             this.fence = fence;
 787             this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
 788                       | Spliterator.SUBSIZED;
 789         }
 790 
 791         @Override
 792         public OfInt trySplit() {
 793             int lo = index, mid = (lo + fence) >>> 1;
 794             return (lo >= mid)
 795                    ? null
 796                    : new CharsSpliterator(array, lo, index = mid, cs);
 797         }
 798 
 799         @Override
 800         public void forEachRemaining(IntConsumer action) {
 801             byte[] a; int i, hi; // hoist accesses and checks from loop
 802             if (action == null)
 803                 throw new NullPointerException();
 804             if (((a = array).length >> 1) >= (hi = fence) &&
 805                 (i = index) >= 0 && i < (index = hi)) {
 806                 do { action.accept(getChar(a, i)); } while (++i < hi);
 807             }
 808         }
 809 
 810         @Override
 811         public boolean tryAdvance(IntConsumer action) {
 812             if (action == null)
 813                 throw new NullPointerException();
 814             if (index >= 0 && index < fence) {
 815                 action.accept(getChar(array, index++));
 816                 return true;
 817             }
 818             return false;
 819         }
 820 
 821         @Override
 822         public long estimateSize() { return (long)(fence - index); }
 823 
 824         @Override
 825         public int characteristics() {
 826             return cs;
 827         }
 828     }
 829 
 830     static class CodePointsSpliterator implements Spliterator.OfInt {
 831         private final byte[] array;
 832         private int index;        // current index, modified on advance/split
 833         private final int fence;  // one past last index
 834         private final int cs;
 835 
 836         CodePointsSpliterator(byte[] array, int acs) {
 837             this(array, 0, array.length >> 1, acs);
 838         }
 839 
 840         CodePointsSpliterator(byte[] array, int origin, int fence, int acs) {
 841             this.array = array;
 842             this.index = origin;
 843             this.fence = fence;
 844             this.cs = acs | Spliterator.ORDERED;
 845         }
 846 
 847         @Override
 848         public OfInt trySplit() {
 849             int lo = index, mid = (lo + fence) >>> 1;
 850             if (lo >= mid)
 851                 return null;
 852 
 853             int midOneLess;
 854             // If the mid-point intersects a surrogate pair
 855             if (Character.isLowSurrogate(getChar(array, mid)) &&
 856                 Character.isHighSurrogate(getChar(array, midOneLess = (mid -1)))) {
 857                 // If there is only one pair it cannot be split
 858                 if (lo >= midOneLess)
 859                     return null;
 860                 // Shift the mid-point to align with the surrogate pair
 861                 return new CodePointsSpliterator(array, lo, index = midOneLess, cs);
 862             }
 863             return new CodePointsSpliterator(array, lo, index = mid, cs);
 864         }
 865 
 866         @Override
 867         public void forEachRemaining(IntConsumer action) {
 868             byte[] a; int i, hi; // hoist accesses and checks from loop
 869             if (action == null)
 870                 throw new NullPointerException();
 871             if (((a = array).length >> 1) >= (hi = fence) &&
 872                 (i = index) >= 0 && i < (index = hi)) {
 873                 do {
 874                     i = advance(a, i, hi, action);
 875                 } while (i < hi);
 876             }
 877         }
 878 
 879         @Override
 880         public boolean tryAdvance(IntConsumer action) {
 881             if (action == null)
 882                 throw new NullPointerException();
 883             if (index >= 0 && index < fence) {
 884                 index = advance(array, index, fence, action);
 885                 return true;
 886             }
 887             return false;
 888         }
 889 
 890         // Advance one code point from the index, i, and return the next
 891         // index to advance from
 892         private static int advance(byte[] a, int i, int hi, IntConsumer action) {
 893             char c1 = getChar(a, i++);
 894             int cp = c1;
 895             if (Character.isHighSurrogate(c1) && i < hi) {
 896                 char c2 = getChar(a, i);
 897                 if (Character.isLowSurrogate(c2)) {
 898                     i++;
 899                     cp = Character.toCodePoint(c1, c2);
 900                 }
 901             }
 902             action.accept(cp);
 903             return i;
 904         }
 905 
 906         @Override
 907         public long estimateSize() { return (long)(fence - index); }
 908 
 909         @Override
 910         public int characteristics() {
 911             return cs;
 912         }
 913     }
 914 
 915     ////////////////////////////////////////////////////////////////
 916 
 917     public static void putCharSB(byte[] val, int index, int c) {
 918         checkIndex(index, val.length >> 1);
 919         putChar(val, index, c);
 920     }
 921 
 922     public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) {
 923         checkOffset(index + end - off, val.length >> 1);
 924         putChars(val, index, ca, off, end);
 925     }
 926 
 927     public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) {
 928         checkOffset(index + end - off, val.length >> 1);
 929         for (int i = off; i < end; i++) {
 930             putChar(val, index++, s.charAt(i));
 931         }
 932     }
 933 
 934     public static int codePointAtSB(byte[] val, int index, int end) {
 935         checkOffset(end, val.length >> 1);
 936         return codePointAt(val, index, end);
 937     }
 938 
 939     public static int codePointBeforeSB(byte[] val, int index) {
 940         checkOffset(index, val.length >> 1);
 941         return codePointBefore(val, index);
 942     }
 943 
 944     public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) {
 945         checkOffset(endIndex, val.length >> 1);
 946         return codePointCount(val, beginIndex, endIndex);
 947     }
 948 
 949     ////////////////////////////////////////////////////////////////
 950 
 951     private static native boolean isBigEndian();
 952 
 953     static final int HI_BYTE_SHIFT;
 954     static final int LO_BYTE_SHIFT;
 955     static {
 956         if (isBigEndian()) {
 957             HI_BYTE_SHIFT = 8;
 958             LO_BYTE_SHIFT = 0;
 959         } else {
 960             HI_BYTE_SHIFT = 0;
 961             LO_BYTE_SHIFT = 8;
 962         }
 963     }
 964 
 965     static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;
 966 }