New src/java.base/share/classes/java/lang/StringUTF16.java

   1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Spliterator;
  31 import java.util.function.IntConsumer;
  32 import jdk.internal.HotSpotIntrinsicCandidate;
  33 
  34 import static java.lang.String.UTF16;
  35 import static java.lang.String.LATIN1;
  36 import static java.lang.String.checkIndex;
  37 import static java.lang.String.checkOffset;
  38 
  39 final class StringUTF16 {
  40 
  41     public static byte[] newBytesFor(int len) {
  42         if (len < 0) {
  43             throw new NegativeArraySizeException();
  44         }
  45         if (len > MAX_LENGTH) {
  46             throw new OutOfMemoryError("UTF16 String size is " + len +
  47                                        ", should be less than " + MAX_LENGTH);
  48         }
  49         return new byte[len << 1];
  50     }
  51 
  52     @HotSpotIntrinsicCandidate
  53     public static void putChar(byte[] val, int index, int c) {
  54         index <<= 1;
  55         val[index++] = (byte)(c >> HI_BYTE_SHIFT);
  56         val[index]   = (byte)(c >> LO_BYTE_SHIFT);
  57     }
  58 
  59     @HotSpotIntrinsicCandidate
  60     public static char getChar(byte[] val, int index) {
  61         index <<= 1;
  62         return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) |
  63                       ((val[index]   & 0xff) << LO_BYTE_SHIFT));
  64     }
  65 
  66     public static char charAt(byte[] value, int index) {
  67         if (index < 0 || index >= value.length >> 1) {
  68             throw new StringIndexOutOfBoundsException(index);
  69         }
  70         return getChar(value, index);
  71     }
  72 
  73     public static int length(byte[] value) {
  74         return value.length >> 1;
  75     }
  76 
  77     public static int codePointAt(byte[] value, int index, int end) {
  78         char c1 = getChar(value, index);
  79         if (Character.isHighSurrogate(c1) && ++index < end) {
  80             char c2 = getChar(value, index);
  81             if (Character.isLowSurrogate(c2)) {
  82                return Character.toCodePoint(c1, c2);
  83             }
  84         }
  85         return c1;
  86     }
  87 
  88     public static int codePointBefore(byte[] value, int index) {
  89         char c2 = getChar(value, --index);
  90         if (Character.isLowSurrogate(c2) && index > 0) {
  91             char c1 = getChar(value, --index);
  92             if (Character.isHighSurrogate(c1)) {
  93                return Character.toCodePoint(c1, c2);
  94             }
  95         }
  96         return c2;
  97     }
  98 
  99     public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
 100         int count = endIndex - beginIndex;
 101         for (int i = beginIndex; i < endIndex; ) {
 102             if (Character.isHighSurrogate(getChar(value, i++)) &&
 103                 i < endIndex &&
 104                 Character.isLowSurrogate(getChar(value, i))) {
 105                 count--;
 106                 i++;
 107             }
 108         }
 109         return count;
 110     }
 111 
 112     public static char[] toChars(byte[] value) {
 113         char[] dst = new char[value.length >> 1];
 114         getChars(value, 0, dst.length, dst, 0);
 115         return dst;
 116     }
 117 
 118     @HotSpotIntrinsicCandidate
 119     public static byte[] toBytes(char[] value, int off, int len) {
 120         byte[] val = newBytesFor(len);
 121         for (int i = 0; i < len; i++) {
 122             putChar(val, i, value[off++]);
 123         }
 124         return val;
 125     }
 126 
 127     public static byte[] compress(char[] val, int off, int len) {
 128         byte[] ret = new byte[len];
 129         if (compress(val, off, ret, 0, len) == len) {
 130             return ret;
 131         }
 132         return null;
 133     }
 134 
 135     public static byte[] compress(byte[] val, int off, int len) {
 136         byte[] ret = new byte[len];
 137         if (compress(val, off, ret, 0, len) == len) {
 138             return ret;
 139         }
 140         return null;
 141     }
 142 
 143     // compressedCopy char[] -> byte[]
 144     @HotSpotIntrinsicCandidate
 145     private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
 146         for (int i = 0; i < len; i++) {
 147             int c = src[srcOff++];
 148             if (c >>> 8 != 0) {
 149                 return 0;
 150             }
 151             dst[dstOff++] = (byte)c;
 152         }
 153         return len;
 154     }
 155 
 156     // compressedCopy byte[] -> byte[]
 157     @HotSpotIntrinsicCandidate
 158     public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
 159         for (int i = 0; i < len; i++) {
 160             int c = getChar(src, srcOff++);
 161             if (c >>> 8 != 0) {
 162                 return 0;
 163             }
 164             dst[dstOff++] = (byte)c;
 165         }
 166         return len;
 167     }
 168 
 169     public static byte[] toBytes(int[] val, int index, int len) {
 170         final int end = index + len;
 171         // Pass 1: Compute precise size of char[]
 172         int n = len;
 173         for (int i = index; i < end; i++) {
 174             int cp = val[i];
 175             if (Character.isBmpCodePoint(cp))
 176                 continue;
 177             else if (Character.isValidCodePoint(cp))
 178                 n++;
 179             else throw new IllegalArgumentException(Integer.toString(cp));
 180         }
 181         // Pass 2: Allocate and fill in <high, low> pair
 182         byte[] buf = newBytesFor(n);
 183         for (int i = index, j = 0; i < end; i++, j++) {
 184             int cp = val[i];
 185             if (Character.isBmpCodePoint(cp)) {
 186                 putChar(buf, j, cp);
 187             } else {
 188                 putChar(buf, j++, Character.highSurrogate(cp));
 189                 putChar(buf, j, Character.lowSurrogate(cp));
 190             }
 191         }
 192         return buf;
 193     }
 194 
 195     public static byte[] toBytes(char c) {
 196         byte[] result = new byte[2];
 197         putChar(result, 0, c);
 198         return result;
 199     }
 200 
 201     @HotSpotIntrinsicCandidate
 202     public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
 203         for (int i = srcBegin; i < srcEnd; i++) {
 204             dst[dstBegin++] = getChar(value, i);
 205         }
 206     }
 207 
 208     /* @see java.lang.String.getBytes(int, int, byte[], int) */
 209     public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
 210         srcBegin <<= 1;
 211         srcEnd <<= 1;
 212         for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) {
 213             dst[dstBegin++] = value[i];
 214         }
 215     }
 216 
 217     @HotSpotIntrinsicCandidate
 218     public static boolean equals(byte[] value, byte[] other) {
 219         if (value.length == other.length) {
 220             int len = value.length >> 1;
 221             for (int i = 0; i < len; i++) {
 222                 if (getChar(value, i) != getChar(other, i)) {
 223                     return false;
 224                 }
 225             }
 226             return true;
 227         }
 228         return false;
 229     }
 230 
 231     @HotSpotIntrinsicCandidate
 232     public static int compareTo(byte[] value, byte[] other) {
 233         int len1 = length(value);
 234         int len2 = length(other);
 235         int lim = Math.min(len1, len2);
 236         for (int k = 0; k < lim; k++) {
 237             char c1 = getChar(value, k);
 238             char c2 = getChar(other, k);
 239             if (c1 != c2) {
 240                 return c1 - c2;
 241             }
 242         }
 243         return len1 - len2;
 244     }
 245 
 246     @HotSpotIntrinsicCandidate
 247     public static int compareToLatin1(byte[] value, byte[] other) {
 248         int len1 = length(value);
 249         int len2 = StringLatin1.length(other);
 250         int lim = Math.min(len1, len2);
 251         for (int k = 0; k < lim; k++) {
 252             char c1 = getChar(value, k);
 253             char c2 = StringLatin1.getChar(other, k);
 254             if (c1 != c2) {
 255                 return c1 - c2;
 256             }
 257         }
 258         return len1 - len2;
 259     }
 260 
 261     public static int hashCode(byte[] value) {
 262         int h = 0;
 263         int length = value.length >> 1;
 264         for (int i = 0; i < length; i++) {
 265             h = 31 * h + getChar(value, i);
 266         }
 267         return h;
 268     }
 269 
 270     public static int indexOf(byte[] value, int ch, int fromIndex) {
 271         int max = value.length >> 1;
 272         if (fromIndex < 0) {
 273             fromIndex = 0;
 274         } else if (fromIndex >= max) {
 275             // Note: fromIndex might be near -1>>>1.
 276             return -1;
 277         }
 278         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 279             // handle most cases here (ch is a BMP code point or a
 280             // negative value (invalid code point))
 281             return indexOfChar(value, ch, fromIndex, max);
 282         } else {
 283             return indexOfSupplementary(value, ch, fromIndex, max);
 284         }
 285     }
 286 
 287     @HotSpotIntrinsicCandidate
 288     public static int indexOf(byte[] value, byte[] str) {
 289         if (str.length == 0) {
 290             return 0;
 291         }
 292         if (value.length == 0) {
 293             return -1;
 294         }
 295         return indexOf(value, length(value), str, length(str), 0);
 296     }
 297 
 298     @HotSpotIntrinsicCandidate
 299     public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 300         char first = getChar(str, 0);
 301         int max = (valueCount - strCount);
 302         for (int i = fromIndex; i <= max; i++) {
 303             // Look for first character.
 304             if (getChar(value, i) != first) {
 305                 while (++i <= max && getChar(value, i) != first);
 306             }
 307             // Found first character, now look at the rest of value
 308             if (i <= max) {
 309                 int j = i + 1;
 310                 int end = j + strCount - 1;
 311                 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++);
 312                 if (j == end) {
 313                     // Found whole string.
 314                     return i;
 315                 }
 316             }
 317         }
 318         return -1;
 319     }
 320 
 321     /**
 322      * Handles indexOf Latin1 substring in UTF16 string.
 323      */
 324     @HotSpotIntrinsicCandidate
 325     public static int indexOfLatin1(byte[] value, byte[] str) {
 326         if (str.length == 0) {
 327             return 0;
 328         }
 329         if (value.length == 0) {
 330             return -1;
 331         }
 332         return indexOfLatin1(value, length(value), str, str.length, 0);
 333     }
 334 
 335     @HotSpotIntrinsicCandidate
 336     public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
 337         char first = (char)(tgt[0] & 0xff);
 338         int max = (srcCount - tgtCount);
 339         for (int i = fromIndex; i <= max; i++) {
 340             // Look for first character.
 341             if (getChar(src, i) != first) {
 342                 while (++i <= max && getChar(src, i) != first);
 343             }
 344             // Found first character, now look at the rest of v2
 345             if (i <= max) {
 346                 int j = i + 1;
 347                 int end = j + tgtCount - 1;
 348                 for (int k = 1;
 349                      j < end && getChar(src, j) == (tgt[k] & 0xff);
 350                      j++, k++);
 351                 if (j == end) {
 352                     // Found whole string.
 353                     return i;
 354                 }
 355             }
 356         }
 357         return -1;
 358     }
 359 
 360     @HotSpotIntrinsicCandidate
 361     private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
 362         for (int i = fromIndex; i < max; i++) {
 363             if (getChar(value, i) == ch) {
 364                 return i;
 365             }
 366         }
 367         return -1;
 368     }
 369 
 370     /**
 371      * Handles (rare) calls of indexOf with a supplementary character.
 372      */
 373     private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) {
 374         if (Character.isValidCodePoint(ch)) {
 375             final char hi = Character.highSurrogate(ch);
 376             final char lo = Character.lowSurrogate(ch);
 377             for (int i = fromIndex; i < max - 1; i++) {
 378                 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) {
 379                     return i;
 380                 }
 381             }
 382         }
 383         return -1;
 384     }
 385 
 386     public static int lastIndexOf(byte[] src, int srcCount,
 387                                   byte[] tgt, int tgtCount, int fromIndex) {
 388         int min = tgtCount - 1;
 389         int i = min + fromIndex;
 390         int strLastIndex = tgtCount - 1;
 391         char strLastChar = getChar(tgt, strLastIndex);
 392 
 393     startSearchForLastChar:
 394         while (true) {
 395             while (i >= min && getChar(src, i) != strLastChar) {
 396                 i--;
 397             }
 398             if (i < min) {
 399                 return -1;
 400             }
 401             int j = i - 1;
 402             int start = j - strLastIndex;
 403             int k = strLastIndex - 1;
 404             while (j > start) {
 405                 if (getChar(src, j--) != getChar(tgt, k--)) {
 406                     i--;
 407                     continue startSearchForLastChar;
 408                 }
 409             }
 410             return start + 1;
 411         }
 412     }
 413 
 414     public static int lastIndexOf(byte[] value, int ch, int fromIndex) {
 415         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 416             // handle most cases here (ch is a BMP code point or a
 417             // negative value (invalid code point))
 418             int i = Math.min(fromIndex, (value.length >> 1) - 1);
 419             for (; i >= 0; i--) {
 420                 if (getChar(value, i) == ch) {
 421                     return i;
 422                 }
 423             }
 424             return -1;
 425         } else {
 426             return lastIndexOfSupplementary(value, ch, fromIndex);
 427         }
 428     }
 429 
 430     /**
 431      * Handles (rare) calls of lastIndexOf with a supplementary character.
 432      */
 433     private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) {
 434         if (Character.isValidCodePoint(ch)) {
 435             char hi = Character.highSurrogate(ch);
 436             char lo = Character.lowSurrogate(ch);
 437             int i = Math.min(fromIndex, (value.length >> 1) - 2);
 438             for (; i >= 0; i--) {
 439                 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) {
 440                     return i;
 441                 }
 442             }
 443         }
 444         return -1;
 445     }
 446 
 447     public static String replace(byte[] value, char oldChar, char newChar) {
 448         int len = value.length >> 1;
 449         int i = -1;
 450         while (++i < len) {
 451             if (getChar(value, i) == oldChar) {
 452                 break;
 453             }
 454         }
 455         if (i < len) {
 456             byte buf[] = new byte[value.length];
 457             for (int j = 0; j < i; j++) {
 458                 putChar(buf, j, getChar(value, j)); // TBD:arraycopy?
 459             }
 460             while (i < len) {
 461                 char c = getChar(value, i);
 462                 putChar(buf, i, c == oldChar ? newChar : c);
 463                 i++;
 464            }
 465            // Check if we should try to compress to latin1
 466            if (String.COMPACT_STRINGS &&
 467                !StringLatin1.canEncode(oldChar) &&
 468                StringLatin1.canEncode(newChar)) {
 469                byte[] val = compress(buf, 0, len);
 470                if (val != null) {
 471                    return new String(val, LATIN1);
 472                }
 473            }
 474            return new String(buf, UTF16);
 475         }
 476         return null;
 477     }
 478 
 479     public static boolean regionMatchesCI(byte[] value, int toffset,
 480                                           byte[] other, int ooffset, int len) {
 481         int last = toffset + len;
 482         while (toffset < last) {
 483             char c1 = getChar(value, toffset++);
 484             char c2 = getChar(other, ooffset++);
 485             if (c1 == c2) {
 486                 continue;
 487             }
 488             // try converting both characters to uppercase.
 489             // If the results match, then the comparison scan should
 490             // continue.
 491             char u1 = Character.toUpperCase(c1);
 492             char u2 = Character.toUpperCase(c2);
 493             if (u1 == u2) {
 494                 continue;
 495             }
 496             // Unfortunately, conversion to uppercase does not work properly
 497             // for the Georgian alphabet, which has strange rules about case
 498             // conversion.  So we need to make one last check before
 499             // exiting.
 500             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 501                 continue;
 502             }
 503             return false;
 504         }
 505         return true;
 506     }
 507 
 508     public static boolean regionMatchesCI_Latin1(byte[] value, int toffset,
 509                                                  byte[] other, int ooffset,
 510                                                  int len) {
 511         int last = toffset + len;
 512         while (toffset < last) {
 513             char c1 = getChar(value, toffset++);
 514             char c2 = (char)(other[ooffset++] & 0xff);
 515             if (c1 == c2) {
 516                 continue;
 517             }
 518             char u1 = Character.toUpperCase(c1);
 519             char u2 = Character.toUpperCase(c2);
 520             if (u1 == u2) {
 521                 continue;
 522             }
 523             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 524                 continue;
 525             }
 526             return false;
 527         }
 528         return true;
 529     }
 530 
 531     public static String toLowerCase(String str, byte[] value, Locale locale) {
 532         if (locale == null) {
 533             throw new NullPointerException();
 534         }
 535         int first;
 536         boolean hasSurr = false;
 537         final int len = value.length >> 1;
 538 
 539         // Now check if there are any characters that need to be changed, or are surrogate
 540         for (first = 0 ; first < len; first++) {
 541             int cp = (int)getChar(value, first);
 542             if (Character.isSurrogate((char)cp)) {
 543                 hasSurr = true;
 544                 break;
 545             }
 546             if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
 547                 break;
 548             }
 549         }
 550         if (first == len)
 551             return str;
 552         byte[] result = new byte[value.length];
 553         System.arraycopy(value, 0, result, 0, first << 1);  // Just copy the first few
 554                                                             // lowerCase characters.
 555         String lang = locale.getLanguage();
 556         if (lang == "tr" || lang == "az" || lang == "lt") {
 557             return toLowerCaseEx(str, value, result, first, locale, true);
 558         }
 559         if (hasSurr) {
 560             return toLowerCaseEx(str, value, result, first, locale, false);
 561         }
 562         int bits = 0;
 563         for (int i = first; i < len; i++) {
 564             int cp = (int)getChar(value, i);
 565             if (cp == '\u03A3' ||                       // GREEK CAPITAL LETTER SIGMA
 566                 Character.isSurrogate((char)cp)) {
 567                 return toLowerCaseEx(str, value, result, i, locale, false);
 568             }
 569             if (cp == '\u0130') {                       // LATIN CAPITAL LETTER I WITH DOT ABOVE
 570                 return toLowerCaseEx(str, value, result, i, locale, true);
 571             }
 572             cp = Character.toLowerCase(cp);
 573             if (!Character.isBmpCodePoint(cp)) {
 574                 return toLowerCaseEx(str, value, result, i, locale, false);
 575             }
 576             bits |= cp;
 577             putChar(result, i, cp);
 578         }
 579         if (bits >>> 8 != 0) {
 580             return new String(result, UTF16);
 581         } else {
 582             return newString(result, 0, len);
 583         }
 584     }
 585 
 586     private static String toLowerCaseEx(String str, byte[] value,
 587                                         byte[] result, int first, Locale locale,
 588                                         boolean localeDependent) {
 589         int resultOffset = first;
 590         int length = value.length >> 1;
 591         int srcCount;
 592         for (int i = first; i < length; i += srcCount) {
 593             int srcChar = getChar(value, i);
 594             int lowerChar;
 595             char[] lowerCharArray;
 596             srcCount = 1;
 597             if (Character.isSurrogate((char)srcChar)) {
 598                 srcChar = codePointAt(value, i, length);
 599                 srcCount = Character.charCount(srcChar);
 600             }
 601             if (localeDependent ||
 602                 srcChar == '\u03A3' ||  // GREEK CAPITAL LETTER SIGMA
 603                 srcChar == '\u0130') {  // LATIN CAPITAL LETTER I WITH DOT ABOVE
 604                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
 605             } else {
 606                 lowerChar = Character.toLowerCase(srcChar);
 607             }
 608             if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
 609                 putChar(result, resultOffset++, lowerChar);
 610             } else {
 611                 if (lowerChar == Character.ERROR) {
 612                     lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
 613                 } else {
 614                     lowerCharArray = Character.toChars(lowerChar);
 615                 }
 616                 /* Grow result if needed */
 617                 int mapLen = lowerCharArray.length;
 618                 if (mapLen > srcCount) {
 619                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 620                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 621                     result = result2;
 622                 }
 623                 for (int x = 0; x < mapLen; ++x) {
 624                     putChar(result, resultOffset++, lowerCharArray[x]);
 625                 }
 626             }
 627         }
 628         return newString(result, 0, resultOffset);
 629     }
 630 
 631     public static String toUpperCase(String str, byte[] value, Locale locale) {
 632         if (locale == null) {
 633             throw new NullPointerException();
 634         }
 635         int first;
 636         boolean hasSurr = false;
 637         final int len = value.length >> 1;
 638 
 639         // Now check if there are any characters that need to be changed, or are surrogate
 640         for (first = 0 ; first < len; first++) {
 641             int cp = (int)getChar(value, first);
 642             if (Character.isSurrogate((char)cp)) {
 643                 hasSurr = true;
 644                 break;
 645             }
 646             if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
 647                 break;
 648             }
 649         }
 650         if (first == len) {
 651             return str;
 652         }
 653         byte[] result = new byte[value.length];
 654         System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few
 655                                                            // upperCase characters.
 656         String lang = locale.getLanguage();
 657         if (lang == "tr" || lang == "az" || lang == "lt") {
 658             return toUpperCaseEx(str, value, result, first, locale, true);
 659         }
 660         if (hasSurr) {
 661             return toUpperCaseEx(str, value, result, first, locale, false);
 662         }
 663         int bits = 0;
 664         for (int i = first; i < len; i++) {
 665             int cp = (int)getChar(value, i);
 666             if (Character.isSurrogate((char)cp)) {
 667                 return toUpperCaseEx(str, value, result, i, locale, false);
 668             }
 669             cp = Character.toUpperCaseEx(cp);
 670             if (!Character.isBmpCodePoint(cp)) {    // Character.ERROR is not bmp
 671                 return toUpperCaseEx(str, value, result, i, locale, false);
 672             }
 673             bits |= cp;
 674             putChar(result, i, cp);
 675         }
 676         if (bits >>> 8 != 0) {
 677             return new String(result, UTF16);
 678         } else {
 679             return newString(result, 0, len);
 680         }
 681     }
 682 
 683     private static String toUpperCaseEx(String str, byte[] value,
 684                                         byte[] result, int first,
 685                                         Locale locale, boolean localeDependent)
 686     {
 687         int resultOffset = first;
 688         int length = value.length >> 1;
 689         int srcCount;
 690         for (int i = first; i < length; i += srcCount) {
 691             int srcChar = getChar(value, i);
 692             int upperChar;
 693             char[] upperCharArray;
 694             srcCount = 1;
 695             if (Character.isSurrogate((char)srcChar)) {
 696                 srcChar = codePointAt(value, i, length);
 697                 srcCount = Character.charCount(srcChar);
 698             }
 699             if (localeDependent) {
 700                 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
 701             } else {
 702                 upperChar = Character.toUpperCaseEx(srcChar);
 703             }
 704             if (Character.isBmpCodePoint(upperChar)) {
 705                 putChar(result, resultOffset++, upperChar);
 706             } else {
 707                 if (upperChar == Character.ERROR) {
 708                     if (localeDependent) {
 709                         upperCharArray =
 710                             ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
 711                     } else {
 712                         upperCharArray = Character.toUpperCaseCharArray(srcChar);
 713                     }
 714                 } else {
 715                     upperCharArray = Character.toChars(upperChar);
 716                 }
 717                 /* Grow result if needed */
 718                 int mapLen = upperCharArray.length;
 719                 if (mapLen > srcCount) {
 720                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 721                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 722                     result = result2;
 723                  }
 724                  for (int x = 0; x < mapLen; ++x) {
 725                     putChar(result, resultOffset++, upperCharArray[x]);
 726                  }
 727             }
 728         }
 729         return newString(result, 0, resultOffset);
 730     }
 731 
 732     public static String trim(byte[] value) {
 733         int length = value.length >> 1;
 734         int len = length;
 735         int st = 0;
 736         while (st < len && getChar(value, st) <= ' ') {
 737             st++;
 738         }
 739         while (st < len && getChar(value, len - 1) <= ' ') {
 740             len--;
 741         }
 742         return ((st > 0) || (len < length )) ?
 743             new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) :
 744             null;
 745     }
 746 
 747     public static void putChars(byte[] val, int index, char[] str, int off, int end) {
 748         while (off < end) {
 749             putChar(val, index++, str[off++]);
 750         }
 751     }
 752 
 753     public static String newString(byte[] val, int index, int len) {
 754         if (String.COMPACT_STRINGS) {
 755             byte[] buf = compress(val, index, len);
 756             if (buf != null) {
 757                 return new String(buf, LATIN1);
 758             }
 759         }
 760         int last = index + len;
 761         return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16);
 762     }
 763 
 764     public static void fillNull(byte[] val, int index, int end) {
 765         Arrays.fill(val, index << 1, end << 1, (byte)0);
 766     }
 767 
 768     static class CharsSpliterator implements Spliterator.OfInt {
 769         private final byte[] array;
 770         private int index;        // current index, modified on advance/split
 771         private final int fence;  // one past last index
 772         private final int cs;
 773 
 774         CharsSpliterator(byte[] array, int acs) {
 775             this(array, 0, array.length >> 1, acs);
 776         }
 777 
 778         CharsSpliterator(byte[] array, int origin, int fence, int acs) {
 779             this.array = array;
 780             this.index = origin;
 781             this.fence = fence;
 782             this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
 783                       | Spliterator.SUBSIZED;
 784         }
 785 
 786         @Override
 787         public OfInt trySplit() {
 788             int lo = index, mid = (lo + fence) >>> 1;
 789             return (lo >= mid)
 790                    ? null
 791                    : new CharsSpliterator(array, lo, index = mid, cs);
 792         }
 793 
 794         @Override
 795         public void forEachRemaining(IntConsumer action) {
 796             byte[] a; int i, hi; // hoist accesses and checks from loop
 797             if (action == null)
 798                 throw new NullPointerException();
 799             if (((a = array).length >> 1) >= (hi = fence) &&
 800                 (i = index) >= 0 && i < (index = hi)) {
 801                 do { action.accept(getChar(a, i)); } while (++i < hi);
 802             }
 803         }
 804 
 805         @Override
 806         public boolean tryAdvance(IntConsumer action) {
 807             if (action == null)
 808                 throw new NullPointerException();
 809             if (index >= 0 && index < fence) {
 810                 action.accept(getChar(array, index++));
 811                 return true;
 812             }
 813             return false;
 814         }
 815 
 816         @Override
 817         public long estimateSize() { return (long)(fence - index); }
 818 
 819         @Override
 820         public int characteristics() {
 821             return cs;
 822         }
 823     }
 824 
 825     static class CodePointsSpliterator implements Spliterator.OfInt {
 826         private final byte[] array;
 827         private int index;        // current index, modified on advance/split
 828         private final int fence;  // one past last index
 829         private final int cs;
 830 
 831         CodePointsSpliterator(byte[] array, int acs) {
 832             this(array, 0, array.length >> 1, acs);
 833         }
 834 
 835         CodePointsSpliterator(byte[] array, int origin, int fence, int acs) {
 836             this.array = array;
 837             this.index = origin;
 838             this.fence = fence;
 839             this.cs = acs | Spliterator.ORDERED;
 840         }
 841 
 842         @Override
 843         public OfInt trySplit() {
 844             int lo = index, mid = (lo + fence) >>> 1;
 845             if (lo >= mid)
 846                 return null;
 847 
 848             int midOneLess;
 849             // If the mid-point intersects a surrogate pair
 850             if (Character.isLowSurrogate(getChar(array, mid)) &&
 851                 Character.isHighSurrogate(getChar(array, midOneLess = (mid -1)))) {
 852                 // If there is only one pair it cannot be split
 853                 if (lo >= midOneLess)
 854                     return null;
 855                 // Shift the mid-point to align with the surrogate pair
 856                 return new CodePointsSpliterator(array, lo, index = midOneLess, cs);
 857             }
 858             return new CodePointsSpliterator(array, lo, index = mid, cs);
 859         }
 860 
 861         @Override
 862         public void forEachRemaining(IntConsumer action) {
 863             byte[] a; int i, hi; // hoist accesses and checks from loop
 864             if (action == null)
 865                 throw new NullPointerException();
 866             if (((a = array).length >> 1) >= (hi = fence) &&
 867                 (i = index) >= 0 && i < (index = hi)) {
 868                 do {
 869                     i = advance(a, i, hi, action);
 870                 } while (i < hi);
 871             }
 872         }
 873 
 874         @Override
 875         public boolean tryAdvance(IntConsumer action) {
 876             if (action == null)
 877                 throw new NullPointerException();
 878             if (index >= 0 && index < fence) {
 879                 index = advance(array, index, fence, action);
 880                 return true;
 881             }
 882             return false;
 883         }
 884 
 885         // Advance one code point from the index, i, and return the next
 886         // index to advance from
 887         private static int advance(byte[] a, int i, int hi, IntConsumer action) {
 888             char c1 = getChar(a, i++);
 889             int cp = c1;
 890             if (Character.isHighSurrogate(c1) && i < hi) {
 891                 char c2 = getChar(a, i);
 892                 if (Character.isLowSurrogate(c2)) {
 893                     i++;
 894                     cp = Character.toCodePoint(c1, c2);
 895                 }
 896             }
 897             action.accept(cp);
 898             return i;
 899         }
 900 
 901         @Override
 902         public long estimateSize() { return (long)(fence - index); }
 903 
 904         @Override
 905         public int characteristics() {
 906             return cs;
 907         }
 908     }
 909 
 910     ////////////////////////////////////////////////////////////////
 911 
 912     public static void getCharsSB(byte[] val, int srcBegin, int srcEnd, char dst[], int dstBegin) {
 913         checkOffset(srcEnd, val.length >> 1);
 914         getChars(val, srcBegin, srcEnd, dst, dstBegin);
 915     }
 916 
 917     public static void putCharSB(byte[] val, int index, int c) {
 918         checkIndex(index, val.length >> 1);
 919         putChar(val, index, c);
 920     }
 921 
 922     public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) {
 923         checkOffset(index + end - off, val.length >> 1);
 924         putChars(val, index, ca, off, end);
 925     }
 926 
 927     public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) {
 928         checkOffset(index + end - off, val.length >> 1);
 929         for (int i = off; i < end; i++) {
 930             putChar(val, index++, s.charAt(i));
 931         }
 932     }
 933 
 934     public static int codePointAtSB(byte[] val, int index, int end) {
 935         checkOffset(end, val.length >> 1);
 936         return codePointAt(val, index, end);
 937     }
 938 
 939     public static int codePointBeforeSB(byte[] val, int index) {
 940         checkOffset(index, val.length >> 1);
 941         return codePointBefore(val, index);
 942     }
 943 
 944     public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) {
 945         checkOffset(endIndex, val.length >> 1);
 946         return codePointCount(val, beginIndex, endIndex);
 947     }
 948 
 949     public static String newStringSB(byte[] val, int index, int len) {
 950         checkOffset(index + len, val.length >> 1);
 951         return newString(val, index, len);
 952     }
 953 
 954     ////////////////////////////////////////////////////////////////
 955 
 956     private static native boolean isBigEndian();
 957 
 958     static final int HI_BYTE_SHIFT;
 959     static final int LO_BYTE_SHIFT;
 960     static {
 961         if (isBigEndian()) {
 962             HI_BYTE_SHIFT = 8;
 963             LO_BYTE_SHIFT = 0;
 964         } else {
 965             HI_BYTE_SHIFT = 0;
 966             LO_BYTE_SHIFT = 8;
 967         }
 968     }
 969 
 970     static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;
 971 }