New src/java.base/share/classes/java/lang/StringUTF16.java

   1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Spliterator;
  31 import java.util.function.IntConsumer;
  32 import jdk.internal.HotSpotIntrinsicCandidate;
  33 
  34 import static java.lang.String.UTF16;
  35 import static java.lang.String.LATIN1;
  36 import static java.lang.String.checkIndex;
  37 import static java.lang.String.checkOffset;
  38 import static java.lang.String.checkBoundsOffCount;
  39 
  40 final class StringUTF16 {
  41 
  42     public static byte[] newBytesFor(int len) {
  43         if (len < 0) {
  44             throw new NegativeArraySizeException();
  45         }
  46         if (len > MAX_LENGTH) {
  47             throw new OutOfMemoryError("UTF16 String size is " + len +
  48                                        ", should be less than " + MAX_LENGTH);
  49         }
  50         return new byte[len << 1];
  51     }
  52 
  53     @HotSpotIntrinsicCandidate
  54     public static void putChar(byte[] val, int index, int c) {
  55         index <<= 1;
  56         val[index++] = (byte)(c >> HI_BYTE_SHIFT);
  57         val[index]   = (byte)(c >> LO_BYTE_SHIFT);
  58     }
  59 
  60     @HotSpotIntrinsicCandidate
  61     public static char getChar(byte[] val, int index) {
  62         index <<= 1;
  63         return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) |
  64                       ((val[index]   & 0xff) << LO_BYTE_SHIFT));
  65     }
  66 
  67     public static char charAt(byte[] value, int index) {
  68         if (index < 0 || index >= value.length >> 1) {
  69             throw new StringIndexOutOfBoundsException(index);
  70         }
  71         return getChar(value, index);
  72     }
  73 
  74     public static int length(byte[] value) {
  75         return value.length >> 1;
  76     }
  77 
  78     public static int codePointAt(byte[] value, int index, int end) {
  79         char c1 = getChar(value, index);
  80         if (Character.isHighSurrogate(c1) && ++index < end) {
  81             char c2 = getChar(value, index);
  82             if (Character.isLowSurrogate(c2)) {
  83                return Character.toCodePoint(c1, c2);
  84             }
  85         }
  86         return c1;
  87     }
  88 
  89     public static int codePointBefore(byte[] value, int index) {
  90         char c2 = getChar(value, --index);
  91         if (Character.isLowSurrogate(c2) && index > 0) {
  92             char c1 = getChar(value, --index);
  93             if (Character.isHighSurrogate(c1)) {
  94                return Character.toCodePoint(c1, c2);
  95             }
  96         }
  97         return c2;
  98     }
  99 
 100     public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
 101         int count = endIndex - beginIndex;
 102         for (int i = beginIndex; i < endIndex; ) {
 103             if (Character.isHighSurrogate(getChar(value, i++)) &&
 104                 i < endIndex &&
 105                 Character.isLowSurrogate(getChar(value, i))) {
 106                 count--;
 107                 i++;
 108             }
 109         }
 110         return count;
 111     }
 112 
 113     public static char[] toChars(byte[] value) {
 114         char[] dst = new char[value.length >> 1];
 115         getChars(value, 0, dst.length, dst, 0);
 116         return dst;
 117     }
 118 
 119     @HotSpotIntrinsicCandidate
 120     public static byte[] toBytes(char[] value, int off, int len) {
 121         byte[] val = newBytesFor(len);
 122         for (int i = 0; i < len; i++) {
 123             putChar(val, i, value[off]);
 124             off++;
 125         }
 126         return val;
 127     }
 128 
 129     public static byte[] compress(char[] val, int off, int len) {
 130         byte[] ret = new byte[len];
 131         if (compress(val, off, ret, 0, len) == len) {
 132             return ret;
 133         }
 134         return null;
 135     }
 136 
 137     public static byte[] compress(byte[] val, int off, int len) {
 138         byte[] ret = new byte[len];
 139         if (compress(val, off, ret, 0, len) == len) {
 140             return ret;
 141         }
 142         return null;
 143     }
 144 
 145     // compressedCopy char[] -> byte[]
 146     @HotSpotIntrinsicCandidate
 147     private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
 148         for (int i = 0; i < len; i++) {
 149             char c = src[srcOff];
 150             if (c > 0xFF) {
 151                 len = 0;
 152                 break;
 153             }
 154             dst[dstOff] = (byte)c;
 155             srcOff++;
 156             dstOff++;
 157         }
 158         return len;
 159     }
 160 
 161     // compressedCopy byte[] -> byte[]
 162     @HotSpotIntrinsicCandidate
 163     public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
 164         // We need a range check here because 'getChar' has no checks
 165         checkBoundsOffCount(srcOff, len, src.length);
 166         for (int i = 0; i < len; i++) {
 167             char c = getChar(src, srcOff);
 168             if (c > 0xFF) {
 169                 len = 0;
 170                 break;
 171             }
 172             dst[dstOff] = (byte)c;
 173             srcOff++;
 174             dstOff++;
 175         }
 176         return len;
 177     }
 178 
 179     public static byte[] toBytes(int[] val, int index, int len) {
 180         final int end = index + len;
 181         // Pass 1: Compute precise size of char[]
 182         int n = len;
 183         for (int i = index; i < end; i++) {
 184             int cp = val[i];
 185             if (Character.isBmpCodePoint(cp))
 186                 continue;
 187             else if (Character.isValidCodePoint(cp))
 188                 n++;
 189             else throw new IllegalArgumentException(Integer.toString(cp));
 190         }
 191         // Pass 2: Allocate and fill in <high, low> pair
 192         byte[] buf = newBytesFor(n);
 193         for (int i = index, j = 0; i < end; i++, j++) {
 194             int cp = val[i];
 195             if (Character.isBmpCodePoint(cp)) {
 196                 putChar(buf, j, cp);
 197             } else {
 198                 putChar(buf, j++, Character.highSurrogate(cp));
 199                 putChar(buf, j, Character.lowSurrogate(cp));
 200             }
 201         }
 202         return buf;
 203     }
 204 
 205     public static byte[] toBytes(char c) {
 206         byte[] result = new byte[2];
 207         putChar(result, 0, c);
 208         return result;
 209     }
 210 
 211     @HotSpotIntrinsicCandidate
 212     public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
 213         // We need a range check here because 'getChar' has no checks
 214         checkBoundsOffCount(srcBegin, srcEnd - srcBegin, value.length);
 215         for (int i = srcBegin; i < srcEnd; i++) {
 216             dst[dstBegin++] = getChar(value, i);
 217         }
 218     }
 219 
 220     /* @see java.lang.String.getBytes(int, int, byte[], int) */
 221     public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
 222         srcBegin <<= 1;
 223         srcEnd <<= 1;
 224         for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) {
 225             dst[dstBegin++] = value[i];
 226         }
 227     }
 228 
 229     @HotSpotIntrinsicCandidate
 230     public static boolean equals(byte[] value, byte[] other) {
 231         if (value.length == other.length) {
 232             int len = value.length >> 1;
 233             for (int i = 0; i < len; i++) {
 234                 if (getChar(value, i) != getChar(other, i)) {
 235                     return false;
 236                 }
 237             }
 238             return true;
 239         }
 240         return false;
 241     }
 242 
 243     @HotSpotIntrinsicCandidate
 244     public static int compareTo(byte[] value, byte[] other) {
 245         int len1 = length(value);
 246         int len2 = length(other);
 247         int lim = Math.min(len1, len2);
 248         for (int k = 0; k < lim; k++) {
 249             char c1 = getChar(value, k);
 250             char c2 = getChar(other, k);
 251             if (c1 != c2) {
 252                 return c1 - c2;
 253             }
 254         }
 255         return len1 - len2;
 256     }
 257 
 258     @HotSpotIntrinsicCandidate
 259     public static int compareToLatin1(byte[] value, byte[] other) {
 260         int len1 = length(value);
 261         int len2 = StringLatin1.length(other);
 262         int lim = Math.min(len1, len2);
 263         for (int k = 0; k < lim; k++) {
 264             char c1 = getChar(value, k);
 265             char c2 = StringLatin1.getChar(other, k);
 266             if (c1 != c2) {
 267                 return c1 - c2;
 268             }
 269         }
 270         return len1 - len2;
 271     }
 272 
 273     public static int compareToCI(byte[] value, byte[] other) {
 274         int len1 = length(value);
 275         int len2 = length(other);
 276         int lim = Math.min(len1, len2);
 277         for (int k = 0; k < lim; k++) {
 278             char c1 = getChar(value, k);
 279             char c2 = getChar(other, k);
 280             if (c1 != c2) {
 281                 c1 = Character.toUpperCase(c1);
 282                 c2 = Character.toUpperCase(c2);
 283                 if (c1 != c2) {
 284                     c1 = Character.toLowerCase(c1);
 285                     c2 = Character.toLowerCase(c2);
 286                     if (c1 != c2) {
 287                         return c1 - c2;
 288                     }
 289                 }
 290             }
 291         }
 292         return len1 - len2;
 293     }
 294 
 295     public static int compareToCI_Latin1(byte[] value, byte[] other) {
 296         int len1 = length(value);
 297         int len2 = StringLatin1.length(other);
 298         int lim = Math.min(len1, len2);
 299         for (int k = 0; k < lim; k++) {
 300             char c1 = getChar(value, k);
 301             char c2 = StringLatin1.getChar(other, k);
 302             if (c1 != c2) {
 303                 c1 = Character.toUpperCase(c1);
 304                 c2 = Character.toUpperCase(c2);
 305                 if (c1 != c2) {
 306                     c1 = Character.toLowerCase(c1);
 307                     c2 = Character.toLowerCase(c2);
 308                     if (c1 != c2) {
 309                         return c1 - c2;
 310                     }
 311                 }
 312             }
 313         }
 314         return len1 - len2;
 315     }
 316 
 317     public static int hashCode(byte[] value) {
 318         int h = 0;
 319         int length = value.length >> 1;
 320         for (int i = 0; i < length; i++) {
 321             h = 31 * h + getChar(value, i);
 322         }
 323         return h;
 324     }
 325 
 326     public static int indexOf(byte[] value, int ch, int fromIndex) {
 327         int max = value.length >> 1;
 328         if (fromIndex < 0) {
 329             fromIndex = 0;
 330         } else if (fromIndex >= max) {
 331             // Note: fromIndex might be near -1>>>1.
 332             return -1;
 333         }
 334         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 335             // handle most cases here (ch is a BMP code point or a
 336             // negative value (invalid code point))
 337             return indexOfChar(value, ch, fromIndex, max);
 338         } else {
 339             return indexOfSupplementary(value, ch, fromIndex, max);
 340         }
 341     }
 342 
 343     @HotSpotIntrinsicCandidate
 344     public static int indexOf(byte[] value, byte[] str) {
 345         if (str.length == 0) {
 346             return 0;
 347         }
 348         if (value.length == 0) {
 349             return -1;
 350         }
 351         return indexOf(value, length(value), str, length(str), 0);
 352     }
 353 
 354     @HotSpotIntrinsicCandidate
 355     public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 356         char first = getChar(str, 0);
 357         int max = (valueCount - strCount);
 358         for (int i = fromIndex; i <= max; i++) {
 359             // Look for first character.
 360             if (getChar(value, i) != first) {
 361                 while (++i <= max && getChar(value, i) != first);
 362             }
 363             // Found first character, now look at the rest of value
 364             if (i <= max) {
 365                 int j = i + 1;
 366                 int end = j + strCount - 1;
 367                 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++);
 368                 if (j == end) {
 369                     // Found whole string.
 370                     return i;
 371                 }
 372             }
 373         }
 374         return -1;
 375     }
 376 
 377     /**
 378      * Handles indexOf Latin1 substring in UTF16 string.
 379      */
 380     @HotSpotIntrinsicCandidate
 381     public static int indexOfLatin1(byte[] value, byte[] str) {
 382         if (str.length == 0) {
 383             return 0;
 384         }
 385         if (value.length == 0) {
 386             return -1;
 387         }
 388         return indexOfLatin1(value, length(value), str, str.length, 0);
 389     }
 390 
 391     @HotSpotIntrinsicCandidate
 392     public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
 393         char first = (char)(tgt[0] & 0xff);
 394         int max = (srcCount - tgtCount);
 395         for (int i = fromIndex; i <= max; i++) {
 396             // Look for first character.
 397             if (getChar(src, i) != first) {
 398                 while (++i <= max && getChar(src, i) != first);
 399             }
 400             // Found first character, now look at the rest of v2
 401             if (i <= max) {
 402                 int j = i + 1;
 403                 int end = j + tgtCount - 1;
 404                 for (int k = 1;
 405                      j < end && getChar(src, j) == (tgt[k] & 0xff);
 406                      j++, k++);
 407                 if (j == end) {
 408                     // Found whole string.
 409                     return i;
 410                 }
 411             }
 412         }
 413         return -1;
 414     }
 415 
 416     @HotSpotIntrinsicCandidate
 417     private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
 418         for (int i = fromIndex; i < max; i++) {
 419             if (getChar(value, i) == ch) {
 420                 return i;
 421             }
 422         }
 423         return -1;
 424     }
 425 
 426     /**
 427      * Handles (rare) calls of indexOf with a supplementary character.
 428      */
 429     private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) {
 430         if (Character.isValidCodePoint(ch)) {
 431             final char hi = Character.highSurrogate(ch);
 432             final char lo = Character.lowSurrogate(ch);
 433             for (int i = fromIndex; i < max - 1; i++) {
 434                 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) {
 435                     return i;
 436                 }
 437             }
 438         }
 439         return -1;
 440     }
 441 
 442     public static int lastIndexOf(byte[] src, int srcCount,
 443                                   byte[] tgt, int tgtCount, int fromIndex) {
 444         int min = tgtCount - 1;
 445         int i = min + fromIndex;
 446         int strLastIndex = tgtCount - 1;
 447         char strLastChar = getChar(tgt, strLastIndex);
 448 
 449     startSearchForLastChar:
 450         while (true) {
 451             while (i >= min && getChar(src, i) != strLastChar) {
 452                 i--;
 453             }
 454             if (i < min) {
 455                 return -1;
 456             }
 457             int j = i - 1;
 458             int start = j - strLastIndex;
 459             int k = strLastIndex - 1;
 460             while (j > start) {
 461                 if (getChar(src, j--) != getChar(tgt, k--)) {
 462                     i--;
 463                     continue startSearchForLastChar;
 464                 }
 465             }
 466             return start + 1;
 467         }
 468     }
 469 
 470     public static int lastIndexOf(byte[] value, int ch, int fromIndex) {
 471         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 472             // handle most cases here (ch is a BMP code point or a
 473             // negative value (invalid code point))
 474             int i = Math.min(fromIndex, (value.length >> 1) - 1);
 475             for (; i >= 0; i--) {
 476                 if (getChar(value, i) == ch) {
 477                     return i;
 478                 }
 479             }
 480             return -1;
 481         } else {
 482             return lastIndexOfSupplementary(value, ch, fromIndex);
 483         }
 484     }
 485 
 486     /**
 487      * Handles (rare) calls of lastIndexOf with a supplementary character.
 488      */
 489     private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) {
 490         if (Character.isValidCodePoint(ch)) {
 491             char hi = Character.highSurrogate(ch);
 492             char lo = Character.lowSurrogate(ch);
 493             int i = Math.min(fromIndex, (value.length >> 1) - 2);
 494             for (; i >= 0; i--) {
 495                 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) {
 496                     return i;
 497                 }
 498             }
 499         }
 500         return -1;
 501     }
 502 
 503     public static String replace(byte[] value, char oldChar, char newChar) {
 504         int len = value.length >> 1;
 505         int i = -1;
 506         while (++i < len) {
 507             if (getChar(value, i) == oldChar) {
 508                 break;
 509             }
 510         }
 511         if (i < len) {
 512             byte buf[] = new byte[value.length];
 513             for (int j = 0; j < i; j++) {
 514                 putChar(buf, j, getChar(value, j)); // TBD:arraycopy?
 515             }
 516             while (i < len) {
 517                 char c = getChar(value, i);
 518                 putChar(buf, i, c == oldChar ? newChar : c);
 519                 i++;
 520            }
 521            // Check if we should try to compress to latin1
 522            if (String.COMPACT_STRINGS &&
 523                !StringLatin1.canEncode(oldChar) &&
 524                StringLatin1.canEncode(newChar)) {
 525                byte[] val = compress(buf, 0, len);
 526                if (val != null) {
 527                    return new String(val, LATIN1);
 528                }
 529            }
 530            return new String(buf, UTF16);
 531         }
 532         return null;
 533     }
 534 
 535     public static boolean regionMatchesCI(byte[] value, int toffset,
 536                                           byte[] other, int ooffset, int len) {
 537         int last = toffset + len;
 538         while (toffset < last) {
 539             char c1 = getChar(value, toffset++);
 540             char c2 = getChar(other, ooffset++);
 541             if (c1 == c2) {
 542                 continue;
 543             }
 544             // try converting both characters to uppercase.
 545             // If the results match, then the comparison scan should
 546             // continue.
 547             char u1 = Character.toUpperCase(c1);
 548             char u2 = Character.toUpperCase(c2);
 549             if (u1 == u2) {
 550                 continue;
 551             }
 552             // Unfortunately, conversion to uppercase does not work properly
 553             // for the Georgian alphabet, which has strange rules about case
 554             // conversion.  So we need to make one last check before
 555             // exiting.
 556             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 557                 continue;
 558             }
 559             return false;
 560         }
 561         return true;
 562     }
 563 
 564     public static boolean regionMatchesCI_Latin1(byte[] value, int toffset,
 565                                                  byte[] other, int ooffset,
 566                                                  int len) {
 567         int last = toffset + len;
 568         while (toffset < last) {
 569             char c1 = getChar(value, toffset++);
 570             char c2 = (char)(other[ooffset++] & 0xff);
 571             if (c1 == c2) {
 572                 continue;
 573             }
 574             char u1 = Character.toUpperCase(c1);
 575             char u2 = Character.toUpperCase(c2);
 576             if (u1 == u2) {
 577                 continue;
 578             }
 579             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 580                 continue;
 581             }
 582             return false;
 583         }
 584         return true;
 585     }
 586 
 587     public static String toLowerCase(String str, byte[] value, Locale locale) {
 588         if (locale == null) {
 589             throw new NullPointerException();
 590         }
 591         int first;
 592         boolean hasSurr = false;
 593         final int len = value.length >> 1;
 594 
 595         // Now check if there are any characters that need to be changed, or are surrogate
 596         for (first = 0 ; first < len; first++) {
 597             int cp = (int)getChar(value, first);
 598             if (Character.isSurrogate((char)cp)) {
 599                 hasSurr = true;
 600                 break;
 601             }
 602             if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
 603                 break;
 604             }
 605         }
 606         if (first == len)
 607             return str;
 608         byte[] result = new byte[value.length];
 609         System.arraycopy(value, 0, result, 0, first << 1);  // Just copy the first few
 610                                                             // lowerCase characters.
 611         String lang = locale.getLanguage();
 612         if (lang == "tr" || lang == "az" || lang == "lt") {
 613             return toLowerCaseEx(str, value, result, first, locale, true);
 614         }
 615         if (hasSurr) {
 616             return toLowerCaseEx(str, value, result, first, locale, false);
 617         }
 618         int bits = 0;
 619         for (int i = first; i < len; i++) {
 620             int cp = (int)getChar(value, i);
 621             if (cp == '\u03A3' ||                       // GREEK CAPITAL LETTER SIGMA
 622                 Character.isSurrogate((char)cp)) {
 623                 return toLowerCaseEx(str, value, result, i, locale, false);
 624             }
 625             if (cp == '\u0130') {                       // LATIN CAPITAL LETTER I WITH DOT ABOVE
 626                 return toLowerCaseEx(str, value, result, i, locale, true);
 627             }
 628             cp = Character.toLowerCase(cp);
 629             if (!Character.isBmpCodePoint(cp)) {
 630                 return toLowerCaseEx(str, value, result, i, locale, false);
 631             }
 632             bits |= cp;
 633             putChar(result, i, cp);
 634         }
 635         if (bits > 0xFF) {
 636             return new String(result, UTF16);
 637         } else {
 638             return newString(result, 0, len);
 639         }
 640     }
 641 
 642     private static String toLowerCaseEx(String str, byte[] value,
 643                                         byte[] result, int first, Locale locale,
 644                                         boolean localeDependent) {
 645         int resultOffset = first;
 646         int length = value.length >> 1;
 647         int srcCount;
 648         for (int i = first; i < length; i += srcCount) {
 649             int srcChar = getChar(value, i);
 650             int lowerChar;
 651             char[] lowerCharArray;
 652             srcCount = 1;
 653             if (Character.isSurrogate((char)srcChar)) {
 654                 srcChar = codePointAt(value, i, length);
 655                 srcCount = Character.charCount(srcChar);
 656             }
 657             if (localeDependent ||
 658                 srcChar == '\u03A3' ||  // GREEK CAPITAL LETTER SIGMA
 659                 srcChar == '\u0130') {  // LATIN CAPITAL LETTER I WITH DOT ABOVE
 660                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
 661             } else {
 662                 lowerChar = Character.toLowerCase(srcChar);
 663             }
 664             if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
 665                 putChar(result, resultOffset++, lowerChar);
 666             } else {
 667                 if (lowerChar == Character.ERROR) {
 668                     lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
 669                 } else {
 670                     lowerCharArray = Character.toChars(lowerChar);
 671                 }
 672                 /* Grow result if needed */
 673                 int mapLen = lowerCharArray.length;
 674                 if (mapLen > srcCount) {
 675                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 676                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 677                     result = result2;
 678                 }
 679                 for (int x = 0; x < mapLen; ++x) {
 680                     putChar(result, resultOffset++, lowerCharArray[x]);
 681                 }
 682             }
 683         }
 684         return newString(result, 0, resultOffset);
 685     }
 686 
 687     public static String toUpperCase(String str, byte[] value, Locale locale) {
 688         if (locale == null) {
 689             throw new NullPointerException();
 690         }
 691         int first;
 692         boolean hasSurr = false;
 693         final int len = value.length >> 1;
 694 
 695         // Now check if there are any characters that need to be changed, or are surrogate
 696         for (first = 0 ; first < len; first++) {
 697             int cp = (int)getChar(value, first);
 698             if (Character.isSurrogate((char)cp)) {
 699                 hasSurr = true;
 700                 break;
 701             }
 702             if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
 703                 break;
 704             }
 705         }
 706         if (first == len) {
 707             return str;
 708         }
 709         byte[] result = new byte[value.length];
 710         System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few
 711                                                            // upperCase characters.
 712         String lang = locale.getLanguage();
 713         if (lang == "tr" || lang == "az" || lang == "lt") {
 714             return toUpperCaseEx(str, value, result, first, locale, true);
 715         }
 716         if (hasSurr) {
 717             return toUpperCaseEx(str, value, result, first, locale, false);
 718         }
 719         int bits = 0;
 720         for (int i = first; i < len; i++) {
 721             int cp = (int)getChar(value, i);
 722             if (Character.isSurrogate((char)cp)) {
 723                 return toUpperCaseEx(str, value, result, i, locale, false);
 724             }
 725             cp = Character.toUpperCaseEx(cp);
 726             if (!Character.isBmpCodePoint(cp)) {    // Character.ERROR is not bmp
 727                 return toUpperCaseEx(str, value, result, i, locale, false);
 728             }
 729             bits |= cp;
 730             putChar(result, i, cp);
 731         }
 732         if (bits > 0xFF) {
 733             return new String(result, UTF16);
 734         } else {
 735             return newString(result, 0, len);
 736         }
 737     }
 738 
 739     private static String toUpperCaseEx(String str, byte[] value,
 740                                         byte[] result, int first,
 741                                         Locale locale, boolean localeDependent)
 742     {
 743         int resultOffset = first;
 744         int length = value.length >> 1;
 745         int srcCount;
 746         for (int i = first; i < length; i += srcCount) {
 747             int srcChar = getChar(value, i);
 748             int upperChar;
 749             char[] upperCharArray;
 750             srcCount = 1;
 751             if (Character.isSurrogate((char)srcChar)) {
 752                 srcChar = codePointAt(value, i, length);
 753                 srcCount = Character.charCount(srcChar);
 754             }
 755             if (localeDependent) {
 756                 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
 757             } else {
 758                 upperChar = Character.toUpperCaseEx(srcChar);
 759             }
 760             if (Character.isBmpCodePoint(upperChar)) {
 761                 putChar(result, resultOffset++, upperChar);
 762             } else {
 763                 if (upperChar == Character.ERROR) {
 764                     if (localeDependent) {
 765                         upperCharArray =
 766                             ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
 767                     } else {
 768                         upperCharArray = Character.toUpperCaseCharArray(srcChar);
 769                     }
 770                 } else {
 771                     upperCharArray = Character.toChars(upperChar);
 772                 }
 773                 /* Grow result if needed */
 774                 int mapLen = upperCharArray.length;
 775                 if (mapLen > srcCount) {
 776                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 777                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 778                     result = result2;
 779                  }
 780                  for (int x = 0; x < mapLen; ++x) {
 781                     putChar(result, resultOffset++, upperCharArray[x]);
 782                  }
 783             }
 784         }
 785         return newString(result, 0, resultOffset);
 786     }
 787 
 788     public static String trim(byte[] value) {
 789         int length = value.length >> 1;
 790         int len = length;
 791         int st = 0;
 792         while (st < len && getChar(value, st) <= ' ') {
 793             st++;
 794         }
 795         while (st < len && getChar(value, len - 1) <= ' ') {
 796             len--;
 797         }
 798         return ((st > 0) || (len < length )) ?
 799             new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) :
 800             null;
 801     }
 802 
 803     public static void putChars(byte[] val, int index, char[] str, int off, int end) {
 804         while (off < end) {
 805             putChar(val, index++, str[off++]);
 806         }
 807     }
 808 
 809     public static String newString(byte[] val, int index, int len) {
 810         if (String.COMPACT_STRINGS) {
 811             byte[] buf = compress(val, index, len);
 812             if (buf != null) {
 813                 return new String(buf, LATIN1);
 814             }
 815         }
 816         int last = index + len;
 817         return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16);
 818     }
 819 
 820     public static void fillNull(byte[] val, int index, int end) {
 821         Arrays.fill(val, index << 1, end << 1, (byte)0);
 822     }
 823 
 824     static class CharsSpliterator implements Spliterator.OfInt {
 825         private final byte[] array;
 826         private int index;        // current index, modified on advance/split
 827         private final int fence;  // one past last index
 828         private final int cs;
 829 
 830         CharsSpliterator(byte[] array, int acs) {
 831             this(array, 0, array.length >> 1, acs);
 832         }
 833 
 834         CharsSpliterator(byte[] array, int origin, int fence, int acs) {
 835             this.array = array;
 836             this.index = origin;
 837             this.fence = fence;
 838             this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
 839                       | Spliterator.SUBSIZED;
 840         }
 841 
 842         @Override
 843         public OfInt trySplit() {
 844             int lo = index, mid = (lo + fence) >>> 1;
 845             return (lo >= mid)
 846                    ? null
 847                    : new CharsSpliterator(array, lo, index = mid, cs);
 848         }
 849 
 850         @Override
 851         public void forEachRemaining(IntConsumer action) {
 852             byte[] a; int i, hi; // hoist accesses and checks from loop
 853             if (action == null)
 854                 throw new NullPointerException();
 855             if (((a = array).length >> 1) >= (hi = fence) &&
 856                 (i = index) >= 0 && i < (index = hi)) {
 857                 do { action.accept(getChar(a, i)); } while (++i < hi);
 858             }
 859         }
 860 
 861         @Override
 862         public boolean tryAdvance(IntConsumer action) {
 863             if (action == null)
 864                 throw new NullPointerException();
 865             if (index >= 0 && index < fence) {
 866                 action.accept(getChar(array, index++));
 867                 return true;
 868             }
 869             return false;
 870         }
 871 
 872         @Override
 873         public long estimateSize() { return (long)(fence - index); }
 874 
 875         @Override
 876         public int characteristics() {
 877             return cs;
 878         }
 879     }
 880 
 881     static class CodePointsSpliterator implements Spliterator.OfInt {
 882         private final byte[] array;
 883         private int index;        // current index, modified on advance/split
 884         private final int fence;  // one past last index
 885         private final int cs;
 886 
 887         CodePointsSpliterator(byte[] array, int acs) {
 888             this(array, 0, array.length >> 1, acs);
 889         }
 890 
 891         CodePointsSpliterator(byte[] array, int origin, int fence, int acs) {
 892             this.array = array;
 893             this.index = origin;
 894             this.fence = fence;
 895             this.cs = acs | Spliterator.ORDERED;
 896         }
 897 
 898         @Override
 899         public OfInt trySplit() {
 900             int lo = index, mid = (lo + fence) >>> 1;
 901             if (lo >= mid)
 902                 return null;
 903 
 904             int midOneLess;
 905             // If the mid-point intersects a surrogate pair
 906             if (Character.isLowSurrogate(getChar(array, mid)) &&
 907                 Character.isHighSurrogate(getChar(array, midOneLess = (mid -1)))) {
 908                 // If there is only one pair it cannot be split
 909                 if (lo >= midOneLess)
 910                     return null;
 911                 // Shift the mid-point to align with the surrogate pair
 912                 return new CodePointsSpliterator(array, lo, index = midOneLess, cs);
 913             }
 914             return new CodePointsSpliterator(array, lo, index = mid, cs);
 915         }
 916 
 917         @Override
 918         public void forEachRemaining(IntConsumer action) {
 919             byte[] a; int i, hi; // hoist accesses and checks from loop
 920             if (action == null)
 921                 throw new NullPointerException();
 922             if (((a = array).length >> 1) >= (hi = fence) &&
 923                 (i = index) >= 0 && i < (index = hi)) {
 924                 do {
 925                     i = advance(a, i, hi, action);
 926                 } while (i < hi);
 927             }
 928         }
 929 
 930         @Override
 931         public boolean tryAdvance(IntConsumer action) {
 932             if (action == null)
 933                 throw new NullPointerException();
 934             if (index >= 0 && index < fence) {
 935                 index = advance(array, index, fence, action);
 936                 return true;
 937             }
 938             return false;
 939         }
 940 
 941         // Advance one code point from the index, i, and return the next
 942         // index to advance from
 943         private static int advance(byte[] a, int i, int hi, IntConsumer action) {
 944             char c1 = getChar(a, i++);
 945             int cp = c1;
 946             if (Character.isHighSurrogate(c1) && i < hi) {
 947                 char c2 = getChar(a, i);
 948                 if (Character.isLowSurrogate(c2)) {
 949                     i++;
 950                     cp = Character.toCodePoint(c1, c2);
 951                 }
 952             }
 953             action.accept(cp);
 954             return i;
 955         }
 956 
 957         @Override
 958         public long estimateSize() { return (long)(fence - index); }
 959 
 960         @Override
 961         public int characteristics() {
 962             return cs;
 963         }
 964     }
 965 
 966     ////////////////////////////////////////////////////////////////
 967 
 968     public static void putCharSB(byte[] val, int index, int c) {
 969         checkIndex(index, val.length >> 1);
 970         putChar(val, index, c);
 971     }
 972 
 973     public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) {
 974         checkOffset(index + end - off, val.length >> 1);
 975         putChars(val, index, ca, off, end);
 976     }
 977 
 978     public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) {
 979         checkOffset(index + end - off, val.length >> 1);
 980         for (int i = off; i < end; i++) {
 981             putChar(val, index++, s.charAt(i));
 982         }
 983     }
 984 
 985     public static int codePointAtSB(byte[] val, int index, int end) {
 986         checkOffset(end, val.length >> 1);
 987         return codePointAt(val, index, end);
 988     }
 989 
 990     public static int codePointBeforeSB(byte[] val, int index) {
 991         checkOffset(index, val.length >> 1);
 992         return codePointBefore(val, index);
 993     }
 994 
 995     public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) {
 996         checkOffset(endIndex, val.length >> 1);
 997         return codePointCount(val, beginIndex, endIndex);
 998     }
 999 
1000     ////////////////////////////////////////////////////////////////
1001 
1002     private static native boolean isBigEndian();
1003 
1004     static final int HI_BYTE_SHIFT;
1005     static final int LO_BYTE_SHIFT;
1006     static {
1007         if (isBigEndian()) {
1008             HI_BYTE_SHIFT = 8;
1009             LO_BYTE_SHIFT = 0;
1010         } else {
1011             HI_BYTE_SHIFT = 0;
1012             LO_BYTE_SHIFT = 8;
1013         }
1014     }
1015 
1016     static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;
1017 }