1 /*
   2  * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Spliterator;
  31 import java.util.function.IntConsumer;
  32 import jdk.internal.HotSpotIntrinsicCandidate;
  33 
  34 import static java.lang.String.UTF16;
  35 import static java.lang.String.LATIN1;
  36 import static java.lang.String.checkIndex;
  37 import static java.lang.String.checkOffset;
  38 import static java.lang.String.checkBoundsOffCount;
  39 
  40 final class StringUTF16 {
  41 
  42     public static byte[] newBytesFor(int len) {
  43         if (len < 0) {
  44             throw new NegativeArraySizeException();
  45         }
  46         if (len > MAX_LENGTH) {
  47             throw new OutOfMemoryError("UTF16 String size is " + len +
  48                                        ", should be less than " + MAX_LENGTH);
  49         }
  50         return new byte[len << 1];
  51     }
  52 
  53     @HotSpotIntrinsicCandidate
  54     public static void putChar(byte[] val, int index, int c) {
  55         index <<= 1;
  56         val[index++] = (byte)(c >> HI_BYTE_SHIFT);
  57         val[index]   = (byte)(c >> LO_BYTE_SHIFT);
  58     }
  59 
  60     @HotSpotIntrinsicCandidate
  61     public static char getChar(byte[] val, int index) {
  62         index <<= 1;
  63         return (char)(((val[index++] & 0xff) << HI_BYTE_SHIFT) |
  64                       ((val[index]   & 0xff) << LO_BYTE_SHIFT));
  65     }
  66 
  67     public static char charAt(byte[] value, int index) {
  68         if (index < 0 || index >= value.length >> 1) {
  69             throw new StringIndexOutOfBoundsException(index);
  70         }
  71         return getChar(value, index);
  72     }
  73 
  74     public static int length(byte[] value) {
  75         return value.length >> 1;
  76     }
  77 
  78     public static int codePointAt(byte[] value, int index, int end) {
  79         char c1 = getChar(value, index);
  80         if (Character.isHighSurrogate(c1) && ++index < end) {
  81             char c2 = getChar(value, index);
  82             if (Character.isLowSurrogate(c2)) {
  83                return Character.toCodePoint(c1, c2);
  84             }
  85         }
  86         return c1;
  87     }
  88 
  89     public static int codePointBefore(byte[] value, int index) {
  90         char c2 = getChar(value, --index);
  91         if (Character.isLowSurrogate(c2) && index > 0) {
  92             char c1 = getChar(value, --index);
  93             if (Character.isHighSurrogate(c1)) {
  94                return Character.toCodePoint(c1, c2);
  95             }
  96         }
  97         return c2;
  98     }
  99 
 100     public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
 101         int count = endIndex - beginIndex;
 102         for (int i = beginIndex; i < endIndex; ) {
 103             if (Character.isHighSurrogate(getChar(value, i++)) &&
 104                 i < endIndex &&
 105                 Character.isLowSurrogate(getChar(value, i))) {
 106                 count--;
 107                 i++;
 108             }
 109         }
 110         return count;
 111     }
 112 
 113     public static char[] toChars(byte[] value) {
 114         char[] dst = new char[value.length >> 1];
 115         getChars(value, 0, dst.length, dst, 0);
 116         return dst;
 117     }
 118 
 119     @HotSpotIntrinsicCandidate
 120     public static byte[] toBytes(char[] value, int off, int len) {
 121         byte[] val = newBytesFor(len);
 122         for (int i = 0; i < len; i++) {
 123             putChar(val, i, value[off]);
 124             off++;
 125         }
 126         return val;
 127     }
 128 
 129     public static byte[] compress(char[] val, int off, int len) {
 130         byte[] ret = new byte[len];
 131         if (compress(val, off, ret, 0, len) == len) {
 132             return ret;
 133         }
 134         return null;
 135     }
 136 
 137     public static byte[] compress(byte[] val, int off, int len) {
 138         byte[] ret = new byte[len];
 139         if (compress(val, off, ret, 0, len) == len) {
 140             return ret;
 141         }
 142         return null;
 143     }
 144 
 145     // compressedCopy char[] -> byte[]
 146     @HotSpotIntrinsicCandidate
 147     public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
 148         for (int i = 0; i < len; i++) {
 149             char c = src[srcOff];
 150             if (c > 0xFF) {
 151                 len = 0;
 152                 break;
 153             }
 154             dst[dstOff] = (byte)c;
 155             srcOff++;
 156             dstOff++;
 157         }
 158         return len;
 159     }
 160 
 161     // compressedCopy byte[] -> byte[]
 162     @HotSpotIntrinsicCandidate
 163     public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
 164         // We need a range check here because 'getChar' has no checks
 165         checkBoundsOffCount(srcOff << 1, len << 1, src.length);
 166         for (int i = 0; i < len; i++) {
 167             char c = getChar(src, srcOff);
 168             if (c > 0xFF) {
 169                 len = 0;
 170                 break;
 171             }
 172             dst[dstOff] = (byte)c;
 173             srcOff++;
 174             dstOff++;
 175         }
 176         return len;
 177     }
 178 
 179     public static byte[] toBytes(int[] val, int index, int len) {
 180         final int end = index + len;
 181         // Pass 1: Compute precise size of char[]
 182         int n = len;
 183         for (int i = index; i < end; i++) {
 184             int cp = val[i];
 185             if (Character.isBmpCodePoint(cp))
 186                 continue;
 187             else if (Character.isValidCodePoint(cp))
 188                 n++;
 189             else throw new IllegalArgumentException(Integer.toString(cp));
 190         }
 191         // Pass 2: Allocate and fill in <high, low> pair
 192         byte[] buf = newBytesFor(n);
 193         for (int i = index, j = 0; i < end; i++, j++) {
 194             int cp = val[i];
 195             if (Character.isBmpCodePoint(cp)) {
 196                 putChar(buf, j, cp);
 197             } else {
 198                 putChar(buf, j++, Character.highSurrogate(cp));
 199                 putChar(buf, j, Character.lowSurrogate(cp));
 200             }
 201         }
 202         return buf;
 203     }
 204 
 205     public static byte[] toBytes(char c) {
 206         byte[] result = new byte[2];
 207         putChar(result, 0, c);
 208         return result;
 209     }
 210 
 211     @HotSpotIntrinsicCandidate
 212     public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
 213         // We need a range check here because 'getChar' has no checks
 214         if (srcBegin < srcEnd) {
 215             checkBoundsOffCount(srcBegin << 1, (srcEnd - srcBegin) << 1, value.length);
 216         }
 217         for (int i = srcBegin; i < srcEnd; i++) {
 218             dst[dstBegin++] = getChar(value, i);
 219         }
 220     }
 221 
 222     /* @see java.lang.String.getBytes(int, int, byte[], int) */
 223     public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
 224         srcBegin <<= 1;
 225         srcEnd <<= 1;
 226         for (int i = srcBegin + (1 >> LO_BYTE_SHIFT); i < srcEnd; i += 2) {
 227             dst[dstBegin++] = value[i];
 228         }
 229     }
 230 
 231     @HotSpotIntrinsicCandidate
 232     public static boolean equals(byte[] value, byte[] other) {
 233         if (value.length == other.length) {
 234             int len = value.length >> 1;
 235             for (int i = 0; i < len; i++) {
 236                 if (getChar(value, i) != getChar(other, i)) {
 237                     return false;
 238                 }
 239             }
 240             return true;
 241         }
 242         return false;
 243     }
 244 
 245     @HotSpotIntrinsicCandidate
 246     public static int compareTo(byte[] value, byte[] other) {
 247         int len1 = length(value);
 248         int len2 = length(other);
 249         int lim = Math.min(len1, len2);
 250         for (int k = 0; k < lim; k++) {
 251             char c1 = getChar(value, k);
 252             char c2 = getChar(other, k);
 253             if (c1 != c2) {
 254                 return c1 - c2;
 255             }
 256         }
 257         return len1 - len2;
 258     }
 259 
 260     @HotSpotIntrinsicCandidate
 261     public static int compareToLatin1(byte[] value, byte[] other) {
 262         return -StringLatin1.compareToUTF16(other, value);
 263     }
 264 
 265     public static int compareToCI(byte[] value, byte[] other) {
 266         int len1 = length(value);
 267         int len2 = length(other);
 268         int lim = Math.min(len1, len2);
 269         for (int k = 0; k < lim; k++) {
 270             char c1 = getChar(value, k);
 271             char c2 = getChar(other, k);
 272             if (c1 != c2) {
 273                 c1 = Character.toUpperCase(c1);
 274                 c2 = Character.toUpperCase(c2);
 275                 if (c1 != c2) {
 276                     c1 = Character.toLowerCase(c1);
 277                     c2 = Character.toLowerCase(c2);
 278                     if (c1 != c2) {
 279                         return c1 - c2;
 280                     }
 281                 }
 282             }
 283         }
 284         return len1 - len2;
 285     }
 286 
 287     public static int compareToCI_Latin1(byte[] value, byte[] other) {
 288         return -StringLatin1.compareToCI_UTF16(other, value);
 289     }
 290 
 291     public static int hashCode(byte[] value) {
 292         int h = 0;
 293         int length = value.length >> 1;
 294         for (int i = 0; i < length; i++) {
 295             h = 31 * h + getChar(value, i);
 296         }
 297         return h;
 298     }
 299 
 300     public static int indexOf(byte[] value, int ch, int fromIndex) {
 301         int max = value.length >> 1;
 302         if (fromIndex < 0) {
 303             fromIndex = 0;
 304         } else if (fromIndex >= max) {
 305             // Note: fromIndex might be near -1>>>1.
 306             return -1;
 307         }
 308         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 309             // handle most cases here (ch is a BMP code point or a
 310             // negative value (invalid code point))
 311             return indexOfChar(value, ch, fromIndex, max);
 312         } else {
 313             return indexOfSupplementary(value, ch, fromIndex, max);
 314         }
 315     }
 316 
 317     @HotSpotIntrinsicCandidate
 318     public static int indexOf(byte[] value, byte[] str) {
 319         if (str.length == 0) {
 320             return 0;
 321         }
 322         if (value.length == 0) {
 323             return -1;
 324         }
 325         return indexOf(value, length(value), str, length(str), 0);
 326     }
 327 
 328     @HotSpotIntrinsicCandidate
 329     public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 330         char first = getChar(str, 0);
 331         int max = (valueCount - strCount);
 332         for (int i = fromIndex; i <= max; i++) {
 333             // Look for first character.
 334             if (getChar(value, i) != first) {
 335                 while (++i <= max && getChar(value, i) != first);
 336             }
 337             // Found first character, now look at the rest of value
 338             if (i <= max) {
 339                 int j = i + 1;
 340                 int end = j + strCount - 1;
 341                 for (int k = 1; j < end && getChar(value, j) == getChar(str, k); j++, k++);
 342                 if (j == end) {
 343                     // Found whole string.
 344                     return i;
 345                 }
 346             }
 347         }
 348         return -1;
 349     }
 350 
 351     /**
 352      * Handles indexOf Latin1 substring in UTF16 string.
 353      */
 354     @HotSpotIntrinsicCandidate
 355     public static int indexOfLatin1(byte[] value, byte[] str) {
 356         if (str.length == 0) {
 357             return 0;
 358         }
 359         if (value.length == 0) {
 360             return -1;
 361         }
 362         return indexOfLatin1(value, length(value), str, str.length, 0);
 363     }
 364 
 365     @HotSpotIntrinsicCandidate
 366     public static int indexOfLatin1(byte[] src, int srcCount, byte[] tgt, int tgtCount, int fromIndex) {
 367         char first = (char)(tgt[0] & 0xff);
 368         int max = (srcCount - tgtCount);
 369         for (int i = fromIndex; i <= max; i++) {
 370             // Look for first character.
 371             if (getChar(src, i) != first) {
 372                 while (++i <= max && getChar(src, i) != first);
 373             }
 374             // Found first character, now look at the rest of v2
 375             if (i <= max) {
 376                 int j = i + 1;
 377                 int end = j + tgtCount - 1;
 378                 for (int k = 1;
 379                      j < end && getChar(src, j) == (tgt[k] & 0xff);
 380                      j++, k++);
 381                 if (j == end) {
 382                     // Found whole string.
 383                     return i;
 384                 }
 385             }
 386         }
 387         return -1;
 388     }
 389 
 390     @HotSpotIntrinsicCandidate
 391     private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
 392         for (int i = fromIndex; i < max; i++) {
 393             if (getChar(value, i) == ch) {
 394                 return i;
 395             }
 396         }
 397         return -1;
 398     }
 399 
 400     /**
 401      * Handles (rare) calls of indexOf with a supplementary character.
 402      */
 403     private static int indexOfSupplementary(byte[] value, int ch, int fromIndex, int max) {
 404         if (Character.isValidCodePoint(ch)) {
 405             final char hi = Character.highSurrogate(ch);
 406             final char lo = Character.lowSurrogate(ch);
 407             for (int i = fromIndex; i < max - 1; i++) {
 408                 if (getChar(value, i) == hi && getChar(value, i + 1 ) == lo) {
 409                     return i;
 410                 }
 411             }
 412         }
 413         return -1;
 414     }
 415 
 416     public static int lastIndexOf(byte[] src, int srcCount,
 417                                   byte[] tgt, int tgtCount, int fromIndex) {
 418         int min = tgtCount - 1;
 419         int i = min + fromIndex;
 420         int strLastIndex = tgtCount - 1;
 421         char strLastChar = getChar(tgt, strLastIndex);
 422 
 423     startSearchForLastChar:
 424         while (true) {
 425             while (i >= min && getChar(src, i) != strLastChar) {
 426                 i--;
 427             }
 428             if (i < min) {
 429                 return -1;
 430             }
 431             int j = i - 1;
 432             int start = j - strLastIndex;
 433             int k = strLastIndex - 1;
 434             while (j > start) {
 435                 if (getChar(src, j--) != getChar(tgt, k--)) {
 436                     i--;
 437                     continue startSearchForLastChar;
 438                 }
 439             }
 440             return start + 1;
 441         }
 442     }
 443 
 444     public static int lastIndexOf(byte[] value, int ch, int fromIndex) {
 445         if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 446             // handle most cases here (ch is a BMP code point or a
 447             // negative value (invalid code point))
 448             int i = Math.min(fromIndex, (value.length >> 1) - 1);
 449             for (; i >= 0; i--) {
 450                 if (getChar(value, i) == ch) {
 451                     return i;
 452                 }
 453             }
 454             return -1;
 455         } else {
 456             return lastIndexOfSupplementary(value, ch, fromIndex);
 457         }
 458     }
 459 
 460     /**
 461      * Handles (rare) calls of lastIndexOf with a supplementary character.
 462      */
 463     private static int lastIndexOfSupplementary(final byte[] value, int ch, int fromIndex) {
 464         if (Character.isValidCodePoint(ch)) {
 465             char hi = Character.highSurrogate(ch);
 466             char lo = Character.lowSurrogate(ch);
 467             int i = Math.min(fromIndex, (value.length >> 1) - 2);
 468             for (; i >= 0; i--) {
 469                 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) {
 470                     return i;
 471                 }
 472             }
 473         }
 474         return -1;
 475     }
 476 
 477     public static String replace(byte[] value, char oldChar, char newChar) {
 478         int len = value.length >> 1;
 479         int i = -1;
 480         while (++i < len) {
 481             if (getChar(value, i) == oldChar) {
 482                 break;
 483             }
 484         }
 485         if (i < len) {
 486             byte buf[] = new byte[value.length];
 487             for (int j = 0; j < i; j++) {
 488                 putChar(buf, j, getChar(value, j)); // TBD:arraycopy?
 489             }
 490             while (i < len) {
 491                 char c = getChar(value, i);
 492                 putChar(buf, i, c == oldChar ? newChar : c);
 493                 i++;
 494            }
 495            // Check if we should try to compress to latin1
 496            if (String.COMPACT_STRINGS &&
 497                !StringLatin1.canEncode(oldChar) &&
 498                StringLatin1.canEncode(newChar)) {
 499                byte[] val = compress(buf, 0, len);
 500                if (val != null) {
 501                    return new String(val, LATIN1);
 502                }
 503            }
 504            return new String(buf, UTF16);
 505         }
 506         return null;
 507     }
 508 
 509     public static boolean regionMatchesCI(byte[] value, int toffset,
 510                                           byte[] other, int ooffset, int len) {
 511         int last = toffset + len;
 512         while (toffset < last) {
 513             char c1 = getChar(value, toffset++);
 514             char c2 = getChar(other, ooffset++);
 515             if (c1 == c2) {
 516                 continue;
 517             }
 518             // try converting both characters to uppercase.
 519             // If the results match, then the comparison scan should
 520             // continue.
 521             char u1 = Character.toUpperCase(c1);
 522             char u2 = Character.toUpperCase(c2);
 523             if (u1 == u2) {
 524                 continue;
 525             }
 526             // Unfortunately, conversion to uppercase does not work properly
 527             // for the Georgian alphabet, which has strange rules about case
 528             // conversion.  So we need to make one last check before
 529             // exiting.
 530             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 531                 continue;
 532             }
 533             return false;
 534         }
 535         return true;
 536     }
 537 
 538     public static boolean regionMatchesCI_Latin1(byte[] value, int toffset,
 539                                                  byte[] other, int ooffset,
 540                                                  int len) {
 541         return StringLatin1.regionMatchesCI_UTF16(other, ooffset, value, toffset, len);
 542     }
 543 
 544     public static String toLowerCase(String str, byte[] value, Locale locale) {
 545         if (locale == null) {
 546             throw new NullPointerException();
 547         }
 548         int first;
 549         boolean hasSurr = false;
 550         final int len = value.length >> 1;
 551 
 552         // Now check if there are any characters that need to be changed, or are surrogate
 553         for (first = 0 ; first < len; first++) {
 554             int cp = (int)getChar(value, first);
 555             if (Character.isSurrogate((char)cp)) {
 556                 hasSurr = true;
 557                 break;
 558             }
 559             if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
 560                 break;
 561             }
 562         }
 563         if (first == len)
 564             return str;
 565         byte[] result = new byte[value.length];
 566         System.arraycopy(value, 0, result, 0, first << 1);  // Just copy the first few
 567                                                             // lowerCase characters.
 568         String lang = locale.getLanguage();
 569         if (lang == "tr" || lang == "az" || lang == "lt") {
 570             return toLowerCaseEx(str, value, result, first, locale, true);
 571         }
 572         if (hasSurr) {
 573             return toLowerCaseEx(str, value, result, first, locale, false);
 574         }
 575         int bits = 0;
 576         for (int i = first; i < len; i++) {
 577             int cp = (int)getChar(value, i);
 578             if (cp == '\u03A3' ||                       // GREEK CAPITAL LETTER SIGMA
 579                 Character.isSurrogate((char)cp)) {
 580                 return toLowerCaseEx(str, value, result, i, locale, false);
 581             }
 582             if (cp == '\u0130') {                       // LATIN CAPITAL LETTER I WITH DOT ABOVE
 583                 return toLowerCaseEx(str, value, result, i, locale, true);
 584             }
 585             cp = Character.toLowerCase(cp);
 586             if (!Character.isBmpCodePoint(cp)) {
 587                 return toLowerCaseEx(str, value, result, i, locale, false);
 588             }
 589             bits |= cp;
 590             putChar(result, i, cp);
 591         }
 592         if (bits > 0xFF) {
 593             return new String(result, UTF16);
 594         } else {
 595             return newString(result, 0, len);
 596         }
 597     }
 598 
 599     private static String toLowerCaseEx(String str, byte[] value,
 600                                         byte[] result, int first, Locale locale,
 601                                         boolean localeDependent) {
 602         int resultOffset = first;
 603         int length = value.length >> 1;
 604         int srcCount;
 605         for (int i = first; i < length; i += srcCount) {
 606             int srcChar = getChar(value, i);
 607             int lowerChar;
 608             char[] lowerCharArray;
 609             srcCount = 1;
 610             if (Character.isSurrogate((char)srcChar)) {
 611                 srcChar = codePointAt(value, i, length);
 612                 srcCount = Character.charCount(srcChar);
 613             }
 614             if (localeDependent ||
 615                 srcChar == '\u03A3' ||  // GREEK CAPITAL LETTER SIGMA
 616                 srcChar == '\u0130') {  // LATIN CAPITAL LETTER I WITH DOT ABOVE
 617                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
 618             } else {
 619                 lowerChar = Character.toLowerCase(srcChar);
 620             }
 621             if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
 622                 putChar(result, resultOffset++, lowerChar);
 623             } else {
 624                 if (lowerChar == Character.ERROR) {
 625                     lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
 626                 } else {
 627                     lowerCharArray = Character.toChars(lowerChar);
 628                 }
 629                 /* Grow result if needed */
 630                 int mapLen = lowerCharArray.length;
 631                 if (mapLen > srcCount) {
 632                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 633                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 634                     result = result2;
 635                 }
 636                 for (int x = 0; x < mapLen; ++x) {
 637                     putChar(result, resultOffset++, lowerCharArray[x]);
 638                 }
 639             }
 640         }
 641         return newString(result, 0, resultOffset);
 642     }
 643 
 644     public static String toUpperCase(String str, byte[] value, Locale locale) {
 645         if (locale == null) {
 646             throw new NullPointerException();
 647         }
 648         int first;
 649         boolean hasSurr = false;
 650         final int len = value.length >> 1;
 651 
 652         // Now check if there are any characters that need to be changed, or are surrogate
 653         for (first = 0 ; first < len; first++) {
 654             int cp = (int)getChar(value, first);
 655             if (Character.isSurrogate((char)cp)) {
 656                 hasSurr = true;
 657                 break;
 658             }
 659             if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
 660                 break;
 661             }
 662         }
 663         if (first == len) {
 664             return str;
 665         }
 666         byte[] result = new byte[value.length];
 667         System.arraycopy(value, 0, result, 0, first << 1); // Just copy the first few
 668                                                            // upperCase characters.
 669         String lang = locale.getLanguage();
 670         if (lang == "tr" || lang == "az" || lang == "lt") {
 671             return toUpperCaseEx(str, value, result, first, locale, true);
 672         }
 673         if (hasSurr) {
 674             return toUpperCaseEx(str, value, result, first, locale, false);
 675         }
 676         int bits = 0;
 677         for (int i = first; i < len; i++) {
 678             int cp = (int)getChar(value, i);
 679             if (Character.isSurrogate((char)cp)) {
 680                 return toUpperCaseEx(str, value, result, i, locale, false);
 681             }
 682             cp = Character.toUpperCaseEx(cp);
 683             if (!Character.isBmpCodePoint(cp)) {    // Character.ERROR is not bmp
 684                 return toUpperCaseEx(str, value, result, i, locale, false);
 685             }
 686             bits |= cp;
 687             putChar(result, i, cp);
 688         }
 689         if (bits > 0xFF) {
 690             return new String(result, UTF16);
 691         } else {
 692             return newString(result, 0, len);
 693         }
 694     }
 695 
 696     private static String toUpperCaseEx(String str, byte[] value,
 697                                         byte[] result, int first,
 698                                         Locale locale, boolean localeDependent)
 699     {
 700         int resultOffset = first;
 701         int length = value.length >> 1;
 702         int srcCount;
 703         for (int i = first; i < length; i += srcCount) {
 704             int srcChar = getChar(value, i);
 705             int upperChar;
 706             char[] upperCharArray;
 707             srcCount = 1;
 708             if (Character.isSurrogate((char)srcChar)) {
 709                 srcChar = codePointAt(value, i, length);
 710                 srcCount = Character.charCount(srcChar);
 711             }
 712             if (localeDependent) {
 713                 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
 714             } else {
 715                 upperChar = Character.toUpperCaseEx(srcChar);
 716             }
 717             if (Character.isBmpCodePoint(upperChar)) {
 718                 putChar(result, resultOffset++, upperChar);
 719             } else {
 720                 if (upperChar == Character.ERROR) {
 721                     if (localeDependent) {
 722                         upperCharArray =
 723                             ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
 724                     } else {
 725                         upperCharArray = Character.toUpperCaseCharArray(srcChar);
 726                     }
 727                 } else {
 728                     upperCharArray = Character.toChars(upperChar);
 729                 }
 730                 /* Grow result if needed */
 731                 int mapLen = upperCharArray.length;
 732                 if (mapLen > srcCount) {
 733                     byte[] result2 = newBytesFor((result.length >> 1) + mapLen - srcCount);
 734                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 735                     result = result2;
 736                  }
 737                  for (int x = 0; x < mapLen; ++x) {
 738                     putChar(result, resultOffset++, upperCharArray[x]);
 739                  }
 740             }
 741         }
 742         return newString(result, 0, resultOffset);
 743     }
 744 
 745     public static String trim(byte[] value) {
 746         int length = value.length >> 1;
 747         int len = length;
 748         int st = 0;
 749         while (st < len && getChar(value, st) <= ' ') {
 750             st++;
 751         }
 752         while (st < len && getChar(value, len - 1) <= ' ') {
 753             len--;
 754         }
 755         return ((st > 0) || (len < length )) ?
 756             new String(Arrays.copyOfRange(value, st << 1, len << 1), UTF16) :
 757             null;
 758     }
 759 
 760     public static void putChars(byte[] val, int index, char[] str, int off, int end) {
 761         while (off < end) {
 762             putChar(val, index++, str[off++]);
 763         }
 764     }
 765 
 766     public static String newString(byte[] val, int index, int len) {
 767         if (String.COMPACT_STRINGS) {
 768             byte[] buf = compress(val, index, len);
 769             if (buf != null) {
 770                 return new String(buf, LATIN1);
 771             }
 772         }
 773         int last = index + len;
 774         return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16);
 775     }
 776 
 777     public static void fillNull(byte[] val, int index, int end) {
 778         Arrays.fill(val, index << 1, end << 1, (byte)0);
 779     }
 780 
 781     static class CharsSpliterator implements Spliterator.OfInt {
 782         private final byte[] array;
 783         private int index;        // current index, modified on advance/split
 784         private final int fence;  // one past last index
 785         private final int cs;
 786 
 787         CharsSpliterator(byte[] array, int acs) {
 788             this(array, 0, array.length >> 1, acs);
 789         }
 790 
 791         CharsSpliterator(byte[] array, int origin, int fence, int acs) {
 792             this.array = array;
 793             this.index = origin;
 794             this.fence = fence;
 795             this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
 796                       | Spliterator.SUBSIZED;
 797         }
 798 
 799         @Override
 800         public OfInt trySplit() {
 801             int lo = index, mid = (lo + fence) >>> 1;
 802             return (lo >= mid)
 803                    ? null
 804                    : new CharsSpliterator(array, lo, index = mid, cs);
 805         }
 806 
 807         @Override
 808         public void forEachRemaining(IntConsumer action) {
 809             byte[] a; int i, hi; // hoist accesses and checks from loop
 810             if (action == null)
 811                 throw new NullPointerException();
 812             if (((a = array).length >> 1) >= (hi = fence) &&
 813                 (i = index) >= 0 && i < (index = hi)) {
 814                 do {
 815                     action.accept(charAt(a, i));
 816                 } while (++i < hi);
 817             }
 818         }
 819 
 820         @Override
 821         public boolean tryAdvance(IntConsumer action) {
 822             if (action == null)
 823                 throw new NullPointerException();
 824             int i = index;
 825             if (i >= 0 && i < fence) {
 826                 action.accept(charAt(array, i));
 827                 index++;
 828                 return true;
 829             }
 830             return false;
 831         }
 832 
 833         @Override
 834         public long estimateSize() { return (long)(fence - index); }
 835 
 836         @Override
 837         public int characteristics() {
 838             return cs;
 839         }
 840     }
 841 
 842     static class CodePointsSpliterator implements Spliterator.OfInt {
 843         private final byte[] array;
 844         private int index;        // current index, modified on advance/split
 845         private final int fence;  // one past last index
 846         private final int cs;
 847 
 848         CodePointsSpliterator(byte[] array, int acs) {
 849             this(array, 0, array.length >> 1, acs);
 850         }
 851 
 852         CodePointsSpliterator(byte[] array, int origin, int fence, int acs) {
 853             this.array = array;
 854             this.index = origin;
 855             this.fence = fence;
 856             this.cs = acs | Spliterator.ORDERED;
 857         }
 858 
 859         @Override
 860         public OfInt trySplit() {
 861             int lo = index, mid = (lo + fence) >>> 1;
 862             if (lo >= mid)
 863                 return null;
 864 
 865             int midOneLess;
 866             // If the mid-point intersects a surrogate pair
 867             if (Character.isLowSurrogate(charAt(array, mid)) &&
 868                 Character.isHighSurrogate(charAt(array, midOneLess = (mid -1)))) {
 869                 // If there is only one pair it cannot be split
 870                 if (lo >= midOneLess)
 871                     return null;
 872                 // Shift the mid-point to align with the surrogate pair
 873                 return new CodePointsSpliterator(array, lo, index = midOneLess, cs);
 874             }
 875             return new CodePointsSpliterator(array, lo, index = mid, cs);
 876         }
 877 
 878         @Override
 879         public void forEachRemaining(IntConsumer action) {
 880             byte[] a; int i, hi; // hoist accesses and checks from loop
 881             if (action == null)
 882                 throw new NullPointerException();
 883             if (((a = array).length >> 1) >= (hi = fence) &&
 884                 (i = index) >= 0 && i < (index = hi)) {
 885                 do {
 886                     i = advance(a, i, hi, action);
 887                 } while (i < hi);
 888             }
 889         }
 890 
 891         @Override
 892         public boolean tryAdvance(IntConsumer action) {
 893             if (action == null)
 894                 throw new NullPointerException();
 895             if (index >= 0 && index < fence) {
 896                 index = advance(array, index, fence, action);
 897                 return true;
 898             }
 899             return false;
 900         }
 901 
 902         // Advance one code point from the index, i, and return the next
 903         // index to advance from
 904         private static int advance(byte[] a, int i, int hi, IntConsumer action) {
 905             char c1 = charAt(a, i++);
 906             int cp = c1;
 907             if (Character.isHighSurrogate(c1) && i < hi) {
 908                 char c2 = charAt(a, i);
 909                 if (Character.isLowSurrogate(c2)) {
 910                     i++;
 911                     cp = Character.toCodePoint(c1, c2);
 912                 }
 913             }
 914             action.accept(cp);
 915             return i;
 916         }
 917 
 918         @Override
 919         public long estimateSize() { return (long)(fence - index); }
 920 
 921         @Override
 922         public int characteristics() {
 923             return cs;
 924         }
 925     }
 926 
 927     ////////////////////////////////////////////////////////////////
 928 
 929     public static void putCharSB(byte[] val, int index, int c) {
 930         checkIndex(index, val.length >> 1);
 931         putChar(val, index, c);
 932     }
 933 
 934     public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) {
 935         checkOffset(index + end - off, val.length >> 1);
 936         putChars(val, index, ca, off, end);
 937     }
 938 
 939     public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) {
 940         checkOffset(index + end - off, val.length >> 1);
 941         for (int i = off; i < end; i++) {
 942             putChar(val, index++, s.charAt(i));
 943         }
 944     }
 945 
 946     public static int codePointAtSB(byte[] val, int index, int end) {
 947         checkOffset(end, val.length >> 1);
 948         return codePointAt(val, index, end);
 949     }
 950 
 951     public static int codePointBeforeSB(byte[] val, int index) {
 952         checkOffset(index, val.length >> 1);
 953         return codePointBefore(val, index);
 954     }
 955 
 956     public static int codePointCountSB(byte[] val, int beginIndex, int endIndex) {
 957         checkOffset(endIndex, val.length >> 1);
 958         return codePointCount(val, beginIndex, endIndex);
 959     }
 960 
 961     ////////////////////////////////////////////////////////////////
 962 
 963     private static native boolean isBigEndian();
 964 
 965     static final int HI_BYTE_SHIFT;
 966     static final int LO_BYTE_SHIFT;
 967     static {
 968         if (isBigEndian()) {
 969             HI_BYTE_SHIFT = 8;
 970             LO_BYTE_SHIFT = 0;
 971         } else {
 972             HI_BYTE_SHIFT = 0;
 973             LO_BYTE_SHIFT = 8;
 974         }
 975     }
 976 
 977     static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;
 978 }