New src/java.base/share/classes/java/lang/StringLatin1.java

   1 /*
   2  * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Objects;
  31 import java.util.Spliterator;
  32 import java.util.function.IntConsumer;
  33 import java.util.stream.IntStream;
  34 import jdk.internal.HotSpotIntrinsicCandidate;
  35 
  36 import static java.lang.String.LATIN1;
  37 import static java.lang.String.UTF16;
  38 import static java.lang.String.checkOffset;
  39 
  40 final class StringLatin1 {
  41 
  42     public static char charAt(byte[] value, int index) {
  43         if (index < 0 || index >= value.length) {
  44             throw new StringIndexOutOfBoundsException(index);
  45         }
  46         return (char)(value[index] & 0xff);
  47     }
  48 
  49     public static boolean canEncode(int cp) {
  50         return cp >>> 8 == 0;
  51     }
  52 
  53     public static int length(byte[] value) {
  54         return value.length;
  55     }
  56 
  57     public static int codePointAt(byte[] value, int index, int end) {
  58         return value[index] & 0xff;
  59     }
  60 
  61     public static int codePointBefore(byte[] value, int index) {
  62         return value[index - 1] & 0xff;
  63     }
  64 
  65     public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
  66         return endIndex - beginIndex;
  67     }
  68 
  69     public static char[] toChars(byte[] value) {
  70         char[] dst = new char[value.length];
  71         inflate(value, 0, dst, 0, value.length);
  72         return dst;
  73     }
  74 
  75     public static byte[] inflate(byte[] value, int off, int len) {
  76         byte[] ret = StringUTF16.newBytesFor(len);
  77         inflate(value, off, ret, 0, len);
  78         return ret;
  79     }
  80 
  81     public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
  82         inflate(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
  83     }
  84 
  85     public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
  86         System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
  87     }
  88 
  89     @HotSpotIntrinsicCandidate
  90     public static boolean equals(byte[] value, byte[] other) {
  91         if (value.length == other.length) {
  92             for (int i = 0; i < value.length; i++) {
  93                 if (value[i] != other[i]) {
  94                     return false;
  95                 }
  96             }
  97             return true;
  98         }
  99         return false;
 100     }
 101 
 102     @HotSpotIntrinsicCandidate
 103     public static int compareTo(byte[] value, byte[] other) {
 104         int len1 = value.length;
 105         int len2 = other.length;
 106         return compareTo(value, other, len1, len2);
 107     }
 108 
 109     public static int compareTo(byte[] value, byte[] other, int len1, int len2) {
 110         int lim = Math.min(len1, len2);
 111         for (int k = 0; k < lim; k++) {
 112             if (value[k] != other[k]) {
 113                 return getChar(value, k) - getChar(other, k);
 114             }
 115         }
 116         return len1 - len2;
 117     }
 118 
 119     @HotSpotIntrinsicCandidate
 120     public static int compareToUTF16(byte[] value, byte[] other) {
 121         int len1 = length(value);
 122         int len2 = StringUTF16.length(other);
 123         return compareToUTF16Values(value, other, len1, len2);
 124     }
 125 
 126     /*
 127      * Checks the boundary and then compares the byte arrays.
 128      */
 129     public static int compareToUTF16(byte[] value, byte[] other, int len1, int len2) {
 130         checkOffset(len1, length(value));
 131         checkOffset(len2, StringUTF16.length(other));
 132 
 133         return compareToUTF16Values(value, other, len1, len2);
 134     }
 135 
 136     private static int compareToUTF16Values(byte[] value, byte[] other, int len1, int len2) {
 137         int lim = Math.min(len1, len2);
 138         for (int k = 0; k < lim; k++) {
 139             char c1 = getChar(value, k);
 140             char c2 = StringUTF16.getChar(other, k);
 141             if (c1 != c2) {
 142                 return c1 - c2;
 143             }
 144         }
 145         return len1 - len2;
 146     }
 147 
 148     public static int compareToCI(byte[] value, byte[] other) {
 149         int len1 = value.length;
 150         int len2 = other.length;
 151         int lim = Math.min(len1, len2);
 152         for (int k = 0; k < lim; k++) {
 153             if (value[k] != other[k]) {
 154                 char c1 = (char) CharacterDataLatin1.instance.toUpperCase(getChar(value, k));
 155                 char c2 = (char) CharacterDataLatin1.instance.toUpperCase(getChar(other, k));
 156                 if (c1 != c2) {
 157                     c1 = Character.toLowerCase(c1);
 158                     c2 = Character.toLowerCase(c2);
 159                     if (c1 != c2) {
 160                         return c1 - c2;
 161                     }
 162                 }
 163             }
 164         }
 165         return len1 - len2;
 166     }
 167 
 168     public static int compareToCI_UTF16(byte[] value, byte[] other) {
 169         int len1 = length(value);
 170         int len2 = StringUTF16.length(other);
 171         int lim = Math.min(len1, len2);
 172         for (int k = 0; k < lim; k++) {
 173             char c1 = getChar(value, k);
 174             char c2 = StringUTF16.getChar(other, k);
 175             if (c1 != c2) {
 176                 c1 = Character.toUpperCase(c1);
 177                 c2 = Character.toUpperCase(c2);
 178                 if (c1 != c2) {
 179                     c1 = Character.toLowerCase(c1);
 180                     c2 = Character.toLowerCase(c2);
 181                     if (c1 != c2) {
 182                         return c1 - c2;
 183                     }
 184                 }
 185             }
 186         }
 187         return len1 - len2;
 188     }
 189 
 190     public static int hashCode(byte[] value) {
 191         int h = 0;
 192         for (byte v : value) {
 193             h = 31 * h + (v & 0xff);
 194         }
 195         return h;
 196     }
 197 
 198     public static int indexOf(byte[] value, int ch, int fromIndex) {
 199         if (!canEncode(ch)) {
 200             return -1;
 201         }
 202         int max = value.length;
 203         if (fromIndex < 0) {
 204             fromIndex = 0;
 205         } else if (fromIndex >= max) {
 206             // Note: fromIndex might be near -1>>>1.
 207             return -1;
 208         }
 209         byte c = (byte)ch;
 210         for (int i = fromIndex; i < max; i++) {
 211             if (value[i] == c) {
 212                return i;
 213             }
 214         }
 215         return -1;
 216     }
 217 
 218     @HotSpotIntrinsicCandidate
 219     public static int indexOf(byte[] value, byte[] str) {
 220         if (str.length == 0) {
 221             return 0;
 222         }
 223         if (value.length == 0) {
 224             return -1;
 225         }
 226         return indexOf(value, value.length, str, str.length, 0);
 227     }
 228 
 229     @HotSpotIntrinsicCandidate
 230     public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 231         byte first = str[0];
 232         int max = (valueCount - strCount);
 233         for (int i = fromIndex; i <= max; i++) {
 234             // Look for first character.
 235             if (value[i] != first) {
 236                 while (++i <= max && value[i] != first);
 237             }
 238             // Found first character, now look at the rest of value
 239             if (i <= max) {
 240                 int j = i + 1;
 241                 int end = j + strCount - 1;
 242                 for (int k = 1; j < end && value[j] == str[k]; j++, k++);
 243                 if (j == end) {
 244                     // Found whole string.
 245                     return i;
 246                 }
 247             }
 248         }
 249         return -1;
 250     }
 251 
 252     public static int lastIndexOf(byte[] src, int srcCount,
 253                                   byte[] tgt, int tgtCount, int fromIndex) {
 254         int min = tgtCount - 1;
 255         int i = min + fromIndex;
 256         int strLastIndex = tgtCount - 1;
 257         char strLastChar = (char)(tgt[strLastIndex] & 0xff);
 258 
 259   startSearchForLastChar:
 260         while (true) {
 261             while (i >= min && (src[i] & 0xff) != strLastChar) {
 262                 i--;
 263             }
 264             if (i < min) {
 265                 return -1;
 266             }
 267             int j = i - 1;
 268             int start = j - strLastIndex;
 269             int k = strLastIndex - 1;
 270             while (j > start) {
 271                 if ((src[j--] & 0xff) != (tgt[k--] & 0xff)) {
 272                     i--;
 273                     continue startSearchForLastChar;
 274                 }
 275             }
 276             return start + 1;
 277         }
 278     }
 279 
 280     public static int lastIndexOf(final byte[] value, int ch, int fromIndex) {
 281         if (!canEncode(ch)) {
 282             return -1;
 283         }
 284         int off  = Math.min(fromIndex, value.length - 1);
 285         for (; off >= 0; off--) {
 286             if (value[off] == (byte)ch) {
 287                 return off;
 288             }
 289         }
 290         return -1;
 291     }
 292 
 293     public static String replace(byte[] value, char oldChar, char newChar) {
 294         if (canEncode(oldChar)) {
 295             int len = value.length;
 296             int i = -1;
 297             while (++i < len) {
 298                 if (value[i] == (byte)oldChar) {
 299                     break;
 300                 }
 301             }
 302             if (i < len) {
 303                 if (canEncode(newChar)) {
 304                     byte buf[] = new byte[len];
 305                     for (int j = 0; j < i; j++) {    // TBD arraycopy?
 306                         buf[j] = value[j];
 307                     }
 308                     while (i < len) {
 309                         byte c = value[i];
 310                         buf[i] = (c == (byte)oldChar) ? (byte)newChar : c;
 311                         i++;
 312                     }
 313                     return new String(buf, LATIN1);
 314                 } else {
 315                     byte[] buf = StringUTF16.newBytesFor(len);
 316                     // inflate from latin1 to UTF16
 317                     inflate(value, 0, buf, 0, i);
 318                     while (i < len) {
 319                         char c = (char)(value[i] & 0xff);
 320                         StringUTF16.putChar(buf, i, (c == oldChar) ? newChar : c);
 321                         i++;
 322                     }
 323                     return new String(buf, UTF16);
 324                 }
 325             }
 326         }
 327         return null; // for string to return this;
 328     }
 329 
 330     // case insensitive
 331     public static boolean regionMatchesCI(byte[] value, int toffset,
 332                                           byte[] other, int ooffset, int len) {
 333         int last = toffset + len;
 334         while (toffset < last) {
 335             char c1 = (char)(value[toffset++] & 0xff);
 336             char c2 = (char)(other[ooffset++] & 0xff);
 337             if (c1 == c2) {
 338                 continue;
 339             }
 340             char u1 = Character.toUpperCase(c1);
 341             char u2 = Character.toUpperCase(c2);
 342             if (u1 == u2) {
 343                 continue;
 344             }
 345             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 346                 continue;
 347             }
 348             return false;
 349         }
 350         return true;
 351     }
 352 
 353     public static boolean regionMatchesCI_UTF16(byte[] value, int toffset,
 354                                                 byte[] other, int ooffset, int len) {
 355         int last = toffset + len;
 356         while (toffset < last) {
 357             char c1 = (char)(value[toffset++] & 0xff);
 358             char c2 = StringUTF16.getChar(other, ooffset++);
 359             if (c1 == c2) {
 360                 continue;
 361             }
 362             char u1 = Character.toUpperCase(c1);
 363             char u2 = Character.toUpperCase(c2);
 364             if (u1 == u2) {
 365                 continue;
 366             }
 367             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 368                 continue;
 369             }
 370             return false;
 371         }
 372         return true;
 373     }
 374 
 375     public static String toLowerCase(String str, byte[] value, Locale locale) {
 376         if (locale == null) {
 377             throw new NullPointerException();
 378         }
 379         int first;
 380         final int len = value.length;
 381         // Now check if there are any characters that need to be changed, or are surrogate
 382         for (first = 0 ; first < len; first++) {
 383             int cp = value[first] & 0xff;
 384             if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
 385                 break;
 386             }
 387         }
 388         if (first == len)
 389             return str;
 390         String lang = locale.getLanguage();
 391         if (lang == "tr" || lang == "az" || lang == "lt") {
 392             return toLowerCaseEx(str, value, first, locale, true);
 393         }
 394         byte[] result = new byte[len];
 395         System.arraycopy(value, 0, result, 0, first);  // Just copy the first few
 396                                                        // lowerCase characters.
 397         for (int i = first; i < len; i++) {
 398             int cp = value[i] & 0xff;
 399             cp = Character.toLowerCase(cp);
 400             if (!canEncode(cp)) {                      // not a latin1 character
 401                 return toLowerCaseEx(str, value, first, locale, false);
 402             }
 403             result[i] = (byte)cp;
 404         }
 405         return new String(result, LATIN1);
 406     }
 407 
 408     private static String toLowerCaseEx(String str, byte[] value,
 409                                         int first, Locale locale, boolean localeDependent)
 410     {
 411         byte[] result = StringUTF16.newBytesFor(value.length);
 412         int resultOffset = 0;
 413         for (int i = 0; i < first; i++) {
 414             StringUTF16.putChar(result, resultOffset++, value[i] & 0xff);
 415         }
 416         for (int i = first; i < value.length; i++) {
 417             int srcChar = value[i] & 0xff;
 418             int lowerChar;
 419             char[] lowerCharArray;
 420             if (localeDependent) {
 421                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
 422             } else {
 423                 lowerChar = Character.toLowerCase(srcChar);
 424             }
 425             if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
 426                 StringUTF16.putChar(result, resultOffset++, lowerChar);
 427             } else {
 428                 if (lowerChar == Character.ERROR) {
 429                     lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
 430                 } else {
 431                     lowerCharArray = Character.toChars(lowerChar);
 432                 }
 433                 /* Grow result if needed */
 434                 int mapLen = lowerCharArray.length;
 435                 if (mapLen > 1) {
 436                     byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1);
 437                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 438                     result = result2;
 439                 }
 440                 for (int x = 0; x < mapLen; ++x) {
 441                     StringUTF16.putChar(result, resultOffset++, lowerCharArray[x]);
 442                 }
 443             }
 444         }
 445         return StringUTF16.newString(result, 0, resultOffset);
 446     }
 447 
 448     public static String toUpperCase(String str, byte[] value, Locale locale) {
 449         if (locale == null) {
 450             throw new NullPointerException();
 451         }
 452         int first;
 453         final int len = value.length;
 454 
 455         // Now check if there are any characters that need to be changed, or are surrogate
 456         for (first = 0 ; first < len; first++ ) {
 457             int cp = value[first] & 0xff;
 458             if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
 459                 break;
 460             }
 461         }
 462         if (first == len) {
 463             return str;
 464         }
 465         String lang = locale.getLanguage();
 466         if (lang == "tr" || lang == "az" || lang == "lt") {
 467             return toUpperCaseEx(str, value, first, locale, true);
 468         }
 469         byte[] result = new byte[len];
 470         System.arraycopy(value, 0, result, 0, first);  // Just copy the first few
 471                                                        // upperCase characters.
 472         for (int i = first; i < len; i++) {
 473             int cp = value[i] & 0xff;
 474             cp = Character.toUpperCaseEx(cp);
 475             if (!canEncode(cp)) {                      // not a latin1 character
 476                 return toUpperCaseEx(str, value, first, locale, false);
 477             }
 478             result[i] = (byte)cp;
 479         }
 480         return new String(result, LATIN1);
 481     }
 482 
 483     private static String toUpperCaseEx(String str, byte[] value,
 484                                         int first, Locale locale, boolean localeDependent)
 485     {
 486         byte[] result = StringUTF16.newBytesFor(value.length);
 487         int resultOffset = 0;
 488         for (int i = 0; i < first; i++) {
 489             StringUTF16.putChar(result, resultOffset++, value[i] & 0xff);
 490         }
 491         for (int i = first; i < value.length; i++) {
 492             int srcChar = value[i] & 0xff;
 493             int upperChar;
 494             char[] upperCharArray;
 495             if (localeDependent) {
 496                 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
 497             } else {
 498                 upperChar = Character.toUpperCaseEx(srcChar);
 499             }
 500             if (Character.isBmpCodePoint(upperChar)) {
 501                 StringUTF16.putChar(result, resultOffset++, upperChar);
 502             } else {
 503                 if (upperChar == Character.ERROR) {
 504                     if (localeDependent) {
 505                         upperCharArray =
 506                             ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
 507                     } else {
 508                         upperCharArray = Character.toUpperCaseCharArray(srcChar);
 509                     }
 510                 } else {
 511                     upperCharArray = Character.toChars(upperChar);
 512                 }
 513                 /* Grow result if needed */
 514                 int mapLen = upperCharArray.length;
 515                 if (mapLen > 1) {
 516                     byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1);
 517                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 518                     result = result2;
 519                 }
 520                 for (int x = 0; x < mapLen; ++x) {
 521                     StringUTF16.putChar(result, resultOffset++, upperCharArray[x]);
 522                 }
 523             }
 524         }
 525         return StringUTF16.newString(result, 0, resultOffset);
 526     }
 527 
 528     public static String trim(byte[] value) {
 529         int len = value.length;
 530         int st = 0;
 531         while ((st < len) && ((value[st] & 0xff) <= ' ')) {
 532             st++;
 533         }
 534         while ((st < len) && ((value[len - 1] & 0xff) <= ' ')) {
 535             len--;
 536         }
 537         return ((st > 0) || (len < value.length)) ?
 538             newString(value, st, len - st) : null;
 539     }
 540 
 541     public static void putChar(byte[] val, int index, int c) {
 542         //assert (canEncode(c));
 543         val[index] = (byte)(c);
 544     }
 545 
 546     public static char getChar(byte[] val, int index) {
 547         return (char)(val[index] & 0xff);
 548     }
 549 
 550     public static byte[] toBytes(int[] val, int off, int len) {
 551         byte[] ret = new byte[len];
 552         for (int i = 0; i < len; i++) {
 553             int cp = val[off++];
 554             if (!canEncode(cp)) {
 555                 return null;
 556             }
 557             ret[i] = (byte)cp;
 558         }
 559         return ret;
 560     }
 561 
 562     public static byte[] toBytes(char c) {
 563         return new byte[] { (byte)c };
 564     }
 565 
 566     public static String newString(byte[] val, int index, int len) {
 567         return new String(Arrays.copyOfRange(val, index, index + len),
 568                           LATIN1);
 569     }
 570 
 571     public static void fillNull(byte[] val, int index, int end) {
 572         Arrays.fill(val, index, end, (byte)0);
 573     }
 574 
 575     // inflatedCopy byte[] -> char[]
 576     @HotSpotIntrinsicCandidate
 577     public static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
 578         for (int i = 0; i < len; i++) {
 579             dst[dstOff++] = (char)(src[srcOff++] & 0xff);
 580         }
 581     }
 582 
 583     // inflatedCopy byte[] -> byte[]
 584     @HotSpotIntrinsicCandidate
 585     public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
 586         StringUTF16.inflate(src, srcOff, dst, dstOff, len);
 587     }
 588 
 589     static class CharsSpliterator implements Spliterator.OfInt {
 590         private final byte[] array;
 591         private int index;        // current index, modified on advance/split
 592         private final int fence;  // one past last index
 593         private final int cs;
 594 
 595         CharsSpliterator(byte[] array, int acs) {
 596             this(array, 0, array.length, acs);
 597         }
 598 
 599         CharsSpliterator(byte[] array, int origin, int fence, int acs) {
 600             this.array = array;
 601             this.index = origin;
 602             this.fence = fence;
 603             this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
 604                       | Spliterator.SUBSIZED;
 605         }
 606 
 607         @Override
 608         public OfInt trySplit() {
 609             int lo = index, mid = (lo + fence) >>> 1;
 610             return (lo >= mid)
 611                    ? null
 612                    : new CharsSpliterator(array, lo, index = mid, cs);
 613         }
 614 
 615         @Override
 616         public void forEachRemaining(IntConsumer action) {
 617             byte[] a; int i, hi; // hoist accesses and checks from loop
 618             if (action == null)
 619                 throw new NullPointerException();
 620             if ((a = array).length >= (hi = fence) &&
 621                 (i = index) >= 0 && i < (index = hi)) {
 622                 do { action.accept(a[i] & 0xff); } while (++i < hi);
 623             }
 624         }
 625 
 626         @Override
 627         public boolean tryAdvance(IntConsumer action) {
 628             if (action == null)
 629                 throw new NullPointerException();
 630             if (index >= 0 && index < fence) {
 631                 action.accept(array[index++] & 0xff);
 632                 return true;
 633             }
 634             return false;
 635         }
 636 
 637         @Override
 638         public long estimateSize() { return (long)(fence - index); }
 639 
 640         @Override
 641         public int characteristics() {
 642             return cs;
 643         }
 644     }
 645 }