New src/java.base/share/classes/java/lang/StringLatin1.java

   1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Objects;
  31 import java.util.Spliterator;
  32 import java.util.function.IntConsumer;
  33 import java.util.stream.IntStream;
  34 import jdk.internal.HotSpotIntrinsicCandidate;
  35 
  36 import static java.lang.String.LATIN1;
  37 import static java.lang.String.UTF16;
  38 import static java.lang.String.checkOffset;
  39 import static java.lang.String.checkBoundsOffCount;
  40 
  41 final class StringLatin1 {
  42 
  43     public static char charAt(byte[] value, int index) {
  44         if (index < 0 || index >= value.length) {
  45             throw new StringIndexOutOfBoundsException(index);
  46         }
  47         return (char)(value[index] & 0xff);
  48     }
  49 
  50     public static boolean canEncode(int cp) {
  51         return cp >>> 8 == 0;
  52     }
  53 
  54     public static int length(byte[] value) {
  55         return value.length;
  56     }
  57 
  58     public static int codePointAt(byte[] value, int index, int end) {
  59         return value[index] & 0xff;
  60     }
  61 
  62     public static int codePointBefore(byte[] value, int index) {
  63         return value[index - 1] & 0xff;
  64     }
  65 
  66     public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
  67         return endIndex - beginIndex;
  68     }
  69 
  70     public static char[] toChars(byte[] value) {
  71         char[] dst = new char[value.length];
  72         inflate(value, 0, dst, 0, value.length);
  73         return dst;
  74     }
  75 
  76     public static byte[] inflate(byte[] value, int off, int len) {
  77         byte[] ret = StringUTF16.newBytesFor(len);
  78         inflate(value, off, ret, 0, len);
  79         return ret;
  80     }
  81 
  82     public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
  83         inflate(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
  84     }
  85 
  86     public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
  87         System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
  88     }
  89 
  90     @HotSpotIntrinsicCandidate
  91     public static boolean equals(byte[] value, byte[] other) {
  92         if (value.length == other.length) {
  93             for (int i = 0; i < value.length; i++) {
  94                 if (value[i] != other[i]) {
  95                     return false;
  96                 }
  97             }
  98             return true;
  99         }
 100         return false;
 101     }
 102 
 103     @HotSpotIntrinsicCandidate
 104     public static int compareTo(byte[] value, byte[] other) {
 105         int len1 = value.length;
 106         int len2 = other.length;
 107         int lim = Math.min(len1, len2);
 108         for (int k = 0; k < lim; k++) {
 109             if (value[k] != other[k]) {
 110                 return getChar(value, k) - getChar(other, k);
 111             }
 112         }
 113         return len1 - len2;
 114     }
 115 
 116     @HotSpotIntrinsicCandidate
 117     public static int compareToUTF16(byte[] value, byte[] other) {
 118         int len1 = length(value);
 119         int len2 = StringUTF16.length(other);
 120         int lim = Math.min(len1, len2);
 121         for (int k = 0; k < lim; k++) {
 122             char c1 = getChar(value, k);
 123             char c2 = StringUTF16.getChar(other, k);
 124             if (c1 != c2) {
 125                 return c1 - c2;
 126             }
 127         }
 128         return len1 - len2;
 129     }
 130 
 131     public static int compareToCI(byte[] value, byte[] other) {
 132         int len1 = value.length;
 133         int len2 = other.length;
 134         int lim = Math.min(len1, len2);
 135         for (int k = 0; k < lim; k++) {
 136             if (value[k] != other[k]) {
 137                 char c1 = Character.toUpperCase(getChar(value, k));
 138                 char c2 = Character.toUpperCase(getChar(other, k));
 139                 if (c1 != c2) {
 140                     c1 = Character.toLowerCase(c1);
 141                     c2 = Character.toLowerCase(c2);
 142                     if (c1 != c2) {
 143                         return c1 - c2;
 144                     }
 145                 }
 146             }
 147         }
 148         return len1 - len2;
 149     }
 150 
 151     public static int compareToCI_UTF16(byte[] value, byte[] other) {
 152         int len1 = length(value);
 153         int len2 = StringUTF16.length(other);
 154         int lim = Math.min(len1, len2);
 155         for (int k = 0; k < lim; k++) {
 156             char c1 = getChar(value, k);
 157             char c2 = StringUTF16.getChar(other, k);
 158             if (c1 != c2) {
 159                 c1 = Character.toUpperCase(c1);
 160                 c2 = Character.toUpperCase(c2);
 161                 if (c1 != c2) {
 162                     c1 = Character.toLowerCase(c1);
 163                     c2 = Character.toLowerCase(c2);
 164                     if (c1 != c2) {
 165                         return c1 - c2;
 166                     }
 167                 }
 168             }
 169         }
 170         return len1 - len2;
 171     }
 172 
 173     public static int hashCode(byte[] value) {
 174         int h = 0;
 175         for (byte v : value) {
 176             h = 31 * h + (v & 0xff);
 177         }
 178         return h;
 179     }
 180 
 181     public static int indexOf(byte[] value, int ch, int fromIndex) {
 182         if (!canEncode(ch)) {
 183             return -1;
 184         }
 185         int max = value.length;
 186         if (fromIndex < 0) {
 187             fromIndex = 0;
 188         } else if (fromIndex >= max) {
 189             // Note: fromIndex might be near -1>>>1.
 190             return -1;
 191         }
 192         byte c = (byte)ch;
 193         for (int i = fromIndex; i < max; i++) {
 194             if (value[i] == c) {
 195                return i;
 196             }
 197         }
 198         return -1;
 199     }
 200 
 201     @HotSpotIntrinsicCandidate
 202     public static int indexOf(byte[] value, byte[] str) {
 203         if (str.length == 0) {
 204             return 0;
 205         }
 206         if (value.length == 0) {
 207             return -1;
 208         }
 209         return indexOf(value, value.length, str, str.length, 0);
 210     }
 211 
 /**
  * Searches the first {@code valueCount} bytes of {@code value} for the
  * first occurrence of the first {@code strCount} bytes of {@code str},
  * considering only match positions at or after {@code fromIndex}.
  *
  * This method validates none of its arguments itself: {@code str[0]} is
  * read unconditionally, so {@code str} must hold at least one byte.
  *
  * @param value      bytes to search in
  * @param valueCount number of leading bytes of {@code value} to consider
  * @param str        bytes to search for
  * @param strCount   number of leading bytes of {@code str} to match
  * @param fromIndex  first candidate start position
  * @return index in {@code value} where the match starts, or {@code -1}
  */
 212     @HotSpotIntrinsicCandidate
 213     public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 214         byte first = str[0];
         // Largest start index at which a full match still fits.
 215         int max = (valueCount - strCount);
 216         for (int i = fromIndex; i <= max; i++) {
 217             // Look for first character.
 218             if (value[i] != first) {
                 // Empty loop body: advances i to the next byte equal to 'first', or past max.
 219                 while (++i <= max && value[i] != first);
 220             }
 221             // Found first character, now look at the rest of value
 222             if (i <= max) {
 223                 int j = i + 1;
                 // One past the last index of value to compare for this candidate.
 224                 int end = j + strCount - 1;
                 // Empty loop body: j stops at the first mismatch, or reaches end on a full match.
 225                 for (int k = 1; j < end && value[j] == str[k]; j++, k++);
 226                 if (j == end) {
 227                     // Found whole string.
 228                     return i;
 229                 }
 230             }
 231         }
 232         return -1;
 233     }
 234 
 /**
  * Searches {@code src} backwards for the last occurrence of the first
  * {@code tgtCount} bytes of {@code tgt} that starts at or before
  * {@code fromIndex}.
  *
  * This method validates none of its arguments itself:
  * {@code tgt[tgtCount - 1]} is read unconditionally, so {@code tgtCount}
  * must be at least 1. {@code srcCount} is not read by this body.
  *
  * @param src       bytes to search in
  * @param srcCount  unused here
  * @param tgt       bytes to search for
  * @param tgtCount  number of leading bytes of {@code tgt} to match
  * @param fromIndex highest start position to consider
  * @return index in {@code src} where the match starts, or {@code -1}
  */
 235     public static int lastIndexOf(byte[] src, int srcCount,
 236                                   byte[] tgt, int tgtCount, int fromIndex) {
         // Smallest index in src that the last target byte could occupy.
 237         int min = tgtCount - 1;
         // Index in src of the last target byte for a match starting at fromIndex.
 238         int i = min + fromIndex;
 239         int strLastIndex = tgtCount - 1;
 240         char strLastChar = (char)(tgt[strLastIndex] & 0xff);
 241 
 242   startSearchForLastChar:
 243         while (true) {
             // Walk backwards to the nearest byte equal to the last target byte.
 244             while (i >= min && (src[i] & 0xff) != strLastChar) {
 245                 i--;
 246             }
 247             if (i < min) {
 248                 return -1;
 249             }
             // Compare the remaining target bytes right-to-left; 'start' is one
             // before the index where the candidate match begins.
 250             int j = i - 1;
 251             int start = j - strLastIndex;
 252             int k = strLastIndex - 1;
 253             while (j > start) {
 254                 if ((src[j--] & 0xff) != (tgt[k--] & 0xff)) {
                     // Mismatch: retry with the candidate end one position further left.
 255                     i--;
 256                     continue startSearchForLastChar;
 257                 }
 258             }
 259             return start + 1;
 260         }
 261     }
 262 
 263     public static int lastIndexOf(final byte[] value, int ch, int fromIndex) {
 264         if (!canEncode(ch)) {
 265             return -1;
 266         }
 267         int off  = Math.min(fromIndex, value.length - 1);
 268         for (; off >= 0; off--) {
 269             if (value[off] == (byte)ch) {
 270                 return off;
 271             }
 272         }
 273         return -1;
 274     }
 275 
 276     public static String replace(byte[] value, char oldChar, char newChar) {
 277         if (canEncode(oldChar)) {
 278             int len = value.length;
 279             int i = -1;
 280             while (++i < len) {
 281                 if (value[i] == (byte)oldChar) {
 282                     break;
 283                 }
 284             }
 285             if (i < len) {
 286                 if (canEncode(newChar)) {
 287                     byte buf[] = new byte[len];
 288                     for (int j = 0; j < i; j++) {    // TBD arraycopy?
 289                         buf[j] = value[j];
 290                     }
 291                     while (i < len) {
 292                         byte c = value[i];
 293                         buf[i] = (c == (byte)oldChar) ? (byte)newChar : c;
 294                         i++;
 295                     }
 296                     return new String(buf, LATIN1);
 297                 } else {
 298                     byte[] buf = StringUTF16.newBytesFor(len);
 299                     // inflate from latin1 to UTF16
 300                     inflate(value, 0, buf, 0, i);
 301                     while (i < len) {
 302                         char c = (char)(value[i] & 0xff);
 303                         StringUTF16.putChar(buf, i, (c == oldChar) ? newChar : c);
 304                         i++;
 305                     }
 306                     return new String(buf, UTF16);
 307                 }
 308             }
 309         }
 310         return null; // for string to return this;
 311     }
 312 
 313     // case insensitive
 314     public static boolean regionMatchesCI(byte[] value, int toffset,
 315                                           byte[] other, int ooffset, int len) {
 316         int last = toffset + len;
 317         while (toffset < last) {
 318             char c1 = (char)(value[toffset++] & 0xff);
 319             char c2 = (char)(other[ooffset++] & 0xff);
 320             if (c1 == c2) {
 321                 continue;
 322             }
 323             char u1 = Character.toUpperCase(c1);
 324             char u2 = Character.toUpperCase(c2);
 325             if (u1 == u2) {
 326                 continue;
 327             }
 328             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 329                 continue;
 330             }
 331             return false;
 332         }
 333         return true;
 334     }
 335 
 336     public static boolean regionMatchesCI_UTF16(byte[] value, int toffset,
 337                                                 byte[] other, int ooffset, int len) {
 338         int last = toffset + len;
 339         while (toffset < last) {
 340             char c1 = (char)(value[toffset++] & 0xff);
 341             char c2 = StringUTF16.getChar(other, ooffset++);
 342             if (c1 == c2) {
 343                 continue;
 344             }
 345             char u1 = Character.toUpperCase(c1);
 346             char u2 = Character.toUpperCase(c2);
 347             if (u1 == u2) {
 348                 continue;
 349             }
 350             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 351                 continue;
 352             }
 353             return false;
 354         }
 355         return true;
 356     }
 357 
 358     public static String toLowerCase(String str, byte[] value, Locale locale) {
 359         if (locale == null) {
 360             throw new NullPointerException();
 361         }
 362         int first;
 363         final int len = value.length;
 364         // Now check if there are any characters that need to be changed, or are surrogate
 365         for (first = 0 ; first < len; first++) {
 366             int cp = value[first] & 0xff;
 367             if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
 368                 break;
 369             }
 370         }
 371         if (first == len)
 372             return str;
 373         String lang = locale.getLanguage();
 374         if (lang == "tr" || lang == "az" || lang == "lt") {
 375             return toLowerCaseEx(str, value, first, locale, true);
 376         }
 377         byte[] result = new byte[len];
 378         System.arraycopy(value, 0, result, 0, first);  // Just copy the first few
 379                                                        // lowerCase characters.
 380         for (int i = first; i < len; i++) {
 381             int cp = value[i] & 0xff;
 382             cp = Character.toLowerCase(cp);
 383             if (!canEncode(cp)) {                      // not a latin1 character
 384                 return toLowerCaseEx(str, value, first, locale, false);
 385             }
 386             result[i] = (byte)cp;
 387         }
 388         return new String(result, LATIN1);
 389     }
 390 
 391     private static String toLowerCaseEx(String str, byte[] value,
 392                                         int first, Locale locale, boolean localeDependent)
 393     {
 394         byte[] result = StringUTF16.newBytesFor(value.length);
 395         int resultOffset = 0;
 396         for (int i = 0; i < first; i++) {
 397             StringUTF16.putChar(result, resultOffset++, value[i] & 0xff);
 398         }
 399         for (int i = first; i < value.length; i++) {
 400             int srcChar = value[i] & 0xff;
 401             int lowerChar;
 402             char[] lowerCharArray;
 403             if (localeDependent) {
 404                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
 405             } else {
 406                 lowerChar = Character.toLowerCase(srcChar);
 407             }
 408             if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
 409                 StringUTF16.putChar(result, resultOffset++, lowerChar);
 410             } else {
 411                 if (lowerChar == Character.ERROR) {
 412                     lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
 413                 } else {
 414                     lowerCharArray = Character.toChars(lowerChar);
 415                 }
 416                 /* Grow result if needed */
 417                 int mapLen = lowerCharArray.length;
 418                 if (mapLen > 1) {
 419                     byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1);
 420                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 421                     result = result2;
 422                 }
 423                 for (int x = 0; x < mapLen; ++x) {
 424                     StringUTF16.putChar(result, resultOffset++, lowerCharArray[x]);
 425                 }
 426             }
 427         }
 428         return StringUTF16.newString(result, 0, resultOffset);
 429     }
 430 
 431     public static String toUpperCase(String str, byte[] value, Locale locale) {
 432         if (locale == null) {
 433             throw new NullPointerException();
 434         }
 435         int first;
 436         final int len = value.length;
 437 
 438         // Now check if there are any characters that need to be changed, or are surrogate
 439         for (first = 0 ; first < len; first++ ) {
 440             int cp = value[first] & 0xff;
 441             if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
 442                 break;
 443             }
 444         }
 445         if (first == len) {
 446             return str;
 447         }
 448         String lang = locale.getLanguage();
 449         if (lang == "tr" || lang == "az" || lang == "lt") {
 450             return toUpperCaseEx(str, value, first, locale, true);
 451         }
 452         byte[] result = new byte[len];
 453         System.arraycopy(value, 0, result, 0, first);  // Just copy the first few
 454                                                        // upperCase characters.
 455         for (int i = first; i < len; i++) {
 456             int cp = value[i] & 0xff;
 457             cp = Character.toUpperCaseEx(cp);
 458             if (!canEncode(cp)) {                      // not a latin1 character
 459                 return toUpperCaseEx(str, value, first, locale, false);
 460             }
 461             result[i] = (byte)cp;
 462         }
 463         return new String(result, LATIN1);
 464     }
 465 
 466     private static String toUpperCaseEx(String str, byte[] value,
 467                                         int first, Locale locale, boolean localeDependent)
 468     {
 469         byte[] result = StringUTF16.newBytesFor(value.length);
 470         int resultOffset = 0;
 471         for (int i = 0; i < first; i++) {
 472             StringUTF16.putChar(result, resultOffset++, value[i] & 0xff);
 473         }
 474         for (int i = first; i < value.length; i++) {
 475             int srcChar = value[i] & 0xff;
 476             int upperChar;
 477             char[] upperCharArray;
 478             if (localeDependent) {
 479                 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
 480             } else {
 481                 upperChar = Character.toUpperCaseEx(srcChar);
 482             }
 483             if (Character.isBmpCodePoint(upperChar)) {
 484                 StringUTF16.putChar(result, resultOffset++, upperChar);
 485             } else {
 486                 if (upperChar == Character.ERROR) {
 487                     if (localeDependent) {
 488                         upperCharArray =
 489                             ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
 490                     } else {
 491                         upperCharArray = Character.toUpperCaseCharArray(srcChar);
 492                     }
 493                 } else {
 494                     upperCharArray = Character.toChars(upperChar);
 495                 }
 496                 /* Grow result if needed */
 497                 int mapLen = upperCharArray.length;
 498                 if (mapLen > 1) {
 499                     byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1);
 500                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 501                     result = result2;
 502                 }
 503                 for (int x = 0; x < mapLen; ++x) {
 504                     StringUTF16.putChar(result, resultOffset++, upperCharArray[x]);
 505                 }
 506             }
 507         }
 508         return StringUTF16.newString(result, 0, resultOffset);
 509     }
 510 
 511     public static String trim(byte[] value) {
 512         int len = value.length;
 513         int st = 0;
 514         while ((st < len) && ((value[st] & 0xff) <= ' ')) {
 515             st++;
 516         }
 517         while ((st < len) && ((value[len - 1] & 0xff) <= ' ')) {
 518             len--;
 519         }
 520         return ((st > 0) || (len < value.length)) ?
 521             newString(value, st, len - st) : null;
 522     }
 523 
 524     public static void putChar(byte[] val, int index, int c) {
 525         //assert (canEncode(c));
 526         val[index] = (byte)(c);
 527     }
 528 
 529     public static char getChar(byte[] val, int index) {
 530         return (char)(val[index] & 0xff);
 531     }
 532 
 533     public static byte[] toBytes(int[] val, int off, int len) {
 534         byte[] ret = new byte[len];
 535         for (int i = 0; i < len; i++) {
 536             int cp = val[off++];
 537             if (!canEncode(cp)) {
 538                 return null;
 539             }
 540             ret[i] = (byte)cp;
 541         }
 542         return ret;
 543     }
 544 
 545     public static byte[] toBytes(char c) {
 546         return new byte[] { (byte)c };
 547     }
 548 
 549     public static String newString(byte[] val, int index, int len) {
 550         return new String(Arrays.copyOfRange(val, index, index + len),
 551                           LATIN1);
 552     }
 553 
 554     public static void fillNull(byte[] val, int index, int end) {
 555         Arrays.fill(val, index, end, (byte)0);
 556     }
 557 
 558     // inflatedCopy byte[] -> char[]
 559     @HotSpotIntrinsicCandidate
 560     private static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
 561         for (int i = 0; i < len; i++) {
 562             dst[dstOff++] = (char)(src[srcOff++] & 0xff);
 563         }
 564     }
 565 
 566     // inflatedCopy byte[] -> byte[]
 567     @HotSpotIntrinsicCandidate
 568     public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
 569         // We need a range check here because 'putChar' has no checks
 570         checkBoundsOffCount(dstOff, len, dst.length);
 571         for (int i = 0; i < len; i++) {
 572             StringUTF16.putChar(dst, dstOff++, src[srcOff++] & 0xff);
 573         }
 574     }
 575 
 576     static class CharsSpliterator implements Spliterator.OfInt {
 577         private final byte[] array;
 578         private int index;        // current index, modified on advance/split
 579         private final int fence;  // one past last index
 580         private final int cs;
 581 
 582         CharsSpliterator(byte[] array, int acs) {
 583             this(array, 0, array.length, acs);
 584         }
 585 
 586         CharsSpliterator(byte[] array, int origin, int fence, int acs) {
 587             this.array = array;
 588             this.index = origin;
 589             this.fence = fence;
 590             this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
 591                       | Spliterator.SUBSIZED;
 592         }
 593 
 594         @Override
 595         public OfInt trySplit() {
 596             int lo = index, mid = (lo + fence) >>> 1;
 597             return (lo >= mid)
 598                    ? null
 599                    : new CharsSpliterator(array, lo, index = mid, cs);
 600         }
 601 
 602         @Override
 603         public void forEachRemaining(IntConsumer action) {
 604             byte[] a; int i, hi; // hoist accesses and checks from loop
 605             if (action == null)
 606                 throw new NullPointerException();
 607             if ((a = array).length >= (hi = fence) &&
 608                 (i = index) >= 0 && i < (index = hi)) {
 609                 do { action.accept(a[i] & 0xff); } while (++i < hi);
 610             }
 611         }
 612 
 613         @Override
 614         public boolean tryAdvance(IntConsumer action) {
 615             if (action == null)
 616                 throw new NullPointerException();
 617             if (index >= 0 && index < fence) {
 618                 action.accept(array[index++] & 0xff);
 619                 return true;
 620             }
 621             return false;
 622         }
 623 
 624         @Override
 625         public long estimateSize() { return (long)(fence - index); }
 626 
 627         @Override
 628         public int characteristics() {
 629             return cs;
 630         }
 631     }
 632 }