Old src/java.base/share/classes/java/lang/StringLatin1.java

   1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Objects;
  31 import java.util.Spliterator;
  32 import java.util.function.IntConsumer;
  33 import java.util.stream.IntStream;
  34 import jdk.internal.HotSpotIntrinsicCandidate;
  35 
  36 import static java.lang.String.LATIN1;
  37 import static java.lang.String.UTF16;
  38 import static java.lang.String.checkOffset;
  39 import static java.lang.String.checkBoundsOffCount;
  40 
  41 final class StringLatin1 {
  42 
  43     public static char charAt(byte[] value, int index) {
  44         if (index < 0 || index >= value.length) {
  45             throw new StringIndexOutOfBoundsException(index);
  46         }
  47         return (char)(value[index] & 0xff);
  48     }
  49 
  50     public static boolean canEncode(int cp) {
  51         return cp >>> 8 == 0;
  52     }
  53 
  54     public static int length(byte[] value) {
  55         return value.length;
  56     }
  57 
  58     public static int codePointAt(byte[] value, int index, int end) {
  59         return value[index] & 0xff;
  60     }
  61 
  62     public static int codePointBefore(byte[] value, int index) {
  63         return value[index - 1] & 0xff;
  64     }
  65 
  66     public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
  67         return endIndex - beginIndex;
  68     }
  69 
  70     public static char[] toChars(byte[] value) {
  71         char[] dst = new char[value.length];
  72         inflate(value, 0, dst, 0, value.length);
  73         return dst;
  74     }
  75 
  76     public static byte[] inflate(byte[] value, int off, int len) {
  77         byte[] ret = StringUTF16.newBytesFor(len);
  78         inflate(value, off, ret, 0, len);
  79         return ret;
  80     }
  81 
  82     public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
  83         inflate(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
  84     }
  85 
  86     public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
  87         System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
  88     }
  89 
  90     @HotSpotIntrinsicCandidate
  91     public static boolean equals(byte[] value, byte[] other) {
  92         if (value.length == other.length) {
  93             for (int i = 0; i < value.length; i++) {
  94                 if (value[i] != other[i]) {
  95                     return false;
  96                 }
  97             }
  98             return true;
  99         }
 100         return false;
 101     }
 102 
 103     @HotSpotIntrinsicCandidate
 104     public static int compareTo(byte[] value, byte[] other) {
 105         int len1 = value.length;
 106         int len2 = other.length;
 107         int lim = Math.min(len1, len2);
 108         for (int k = 0; k < lim; k++) {
 109             if (value[k] != other[k]) {
 110                 return getChar(value, k) - getChar(other, k);
 111             }
 112         }
 113         return len1 - len2;
 114     }
 115 
 116     @HotSpotIntrinsicCandidate
 117     public static int compareToUTF16(byte[] value, byte[] other) {
 118         int len1 = length(value);
 119         int len2 = StringUTF16.length(other);
 120         int lim = Math.min(len1, len2);
 121         for (int k = 0; k < lim; k++) {
 122             char c1 = getChar(value, k);
 123             char c2 = StringUTF16.getChar(other, k);
 124             if (c1 != c2) {
 125                 return c1 - c2;
 126             }
 127         }
 128         return len1 - len2;
 129     }
 130 
 131     public static int hashCode(byte[] value) {
 132         int h = 0;
 133         for (byte v : value) {
 134             h = 31 * h + (v & 0xff);
 135         }
 136         return h;
 137     }
 138 
 139     public static int indexOf(byte[] value, int ch, int fromIndex) {
 140         if (!canEncode(ch)) {
 141             return -1;
 142         }
 143         int max = value.length;
 144         if (fromIndex < 0) {
 145             fromIndex = 0;
 146         } else if (fromIndex >= max) {
 147             // Note: fromIndex might be near -1>>>1.
 148             return -1;
 149         }
 150         byte c = (byte)ch;
 151         for (int i = fromIndex; i < max; i++) {
 152             if (value[i] == c) {
 153                return i;
 154             }
 155         }
 156         return -1;
 157     }
 158 
 159     @HotSpotIntrinsicCandidate
 160     public static int indexOf(byte[] value, byte[] str) {
 161         if (str.length == 0) {
 162             return 0;
 163         }
 164         if (value.length == 0) {
 165             return -1;
 166         }
 167         return indexOf(value, value.length, str, str.length, 0);
 168     }
 169 
 170     @HotSpotIntrinsicCandidate
 171     public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 172         byte first = str[0];
 173         int max = (valueCount - strCount);
 174         for (int i = fromIndex; i <= max; i++) {
 175             // Look for first character.
 176             if (value[i] != first) {
 177                 while (++i <= max && value[i] != first);
 178             }
 179             // Found first character, now look at the rest of value
 180             if (i <= max) {
 181                 int j = i + 1;
 182                 int end = j + strCount - 1;
 183                 for (int k = 1; j < end && value[j] == str[k]; j++, k++);
 184                 if (j == end) {
 185                     // Found whole string.
 186                     return i;
 187                 }
 188             }
 189         }
 190         return -1;
 191     }
 192 
 193     public static int lastIndexOf(byte[] src, int srcCount,
 194                                   byte[] tgt, int tgtCount, int fromIndex) {
 195         int min = tgtCount - 1;
 196         int i = min + fromIndex;
 197         int strLastIndex = tgtCount - 1;
 198         char strLastChar = (char)(tgt[strLastIndex] & 0xff);
 199 
 200   startSearchForLastChar:
 201         while (true) {
 202             while (i >= min && (src[i] & 0xff) != strLastChar) {
 203                 i--;
 204             }
 205             if (i < min) {
 206                 return -1;
 207             }
 208             int j = i - 1;
 209             int start = j - strLastIndex;
 210             int k = strLastIndex - 1;
 211             while (j > start) {
 212                 if ((src[j--] & 0xff) != (tgt[k--] & 0xff)) {
 213                     i--;
 214                     continue startSearchForLastChar;
 215                 }
 216             }
 217             return start + 1;
 218         }
 219     }
 220 
 221     public static int lastIndexOf(final byte[] value, int ch, int fromIndex) {
 222         if (!canEncode(ch)) {
 223             return -1;
 224         }
 225         int off  = Math.min(fromIndex, value.length - 1);
 226         for (; off >= 0; off--) {
 227             if (value[off] == (byte)ch) {
 228                 return off;
 229             }
 230         }
 231         return -1;
 232     }
 233 
 234     public static String replace(byte[] value, char oldChar, char newChar) {
 235         if (canEncode(oldChar)) {
 236             int len = value.length;
 237             int i = -1;
 238             while (++i < len) {
 239                 if (value[i] == (byte)oldChar) {
 240                     break;
 241                 }
 242             }
 243             if (i < len) {
 244                 if (canEncode(newChar)) {
 245                     byte buf[] = new byte[len];
 246                     for (int j = 0; j < i; j++) {    // TBD arraycopy?
 247                         buf[j] = value[j];
 248                     }
 249                     while (i < len) {
 250                         byte c = value[i];
 251                         buf[i] = (c == (byte)oldChar) ? (byte)newChar : c;
 252                         i++;
 253                     }
 254                     return new String(buf, LATIN1);
 255                 } else {
 256                     byte[] buf = StringUTF16.newBytesFor(len);
 257                     // inflate from latin1 to UTF16
 258                     inflate(value, 0, buf, 0, i);
 259                     while (i < len) {
 260                         char c = (char)(value[i] & 0xff);
 261                         StringUTF16.putChar(buf, i, (c == oldChar) ? newChar : c);
 262                         i++;
 263                     }
 264                     return new String(buf, UTF16);
 265                 }
 266             }
 267         }
 268         return null; // for string to return this;
 269     }
 270 
 271     // case insensitive
 272     public static boolean regionMatchesCI(byte[] value, int toffset,
 273                                           byte[] other, int ooffset, int len) {
 274         int last = toffset + len;
 275         while (toffset < last) {
 276             char c1 = (char)(value[toffset++] & 0xff);
 277             char c2 = (char)(other[ooffset++] & 0xff);
 278             if (c1 == c2) {
 279                 continue;
 280             }
 281             char u1 = Character.toUpperCase(c1);
 282             char u2 = Character.toUpperCase(c2);
 283             if (u1 == u2) {
 284                 continue;
 285             }
 286             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 287                 continue;
 288             }
 289             return false;
 290         }
 291         return true;
 292     }
 293 
 294     public static boolean regionMatchesCI_UTF16(byte[] value, int toffset,
 295                                                 byte[] other, int ooffset, int len) {
 296         int last = toffset + len;
 297         while (toffset < last) {
 298             char c1 = (char)(value[toffset++] & 0xff);
 299             char c2 = StringUTF16.getChar(other, ooffset++);
 300             if (c1 == c2) {
 301                 continue;
 302             }
 303             char u1 = Character.toUpperCase(c1);
 304             char u2 = Character.toUpperCase(c2);
 305             if (u1 == u2) {
 306                 continue;
 307             }
 308             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 309                 continue;
 310             }
 311             return false;
 312         }
 313         return true;
 314     }
 315 
 316     public static String toLowerCase(String str, byte[] value, Locale locale) {
 317         if (locale == null) {
 318             throw new NullPointerException();
 319         }
 320         int first;
 321         final int len = value.length;
 322         // Now check if there are any characters that need to be changed, or are surrogate
 323         for (first = 0 ; first < len; first++) {
 324             int cp = value[first] & 0xff;
 325             if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
 326                 break;
 327             }
 328         }
 329         if (first == len)
 330             return str;
 331         String lang = locale.getLanguage();
 332         if (lang == "tr" || lang == "az" || lang == "lt") {
 333             return toLowerCaseEx(str, value, first, locale, true);
 334         }
 335         byte[] result = new byte[len];
 336         System.arraycopy(value, 0, result, 0, first);  // Just copy the first few
 337                                                        // lowerCase characters.
 338         for (int i = first; i < len; i++) {
 339             int cp = value[i] & 0xff;
 340             cp = Character.toLowerCase(cp);
 341             if (!canEncode(cp)) {                      // not a latin1 character
 342                 return toLowerCaseEx(str, value, first, locale, false);
 343             }
 344             result[i] = (byte)cp;
 345         }
 346         return new String(result, LATIN1);
 347     }
 348 
 349     private static String toLowerCaseEx(String str, byte[] value,
 350                                         int first, Locale locale, boolean localeDependent)
 351     {
 352         byte[] result = StringUTF16.newBytesFor(value.length);
 353         int resultOffset = 0;
 354         for (int i = 0; i < first; i++) {
 355             StringUTF16.putChar(result, resultOffset++, value[i] & 0xff);
 356         }
 357         for (int i = first; i < value.length; i++) {
 358             int srcChar = value[i] & 0xff;
 359             int lowerChar;
 360             char[] lowerCharArray;
 361             if (localeDependent) {
 362                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
 363             } else {
 364                 lowerChar = Character.toLowerCase(srcChar);
 365             }
 366             if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
 367                 StringUTF16.putChar(result, resultOffset++, lowerChar);
 368             } else {
 369                 if (lowerChar == Character.ERROR) {
 370                     lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
 371                 } else {
 372                     lowerCharArray = Character.toChars(lowerChar);
 373                 }
 374                 /* Grow result if needed */
 375                 int mapLen = lowerCharArray.length;
 376                 if (mapLen > 1) {
 377                     byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1);
 378                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 379                     result = result2;
 380                 }
 381                 for (int x = 0; x < mapLen; ++x) {
 382                     StringUTF16.putChar(result, resultOffset++, lowerCharArray[x]);
 383                 }
 384             }
 385         }
 386         return StringUTF16.newString(result, 0, resultOffset);
 387     }
 388 
 389     public static String toUpperCase(String str, byte[] value, Locale locale) {
 390         if (locale == null) {
 391             throw new NullPointerException();
 392         }
 393         int first;
 394         final int len = value.length;
 395 
 396         // Now check if there are any characters that need to be changed, or are surrogate
 397         for (first = 0 ; first < len; first++ ) {
 398             int cp = value[first] & 0xff;
 399             if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
 400                 break;
 401             }
 402         }
 403         if (first == len) {
 404             return str;
 405         }
 406         String lang = locale.getLanguage();
 407         if (lang == "tr" || lang == "az" || lang == "lt") {
 408             return toUpperCaseEx(str, value, first, locale, true);
 409         }
 410         byte[] result = new byte[len];
 411         System.arraycopy(value, 0, result, 0, first);  // Just copy the first few
 412                                                        // upperCase characters.
 413         for (int i = first; i < len; i++) {
 414             int cp = value[i] & 0xff;
 415             cp = Character.toUpperCaseEx(cp);
 416             if (!canEncode(cp)) {                      // not a latin1 character
 417                 return toUpperCaseEx(str, value, first, locale, false);
 418             }
 419             result[i] = (byte)cp;
 420         }
 421         return new String(result, LATIN1);
 422     }
 423 
 424     private static String toUpperCaseEx(String str, byte[] value,
 425                                         int first, Locale locale, boolean localeDependent)
 426     {
 427         byte[] result = StringUTF16.newBytesFor(value.length);
 428         int resultOffset = 0;
 429         for (int i = 0; i < first; i++) {
 430             StringUTF16.putChar(result, resultOffset++, value[i] & 0xff);
 431         }
 432         for (int i = first; i < value.length; i++) {
 433             int srcChar = value[i] & 0xff;
 434             int upperChar;
 435             char[] upperCharArray;
 436             if (localeDependent) {
 437                 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
 438             } else {
 439                 upperChar = Character.toUpperCaseEx(srcChar);
 440             }
 441             if (Character.isBmpCodePoint(upperChar)) {
 442                 StringUTF16.putChar(result, resultOffset++, upperChar);
 443             } else {
 444                 if (upperChar == Character.ERROR) {
 445                     if (localeDependent) {
 446                         upperCharArray =
 447                             ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
 448                     } else {
 449                         upperCharArray = Character.toUpperCaseCharArray(srcChar);
 450                     }
 451                 } else {
 452                     upperCharArray = Character.toChars(upperChar);
 453                 }
 454                 /* Grow result if needed */
 455                 int mapLen = upperCharArray.length;
 456                 if (mapLen > 1) {
 457                     byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1);
 458                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 459                     result = result2;
 460                 }
 461                 for (int x = 0; x < mapLen; ++x) {
 462                     StringUTF16.putChar(result, resultOffset++, upperCharArray[x]);
 463                 }
 464             }
 465         }
 466         return StringUTF16.newString(result, 0, resultOffset);
 467     }
 468 
 469     public static String trim(byte[] value) {
 470         int len = value.length;
 471         int st = 0;
 472         while ((st < len) && ((value[st] & 0xff) <= ' ')) {
 473             st++;
 474         }
 475         while ((st < len) && ((value[len - 1] & 0xff) <= ' ')) {
 476             len--;
 477         }
 478         return ((st > 0) || (len < value.length)) ?
 479             newString(value, st, len - st) : null;
 480     }
 481 
 482     public static void putChar(byte[] val, int index, int c) {
 483         //assert (canEncode(c));
 484         val[index] = (byte)(c);
 485     }
 486 
 487     public static char getChar(byte[] val, int index) {
 488         return (char)(val[index] & 0xff);
 489     }
 490 
 491     public static byte[] toBytes(int[] val, int off, int len) {
 492         byte[] ret = new byte[len];
 493         for (int i = 0; i < len; i++) {
 494             int cp = val[off++];
 495             if (!canEncode(cp)) {
 496                 return null;
 497             }
 498             ret[i] = (byte)cp;
 499         }
 500         return ret;
 501     }
 502 
 503     public static byte[] toBytes(char c) {
 504         return new byte[] { (byte)c };
 505     }
 506 
 507     public static String newString(byte[] val, int index, int len) {
 508         return new String(Arrays.copyOfRange(val, index, index + len),
 509                           LATIN1);
 510     }
 511 
 512     public static void fillNull(byte[] val, int index, int end) {
 513         Arrays.fill(val, index, end, (byte)0);
 514     }
 515 
 516     // inflatedCopy byte[] -> char[]
 517     @HotSpotIntrinsicCandidate
 518     private static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
 519         for (int i = 0; i < len; i++) {
 520             dst[dstOff++] = (char)(src[srcOff++] & 0xff);
 521         }
 522     }
 523 
 524     // inflatedCopy byte[] -> byte[]
 525     @HotSpotIntrinsicCandidate
 526     public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
 527         // We need a range check here because 'putChar' has no checks
 528         checkBoundsOffCount(dstOff, len, dst.length);
 529         for (int i = 0; i < len; i++) {
 530             StringUTF16.putChar(dst, dstOff++, src[srcOff++] & 0xff);
 531         }
 532     }
 533 
 534     static class CharsSpliterator implements Spliterator.OfInt {
 535         private final byte[] array;
 536         private int index;        // current index, modified on advance/split
 537         private final int fence;  // one past last index
 538         private final int cs;
 539 
 540         CharsSpliterator(byte[] array, int acs) {
 541             this(array, 0, array.length, acs);
 542         }
 543 
 544         CharsSpliterator(byte[] array, int origin, int fence, int acs) {
 545             this.array = array;
 546             this.index = origin;
 547             this.fence = fence;
 548             this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
 549                       | Spliterator.SUBSIZED;
 550         }
 551 
 552         @Override
 553         public OfInt trySplit() {
 554             int lo = index, mid = (lo + fence) >>> 1;
 555             return (lo >= mid)
 556                    ? null
 557                    : new CharsSpliterator(array, lo, index = mid, cs);
 558         }
 559 
 560         @Override
 561         public void forEachRemaining(IntConsumer action) {
 562             byte[] a; int i, hi; // hoist accesses and checks from loop
 563             if (action == null)
 564                 throw new NullPointerException();
 565             if ((a = array).length >= (hi = fence) &&
 566                 (i = index) >= 0 && i < (index = hi)) {
 567                 do { action.accept(a[i] & 0xff); } while (++i < hi);
 568             }
 569         }
 570 
 571         @Override
 572         public boolean tryAdvance(IntConsumer action) {
 573             if (action == null)
 574                 throw new NullPointerException();
 575             if (index >= 0 && index < fence) {
 576                 action.accept(array[index++] & 0xff);
 577                 return true;
 578             }
 579             return false;
 580         }
 581 
 582         @Override
 583         public long estimateSize() { return (long)(fence - index); }
 584 
 585         @Override
 586         public int characteristics() {
 587             return cs;
 588         }
 589     }
 590 }