New src/java.base/share/classes/java/lang/StringLatin1.java

   1 /*
   2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Objects;
  31 import java.util.Spliterator;
  32 import java.util.function.IntConsumer;
  33 import java.util.stream.IntStream;
  34 import jdk.internal.HotSpotIntrinsicCandidate;
  35 
  36 import static java.lang.String.LATIN1;
  37 import static java.lang.String.UTF16;
  38 import static java.lang.String.checkOffset;
  39 
  40 final class StringLatin1 {
  41 
  42     public static char charAt(byte[] value, int index) {
  43         if (index < 0 || index >= value.length) {
  44             throw new StringIndexOutOfBoundsException(index);
  45         }
  46         return (char)(value[index] & 0xff);
  47     }
  48 
  49     public static boolean canEncode(int cp) {
  50         return cp >>> 8 == 0;
  51     }
  52 
  53     public static int length(byte[] value) {
  54         return value.length;
  55     }
  56 
  57     public static int codePointAt(byte[] value, int index, int end) {
  58         return value[index] & 0xff;
  59     }
  60 
  61     public static int codePointBefore(byte[] value, int index) {
  62         return value[index - 1] & 0xff;
  63     }
  64 
  65     public static int codePointCount(byte[] value, int beginIndex, int endIndex) {
  66         return endIndex - beginIndex;
  67     }
  68 
  69     public static char[] toChars(byte[] value) {
  70         char[] dst = new char[value.length];
  71         inflate(value, 0, dst, 0, value.length);
  72         return dst;
  73     }
  74 
  75     public static byte[] inflate(byte[] value, int off, int len) {
  76         byte[] ret = StringUTF16.newBytesFor(len);
  77         inflate(value, off, ret, 0, len);
  78         return ret;
  79     }
  80 
  81     public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) {
  82         inflate(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
  83     }
  84 
  85     public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) {
  86         System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
  87     }
  88 
  89     @HotSpotIntrinsicCandidate
  90     public static boolean equals(byte[] value, byte[] other) {
  91         if (value.length == other.length) {
  92             for (int i = 0; i < value.length; i++) {
  93                 if (value[i] != other[i]) {
  94                     return false;
  95                 }
  96             }
  97             return true;
  98         }
  99         return false;
 100     }
 101 
 102     @HotSpotIntrinsicCandidate
 103     public static int compareTo(byte[] value, byte[] other) {
 104         int len1 = value.length;
 105         int len2 = other.length;
 106         int lim = Math.min(len1, len2);
 107         for (int k = 0; k < lim; k++) {
 108             if (value[k] != other[k]) {
 109                 return getChar(value, k) - getChar(other, k);
 110             }
 111         }
 112         return len1 - len2;
 113     }
 114 
 115     @HotSpotIntrinsicCandidate
 116     public static int compareToUTF16(byte[] value, byte[] other) {
 117         int len1 = length(value);
 118         int len2 = StringUTF16.length(other);
 119         int lim = Math.min(len1, len2);
 120         for (int k = 0; k < lim; k++) {
 121             char c1 = getChar(value, k);
 122             char c2 = StringUTF16.getChar(other, k);
 123             if (c1 != c2) {
 124                 return c1 - c2;
 125             }
 126         }
 127         return len1 - len2;
 128     }
 129 
 130     public static int hashCode(byte[] value) {
 131         int h = 0;
 132         for (byte v : value) {
 133             h = 31 * h + (v & 0xff);
 134         }
 135         return h;
 136     }
 137 
 138     public static int indexOf(byte[] value, int ch, int fromIndex) {
 139         if (!canEncode(ch)) {
 140             return -1;
 141         }
 142         int max = value.length;
 143         if (fromIndex < 0) {
 144             fromIndex = 0;
 145         } else if (fromIndex >= max) {
 146             // Note: fromIndex might be near -1>>>1.
 147             return -1;
 148         }
 149         byte c = (byte)ch;
 150         for (int i = fromIndex; i < max; i++) {
 151             if (value[i] == c) {
 152                return i;
 153             }
 154         }
 155         return -1;
 156     }
 157 
 158     @HotSpotIntrinsicCandidate
 159     public static int indexOf(byte[] value, byte[] str) {
 160         if (str.length == 0) {
 161             return 0;
 162         }
 163         if (value.length == 0) {
 164             return -1;
 165         }
 166         return indexOf(value, value.length, str, str.length, 0);
 167     }
 168 
 169     @HotSpotIntrinsicCandidate
 170     public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
 171         byte first = str[0];
 172         int max = (valueCount - strCount);
 173         for (int i = fromIndex; i <= max; i++) {
 174             // Look for first character.
 175             if (value[i] != first) {
 176                 while (++i <= max && value[i] != first);
 177             }
 178             // Found first character, now look at the rest of value
 179             if (i <= max) {
 180                 int j = i + 1;
 181                 int end = j + strCount - 1;
 182                 for (int k = 1; j < end && value[j] == str[k]; j++, k++);
 183                 if (j == end) {
 184                     // Found whole string.
 185                     return i;
 186                 }
 187             }
 188         }
 189         return -1;
 190     }
 191 
 192     public static int lastIndexOf(byte[] src, int srcCount,
 193                                   byte[] tgt, int tgtCount, int fromIndex) {
 194         int min = tgtCount - 1;
 195         int i = min + fromIndex;
 196         int strLastIndex = tgtCount - 1;
 197         char strLastChar = (char)(tgt[strLastIndex] & 0xff);
 198 
 199   startSearchForLastChar:
 200         while (true) {
 201             while (i >= min && (src[i] & 0xff) != strLastChar) {
 202                 i--;
 203             }
 204             if (i < min) {
 205                 return -1;
 206             }
 207             int j = i - 1;
 208             int start = j - strLastIndex;
 209             int k = strLastIndex - 1;
 210             while (j > start) {
 211                 if ((src[j--] & 0xff) != (tgt[k--] & 0xff)) {
 212                     i--;
 213                     continue startSearchForLastChar;
 214                 }
 215             }
 216             return start + 1;
 217         }
 218     }
 219 
 220     public static int lastIndexOf(final byte[] value, int ch, int fromIndex) {
 221         if (!canEncode(ch)) {
 222             return -1;
 223         }
 224         int off  = Math.min(fromIndex, value.length - 1);
 225         for (; off >= 0; off--) {
 226             if (value[off] == (byte)ch) {
 227                 return off;
 228             }
 229         }
 230         return -1;
 231     }
 232 
 233     public static String replace(byte[] value, char oldChar, char newChar) {
 234         if (canEncode(oldChar)) {
 235             int len = value.length;
 236             int i = -1;
 237             while (++i < len) {
 238                 if (value[i] == (byte)oldChar) {
 239                     break;
 240                 }
 241             }
 242             if (i < len) {
 243                 if (canEncode(newChar)) {
 244                     byte buf[] = new byte[len];
 245                     for (int j = 0; j < i; j++) {    // TBD arraycopy?
 246                         buf[j] = value[j];
 247                     }
 248                     while (i < len) {
 249                         byte c = value[i];
 250                         buf[i] = (c == (byte)oldChar) ? (byte)newChar : c;
 251                         i++;
 252                     }
 253                     return new String(buf, LATIN1);
 254                 } else {
 255                     byte[] buf = StringUTF16.newBytesFor(len);
 256                     // inflate from latin1 to UTF16
 257                     inflate(value, 0, buf, 0, i);
 258                     while (i < len) {
 259                         char c = (char)(value[i] & 0xff);
 260                         StringUTF16.putChar(buf, i, (c == oldChar) ? newChar : c);
 261                         i++;
 262                     }
 263                     return new String(buf, UTF16);
 264                 }
 265             }
 266         }
 267         return null; // for string to return this;
 268     }
 269 
 270     // case insensitive
 271     public static boolean regionMatchesCI(byte[] value, int toffset,
 272                                           byte[] other, int ooffset, int len) {
 273         int last = toffset + len;
 274         while (toffset < last) {
 275             char c1 = (char)(value[toffset++] & 0xff);
 276             char c2 = (char)(other[ooffset++] & 0xff);
 277             if (c1 == c2) {
 278                 continue;
 279             }
 280             char u1 = Character.toUpperCase(c1);
 281             char u2 = Character.toUpperCase(c2);
 282             if (u1 == u2) {
 283                 continue;
 284             }
 285             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 286                 continue;
 287             }
 288             return false;
 289         }
 290         return true;
 291     }
 292 
 293     public static boolean regionMatchesCI_UTF16(byte[] value, int toffset,
 294                                                 byte[] other, int ooffset, int len) {
 295         int last = toffset + len;
 296         while (toffset < last) {
 297             char c1 = (char)(value[toffset++] & 0xff);
 298             char c2 = StringUTF16.getChar(other, ooffset++);
 299             if (c1 == c2) {
 300                 continue;
 301             }
 302             char u1 = Character.toUpperCase(c1);
 303             char u2 = Character.toUpperCase(c2);
 304             if (u1 == u2) {
 305                 continue;
 306             }
 307             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 308                 continue;
 309             }
 310             return false;
 311         }
 312         return true;
 313     }
 314 
 315     public static String toLowerCase(String str, byte[] value, Locale locale) {
 316         if (locale == null) {
 317             throw new NullPointerException();
 318         }
 319         int first;
 320         final int len = value.length;
 321         // Now check if there are any characters that need to be changed, or are surrogate
 322         for (first = 0 ; first < len; first++) {
 323             int cp = value[first] & 0xff;
 324             if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
 325                 break;
 326             }
 327         }
 328         if (first == len)
 329             return str;
 330         String lang = locale.getLanguage();
 331         if (lang == "tr" || lang == "az" || lang == "lt") {
 332             return toLowerCaseEx(str, value, first, locale, true);
 333         }
 334         byte[] result = new byte[len];
 335         System.arraycopy(value, 0, result, 0, first);  // Just copy the first few
 336                                                        // lowerCase characters.
 337         for (int i = first; i < len; i++) {
 338             int cp = value[i] & 0xff;
 339             cp = Character.toLowerCase(cp);
 340             if (!canEncode(cp)) {                      // not a latin1 character
 341                 return toLowerCaseEx(str, value, first, locale, false);
 342             }
 343             result[i] = (byte)cp;
 344         }
 345         return new String(result, LATIN1);
 346     }
 347 
 348     private static String toLowerCaseEx(String str, byte[] value,
 349                                         int first, Locale locale, boolean localeDependent)
 350     {
 351         byte[] result = StringUTF16.newBytesFor(value.length);
 352         int resultOffset = 0;
 353         for (int i = 0; i < first; i++) {
 354             StringUTF16.putChar(result, resultOffset++, value[i] & 0xff);
 355         }
 356         for (int i = first; i < value.length; i++) {
 357             int srcChar = value[i] & 0xff;
 358             int lowerChar;
 359             char[] lowerCharArray;
 360             if (localeDependent) {
 361                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale);
 362             } else {
 363                 lowerChar = Character.toLowerCase(srcChar);
 364             }
 365             if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
 366                 StringUTF16.putChar(result, resultOffset++, lowerChar);
 367             } else {
 368                 if (lowerChar == Character.ERROR) {
 369                     lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale);
 370                 } else {
 371                     lowerCharArray = Character.toChars(lowerChar);
 372                 }
 373                 /* Grow result if needed */
 374                 int mapLen = lowerCharArray.length;
 375                 if (mapLen > 1) {
 376                     byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1);
 377                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 378                     result = result2;
 379                 }
 380                 for (int x = 0; x < mapLen; ++x) {
 381                     StringUTF16.putChar(result, resultOffset++, lowerCharArray[x]);
 382                 }
 383             }
 384         }
 385         return StringUTF16.newString(result, 0, resultOffset);
 386     }
 387 
 388     public static String toUpperCase(String str, byte[] value, Locale locale) {
 389         if (locale == null) {
 390             throw new NullPointerException();
 391         }
 392         int first;
 393         final int len = value.length;
 394 
 395         // Now check if there are any characters that need to be changed, or are surrogate
 396         for (first = 0 ; first < len; first++ ) {
 397             int cp = value[first] & 0xff;
 398             if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
 399                 break;
 400             }
 401         }
 402         if (first == len) {
 403             return str;
 404         }
 405         String lang = locale.getLanguage();
 406         if (lang == "tr" || lang == "az" || lang == "lt") {
 407             return toUpperCaseEx(str, value, first, locale, true);
 408         }
 409         byte[] result = new byte[len];
 410         System.arraycopy(value, 0, result, 0, first);  // Just copy the first few
 411                                                        // upperCase characters.
 412         for (int i = first; i < len; i++) {
 413             int cp = value[i] & 0xff;
 414             cp = Character.toUpperCaseEx(cp);
 415             if (!canEncode(cp)) {                      // not a latin1 character
 416                 return toUpperCaseEx(str, value, first, locale, false);
 417             }
 418             result[i] = (byte)cp;
 419         }
 420         return new String(result, LATIN1);
 421     }
 422 
 423     private static String toUpperCaseEx(String str, byte[] value,
 424                                         int first, Locale locale, boolean localeDependent)
 425     {
 426         byte[] result = StringUTF16.newBytesFor(value.length);
 427         int resultOffset = 0;
 428         for (int i = 0; i < first; i++) {
 429             StringUTF16.putChar(result, resultOffset++, value[i] & 0xff);
 430         }
 431         for (int i = first; i < value.length; i++) {
 432             int srcChar = value[i] & 0xff;
 433             int upperChar;
 434             char[] upperCharArray;
 435             if (localeDependent) {
 436                 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale);
 437             } else {
 438                 upperChar = Character.toUpperCaseEx(srcChar);
 439             }
 440             if (Character.isBmpCodePoint(upperChar)) {
 441                 StringUTF16.putChar(result, resultOffset++, upperChar);
 442             } else {
 443                 if (upperChar == Character.ERROR) {
 444                     if (localeDependent) {
 445                         upperCharArray =
 446                             ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale);
 447                     } else {
 448                         upperCharArray = Character.toUpperCaseCharArray(srcChar);
 449                     }
 450                 } else {
 451                     upperCharArray = Character.toChars(upperChar);
 452                 }
 453                 /* Grow result if needed */
 454                 int mapLen = upperCharArray.length;
 455                 if (mapLen > 1) {
 456                     byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1);
 457                     System.arraycopy(result, 0, result2, 0, resultOffset << 1);
 458                     result = result2;
 459                 }
 460                 for (int x = 0; x < mapLen; ++x) {
 461                     StringUTF16.putChar(result, resultOffset++, upperCharArray[x]);
 462                 }
 463             }
 464         }
 465         return StringUTF16.newString(result, 0, resultOffset);
 466     }
 467 
 468     public static String trim(byte[] value) {
 469         int len = value.length;
 470         int st = 0;
 471         while ((st < len) && ((value[st] & 0xff) <= ' ')) {
 472             st++;
 473         }
 474         while ((st < len) && ((value[len - 1] & 0xff) <= ' ')) {
 475             len--;
 476         }
 477         return ((st > 0) || (len < value.length)) ?
 478             newString(value, st, len - st) : null;
 479     }
 480 
 481     public static void putChar(byte[] val, int index, int c) {
 482         //assert (canEncode(c));
 483         val[index] = (byte)(c);
 484     }
 485 
 486     public static char getChar(byte[] val, int index) {
 487         return (char)(val[index] & 0xff);
 488     }
 489 
 490     public static byte[] toBytes(int[] val, int off, int len) {
 491         byte[] ret = new byte[len];
 492         for (int i = 0; i < len; i++) {
 493             int cp = val[off++];
 494             if (!canEncode(cp)) {
 495                 return null;
 496             }
 497             ret[i] = (byte)cp;
 498         }
 499         return ret;
 500     }
 501 
 502     public static byte[] toBytes(char c) {
 503         return new byte[] { (byte)c };
 504     }
 505 
 506     public static String newString(byte[] val, int index, int len) {
 507         return new String(Arrays.copyOfRange(val, index, index + len),
 508                           LATIN1);
 509     }
 510 
 511     public static void fillNull(byte[] val, int index, int end) {
 512         Arrays.fill(val, index, end, (byte)0);
 513     }
 514 
 515     // inflatedCopy byte[] -> char[]
 516     @HotSpotIntrinsicCandidate
 517     private static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
 518         for (int i = 0; i < len; i++) {
 519             dst[dstOff++] = (char)(src[srcOff++] & 0xff);
 520         }
 521     }
 522 
 523     // inflatedCopy byte[] -> byte[]
 524     @HotSpotIntrinsicCandidate
 525     public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
 526         for (int i = 0; i < len; i++) {
 527             StringUTF16.putChar(dst, dstOff++, src[srcOff++] & 0xff);
 528         }
 529     }
 530 
 531     static class CharsSpliterator implements Spliterator.OfInt {
 532         private final byte[] array;
 533         private int index;        // current index, modified on advance/split
 534         private final int fence;  // one past last index
 535         private final int cs;
 536 
 537         CharsSpliterator(byte[] array, int acs) {
 538             this(array, 0, array.length, acs);
 539         }
 540 
 541         CharsSpliterator(byte[] array, int origin, int fence, int acs) {
 542             this.array = array;
 543             this.index = origin;
 544             this.fence = fence;
 545             this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED
 546                       | Spliterator.SUBSIZED;
 547         }
 548 
 549         @Override
 550         public OfInt trySplit() {
 551             int lo = index, mid = (lo + fence) >>> 1;
 552             return (lo >= mid)
 553                    ? null
 554                    : new CharsSpliterator(array, lo, index = mid, cs);
 555         }
 556 
 557         @Override
 558         public void forEachRemaining(IntConsumer action) {
 559             byte[] a; int i, hi; // hoist accesses and checks from loop
 560             if (action == null)
 561                 throw new NullPointerException();
 562             if ((a = array).length >= (hi = fence) &&
 563                 (i = index) >= 0 && i < (index = hi)) {
 564                 do { action.accept(a[i] & 0xff); } while (++i < hi);
 565             }
 566         }
 567 
 568         @Override
 569         public boolean tryAdvance(IntConsumer action) {
 570             if (action == null)
 571                 throw new NullPointerException();
 572             if (index >= 0 && index < fence) {
 573                 action.accept(array[index++] & 0xff);
 574                 return true;
 575             }
 576             return false;
 577         }
 578 
 579         @Override
 580         public long estimateSize() { return (long)(fence - index); }
 581 
 582         @Override
 583         public int characteristics() {
 584             return cs;
 585         }
 586     }
 587 
 588     ////////////////////////////////////////////////////////////////
 589 
 590     public static void getCharsSB(byte[] val, int srcBegin, int srcEnd, char dst[], int dstBegin) {
 591         checkOffset(srcEnd, val.length);
 592         getChars(val, srcBegin, srcEnd, dst, dstBegin);
 593     }
 594 
 595     public static void inflateSB(byte[] val, byte[] dst, int dstOff, int count) {
 596         checkOffset(count, val.length);
 597         checkOffset(dstOff + count, dst.length >> 1);  // dst is utf16
 598         inflate(val, 0, dst, dstOff, count);
 599     }
 600 }