1 /* 2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Locale; 30 import java.util.Objects; 31 import java.util.Spliterator; 32 import java.util.function.IntConsumer; 33 import java.util.stream.IntStream; 34 import jdk.internal.HotSpotIntrinsicCandidate; 35 36 import static java.lang.String.LATIN1; 37 import static java.lang.String.UTF16; 38 import static java.lang.String.checkOffset; 39 import static java.lang.String.checkBoundsOffCount; 40 41 final class StringLatin1 { 42 43 public static char charAt(byte[] value, int index) { 44 if (index < 0 || index >= value.length) { 45 throw new StringIndexOutOfBoundsException(index); 46 } 47 return (char)(value[index] & 0xff); 48 } 49 50 public static boolean canEncode(int cp) { 51 return cp >>> 8 == 0; 52 } 53 54 public static int length(byte[] value) { 55 return value.length; 56 } 57 58 public static int codePointAt(byte[] value, int index, int end) { 59 return value[index] & 0xff; 60 } 61 62 public static int codePointBefore(byte[] value, int index) { 63 return value[index - 1] & 0xff; 64 } 65 66 public static int codePointCount(byte[] value, int beginIndex, int endIndex) { 67 return endIndex - beginIndex; 68 } 69 70 public static char[] toChars(byte[] value) { 71 char[] dst = new char[value.length]; 72 inflate(value, 0, dst, 0, value.length); 73 return dst; 74 } 75 76 public static byte[] inflate(byte[] value, int off, int len) { 77 byte[] ret = StringUTF16.newBytesFor(len); 78 inflate(value, off, ret, 0, len); 79 return ret; 80 } 81 82 public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) { 83 inflate(value, srcBegin, dst, dstBegin, srcEnd - srcBegin); 84 } 85 86 public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) { 87 System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin); 88 } 89 90 @HotSpotIntrinsicCandidate 91 public static boolean equals(byte[] value, byte[] other) { 92 if (value.length == other.length) { 93 for (int i = 0; i < value.length; i++) { 94 if (value[i] != other[i]) { 95 return false; 96 } 97 } 98 return true; 99 } 100 return false; 101 } 102 103 @HotSpotIntrinsicCandidate 104 public static int compareTo(byte[] value, byte[] other) { 105 int len1 = value.length; 106 int len2 = other.length; 107 int lim = Math.min(len1, len2); 108 for (int k = 0; k < lim; k++) { 109 if (value[k] != other[k]) { 110 return getChar(value, k) - getChar(other, k); 111 } 112 } 113 return len1 - len2; 114 } 115 116 @HotSpotIntrinsicCandidate 117 public static int compareToUTF16(byte[] value, byte[] other) { 118 int len1 = length(value); 119 int len2 = StringUTF16.length(other); 120 int lim = Math.min(len1, len2); 121 for (int k = 0; k < lim; k++) { 122 char c1 = getChar(value, k); 123 char c2 = StringUTF16.getChar(other, k); 124 if (c1 != c2) { 125 return c1 - c2; 126 } 127 } 128 return len1 - len2; 129 } 130 131 public static int hashCode(byte[] value) { 132 int h = 0; 133 for (byte v : value) { 134 h = 31 * h + (v & 0xff); 135 } 136 return h; 137 } 138 139 public static int indexOf(byte[] value, int ch, int fromIndex) { 140 if (!canEncode(ch)) { 141 return -1; 142 } 143 int max = value.length; 144 if (fromIndex < 0) { 145 fromIndex = 0; 146 } else if (fromIndex >= max) { 147 // Note: fromIndex might be near -1>>>1. 148 return -1; 149 } 150 byte c = (byte)ch; 151 for (int i = fromIndex; i < max; i++) { 152 if (value[i] == c) { 153 return i; 154 } 155 } 156 return -1; 157 } 158 159 @HotSpotIntrinsicCandidate 160 public static int indexOf(byte[] value, byte[] str) { 161 if (str.length == 0) { 162 return 0; 163 } 164 if (value.length == 0) { 165 return -1; 166 } 167 return indexOf(value, value.length, str, str.length, 0); 168 } 169 170 @HotSpotIntrinsicCandidate 171 public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 172 byte first = str[0]; 173 int max = (valueCount - strCount); 174 for (int i = fromIndex; i <= max; i++) { 175 // Look for first character. 176 if (value[i] != first) { 177 while (++i <= max && value[i] != first); 178 } 179 // Found first character, now look at the rest of value 180 if (i <= max) { 181 int j = i + 1; 182 int end = j + strCount - 1; 183 for (int k = 1; j < end && value[j] == str[k]; j++, k++); 184 if (j == end) { 185 // Found whole string. 186 return i; 187 } 188 } 189 } 190 return -1; 191 } 192 193 public static int lastIndexOf(byte[] src, int srcCount, 194 byte[] tgt, int tgtCount, int fromIndex) { 195 int min = tgtCount - 1; 196 int i = min + fromIndex; 197 int strLastIndex = tgtCount - 1; 198 char strLastChar = (char)(tgt[strLastIndex] & 0xff); 199 200 startSearchForLastChar: 201 while (true) { 202 while (i >= min && (src[i] & 0xff) != strLastChar) { 203 i--; 204 } 205 if (i < min) { 206 return -1; 207 } 208 int j = i - 1; 209 int start = j - strLastIndex; 210 int k = strLastIndex - 1; 211 while (j > start) { 212 if ((src[j--] & 0xff) != (tgt[k--] & 0xff)) { 213 i--; 214 continue startSearchForLastChar; 215 } 216 } 217 return start + 1; 218 } 219 } 220 221 public static int lastIndexOf(final byte[] value, int ch, int fromIndex) { 222 if (!canEncode(ch)) { 223 return -1; 224 } 225 int off = Math.min(fromIndex, value.length - 1); 226 for (; off >= 0; off--) { 227 if (value[off] == (byte)ch) { 228 return off; 229 } 230 } 231 return -1; 232 } 233 234 public static String replace(byte[] value, char oldChar, char newChar) { 235 if (canEncode(oldChar)) { 236 int len = value.length; 237 int i = -1; 238 while (++i < len) { 239 if (value[i] == (byte)oldChar) { 240 break; 241 } 242 } 243 if (i < len) { 244 if (canEncode(newChar)) { 245 byte buf[] = new byte[len]; 246 for (int j = 0; j < i; j++) { // TBD arraycopy? 247 buf[j] = value[j]; 248 } 249 while (i < len) { 250 byte c = value[i]; 251 buf[i] = (c == (byte)oldChar) ? (byte)newChar : c; 252 i++; 253 } 254 return new String(buf, LATIN1); 255 } else { 256 byte[] buf = StringUTF16.newBytesFor(len); 257 // inflate from latin1 to UTF16 258 inflate(value, 0, buf, 0, i); 259 while (i < len) { 260 char c = (char)(value[i] & 0xff); 261 StringUTF16.putChar(buf, i, (c == oldChar) ? newChar : c); 262 i++; 263 } 264 return new String(buf, UTF16); 265 } 266 } 267 } 268 return null; // for string to return this; 269 } 270 271 // case insensitive 272 public static boolean regionMatchesCI(byte[] value, int toffset, 273 byte[] other, int ooffset, int len) { 274 int last = toffset + len; 275 while (toffset < last) { 276 char c1 = (char)(value[toffset++] & 0xff); 277 char c2 = (char)(other[ooffset++] & 0xff); 278 if (c1 == c2) { 279 continue; 280 } 281 char u1 = Character.toUpperCase(c1); 282 char u2 = Character.toUpperCase(c2); 283 if (u1 == u2) { 284 continue; 285 } 286 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 287 continue; 288 } 289 return false; 290 } 291 return true; 292 } 293 294 public static boolean regionMatchesCI_UTF16(byte[] value, int toffset, 295 byte[] other, int ooffset, int len) { 296 int last = toffset + len; 297 while (toffset < last) { 298 char c1 = (char)(value[toffset++] & 0xff); 299 char c2 = StringUTF16.getChar(other, ooffset++); 300 if (c1 == c2) { 301 continue; 302 } 303 char u1 = Character.toUpperCase(c1); 304 char u2 = Character.toUpperCase(c2); 305 if (u1 == u2) { 306 continue; 307 } 308 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 309 continue; 310 } 311 return false; 312 } 313 return true; 314 } 315 316 public static String toLowerCase(String str, byte[] value, Locale locale) { 317 if (locale == null) { 318 throw new NullPointerException(); 319 } 320 int first; 321 final int len = value.length; 322 // Now check if there are any characters that need to be changed, or are surrogate 323 for (first = 0 ; first < len; first++) { 324 int cp = value[first] & 0xff; 325 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR 326 break; 327 } 328 } 329 if (first == len) 330 return str; 331 String lang = locale.getLanguage(); 332 if (lang == "tr" || lang == "az" || lang == "lt") { 333 return toLowerCaseEx(str, value, first, locale, true); 334 } 335 byte[] result = new byte[len]; 336 System.arraycopy(value, 0, result, 0, first); // Just copy the first few 337 // lowerCase characters. 338 for (int i = first; i < len; i++) { 339 int cp = value[i] & 0xff; 340 cp = Character.toLowerCase(cp); 341 if (!canEncode(cp)) { // not a latin1 character 342 return toLowerCaseEx(str, value, first, locale, false); 343 } 344 result[i] = (byte)cp; 345 } 346 return new String(result, LATIN1); 347 } 348 349 private static String toLowerCaseEx(String str, byte[] value, 350 int first, Locale locale, boolean localeDependent) 351 { 352 byte[] result = StringUTF16.newBytesFor(value.length); 353 int resultOffset = 0; 354 for (int i = 0; i < first; i++) { 355 StringUTF16.putChar(result, resultOffset++, value[i] & 0xff); 356 } 357 for (int i = first; i < value.length; i++) { 358 int srcChar = value[i] & 0xff; 359 int lowerChar; 360 char[] lowerCharArray; 361 if (localeDependent) { 362 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale); 363 } else { 364 lowerChar = Character.toLowerCase(srcChar); 365 } 366 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp 367 StringUTF16.putChar(result, resultOffset++, lowerChar); 368 } else { 369 if (lowerChar == Character.ERROR) { 370 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale); 371 } else { 372 lowerCharArray = Character.toChars(lowerChar); 373 } 374 /* Grow result if needed */ 375 int mapLen = lowerCharArray.length; 376 if (mapLen > 1) { 377 byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1); 378 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 379 result = result2; 380 } 381 for (int x = 0; x < mapLen; ++x) { 382 StringUTF16.putChar(result, resultOffset++, lowerCharArray[x]); 383 } 384 } 385 } 386 return StringUTF16.newString(result, 0, resultOffset); 387 } 388 389 public static String toUpperCase(String str, byte[] value, Locale locale) { 390 if (locale == null) { 391 throw new NullPointerException(); 392 } 393 int first; 394 final int len = value.length; 395 396 // Now check if there are any characters that need to be changed, or are surrogate 397 for (first = 0 ; first < len; first++ ) { 398 int cp = value[first] & 0xff; 399 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR 400 break; 401 } 402 } 403 if (first == len) { 404 return str; 405 } 406 String lang = locale.getLanguage(); 407 if (lang == "tr" || lang == "az" || lang == "lt") { 408 return toUpperCaseEx(str, value, first, locale, true); 409 } 410 byte[] result = new byte[len]; 411 System.arraycopy(value, 0, result, 0, first); // Just copy the first few 412 // upperCase characters. 413 for (int i = first; i < len; i++) { 414 int cp = value[i] & 0xff; 415 cp = Character.toUpperCaseEx(cp); 416 if (!canEncode(cp)) { // not a latin1 character 417 return toUpperCaseEx(str, value, first, locale, false); 418 } 419 result[i] = (byte)cp; 420 } 421 return new String(result, LATIN1); 422 } 423 424 private static String toUpperCaseEx(String str, byte[] value, 425 int first, Locale locale, boolean localeDependent) 426 { 427 byte[] result = StringUTF16.newBytesFor(value.length); 428 int resultOffset = 0; 429 for (int i = 0; i < first; i++) { 430 StringUTF16.putChar(result, resultOffset++, value[i] & 0xff); 431 } 432 for (int i = first; i < value.length; i++) { 433 int srcChar = value[i] & 0xff; 434 int upperChar; 435 char[] upperCharArray; 436 if (localeDependent) { 437 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale); 438 } else { 439 upperChar = Character.toUpperCaseEx(srcChar); 440 } 441 if (Character.isBmpCodePoint(upperChar)) { 442 StringUTF16.putChar(result, resultOffset++, upperChar); 443 } else { 444 if (upperChar == Character.ERROR) { 445 if (localeDependent) { 446 upperCharArray = 447 ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale); 448 } else { 449 upperCharArray = Character.toUpperCaseCharArray(srcChar); 450 } 451 } else { 452 upperCharArray = Character.toChars(upperChar); 453 } 454 /* Grow result if needed */ 455 int mapLen = upperCharArray.length; 456 if (mapLen > 1) { 457 byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1); 458 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 459 result = result2; 460 } 461 for (int x = 0; x < mapLen; ++x) { 462 StringUTF16.putChar(result, resultOffset++, upperCharArray[x]); 463 } 464 } 465 } 466 return StringUTF16.newString(result, 0, resultOffset); 467 } 468 469 public static String trim(byte[] value) { 470 int len = value.length; 471 int st = 0; 472 while ((st < len) && ((value[st] & 0xff) <= ' ')) { 473 st++; 474 } 475 while ((st < len) && ((value[len - 1] & 0xff) <= ' ')) { 476 len--; 477 } 478 return ((st > 0) || (len < value.length)) ? 479 newString(value, st, len - st) : null; 480 } 481 482 public static void putChar(byte[] val, int index, int c) { 483 //assert (canEncode(c)); 484 val[index] = (byte)(c); 485 } 486 487 public static char getChar(byte[] val, int index) { 488 return (char)(val[index] & 0xff); 489 } 490 491 public static byte[] toBytes(int[] val, int off, int len) { 492 byte[] ret = new byte[len]; 493 for (int i = 0; i < len; i++) { 494 int cp = val[off++]; 495 if (!canEncode(cp)) { 496 return null; 497 } 498 ret[i] = (byte)cp; 499 } 500 return ret; 501 } 502 503 public static byte[] toBytes(char c) { 504 return new byte[] { (byte)c }; 505 } 506 507 public static String newString(byte[] val, int index, int len) { 508 return new String(Arrays.copyOfRange(val, index, index + len), 509 LATIN1); 510 } 511 512 public static void fillNull(byte[] val, int index, int end) { 513 Arrays.fill(val, index, end, (byte)0); 514 } 515 516 // inflatedCopy byte[] -> char[] 517 @HotSpotIntrinsicCandidate 518 private static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) { 519 for (int i = 0; i < len; i++) { 520 dst[dstOff++] = (char)(src[srcOff++] & 0xff); 521 } 522 } 523 524 // inflatedCopy byte[] -> byte[] 525 @HotSpotIntrinsicCandidate 526 public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 527 // We need a range check here because 'putChar' has no checks 528 checkBoundsOffCount(dstOff, len, dst.length); 529 for (int i = 0; i < len; i++) { 530 StringUTF16.putChar(dst, dstOff++, src[srcOff++] & 0xff); 531 } 532 } 533 534 static class CharsSpliterator implements Spliterator.OfInt { 535 private final byte[] array; 536 private int index; // current index, modified on advance/split 537 private final int fence; // one past last index 538 private final int cs; 539 540 CharsSpliterator(byte[] array, int acs) { 541 this(array, 0, array.length, acs); 542 } 543 544 CharsSpliterator(byte[] array, int origin, int fence, int acs) { 545 this.array = array; 546 this.index = origin; 547 this.fence = fence; 548 this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED 549 | Spliterator.SUBSIZED; 550 } 551 552 @Override 553 public OfInt trySplit() { 554 int lo = index, mid = (lo + fence) >>> 1; 555 return (lo >= mid) 556 ? null 557 : new CharsSpliterator(array, lo, index = mid, cs); 558 } 559 560 @Override 561 public void forEachRemaining(IntConsumer action) { 562 byte[] a; int i, hi; // hoist accesses and checks from loop 563 if (action == null) 564 throw new NullPointerException(); 565 if ((a = array).length >= (hi = fence) && 566 (i = index) >= 0 && i < (index = hi)) { 567 do { action.accept(a[i] & 0xff); } while (++i < hi); 568 } 569 } 570 571 @Override 572 public boolean tryAdvance(IntConsumer action) { 573 if (action == null) 574 throw new NullPointerException(); 575 if (index >= 0 && index < fence) { 576 action.accept(array[index++] & 0xff); 577 return true; 578 } 579 return false; 580 } 581 582 @Override 583 public long estimateSize() { return (long)(fence - index); } 584 585 @Override 586 public int characteristics() { 587 return cs; 588 } 589 } 590 }