1 /* 2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Locale; 30 import java.util.Objects; 31 import java.util.Spliterator; 32 import java.util.function.IntConsumer; 33 import java.util.stream.IntStream; 34 import jdk.internal.HotSpotIntrinsicCandidate; 35 36 import static java.lang.String.LATIN1; 37 import static java.lang.String.UTF16; 38 import static java.lang.String.checkOffset; 39 import static java.lang.String.checkBoundsOffCount; 40 41 final class StringLatin1 { 42 43 public static char charAt(byte[] value, int index) { 44 if (index < 0 || index >= value.length) { 45 throw new StringIndexOutOfBoundsException(index); 46 } 47 return (char)(value[index] & 0xff); 48 } 49 50 public static boolean canEncode(int cp) { 51 return cp >>> 8 == 0; 52 } 53 54 public static int length(byte[] value) { 55 return value.length; 56 } 57 58 public static int codePointAt(byte[] value, int index, int end) { 59 return value[index] & 0xff; 60 } 61 62 public static int codePointBefore(byte[] value, int index) { 63 return value[index - 1] & 0xff; 64 } 65 66 public static int codePointCount(byte[] value, int beginIndex, int endIndex) { 67 return endIndex - beginIndex; 68 } 69 70 public static char[] toChars(byte[] value) { 71 char[] dst = new char[value.length]; 72 inflate(value, 0, dst, 0, value.length); 73 return dst; 74 } 75 76 public static byte[] inflate(byte[] value, int off, int len) { 77 byte[] ret = StringUTF16.newBytesFor(len); 78 inflate(value, off, ret, 0, len); 79 return ret; 80 } 81 82 public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) { 83 inflate(value, srcBegin, dst, dstBegin, srcEnd - srcBegin); 84 } 85 86 public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) { 87 System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin); 88 } 89 90 @HotSpotIntrinsicCandidate 91 public static boolean equals(byte[] value, byte[] other) { 92 if (value.length == other.length) { 93 for (int i = 0; i < value.length; i++) { 94 if (value[i] != other[i]) { 95 return false; 96 } 97 } 98 return true; 99 } 100 return false; 101 } 102 103 @HotSpotIntrinsicCandidate 104 public static int compareTo(byte[] value, byte[] other) { 105 int len1 = value.length; 106 int len2 = other.length; 107 int lim = Math.min(len1, len2); 108 for (int k = 0; k < lim; k++) { 109 if (value[k] != other[k]) { 110 return getChar(value, k) - getChar(other, k); 111 } 112 } 113 return len1 - len2; 114 } 115 116 @HotSpotIntrinsicCandidate 117 public static int compareToUTF16(byte[] value, byte[] other) { 118 int len1 = length(value); 119 int len2 = StringUTF16.length(other); 120 int lim = Math.min(len1, len2); 121 for (int k = 0; k < lim; k++) { 122 char c1 = getChar(value, k); 123 char c2 = StringUTF16.getChar(other, k); 124 if (c1 != c2) { 125 return c1 - c2; 126 } 127 } 128 return len1 - len2; 129 } 130 131 public static int compareToCI(byte[] value, byte[] other) { 132 int len1 = value.length; 133 int len2 = other.length; 134 int lim = Math.min(len1, len2); 135 for (int k = 0; k < lim; k++) { 136 if (value[k] != other[k]) { 137 char c1 = Character.toUpperCase(getChar(value, k)); 138 char c2 = Character.toUpperCase(getChar(other, k)); 139 if (c1 != c2) { 140 c1 = Character.toLowerCase(c1); 141 c2 = Character.toLowerCase(c2); 142 if (c1 != c2) { 143 return c1 - c2; 144 } 145 } 146 } 147 } 148 return len1 - len2; 149 } 150 151 public static int compareToCI_UTF16(byte[] value, byte[] other) { 152 int len1 = length(value); 153 int len2 = StringUTF16.length(other); 154 int lim = Math.min(len1, len2); 155 for (int k = 0; k < lim; k++) { 156 char c1 = getChar(value, k); 157 char c2 = StringUTF16.getChar(other, k); 158 if (c1 != c2) { 159 c1 = Character.toUpperCase(c1); 160 c2 = Character.toUpperCase(c2); 161 if (c1 != c2) { 162 c1 = Character.toLowerCase(c1); 163 c2 = Character.toLowerCase(c2); 164 if (c1 != c2) { 165 return c1 - c2; 166 } 167 } 168 } 169 } 170 return len1 - len2; 171 } 172 173 public static int hashCode(byte[] value) { 174 int h = 0; 175 for (byte v : value) { 176 h = 31 * h + (v & 0xff); 177 } 178 return h; 179 } 180 181 public static int indexOf(byte[] value, int ch, int fromIndex) { 182 if (!canEncode(ch)) { 183 return -1; 184 } 185 int max = value.length; 186 if (fromIndex < 0) { 187 fromIndex = 0; 188 } else if (fromIndex >= max) { 189 // Note: fromIndex might be near -1>>>1. 190 return -1; 191 } 192 byte c = (byte)ch; 193 for (int i = fromIndex; i < max; i++) { 194 if (value[i] == c) { 195 return i; 196 } 197 } 198 return -1; 199 } 200 201 @HotSpotIntrinsicCandidate 202 public static int indexOf(byte[] value, byte[] str) { 203 if (str.length == 0) { 204 return 0; 205 } 206 if (value.length == 0) { 207 return -1; 208 } 209 return indexOf(value, value.length, str, str.length, 0); 210 } 211 212 @HotSpotIntrinsicCandidate 213 public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 214 byte first = str[0]; 215 int max = (valueCount - strCount); 216 for (int i = fromIndex; i <= max; i++) { 217 // Look for first character. 218 if (value[i] != first) { 219 while (++i <= max && value[i] != first); 220 } 221 // Found first character, now look at the rest of value 222 if (i <= max) { 223 int j = i + 1; 224 int end = j + strCount - 1; 225 for (int k = 1; j < end && value[j] == str[k]; j++, k++); 226 if (j == end) { 227 // Found whole string. 228 return i; 229 } 230 } 231 } 232 return -1; 233 } 234 235 public static int lastIndexOf(byte[] src, int srcCount, 236 byte[] tgt, int tgtCount, int fromIndex) { 237 int min = tgtCount - 1; 238 int i = min + fromIndex; 239 int strLastIndex = tgtCount - 1; 240 char strLastChar = (char)(tgt[strLastIndex] & 0xff); 241 242 startSearchForLastChar: 243 while (true) { 244 while (i >= min && (src[i] & 0xff) != strLastChar) { 245 i--; 246 } 247 if (i < min) { 248 return -1; 249 } 250 int j = i - 1; 251 int start = j - strLastIndex; 252 int k = strLastIndex - 1; 253 while (j > start) { 254 if ((src[j--] & 0xff) != (tgt[k--] & 0xff)) { 255 i--; 256 continue startSearchForLastChar; 257 } 258 } 259 return start + 1; 260 } 261 } 262 263 public static int lastIndexOf(final byte[] value, int ch, int fromIndex) { 264 if (!canEncode(ch)) { 265 return -1; 266 } 267 int off = Math.min(fromIndex, value.length - 1); 268 for (; off >= 0; off--) { 269 if (value[off] == (byte)ch) { 270 return off; 271 } 272 } 273 return -1; 274 } 275 276 public static String replace(byte[] value, char oldChar, char newChar) { 277 if (canEncode(oldChar)) { 278 int len = value.length; 279 int i = -1; 280 while (++i < len) { 281 if (value[i] == (byte)oldChar) { 282 break; 283 } 284 } 285 if (i < len) { 286 if (canEncode(newChar)) { 287 byte buf[] = new byte[len]; 288 for (int j = 0; j < i; j++) { // TBD arraycopy? 289 buf[j] = value[j]; 290 } 291 while (i < len) { 292 byte c = value[i]; 293 buf[i] = (c == (byte)oldChar) ? (byte)newChar : c; 294 i++; 295 } 296 return new String(buf, LATIN1); 297 } else { 298 byte[] buf = StringUTF16.newBytesFor(len); 299 // inflate from latin1 to UTF16 300 inflate(value, 0, buf, 0, i); 301 while (i < len) { 302 char c = (char)(value[i] & 0xff); 303 StringUTF16.putChar(buf, i, (c == oldChar) ? newChar : c); 304 i++; 305 } 306 return new String(buf, UTF16); 307 } 308 } 309 } 310 return null; // for string to return this; 311 } 312 313 // case insensitive 314 public static boolean regionMatchesCI(byte[] value, int toffset, 315 byte[] other, int ooffset, int len) { 316 int last = toffset + len; 317 while (toffset < last) { 318 char c1 = (char)(value[toffset++] & 0xff); 319 char c2 = (char)(other[ooffset++] & 0xff); 320 if (c1 == c2) { 321 continue; 322 } 323 char u1 = Character.toUpperCase(c1); 324 char u2 = Character.toUpperCase(c2); 325 if (u1 == u2) { 326 continue; 327 } 328 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 329 continue; 330 } 331 return false; 332 } 333 return true; 334 } 335 336 public static boolean regionMatchesCI_UTF16(byte[] value, int toffset, 337 byte[] other, int ooffset, int len) { 338 int last = toffset + len; 339 while (toffset < last) { 340 char c1 = (char)(value[toffset++] & 0xff); 341 char c2 = StringUTF16.getChar(other, ooffset++); 342 if (c1 == c2) { 343 continue; 344 } 345 char u1 = Character.toUpperCase(c1); 346 char u2 = Character.toUpperCase(c2); 347 if (u1 == u2) { 348 continue; 349 } 350 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 351 continue; 352 } 353 return false; 354 } 355 return true; 356 } 357 358 public static String toLowerCase(String str, byte[] value, Locale locale) { 359 if (locale == null) { 360 throw new NullPointerException(); 361 } 362 int first; 363 final int len = value.length; 364 // Now check if there are any characters that need to be changed, or are surrogate 365 for (first = 0 ; first < len; first++) { 366 int cp = value[first] & 0xff; 367 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR 368 break; 369 } 370 } 371 if (first == len) 372 return str; 373 String lang = locale.getLanguage(); 374 if (lang == "tr" || lang == "az" || lang == "lt") { 375 return toLowerCaseEx(str, value, first, locale, true); 376 } 377 byte[] result = new byte[len]; 378 System.arraycopy(value, 0, result, 0, first); // Just copy the first few 379 // lowerCase characters. 380 for (int i = first; i < len; i++) { 381 int cp = value[i] & 0xff; 382 cp = Character.toLowerCase(cp); 383 if (!canEncode(cp)) { // not a latin1 character 384 return toLowerCaseEx(str, value, first, locale, false); 385 } 386 result[i] = (byte)cp; 387 } 388 return new String(result, LATIN1); 389 } 390 391 private static String toLowerCaseEx(String str, byte[] value, 392 int first, Locale locale, boolean localeDependent) 393 { 394 byte[] result = StringUTF16.newBytesFor(value.length); 395 int resultOffset = 0; 396 for (int i = 0; i < first; i++) { 397 StringUTF16.putChar(result, resultOffset++, value[i] & 0xff); 398 } 399 for (int i = first; i < value.length; i++) { 400 int srcChar = value[i] & 0xff; 401 int lowerChar; 402 char[] lowerCharArray; 403 if (localeDependent) { 404 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale); 405 } else { 406 lowerChar = Character.toLowerCase(srcChar); 407 } 408 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp 409 StringUTF16.putChar(result, resultOffset++, lowerChar); 410 } else { 411 if (lowerChar == Character.ERROR) { 412 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale); 413 } else { 414 lowerCharArray = Character.toChars(lowerChar); 415 } 416 /* Grow result if needed */ 417 int mapLen = lowerCharArray.length; 418 if (mapLen > 1) { 419 byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1); 420 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 421 result = result2; 422 } 423 for (int x = 0; x < mapLen; ++x) { 424 StringUTF16.putChar(result, resultOffset++, lowerCharArray[x]); 425 } 426 } 427 } 428 return StringUTF16.newString(result, 0, resultOffset); 429 } 430 431 public static String toUpperCase(String str, byte[] value, Locale locale) { 432 if (locale == null) { 433 throw new NullPointerException(); 434 } 435 int first; 436 final int len = value.length; 437 438 // Now check if there are any characters that need to be changed, or are surrogate 439 for (first = 0 ; first < len; first++ ) { 440 int cp = value[first] & 0xff; 441 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR 442 break; 443 } 444 } 445 if (first == len) { 446 return str; 447 } 448 String lang = locale.getLanguage(); 449 if (lang == "tr" || lang == "az" || lang == "lt") { 450 return toUpperCaseEx(str, value, first, locale, true); 451 } 452 byte[] result = new byte[len]; 453 System.arraycopy(value, 0, result, 0, first); // Just copy the first few 454 // upperCase characters. 455 for (int i = first; i < len; i++) { 456 int cp = value[i] & 0xff; 457 cp = Character.toUpperCaseEx(cp); 458 if (!canEncode(cp)) { // not a latin1 character 459 return toUpperCaseEx(str, value, first, locale, false); 460 } 461 result[i] = (byte)cp; 462 } 463 return new String(result, LATIN1); 464 } 465 466 private static String toUpperCaseEx(String str, byte[] value, 467 int first, Locale locale, boolean localeDependent) 468 { 469 byte[] result = StringUTF16.newBytesFor(value.length); 470 int resultOffset = 0; 471 for (int i = 0; i < first; i++) { 472 StringUTF16.putChar(result, resultOffset++, value[i] & 0xff); 473 } 474 for (int i = first; i < value.length; i++) { 475 int srcChar = value[i] & 0xff; 476 int upperChar; 477 char[] upperCharArray; 478 if (localeDependent) { 479 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale); 480 } else { 481 upperChar = Character.toUpperCaseEx(srcChar); 482 } 483 if (Character.isBmpCodePoint(upperChar)) { 484 StringUTF16.putChar(result, resultOffset++, upperChar); 485 } else { 486 if (upperChar == Character.ERROR) { 487 if (localeDependent) { 488 upperCharArray = 489 ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale); 490 } else { 491 upperCharArray = Character.toUpperCaseCharArray(srcChar); 492 } 493 } else { 494 upperCharArray = Character.toChars(upperChar); 495 } 496 /* Grow result if needed */ 497 int mapLen = upperCharArray.length; 498 if (mapLen > 1) { 499 byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1); 500 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 501 result = result2; 502 } 503 for (int x = 0; x < mapLen; ++x) { 504 StringUTF16.putChar(result, resultOffset++, upperCharArray[x]); 505 } 506 } 507 } 508 return StringUTF16.newString(result, 0, resultOffset); 509 } 510 511 public static String trim(byte[] value) { 512 int len = value.length; 513 int st = 0; 514 while ((st < len) && ((value[st] & 0xff) <= ' ')) { 515 st++; 516 } 517 while ((st < len) && ((value[len - 1] & 0xff) <= ' ')) { 518 len--; 519 } 520 return ((st > 0) || (len < value.length)) ? 521 newString(value, st, len - st) : null; 522 } 523 524 public static void putChar(byte[] val, int index, int c) { 525 //assert (canEncode(c)); 526 val[index] = (byte)(c); 527 } 528 529 public static char getChar(byte[] val, int index) { 530 return (char)(val[index] & 0xff); 531 } 532 533 public static byte[] toBytes(int[] val, int off, int len) { 534 byte[] ret = new byte[len]; 535 for (int i = 0; i < len; i++) { 536 int cp = val[off++]; 537 if (!canEncode(cp)) { 538 return null; 539 } 540 ret[i] = (byte)cp; 541 } 542 return ret; 543 } 544 545 public static byte[] toBytes(char c) { 546 return new byte[] { (byte)c }; 547 } 548 549 public static String newString(byte[] val, int index, int len) { 550 return new String(Arrays.copyOfRange(val, index, index + len), 551 LATIN1); 552 } 553 554 public static void fillNull(byte[] val, int index, int end) { 555 Arrays.fill(val, index, end, (byte)0); 556 } 557 558 // inflatedCopy byte[] -> char[] 559 @HotSpotIntrinsicCandidate 560 private static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) { 561 for (int i = 0; i < len; i++) { 562 dst[dstOff++] = (char)(src[srcOff++] & 0xff); 563 } 564 } 565 566 // inflatedCopy byte[] -> byte[] 567 @HotSpotIntrinsicCandidate 568 public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 569 // We need a range check here because 'putChar' has no checks 570 checkBoundsOffCount(dstOff, len, dst.length); 571 for (int i = 0; i < len; i++) { 572 StringUTF16.putChar(dst, dstOff++, src[srcOff++] & 0xff); 573 } 574 } 575 576 static class CharsSpliterator implements Spliterator.OfInt { 577 private final byte[] array; 578 private int index; // current index, modified on advance/split 579 private final int fence; // one past last index 580 private final int cs; 581 582 CharsSpliterator(byte[] array, int acs) { 583 this(array, 0, array.length, acs); 584 } 585 586 CharsSpliterator(byte[] array, int origin, int fence, int acs) { 587 this.array = array; 588 this.index = origin; 589 this.fence = fence; 590 this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED 591 | Spliterator.SUBSIZED; 592 } 593 594 @Override 595 public OfInt trySplit() { 596 int lo = index, mid = (lo + fence) >>> 1; 597 return (lo >= mid) 598 ? null 599 : new CharsSpliterator(array, lo, index = mid, cs); 600 } 601 602 @Override 603 public void forEachRemaining(IntConsumer action) { 604 byte[] a; int i, hi; // hoist accesses and checks from loop 605 if (action == null) 606 throw new NullPointerException(); 607 if ((a = array).length >= (hi = fence) && 608 (i = index) >= 0 && i < (index = hi)) { 609 do { action.accept(a[i] & 0xff); } while (++i < hi); 610 } 611 } 612 613 @Override 614 public boolean tryAdvance(IntConsumer action) { 615 if (action == null) 616 throw new NullPointerException(); 617 if (index >= 0 && index < fence) { 618 action.accept(array[index++] & 0xff); 619 return true; 620 } 621 return false; 622 } 623 624 @Override 625 public long estimateSize() { return (long)(fence - index); } 626 627 @Override 628 public int characteristics() { 629 return cs; 630 } 631 } 632 }