1 /* 2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Locale; 30 import java.util.Objects; 31 import java.util.Spliterator; 32 import java.util.function.IntConsumer; 33 import java.util.stream.IntStream; 34 import jdk.internal.HotSpotIntrinsicCandidate; 35 36 import static java.lang.String.LATIN1; 37 import static java.lang.String.UTF16; 38 import static java.lang.String.checkOffset; 39 40 final class StringLatin1 { 41 42 public static char charAt(byte[] value, int index) { 43 if (index < 0 || index >= value.length) { 44 throw new StringIndexOutOfBoundsException(index); 45 } 46 return (char)(value[index] & 0xff); 47 } 48 49 public static boolean canEncode(int cp) { 50 return cp >>> 8 == 0; 51 } 52 53 public static int length(byte[] value) { 54 return value.length; 55 } 56 57 public static int codePointAt(byte[] value, int index, int end) { 58 return value[index] & 0xff; 59 } 60 61 public static int codePointBefore(byte[] value, int index) { 62 return value[index - 1] & 0xff; 63 } 64 65 public static int codePointCount(byte[] value, int beginIndex, int endIndex) { 66 return endIndex - beginIndex; 67 } 68 69 public static char[] toChars(byte[] value) { 70 char[] dst = new char[value.length]; 71 inflate(value, 0, dst, 0, value.length); 72 return dst; 73 } 74 75 public static byte[] inflate(byte[] value, int off, int len) { 76 byte[] ret = StringUTF16.newBytesFor(len); 77 inflate(value, off, ret, 0, len); 78 return ret; 79 } 80 81 public static void getChars(byte[] value, int srcBegin, int srcEnd, char dst[], int dstBegin) { 82 inflate(value, srcBegin, dst, dstBegin, srcEnd - srcBegin); 83 } 84 85 public static void getBytes(byte[] value, int srcBegin, int srcEnd, byte dst[], int dstBegin) { 86 System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin); 87 } 88 89 @HotSpotIntrinsicCandidate 90 public static boolean equals(byte[] value, byte[] other) { 91 if (value.length == other.length) { 92 for (int i = 0; i < value.length; i++) { 93 if (value[i] != other[i]) { 94 return false; 95 } 96 } 97 return true; 98 } 99 return false; 100 } 101 102 @HotSpotIntrinsicCandidate 103 public static int compareTo(byte[] value, byte[] other) { 104 int len1 = value.length; 105 int len2 = other.length; 106 int lim = Math.min(len1, len2); 107 for (int k = 0; k < lim; k++) { 108 if (value[k] != other[k]) { 109 return getChar(value, k) - getChar(other, k); 110 } 111 } 112 return len1 - len2; 113 } 114 115 @HotSpotIntrinsicCandidate 116 public static int compareToUTF16(byte[] value, byte[] other) { 117 int len1 = length(value); 118 int len2 = StringUTF16.length(other); 119 int lim = Math.min(len1, len2); 120 for (int k = 0; k < lim; k++) { 121 char c1 = getChar(value, k); 122 char c2 = StringUTF16.getChar(other, k); 123 if (c1 != c2) { 124 return c1 - c2; 125 } 126 } 127 return len1 - len2; 128 } 129 130 public static int hashCode(byte[] value) { 131 int h = 0; 132 for (byte v : value) { 133 h = 31 * h + (v & 0xff); 134 } 135 return h; 136 } 137 138 public static int indexOf(byte[] value, int ch, int fromIndex) { 139 if (!canEncode(ch)) { 140 return -1; 141 } 142 int max = value.length; 143 if (fromIndex < 0) { 144 fromIndex = 0; 145 } else if (fromIndex >= max) { 146 // Note: fromIndex might be near -1>>>1. 147 return -1; 148 } 149 byte c = (byte)ch; 150 for (int i = fromIndex; i < max; i++) { 151 if (value[i] == c) { 152 return i; 153 } 154 } 155 return -1; 156 } 157 158 @HotSpotIntrinsicCandidate 159 public static int indexOf(byte[] value, byte[] str) { 160 if (str.length == 0) { 161 return 0; 162 } 163 if (value.length == 0) { 164 return -1; 165 } 166 return indexOf(value, value.length, str, str.length, 0); 167 } 168 169 @HotSpotIntrinsicCandidate 170 public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) { 171 byte first = str[0]; 172 int max = (valueCount - strCount); 173 for (int i = fromIndex; i <= max; i++) { 174 // Look for first character. 175 if (value[i] != first) { 176 while (++i <= max && value[i] != first); 177 } 178 // Found first character, now look at the rest of value 179 if (i <= max) { 180 int j = i + 1; 181 int end = j + strCount - 1; 182 for (int k = 1; j < end && value[j] == str[k]; j++, k++); 183 if (j == end) { 184 // Found whole string. 185 return i; 186 } 187 } 188 } 189 return -1; 190 } 191 192 public static int lastIndexOf(byte[] src, int srcCount, 193 byte[] tgt, int tgtCount, int fromIndex) { 194 int min = tgtCount - 1; 195 int i = min + fromIndex; 196 int strLastIndex = tgtCount - 1; 197 char strLastChar = (char)(tgt[strLastIndex] & 0xff); 198 199 startSearchForLastChar: 200 while (true) { 201 while (i >= min && (src[i] & 0xff) != strLastChar) { 202 i--; 203 } 204 if (i < min) { 205 return -1; 206 } 207 int j = i - 1; 208 int start = j - strLastIndex; 209 int k = strLastIndex - 1; 210 while (j > start) { 211 if ((src[j--] & 0xff) != (tgt[k--] & 0xff)) { 212 i--; 213 continue startSearchForLastChar; 214 } 215 } 216 return start + 1; 217 } 218 } 219 220 public static int lastIndexOf(final byte[] value, int ch, int fromIndex) { 221 if (!canEncode(ch)) { 222 return -1; 223 } 224 int off = Math.min(fromIndex, value.length - 1); 225 for (; off >= 0; off--) { 226 if (value[off] == (byte)ch) { 227 return off; 228 } 229 } 230 return -1; 231 } 232 233 public static String replace(byte[] value, char oldChar, char newChar) { 234 if (canEncode(oldChar)) { 235 int len = value.length; 236 int i = -1; 237 while (++i < len) { 238 if (value[i] == (byte)oldChar) { 239 break; 240 } 241 } 242 if (i < len) { 243 if (canEncode(newChar)) { 244 byte buf[] = new byte[len]; 245 for (int j = 0; j < i; j++) { // TBD arraycopy? 246 buf[j] = value[j]; 247 } 248 while (i < len) { 249 byte c = value[i]; 250 buf[i] = (c == (byte)oldChar) ? (byte)newChar : c; 251 i++; 252 } 253 return new String(buf, LATIN1); 254 } else { 255 byte[] buf = StringUTF16.newBytesFor(len); 256 // inflate from latin1 to UTF16 257 inflate(value, 0, buf, 0, i); 258 while (i < len) { 259 char c = (char)(value[i] & 0xff); 260 StringUTF16.putChar(buf, i, (c == oldChar) ? newChar : c); 261 i++; 262 } 263 return new String(buf, UTF16); 264 } 265 } 266 } 267 return null; // for string to return this; 268 } 269 270 // case insensitive 271 public static boolean regionMatchesCI(byte[] value, int toffset, 272 byte[] other, int ooffset, int len) { 273 int last = toffset + len; 274 while (toffset < last) { 275 char c1 = (char)(value[toffset++] & 0xff); 276 char c2 = (char)(other[ooffset++] & 0xff); 277 if (c1 == c2) { 278 continue; 279 } 280 char u1 = Character.toUpperCase(c1); 281 char u2 = Character.toUpperCase(c2); 282 if (u1 == u2) { 283 continue; 284 } 285 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 286 continue; 287 } 288 return false; 289 } 290 return true; 291 } 292 293 public static boolean regionMatchesCI_UTF16(byte[] value, int toffset, 294 byte[] other, int ooffset, int len) { 295 int last = toffset + len; 296 while (toffset < last) { 297 char c1 = (char)(value[toffset++] & 0xff); 298 char c2 = StringUTF16.getChar(other, ooffset++); 299 if (c1 == c2) { 300 continue; 301 } 302 char u1 = Character.toUpperCase(c1); 303 char u2 = Character.toUpperCase(c2); 304 if (u1 == u2) { 305 continue; 306 } 307 if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) { 308 continue; 309 } 310 return false; 311 } 312 return true; 313 } 314 315 public static String toLowerCase(String str, byte[] value, Locale locale) { 316 if (locale == null) { 317 throw new NullPointerException(); 318 } 319 int first; 320 final int len = value.length; 321 // Now check if there are any characters that need to be changed, or are surrogate 322 for (first = 0 ; first < len; first++) { 323 int cp = value[first] & 0xff; 324 if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR 325 break; 326 } 327 } 328 if (first == len) 329 return str; 330 String lang = locale.getLanguage(); 331 if (lang == "tr" || lang == "az" || lang == "lt") { 332 return toLowerCaseEx(str, value, first, locale, true); 333 } 334 byte[] result = new byte[len]; 335 System.arraycopy(value, 0, result, 0, first); // Just copy the first few 336 // lowerCase characters. 337 for (int i = first; i < len; i++) { 338 int cp = value[i] & 0xff; 339 cp = Character.toLowerCase(cp); 340 if (!canEncode(cp)) { // not a latin1 character 341 return toLowerCaseEx(str, value, first, locale, false); 342 } 343 result[i] = (byte)cp; 344 } 345 return new String(result, LATIN1); 346 } 347 348 private static String toLowerCaseEx(String str, byte[] value, 349 int first, Locale locale, boolean localeDependent) 350 { 351 byte[] result = StringUTF16.newBytesFor(value.length); 352 int resultOffset = 0; 353 for (int i = 0; i < first; i++) { 354 StringUTF16.putChar(result, resultOffset++, value[i] & 0xff); 355 } 356 for (int i = first; i < value.length; i++) { 357 int srcChar = value[i] & 0xff; 358 int lowerChar; 359 char[] lowerCharArray; 360 if (localeDependent) { 361 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(str, i, locale); 362 } else { 363 lowerChar = Character.toLowerCase(srcChar); 364 } 365 if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp 366 StringUTF16.putChar(result, resultOffset++, lowerChar); 367 } else { 368 if (lowerChar == Character.ERROR) { 369 lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(str, i, locale); 370 } else { 371 lowerCharArray = Character.toChars(lowerChar); 372 } 373 /* Grow result if needed */ 374 int mapLen = lowerCharArray.length; 375 if (mapLen > 1) { 376 byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1); 377 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 378 result = result2; 379 } 380 for (int x = 0; x < mapLen; ++x) { 381 StringUTF16.putChar(result, resultOffset++, lowerCharArray[x]); 382 } 383 } 384 } 385 return StringUTF16.newString(result, 0, resultOffset); 386 } 387 388 public static String toUpperCase(String str, byte[] value, Locale locale) { 389 if (locale == null) { 390 throw new NullPointerException(); 391 } 392 int first; 393 final int len = value.length; 394 395 // Now check if there are any characters that need to be changed, or are surrogate 396 for (first = 0 ; first < len; first++ ) { 397 int cp = value[first] & 0xff; 398 if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR 399 break; 400 } 401 } 402 if (first == len) { 403 return str; 404 } 405 String lang = locale.getLanguage(); 406 if (lang == "tr" || lang == "az" || lang == "lt") { 407 return toUpperCaseEx(str, value, first, locale, true); 408 } 409 byte[] result = new byte[len]; 410 System.arraycopy(value, 0, result, 0, first); // Just copy the first few 411 // upperCase characters. 412 for (int i = first; i < len; i++) { 413 int cp = value[i] & 0xff; 414 cp = Character.toUpperCaseEx(cp); 415 if (!canEncode(cp)) { // not a latin1 character 416 return toUpperCaseEx(str, value, first, locale, false); 417 } 418 result[i] = (byte)cp; 419 } 420 return new String(result, LATIN1); 421 } 422 423 private static String toUpperCaseEx(String str, byte[] value, 424 int first, Locale locale, boolean localeDependent) 425 { 426 byte[] result = StringUTF16.newBytesFor(value.length); 427 int resultOffset = 0; 428 for (int i = 0; i < first; i++) { 429 StringUTF16.putChar(result, resultOffset++, value[i] & 0xff); 430 } 431 for (int i = first; i < value.length; i++) { 432 int srcChar = value[i] & 0xff; 433 int upperChar; 434 char[] upperCharArray; 435 if (localeDependent) { 436 upperChar = ConditionalSpecialCasing.toUpperCaseEx(str, i, locale); 437 } else { 438 upperChar = Character.toUpperCaseEx(srcChar); 439 } 440 if (Character.isBmpCodePoint(upperChar)) { 441 StringUTF16.putChar(result, resultOffset++, upperChar); 442 } else { 443 if (upperChar == Character.ERROR) { 444 if (localeDependent) { 445 upperCharArray = 446 ConditionalSpecialCasing.toUpperCaseCharArray(str, i, locale); 447 } else { 448 upperCharArray = Character.toUpperCaseCharArray(srcChar); 449 } 450 } else { 451 upperCharArray = Character.toChars(upperChar); 452 } 453 /* Grow result if needed */ 454 int mapLen = upperCharArray.length; 455 if (mapLen > 1) { 456 byte[] result2 = StringUTF16.newBytesFor((result.length >> 1) + mapLen - 1); 457 System.arraycopy(result, 0, result2, 0, resultOffset << 1); 458 result = result2; 459 } 460 for (int x = 0; x < mapLen; ++x) { 461 StringUTF16.putChar(result, resultOffset++, upperCharArray[x]); 462 } 463 } 464 } 465 return StringUTF16.newString(result, 0, resultOffset); 466 } 467 468 public static String trim(byte[] value) { 469 int len = value.length; 470 int st = 0; 471 while ((st < len) && ((value[st] & 0xff) <= ' ')) { 472 st++; 473 } 474 while ((st < len) && ((value[len - 1] & 0xff) <= ' ')) { 475 len--; 476 } 477 return ((st > 0) || (len < value.length)) ? 478 newString(value, st, len - st) : null; 479 } 480 481 public static void putChar(byte[] val, int index, int c) { 482 //assert (canEncode(c)); 483 val[index] = (byte)(c); 484 } 485 486 public static char getChar(byte[] val, int index) { 487 return (char)(val[index] & 0xff); 488 } 489 490 public static byte[] toBytes(int[] val, int off, int len) { 491 byte[] ret = new byte[len]; 492 for (int i = 0; i < len; i++) { 493 int cp = val[off++]; 494 if (!canEncode(cp)) { 495 return null; 496 } 497 ret[i] = (byte)cp; 498 } 499 return ret; 500 } 501 502 public static byte[] toBytes(char c) { 503 return new byte[] { (byte)c }; 504 } 505 506 public static String newString(byte[] val, int index, int len) { 507 return new String(Arrays.copyOfRange(val, index, index + len), 508 LATIN1); 509 } 510 511 public static void fillNull(byte[] val, int index, int end) { 512 Arrays.fill(val, index, end, (byte)0); 513 } 514 515 // inflatedCopy byte[] -> char[] 516 @HotSpotIntrinsicCandidate 517 private static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) { 518 for (int i = 0; i < len; i++) { 519 dst[dstOff++] = (char)(src[srcOff++] & 0xff); 520 } 521 } 522 523 // inflatedCopy byte[] -> byte[] 524 @HotSpotIntrinsicCandidate 525 public static void inflate(byte[] src, int srcOff, byte[] dst, int dstOff, int len) { 526 for (int i = 0; i < len; i++) { 527 StringUTF16.putChar(dst, dstOff++, src[srcOff++] & 0xff); 528 } 529 } 530 531 static class CharsSpliterator implements Spliterator.OfInt { 532 private final byte[] array; 533 private int index; // current index, modified on advance/split 534 private final int fence; // one past last index 535 private final int cs; 536 537 CharsSpliterator(byte[] array, int acs) { 538 this(array, 0, array.length, acs); 539 } 540 541 CharsSpliterator(byte[] array, int origin, int fence, int acs) { 542 this.array = array; 543 this.index = origin; 544 this.fence = fence; 545 this.cs = acs | Spliterator.ORDERED | Spliterator.SIZED 546 | Spliterator.SUBSIZED; 547 } 548 549 @Override 550 public OfInt trySplit() { 551 int lo = index, mid = (lo + fence) >>> 1; 552 return (lo >= mid) 553 ? null 554 : new CharsSpliterator(array, lo, index = mid, cs); 555 } 556 557 @Override 558 public void forEachRemaining(IntConsumer action) { 559 byte[] a; int i, hi; // hoist accesses and checks from loop 560 if (action == null) 561 throw new NullPointerException(); 562 if ((a = array).length >= (hi = fence) && 563 (i = index) >= 0 && i < (index = hi)) { 564 do { action.accept(a[i] & 0xff); } while (++i < hi); 565 } 566 } 567 568 @Override 569 public boolean tryAdvance(IntConsumer action) { 570 if (action == null) 571 throw new NullPointerException(); 572 if (index >= 0 && index < fence) { 573 action.accept(array[index++] & 0xff); 574 return true; 575 } 576 return false; 577 } 578 579 @Override 580 public long estimateSize() { return (long)(fence - index); } 581 582 @Override 583 public int characteristics() { 584 return cs; 585 } 586 } 587 588 //////////////////////////////////////////////////////////////// 589 590 public static void getCharsSB(byte[] val, int srcBegin, int srcEnd, char dst[], int dstBegin) { 591 checkOffset(srcEnd, val.length); 592 getChars(val, srcBegin, srcEnd, dst, dstBegin); 593 } 594 595 public static void inflateSB(byte[] val, byte[] dst, int dstOff, int count) { 596 checkOffset(count, val.length); 597 checkOffset(dstOff + count, dst.length >> 1); // dst is utf16 598 inflate(val, 0, dst, dstOff, count); 599 } 600 }