< prev index next >

src/java.base/share/classes/java/lang/StringLatin1.java

Print this page
rev 54647 : [mq]: 8222955-Optimize-String-replace-CharSequence-CharSequence-for-Latin1-encoded-strings
   1 /*
   2  * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Objects;
  31 import java.util.Spliterator;
  32 import java.util.function.Consumer;
  33 import java.util.function.IntConsumer;
  34 import java.util.stream.IntStream;
  35 import java.util.stream.Stream;
  36 import java.util.stream.StreamSupport;
  37 import jdk.internal.HotSpotIntrinsicCandidate;
  38 
  39 import static java.lang.String.LATIN1;
  40 import static java.lang.String.UTF16;
  41 import static java.lang.String.checkOffset;
  42 
  43 final class StringLatin1 {
  44 








  45     public static char charAt(byte[] value, int index) {
  46         if (index < 0 || index >= value.length) {
  47             throw new StringIndexOutOfBoundsException(index);
  48         }
  49         return (char)(value[index] & 0xff);
  50     }
  51 
  52     public static boolean canEncode(int cp) {
  53         return cp >>> 8 == 0;
  54     }
  55 
  56     public static int length(byte[] value) {
  57         return value.length;
  58     }
  59 
  60     public static int codePointAt(byte[] value, int index, int end) {
  61         return value[index] & 0xff;
  62     }
  63 
  64     public static int codePointBefore(byte[] value, int index) {


 287         int off  = Math.min(fromIndex, value.length - 1);
 288         for (; off >= 0; off--) {
 289             if (value[off] == (byte)ch) {
 290                 return off;
 291             }
 292         }
 293         return -1;
 294     }
 295 
 296     public static String replace(byte[] value, char oldChar, char newChar) {
 297         if (canEncode(oldChar)) {
 298             int len = value.length;
 299             int i = -1;
 300             while (++i < len) {
 301                 if (value[i] == (byte)oldChar) {
 302                     break;
 303                 }
 304             }
 305             if (i < len) {
 306                 if (canEncode(newChar)) {
 307                     byte buf[] = new byte[len];
 308                     for (int j = 0; j < i; j++) {    // TBD arraycopy?
 309                         buf[j] = value[j];
 310                     }
 311                     while (i < len) {
 312                         byte c = value[i];
 313                         buf[i] = (c == (byte)oldChar) ? (byte)newChar : c;
 314                         i++;
 315                     }
 316                     return new String(buf, LATIN1);
 317                 } else {
 318                     byte[] buf = StringUTF16.newBytesFor(len);
 319                     // inflate from latin1 to UTF16
 320                     inflate(value, 0, buf, 0, i);
 321                     while (i < len) {
 322                         char c = (char)(value[i] & 0xff);
 323                         StringUTF16.putChar(buf, i, (c == oldChar) ? newChar : c);
 324                         i++;
 325                     }
 326                     return new String(buf, UTF16);
 327                 }
 328             }
 329         }
 330         return null; // for string to return this;


























































 331     }
 332 
 333     // case insensitive
 334     public static boolean regionMatchesCI(byte[] value, int toffset,
 335                                           byte[] other, int ooffset, int len) {
 336         int last = toffset + len;
 337         while (toffset < last) {
 338             char c1 = (char)(value[toffset++] & 0xff);
 339             char c2 = (char)(other[ooffset++] & 0xff);
 340             if (c1 == c2) {
 341                 continue;
 342             }
 343             char u1 = Character.toUpperCase(c1);
 344             char u2 = Character.toUpperCase(c2);
 345             if (u1 == u2) {
 346                 continue;
 347             }
 348             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 349                 continue;
 350             }


   1 /*
   2  * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Locale;
  30 import java.util.Objects;
  31 import java.util.Spliterator;
  32 import java.util.function.Consumer;
  33 import java.util.function.IntConsumer;
  34 import java.util.stream.IntStream;
  35 import java.util.stream.Stream;
  36 import java.util.stream.StreamSupport;
  37 import jdk.internal.HotSpotIntrinsicCandidate;
  38 
  39 import static java.lang.String.LATIN1;
  40 import static java.lang.String.UTF16;
  41 import static java.lang.String.checkOffset;
  42 
  43 final class StringLatin1 {
  44 
  45     /**
  46      * The maximum size of array to allocate (unless necessary).
  47      * Some VMs reserve some header words in an array.
  48      * Attempts to allocate larger arrays may result in
  49      * OutOfMemoryError: Requested array size exceeds VM limit
  50      */
  51     private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
  52 
  53     public static char charAt(byte[] value, int index) {
  54         if (index < 0 || index >= value.length) {
  55             throw new StringIndexOutOfBoundsException(index);
  56         }
  57         return (char)(value[index] & 0xff);
  58     }
  59 
  60     public static boolean canEncode(int cp) {
  61         return cp >>> 8 == 0;
  62     }
  63 
  64     public static int length(byte[] value) {
  65         return value.length;
  66     }
  67 
  68     public static int codePointAt(byte[] value, int index, int end) {
  69         return value[index] & 0xff;
  70     }
  71 
  72     public static int codePointBefore(byte[] value, int index) {


 295         int off  = Math.min(fromIndex, value.length - 1);
 296         for (; off >= 0; off--) {
 297             if (value[off] == (byte)ch) {
 298                 return off;
 299             }
 300         }
 301         return -1;
 302     }
 303 
 304     public static String replace(byte[] value, char oldChar, char newChar) {
 305         if (canEncode(oldChar)) {
 306             int len = value.length;
 307             int i = -1;
 308             while (++i < len) {
 309                 if (value[i] == (byte)oldChar) {
 310                     break;
 311                 }
 312             }
 313             if (i < len) {
 314                 if (canEncode(newChar)) {
 315                     byte[] buf = StringConcatHelper.newArray(len);
 316                     for (int j = 0; j < i; j++) {    // TBD arraycopy?
 317                         buf[j] = value[j];
 318                     }
 319                     while (i < len) {
 320                         byte c = value[i];
 321                         buf[i] = (c == (byte)oldChar) ? (byte)newChar : c;
 322                         i++;
 323                     }
 324                     return new String(buf, LATIN1);
 325                 } else {
 326                     byte[] buf = StringUTF16.newBytesFor(len);
 327                     // inflate from latin1 to UTF16
 328                     inflate(value, 0, buf, 0, i);
 329                     while (i < len) {
 330                         char c = (char)(value[i] & 0xff);
 331                         StringUTF16.putChar(buf, i, (c == oldChar) ? newChar : c);
 332                         i++;
 333                     }
 334                     return new String(buf, UTF16);
 335                 }
 336             }
 337         }
 338         return null; // for string to return this;
 339     }
 340 
 341     public static String replace(byte[] value, int valLen, byte[] targ,
 342                                  int targLen, byte[] repl, int replLen)
 343     {
 344         assert targLen > 0;
 345         int i, j, p = 0;
 346         if (valLen == 0 || (i = indexOf(value, valLen, targ, targLen, 0)) < 0) {
 347             return null; // for string to return this;
 348         }
 349 
 350         // find and store indices of substrings to replace
 351         int[] pos = new int[16];
 352         pos[0] = i;
 353         i += targLen;
 354         while ((j = indexOf(value, valLen, targ, targLen, i)) > 0) {
 355             if (++p == pos.length) {
 356                 int cap = p + (p >> 1);
 357                 // overflow-conscious code
 358                 if (cap - MAX_ARRAY_SIZE > 0) {
 359                     if (p == MAX_ARRAY_SIZE) {
 360                         throw new OutOfMemoryError();
 361                     }
 362                     cap = MAX_ARRAY_SIZE;
 363                 }
 364                 pos = Arrays.copyOf(pos, cap);
 365             }
 366             pos[p] = j;
 367             i = j + targLen;
 368         }
 369 
 370         int resultLen;
 371         try {
 372             resultLen = Math.addExact(valLen,
 373                     Math.multiplyExact(++p, replLen - targLen));
 374         } catch (ArithmeticException ignored) {
 375             throw new OutOfMemoryError();
 376         }
 377         if (resultLen == 0) {
 378             return "";
 379         }
 380 
 381         byte[] result = StringConcatHelper.newArray(resultLen);
 382         int posFrom = 0, posTo = 0;
 383         for (int q = 0; q < p; ++q) {
 384             int nextPos = pos[q];
 385             while (posFrom < nextPos) {
 386                 result[posTo++] = value[posFrom++];
 387             }
 388             posFrom += targLen;
 389             for (int k = 0; k < replLen; ++k) {
 390                 result[posTo++] = repl[k];
 391             }
 392         }
 393         while (posFrom < valLen) {
 394             result[posTo++] = value[posFrom++];
 395         }
 396         return new String(result, LATIN1);
 397     }
 398 
 399     // case insensitive
 400     public static boolean regionMatchesCI(byte[] value, int toffset,
 401                                           byte[] other, int ooffset, int len) {
 402         int last = toffset + len;
 403         while (toffset < last) {
 404             char c1 = (char)(value[toffset++] & 0xff);
 405             char c2 = (char)(other[ooffset++] & 0xff);
 406             if (c1 == c2) {
 407                 continue;
 408             }
 409             char u1 = Character.toUpperCase(c1);
 410             char u2 = Character.toUpperCase(c2);
 411             if (u1 == u2) {
 412                 continue;
 413             }
 414             if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
 415                 continue;
 416             }


< prev index next >