< prev index next >

src/java.base/share/classes/java/lang/StringUTF16.java

Print this page
rev 54647 : [mq]: 8222955-Optimize-String-replace-CharSequence-CharSequence-for-Latin1-encoded-strings


 557             char lo = Character.lowSurrogate(ch);
 558             int i = Math.min(fromIndex, (value.length >> 1) - 2);
 559             for (; i >= 0; i--) {
 560                 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) {
 561                     return i;
 562                 }
 563             }
 564         }
 565         return -1;
 566     }
 567 
 568     public static String replace(byte[] value, char oldChar, char newChar) {
              // Returns a new String in which every char of value equal to oldChar
              // has been replaced by newChar, or null when oldChar does not occur.
              // value is read two bytes per char (len = value.length >> 1) through
              // getChar; the result buffer is written through putChar.
              // NOTE(review): a null result presumably tells the caller to keep the
              // original string unchanged - confirm against the caller.
 569         int len = value.length >> 1;
 570         int i = -1;
              // Scan for the first occurrence of oldChar; i == len means "not found".
 571         while (++i < len) {
 572             if (getChar(value, i) == oldChar) {
 573                 break;
 574             }
 575         }
 576         if (i < len) {
 577             byte buf[] = new byte[value.length];
                  // Chars before the first match are copied unchanged.
 578             for (int j = 0; j < i; j++) {
 579                 putChar(buf, j, getChar(value, j)); // TBD:arraycopy?
 580             }
                  // From the first match onwards, substitute while copying.
 581             while (i < len) {
 582                 char c = getChar(value, i);
 583                 putChar(buf, i, c == oldChar ? newChar : c);
 584                 i++;
 585            }
 586            // Check if we should try to compress to latin1
                 // Compression is only attempted when a char that StringLatin1 cannot
                 // encode was replaced by one that it can; compress() is checked for a
                 // null result, in which case the UTF16 buffer is returned instead.
 587            if (String.COMPACT_STRINGS &&
 588                !StringLatin1.canEncode(oldChar) &&
 589                StringLatin1.canEncode(newChar)) {
 590                byte[] val = compress(buf, 0, len);
 591                if (val != null) {
 592                    return new String(val, LATIN1);
 593                }
 594            }
 595            return new String(buf, UTF16);
 596         }
              // oldChar was not found: no new String is created.
 597         return null;
 598     }
 599 



























































































































 600     public static boolean regionMatchesCI(byte[] value, int toffset,
 601                                           byte[] other, int ooffset, int len) {
 602         int last = toffset + len;
 603         assert toffset >= 0 && ooffset >= 0;
 604         assert ooffset + len <= length(other);
 605         assert last <= length(value);
 606         while (toffset < last) {
 607             char c1 = getChar(value, toffset++);
 608             char c2 = getChar(other, ooffset++);
 609             if (c1 == c2) {
 610                 continue;
 611             }
 612             // try converting both characters to uppercase.
 613             // If the results match, then the comparison scan should
 614             // continue.
 615             char u1 = Character.toUpperCase(c1);
 616             char u2 = Character.toUpperCase(c2);
 617             if (u1 == u2) {
 618                 continue;
 619             }


1412         }
1413     }
1414 
1415     ////////////////////////////////////////////////////////////////
1416 
1417     private static native boolean isBigEndian(); // native; consulted once by the static initializer below
1418 
          // Bit shifts (0 or 8) assigned exactly once, at class initialization,
          // according to isBigEndian().
          // NOTE(review): presumably used by getChar/putChar to split or combine
          // the two bytes of a char - confirm, those methods are not in this listing.
1419     static final int HI_BYTE_SHIFT;
1420     static final int LO_BYTE_SHIFT;
1421     static {
1422         if (isBigEndian()) {
1423             HI_BYTE_SHIFT = 8;
1424             LO_BYTE_SHIFT = 0;
1425         } else {
1426             HI_BYTE_SHIFT = 0;
1427             LO_BYTE_SHIFT = 8;
1428         }
1429     }
1430 
          // Half of Integer.MAX_VALUE.
          // NOTE(review): presumably the largest char count a byte[] can hold at
          // two bytes per char - confirm against the allocation helpers.
1431     static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;









1432 
1433     // Used by trusted callers.  Assumes all necessary bounds checks have
1434     // been done by the caller.
1435 
1436     /**
1437      * This is a variant of {@link Integer#getChars(int, int, byte[])}, but for
1438      * UTF-16 coder.
1439      *
1440      * @param i     value to convert
1441      * @param index next index, after the least significant digit
1442      * @param buf   target buffer, UTF16-coded.
1443      * @return index of the most significant digit or minus sign, if present
1444      */
1445     static int getChars(int i, int index, byte[] buf) {
1446         int q, r;
1447         int charPos = index;
1448 
1449         boolean negative = (i < 0);
1450         if (!negative) {
1451             i = -i;




 557             char lo = Character.lowSurrogate(ch);
 558             int i = Math.min(fromIndex, (value.length >> 1) - 2);
 559             for (; i >= 0; i--) {
 560                 if (getChar(value, i) == hi && getChar(value, i + 1) == lo) {
 561                     return i;
 562                 }
 563             }
 564         }
 565         return -1;
 566     }
 567 
 568     public static String replace(byte[] value, char oldChar, char newChar) {
              // Returns a new String in which every char of value equal to oldChar
              // has been replaced by newChar, or null when oldChar does not occur.
              // value is read two bytes per char (len = value.length >> 1) through
              // getChar; the result buffer is written through putChar.
              // NOTE(review): a null result presumably tells the caller to keep the
              // original string unchanged - confirm against the caller.
 569         int len = value.length >> 1;
 570         int i = -1;
              // Scan for the first occurrence of oldChar; i == len means "not found".
 571         while (++i < len) {
 572             if (getChar(value, i) == oldChar) {
 573                 break;
 574             }
 575         }
 576         if (i < len) {
 577             byte[] buf = new byte[value.length];
                  // Chars before the first match are copied unchanged.
 578             for (int j = 0; j < i; j++) {
 579                 putChar(buf, j, getChar(value, j)); // TBD:arraycopy?
 580             }
                  // From the first match onwards, substitute while copying.
 581             while (i < len) {
 582                 char c = getChar(value, i);
 583                 putChar(buf, i, c == oldChar ? newChar : c);
 584                 i++;
 585             }
 586             // Check if we should try to compress to latin1
                  // Compression is only attempted when a char that StringLatin1 cannot
                  // encode was replaced by one that it can; compress() is checked for a
                  // null result, in which case the UTF16 buffer is returned instead.
 587             if (String.COMPACT_STRINGS &&
 588                 !StringLatin1.canEncode(oldChar) &&
 589                 StringLatin1.canEncode(newChar)) {
 590                 byte[] val = compress(buf, 0, len);
 591                 if (val != null) {
 592                     return new String(val, LATIN1);
 593                 }
 594             }
 595             return new String(buf, UTF16);
 596         }
              // oldChar was not found: no new String is created.
 597         return null;
 598     }
 599 
 600     public static String replace(byte[] value, int valLen, boolean valLat1,
 601                                  byte[] targ, int targLen, boolean targLat1,
 602                                  byte[] repl, int replLen, boolean replLat1)
 603     {
              // Replaces every occurrence of the sequence targ inside value with
              // repl and returns the resulting String, or null when there is nothing
              // to replace.
              //
              // value/targ/repl - the backing bytes of the three sequences
              // valLen/targLen/replLen - their lengths, counted in chars
              // valLat1/targLat1/replLat1 - true when the matching array holds one
              //     byte per char (read below as b & 0xff); false when it is read
              //     through getChar
              //
              // Throws OutOfMemoryError when the list of match positions cannot grow
              // any further or when the result length overflows an int.
              //
              // Preconditions (checked only with assertions enabled): the target is
              // not empty, and at least one of the three sequences is not Latin1.
              // NOTE(review): the all-Latin1 case is presumably handled by a
              // StringLatin1 counterpart - confirm against the caller.
 604         assert targLen > 0;
 605         assert !valLat1 || !targLat1 || !replLat1;
 606 
 607         //  Possible combinations of the arguments/result encodings:
 608         //  +---+--------+--------+--------+-----------------------+
 609         //  | # | VALUE  | TARGET | REPL   | RESULT                |
 610         //  +===+========+========+========+=======================+
 611         //  | 1 | Latin1 | Latin1 |  UTF16 | null or UTF16         |
 612         //  +---+--------+--------+--------+-----------------------+
 613         //  | 2 | Latin1 |  UTF16 | Latin1 | null                  |
 614         //  +---+--------+--------+--------+-----------------------+
 615         //  | 3 | Latin1 |  UTF16 |  UTF16 | null                  |
 616         //  +---+--------+--------+--------+-----------------------+
 617         //  | 4 |  UTF16 | Latin1 | Latin1 | null or UTF16         |
 618         //  +---+--------+--------+--------+-----------------------+
 619         //  | 5 |  UTF16 | Latin1 |  UTF16 | null or UTF16         |
 620         //  +---+--------+--------+--------+-----------------------+
 621         //  | 6 |  UTF16 |  UTF16 | Latin1 | null, Latin1 or UTF16 |
 622         //  +---+--------+--------+--------+-----------------------+
 623         //  | 7 |  UTF16 |  UTF16 |  UTF16 | null or UTF16         |
 624         //  +---+--------+--------+--------+-----------------------+
 625 
 626         if (String.COMPACT_STRINGS && valLat1 && !targLat1) {
 627             // combinations 2 or 3
                  // NOTE(review): presumably a Latin1 value can never contain a target
                  // that required UTF16, so no search is needed - confirm.
 628             return null; // null: lets the calling String return itself ("this")
 629         }
 630 
              // First occurrence; the search routine is picked by the encodings of
              // value and targ.
 631         int i = (String.COMPACT_STRINGS && valLat1)
 632                         ? StringLatin1.indexOf(value, targ) :
 633                 (String.COMPACT_STRINGS && targLat1)
 634                         ? indexOfLatin1(value, targ)
 635                         : indexOf(value, targ);
 636         if (i < 0) {
 637             return null; // null: lets the calling String return itself ("this")
 638         }
 639 
 640         // find and store indices of substrings to replace
              // p is the index of the last stored position; the search resumes
              // right after each match, at i.
 641         int j, p = 0;
 642         int[] pos = new int[16];
 643         pos[0] = i;
 644         i += targLen;
              // NOTE(review): the loop tests "> 0" rather than ">= 0". Since
              // i >= targLen > 0 here, the two are equivalent only if the search
              // never returns an index below its fromIndex argument - confirm.
 645         while ((j = ((String.COMPACT_STRINGS && valLat1)
 646                             ? StringLatin1.indexOf(value, valLen, targ, targLen, i) :
 647                      (String.COMPACT_STRINGS && targLat1)
 648                             ? indexOfLatin1(value, valLen, targ, targLen, i)
 649                             : indexOf(value, valLen, targ, targLen, i))) > 0)
 650         {
                  // pos is full: grow it by half of its current size, capped at
                  // MAX_ARRAY_SIZE.
 651             if (++p == pos.length) {
 652                 int cap = p + (p >> 1);
 653                 // overflow-conscious code
                      // (the comparison is written as a subtraction so that a cap
                      // that wrapped around to a negative value is still caught)
 654                 if (cap - MAX_ARRAY_SIZE > 0) {
 655                     if (p == MAX_ARRAY_SIZE) {
 656                         throw new OutOfMemoryError();
 657                     }
 658                     cap = MAX_ARRAY_SIZE;
 659                 }
 660                 pos = Arrays.copyOf(pos, cap);
 661             }
 662             pos[p] = j;
 663             i = j + targLen;
 664         }
 665 
              // ++p turns p from "last index" into "number of matches". The result
              // length is valLen + matches * (replLen - targLen), computed with
              // exact arithmetic so that an int overflow surfaces as OutOfMemoryError.
 666         int resultLen;
 667         try {
 668             resultLen = Math.addExact(valLen, Math.multiplyExact(++p, replLen - targLen));
 669         } catch (ArithmeticException ignored) {
 670             throw new OutOfMemoryError();
 671         }
              // Nothing is left once every match has been removed.
 672         if (resultLen == 0) {
 673             return "";
 674         }
 675 
              // NOTE(review): newBytesFor is not part of this listing; presumably it
              // allocates two bytes per char for resultLen chars - confirm.
 676         byte[] result = newBytesFor(resultLen);
              // posFrom walks value, posTo walks result (both counted in chars).
 677         int posFrom = 0, posTo = 0;
 678         for (int q = 0; q < p; ++q) {
 679             int nextPos = pos[q];
                  // Copy the unchanged chars in front of the next match; Latin1
                  // bytes are widened to chars on the way.
 680             if (String.COMPACT_STRINGS && valLat1) {
 681                 while (posFrom < nextPos) {
 682                     char c = (char)(value[posFrom++] & 0xff);
 683                     putChar(result, posTo++, c);
 684                 }
 685             } else {
 686                 while (posFrom < nextPos) {
 687                     putChar(result, posTo++, getChar(value, posFrom++));
 688                 }
 689             }
                  // Skip the matched target in value and emit the replacement.
 690             posFrom += targLen;
 691             if (String.COMPACT_STRINGS && replLat1) {
 692                 for (int k = 0; k < replLen; ++k) {
 693                     char c = (char)(repl[k] & 0xff);
 694                     putChar(result, posTo++, c);
 695                 }
 696             } else {
 697                 for (int k = 0; k < replLen; ++k) {
 698                     putChar(result, posTo++, getChar(repl, k));
 699                 }
 700             }
 701         }
              // Copy whatever follows the last match.
 702         if (String.COMPACT_STRINGS && valLat1) {
 703             while (posFrom < valLen) {
 704                 char c = (char)(value[posFrom++] & 0xff);
 705                 putChar(result, posTo++, c);
 706             }
 707         } else {
 708             while (posFrom < valLen) {
 709                 putChar(result, posTo++, getChar(value, posFrom++));
 710             }
 711         }
 712 
 713         if (String.COMPACT_STRINGS && replLat1 && !targLat1) {
 714             // combination 6
                  // The only combination listed above with a possible Latin1 result;
                  // compress() is checked for null, and the UTF16 result is returned
                  // in that case.
 715             byte[] lat1Result = compress(result, 0, resultLen);
 716             if (lat1Result != null) {
 717                 return new String(lat1Result, LATIN1);
 718             }
 719         }
 720         return new String(result, UTF16);
 721     }
 722 
 723     public static boolean regionMatchesCI(byte[] value, int toffset,
 724                                           byte[] other, int ooffset, int len) {
 725         int last = toffset + len;
 726         assert toffset >= 0 && ooffset >= 0;
 727         assert ooffset + len <= length(other);
 728         assert last <= length(value);
 729         while (toffset < last) {
 730             char c1 = getChar(value, toffset++);
 731             char c2 = getChar(other, ooffset++);
 732             if (c1 == c2) {
 733                 continue;
 734             }
 735             // try converting both characters to uppercase.
 736             // If the results match, then the comparison scan should
 737             // continue.
 738             char u1 = Character.toUpperCase(c1);
 739             char u2 = Character.toUpperCase(c2);
 740             if (u1 == u2) {
 741                 continue;
 742             }


1535         }
1536     }
1537 
1538     ////////////////////////////////////////////////////////////////
1539 
1540     private static native boolean isBigEndian(); // native; consulted once by the static initializer below
1541 
          // Bit shifts (0 or 8) assigned exactly once, at class initialization,
          // according to isBigEndian().
          // NOTE(review): presumably used by getChar/putChar to split or combine
          // the two bytes of a char - confirm, those methods are not in this listing.
1542     static final int HI_BYTE_SHIFT;
1543     static final int LO_BYTE_SHIFT;
1544     static {
1545         if (isBigEndian()) {
1546             HI_BYTE_SHIFT = 8;
1547             LO_BYTE_SHIFT = 0;
1548         } else {
1549             HI_BYTE_SHIFT = 0;
1550             LO_BYTE_SHIFT = 8;
1551         }
1552     }
1553 
          // Half of Integer.MAX_VALUE.
          // NOTE(review): presumably the largest char count a byte[] can hold at
          // two bytes per char - confirm against the allocation helpers.
1554     static final int MAX_LENGTH = Integer.MAX_VALUE >> 1;
1555 
1556 
1557     /**
1558      * The maximum size of array to allocate (unless necessary).
1559      * Some VMs reserve some header words in an array.
1560      * Attempts to allocate larger arrays may result in
1561      * OutOfMemoryError: Requested array size exceeds VM limit
1562      */
1563     private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
1564 
1565     // Used by trusted callers.  Assumes all necessary bounds checks have
1566     // been done by the caller.
1567 
1568     /**
1569      * This is a variant of {@link Integer#getChars(int, int, byte[])}, but for
1570      * UTF-16 coder.
1571      *
1572      * @param i     value to convert
1573      * @param index next index, after the least significant digit
1574      * @param buf   target buffer, UTF16-coded.
1575      * @return index of the most significant digit or minus sign, if present
1576      */
1577     static int getChars(int i, int index, byte[] buf) {
1578         int q, r;
1579         int charPos = index;
1580 
1581         boolean negative = (i < 0);
1582         if (!negative) {
1583             i = -i;


< prev index next >