# HG changeset patch # User igerasim # Date 1432498544 -10800 # Sun May 24 23:15:44 2015 +0300 # Node ID f2c20292540afaf501f7f118d6fb382405cc6825 # Parent 1dab21cbe54d5de4d6f2ebd1d26be3563d89ea16 [mq]: 8058779-Faster-implementation-of-String-replace-HEAVYDUTY diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -2235,6 +2235,14 @@ } /** + * The maximum size of array to allocate. + * Some VMs reserve some header words in an array. + * Attempts to allocate larger arrays may result in + * OutOfMemoryError: Requested array size exceeds VM limit + */ + private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8; + + /** * Replaces each substring of this string that matches the literal target * sequence with the specified literal replacement sequence. The * replacement proceeds from the beginning of the string to the end, for @@ -2247,8 +2255,114 @@ * @since 1.5 */ public String replace(CharSequence target, CharSequence replacement) { - return Pattern.compile(target.toString(), Pattern.LITERAL).matcher( - this).replaceAll(Matcher.quoteReplacement(replacement.toString())); + String starget = target.toString(); + int targLen = starget.length(); + String srepl = replacement.toString(); + int replLen = srepl.length(); + + // special case: replacing empty substrings + if (targLen == 0) { + return splitAndJoin(srepl); + } + + int i = indexOf(starget); + // special case: nothing to replace + if (i < 0) { + return this; + } + + // find and store indices of substrings to replace + int p = 0, j; + int[] pos = new int[16]; + pos[0] = i; + i += targLen; + while ((j = indexOf(starget, i)) > 0) { + if (++p == pos.length) { + int cap = p << 1; + // overflow-conscious code + if (p >= MAX_ARRAY_SIZE - p) { + if (p == MAX_ARRAY_SIZE) + throw new OutOfMemoryError(); + cap = MAX_ARRAY_SIZE; + } + pos = Arrays.copyOf(pos, cap); + } + pos[p] = j; + i = j + targLen; + } + + final char[] value = this.value; + final char[] replValue = srepl.value; + int thisLen = value.length; + int deltaLen = replLen - targLen; + // overflow-conscious code + int resultLen = thisLen + (p + 1) * deltaLen; + if (MAX_ARRAY_SIZE / (p + 1) < deltaLen || + MAX_ARRAY_SIZE - resultLen < 0) { + throw new OutOfMemoryError(); + } + if (resultLen <= 0) { + return ""; + } + + char[] result = new char[resultLen]; + int posFrom = 0, posTo = 0; + for (int q = 0; q <= p; ++q) { + int nextPos = pos[q]; + int lenInc = nextPos - posFrom; + if (lenInc > 0) { + System.arraycopy(value, posFrom, result, posTo, lenInc); + posTo += lenInc; + } + if (replLen > 0) { + System.arraycopy(replValue, 0, result, posTo, replLen); + posTo += replLen; + } + posFrom = nextPos + targLen; + } + System.arraycopy(value, posFrom, result, posTo, + thisLen - posFrom); + + return new String(result, true); + } + + /** + * Returns this string with every empty substring + * replaced by the given delimiter. + * If this string is empty, the delimiter is returned. + * If the delimiter is empty, unchanged this string + * is returned. + */ + private String splitAndJoin(String delimiter) { + final char[] value = this.value; + int thisLen = value.length; + if (thisLen == 0) { + return delimiter; + } + + final char[] delimValue = delimiter.value; + int delimLen = delimValue.length; + if (delimLen == 0) { + return this; + } + + // overflow-conscious code + int resultLen = thisLen + (thisLen + 1) * delimLen; + if (MAX_ARRAY_SIZE / (thisLen + 1) < delimLen || + MAX_ARRAY_SIZE - resultLen < 0) { + throw new OutOfMemoryError(); + } + + char[] result = new char[resultLen]; + int posFrom = 0, posTo = 0; + while (posFrom < thisLen) { + System.arraycopy(delimValue, 0, result, posTo, delimLen); + posTo += delimLen; + result[posTo++] = value[posFrom++]; + } + System.arraycopy(delimValue, 0, result, posTo, delimLen); + + return new String(result, true); } /** diff --git a/test/java/lang/String/LiteralReplace.java b/test/java/lang/String/LiteralReplace.java new file mode 100644 --- /dev/null +++ b/test/java/lang/String/LiteralReplace.java @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* @test + * @bug 8058779 + * @run testng LiteralReplace + * @summary Basic tests of String.replace(CharSequence, CharSequence) + * @key randomness + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.Random; +import sun.misc.JavaLangAccess; +import sun.misc.SharedSecrets; + +import org.testng.annotations.Test; +import org.testng.annotations.DataProvider; +import static org.testng.Assert.fail; + +public class LiteralReplace { + + @Test(dataProvider="sourceTargetReplacementExpected") + public void testExpected(String source, String target, + String replacement, String expected) + { + String canonical = canonicalReplace(source, target, replacement); + if (!canonical.equals(expected)) { + fail("Canonical: " + canonical + " != " + expected); + } + test0(source, target, replacement, expected); + } + + @Test(dataProvider="sourceTargetReplacement") + public void testCanonical(String source, String target, + String replacement) + { + String canonical = canonicalReplace(source, target, replacement); + test0(source, target, replacement, canonical); + } + + private void test0(String source, String target, String replacement, + String expected) + { + String result = source.replace(target, replacement); + if (!result.equals(expected)) { + fail(result + " != " + expected); + } + } + + @Test(dataProvider="sourceTargetReplacementWithNull") + public void testNPE(String source, String target, String replacement) { + try { + source.replace(target, replacement); + fail("Expected to throw NPE: " + source + ".replace(" + target + + "," + replacement + ")"); + } catch (NullPointerException npe) { + } + } + + + @DataProvider + public static Object[][] sourceTargetReplacementExpected() { + return new Object[][] { + {"aaa", "aa", "b", "ba"}, + {"abcdefgh", "def", "DEF", "abcDEFgh"}, + {"abcdefgh", "123", "DEF", "abcdefgh"}, + {"abcdefgh", "abcdefghi", "DEF", "abcdefgh"}, + {"abcdefghabc", "abc", "DEF", "DEFdefghDEF"}, + {"abcdefghdef", "def", "", "abcgh"}, + {"abcdefgh", "", "_", "_a_b_c_d_e_f_g_h_"}, + {"", "", "", ""}, + {"", "a", "b", ""}, + {"", "", "abc", "abc"}, + {"abcdefgh", "abcdefgh", "abcdefgh", "abcdefgh"}, + {"abcdefgh", "abcdefgh", "abcdefghi", "abcdefghi"}, + {"abcdefgh", "abcdefgh", "", ""}, + {"abcdabcd", "abcd", "", ""}, + {"aaaaaaaaa", "aa", "_X_", "_X__X__X__X_a"}, + {"aaaaaaaaa", "aa", "aaa", "aaaaaaaaaaaaa"}, + {"aaaaaaaaa", "aa", "aa", "aaaaaaaaa"}, + {"a.c.e.g.", ".", "-", "a-c-e-g-"}, + {"abcdefgh", "[a-h]", "X", "abcdefgh"}, + {"aa+", "a+", "", "a"}, + {"^abc$", "abc", "x", "^x$"}, + }; + } + + @DataProvider + public static Object[][] sourceTargetReplacementWithNull() { + return new Object[][] { + {"a", "a", null}, + {"a", null, "a"}, + {"a", null, null}, + {null, "a", "a"}, + {null, "a", null}, + {null, null, "a"}, + {null, null, null}, + }; + } + + @DataProvider + public static Iterator sourceTargetReplacement() { + ArrayList list = new ArrayList<>(); + for (int maxSrcLen = 1; maxSrcLen <= (1 << 10); maxSrcLen <<= 1) { + for (int maxTrgLen = 1; maxTrgLen <= (1 << 10); maxTrgLen <<= 1) { + for (int maxPrlLen = 1; maxPrlLen <= (1 << 10); maxPrlLen <<= 1) { + list.add(makeArray(makeRandomString(maxSrcLen), + makeRandomString(maxTrgLen), + makeRandomString(maxPrlLen))); + + String source = makeRandomString(maxSrcLen); + list.add(makeArray(source, + mekeRandomSubstring(source, maxTrgLen), + makeRandomString(maxPrlLen))); + } + } + } + return list.iterator(); + } + + // utilities + + /** + * How the String.replace(CharSequence, CharSequence) used to be implemented + */ + private static String canonicalReplace(String source, String target, String replacement) { + return Pattern.compile(target.toString(), Pattern.LITERAL).matcher( + source).replaceAll(Matcher.quoteReplacement(replacement.toString())); + } + + private static final Random random = new Random(); + private static final JavaLangAccess jla = SharedSecrets.getJavaLangAccess(); + + private static final char[] CHARS = ("qwertyuiop[]12345678" + + "90-=\\`asdfghjkl;'zxcvbnm,./~!@#$%^&*()_+|QWERTYUIOP{" + + "}ASDFGHJKL:\"ZXCVBNM<>?\n\r\t\u0444\u044B\u0432\u0430").toCharArray(); + + private static String makeRandomString(int maxLen) { + int len = random.nextInt(maxLen); + char[] buf = new char[len]; + for (int i = 0; i < len; ++i) { + buf[i] = CHARS[random.nextInt(CHARS.length)]; + } + return jla.newStringUnsafe(buf); + } + + private static String mekeRandomSubstring(String source, int maxLen) { + if (source.isEmpty()) { + return source; + } + int pos = random.nextInt(source.length()); + int len = Integer.min(source.length() - pos, + random.nextInt(maxLen)); + return source.substring(pos, pos + len); + } + + private static Object[] makeArray(Object... array) { + return array; + } +}