--- old/src/share/classes/java/lang/String.java 2013-11-11 14:41:10.000000000 -0800 +++ new/src/share/classes/java/lang/String.java 2013-11-11 14:41:09.000000000 -0800 @@ -2235,7 +2235,13 @@ * expression or is terminated by the end of the string. The substrings in * the array are in the order in which they occur in this string. If the * expression does not match any part of the input then the resulting array - * has just one element, namely this string. + * has just one element, namely this string. A zero-length input sequence + * always results in a zero-length resulting array. + * + *

When there is a positive-width match at the beginning of this + * string then an empty leading substring is included at the beginning + * of the resulting array. A zero-width match at the beginning, however, + * never produces such an empty leading substring. * *

The {@code limit} parameter controls the number of times the * pattern is applied and therefore affects the length of the resulting @@ -2325,6 +2331,8 @@ (ch < Character.MIN_HIGH_SURROGATE || ch > Character.MAX_LOW_SURROGATE)) { + if (value.length == 0) + return new String[0]; int off = 0; int next = 0; boolean limited = limit > 0; --- old/src/share/classes/java/util/regex/Pattern.java 2013-11-11 14:41:11.000000000 -0800 +++ new/src/share/classes/java/util/regex/Pattern.java 2013-11-11 14:41:10.000000000 -0800 @@ -1142,9 +1142,15 @@ * input sequence that is terminated by another subsequence that matches * this pattern or is terminated by the end of the input sequence. The * substrings in the array are in the order in which they occur in the - * input. If this pattern does not match any subsequence of the input then + * input. If this pattern does not match any subsequence of the input then * the resulting array has just one element, namely the input sequence in - * string form. + * string form. A zero-length input sequence always results in a zero-length + * resulting array. + * + *

When there is a positive-width match at the beginning of the input + * sequence then an empty leading substring is included at the beginning + * of the resulting array. A zero-width match at the beginning, however, + * never produces such an empty leading substring. * *

The limit parameter controls the number of times the * pattern is applied and therefore affects the length of the resulting @@ -1185,7 +1191,6 @@ * { "b", "", ":and:f" } * * - * * @param input * The character sequence to be split * @@ -1196,6 +1201,8 @@ * around matches of this pattern */ public String[] split(CharSequence input, int limit) { + if (input.length() == 0) + return new String[0]; int index = 0; boolean matchLimited = limit > 0; ArrayList matchList = new ArrayList<>(); @@ -1204,6 +1211,11 @@ // Add segments before each match found while(m.find()) { if (!matchLimited || matchList.size() < limit - 1) { + if (index == 0 && index == m.start() && m.start() == m.end()) { + // no empty leading substring included for zero-width match + // at the beginning of the input char sequence. + continue; + } String match = input.subSequence(index, m.start()).toString(); matchList.add(match); index = m.end(); @@ -5762,6 +5774,13 @@ * the resulting stream has just one element, namely the input sequence in * string form. * + *

A zero-length input sequence always results in an empty stream. + * + *

When there is a positive-width match at the beginning of the input + * sequence then an empty leading substring is included at the beginning + * of the stream. A zero-width match at the beginning, however, never produces + * such an empty leading substring. + * *

If the input sequence is mutable, it must remain constant during the * execution of the terminal stream operation. Otherwise, the result of the * terminal stream operation is undefined. @@ -5817,7 +5836,8 @@ current = matcher.end(); if (!nextElement.isEmpty()) { return true; - } else { + } else if (current > 0) { // no empty leading substring for zero-width + // match at the beginning of the input emptyElementCount++; } } --- old/test/java/lang/String/Split.java 2013-11-11 14:41:12.000000000 -0800 +++ new/test/java/lang/String/Split.java 2013-11-11 14:41:12.000000000 -0800 @@ -23,7 +23,7 @@ /** * @test - * @bug 6840246 + * @bug 6840246 6559590 * @summary test String.split() */ import java.util.Arrays; @@ -78,12 +78,11 @@ throw new RuntimeException("String.split failure 7"); } // Check the case for limit == 0, source = ""; + // split() now returns 0-length for empty source "" see #6559590 source = ""; String[] result = source.split("e", 0); - if (result.length != 1) + if (result.length != 0) throw new RuntimeException("String.split failure 8"); - if (!result[0].equals(source)) - throw new RuntimeException("String.split failure 9"); // check fastpath of String.split() source = "0123456789abcdefgABCDEFG"; --- old/test/java/util/regex/RegExTest.java 2013-11-11 14:41:13.000000000 -0800 +++ new/test/java/util/regex/RegExTest.java 2013-11-11 14:41:12.000000000 -0800 @@ -33,7 +33,8 @@ * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 - * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 + * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 + * 8027645 */ import java.util.regex.*; @@ -148,6 +149,7 @@ groupCurlyNotFoundSuppTest(); groupCurlyBackoffTest(); patternAsPredicate(); + if (failure) { throw new RuntimeException("RegExTest failed, 1st failure: " + @@ -1776,13 
+1778,68 @@ failCount++; } // Check the case for limit == 0, source = ""; + // split() now returns 0-length for empty source "" see #6559590 source = ""; result = source.split("e", 0); - if (result.length != 1) - failCount++; - if (!result[0].equals(source)) + if (result.length != 0) failCount++; + // Check both split() and splitAsStream(), especially for zero-length + // input and zero-length match cases + String[][] input = new String[][] { + { " ", "Abc Efg Hij" }, // normal non-zero-match + { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match + { " ", "Abc Efg Hij" }, // non-zero-match in the middle + { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match + { "(?=\\p{Lu})", "AbcEfg" }, + { "(?=\\p{Lu})", "Abc" }, + { " ", "" }, // zero-length input + { ".*", "" }, + + // some tests from PatternStreamTest.java + { "4", "awgqwefg1fefw4vssv1vvv1" }, + { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, + { "1", "awgqwefg1fefw4vssv1vvv1" }, + { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, + { "\u56da", "1\u56da23\u56da456\u56da7890" }, + { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, + { "\u56da", "" }, + { "[ \t,:.]","This is,testing: with\tdifferent separators." 
}, //multiple septs + { "o", "boo:and:foo" }, + { "o", "booooo:and:fooooo" }, + { "o", "fooooo:" }, + }; + + String[][] expected = new String[][] { + { "Abc", "Efg", "Hij" }, + { "", "Abc", "Efg", "Hij" }, + { "Abc", "", "Efg", "Hij" }, + { "Abc", "Efg", "Hij" }, + { "Abc", "Efg" }, + { "Abc" }, + {}, + {}, + + { "awgqwefg1fefw", "vssv1vvv1" }, + { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, + { "awgqwefg", "fefw4vssv", "vvv" }, + { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, + { "1", "23", "456", "7890" }, + { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, + {}, + { "This", "is", "testing", "", "with", "different", "separators" }, + { "b", "", ":and:f" }, + { "b", "", "", "", "", ":and:f" }, + { "f", "", "", "", "", ":" }, + }; + for (int i = 0; i < input.length; i++) { + pattern = Pattern.compile(input[i][0]); + if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) + failCount++; + if (!Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), + expected[i])) + failCount++; + } report("Split"); }