--- old/src/share/classes/java/util/regex/Pattern.java 2013-11-11 14:41:11.000000000 -0800 +++ new/src/share/classes/java/util/regex/Pattern.java 2013-11-11 14:41:10.000000000 -0800 @@ -1142,9 +1142,15 @@ * input sequence that is terminated by another subsequence that matches * this pattern or is terminated by the end of the input sequence. The * substrings in the array are in the order in which they occur in the - * input. If this pattern does not match any subsequence of the input then + * input. If this pattern does not match any subsequence of the input then * the resulting array has just one element, namely the input sequence in - * string form. + * string form. A zero-length input sequence always results in a zero-length + * resulting array. + * + *

When there is a positive-width match at the beginning of the input + * sequence then an empty leading substring is included at the beginning + * of the resulting array. A zero-width match at the beginning, however, + * never produces such an empty leading substring. * *

The limit parameter controls the number of times the * pattern is applied and therefore affects the length of the resulting @@ -1185,7 +1191,6 @@ * { "b", "", ":and:f" } * * - * * @param input * The character sequence to be split * @@ -1196,6 +1201,8 @@ * around matches of this pattern */ public String[] split(CharSequence input, int limit) { + if (input.length() == 0) + return new String[0]; int index = 0; boolean matchLimited = limit > 0; ArrayList matchList = new ArrayList<>(); @@ -1204,6 +1211,11 @@ // Add segments before each match found while(m.find()) { if (!matchLimited || matchList.size() < limit - 1) { + if (index == 0 && index == m.start() && m.start() == m.end()) { + // no empty leading substring included for zero-width match + // at the beginning of the input char sequence. + continue; + } String match = input.subSequence(index, m.start()).toString(); matchList.add(match); index = m.end(); @@ -5762,6 +5774,13 @@ * the resulting stream has just one element, namely the input sequence in * string form. * + *

A zero-length input sequence always results in an empty stream. + * + *

When there is a positive-width match at the beginning of the input + * sequence then an empty leading substring is included at the beginning + * of the stream. A zero-width match at the beginning, however, never produces + * such an empty leading substring. + * + *

If the input sequence is mutable, it must remain constant during the * execution of the terminal stream operation. Otherwise, the result of the * terminal stream operation is undefined. @@ -5817,7 +5836,8 @@ current = matcher.end(); if (!nextElement.isEmpty()) { return true; - } else { + } else if (current > 0) { // no empty leading substring for zero-width + // match at the beginning of the input emptyElementCount++; } }