src/share/classes/java/util/regex/Pattern.java
Print this page
@@ -1142,11 +1142,17 @@
* input sequence that is terminated by another subsequence that matches
* this pattern or is terminated by the end of the input sequence. The
* substrings in the array are in the order in which they occur in the
* input. If this pattern does not match any subsequence of the input then
* the resulting array has just one element, namely the input sequence in
- * string form.
+ * string form. A zero-length input sequence always results zero-length
+ * resulting array.
+ *
+ * <p> When there is a positive-width match at the beginning of the input
+ * sequence then an empty leading substring is included at the beginning
+ * of the resulting array. A zero-width match at the beginning however
+ * never produces such empty leading substring.
*
* <p> The <tt>limit</tt> parameter controls the number of times the
* pattern is applied and therefore affects the length of the resulting
* array. If the limit <i>n</i> is greater than zero then the pattern
* will be applied at most <i>n</i> - 1 times, the array's
@@ -1183,29 +1189,35 @@
* <tr><td align=center>o</td>
* <td align=center>0</td>
* <td><tt>{ "b", "", ":and:f" }</tt></td></tr>
* </table></blockquote>
*
- *
* @param input
* The character sequence to be split
*
* @param limit
* The result threshold, as described above
*
* @return The array of strings computed by splitting the input
* around matches of this pattern
*/
public String[] split(CharSequence input, int limit) {
+ if (input.length() == 0)
+ return new String[0];
int index = 0;
boolean matchLimited = limit > 0;
ArrayList<String> matchList = new ArrayList<>();
Matcher m = matcher(input);
// Add segments before each match found
while(m.find()) {
if (!matchLimited || matchList.size() < limit - 1) {
+ if (index == 0 && index == m.start() && m.start() == m.end()) {
+ // no empty leading substring included for zero-width match
+ // at the beginning of the input char sequence.
+ continue;
+ }
String match = input.subSequence(index, m.start()).toString();
matchList.add(match);
index = m.end();
} else if (matchList.size() == limit - 1) { // last one
String match = input.subSequence(index,
@@ -5760,10 +5772,17 @@
*
* <p> If this pattern does not match any subsequence of the input then
* the resulting stream has just one element, namely the input sequence in
* string form.
*
+ * <p> A zero-length input sequence always results an empty stream.
+ *
+ * <p> When there is a positive-width match at the beginning of the input
+ * sequence then an empty leading substring is included at the beginning
+ * of the stream. A zero-width match at the beginning however never produces
+ * such empty leading substring.
+ *
* <p> If the input sequence is mutable, it must remain constant during the
* execution of the terminal stream operation. Otherwise, the result of the
* terminal stream operation is undefined.
*
* @param input
@@ -5815,11 +5834,12 @@
while (matcher.find()) {
nextElement = input.subSequence(current, matcher.start()).toString();
current = matcher.end();
if (!nextElement.isEmpty()) {
return true;
- } else {
+ } else if (current > 0) { // no empty leading substring for zero-width
+ // match at the beginning of the input
emptyElementCount++;
}
}
// Consume last matching element