src/share/classes/java/util/regex/Pattern.java
Print this page
*** 1142,1152 ****
* input sequence that is terminated by another subsequence that matches
* this pattern or is terminated by the end of the input sequence. The
* substrings in the array are in the order in which they occur in the
* input. If this pattern does not match any subsequence of the input then
* the resulting array has just one element, namely the input sequence in
! * string form.
*
* <p> The <tt>limit</tt> parameter controls the number of times the
* pattern is applied and therefore affects the length of the resulting
* array. If the limit <i>n</i> is greater than zero then the pattern
* will be applied at most <i>n</i> - 1 times, the array's
--- 1142,1158 ----
* input sequence that is terminated by another subsequence that matches
* this pattern or is terminated by the end of the input sequence. The
* substrings in the array are in the order in which they occur in the
* input. If this pattern does not match any subsequence of the input then
* the resulting array has just one element, namely the input sequence in
! * string form. A zero-length input sequence always results zero-length
! * resulting array.
! *
! * <p> When there is a positive-width match at the beginning of the input
! * sequence then an empty leading substring is included at the beginning
! * of the resulting array. A zero-width match at the beginning however
! * never produces such empty leading substring.
*
* <p> The <tt>limit</tt> parameter controls the number of times the
* pattern is applied and therefore affects the length of the resulting
* array. If the limit <i>n</i> is greater than zero then the pattern
* will be applied at most <i>n</i> - 1 times, the array's
*** 1183,1211 ****
* <tr><td align=center>o</td>
* <td align=center>0</td>
* <td><tt>{ "b", "", ":and:f" }</tt></td></tr>
* </table></blockquote>
*
- *
* @param input
* The character sequence to be split
*
* @param limit
* The result threshold, as described above
*
* @return The array of strings computed by splitting the input
* around matches of this pattern
*/
public String[] split(CharSequence input, int limit) {
int index = 0;
boolean matchLimited = limit > 0;
ArrayList<String> matchList = new ArrayList<>();
Matcher m = matcher(input);
// Add segments before each match found
while(m.find()) {
if (!matchLimited || matchList.size() < limit - 1) {
String match = input.subSequence(index, m.start()).toString();
matchList.add(match);
index = m.end();
} else if (matchList.size() == limit - 1) { // last one
String match = input.subSequence(index,
--- 1189,1223 ----
* <tr><td align=center>o</td>
* <td align=center>0</td>
* <td><tt>{ "b", "", ":and:f" }</tt></td></tr>
* </table></blockquote>
*
* @param input
* The character sequence to be split
*
* @param limit
* The result threshold, as described above
*
* @return The array of strings computed by splitting the input
* around matches of this pattern
*/
public String[] split(CharSequence input, int limit) {
+ if (input.length() == 0)
+ return new String[0];
int index = 0;
boolean matchLimited = limit > 0;
ArrayList<String> matchList = new ArrayList<>();
Matcher m = matcher(input);
// Add segments before each match found
while(m.find()) {
if (!matchLimited || matchList.size() < limit - 1) {
+ if (index == 0 && index == m.start() && m.start() == m.end()) {
+ // no empty leading substring included for zero-width match
+ // at the beginning of the input char sequence.
+ continue;
+ }
String match = input.subSequence(index, m.start()).toString();
matchList.add(match);
index = m.end();
} else if (matchList.size() == limit - 1) { // last one
String match = input.subSequence(index,
*** 5760,5769 ****
--- 5772,5788 ----
*
* <p> If this pattern does not match any subsequence of the input then
* the resulting stream has just one element, namely the input sequence in
* string form.
*
+ * <p> A zero-length input sequence always results an empty stream.
+ *
+ * <p> When there is a positive-width match at the beginning of the input
+ * sequence then an empty leading substring is included at the beginning
+ * of the stream. A zero-width match at the beginning however never produces
+ * such empty leading substring.
+ *
* <p> If the input sequence is mutable, it must remain constant during the
* execution of the terminal stream operation. Otherwise, the result of the
* terminal stream operation is undefined.
*
* @param input
*** 5815,5825 ****
while (matcher.find()) {
nextElement = input.subSequence(current, matcher.start()).toString();
current = matcher.end();
if (!nextElement.isEmpty()) {
return true;
! } else {
emptyElementCount++;
}
}
// Consume last matching element
--- 5834,5845 ----
while (matcher.find()) {
nextElement = input.subSequence(current, matcher.start()).toString();
current = matcher.end();
if (!nextElement.isEmpty()) {
return true;
! } else if (current > 0) { // no empty leading substring for zero-width
! // match at the beginning of the input
emptyElementCount++;
}
}
// Consume last matching element