src/share/classes/java/util/regex/Pattern.java

Print this page

        

*** 1142,1152 **** * input sequence that is terminated by another subsequence that matches * this pattern or is terminated by the end of the input sequence. The * substrings in the array are in the order in which they occur in the * input. If this pattern does not match any subsequence of the input then * the resulting array has just one element, namely the input sequence in ! * string form. * * <p> The <tt>limit</tt> parameter controls the number of times the * pattern is applied and therefore affects the length of the resulting * array. If the limit <i>n</i> is greater than zero then the pattern * will be applied at most <i>n</i>&nbsp;-&nbsp;1 times, the array's --- 1142,1158 ---- * input sequence that is terminated by another subsequence that matches * this pattern or is terminated by the end of the input sequence. The * substrings in the array are in the order in which they occur in the * input. If this pattern does not match any subsequence of the input then * the resulting array has just one element, namely the input sequence in ! * string form. A zero-length input sequence always results in a zero-length ! * resulting array. ! * ! * <p> When there is a positive-width match at the beginning of the input ! * sequence then an empty leading substring is included at the beginning ! * of the resulting array. A zero-width match at the beginning, however, ! * never produces such an empty leading substring. * * <p> The <tt>limit</tt> parameter controls the number of times the * pattern is applied and therefore affects the length of the resulting * array. If the limit <i>n</i> is greater than zero then the pattern * will be applied at most <i>n</i>&nbsp;-&nbsp;1 times, the array's
*** 1183,1211 **** * <tr><td align=center>o</td> * <td align=center>0</td> * <td><tt>{ "b", "", ":and:f" }</tt></td></tr> * </table></blockquote> * - * * @param input * The character sequence to be split * * @param limit * The result threshold, as described above * * @return The array of strings computed by splitting the input * around matches of this pattern */ public String[] split(CharSequence input, int limit) { int index = 0; boolean matchLimited = limit > 0; ArrayList<String> matchList = new ArrayList<>(); Matcher m = matcher(input); // Add segments before each match found while(m.find()) { if (!matchLimited || matchList.size() < limit - 1) { String match = input.subSequence(index, m.start()).toString(); matchList.add(match); index = m.end(); } else if (matchList.size() == limit - 1) { // last one String match = input.subSequence(index, --- 1189,1223 ---- * <tr><td align=center>o</td> * <td align=center>0</td> * <td><tt>{ "b", "", ":and:f" }</tt></td></tr> * </table></blockquote> * * @param input * The character sequence to be split * * @param limit * The result threshold, as described above * * @return The array of strings computed by splitting the input * around matches of this pattern */ public String[] split(CharSequence input, int limit) { + if (input.length() == 0) + return new String[0]; int index = 0; boolean matchLimited = limit > 0; ArrayList<String> matchList = new ArrayList<>(); Matcher m = matcher(input); // Add segments before each match found while(m.find()) { if (!matchLimited || matchList.size() < limit - 1) { + if (index == 0 && index == m.start() && m.start() == m.end()) { + // no empty leading substring included for zero-width match + // at the beginning of the input char sequence. + continue; + } String match = input.subSequence(index, m.start()).toString(); matchList.add(match); index = m.end(); } else if (matchList.size() == limit - 1) { // last one String match = input.subSequence(index,
*** 5760,5769 **** --- 5772,5788 ---- * * <p> If this pattern does not match any subsequence of the input then * the resulting stream has just one element, namely the input sequence in * string form. * + * <p> A zero-length input sequence always results in an empty stream. + * + * <p> When there is a positive-width match at the beginning of the input + * sequence then an empty leading substring is included at the beginning + * of the stream. A zero-width match at the beginning, however, never produces + * such an empty leading substring. + * * <p> If the input sequence is mutable, it must remain constant during the * execution of the terminal stream operation. Otherwise, the result of the * terminal stream operation is undefined. * * @param input
*** 5815,5825 **** while (matcher.find()) { nextElement = input.subSequence(current, matcher.start()).toString(); current = matcher.end(); if (!nextElement.isEmpty()) { return true; ! } else { emptyElementCount++; } } // Consume last matching element --- 5834,5845 ---- while (matcher.find()) { nextElement = input.subSequence(current, matcher.start()).toString(); current = matcher.end(); if (!nextElement.isEmpty()) { return true; ! } else if (current > 0) { // no empty leading substring for zero-width ! // match at the beginning of the input emptyElementCount++; } } // Consume last matching element