src/share/classes/java/util/regex/Pattern.java

Print this page

        

@@ -1142,11 +1142,17 @@
      * input sequence that is terminated by another subsequence that matches
      * this pattern or is terminated by the end of the input sequence.  The
      * substrings in the array are in the order in which they occur in the
      * input.  If this pattern does not match any subsequence of the input then
      * the resulting array has just one element, namely the input sequence in
-     * string form.
+     * string form. A zero-length input sequence always results in a
+     * zero-length array.
+     *
+     * <p> When there is a positive-width match at the beginning of the input
+     * sequence, an empty leading substring is included at the beginning
+     * of the resulting array. A zero-width match at the beginning, however,
+     * never produces such an empty leading substring.
      *
      * <p> The <tt>limit</tt> parameter controls the number of times the
      * pattern is applied and therefore affects the length of the resulting
      * array.  If the limit <i>n</i> is greater than zero then the pattern
      * will be applied at most <i>n</i>&nbsp;-&nbsp;1 times, the array's

@@ -1183,29 +1189,35 @@
      * <tr><td align=center>o</td>
      *     <td align=center>0</td>
      *     <td><tt>{ "b", "", ":and:f" }</tt></td></tr>
      * </table></blockquote>
      *
-     *
      * @param  input
      *         The character sequence to be split
      *
      * @param  limit
      *         The result threshold, as described above
      *
      * @return  The array of strings computed by splitting the input
      *          around matches of this pattern
      */
     public String[] split(CharSequence input, int limit) {
+        if (input.length() == 0)
+            return new String[0];
         int index = 0;
         boolean matchLimited = limit > 0;
         ArrayList<String> matchList = new ArrayList<>();
         Matcher m = matcher(input);
 
         // Add segments before each match found
         while(m.find()) {
             if (!matchLimited || matchList.size() < limit - 1) {
+                if (index == 0 && index == m.start() && m.start() == m.end()) {
+                    // no empty leading substring included for zero-width match
+                    // at the beginning of the input char sequence.
+                    continue;
+                }
                 String match = input.subSequence(index, m.start()).toString();
                 matchList.add(match);
                 index = m.end();
             } else if (matchList.size() == limit - 1) { // last one
                 String match = input.subSequence(index,

@@ -5760,10 +5772,17 @@
      *
      * <p> If this pattern does not match any subsequence of the input then
      * the resulting stream has just one element, namely the input sequence in
      * string form.
      *
+     * <p> A zero-length input sequence always results in an empty stream.
+     *
+     * <p> When there is a positive-width match at the beginning of the input
+     * sequence, an empty leading substring is included at the beginning
+     * of the stream. A zero-width match at the beginning, however, never
+     * produces such an empty leading substring.
+     *
      * <p> If the input sequence is mutable, it must remain constant during the
      * execution of the terminal stream operation.  Otherwise, the result of the
      * terminal stream operation is undefined.
      *
      * @param   input

@@ -5815,11 +5834,12 @@
                 while (matcher.find()) {
                     nextElement = input.subSequence(current, matcher.start()).toString();
                     current = matcher.end();
                     if (!nextElement.isEmpty()) {
                         return true;
-                    } else {
+                    } else if (current > 0) { // no empty leading substring for zero-width
+                                              // match at the beginning of the input
                         emptyElementCount++;
                     }
                 }
 
                 // Consume last matching element