< prev index next >

src/java.base/share/classes/java/util/Scanner.java

Print this page
rev 12670 : 8072722: add stream support to Scanner
Reviewed-by: psandoz

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this

@@ -23,20 +23,22 @@
  * questions.
  */
 
 package java.util;
 
-import java.nio.file.Path;
-import java.nio.file.Files;
-import java.util.regex.*;
 import java.io.*;
 import java.math.*;
 import java.nio.*;
 import java.nio.channels.*;
 import java.nio.charset.*;
+import java.nio.file.Path;
+import java.nio.file.Files;
 import java.text.*;
-import java.util.Locale;
+import java.util.function.Consumer;
+import java.util.regex.*;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
 
 import sun.misc.LRUCache;
 
 /**
  * A simple text scanner which can parse primitive types and strings using

@@ -94,26 +96,29 @@
  *         System.out.println(result.group(i));
  *     s.close();
  * }</pre></blockquote>
  *
  * <p>The <a name="default-delimiter">default whitespace delimiter</a> used
- * by a scanner is as recognized by {@link java.lang.Character}.{@link
- * java.lang.Character#isWhitespace(char) isWhitespace}. The {@link #reset}
+ * by a scanner is as recognized by {@link Character#isWhitespace(char)
+ * Character.isWhitespace()}. The {@link #reset reset()}
  * method will reset the value of the scanner's delimiter to the default
  * whitespace delimiter regardless of whether it was previously changed.
  *
  * <p>A scanning operation may block waiting for input.
  *
  * <p>The {@link #next} and {@link #hasNext} methods and their
- * primitive-type companion methods (such as {@link #nextInt} and
+ * companion methods (such as {@link #nextInt} and
  * {@link #hasNextInt}) first skip any input that matches the delimiter
- * pattern, and then attempt to return the next token. Both {@code hasNext}
- * and {@code next} methods may block waiting for further input.  Whether a
- * {@code hasNext} method blocks has no connection to whether or not its
- * associated {@code next} method will block.
- *
- * <p> The {@link #findInLine}, {@link #findWithinHorizon}, and {@link #skip}
+ * pattern, and then attempt to return the next token. Both {@code hasNext()}
+ * and {@code next()} methods may block waiting for further input.  Whether a
+ * {@code hasNext()} method blocks has no connection to whether or not its
+ * associated {@code next()} method will block. The {@link #tokens} method
+ * may also block waiting for input.
+ *
+ * <p>The {@link #findInLine findInLine()},
+ * {@link #findWithinHorizon findWithinHorizon()},
+ * {@link #skip skip()}, and {@link #findAll findAll()}
  * methods operate independently of the delimiter pattern. These methods will
  * attempt to match the specified pattern with no regard to delimiters in the
  * input and thus can be used in special circumstances where delimiters are
  * not relevant. These methods may block waiting for more input.
  *

@@ -127,11 +132,11 @@
  * pattern {@code "\\s"} could return empty tokens since it only passes one
  * space at a time.
  *
  * <p> A scanner can read text from any object which implements the {@link
  * java.lang.Readable} interface.  If an invocation of the underlying
- * readable's {@link java.lang.Readable#read} method throws an {@link
+ * readable's {@link java.lang.Readable#read read()} method throws an {@link
  * java.io.IOException} then the scanner assumes that the end of the input
  * has been reached.  The most recent {@code IOException} thrown by the
  * underlying readable can be retrieved via the {@link #ioException} method.
  *
  * <p>When a {@code Scanner} is closed, it will close its input source

@@ -154,11 +159,11 @@
  * <p> An instance of this class is capable of scanning numbers in the standard
  * formats as well as in the formats of the scanner's locale. A scanner's
  * <a name="initial-locale">initial locale </a>is the value returned by the {@link
  * java.util.Locale#getDefault(Locale.Category)
  * Locale.getDefault(Locale.Category.FORMAT)} method; it may be changed via the {@link
- * #useLocale} method. The {@link #reset} method will reset the value of the
+ * #useLocale useLocale()} method. The {@link #reset} method will reset the value of the
  * scanner's locale to the initial locale regardless of whether it was
  * previously changed.
  *
  * <p>The localized formats are defined in terms of the following parameters,
  * which for a particular locale are taken from that locale's {@link

@@ -372,10 +377,15 @@
     };
 
     // A holder of the last IOException encountered
     private IOException lastException;
 
+    // Number of times this scanner's state has been modified.
+    // Generally incremented on most public APIs and checked
+    // within spliterator implementations.
+    int modCount;
+
     // A pattern for java whitespace
     private static Pattern WHITESPACE_PATTERN = Pattern.compile(
                                                 "\\p{javaWhitespace}+");
 
     // A pattern for any token

@@ -993,12 +1003,13 @@
         needInput = true;
         return null;
     }
 
     // Finds the specified pattern in the buffer up to horizon.
-    // Returns a match for the specified input pattern.
-    private String findPatternInBuffer(Pattern pattern, int horizon) {
+    // Returns true if the specified input pattern was matched,
+    // and leaves the matcher field with the current match state.
+    private boolean findPatternInBuffer(Pattern pattern, int horizon) {
         matchValid = false;
         matcher.usePattern(pattern);
         int bufferLimit = buf.limit();
         int horizonLimit = -1;
         int searchLimit = bufferLimit;

@@ -1012,58 +1023,59 @@
             if (matcher.hitEnd() && (!sourceClosed)) {
                 // The match may be longer if didn't hit horizon or real end
                 if (searchLimit != horizonLimit) {
                      // Hit an artificial end; try to extend the match
                     needInput = true;
-                    return null;
+                    return false;
                 }
                 // The match could go away depending on what is next
                 if ((searchLimit == horizonLimit) && matcher.requireEnd()) {
                     // Rare case: we hit the end of input and it happens
                     // that it is at the horizon and the end of input is
                     // required for the match.
                     needInput = true;
-                    return null;
+                    return false;
                 }
             }
             // Did not hit end, or hit real end, or hit horizon
             position = matcher.end();
-            return matcher.group();
+            return true;
         }
 
         if (sourceClosed)
-            return null;
+            return false;
 
         // If there is no specified horizon, or if we have not searched
         // to the specified horizon yet, get more input
         if ((horizon == 0) || (searchLimit != horizonLimit))
             needInput = true;
-        return null;
+        return false;
     }
 
-    // Returns a match for the specified input pattern anchored at
-    // the current position
-    private String matchPatternInBuffer(Pattern pattern) {
+    // Attempts to match a pattern anchored at the current position.
+    // Returns true if the specified input pattern was matched,
+    // and leaves the matcher field with the current match state.
+    private boolean matchPatternInBuffer(Pattern pattern) {
         matchValid = false;
         matcher.usePattern(pattern);
         matcher.region(position, buf.limit());
         if (matcher.lookingAt()) {
             if (matcher.hitEnd() && (!sourceClosed)) {
                 // Get more input and try again
                 needInput = true;
-                return null;
+                return false;
             }
             position = matcher.end();
-            return matcher.group();
+            return true;
         }
 
         if (sourceClosed)
-            return null;
+            return false;
 
         // Read more to find pattern
         needInput = true;
-        return null;
+        return false;
     }
 
     // Throws if the scanner is closed
     private void ensureOpen() {
         if (closed)

@@ -1126,10 +1138,11 @@
      *
      * @param pattern A delimiting pattern
      * @return this scanner
      */
     public Scanner useDelimiter(Pattern pattern) {
+        modCount++;
         delimPattern = pattern;
         return this;
     }
 
     /**

@@ -1145,10 +1158,11 @@
      *
      * @param pattern A string specifying a delimiting pattern
      * @return this scanner
      */
     public Scanner useDelimiter(String pattern) {
+        modCount++;
         delimPattern = patternCache.forName(pattern);
         return this;
     }
 
     /**

@@ -1179,10 +1193,11 @@
      */
     public Scanner useLocale(Locale locale) {
         if (locale.equals(this.locale))
             return this;
 
+        modCount++;
         this.locale = locale;
         DecimalFormat df =
             (DecimalFormat)NumberFormat.getNumberInstance(locale);
         DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale);
 

@@ -1234,12 +1249,12 @@
      *
      * <p>A scanner's radix affects elements of its default
      * number matching regular expressions; see
      * <a href= "#localized-numbers">localized numbers</a> above.
      *
-     * <p>If the radix is less than {@code Character.MIN_RADIX}
-     * or greater than {@code Character.MAX_RADIX}, then an
+     * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX}
+     * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an
      * {@code IllegalArgumentException} is thrown.
      *
      * <p>Invoking the {@link #reset} method will set the scanner's radix to
      * {@code 10}.
      *

@@ -1251,10 +1266,11 @@
         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX))
             throw new IllegalArgumentException("radix:"+radix);
 
         if (this.defaultRadix == radix)
             return this;
+        modCount++;
         this.defaultRadix = radix;
         // Force rebuilding and recompilation of radix dependent patterns
         integerPattern = null;
         return this;
     }

@@ -1273,19 +1289,19 @@
      * Returns the match result of the last scanning operation performed
      * by this scanner. This method throws {@code IllegalStateException}
      * if no match has been performed, or if the last match was
      * not successful.
      *
-     * <p>The various {@code next}methods of {@code Scanner}
+     * <p>The various {@code next} methods of {@code Scanner}
      * make a match result available if they complete without throwing an
      * exception. For instance, after an invocation of the {@link #nextInt}
      * method that returned an int, this method returns a
      * {@code MatchResult} for the search of the
      * <a href="#Integer-regex"><i>Integer</i></a> regular expression
-     * defined above. Similarly the {@link #findInLine},
-     * {@link #findWithinHorizon}, and {@link #skip} methods will make a
-     * match available if they succeed.
+     * defined above. Similarly the {@link #findInLine findInLine()},
+     * {@link #findWithinHorizon findWithinHorizon()}, and {@link #skip skip()}
+     * methods will make a match available if they succeed.
      *
      * @return a match result for the last match operation
      * @throws IllegalStateException  If no match result is available
      */
     public MatchResult match() {

@@ -1331,10 +1347,11 @@
      * @see java.util.Iterator
      */
     public boolean hasNext() {
         ensureOpen();
         saveState();
+        modCount++;
         while (!sourceClosed) {
             if (hasTokenInBuffer())
                 return revertState(true);
             readInput();
         }

@@ -1355,10 +1372,11 @@
      * @see java.util.Iterator
      */
     public String next() {
         ensureOpen();
         clearCaches();
+        modCount++;
 
         while (true) {
             String token = getCompleteTokenInBuffer(null);
             if (token != null) {
                 matchValid = true;

@@ -1433,10 +1451,11 @@
         ensureOpen();
         if (pattern == null)
             throw new NullPointerException();
         hasNextPattern = null;
         saveState();
+        modCount++;
 
         while (true) {
             if (getCompleteTokenInBuffer(pattern) != null) {
                 matchValid = true;
                 cacheResult();

@@ -1464,10 +1483,11 @@
     public String next(Pattern pattern) {
         ensureOpen();
         if (pattern == null)
             throw new NullPointerException();
 
+        modCount++;
         // Did we already find this pattern?
         if (hasNextPattern == pattern)
             return getCachedResult();
         clearCaches();
 

@@ -1495,10 +1515,11 @@
      * @throws IllegalStateException if this scanner is closed
      */
     public boolean hasNextLine() {
         saveState();
 
+        modCount++;
         String result = findWithinHorizon(linePattern(), 0);
         if (result != null) {
             MatchResult mr = this.match();
             String lineSep = mr.group(1);
             if (lineSep != null) {

@@ -1529,10 +1550,11 @@
      * @return the line that was skipped
      * @throws NoSuchElementException if no line was found
      * @throws IllegalStateException if this scanner is closed
      */
     public String nextLine() {
+        modCount++;
         if (hasNextPattern == linePattern())
             return getCachedResult();
         clearCaches();
 
         String result = findWithinHorizon(linePattern, 0);

@@ -1587,16 +1609,16 @@
     public String findInLine(Pattern pattern) {
         ensureOpen();
         if (pattern == null)
             throw new NullPointerException();
         clearCaches();
+        modCount++;
         // Expand buffer to include the next newline or end of input
         int endPosition = 0;
         saveState();
         while (true) {
-            String token = findPatternInBuffer(separatorPattern(), 0);
-            if (token != null) {
+            if (findPatternInBuffer(separatorPattern(), 0)) {
                 endPosition = matcher.start();
                 break; // up to next newline
             }
             if (needInput) {
                 readInput();

@@ -1621,11 +1643,11 @@
      * specified string, ignoring delimiters.
      *
      * <p>An invocation of this method of the form
      * {@code findWithinHorizon(pattern)} behaves in exactly the same way as
      * the invocation
-     * {@code findWithinHorizon(Pattern.compile(pattern, horizon))}.
+     * {@code findWithinHorizon(Pattern.compile(pattern), horizon)}.
      *
      * @param pattern a string specifying the pattern to search for
      * @param horizon the search horizon
      * @return the text that matched the specified pattern
      * @throws IllegalStateException if this scanner is closed

@@ -1671,17 +1693,17 @@
         if (pattern == null)
             throw new NullPointerException();
         if (horizon < 0)
             throw new IllegalArgumentException("horizon < 0");
         clearCaches();
+        modCount++;
 
         // Search for the pattern
         while (true) {
-            String token = findPatternInBuffer(pattern, horizon);
-            if (token != null) {
+            if (findPatternInBuffer(pattern, horizon)) {
                 matchValid = true;
-                return token;
+                return matcher.group();
             }
             if (needInput)
                 readInput();
             else
                 break; // up to end of input

@@ -1715,15 +1737,15 @@
     public Scanner skip(Pattern pattern) {
         ensureOpen();
         if (pattern == null)
             throw new NullPointerException();
         clearCaches();
+        modCount++;
 
         // Search for the pattern
         while (true) {
-            String token = matchPatternInBuffer(pattern);
-            if (token != null) {
+            if (matchPatternInBuffer(pattern)) {
                 matchValid = true;
                 position = matcher.end();
                 return this;
             }
             if (needInput)

@@ -1930,11 +1952,11 @@
     /**
      * Scans the next token of the input as a {@code short}.
      *
      * <p> An invocation of this method of the form
      * {@code nextShort()} behaves in exactly the same way as the
-     * invocation {@code nextShort(radix)}, where {@code radix}
+     * invocation {@link #nextShort(int) nextShort(radix)}, where {@code radix}
      * is the default radix of this scanner.
      *
      * @return the {@code short} scanned from the input
      * @throws InputMismatchException
      *         if the next token does not match the <i>Integer</i>

@@ -2588,12 +2610,14 @@
 
     /**
      * Resets this scanner.
      *
      * <p> Resetting a scanner discards all of its explicit state
-     * information which may have been changed by invocations of {@link
-     * #useDelimiter}, {@link #useLocale}, or {@link #useRadix}.
+     * information which may have been changed by invocations of
+     * {@link #useDelimiter useDelimiter()},
+     * {@link #useLocale useLocale()}, or
+     * {@link #useRadix useRadix()}.
      *
      * <p> An invocation of this method of the form
      * {@code scanner.reset()} behaves in exactly the same way as the
      * invocation
      *

@@ -2610,8 +2634,208 @@
     public Scanner reset() {
         delimPattern = WHITESPACE_PATTERN;
         useLocale(Locale.getDefault(Locale.Category.FORMAT));
         useRadix(10);
         clearCaches();
+        modCount++;
         return this;
     }
+
+    /**
+     * Returns a stream of delimiter-separated tokens from this scanner. The
+     * stream contains the same tokens that would be returned, starting from
+     * this scanner's current state, by calling the {@link #next} method
+     * repeatedly until the {@link #hasNext} returns false.
+     *
+     * <p>The resulting stream is sequential and ordered. All stream elements are
+     * non-null.
+     *
+     * <p>Scanning starts upon initiation of the terminal stream operation, using the
+     * current state of this scanner. Subsequent calls to any methods on this scanner
+     * other than {@link #close} and {@link #ioException} may return undefined results
+     * or may cause undefined effects on the returned stream. The returned stream's source
+     * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort basis, throw a
+     * {@link java.util.ConcurrentModificationException} if any such calls are detected
+     * during pipeline execution.
+     *
+     * <p>After pipeline execution completes, this scanner is left in an indeterminate
+     * state and cannot be reused.
+     *
+     * <p>If this scanner contains a resource that must be released, this scanner
+     * should be closed, either by calling its {@link #close} method, or by
+     * closing the returned stream. Closing the stream will close the underlying scanner.
+     * {@code IllegalStateException} is thrown if the scanner has been closed when this
+     * method is called, or if this scanner is closed during pipeline execution.
+     *
+     * <p>This method might block waiting for more input.
+     *
+     * @apiNote
+     * For example, the following code will create a list of
+     * comma-delimited tokens from a string:
+     *
+     * <pre>{@code
+     * List<String> result = new Scanner("abc,def,,ghi")
+     *     .useDelimiter(",")
+     *     .tokens()
+     *     .collect(Collectors.toList());
+     * }</pre>
+     *
+     * <p>The resulting list would contain {@code "abc"}, {@code "def"},
+     * the empty string, and {@code "ghi"}.
+     *
+     * @return a sequential stream of token strings
+     * @throws IllegalStateException if this scanner is closed
+     * @since 1.9
+     */
+    public Stream<String> tokens() {
+        ensureOpen();
+        Stream<String> stream = StreamSupport.stream(new TokenSpliterator(), false);
+        return stream.onClose(this::close);
+    }
+
+    class TokenSpliterator extends Spliterators.AbstractSpliterator<String> {
+        int expectedCount = -1;
+
+        TokenSpliterator() {
+            super(Long.MAX_VALUE,
+                  Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED);
+        }
+
+        @Override
+        public boolean tryAdvance(Consumer<? super String> cons) {
+            if (expectedCount >= 0 && expectedCount != modCount) {
+                throw new ConcurrentModificationException();
+            }
+
+            if (hasNext()) {
+                String token = next();
+                expectedCount = modCount;
+                cons.accept(token);
+                if (expectedCount != modCount) {
+                    throw new ConcurrentModificationException();
+                }
+                return true;
+            } else {
+                expectedCount = modCount;
+                return false;
+            }
+        }
+    }
+
+    /**
+     * Returns a stream of match results from this scanner. The stream
+     * contains the same results in the same order that would be returned by
+     * calling {@code findWithinHorizon(pattern, 0)} and then {@link #match}
+     * successively as long as {@link #findWithinHorizon findWithinHorizon()}
+     * finds matches.
+     *
+     * <p>The resulting stream is sequential and ordered. All stream elements are
+     * non-null.
+     *
+     * <p>Scanning starts upon initiation of the terminal stream operation, using the
+     * current state of this scanner. Subsequent calls to any methods on this scanner
+     * other than {@link #close} and {@link #ioException} may return undefined results
+     * or may cause undefined effects on the returned stream. The returned stream's source
+     * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort basis, throw a
+     * {@link java.util.ConcurrentModificationException} if any such calls are detected
+     * during pipeline execution.
+     *
+     * <p>After pipeline execution completes, this scanner is left in an indeterminate
+     * state and cannot be reused.
+     *
+     * <p>If this scanner contains a resource that must be released, this scanner
+     * should be closed, either by calling its {@link #close} method, or by
+     * closing the returned stream. Closing the stream will close the underlying scanner.
+     * {@code IllegalStateException} is thrown if the scanner has been closed when this
+     * method is called, or if this scanner is closed during pipeline execution.
+     *
+     * <p>As with the {@link #findWithinHorizon findWithinHorizon()} methods, this method
+     * might block waiting for additional input, and it might buffer an unbounded amount of
+     * input searching for a match.
+     *
+     * @apiNote
+     * For example, the following code will read a file and return a list
+     * of all sequences of characters consisting of seven or more Latin capital
+     * letters:
+     *
+     * <pre>{@code
+     * try (Scanner sc = new Scanner(Paths.get("input.txt"))) {
+     *     Pattern pat = Pattern.compile("[A-Z]{7,}");
+     *     List<String> capWords = sc.findAll(pat)
+     *                               .map(MatchResult::group)
+     *                               .collect(Collectors.toList());
+     * }
+     * }</pre>
+     *
+     * @param pattern the pattern to be matched
+     * @return a sequential stream of match results
+     * @throws NullPointerException if pattern is null
+     * @throws IllegalStateException if this scanner is closed
+     * @since 1.9
+     */
+    public Stream<MatchResult> findAll(Pattern pattern) {
+        Objects.requireNonNull(pattern);
+        ensureOpen();
+        Stream<MatchResult> stream = StreamSupport.stream(new FindSpliterator(pattern), false);
+        return stream.onClose(this::close);
+    }
+
+    /**
+     * Returns a stream of match results that match the provided pattern string.
+     * The effect is equivalent to the following code:
+     *
+     * <pre>{@code
+     *     scanner.findAll(Pattern.compile(patString))
+     * }</pre>
+     *
+     * @param patString the pattern string
+     * @return a sequential stream of match results
+     * @throws NullPointerException if patString is null
+     * @throws IllegalStateException if this scanner is closed
+     * @throws PatternSyntaxException if the regular expression's syntax is invalid
+     * @since 1.9
+     * @see java.util.regex.Pattern
+     */
+    public Stream<MatchResult> findAll(String patString) {
+        Objects.requireNonNull(patString);
+        ensureOpen();
+        return findAll(patternCache.forName(patString));
+    }
+
+    class FindSpliterator extends Spliterators.AbstractSpliterator<MatchResult> {
+        final Pattern pattern;
+        int expectedCount = -1;
+
+        FindSpliterator(Pattern pattern) {
+            super(Long.MAX_VALUE,
+                  Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED);
+            this.pattern = pattern;
+        }
+
+        @Override
+        public boolean tryAdvance(Consumer<? super MatchResult> cons) {
+            ensureOpen();
+            if (expectedCount >= 0) {
+                if (expectedCount != modCount) {
+                    throw new ConcurrentModificationException();
+                }
+            } else {
+                expectedCount = modCount;
+            }
+
+            while (true) {
+                // assert expectedCount == modCount
+                if (findPatternInBuffer(pattern, 0)) { // doesn't increment modCount
+                    cons.accept(matcher.toMatchResult());
+                    if (expectedCount != modCount) {
+                        throw new ConcurrentModificationException();
+                    }
+                    return true;
+                }
+                if (needInput)
+                    readInput(); // doesn't increment modCount
+                else
+                    return false; // reached end of input
+            }
+        }
+    }
 }
< prev index next >