< prev index next >

src/java.base/share/classes/java/util/Scanner.java

Print this page
rev 12497 : 8072722: add stream support to Scanner
Reviewed-by: XXX

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this

@@ -23,20 +23,22 @@
  * questions.
  */
 
 package java.util;
 
-import java.nio.file.Path;
-import java.nio.file.Files;
-import java.util.regex.*;
 import java.io.*;
 import java.math.*;
 import java.nio.*;
 import java.nio.channels.*;
 import java.nio.charset.*;
+import java.nio.file.Path;
+import java.nio.file.Files;
 import java.text.*;
-import java.util.Locale;
+import java.util.function.Consumer;
+import java.util.regex.*;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
 
 import sun.misc.LRUCache;
 
 /**
  * A simple text scanner which can parse primitive types and strings using

@@ -993,12 +995,13 @@
         needInput = true;
         return null;
     }
 
     // Finds the specified pattern in the buffer up to horizon.
-    // Returns a match for the specified input pattern.
-    private String findPatternInBuffer(Pattern pattern, int horizon) {
+    // Returns true if the specified input pattern was matched,
+    // and leaves the matcher field with the current match state.
+    private boolean findPatternInBuffer(Pattern pattern, int horizon) {
         matchValid = false;
         matcher.usePattern(pattern);
         int bufferLimit = buf.limit();
         int horizonLimit = -1;
         int searchLimit = bufferLimit;

@@ -1012,58 +1015,59 @@
             if (matcher.hitEnd() && (!sourceClosed)) {
                 // The match may be longer if didn't hit horizon or real end
                 if (searchLimit != horizonLimit) {
                      // Hit an artificial end; try to extend the match
                     needInput = true;
-                    return null;
+                    return false;
                 }
                 // The match could go away depending on what is next
                 if ((searchLimit == horizonLimit) && matcher.requireEnd()) {
                     // Rare case: we hit the end of input and it happens
                     // that it is at the horizon and the end of input is
                     // required for the match.
                     needInput = true;
-                    return null;
+                    return false;
                 }
             }
             // Did not hit end, or hit real end, or hit horizon
             position = matcher.end();
-            return matcher.group();
+            return true;
         }
 
         if (sourceClosed)
-            return null;
+            return false;
 
         // If there is no specified horizon, or if we have not searched
         // to the specified horizon yet, get more input
         if ((horizon == 0) || (searchLimit != horizonLimit))
             needInput = true;
-        return null;
+        return false;
     }
 
-    // Returns a match for the specified input pattern anchored at
-    // the current position
-    private String matchPatternInBuffer(Pattern pattern) {
+    // Attempts to match a pattern anchored at the current position.
+    // Returns true if the specified input pattern was matched,
+    // and leaves the matcher field with the current match state.
+    private boolean matchPatternInBuffer(Pattern pattern) {
         matchValid = false;
         matcher.usePattern(pattern);
         matcher.region(position, buf.limit());
         if (matcher.lookingAt()) {
             if (matcher.hitEnd() && (!sourceClosed)) {
                 // Get more input and try again
                 needInput = true;
-                return null;
+                return false;
             }
             position = matcher.end();
-            return matcher.group();
+            return true;
         }
 
         if (sourceClosed)
-            return null;
+            return false;
 
         // Read more to find pattern
         needInput = true;
-        return null;
+        return false;
     }
 
     // Throws if the scanner is closed
     private void ensureOpen() {
         if (closed)

@@ -1273,20 +1277,30 @@
      * Returns the match result of the last scanning operation performed
      * by this scanner. This method throws <code>IllegalStateException</code>
      * if no match has been performed, or if the last match was
      * not successful.
      *
-     * <p>The various <code>next</code>methods of <code>Scanner</code>
+     * <p>The various <code>next</code> methods of <code>Scanner</code>
      * make a match result available if they complete without throwing an
      * exception. For instance, after an invocation of the {@link #nextInt}
      * method that returned an int, this method returns a
      * <code>MatchResult</code> for the search of the
      * <a href="#Integer-regex"><i>Integer</i></a> regular expression
      * defined above. Similarly the {@link #findInLine},
      * {@link #findWithinHorizon}, and {@link #skip} methods will make a
      * match available if they succeed.
      *
+     * @apiNote
+     * <p>The offset values reported by the {@link MatchResult#start()},
+     * {@link MatchResult#start(int)}, {@link MatchResult#end()}, and
+     * {@link MatchResult#end(int)} methods are generally not useful, as they
+     * are offsets within the Scanner's internal buffer, which might contain
+     * an arbitrary portion of the input. The returned {@link MatchResult} is
+     * useful for extracting portions of the match using capturing groups,
+     * via the {@link MatchResult#group()}, {@link MatchResult#group(int)},
+     * and {@link MatchResult#groupCount()} methods.
+     *
      * @return a match result for the last match operation
      * @throws IllegalStateException  If no match result is available
      */
     public MatchResult match() {
         if (!matchValid)

@@ -1591,12 +1605,11 @@
         clearCaches();
         // Expand buffer to include the next newline or end of input
         int endPosition = 0;
         saveState();
         while (true) {
-            String token = findPatternInBuffer(separatorPattern(), 0);
-            if (token != null) {
+            if (findPatternInBuffer(separatorPattern(), 0)) {
                 endPosition = matcher.start();
                 break; // up to next newline
             }
             if (needInput) {
                 readInput();

@@ -1621,11 +1634,11 @@
      * specified string, ignoring delimiters.
      *
      * <p>An invocation of this method of the form
      * <tt>findWithinHorizon(pattern)</tt> behaves in exactly the same way as
      * the invocation
-     * <tt>findWithinHorizon(Pattern.compile(pattern, horizon))</tt>.
+     * <tt>findWithinHorizon(Pattern.compile(pattern), horizon)</tt>.
      *
      * @param pattern a string specifying the pattern to search for
      * @param horizon the search horizon
      * @return the text that matched the specified pattern
      * @throws IllegalStateException if this scanner is closed

@@ -1674,14 +1687,13 @@
             throw new IllegalArgumentException("horizon < 0");
         clearCaches();
 
         // Search for the pattern
         while (true) {
-            String token = findPatternInBuffer(pattern, horizon);
-            if (token != null) {
+            if (findPatternInBuffer(pattern, horizon)) {
                 matchValid = true;
-                return token;
+                return matcher.group();
             }
             if (needInput)
                 readInput();
             else
                 break; // up to end of input

@@ -1718,12 +1730,11 @@
             throw new NullPointerException();
         clearCaches();
 
         // Search for the pattern
         while (true) {
-            String token = matchPatternInBuffer(pattern);
-            if (token != null) {
+            if (matchPatternInBuffer(pattern)) {
                 matchValid = true;
                 position = matcher.end();
                 return this;
             }
             if (needInput)

@@ -2612,6 +2623,149 @@
         useLocale(Locale.getDefault(Locale.Category.FORMAT));
         useRadix(10);
         clearCaches();
         return this;
     }
+
+    /**
+     * Returns a stream of delimiter-separated tokens from this scanner. The
+     * stream contains the same tokens that would be returned, starting from
+     * this scanner's current state, by calling the {@link #next} method
+     * repeatedly until the {@link #hasNext} method returns false.
+     *
+     * <p>The resulting stream is ordered, and all stream elements are
+     * non-null. Closing the stream will close the underlying scanner.
+     *
+     * <p>After the {@code tokens()} method has been called, the scanner
+     * should be considered to be under the control of the returned stream
+     * object.  Subsequent calls to any methods on this scanner other than
+     * {@link #close} and {@link #ioException} may return undefined results or
+     * may cause undefined effects on the returned stream.
+     *
+     * <p>If this scanner contains a resource that must be released, this scanner
+     * should be closed, either by calling its {@link #close} method, or by
+     * closing the returned stream.  After the scanner has been closed, the
+     * results of operating on the returned stream are undefined.
+     *
+     * <p>For example, the following code will create a list of
+     * comma-delimited tokens from a string:
+     *
+     * <pre>{@code
+     * List<String> result = new Scanner("abc,def,,ghi").useDelimiter(",")
+     *     .tokens().collect(Collectors.toList());
+     * }</pre>
+     *
+     * <p>The resulting list would contain {@code "abc"}, {@code "def"},
+     * the empty string, and {@code "ghi"}.
+     *
+     * @return the stream of token strings
+     * @throws IllegalStateException if this scanner is closed
+     * @since 1.9
+     */
+    public Stream<String> tokens() {
+        ensureOpen();
+        Stream<String> stream = StreamSupport.stream(
+            Spliterators.spliteratorUnknownSize(this,
+                Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED), false);
+        return stream.onClose(this::close);
+    }
+
+    /**
+     * Returns a stream of match results from this scanner. The stream
+     * contains the same results in the same order that would be returned by
+     * calling {@code findWithinHorizon(pattern, 0)} and then {@link #match}
+     * repeatedly as long as {@link #findWithinHorizon} finds matches.
+     *
+     * <p>The resulting stream is ordered, and all stream elements are
+     * non-null. Closing the stream will close the underlying scanner.
+     *
+     * <p>After the {@code findAll()} method has been called, the scanner
+     * should be considered to be under the control of the returned stream
+     * object.  Subsequent calls to any methods on this scanner other than
+     * {@link #close} and {@link #ioException} may return undefined results or
+     * may cause undefined effects on the returned stream.
+     *
+     * <p>If this scanner contains a resource that must be released, this scanner
+     * should be closed, either by calling its {@link #close} method, or by
+     * closing the returned stream.  After the scanner has been closed, the
+     * results of operating on the returned stream are undefined.
+     *
+     * <p>For example, the following code will read a file and return a list
+     * of all sequences of characters consisting of seven or more Latin capital
+     * letters:
+     *
+     * <pre>{@code
+     * try (Scanner sc = new Scanner(Paths.get("input.txt"))) {
+     *     Pattern pat = Pattern.compile("[A-Z]{7,}");
+     *     List<String> capWords = sc.findAll(pat)
+     *                               .map(MatchResult::group)
+     *                               .collect(Collectors.toList());
+     * }
+     * }</pre>
+     *
+     * @apiNote
+     * As with the {@link #findWithinHorizon} methods, this method might block
+     * waiting for additional input, and it may buffer an unbounded amount of input
+     * searching for a match. The offset values in the {@link MatchResult} stream
+     * elements might not be useful; see the API Note for the {@link #match} method.
+     *
+     * @param pattern the pattern to be matched
+     * @return a stream of match results
+     * @throws NullPointerException if pattern is null
+     * @throws IllegalStateException if this scanner is closed
+     * @since 1.9
+     */
+    public Stream<MatchResult> findAll(Pattern pattern) {
+        Objects.requireNonNull(pattern);
+        ensureOpen();
+        // Matches are produced lazily; closing the stream also closes this scanner.
+        return StreamSupport.stream(new ScanSpliterator(pattern), false).onClose(this::close);
+    }
+
+    /**
+     * Returns a stream of match results that match the provided pattern string.
+     * The effect is the same as the code:
+     *
+     * <pre>{@code
+     *     scanner.findAll(Pattern.compile(patString))
+     * }</pre>
+     *
+     * @param patString the pattern string
+     * @return a stream of match results
+     * @throws NullPointerException if patString is null
+     * @throws IllegalStateException if this scanner is closed
+     * @throws PatternSyntaxException if the regular expression's syntax is invalid
+     * @since 1.9
+     * @see java.util.regex.Pattern
+     */
+    public Stream<MatchResult> findAll(String patString) {
+        Objects.requireNonNull(patString);
+        ensureOpen();
+        return findAll(patternCache.forName(patString));
+    }
+
+
+    // Spliterator backing findAll(): yields each successive match of the pattern.
+    class ScanSpliterator extends Spliterators.AbstractSpliterator<MatchResult> {
+        final Pattern pattern;
+
+        ScanSpliterator(Pattern pattern) {
+            super(Long.MAX_VALUE,
+                  Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED);
+            this.pattern = pattern;
+        }
+
+        @Override
+        public boolean tryAdvance(Consumer<? super MatchResult> cons) {
+            // Search with no horizon; read more input for as long as the search asks for it.
+            while (!findPatternInBuffer(pattern, 0)) {
+                if (!needInput)
+                    return false; // reached end of input
+                readInput();
+            }
+            // Pass on a snapshot of the matcher's current match.
+            cons.accept(matcher.toMatchResult());
+            return true;
+        }
+    }
+
 }
< prev index next >