< prev index next >
src/java.base/share/classes/java/util/Scanner.java
Print this page
rev 12670 : 8072722: add stream support to Scanner
Reviewed-by: psandoz, chegar
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
@@ -23,20 +23,22 @@
* questions.
*/
package java.util;
-import java.nio.file.Path;
-import java.nio.file.Files;
-import java.util.regex.*;
import java.io.*;
import java.math.*;
import java.nio.*;
import java.nio.channels.*;
import java.nio.charset.*;
+import java.nio.file.Path;
+import java.nio.file.Files;
import java.text.*;
-import java.util.Locale;
+import java.util.function.Consumer;
+import java.util.regex.*;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
import sun.misc.LRUCache;
/**
* A simple text scanner which can parse primitive types and strings using
@@ -94,26 +96,29 @@
* System.out.println(result.group(i));
* s.close();
* }</pre></blockquote>
*
* <p>The <a name="default-delimiter">default whitespace delimiter</a> used
- * by a scanner is as recognized by {@link java.lang.Character}.{@link
- * java.lang.Character#isWhitespace(char) isWhitespace}. The {@link #reset}
+ * by a scanner is as recognized by {@link Character#isWhitespace(char)
+ * Character.isWhitespace()}. The {@link #reset reset()}
* method will reset the value of the scanner's delimiter to the default
* whitespace delimiter regardless of whether it was previously changed.
*
* <p>A scanning operation may block waiting for input.
*
* <p>The {@link #next} and {@link #hasNext} methods and their
- * primitive-type companion methods (such as {@link #nextInt} and
+ * companion methods (such as {@link #nextInt} and
* {@link #hasNextInt}) first skip any input that matches the delimiter
- * pattern, and then attempt to return the next token. Both {@code hasNext}
- * and {@code next} methods may block waiting for further input. Whether a
- * {@code hasNext} method blocks has no connection to whether or not its
- * associated {@code next} method will block.
- *
- * <p> The {@link #findInLine}, {@link #findWithinHorizon}, and {@link #skip}
+ * pattern, and then attempt to return the next token. Both {@code hasNext()}
+ * and {@code next()} methods may block waiting for further input. Whether a
+ * {@code hasNext()} method blocks has no connection to whether or not its
+ * associated {@code next()} method will block. The {@link #tokens} method
+ * may also block waiting for input.
+ *
+ * <p>The {@link #findInLine findInLine()},
+ * {@link #findWithinHorizon findWithinHorizon()},
+ * {@link #skip skip()}, and {@link #findAll findAll()}
* methods operate independently of the delimiter pattern. These methods will
* attempt to match the specified pattern with no regard to delimiters in the
* input and thus can be used in special circumstances where delimiters are
* not relevant. These methods may block waiting for more input.
*
@@ -127,11 +132,11 @@
* pattern {@code "\\s"} could return empty tokens since it only passes one
* space at a time.
*
* <p> A scanner can read text from any object which implements the {@link
* java.lang.Readable} interface. If an invocation of the underlying
- * readable's {@link java.lang.Readable#read} method throws an {@link
+ * readable's {@link java.lang.Readable#read read()} method throws an {@link
* java.io.IOException} then the scanner assumes that the end of the input
* has been reached. The most recent {@code IOException} thrown by the
* underlying readable can be retrieved via the {@link #ioException} method.
*
* <p>When a {@code Scanner} is closed, it will close its input source
@@ -154,11 +159,11 @@
* <p> An instance of this class is capable of scanning numbers in the standard
* formats as well as in the formats of the scanner's locale. A scanner's
* <a name="initial-locale">initial locale </a>is the value returned by the {@link
* java.util.Locale#getDefault(Locale.Category)
* Locale.getDefault(Locale.Category.FORMAT)} method; it may be changed via the {@link
- * #useLocale} method. The {@link #reset} method will reset the value of the
+ * #useLocale useLocale()} method. The {@link #reset} method will reset the value of the
* scanner's locale to the initial locale regardless of whether it was
* previously changed.
*
* <p>The localized formats are defined in terms of the following parameters,
* which for a particular locale are taken from that locale's {@link
@@ -372,10 +377,15 @@
};
// A holder of the last IOException encountered
private IOException lastException;
+ // Number of times this scanner's state has been modified.
+ // Generally incremented on most public APIs and checked
+ // within spliterator implementations.
+ int modCount;
+
// A pattern for java whitespace
private static Pattern WHITESPACE_PATTERN = Pattern.compile(
"\\p{javaWhitespace}+");
// A pattern for any token
@@ -993,12 +1003,13 @@
needInput = true;
return null;
}
// Finds the specified pattern in the buffer up to horizon.
- // Returns a match for the specified input pattern.
- private String findPatternInBuffer(Pattern pattern, int horizon) {
+ // Returns true if the specified input pattern was matched,
+ // and leaves the matcher field with the current match state.
+ private boolean findPatternInBuffer(Pattern pattern, int horizon) {
matchValid = false;
matcher.usePattern(pattern);
int bufferLimit = buf.limit();
int horizonLimit = -1;
int searchLimit = bufferLimit;
@@ -1012,58 +1023,59 @@
if (matcher.hitEnd() && (!sourceClosed)) {
// The match may be longer if didn't hit horizon or real end
if (searchLimit != horizonLimit) {
// Hit an artificial end; try to extend the match
needInput = true;
- return null;
+ return false;
}
// The match could go away depending on what is next
if ((searchLimit == horizonLimit) && matcher.requireEnd()) {
// Rare case: we hit the end of input and it happens
// that it is at the horizon and the end of input is
// required for the match.
needInput = true;
- return null;
+ return false;
}
}
// Did not hit end, or hit real end, or hit horizon
position = matcher.end();
- return matcher.group();
+ return true;
}
if (sourceClosed)
- return null;
+ return false;
// If there is no specified horizon, or if we have not searched
// to the specified horizon yet, get more input
if ((horizon == 0) || (searchLimit != horizonLimit))
needInput = true;
- return null;
+ return false;
}
- // Returns a match for the specified input pattern anchored at
- // the current position
- private String matchPatternInBuffer(Pattern pattern) {
+ // Attempts to match a pattern anchored at the current position.
+ // Returns true if the specified input pattern was matched,
+ // and leaves the matcher field with the current match state.
+ private boolean matchPatternInBuffer(Pattern pattern) {
matchValid = false;
matcher.usePattern(pattern);
matcher.region(position, buf.limit());
if (matcher.lookingAt()) {
if (matcher.hitEnd() && (!sourceClosed)) {
// Get more input and try again
needInput = true;
- return null;
+ return false;
}
position = matcher.end();
- return matcher.group();
+ return true;
}
if (sourceClosed)
- return null;
+ return false;
// Read more to find pattern
needInput = true;
- return null;
+ return false;
}
// Throws if the scanner is closed
private void ensureOpen() {
if (closed)
@@ -1126,10 +1138,11 @@
*
* @param pattern A delimiting pattern
* @return this scanner
*/
public Scanner useDelimiter(Pattern pattern) {
+ modCount++; // count the delimiter change as a state modification so the fail-fast stream spliterators can detect it
delimPattern = pattern;
return this;
}
/**
@@ -1145,10 +1158,11 @@
*
* @param pattern A string specifying a delimiting pattern
* @return this scanner
*/
public Scanner useDelimiter(String pattern) {
+ modCount++; // count the delimiter change as a state modification so the fail-fast stream spliterators can detect it
delimPattern = patternCache.forName(pattern); // presumably compiles the string or reuses a cached Pattern -- confirm against patternCache
return this;
}
/**
@@ -1179,10 +1193,11 @@
*/
public Scanner useLocale(Locale locale) {
if (locale.equals(this.locale))
return this;
+ modCount++;
this.locale = locale;
DecimalFormat df =
(DecimalFormat)NumberFormat.getNumberInstance(locale);
DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale);
@@ -1234,12 +1249,12 @@
*
* <p>A scanner's radix affects elements of its default
* number matching regular expressions; see
* <a href= "#localized-numbers">localized numbers</a> above.
*
- * <p>If the radix is less than {@code Character.MIN_RADIX}
- * or greater than {@code Character.MAX_RADIX}, then an
+ * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX}
+ * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an
* {@code IllegalArgumentException} is thrown.
*
* <p>Invoking the {@link #reset} method will set the scanner's radix to
* {@code 10}.
*
@@ -1251,10 +1266,11 @@
if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX))
throw new IllegalArgumentException("radix:"+radix);
if (this.defaultRadix == radix)
return this;
+ modCount++;
this.defaultRadix = radix;
// Force rebuilding and recompilation of radix dependent patterns
integerPattern = null;
return this;
}
@@ -1273,19 +1289,19 @@
* Returns the match result of the last scanning operation performed
* by this scanner. This method throws {@code IllegalStateException}
* if no match has been performed, or if the last match was
* not successful.
*
- * <p>The various {@code next}methods of {@code Scanner}
+ * <p>The various {@code next} methods of {@code Scanner}
* make a match result available if they complete without throwing an
* exception. For instance, after an invocation of the {@link #nextInt}
* method that returned an int, this method returns a
* {@code MatchResult} for the search of the
* <a href="#Integer-regex"><i>Integer</i></a> regular expression
- * defined above. Similarly the {@link #findInLine},
- * {@link #findWithinHorizon}, and {@link #skip} methods will make a
- * match available if they succeed.
+ * defined above. Similarly the {@link #findInLine findInLine()},
+ * {@link #findWithinHorizon findWithinHorizon()}, and {@link #skip skip()}
+ * methods will make a match available if they succeed.
*
* @return a match result for the last match operation
* @throws IllegalStateException If no match result is available
*/
public MatchResult match() {
@@ -1331,10 +1347,11 @@
* @see java.util.Iterator
*/
public boolean hasNext() {
ensureOpen();
saveState();
+ modCount++;
while (!sourceClosed) {
if (hasTokenInBuffer())
return revertState(true);
readInput();
}
@@ -1355,10 +1372,11 @@
* @see java.util.Iterator
*/
public String next() {
ensureOpen();
clearCaches();
+ modCount++;
while (true) {
String token = getCompleteTokenInBuffer(null);
if (token != null) {
matchValid = true;
@@ -1433,10 +1451,11 @@
ensureOpen();
if (pattern == null)
throw new NullPointerException();
hasNextPattern = null;
saveState();
+ modCount++;
while (true) {
if (getCompleteTokenInBuffer(pattern) != null) {
matchValid = true;
cacheResult();
@@ -1464,10 +1483,11 @@
public String next(Pattern pattern) {
ensureOpen();
if (pattern == null)
throw new NullPointerException();
+ modCount++;
// Did we already find this pattern?
if (hasNextPattern == pattern)
return getCachedResult();
clearCaches();
@@ -1495,10 +1515,11 @@
* @throws IllegalStateException if this scanner is closed
*/
public boolean hasNextLine() {
saveState();
+ modCount++;
String result = findWithinHorizon(linePattern(), 0);
if (result != null) {
MatchResult mr = this.match();
String lineSep = mr.group(1);
if (lineSep != null) {
@@ -1529,10 +1550,11 @@
* @return the line that was skipped
* @throws NoSuchElementException if no line was found
* @throws IllegalStateException if this scanner is closed
*/
public String nextLine() {
+ modCount++;
if (hasNextPattern == linePattern())
return getCachedResult();
clearCaches();
String result = findWithinHorizon(linePattern, 0);
@@ -1587,16 +1609,16 @@
public String findInLine(Pattern pattern) {
ensureOpen();
if (pattern == null)
throw new NullPointerException();
clearCaches();
+ modCount++;
// Expand buffer to include the next newline or end of input
int endPosition = 0;
saveState();
while (true) {
- String token = findPatternInBuffer(separatorPattern(), 0);
- if (token != null) {
+ if (findPatternInBuffer(separatorPattern(), 0)) {
endPosition = matcher.start();
break; // up to next newline
}
if (needInput) {
readInput();
@@ -1621,11 +1643,11 @@
* specified string, ignoring delimiters.
*
* <p>An invocation of this method of the form
- * {@code findWithinHorizon(pattern)} behaves in exactly the same way as
+ * {@code findWithinHorizon(pattern, horizon)} behaves in exactly the same way as
* the invocation
- * {@code findWithinHorizon(Pattern.compile(pattern, horizon))}.
+ * {@code findWithinHorizon(Pattern.compile(pattern), horizon)}.
*
* @param pattern a string specifying the pattern to search for
* @param horizon the search horizon
* @return the text that matched the specified pattern
* @throws IllegalStateException if this scanner is closed
@@ -1671,17 +1693,17 @@
if (pattern == null)
throw new NullPointerException();
if (horizon < 0)
throw new IllegalArgumentException("horizon < 0");
clearCaches();
+ modCount++;
// Search for the pattern
while (true) {
- String token = findPatternInBuffer(pattern, horizon);
- if (token != null) {
+ if (findPatternInBuffer(pattern, horizon)) {
matchValid = true;
- return token;
+ return matcher.group();
}
if (needInput)
readInput();
else
break; // up to end of input
@@ -1715,15 +1737,15 @@
public Scanner skip(Pattern pattern) {
ensureOpen();
if (pattern == null)
throw new NullPointerException();
clearCaches();
+ modCount++;
// Search for the pattern
while (true) {
- String token = matchPatternInBuffer(pattern);
- if (token != null) {
+ if (matchPatternInBuffer(pattern)) {
matchValid = true;
position = matcher.end();
return this;
}
if (needInput)
@@ -1930,11 +1952,11 @@
/**
* Scans the next token of the input as a {@code short}.
*
* <p> An invocation of this method of the form
* {@code nextShort()} behaves in exactly the same way as the
- * invocation {@code nextShort(radix)}, where {@code radix}
+ * invocation {@link #nextShort(int) nextShort(radix)}, where {@code radix}
* is the default radix of this scanner.
*
* @return the {@code short} scanned from the input
* @throws InputMismatchException
* if the next token does not match the <i>Integer</i>
@@ -2588,12 +2610,14 @@
/**
* Resets this scanner.
*
* <p> Resetting a scanner discards all of its explicit state
- * information which may have been changed by invocations of {@link
- * #useDelimiter}, {@link #useLocale}, or {@link #useRadix}.
+ * information which may have been changed by invocations of
+ * {@link #useDelimiter useDelimiter()},
+ * {@link #useLocale useLocale()}, or
+ * {@link #useRadix useRadix()}.
*
* <p> An invocation of this method of the form
* {@code scanner.reset()} behaves in exactly the same way as the
* invocation
*
@@ -2610,8 +2634,208 @@
public Scanner reset() {
delimPattern = WHITESPACE_PATTERN; // back to the default whitespace delimiter pattern
useLocale(Locale.getDefault(Locale.Category.FORMAT));
useRadix(10);
clearCaches();
+ modCount++; // unconditional: useLocale() and useRadix() return before their own increment when the value is unchanged
return this;
}
+
+ /**
+ * Returns a stream of delimiter-separated tokens from this scanner. The
+ * stream contains the same tokens that would be returned, starting from
+ * this scanner's current state, by calling the {@link #next} method
+ * repeatedly until the {@link #hasNext} method returns false.
+ *
+ * <p>The resulting stream is sequential and ordered. All stream elements are
+ * non-null.
+ *
+ * <p>Scanning starts upon initiation of the terminal stream operation, using the
+ * current state of this scanner. Subsequent calls to any methods on this scanner
+ * other than {@link #close} and {@link #ioException} may return undefined results
+ * or may cause undefined effects on the returned stream. The returned stream's source
+ * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort basis, throw a
+ * {@link java.util.ConcurrentModificationException} if any such calls are detected
+ * during stream pipeline execution.
+ *
+ * <p>After stream pipeline execution completes, this scanner is left in an indeterminate
+ * state and cannot be reused.
+ *
+ * <p>If this scanner contains a resource that must be released, this scanner
+ * should be closed, either by calling its {@link #close} method, or by
+ * closing the returned stream. Closing the stream will close the underlying scanner.
+ * {@code IllegalStateException} is thrown if the scanner has been closed when this
+ * method is called, or if this scanner is closed during stream pipeline execution.
+ *
+ * <p>This method might block waiting for more input.
+ *
+ * @apiNote
+ * For example, the following code will create a list of
+ * comma-delimited tokens from a string:
+ *
+ * <pre>{@code
+ * List<String> result = new Scanner("abc,def,,ghi")
+ * .useDelimiter(",")
+ * .tokens()
+ * .collect(Collectors.toList());
+ * }</pre>
+ *
+ * <p>The resulting list would contain {@code "abc"}, {@code "def"},
+ * the empty string, and {@code "ghi"}.
+ *
+ * @return a sequential stream of token strings
+ * @throws IllegalStateException if this scanner is closed
+ * @since 1.9
+ */
+ public Stream<String> tokens() {
+ ensureOpen(); // reject an already-closed scanner now; no input is read until the stream is traversed
+ Stream<String> stream = StreamSupport.stream(new TokenSpliterator(), false); // false = sequential stream
+ return stream.onClose(this::close); // closing the stream closes this scanner
+ }
+
+ class TokenSpliterator extends Spliterators.AbstractSpliterator<String> { // source spliterator for the stream returned by tokens()
+ int expectedCount = -1; // modCount snapshot taken after each advance; -1 until the first tryAdvance() call
+
+ TokenSpliterator() {
+ super(Long.MAX_VALUE, // size estimate; Long.MAX_VALUE is the Spliterator convention for "unknown"
+ Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED);
+ }
+
+ @Override
+ public boolean tryAdvance(Consumer<? super String> cons) {
+ if (expectedCount >= 0 && expectedCount != modCount) { // scanner state changed since the previous advance
+ throw new ConcurrentModificationException();
+ }
+
+ if (hasNext()) { // hasNext() and next() each increment modCount, so the snapshot is retaken afterwards
+ String token = next();
+ expectedCount = modCount;
+ cons.accept(token);
+ if (expectedCount != modCount) { // the consumer called back into the scanner
+ throw new ConcurrentModificationException();
+ }
+ return true;
+ } else {
+ expectedCount = modCount; // still resync: the hasNext() call above changed modCount
+ return false;
+ }
+ }
+ }
+
+ /**
+ * Returns a stream of match results from this scanner. The stream
+ * contains the same results in the same order that would be returned by
+ * calling {@code findWithinHorizon(pattern, 0)} and then {@link #match}
+ * successively as long as {@link #findWithinHorizon findWithinHorizon()}
+ * finds matches.
+ *
+ * <p>The resulting stream is sequential and ordered. All stream elements are
+ * non-null.
+ *
+ * <p>Scanning starts upon initiation of the terminal stream operation, using the
+ * current state of this scanner. Subsequent calls to any methods on this scanner
+ * other than {@link #close} and {@link #ioException} may return undefined results
+ * or may cause undefined effects on the returned stream. The returned stream's source
+ * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort basis, throw a
+ * {@link java.util.ConcurrentModificationException} if any such calls are detected
+ * during stream pipeline execution.
+ *
+ * <p>After stream pipeline execution completes, this scanner is left in an indeterminate
+ * state and cannot be reused.
+ *
+ * <p>If this scanner contains a resource that must be released, this scanner
+ * should be closed, either by calling its {@link #close} method, or by
+ * closing the returned stream. Closing the stream will close the underlying scanner.
+ * {@code IllegalStateException} is thrown if the scanner has been closed when this
+ * method is called, or if this scanner is closed during stream pipeline execution.
+ *
+ * <p>As with the {@link #findWithinHorizon findWithinHorizon()} methods, this method
+ * might block waiting for additional input, and it might buffer an unbounded amount of
+ * input searching for a match.
+ *
+ * @apiNote
+ * For example, the following code will read a file and return a list
+ * of all sequences of characters consisting of seven or more Latin capital
+ * letters:
+ *
+ * <pre>{@code
+ * try (Scanner sc = new Scanner(Paths.get("input.txt"))) {
+ * Pattern pat = Pattern.compile("[A-Z]{7,}");
+ * List<String> capWords = sc.findAll(pat)
+ * .map(MatchResult::group)
+ * .collect(Collectors.toList());
+ * }
+ * }</pre>
+ *
+ * @param pattern the pattern to be matched
+ * @return a sequential stream of match results
+ * @throws NullPointerException if pattern is null
+ * @throws IllegalStateException if this scanner is closed
+ * @since 1.9
+ */
+ public Stream<MatchResult> findAll(Pattern pattern) {
+ Objects.requireNonNull(pattern); // null check comes first, so a null pattern yields NullPointerException even on a closed scanner
+ ensureOpen();
+ Stream<MatchResult> stream = StreamSupport.stream(new FindSpliterator(pattern), false); // false = sequential stream
+ return stream.onClose(this::close); // closing the stream closes this scanner
+ }
+
+ /**
+ * Returns a stream of match results that match the provided pattern string.
+ * The effect is equivalent to the following code:
+ *
+ * <pre>{@code
+ * scanner.findAll(Pattern.compile(patString))
+ * }</pre>
+ *
+ * @param patString the pattern string
+ * @return a sequential stream of match results
+ * @throws NullPointerException if patString is null
+ * @throws IllegalStateException if this scanner is closed
+ * @throws PatternSyntaxException if the regular expression's syntax is invalid
+ * @since 1.9
+ * @see java.util.regex.Pattern
+ */
+ public Stream<MatchResult> findAll(String patString) {
+ Objects.requireNonNull(patString);
+ ensureOpen(); // checked before the pattern lookup, so a closed scanner fails here rather than on a pattern error
+ return findAll(patternCache.forName(patString)); // presumably compiles the string or reuses a cached Pattern -- confirm against patternCache
+ }
+
+ class FindSpliterator extends Spliterators.AbstractSpliterator<MatchResult> { // source spliterator for the stream returned by findAll()
+ final Pattern pattern; // pattern searched for on every advance
+ int expectedCount = -1; // modCount snapshot taken on the first tryAdvance() call; -1 until then
+
+ FindSpliterator(Pattern pattern) {
+ super(Long.MAX_VALUE, // size estimate; Long.MAX_VALUE is the Spliterator convention for "unknown"
+ Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED);
+ this.pattern = pattern;
+ }
+
+ @Override
+ public boolean tryAdvance(Consumer<? super MatchResult> cons) {
+ ensureOpen();
+ if (expectedCount >= 0) {
+ if (expectedCount != modCount) { // scanner state changed since the previous advance
+ throw new ConcurrentModificationException();
+ }
+ } else {
+ expectedCount = modCount; // first call: take the snapshot once; nothing below is expected to change modCount
+ }
+
+ while (true) { // NOTE(review): position only moves to matcher.end() on a match, so a pattern that can match the empty string might match at the same spot forever -- confirm, and consider auto-advancing
+ // assert expectedCount == modCount
+ if (findPatternInBuffer(pattern, 0)) { // doesn't increment modCount
+ cons.accept(matcher.toMatchResult()); // pass a snapshot; the shared matcher field is reused by later scans
+ if (expectedCount != modCount) { // the consumer called back into the scanner
+ throw new ConcurrentModificationException();
+ }
+ return true;
+ }
+ if (needInput)
+ readInput(); // doesn't increment modCount
+ else
+ return false; // reached end of input
+ }
+ }
+ }
}
< prev index next >