< prev index next >
src/java.base/share/classes/java/util/Scanner.java
Print this page
rev 12497 : 8072722: add stream support to Scanner
Reviewed-by: XXX
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
@@ -23,20 +23,22 @@
* questions.
*/
package java.util;
-import java.nio.file.Path;
-import java.nio.file.Files;
-import java.util.regex.*;
import java.io.*;
import java.math.*;
import java.nio.*;
import java.nio.channels.*;
import java.nio.charset.*;
+import java.nio.file.Path;
+import java.nio.file.Files;
import java.text.*;
-import java.util.Locale;
+import java.util.function.Consumer;
+import java.util.regex.*;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
import sun.misc.LRUCache;
/**
* A simple text scanner which can parse primitive types and strings using
@@ -993,12 +995,13 @@
needInput = true;
return null;
}
// Finds the specified pattern in the buffer up to horizon.
- // Returns a match for the specified input pattern.
- private String findPatternInBuffer(Pattern pattern, int horizon) {
+ // Returns true if the specified input pattern was matched,
+ // and leaves the matcher field with the current match state.
+ private boolean findPatternInBuffer(Pattern pattern, int horizon) {
matchValid = false;
matcher.usePattern(pattern);
int bufferLimit = buf.limit();
int horizonLimit = -1;
int searchLimit = bufferLimit;
@@ -1012,58 +1015,59 @@
if (matcher.hitEnd() && (!sourceClosed)) {
// The match may be longer if didn't hit horizon or real end
if (searchLimit != horizonLimit) {
// Hit an artificial end; try to extend the match
needInput = true;
- return null;
+ return false;
}
// The match could go away depending on what is next
if ((searchLimit == horizonLimit) && matcher.requireEnd()) {
// Rare case: we hit the end of input and it happens
// that it is at the horizon and the end of input is
// required for the match.
needInput = true;
- return null;
+ return false;
}
}
// Did not hit end, or hit real end, or hit horizon
position = matcher.end();
- return matcher.group();
+ return true;
}
if (sourceClosed)
- return null;
+ return false;
// If there is no specified horizon, or if we have not searched
// to the specified horizon yet, get more input
if ((horizon == 0) || (searchLimit != horizonLimit))
needInput = true;
- return null;
+ return false;
}
- // Returns a match for the specified input pattern anchored at
- // the current position
- private String matchPatternInBuffer(Pattern pattern) {
+ // Attempts to match a pattern anchored at the current position.
+ // Returns true if the specified input pattern was matched; in that
+ // case position is advanced to the end of the match and the matcher
+ // field is left holding the current match state. Returns false
+ // otherwise, setting needInput when the source is not yet closed and
+ // reading more input could change the outcome. Always clears matchValid;
+ // callers set it again once they accept the match.
+ private boolean matchPatternInBuffer(Pattern pattern) {
matchValid = false;
matcher.usePattern(pattern);
matcher.region(position, buf.limit());
if (matcher.lookingAt()) {
if (matcher.hitEnd() && (!sourceClosed)) {
// Get more input and try again
needInput = true;
- return null;
+ return false;
}
position = matcher.end();
- return matcher.group();
+ return true;
}
if (sourceClosed)
- return null;
+ return false;
// Read more to find pattern
needInput = true;
- return null;
+ return false;
}
// Throws if the scanner is closed
private void ensureOpen() {
if (closed)
@@ -1273,20 +1277,30 @@
* Returns the match result of the last scanning operation performed
* by this scanner. This method throws <code>IllegalStateException</code>
* if no match has been performed, or if the last match was
* not successful.
*
- * <p>The various <code>next</code>methods of <code>Scanner</code>
+ * <p>The various <code>next</code> methods of <code>Scanner</code>
* make a match result available if they complete without throwing an
* exception. For instance, after an invocation of the {@link #nextInt}
* method that returned an int, this method returns a
* <code>MatchResult</code> for the search of the
* <a href="#Integer-regex"><i>Integer</i></a> regular expression
* defined above. Similarly the {@link #findInLine},
* {@link #findWithinHorizon}, and {@link #skip} methods will make a
* match available if they succeed.
*
+ * @apiNote
+ * <p>The offset values reported by the {@link MatchResult#start()},
+ * {@link MatchResult#start(int)}, {@link MatchResult#end()}, and
+ * {@link MatchResult#end(int)} methods are generally not useful, as they
+ * are offsets within the Scanner's internal buffer, which might contain
+ * an arbitrary portion of the input. The returned {@link MatchResult} is
+ * useful for extracting portions of the match using capturing groups,
+ * via the {@link MatchResult#group()}, {@link MatchResult#group(int)},
+ * and {@link MatchResult#groupCount()} methods.
+ *
* @return a match result for the last match operation
* @throws IllegalStateException If no match result is available
*/
public MatchResult match() {
if (!matchValid)
@@ -1591,12 +1605,11 @@
clearCaches();
// Expand buffer to include the next newline or end of input
int endPosition = 0;
saveState();
while (true) {
- String token = findPatternInBuffer(separatorPattern(), 0);
- if (token != null) {
+ if (findPatternInBuffer(separatorPattern(), 0)) {
endPosition = matcher.start();
break; // up to next newline
}
if (needInput) {
readInput();
@@ -1621,11 +1634,11 @@
* specified string, ignoring delimiters.
*
* <p>An invocation of this method of the form
* <tt>findWithinHorizon(pattern)</tt> behaves in exactly the same way as
* the invocation
- * <tt>findWithinHorizon(Pattern.compile(pattern, horizon))</tt>.
+ * <tt>findWithinHorizon(Pattern.compile(pattern), horizon)</tt>.
*
* @param pattern a string specifying the pattern to search for
* @param horizon the search horizon
* @return the text that matched the specified pattern
* @throws IllegalStateException if this scanner is closed
@@ -1674,14 +1687,13 @@
throw new IllegalArgumentException("horizon < 0");
clearCaches();
// Search for the pattern
while (true) {
- String token = findPatternInBuffer(pattern, horizon);
- if (token != null) {
+ if (findPatternInBuffer(pattern, horizon)) {
matchValid = true;
- return token;
+ return matcher.group();
}
if (needInput)
readInput();
else
break; // up to end of input
@@ -1718,12 +1730,11 @@
throw new NullPointerException();
clearCaches();
// Search for the pattern
while (true) {
- String token = matchPatternInBuffer(pattern);
- if (token != null) {
+ if (matchPatternInBuffer(pattern)) {
matchValid = true;
position = matcher.end();
return this;
}
if (needInput)
@@ -2612,6 +2623,149 @@
useLocale(Locale.getDefault(Locale.Category.FORMAT));
useRadix(10);
clearCaches();
return this;
}
+
+    /**
+     * Returns a stream of delimiter-separated tokens from this scanner. The
+     * stream contains the same tokens that would be returned, starting from
+     * this scanner's current state, by calling the {@link #next} method
+     * repeatedly until the {@link #hasNext} method returns false.
+     *
+     * <p>The resulting stream is sequential and ordered, and all stream
+     * elements are non-null. Closing the stream will close the underlying
+     * scanner.
+     *
+     * <p>After the {@code tokens()} method has been called, the scanner
+     * should be considered to be under the control of the returned stream
+     * object. Subsequent calls to any methods on this scanner other than
+     * {@link #close} and {@link #ioException} may return undefined results or
+     * may cause undefined effects on the returned stream.
+     *
+     * <p>If this scanner contains a resource that must be released, this scanner
+     * should be closed, either by calling its {@link #close} method, or by
+     * closing the returned stream. After the scanner has been closed, the
+     * results of operating on the returned stream are undefined.
+     *
+     * <p>For example, the following code will create a list of
+     * comma-delimited tokens from a string:
+     *
+     * <pre>{@code
+     * List<String> result = new Scanner("abc,def,,ghi")
+     *     .useDelimiter(",")
+     *     .tokens()
+     *     .collect(Collectors.toList());
+     * }</pre>
+     *
+     * <p>The resulting list would contain {@code "abc"}, {@code "def"},
+     * the empty string, and {@code "ghi"}.
+     *
+     * @return the stream of token strings
+     * @throws IllegalStateException if this scanner is closed
+     * @since 1.9
+     */
+    public Stream<String> tokens() {
+        ensureOpen();
+        // This scanner is itself the token iterator; its length is not known
+        // in advance, so it is wrapped as an unsized spliterator.
+        int characteristics =
+            Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED;
+        Spliterator<String> tokenSource =
+            Spliterators.spliteratorUnknownSize(this, characteristics);
+        // Sequential stream; closing the stream closes this scanner.
+        Stream<String> stream = StreamSupport.stream(tokenSource, false);
+        return stream.onClose(this::close);
+    }
+
+    /**
+     * Returns a stream of match results from this scanner. The stream
+     * contains the same results, in the same order, that would be obtained
+     * by calling {@code findWithinHorizon(pattern, 0)} followed by
+     * {@link #match} over and over for as long as
+     * {@link #findWithinHorizon} keeps finding matches.
+     *
+     * <p>The resulting stream is ordered, and all stream elements are
+     * non-null. Closing the stream will close the underlying scanner.
+     *
+     * <p>Once {@code findAll()} has been called, the scanner should be
+     * considered to be under the control of the returned stream object.
+     * Subsequent calls to any methods on this scanner other than
+     * {@link #close} and {@link #ioException} may return undefined results or
+     * may cause undefined effects on the returned stream.
+     *
+     * <p>If this scanner contains a resource that must be released, this scanner
+     * should be closed, either by calling its {@link #close} method, or by
+     * closing the returned stream. After the scanner has been closed, the
+     * results of operating on the returned stream are undefined.
+     *
+     * <p>For example, the following code will read a file and return a list
+     * of all sequences of characters consisting of seven or more Latin capital
+     * letters:
+     *
+     * <pre>{@code
+     * try (Scanner sc = new Scanner(Paths.get("input.txt"))) {
+     *     Pattern pat = Pattern.compile("[A-Z]{7,}");
+     *     List<String> capWords = sc.findAll(pat)
+     *                               .map(MatchResult::group)
+     *                               .collect(Collectors.toList());
+     * }
+     * }</pre>
+     *
+     * @apiNote
+     * As with the {@link #findWithinHorizon} methods, this method might block
+     * waiting for additional input, and it may buffer an unbounded amount of input
+     * searching for a match. The offset values in the {@link MatchResult} stream
+     * elements might not be useful; see the API Note for the {@link #match} method.
+     *
+     * @param pattern the pattern to be matched
+     * @return a stream of match results
+     * @throws NullPointerException if pattern is null
+     * @throws IllegalStateException if this scanner is closed
+     * @since 1.9
+     */
+    public Stream<MatchResult> findAll(Pattern pattern) {
+        // The null check comes first so that a null pattern is reported
+        // even when the scanner has already been closed.
+        Objects.requireNonNull(pattern);
+        ensureOpen();
+        Spliterator<MatchResult> matches = new ScanSpliterator(pattern);
+        return StreamSupport.stream(matches, false).onClose(this::close);
+    }
+
+    /**
+     * Returns a stream of match results for the pattern described by the
+     * given string. The effect is the same as the code:
+     *
+     * <pre>{@code
+     * scanner.findAll(Pattern.compile(patString))
+     * }</pre>
+     *
+     * @param patString the pattern string
+     * @return a stream of match results
+     * @throws NullPointerException if patString is null
+     * @throws IllegalStateException if this scanner is closed
+     * @throws PatternSyntaxException if the regular expression's syntax is invalid
+     * @since 1.9
+     * @see java.util.regex.Pattern
+     */
+    public Stream<MatchResult> findAll(String patString) {
+        // Check the argument and the scanner state before touching the cache.
+        Objects.requireNonNull(patString);
+        ensureOpen();
+        Pattern compiled = patternCache.forName(patString);
+        return findAll(compiled);
+    }
+
+
+    /**
+     * Spliterator backing {@code findAll}: each successful advance searches
+     * the scanner's buffer for the next occurrence of {@code pattern},
+     * reading more input as required, and hands a snapshot of the match to
+     * the consumer.
+     */
+    class ScanSpliterator extends Spliterators.AbstractSpliterator<MatchResult> {
+        final Pattern pattern;
+
+        // True when the previous match was empty. Such a match leaves the
+        // scanner position where it was, so the next search has to begin one
+        // character later or it would report the same empty match forever.
+        private boolean advance = false;
+
+        ScanSpliterator(Pattern pattern) {
+            super(Long.MAX_VALUE,
+                  Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED);
+            this.pattern = pattern;
+        }
+
+        /**
+         * Finds the next match and passes it to {@code cons}.
+         *
+         * @param cons receives an immutable snapshot of the match
+         * @return true if a match was delivered, false once the input is
+         *         exhausted without a further match
+         */
+        @Override
+        public boolean tryAdvance(Consumer<? super MatchResult> cons) {
+            while (true) {
+                if (advance) {
+                    if (position < buf.limit()) {
+                        // Step past the location of the previous empty match.
+                        position++;
+                        advance = false;
+                    } else if (sourceClosed) {
+                        // The empty match was at the very end of the input.
+                        return false;
+                    } else {
+                        // Nothing buffered beyond the empty match yet.
+                        needInput = true;
+                        readInput();
+                        continue;
+                    }
+                }
+                if (findPatternInBuffer(pattern, 0)) {
+                    advance = (matcher.start() == matcher.end());
+                    cons.accept(matcher.toMatchResult());
+                    return true;
+                }
+                if (needInput)
+                    readInput();
+                else
+                    return false; // reached end of input
+            }
+        }
+    }
+
}
< prev index next >