# HG changeset patch # User smarks # Date 1438719029 25200 # Tue Aug 04 13:10:29 2015 -0700 # Node ID 9dec3b0bff778fb45f8808f7e2b8cb0423234890 # Parent 29634bb4e3aced9c486df0d004cdee10335c3edc 8072722: add stream support to Scanner Reviewed-by: XXX diff --git a/src/java.base/share/classes/java/util/Scanner.java b/src/java.base/share/classes/java/util/Scanner.java --- a/src/java.base/share/classes/java/util/Scanner.java +++ b/src/java.base/share/classes/java/util/Scanner.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,16 +25,18 @@ package java.util; -import java.nio.file.Path; -import java.nio.file.Files; -import java.util.regex.*; import java.io.*; import java.math.*; import java.nio.*; import java.nio.channels.*; import java.nio.charset.*; +import java.nio.file.Path; +import java.nio.file.Files; import java.text.*; -import java.util.Locale; +import java.util.function.Consumer; +import java.util.regex.*; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; import sun.misc.LRUCache; @@ -995,8 +997,9 @@ } // Finds the specified pattern in the buffer up to horizon. - // Returns a match for the specified input pattern. - private String findPatternInBuffer(Pattern pattern, int horizon) { + // Returns true if the specified input pattern was matched, + // and leaves the matcher field with the current match state. 
+ private boolean findPatternInBuffer(Pattern pattern, int horizon) { matchValid = false; matcher.usePattern(pattern); int bufferLimit = buf.limit(); @@ -1014,7 +1017,7 @@ if (searchLimit != horizonLimit) { // Hit an artificial end; try to extend the match needInput = true; - return null; + return false; } // The match could go away depending on what is next if ((searchLimit == horizonLimit) && matcher.requireEnd()) { @@ -1022,27 +1025,28 @@ // that it is at the horizon and the end of input is // required for the match. needInput = true; - return null; + return false; } } // Did not hit end, or hit real end, or hit horizon position = matcher.end(); - return matcher.group(); + return true; } if (sourceClosed) - return null; + return false; // If there is no specified horizon, or if we have not searched // to the specified horizon yet, get more input if ((horizon == 0) || (searchLimit != horizonLimit)) needInput = true; - return null; + return false; } - // Returns a match for the specified input pattern anchored at - // the current position - private String matchPatternInBuffer(Pattern pattern) { + // Attempts to match a pattern anchored at the current position. + // Returns true if the specified input pattern was matched, + // and leaves the matcher field with the current match state. + private boolean matchPatternInBuffer(Pattern pattern) { matchValid = false; matcher.usePattern(pattern); matcher.region(position, buf.limit()); @@ -1050,18 +1054,18 @@ if (matcher.hitEnd() && (!sourceClosed)) { // Get more input and try again needInput = true; - return null; + return false; } position = matcher.end(); - return matcher.group(); + return true; } if (sourceClosed) - return null; + return false; // Read more to find pattern needInput = true; - return null; + return false; } // Throws if the scanner is closed @@ -1275,7 +1279,7 @@ * if no match has been performed, or if the last match was * not successful. * - *

The various nextmethods of Scanner + *

The various next methods of Scanner * make a match result available if they complete without throwing an * exception. For instance, after an invocation of the {@link #nextInt} * method that returned an int, this method returns a @@ -1285,6 +1289,16 @@ * {@link #findWithinHorizon}, and {@link #skip} methods will make a * match available if they succeed. * + * @apiNote + *

The offset values reported by the {@link MatchResult#start()}, + * {@link MatchResult#start(int)}, {@link MatchResult#end()}, and + * {@link MatchResult#end(int)} methods are generally not useful, as they + * are offsets within the Scanner's internal buffer, which might contain + * an arbitrary portion of the input. The returned {@link MatchResult} is + * useful for extracting portions of the match using capturing groups, + * via the {@link MatchResult#group()}, {@link MatchResult#group(int)}, + * and {@link MatchResult#groupCount()} methods. + * * @return a match result for the last match operation * @throws IllegalStateException If no match result is available */ @@ -1593,8 +1607,7 @@ int endPosition = 0; saveState(); while (true) { - String token = findPatternInBuffer(separatorPattern(), 0); - if (token != null) { + if (findPatternInBuffer(separatorPattern(), 0)) { endPosition = matcher.start(); break; // up to next newline } @@ -1623,7 +1636,7 @@ *

An invocation of this method of the form * findWithinHorizon(pattern) behaves in exactly the same way as * the invocation - * findWithinHorizon(Pattern.compile(pattern, horizon)). + * findWithinHorizon(Pattern.compile(pattern), horizon). * * @param pattern a string specifying the pattern to search for * @param horizon the search horizon @@ -1676,10 +1689,9 @@ // Search for the pattern while (true) { - String token = findPatternInBuffer(pattern, horizon); - if (token != null) { + if (findPatternInBuffer(pattern, horizon)) { matchValid = true; - return token; + return matcher.group(); } if (needInput) readInput(); @@ -1720,8 +1732,7 @@ // Search for the pattern while (true) { - String token = matchPatternInBuffer(pattern); - if (token != null) { + if (matchPatternInBuffer(pattern)) { matchValid = true; position = matcher.end(); return this; @@ -2614,4 +2625,147 @@ clearCaches(); return this; } + + /** + * Returns a stream of delimiter-separated tokens from this scanner. The + * stream contains the same tokens that would be returned, starting from + * this scanner's current state, by calling the {@link #next} method + * repeatedly until the {@link #hasNext} returns false. + * + *

The resulting stream is ordered, and all stream elements are + * non-null. Closing the stream will close the underlying scanner. + * + *

After the {@code tokens()} method has been called, the scanner + * should be considered to be under the control of the returned stream + * object. Subsequent calls to any methods on this scanner other than + * {@link #close} and {@link #ioException} may return undefined results or + * may cause undefined effects on the returned stream. + * + *

If this scanner contains a resource that must be released, this scanner + * should be closed, either by calling its {@link #close} method, or by + * closing the returned stream. After the scanner has been closed, the + * results of operating on the returned stream are undefined. + * + *

For example, the following code will create a list of + * comma-delimited tokens from a string: + * + *

<pre>{@code
+     * List<String> result = new Scanner("abc,def,,ghi").useDelimiter(",")
+     *     .tokens().collect(Collectors.toList());
+     * }</pre>
+ * + *

The resulting list would contain {@code "abc"}, {@code "def"}, + * the empty string, and {@code "ghi"}. + * + * @return the stream of token strings + * @throws IllegalStateException if this scanner is closed + * @since 1.9 + */ + public Stream tokens() { + ensureOpen(); + Stream stream = StreamSupport.stream( + Spliterators.spliteratorUnknownSize(this, + Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED), false); + return stream.onClose(this::close); + } + + /** + * Returns a stream of match results from this scanner. The stream + * contains the same results in the same order that would be returned by + * calling {@code findWithinHorizon(pattern, 0)} and then {@link #match} + * repeatedly as long as {@link #findWithinHorizon} finds matches. + * + *

The resulting stream is ordered, and all stream elements are + * non-null. Closing the stream will close the underlying scanner. + * + *

After the {@code findAll()} method has been called, the scanner + * should be considered to be under the control of the returned stream + * object. Subsequent calls to any methods on this scanner other than + * {@link #close} and {@link #ioException} may return undefined results or + * may cause undefined effects on the returned stream. + * + *

If this scanner contains a resource that must be released, this scanner + * should be closed, either by calling its {@link #close} method, or by + * closing the returned stream. After the scanner has been closed, the + * results of operating on the returned stream are undefined. + * + *

For example, the following code will read a file and return a list + * of all sequences of characters consisting of seven or more Latin capital + * letters: + * + *

<pre>{@code
+     * try (Scanner sc = new Scanner(Paths.get("input.txt"))) {
+     *     Pattern pat = Pattern.compile("[A-Z]{7,}");
+     *     List<String> capWords = sc.findAll(pat)
+     *                               .map(MatchResult::group)
+     *                               .collect(Collectors.toList());
+     * }
+     * }</pre>
+ * + * @apiNote + * As with the {@link #findWithinHorizon} methods, this method might block + * waiting for additional input, and it may buffer an unbounded amount of input + * searching for a match. The offset values in the {@link MatchResult} stream + * elements might not be useful; see the API Note for the {@link #match} method. + * + * @param pattern the pattern to be matched + * @return a stream of match results + * @throws NullPointerException if pattern is null + * @throws IllegalStateException if this scanner is closed + * @since 1.9 + */ + public Stream findAll(Pattern pattern) { + Objects.requireNonNull(pattern); + ensureOpen(); + Stream stream = StreamSupport.stream(new ScanSpliterator(pattern), false); + return stream.onClose(this::close); + } + + /** + * Returns a stream of match results that match the provided pattern string. + * The effect is the same as the code: + * + *
<pre>{@code
+     *     scanner.findAll(Pattern.compile(patString))
+     * }</pre>
+ * + * @param patString the pattern string + * @return a stream of match results + * @throws NullPointerException if patString is null + * @throws IllegalStateException if this scanner is closed + * @throws PatternSyntaxException if the regular expression's syntax is invalid + * @since 1.9 + * @see java.util.regex.Pattern + */ + public Stream findAll(String patString) { + Objects.requireNonNull(patString); + ensureOpen(); + return findAll(patternCache.forName(patString)); + } + + + class ScanSpliterator extends Spliterators.AbstractSpliterator { + final Pattern pattern; + + ScanSpliterator(Pattern pattern) { + super(Long.MAX_VALUE, + Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED); + this.pattern = pattern; + } + + @Override + public boolean tryAdvance(Consumer cons) { + while (true) { + if (findPatternInBuffer(pattern, 0)) { + cons.accept(matcher.toMatchResult()); + return true; + } + if (needInput) + readInput(); + else + return false; // reached end of input + } + } + } + } diff --git a/test/java/util/Scanner/ScanTest.java b/test/java/util/Scanner/ScanTest.java --- a/test/java/util/Scanner/ScanTest.java +++ b/test/java/util/Scanner/ScanTest.java @@ -24,25 +24,30 @@ /** * @test * @bug 4313885 4926319 4927634 5032610 5032622 5049968 5059533 6223711 6277261 6269946 6288823 + * 8072722 * @summary Basic tests of java.util.Scanner methods * @key randomness * @run main/othervm ScanTest */ +import java.io.*; +import java.math.*; +import java.nio.*; +import java.text.*; import java.util.*; -import java.text.*; -import java.io.*; -import java.nio.*; +import java.util.function.Consumer; import java.util.regex.*; -import java.math.*; +import java.util.stream.*; public class ScanTest { private static boolean failure = false; private static int failCount = 0; private static int NUM_SOURCE_TYPES = 2; + private static File inputFile = new File(System.getProperty("test.src", "."), "input.txt"); public static void main(String[] args) throws Exception { + 
Locale reservedLocale = Locale.getDefault(); String lang = reservedLocale.getLanguage(); try { @@ -70,8 +75,11 @@ cacheTest2(); nonASCIITest(); resetTest(); + tokensTest(); + findAllTest(); + streamCloseTest(); - for (int j=0; j\n]+"); - sc.next(); - String textOfRef = sc.next(); - if (!textOfRef.equals(expected[i])) + // Read some text parts of four hrefs + String[] expected = { "Diffs", "Sdiffs", "Old", "New" }; + for (int i=0; i<4; i++) { + sc.findWithinHorizon("\n]+"); + sc.next(); + String textOfRef = sc.next(); + if (!textOfRef.equals(expected[i])) + failCount++; + } + // Read some html tags using < and > as delimiters + if (!sc.next().equals("/a")) failCount++; - } - // Read some html tags using < and > as delimiters - if (!sc.next().equals("/a")) - failCount++; - if (!sc.next().equals("b")) - failCount++; + if (!sc.next().equals("b")) + failCount++; - // Scan some html tags using skip and next - Pattern nonTagStart = Pattern.compile("[^<]+"); - Pattern tag = Pattern.compile("<[^>]+?>"); - Pattern spotAfterTag = Pattern.compile("(?<=>)"); - String[] expected2 = { "", "

", "

    ", "
  • " }; - sc.useDelimiter(spotAfterTag); - int tagsFound = 0; - while(tagsFound < 4) { - if (!sc.hasNext(tag)) { - // skip text between tags - sc.skip(nonTagStart); + // Scan some html tags using skip and next + Pattern nonTagStart = Pattern.compile("[^<]+"); + Pattern tag = Pattern.compile("<[^>]+?>"); + Pattern spotAfterTag = Pattern.compile("(?<=>)"); + String[] expected2 = { "", "

    ", "

      ", "
    • " }; + sc.useDelimiter(spotAfterTag); + int tagsFound = 0; + while(tagsFound < 4) { + if (!sc.hasNext(tag)) { + // skip text between tags + sc.skip(nonTagStart); + } + String tagContents = sc.next(tag); + if (!tagContents.equals(expected2[tagsFound])) + failCount++; + tagsFound++; } - String tagContents = sc.next(tag); - if (!tagContents.equals(expected2[tagsFound])) - failCount++; - tagsFound++; } report("Use case 4"); } public static void useCase5() throws Exception { - File f = new File(System.getProperty("test.src", "."), "input.txt"); - Scanner sc = new Scanner(f); - String testDataTag = sc.findWithinHorizon("usage case 5\n", 0); - if (!testDataTag.equals("usage case 5\n")) - failCount++; + try (Scanner sc = new Scanner(inputFile)) { + String testDataTag = sc.findWithinHorizon("usage case 5\n", 0); + if (!testDataTag.equals("usage case 5\n")) + failCount++; - sc.findWithinHorizon("Share Definitions", 0); - sc.nextLine(); - sc.next("\\[([a-z]+)\\]"); - String shareName = sc.match().group(1); - if (!shareName.equals("homes")) - failCount++; + sc.findWithinHorizon("Share Definitions", 0); + sc.nextLine(); + sc.next("\\[([a-z]+)\\]"); + String shareName = sc.match().group(1); + if (!shareName.equals("homes")) + failCount++; - String[] keys = { "comment", "browseable", "writable", "valid users" }; - String[] vals = { "Home Directories", "no", "yes", "%S" }; - for (int i=0; i<4; i++) { - sc.useDelimiter("="); - String key = sc.next().trim(); - if (!key.equals(keys[i])) - failCount++; - sc.skip("[ =]+"); - sc.useDelimiter("\n"); - String value = sc.next(); - if (!value.equals(vals[i])) - failCount++; - sc.nextLine(); + String[] keys = { "comment", "browseable", "writable", "valid users" }; + String[] vals = { "Home Directories", "no", "yes", "%S" }; + for (int i=0; i<4; i++) { + sc.useDelimiter("="); + String key = sc.next().trim(); + if (!key.equals(keys[i])) + failCount++; + sc.skip("[ =]+"); + sc.useDelimiter("\n"); + String value = sc.next(); + if 
(!value.equals(vals[i])) + failCount++; + sc.nextLine(); + } } report("Use case 5"); @@ -445,12 +451,12 @@ if (sc.hasNextLine()) failCount++; // Go through all the lines in a file - File f = new File(System.getProperty("test.src", "."), "input.txt"); - sc = new Scanner(f); - String lastLine = "blah"; - while(sc.hasNextLine()) - lastLine = sc.nextLine(); - if (!lastLine.equals("# Data for usage case 6")) failCount++; + try (Scanner sc2 = new Scanner(inputFile)) { + String lastLine = "blah"; + while(sc2.hasNextLine()) + lastLine = sc2.nextLine(); + if (!lastLine.equals("# Data for usage case 6")) failCount++; + } report("Has next line test"); } @@ -629,48 +635,47 @@ sc.delimiter(); sc.useDelimiter("blah"); sc.useDelimiter(Pattern.compile("blah")); - for (int i=0; i method : methodList) { try { - methodCall(sc, i); + method.accept(sc); failCount++; } catch (IllegalStateException ise) { // Correct } } + report("Close test"); } - private static int NUM_METHODS = 23; - - private static void methodCall(Scanner sc, int i) { - switch(i) { - case 0: sc.hasNext(); break; - case 1: sc.next(); break; - case 2: sc.hasNext(Pattern.compile("blah")); break; - case 3: sc.next(Pattern.compile("blah")); break; - case 4: sc.hasNextBoolean(); break; - case 5: sc.nextBoolean(); break; - case 6: sc.hasNextByte(); break; - case 7: sc.nextByte(); break; - case 8: sc.hasNextShort(); break; - case 9: sc.nextShort(); break; - case 10: sc.hasNextInt(); break; - case 11: sc.nextInt(); break; - case 12: sc.hasNextLong(); break; - case 13: sc.nextLong(); break; - case 14: sc.hasNextFloat(); break; - case 15: sc.nextFloat(); break; - case 16: sc.hasNextDouble(); break; - case 17: sc.nextDouble(); break; - case 18: sc.hasNextBigInteger(); break; - case 19: sc.nextBigInteger(); break; - case 20: sc.hasNextBigDecimal(); break; - case 21: sc.nextBigDecimal(); break; - case 22: sc.hasNextLine(); break; - default: - break; - } - } + static List> methodList = Arrays.asList( + Scanner::hasNext, + 
Scanner::next, + sc -> sc.hasNext(Pattern.compile("blah")), + sc -> sc.next(Pattern.compile("blah")), + Scanner::hasNextBoolean, + Scanner::nextBoolean, + Scanner::hasNextByte, + Scanner::nextByte, + Scanner::hasNextShort, + Scanner::nextShort, + Scanner::hasNextInt, + Scanner::nextInt, + Scanner::hasNextLong, + Scanner::nextLong, + Scanner::hasNextFloat, + Scanner::nextFloat, + Scanner::hasNextDouble, + Scanner::nextDouble, + Scanner::hasNextBigInteger, + Scanner::nextBigInteger, + Scanner::hasNextBigDecimal, + Scanner::nextBigDecimal, + Scanner::hasNextLine, + Scanner::tokens, + sc -> sc.findAll(Pattern.compile("blah")), + sc -> sc.findAll("blah") + ); public static void removeTest() throws Exception { Scanner sc = new Scanner("testing"); @@ -864,19 +869,20 @@ public static void fromFileTest() throws Exception { File f = new File(System.getProperty("test.src", "."), "input.txt"); - Scanner sc = new Scanner(f).useDelimiter("\n+"); - String testDataTag = sc.findWithinHorizon("fromFileTest", 0); - if (!testDataTag.equals("fromFileTest")) - failCount++; + try (Scanner sc = new Scanner(f)) { + sc.useDelimiter("\n+"); + String testDataTag = sc.findWithinHorizon("fromFileTest", 0); + if (!testDataTag.equals("fromFileTest")) + failCount++; - int count = 0; - while (sc.hasNextLong()) { - long blah = sc.nextLong(); - count++; + int count = 0; + while (sc.hasNextLong()) { + long blah = sc.nextLong(); + count++; + } + if (count != 7) + failCount++; } - if (count != 7) - failCount++; - sc.close(); report("From file"); } @@ -1472,14 +1478,72 @@ report("Reset test"); } + public static void tokensTest() { + List result = new Scanner("abc def ghi").tokens().collect(Collectors.toList()); + if (! result.equals(Arrays.asList("abc", "def", "ghi"))) { + System.out.println("not equals, failed"); + failCount++; + } + + result = new Scanner("###abc##def###ghi###j").useDelimiter("#+") + .tokens().collect(Collectors.toList()); + if (! 
result.equals(Arrays.asList("abc", "def", "ghi", "j"))) { + failCount++; + } + + result = new Scanner("abc,def,,ghi").useDelimiter(",") + .tokens().collect(Collectors.toList()); + if (! result.equals(Arrays.asList("abc", "def", "", "ghi"))) { + failCount++; + } + + report("Tokens test"); + } + + public static void findAllTest() throws Exception { + try (Stream str = new Scanner(inputFile).findAll("[A-Z]{7,}")) { + List result = str.map(MatchResult::group).collect(Collectors.toList()); + if (! result.equals(Arrays.asList("MYGROUP", "NODELAY", "ENCRYPTION"))) { + failCount++; + } + } + + report("FindAll test"); + } + + /* + * Test that closing the stream also closes the underlying Scanner. + * The cases of attempting to open streams on a closed Scanner are + * covered by closeTest(). + */ + public static void streamCloseTest() throws Exception { + Scanner sc; + + sc = new Scanner("xyzzy"); + sc.tokens().close(); + try { + sc.hasNext(); + failCount++; + } catch (IllegalStateException ise) { + // Correct result + } + + sc = new Scanner("xyzzy"); + sc.findAll("q").close(); + try { + sc.hasNext(); + failCount++; + } catch (IllegalStateException ise) { + // Correct result + } + + report("Streams Close test"); + } + private static void report(String testName) { - int spacesToAdd = 30 - testName.length(); - StringBuffer paddedNameBuffer = new StringBuffer(testName); - for (int i=0; i 0) failure = true; failCount = 0;