1 /* 2 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util; 27 28 import java.io.*; 29 import java.math.*; 30 import java.nio.*; 31 import java.nio.channels.*; 32 import java.nio.charset.*; 33 import java.nio.file.Path; 34 import java.nio.file.Files; 35 import java.text.*; 36 import java.util.function.Consumer; 37 import java.util.regex.*; 38 import java.util.stream.Stream; 39 import java.util.stream.StreamSupport; 40 41 /** 42 * A simple text scanner which can parse primitive types and strings using 43 * regular expressions. 44 * 45 * <p>A {@code Scanner} breaks its input into tokens using a 46 * delimiter pattern, which by default matches whitespace. 
The resulting 47 * tokens may then be converted into values of different types using the 48 * various {@code next} methods. 49 * 50 * <p>For example, this code allows a user to read a number from 51 * {@code System.in}: 52 * <blockquote><pre>{@code 53 * Scanner sc = new Scanner(System.in); 54 * int i = sc.nextInt(); 55 * }</pre></blockquote> 56 * 57 * <p>As another example, this code allows {@code long} types to be 58 * assigned from entries in a file {@code myNumbers}: 59 * <blockquote><pre>{@code 60 * Scanner sc = new Scanner(new File("myNumbers")); 61 * while (sc.hasNextLong()) { 62 * long aLong = sc.nextLong(); 63 * } 64 * }</pre></blockquote> 65 * 66 * <p>The scanner can also use delimiters other than whitespace. This 67 * example reads several items in from a string: 68 * <blockquote><pre>{@code 69 * String input = "1 fish 2 fish red fish blue fish"; 70 * Scanner s = new Scanner(input).useDelimiter("\\s*fish\\s*"); 71 * System.out.println(s.nextInt()); 72 * System.out.println(s.nextInt()); 73 * System.out.println(s.next()); 74 * System.out.println(s.next()); 75 * s.close(); 76 * }</pre></blockquote> 77 * <p> 78 * prints the following output: 79 * <blockquote><pre>{@code 80 * 1 81 * 2 82 * red 83 * blue 84 * }</pre></blockquote> 85 * 86 * <p>The same output can be generated with this code, which uses a regular 87 * expression to parse all four tokens at once: 88 * <blockquote><pre>{@code 89 * String input = "1 fish 2 fish red fish blue fish"; 90 * Scanner s = new Scanner(input); 91 * s.findInLine("(\\d+) fish (\\d+) fish (\\w+) fish (\\w+)"); 92 * MatchResult result = s.match(); 93 * for (int i=1; i<=result.groupCount(); i++) 94 * System.out.println(result.group(i)); 95 * s.close(); 96 * }</pre></blockquote> 97 * 98 * <p>The <a name="default-delimiter">default whitespace delimiter</a> used 99 * by a scanner is as recognized by {@link Character#isWhitespace(char) 100 * Character.isWhitespace()}. 
The {@link #reset reset()} 101 * method will reset the value of the scanner's delimiter to the default 102 * whitespace delimiter regardless of whether it was previously changed. 103 * 104 * <p>A scanning operation may block waiting for input. 105 * 106 * <p>The {@link #next} and {@link #hasNext} methods and their 107 * companion methods (such as {@link #nextInt} and 108 * {@link #hasNextInt}) first skip any input that matches the delimiter 109 * pattern, and then attempt to return the next token. Both {@code hasNext()} 110 * and {@code next()} methods may block waiting for further input. Whether a 111 * {@code hasNext()} method blocks has no connection to whether or not its 112 * associated {@code next()} method will block. The {@link #tokens} method 113 * may also block waiting for input. 114 * 115 * <p>The {@link #findInLine findInLine()}, 116 * {@link #findWithinHorizon findWithinHorizon()}, 117 * {@link #skip skip()}, and {@link #findAll findAll()} 118 * methods operate independently of the delimiter pattern. These methods will 119 * attempt to match the specified pattern with no regard to delimiters in the 120 * input and thus can be used in special circumstances where delimiters are 121 * not relevant. These methods may block waiting for more input. 122 * 123 * <p>When a scanner throws an {@link InputMismatchException}, the scanner 124 * will not pass the token that caused the exception, so that it may be 125 * retrieved or skipped via some other method. 126 * 127 * <p>Depending upon the type of delimiting pattern, empty tokens may be 128 * returned. For example, the pattern {@code "\\s+"} will return no empty 129 * tokens since it matches multiple instances of the delimiter. The delimiting 130 * pattern {@code "\\s"} could return empty tokens since it only passes one 131 * space at a time. 132 * 133 * <p> A scanner can read text from any object which implements the {@link 134 * java.lang.Readable} interface. 
If an invocation of the underlying 135 * readable's {@link java.lang.Readable#read read()} method throws an {@link 136 * java.io.IOException} then the scanner assumes that the end of the input 137 * has been reached. The most recent {@code IOException} thrown by the 138 * underlying readable can be retrieved via the {@link #ioException} method. 139 * 140 * <p>When a {@code Scanner} is closed, it will close its input source 141 * if the source implements the {@link java.io.Closeable} interface. 142 * 143 * <p>A {@code Scanner} is not safe for multithreaded use without 144 * external synchronization. 145 * 146 * <p>Unless otherwise mentioned, passing a {@code null} parameter into 147 * any method of a {@code Scanner} will cause a 148 * {@code NullPointerException} to be thrown. 149 * 150 * <p>A scanner will default to interpreting numbers as decimal unless a 151 * different radix has been set by using the {@link #useRadix} method. The 152 * {@link #reset} method will reset the value of the scanner's radix to 153 * {@code 10} regardless of whether it was previously changed. 154 * 155 * <h3> <a name="localized-numbers">Localized numbers</a> </h3> 156 * 157 * <p> An instance of this class is capable of scanning numbers in the standard 158 * formats as well as in the formats of the scanner's locale. A scanner's 159 * <a name="initial-locale">initial locale </a>is the value returned by the {@link 160 * java.util.Locale#getDefault(Locale.Category) 161 * Locale.getDefault(Locale.Category.FORMAT)} method; it may be changed via the {@link 162 * #useLocale useLocale()} method. The {@link #reset} method will reset the value of the 163 * scanner's locale to the initial locale regardless of whether it was 164 * previously changed. 
 *
 * <p>The localized formats are defined in terms of the following parameters,
 * which for a particular locale are taken from that locale's {@link
 * java.text.DecimalFormat DecimalFormat} object, {@code df}, and its
 * {@link java.text.DecimalFormatSymbols DecimalFormatSymbols} object,
 * {@code dfs}.
 *
 * <blockquote><dl>
 *     <dt><i>LocalGroupSeparator </i>
 *         <dd>The character used to separate thousands groups,
 *         <i>i.e.,</i> {@code dfs.}{@link
 *         java.text.DecimalFormatSymbols#getGroupingSeparator
 *         getGroupingSeparator()}
 *     <dt><i>LocalDecimalSeparator </i>
 *         <dd>The character used for the decimal point,
 *         <i>i.e.,</i> {@code dfs.}{@link
 *         java.text.DecimalFormatSymbols#getDecimalSeparator
 *         getDecimalSeparator()}
 *     <dt><i>LocalPositivePrefix </i>
 *         <dd>The string that appears before a positive number (may
 *         be empty), <i>i.e.,</i> {@code df.}{@link
 *         java.text.DecimalFormat#getPositivePrefix
 *         getPositivePrefix()}
 *     <dt><i>LocalPositiveSuffix </i>
 *         <dd>The string that appears after a positive number (may be
 *         empty), <i>i.e.,</i> {@code df.}{@link
 *         java.text.DecimalFormat#getPositiveSuffix
 *         getPositiveSuffix()}
 *     <dt><i>LocalNegativePrefix </i>
 *         <dd>The string that appears before a negative number (may
 *         be empty), <i>i.e.,</i> {@code df.}{@link
 *         java.text.DecimalFormat#getNegativePrefix
 *         getNegativePrefix()}
 *     <dt><i>LocalNegativeSuffix </i>
 *         <dd>The string that appears after a negative number (may be
 *         empty), <i>i.e.,</i> {@code df.}{@link
 *         java.text.DecimalFormat#getNegativeSuffix
 *         getNegativeSuffix()}
 *     <dt><i>LocalNaN </i>
 *         <dd>The string that represents not-a-number for
 *         floating-point values,
 *         <i>i.e.,</i> {@code dfs.}{@link
 *         java.text.DecimalFormatSymbols#getNaN
 *         getNaN()}
 *     <dt><i>LocalInfinity </i>
 *         <dd>The string that represents infinity for floating-point
211 * values, <i>i.e.,</i> {@code dfs.}{@link 212 * java.text.DecimalFormatSymbols#getInfinity 213 * getInfinity()} 214 * </dl></blockquote> 215 * 216 * <h4> <a name="number-syntax">Number syntax</a> </h4> 217 * 218 * <p> The strings that can be parsed as numbers by an instance of this class 219 * are specified in terms of the following regular-expression grammar, where 220 * Rmax is the highest digit in the radix being used (for example, Rmax is 9 in base 10). 221 * 222 * <dl> 223 * <dt><i>NonAsciiDigit</i>: 224 * <dd>A non-ASCII character c for which 225 * {@link java.lang.Character#isDigit Character.isDigit}{@code (c)} 226 * returns true 227 * 228 * <dt><i>Non0Digit</i>: 229 * <dd>{@code [1-}<i>Rmax</i>{@code ] | }<i>NonASCIIDigit</i> 230 * 231 * <dt><i>Digit</i>: 232 * <dd>{@code [0-}<i>Rmax</i>{@code ] | }<i>NonASCIIDigit</i> 233 * 234 * <dt><i>GroupedNumeral</i>: 235 * <dd><code>( </code><i>Non0Digit</i> 236 * <i>Digit</i>{@code ? 237 * }<i>Digit</i>{@code ?} 238 * <dd> <code>( </code><i>LocalGroupSeparator</i> 239 * <i>Digit</i> 240 * <i>Digit</i> 241 * <i>Digit</i>{@code )+ )} 242 * 243 * <dt><i>Numeral</i>: 244 * <dd>{@code ( ( }<i>Digit</i>{@code + ) 245 * | }<i>GroupedNumeral</i>{@code )} 246 * 247 * <dt><a name="Integer-regex"><i>Integer</i>:</a> 248 * <dd>{@code ( [-+]? ( }<i>Numeral</i>{@code 249 * ) )} 250 * <dd>{@code | }<i>LocalPositivePrefix</i> <i>Numeral</i> 251 * <i>LocalPositiveSuffix</i> 252 * <dd>{@code | }<i>LocalNegativePrefix</i> <i>Numeral</i> 253 * <i>LocalNegativeSuffix</i> 254 * 255 * <dt><i>DecimalNumeral</i>: 256 * <dd><i>Numeral</i> 257 * <dd>{@code | }<i>Numeral</i> 258 * <i>LocalDecimalSeparator</i> 259 * <i>Digit</i>{@code *} 260 * <dd>{@code | }<i>LocalDecimalSeparator</i> 261 * <i>Digit</i>{@code +} 262 * 263 * <dt><i>Exponent</i>: 264 * <dd>{@code ( [eE] [+-]? }<i>Digit</i>{@code + )} 265 * 266 * <dt><a name="Decimal-regex"><i>Decimal</i>:</a> 267 * <dd>{@code ( [-+]? }<i>DecimalNumeral</i> 268 * <i>Exponent</i>{@code ? 
 *         )}
 *       <dd>{@code | }<i>LocalPositivePrefix</i>
 *                 <i>DecimalNumeral</i>
 *                 <i>LocalPositiveSuffix</i>
 *                 <i>Exponent</i>{@code ?}
 *       <dd>{@code | }<i>LocalNegativePrefix</i>
 *                 <i>DecimalNumeral</i>
 *                 <i>LocalNegativeSuffix</i>
 *                 <i>Exponent</i>{@code ?}
 *
 *   <dt><i>HexFloat</i>:
 *       <dd>{@code [-+]? 0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+
 *                 ([pP][-+]?[0-9]+)?}
 *
 *   <dt><i>NonNumber</i>:
 *       <dd>{@code NaN
 *                          | }<i>LocalNaN</i>{@code
 *                          | Infinity
 *                          | }<i>LocalInfinity</i>
 *
 *   <dt><i>SignedNonNumber</i>:
 *       <dd>{@code ( [-+]? }<i>NonNumber</i>{@code )}
 *       <dd>{@code | }<i>LocalPositivePrefix</i>
 *                 <i>NonNumber</i>
 *                 <i>LocalPositiveSuffix</i>
 *       <dd>{@code | }<i>LocalNegativePrefix</i>
 *                 <i>NonNumber</i>
 *                 <i>LocalNegativeSuffix</i>
 *
 *   <dt><a name="Float-regex"><i>Float</i></a>:
 *       <dd><i>Decimal</i>
 *           {@code | }<i>HexFloat</i>
 *           {@code | }<i>SignedNonNumber</i>
 *
 * </dl>
 * <p>Whitespace is not significant in the above regular expressions.
304 * 305 * @since 1.5 306 */ 307 public final class Scanner implements Iterator<String>, Closeable { 308 309 // Internal buffer used to hold input 310 private CharBuffer buf; 311 312 // Size of internal character buffer 313 private static final int BUFFER_SIZE = 1024; // change to 1024; 314 315 // The index into the buffer currently held by the Scanner 316 private int position; 317 318 // Internal matcher used for finding delimiters 319 private Matcher matcher; 320 321 // Pattern used to delimit tokens 322 private Pattern delimPattern; 323 324 // Pattern found in last hasNext operation 325 private Pattern hasNextPattern; 326 327 // Position after last hasNext operation 328 private int hasNextPosition; 329 330 // Result after last hasNext operation 331 private String hasNextResult; 332 333 // The input source 334 private Readable source; 335 336 // Boolean is true if source is done 337 private boolean sourceClosed = false; 338 339 // Boolean indicating more input is required 340 private boolean needInput = false; 341 342 // Boolean indicating if a delim has been skipped this operation 343 private boolean skipped = false; 344 345 // A store of a position that the scanner may fall back to 346 private int savedScannerPosition = -1; 347 348 // A cache of the last primitive type scanned 349 private Object typeCache = null; 350 351 // Boolean indicating if a match result is available 352 private boolean matchValid = false; 353 354 // Boolean indicating if this scanner has been closed 355 private boolean closed = false; 356 357 // The current radix used by this scanner 358 private int radix = 10; 359 360 // The default radix for this scanner 361 private int defaultRadix = 10; 362 363 // The locale used by this scanner 364 private Locale locale = null; 365 366 // A cache of the last few recently used Patterns 367 private PatternLRUCache patternCache = new PatternLRUCache(7); 368 369 // A holder of the last IOException encountered 370 private IOException lastException; 371 
372 // Number of times this scanner's state has been modified. 373 // Generally incremented on most public APIs and checked 374 // within spliterator implementations. 375 int modCount; 376 377 // A pattern for java whitespace 378 private static Pattern WHITESPACE_PATTERN = Pattern.compile( 379 "\\p{javaWhitespace}+"); 380 381 // A pattern for any token 382 private static Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*"); 383 384 // A pattern for non-ASCII digits 385 private static Pattern NON_ASCII_DIGIT = Pattern.compile( 386 "[\\p{javaDigit}&&[^0-9]]"); 387 388 // Fields and methods to support scanning primitive types 389 390 /** 391 * Locale dependent values used to scan numbers 392 */ 393 private String groupSeparator = "\\,"; 394 private String decimalSeparator = "\\."; 395 private String nanString = "NaN"; 396 private String infinityString = "Infinity"; 397 private String positivePrefix = ""; 398 private String negativePrefix = "\\-"; 399 private String positiveSuffix = ""; 400 private String negativeSuffix = ""; 401 402 /** 403 * Fields and an accessor method to match booleans 404 */ 405 private static volatile Pattern boolPattern; 406 private static final String BOOLEAN_PATTERN = "true|false"; 407 private static Pattern boolPattern() { 408 Pattern bp = boolPattern; 409 if (bp == null) 410 boolPattern = bp = Pattern.compile(BOOLEAN_PATTERN, 411 Pattern.CASE_INSENSITIVE); 412 return bp; 413 } 414 415 /** 416 * Fields and methods to match bytes, shorts, ints, and longs 417 */ 418 private Pattern integerPattern; 419 private String digits = "0123456789abcdefghijklmnopqrstuvwxyz"; 420 private String non0Digit = "[\\p{javaDigit}&&[^0]]"; 421 private int SIMPLE_GROUP_INDEX = 5; 422 private String buildIntegerPatternString() { 423 String radixDigits = digits.substring(0, radix); 424 // \\p{javaDigit} is not guaranteed to be appropriate 425 // here but what can we do? 
The final authority will be 426 // whatever parse method is invoked, so ultimately the 427 // Scanner will do the right thing 428 String digit = "((?i)["+radixDigits+"]|\\p{javaDigit})"; 429 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 430 groupSeparator+digit+digit+digit+")+)"; 431 // digit++ is the possessive form which is necessary for reducing 432 // backtracking that would otherwise cause unacceptable performance 433 String numeral = "(("+ digit+"++)|"+groupedNumeral+")"; 434 String javaStyleInteger = "([-+]?(" + numeral + "))"; 435 String negativeInteger = negativePrefix + numeral + negativeSuffix; 436 String positiveInteger = positivePrefix + numeral + positiveSuffix; 437 return "("+ javaStyleInteger + ")|(" + 438 positiveInteger + ")|(" + 439 negativeInteger + ")"; 440 } 441 private Pattern integerPattern() { 442 if (integerPattern == null) { 443 integerPattern = patternCache.forName(buildIntegerPatternString()); 444 } 445 return integerPattern; 446 } 447 448 /** 449 * Fields and an accessor method to match line separators 450 */ 451 private static volatile Pattern separatorPattern; 452 private static volatile Pattern linePattern; 453 private static final String LINE_SEPARATOR_PATTERN = 454 "\r\n|[\n\r\u2028\u2029\u0085]"; 455 private static final String LINE_PATTERN = ".*("+LINE_SEPARATOR_PATTERN+")|.+$"; 456 457 private static Pattern separatorPattern() { 458 Pattern sp = separatorPattern; 459 if (sp == null) 460 separatorPattern = sp = Pattern.compile(LINE_SEPARATOR_PATTERN); 461 return sp; 462 } 463 464 private static Pattern linePattern() { 465 Pattern lp = linePattern; 466 if (lp == null) 467 linePattern = lp = Pattern.compile(LINE_PATTERN); 468 return lp; 469 } 470 471 /** 472 * Fields and methods to match floats and doubles 473 */ 474 private Pattern floatPattern; 475 private Pattern decimalPattern; 476 private void buildFloatAndDecimalPattern() { 477 // \\p{javaDigit} may not be perfect, see above 478 String digit = 
"([0-9]|(\\p{javaDigit}))"; 479 String exponent = "([eE][+-]?"+digit+"+)?"; 480 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 481 groupSeparator+digit+digit+digit+")+)"; 482 // Once again digit++ is used for performance, as above 483 String numeral = "(("+digit+"++)|"+groupedNumeral+")"; 484 String decimalNumeral = "("+numeral+"|"+numeral + 485 decimalSeparator + digit + "*+|"+ decimalSeparator + 486 digit + "++)"; 487 String nonNumber = "(NaN|"+nanString+"|Infinity|"+ 488 infinityString+")"; 489 String positiveFloat = "(" + positivePrefix + decimalNumeral + 490 positiveSuffix + exponent + ")"; 491 String negativeFloat = "(" + negativePrefix + decimalNumeral + 492 negativeSuffix + exponent + ")"; 493 String decimal = "(([-+]?" + decimalNumeral + exponent + ")|"+ 494 positiveFloat + "|" + negativeFloat + ")"; 495 String hexFloat = 496 "[-+]?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP][-+]?[0-9]+)?"; 497 String positiveNonNumber = "(" + positivePrefix + nonNumber + 498 positiveSuffix + ")"; 499 String negativeNonNumber = "(" + negativePrefix + nonNumber + 500 negativeSuffix + ")"; 501 String signedNonNumber = "(([-+]?"+nonNumber+")|" + 502 positiveNonNumber + "|" + 503 negativeNonNumber + ")"; 504 floatPattern = Pattern.compile(decimal + "|" + hexFloat + "|" + 505 signedNonNumber); 506 decimalPattern = Pattern.compile(decimal); 507 } 508 private Pattern floatPattern() { 509 if (floatPattern == null) { 510 buildFloatAndDecimalPattern(); 511 } 512 return floatPattern; 513 } 514 private Pattern decimalPattern() { 515 if (decimalPattern == null) { 516 buildFloatAndDecimalPattern(); 517 } 518 return decimalPattern; 519 } 520 521 // Constructors 522 523 /** 524 * Constructs a {@code Scanner} that returns values scanned 525 * from the specified source delimited by the specified pattern. 
526 * 527 * @param source A character source implementing the Readable interface 528 * @param pattern A delimiting pattern 529 */ 530 private Scanner(Readable source, Pattern pattern) { 531 assert source != null : "source should not be null"; 532 assert pattern != null : "pattern should not be null"; 533 this.source = source; 534 delimPattern = pattern; 535 buf = CharBuffer.allocate(BUFFER_SIZE); 536 buf.limit(0); 537 matcher = delimPattern.matcher(buf); 538 matcher.useTransparentBounds(true); 539 matcher.useAnchoringBounds(false); 540 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 541 } 542 543 /** 544 * Constructs a new {@code Scanner} that produces values scanned 545 * from the specified source. 546 * 547 * @param source A character source implementing the {@link Readable} 548 * interface 549 */ 550 public Scanner(Readable source) { 551 this(Objects.requireNonNull(source, "source"), WHITESPACE_PATTERN); 552 } 553 554 /** 555 * Constructs a new {@code Scanner} that produces values scanned 556 * from the specified input stream. Bytes from the stream are converted 557 * into characters using the underlying platform's 558 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 559 * 560 * @param source An input stream to be scanned 561 */ 562 public Scanner(InputStream source) { 563 this(new InputStreamReader(source), WHITESPACE_PATTERN); 564 } 565 566 /** 567 * Constructs a new {@code Scanner} that produces values scanned 568 * from the specified input stream. Bytes from the stream are converted 569 * into characters using the specified charset. 
570 * 571 * @param source An input stream to be scanned 572 * @param charsetName The encoding type used to convert bytes from the 573 * stream into characters to be scanned 574 * @throws IllegalArgumentException if the specified character set 575 * does not exist 576 */ 577 public Scanner(InputStream source, String charsetName) { 578 this(makeReadable(Objects.requireNonNull(source, "source"), toCharset(charsetName)), 579 WHITESPACE_PATTERN); 580 } 581 582 /** 583 * Returns a charset object for the given charset name. 584 * @throws NullPointerException is csn is null 585 * @throws IllegalArgumentException if the charset is not supported 586 */ 587 private static Charset toCharset(String csn) { 588 Objects.requireNonNull(csn, "charsetName"); 589 try { 590 return Charset.forName(csn); 591 } catch (IllegalCharsetNameException|UnsupportedCharsetException e) { 592 // IllegalArgumentException should be thrown 593 throw new IllegalArgumentException(e); 594 } 595 } 596 597 private static Readable makeReadable(InputStream source, Charset charset) { 598 return new InputStreamReader(source, charset); 599 } 600 601 /** 602 * Constructs a new {@code Scanner} that produces values scanned 603 * from the specified file. Bytes from the file are converted into 604 * characters using the underlying platform's 605 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 606 * 607 * @param source A file to be scanned 608 * @throws FileNotFoundException if source is not found 609 */ 610 public Scanner(File source) throws FileNotFoundException { 611 this((ReadableByteChannel)(new FileInputStream(source).getChannel())); 612 } 613 614 /** 615 * Constructs a new {@code Scanner} that produces values scanned 616 * from the specified file. Bytes from the file are converted into 617 * characters using the specified charset. 
618 * 619 * @param source A file to be scanned 620 * @param charsetName The encoding type used to convert bytes from the file 621 * into characters to be scanned 622 * @throws FileNotFoundException if source is not found 623 * @throws IllegalArgumentException if the specified encoding is 624 * not found 625 */ 626 public Scanner(File source, String charsetName) 627 throws FileNotFoundException 628 { 629 this(Objects.requireNonNull(source), toDecoder(charsetName)); 630 } 631 632 private Scanner(File source, CharsetDecoder dec) 633 throws FileNotFoundException 634 { 635 this(makeReadable((ReadableByteChannel)(new FileInputStream(source).getChannel()), dec)); 636 } 637 638 private static CharsetDecoder toDecoder(String charsetName) { 639 Objects.requireNonNull(charsetName, "charsetName"); 640 try { 641 return Charset.forName(charsetName).newDecoder(); 642 } catch (IllegalCharsetNameException|UnsupportedCharsetException unused) { 643 throw new IllegalArgumentException(charsetName); 644 } 645 } 646 647 private static Readable makeReadable(ReadableByteChannel source, 648 CharsetDecoder dec) { 649 return Channels.newReader(source, dec, -1); 650 } 651 652 /** 653 * Constructs a new {@code Scanner} that produces values scanned 654 * from the specified file. Bytes from the file are converted into 655 * characters using the underlying platform's 656 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 657 * 658 * @param source 659 * the path to the file to be scanned 660 * @throws IOException 661 * if an I/O error occurs opening source 662 * 663 * @since 1.7 664 */ 665 public Scanner(Path source) 666 throws IOException 667 { 668 this(Files.newInputStream(source)); 669 } 670 671 /** 672 * Constructs a new {@code Scanner} that produces values scanned 673 * from the specified file. Bytes from the file are converted into 674 * characters using the specified charset. 
675 * 676 * @param source 677 * the path to the file to be scanned 678 * @param charsetName 679 * The encoding type used to convert bytes from the file 680 * into characters to be scanned 681 * @throws IOException 682 * if an I/O error occurs opening source 683 * @throws IllegalArgumentException 684 * if the specified encoding is not found 685 * @since 1.7 686 */ 687 public Scanner(Path source, String charsetName) throws IOException { 688 this(Objects.requireNonNull(source), toCharset(charsetName)); 689 } 690 691 private Scanner(Path source, Charset charset) throws IOException { 692 this(makeReadable(Files.newInputStream(source), charset)); 693 } 694 695 /** 696 * Constructs a new {@code Scanner} that produces values scanned 697 * from the specified string. 698 * 699 * @param source A string to scan 700 */ 701 public Scanner(String source) { 702 this(new StringReader(source), WHITESPACE_PATTERN); 703 } 704 705 /** 706 * Constructs a new {@code Scanner} that produces values scanned 707 * from the specified channel. Bytes from the source are converted into 708 * characters using the underlying platform's 709 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 710 * 711 * @param source A channel to scan 712 */ 713 public Scanner(ReadableByteChannel source) { 714 this(makeReadable(Objects.requireNonNull(source, "source")), 715 WHITESPACE_PATTERN); 716 } 717 718 private static Readable makeReadable(ReadableByteChannel source) { 719 return makeReadable(source, Charset.defaultCharset().newDecoder()); 720 } 721 722 /** 723 * Constructs a new {@code Scanner} that produces values scanned 724 * from the specified channel. Bytes from the source are converted into 725 * characters using the specified charset. 
726 * 727 * @param source A channel to scan 728 * @param charsetName The encoding type used to convert bytes from the 729 * channel into characters to be scanned 730 * @throws IllegalArgumentException if the specified character set 731 * does not exist 732 */ 733 public Scanner(ReadableByteChannel source, String charsetName) { 734 this(makeReadable(Objects.requireNonNull(source, "source"), toDecoder(charsetName)), 735 WHITESPACE_PATTERN); 736 } 737 738 // Private primitives used to support scanning 739 740 private void saveState() { 741 savedScannerPosition = position; 742 } 743 744 private void revertState() { 745 this.position = savedScannerPosition; 746 savedScannerPosition = -1; 747 skipped = false; 748 } 749 750 private boolean revertState(boolean b) { 751 this.position = savedScannerPosition; 752 savedScannerPosition = -1; 753 skipped = false; 754 return b; 755 } 756 757 private void cacheResult() { 758 hasNextResult = matcher.group(); 759 hasNextPosition = matcher.end(); 760 hasNextPattern = matcher.pattern(); 761 } 762 763 private void cacheResult(String result) { 764 hasNextResult = result; 765 hasNextPosition = matcher.end(); 766 hasNextPattern = matcher.pattern(); 767 } 768 769 // Clears both regular cache and type cache 770 private void clearCaches() { 771 hasNextPattern = null; 772 typeCache = null; 773 } 774 775 // Also clears both the regular cache and the type cache 776 private String getCachedResult() { 777 position = hasNextPosition; 778 hasNextPattern = null; 779 typeCache = null; 780 return hasNextResult; 781 } 782 783 // Also clears both the regular cache and the type cache 784 private void useTypeCache() { 785 if (closed) 786 throw new IllegalStateException("Scanner closed"); 787 position = hasNextPosition; 788 hasNextPattern = null; 789 typeCache = null; 790 } 791 792 // Tries to read more input. May block. 
793 private void readInput() { 794 if (buf.limit() == buf.capacity()) 795 makeSpace(); 796 797 // Prepare to receive data 798 int p = buf.position(); 799 buf.position(buf.limit()); 800 buf.limit(buf.capacity()); 801 802 int n = 0; 803 try { 804 n = source.read(buf); 805 } catch (IOException ioe) { 806 lastException = ioe; 807 n = -1; 808 } 809 810 if (n == -1) { 811 sourceClosed = true; 812 needInput = false; 813 } 814 815 if (n > 0) 816 needInput = false; 817 818 // Restore current position and limit for reading 819 buf.limit(buf.position()); 820 buf.position(p); 821 } 822 823 // After this method is called there will either be an exception 824 // or else there will be space in the buffer 825 private boolean makeSpace() { 826 clearCaches(); 827 int offset = savedScannerPosition == -1 ? 828 position : savedScannerPosition; 829 buf.position(offset); 830 // Gain space by compacting buffer 831 if (offset > 0) { 832 buf.compact(); 833 translateSavedIndexes(offset); 834 position -= offset; 835 buf.flip(); 836 return true; 837 } 838 // Gain space by growing buffer 839 int newSize = buf.capacity() * 2; 840 CharBuffer newBuf = CharBuffer.allocate(newSize); 841 newBuf.put(buf); 842 newBuf.flip(); 843 translateSavedIndexes(offset); 844 position -= offset; 845 buf = newBuf; 846 matcher.reset(buf); 847 return true; 848 } 849 850 // When a buffer compaction/reallocation occurs the saved indexes must 851 // be modified appropriately 852 private void translateSavedIndexes(int offset) { 853 if (savedScannerPosition != -1) 854 savedScannerPosition -= offset; 855 } 856 857 // If we are at the end of input then NoSuchElement; 858 // If there is still input left then InputMismatch 859 private void throwFor() { 860 skipped = false; 861 if ((sourceClosed) && (position == buf.limit())) 862 throw new NoSuchElementException(); 863 else 864 throw new InputMismatchException(); 865 } 866 867 // Returns true if a complete token or partial token is in the buffer. 
868 // It is not necessary to find a complete token since a partial token 869 // means that there will be another token with or without more input. 870 private boolean hasTokenInBuffer() { 871 matchValid = false; 872 matcher.usePattern(delimPattern); 873 matcher.region(position, buf.limit()); 874 875 // Skip delims first 876 if (matcher.lookingAt()) 877 position = matcher.end(); 878 879 // If we are sitting at the end, no more tokens in buffer 880 if (position == buf.limit()) 881 return false; 882 883 return true; 884 } 885 886 /* 887 * Returns a "complete token" that matches the specified pattern 888 * 889 * A token is complete if surrounded by delims; a partial token 890 * is prefixed by delims but not postfixed by them 891 * 892 * The position is advanced to the end of that complete token 893 * 894 * Pattern == null means accept any token at all 895 * 896 * Triple return: 897 * 1. valid string means it was found 898 * 2. null with needInput=false means we won't ever find it 899 * 3. null with needInput=true means try again after readInput 900 */ 901 private String getCompleteTokenInBuffer(Pattern pattern) { 902 matchValid = false; 903 904 // Skip delims first 905 matcher.usePattern(delimPattern); 906 if (!skipped) { // Enforcing only one skip of leading delims 907 matcher.region(position, buf.limit()); 908 if (matcher.lookingAt()) { 909 // If more input could extend the delimiters then we must wait 910 // for more input 911 if (matcher.hitEnd() && !sourceClosed) { 912 needInput = true; 913 return null; 914 } 915 // The delims were whole and the matcher should skip them 916 skipped = true; 917 position = matcher.end(); 918 } 919 } 920 921 // If we are sitting at the end, no more tokens in buffer 922 if (position == buf.limit()) { 923 if (sourceClosed) 924 return null; 925 needInput = true; 926 return null; 927 } 928 929 // Must look for next delims. 
Simply attempting to match the 930 // pattern at this point may find a match but it might not be 931 // the first longest match because of missing input, or it might 932 // match a partial token instead of the whole thing. 933 934 // Then look for next delims 935 matcher.region(position, buf.limit()); 936 boolean foundNextDelim = matcher.find(); 937 if (foundNextDelim && (matcher.end() == position)) { 938 // Zero length delimiter match; we should find the next one 939 // using the automatic advance past a zero length match; 940 // Otherwise we have just found the same one we just skipped 941 foundNextDelim = matcher.find(); 942 } 943 if (foundNextDelim) { 944 // In the rare case that more input could cause the match 945 // to be lost and there is more input coming we must wait 946 // for more input. Note that hitting the end is okay as long 947 // as the match cannot go away. It is the beginning of the 948 // next delims we want to be sure about, we don't care if 949 // they potentially extend further. 950 if (matcher.requireEnd() && !sourceClosed) { 951 needInput = true; 952 return null; 953 } 954 int tokenEnd = matcher.start(); 955 // There is a complete token. 
956 if (pattern == null) { 957 // Must continue with match to provide valid MatchResult 958 pattern = FIND_ANY_PATTERN; 959 } 960 // Attempt to match against the desired pattern 961 matcher.usePattern(pattern); 962 matcher.region(position, tokenEnd); 963 if (matcher.matches()) { 964 String s = matcher.group(); 965 position = matcher.end(); 966 return s; 967 } else { // Complete token but it does not match 968 return null; 969 } 970 } 971 972 // If we can't find the next delims but no more input is coming, 973 // then we can treat the remainder as a whole token 974 if (sourceClosed) { 975 if (pattern == null) { 976 // Must continue with match to provide valid MatchResult 977 pattern = FIND_ANY_PATTERN; 978 } 979 // Last token; Match the pattern here or throw 980 matcher.usePattern(pattern); 981 matcher.region(position, buf.limit()); 982 if (matcher.matches()) { 983 String s = matcher.group(); 984 position = matcher.end(); 985 return s; 986 } 987 // Last piece does not match 988 return null; 989 } 990 991 // There is a partial token in the buffer; must read more 992 // to complete it 993 needInput = true; 994 return null; 995 } 996 997 // Finds the specified pattern in the buffer up to horizon. 998 // Returns true if the specified input pattern was matched, 999 // and leaves the matcher field with the current match state. 
1000 private boolean findPatternInBuffer(Pattern pattern, int horizon) { 1001 matchValid = false; 1002 matcher.usePattern(pattern); 1003 int bufferLimit = buf.limit(); 1004 int horizonLimit = -1; 1005 int searchLimit = bufferLimit; 1006 if (horizon > 0) { 1007 horizonLimit = position + horizon; 1008 if (horizonLimit < bufferLimit) 1009 searchLimit = horizonLimit; 1010 } 1011 matcher.region(position, searchLimit); 1012 if (matcher.find()) { 1013 if (matcher.hitEnd() && (!sourceClosed)) { 1014 // The match may be longer if didn't hit horizon or real end 1015 if (searchLimit != horizonLimit) { 1016 // Hit an artificial end; try to extend the match 1017 needInput = true; 1018 return false; 1019 } 1020 // The match could go away depending on what is next 1021 if ((searchLimit == horizonLimit) && matcher.requireEnd()) { 1022 // Rare case: we hit the end of input and it happens 1023 // that it is at the horizon and the end of input is 1024 // required for the match. 1025 needInput = true; 1026 return false; 1027 } 1028 } 1029 // Did not hit end, or hit real end, or hit horizon 1030 position = matcher.end(); 1031 return true; 1032 } 1033 1034 if (sourceClosed) 1035 return false; 1036 1037 // If there is no specified horizon, or if we have not searched 1038 // to the specified horizon yet, get more input 1039 if ((horizon == 0) || (searchLimit != horizonLimit)) 1040 needInput = true; 1041 return false; 1042 } 1043 1044 // Attempts to match a pattern anchored at the current position. 1045 // Returns true if the specified input pattern was matched, 1046 // and leaves the matcher field with the current match state. 
1047 private boolean matchPatternInBuffer(Pattern pattern) { 1048 matchValid = false; 1049 matcher.usePattern(pattern); 1050 matcher.region(position, buf.limit()); 1051 if (matcher.lookingAt()) { 1052 if (matcher.hitEnd() && (!sourceClosed)) { 1053 // Get more input and try again 1054 needInput = true; 1055 return false; 1056 } 1057 position = matcher.end(); 1058 return true; 1059 } 1060 1061 if (sourceClosed) 1062 return false; 1063 1064 // Read more to find pattern 1065 needInput = true; 1066 return false; 1067 } 1068 1069 // Throws if the scanner is closed 1070 private void ensureOpen() { 1071 if (closed) 1072 throw new IllegalStateException("Scanner closed"); 1073 } 1074 1075 // Public methods 1076 1077 /** 1078 * Closes this scanner. 1079 * 1080 * <p> If this scanner has not yet been closed then if its underlying 1081 * {@linkplain java.lang.Readable readable} also implements the {@link 1082 * java.io.Closeable} interface then the readable's {@code close} method 1083 * will be invoked. If this scanner is already closed then invoking this 1084 * method will have no effect. 1085 * 1086 * <p>Attempting to perform search operations after a scanner has 1087 * been closed will result in an {@link IllegalStateException}. 1088 * 1089 */ 1090 public void close() { 1091 if (closed) 1092 return; 1093 if (source instanceof Closeable) { 1094 try { 1095 ((Closeable)source).close(); 1096 } catch (IOException ioe) { 1097 lastException = ioe; 1098 } 1099 } 1100 sourceClosed = true; 1101 source = null; 1102 closed = true; 1103 } 1104 1105 /** 1106 * Returns the {@code IOException} last thrown by this 1107 * {@code Scanner}'s underlying {@code Readable}. This method 1108 * returns {@code null} if no such exception exists. 
1109 * 1110 * @return the last exception thrown by this scanner's readable 1111 */ 1112 public IOException ioException() { 1113 return lastException; 1114 } 1115 1116 /** 1117 * Returns the {@code Pattern} this {@code Scanner} is currently 1118 * using to match delimiters. 1119 * 1120 * @return this scanner's delimiting pattern. 1121 */ 1122 public Pattern delimiter() { 1123 return delimPattern; 1124 } 1125 1126 /** 1127 * Sets this scanner's delimiting pattern to the specified pattern. 1128 * 1129 * @param pattern A delimiting pattern 1130 * @return this scanner 1131 */ 1132 public Scanner useDelimiter(Pattern pattern) { 1133 modCount++; 1134 delimPattern = pattern; 1135 return this; 1136 } 1137 1138 /** 1139 * Sets this scanner's delimiting pattern to a pattern constructed from 1140 * the specified {@code String}. 1141 * 1142 * <p> An invocation of this method of the form 1143 * {@code useDelimiter(pattern)} behaves in exactly the same way as the 1144 * invocation {@code useDelimiter(Pattern.compile(pattern))}. 1145 * 1146 * <p> Invoking the {@link #reset} method will set the scanner's delimiter 1147 * to the <a href= "#default-delimiter">default</a>. 1148 * 1149 * @param pattern A string specifying a delimiting pattern 1150 * @return this scanner 1151 */ 1152 public Scanner useDelimiter(String pattern) { 1153 modCount++; 1154 delimPattern = patternCache.forName(pattern); 1155 return this; 1156 } 1157 1158 /** 1159 * Returns this scanner's locale. 1160 * 1161 * <p>A scanner's locale affects many elements of its default 1162 * primitive matching regular expressions; see 1163 * <a href= "#localized-numbers">localized numbers</a> above. 1164 * 1165 * @return this scanner's locale 1166 */ 1167 public Locale locale() { 1168 return this.locale; 1169 } 1170 1171 /** 1172 * Sets this scanner's locale to the specified locale. 
1173 * 1174 * <p>A scanner's locale affects many elements of its default 1175 * primitive matching regular expressions; see 1176 * <a href= "#localized-numbers">localized numbers</a> above. 1177 * 1178 * <p>Invoking the {@link #reset} method will set the scanner's locale to 1179 * the <a href= "#initial-locale">initial locale</a>. 1180 * 1181 * @param locale A string specifying the locale to use 1182 * @return this scanner 1183 */ 1184 public Scanner useLocale(Locale locale) { 1185 if (locale.equals(this.locale)) 1186 return this; 1187 1188 modCount++; 1189 this.locale = locale; 1190 DecimalFormat df = 1191 (DecimalFormat)NumberFormat.getNumberInstance(locale); 1192 DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale); 1193 1194 // These must be literalized to avoid collision with regex 1195 // metacharacters such as dot or parenthesis 1196 groupSeparator = "\\" + dfs.getGroupingSeparator(); 1197 decimalSeparator = "\\" + dfs.getDecimalSeparator(); 1198 1199 // Quoting the nonzero length locale-specific things 1200 // to avoid potential conflict with metacharacters 1201 nanString = "\\Q" + dfs.getNaN() + "\\E"; 1202 infinityString = "\\Q" + dfs.getInfinity() + "\\E"; 1203 positivePrefix = df.getPositivePrefix(); 1204 if (positivePrefix.length() > 0) 1205 positivePrefix = "\\Q" + positivePrefix + "\\E"; 1206 negativePrefix = df.getNegativePrefix(); 1207 if (negativePrefix.length() > 0) 1208 negativePrefix = "\\Q" + negativePrefix + "\\E"; 1209 positiveSuffix = df.getPositiveSuffix(); 1210 if (positiveSuffix.length() > 0) 1211 positiveSuffix = "\\Q" + positiveSuffix + "\\E"; 1212 negativeSuffix = df.getNegativeSuffix(); 1213 if (negativeSuffix.length() > 0) 1214 negativeSuffix = "\\Q" + negativeSuffix + "\\E"; 1215 1216 // Force rebuilding and recompilation of locale dependent 1217 // primitive patterns 1218 integerPattern = null; 1219 floatPattern = null; 1220 1221 return this; 1222 } 1223 1224 /** 1225 * Returns this scanner's default radix. 
1226 * 1227 * <p>A scanner's radix affects elements of its default 1228 * number matching regular expressions; see 1229 * <a href= "#localized-numbers">localized numbers</a> above. 1230 * 1231 * @return the default radix of this scanner 1232 */ 1233 public int radix() { 1234 return this.defaultRadix; 1235 } 1236 1237 /** 1238 * Sets this scanner's default radix to the specified radix. 1239 * 1240 * <p>A scanner's radix affects elements of its default 1241 * number matching regular expressions; see 1242 * <a href= "#localized-numbers">localized numbers</a> above. 1243 * 1244 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 1245 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 1246 * {@code IllegalArgumentException} is thrown. 1247 * 1248 * <p>Invoking the {@link #reset} method will set the scanner's radix to 1249 * {@code 10}. 1250 * 1251 * @param radix The radix to use when scanning numbers 1252 * @return this scanner 1253 * @throws IllegalArgumentException if radix is out of range 1254 */ 1255 public Scanner useRadix(int radix) { 1256 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) 1257 throw new IllegalArgumentException("radix:"+radix); 1258 1259 if (this.defaultRadix == radix) 1260 return this; 1261 modCount++; 1262 this.defaultRadix = radix; 1263 // Force rebuilding and recompilation of radix dependent patterns 1264 integerPattern = null; 1265 return this; 1266 } 1267 1268 // The next operation should occur in the specified radix but 1269 // the default is left untouched. 1270 private void setRadix(int radix) { 1271 if (this.radix != radix) { 1272 // Force rebuilding and recompilation of radix dependent patterns 1273 integerPattern = null; 1274 this.radix = radix; 1275 } 1276 } 1277 1278 /** 1279 * Returns the match result of the last scanning operation performed 1280 * by this scanner. 
This method throws {@code IllegalStateException} 1281 * if no match has been performed, or if the last match was 1282 * not successful. 1283 * 1284 * <p>The various {@code next} methods of {@code Scanner} 1285 * make a match result available if they complete without throwing an 1286 * exception. For instance, after an invocation of the {@link #nextInt} 1287 * method that returned an int, this method returns a 1288 * {@code MatchResult} for the search of the 1289 * <a href="#Integer-regex"><i>Integer</i></a> regular expression 1290 * defined above. Similarly the {@link #findInLine findInLine()}, 1291 * {@link #findWithinHorizon findWithinHorizon()}, and {@link #skip skip()} 1292 * methods will make a match available if they succeed. 1293 * 1294 * @return a match result for the last match operation 1295 * @throws IllegalStateException If no match result is available 1296 */ 1297 public MatchResult match() { 1298 if (!matchValid) 1299 throw new IllegalStateException("No match result available"); 1300 return matcher.toMatchResult(); 1301 } 1302 1303 /** 1304 * <p>Returns the string representation of this {@code Scanner}. The 1305 * string representation of a {@code Scanner} contains information 1306 * that may be useful for debugging. The exact format is unspecified. 
1307 * 1308 * @return The string representation of this scanner 1309 */ 1310 public String toString() { 1311 StringBuilder sb = new StringBuilder(); 1312 sb.append("java.util.Scanner"); 1313 sb.append("[delimiters=" + delimPattern + "]"); 1314 sb.append("[position=" + position + "]"); 1315 sb.append("[match valid=" + matchValid + "]"); 1316 sb.append("[need input=" + needInput + "]"); 1317 sb.append("[source closed=" + sourceClosed + "]"); 1318 sb.append("[skipped=" + skipped + "]"); 1319 sb.append("[group separator=" + groupSeparator + "]"); 1320 sb.append("[decimal separator=" + decimalSeparator + "]"); 1321 sb.append("[positive prefix=" + positivePrefix + "]"); 1322 sb.append("[negative prefix=" + negativePrefix + "]"); 1323 sb.append("[positive suffix=" + positiveSuffix + "]"); 1324 sb.append("[negative suffix=" + negativeSuffix + "]"); 1325 sb.append("[NaN string=" + nanString + "]"); 1326 sb.append("[infinity string=" + infinityString + "]"); 1327 return sb.toString(); 1328 } 1329 1330 /** 1331 * Returns true if this scanner has another token in its input. 1332 * This method may block while waiting for input to scan. 1333 * The scanner does not advance past any input. 1334 * 1335 * @return true if and only if this scanner has another token 1336 * @throws IllegalStateException if this scanner is closed 1337 * @see java.util.Iterator 1338 */ 1339 public boolean hasNext() { 1340 ensureOpen(); 1341 saveState(); 1342 modCount++; 1343 while (!sourceClosed) { 1344 if (hasTokenInBuffer()) 1345 return revertState(true); 1346 readInput(); 1347 } 1348 boolean result = hasTokenInBuffer(); 1349 return revertState(result); 1350 } 1351 1352 /** 1353 * Finds and returns the next complete token from this scanner. 1354 * A complete token is preceded and followed by input that matches 1355 * the delimiter pattern. This method may block while waiting for input 1356 * to scan, even if a previous invocation of {@link #hasNext} returned 1357 * {@code true}. 
1358 * 1359 * @return the next token 1360 * @throws NoSuchElementException if no more tokens are available 1361 * @throws IllegalStateException if this scanner is closed 1362 * @see java.util.Iterator 1363 */ 1364 public String next() { 1365 ensureOpen(); 1366 clearCaches(); 1367 modCount++; 1368 1369 while (true) { 1370 String token = getCompleteTokenInBuffer(null); 1371 if (token != null) { 1372 matchValid = true; 1373 skipped = false; 1374 return token; 1375 } 1376 if (needInput) 1377 readInput(); 1378 else 1379 throwFor(); 1380 } 1381 } 1382 1383 /** 1384 * The remove operation is not supported by this implementation of 1385 * {@code Iterator}. 1386 * 1387 * @throws UnsupportedOperationException if this method is invoked. 1388 * @see java.util.Iterator 1389 */ 1390 public void remove() { 1391 throw new UnsupportedOperationException(); 1392 } 1393 1394 /** 1395 * Returns true if the next token matches the pattern constructed from the 1396 * specified string. The scanner does not advance past any input. 1397 * 1398 * <p> An invocation of this method of the form {@code hasNext(pattern)} 1399 * behaves in exactly the same way as the invocation 1400 * {@code hasNext(Pattern.compile(pattern))}. 1401 * 1402 * @param pattern a string specifying the pattern to scan 1403 * @return true if and only if this scanner has another token matching 1404 * the specified pattern 1405 * @throws IllegalStateException if this scanner is closed 1406 */ 1407 public boolean hasNext(String pattern) { 1408 return hasNext(patternCache.forName(pattern)); 1409 } 1410 1411 /** 1412 * Returns the next token if it matches the pattern constructed from the 1413 * specified string. If the match is successful, the scanner advances 1414 * past the input that matched the pattern. 1415 * 1416 * <p> An invocation of this method of the form {@code next(pattern)} 1417 * behaves in exactly the same way as the invocation 1418 * {@code next(Pattern.compile(pattern))}. 
1419 * 1420 * @param pattern a string specifying the pattern to scan 1421 * @return the next token 1422 * @throws NoSuchElementException if no such tokens are available 1423 * @throws IllegalStateException if this scanner is closed 1424 */ 1425 public String next(String pattern) { 1426 return next(patternCache.forName(pattern)); 1427 } 1428 1429 /** 1430 * Returns true if the next complete token matches the specified pattern. 1431 * A complete token is prefixed and postfixed by input that matches 1432 * the delimiter pattern. This method may block while waiting for input. 1433 * The scanner does not advance past any input. 1434 * 1435 * @param pattern the pattern to scan for 1436 * @return true if and only if this scanner has another token matching 1437 * the specified pattern 1438 * @throws IllegalStateException if this scanner is closed 1439 */ 1440 public boolean hasNext(Pattern pattern) { 1441 ensureOpen(); 1442 if (pattern == null) 1443 throw new NullPointerException(); 1444 hasNextPattern = null; 1445 saveState(); 1446 modCount++; 1447 1448 while (true) { 1449 if (getCompleteTokenInBuffer(pattern) != null) { 1450 matchValid = true; 1451 cacheResult(); 1452 return revertState(true); 1453 } 1454 if (needInput) 1455 readInput(); 1456 else 1457 return revertState(false); 1458 } 1459 } 1460 1461 /** 1462 * Returns the next token if it matches the specified pattern. This 1463 * method may block while waiting for input to scan, even if a previous 1464 * invocation of {@link #hasNext(Pattern)} returned {@code true}. 1465 * If the match is successful, the scanner advances past the input that 1466 * matched the pattern. 
1467 * 1468 * @param pattern the pattern to scan for 1469 * @return the next token 1470 * @throws NoSuchElementException if no more tokens are available 1471 * @throws IllegalStateException if this scanner is closed 1472 */ 1473 public String next(Pattern pattern) { 1474 ensureOpen(); 1475 if (pattern == null) 1476 throw new NullPointerException(); 1477 1478 modCount++; 1479 // Did we already find this pattern? 1480 if (hasNextPattern == pattern) 1481 return getCachedResult(); 1482 clearCaches(); 1483 1484 // Search for the pattern 1485 while (true) { 1486 String token = getCompleteTokenInBuffer(pattern); 1487 if (token != null) { 1488 matchValid = true; 1489 skipped = false; 1490 return token; 1491 } 1492 if (needInput) 1493 readInput(); 1494 else 1495 throwFor(); 1496 } 1497 } 1498 1499 /** 1500 * Returns true if there is another line in the input of this scanner. 1501 * This method may block while waiting for input. The scanner does not 1502 * advance past any input. 1503 * 1504 * @return true if and only if this scanner has another line of input 1505 * @throws IllegalStateException if this scanner is closed 1506 */ 1507 public boolean hasNextLine() { 1508 saveState(); 1509 1510 modCount++; 1511 String result = findWithinHorizon(linePattern(), 0); 1512 if (result != null) { 1513 MatchResult mr = this.match(); 1514 String lineSep = mr.group(1); 1515 if (lineSep != null) { 1516 result = result.substring(0, result.length() - 1517 lineSep.length()); 1518 cacheResult(result); 1519 1520 } else { 1521 cacheResult(); 1522 } 1523 } 1524 revertState(); 1525 return (result != null); 1526 } 1527 1528 /** 1529 * Advances this scanner past the current line and returns the input 1530 * that was skipped. 1531 * 1532 * This method returns the rest of the current line, excluding any line 1533 * separator at the end. The position is set to the beginning of the next 1534 * line. 
1535 * 1536 * <p>Since this method continues to search through the input looking 1537 * for a line separator, it may buffer all of the input searching for 1538 * the line to skip if no line separators are present. 1539 * 1540 * @return the line that was skipped 1541 * @throws NoSuchElementException if no line was found 1542 * @throws IllegalStateException if this scanner is closed 1543 */ 1544 public String nextLine() { 1545 modCount++; 1546 if (hasNextPattern == linePattern()) 1547 return getCachedResult(); 1548 clearCaches(); 1549 1550 String result = findWithinHorizon(linePattern, 0); 1551 if (result == null) 1552 throw new NoSuchElementException("No line found"); 1553 MatchResult mr = this.match(); 1554 String lineSep = mr.group(1); 1555 if (lineSep != null) 1556 result = result.substring(0, result.length() - lineSep.length()); 1557 if (result == null) 1558 throw new NoSuchElementException(); 1559 else 1560 return result; 1561 } 1562 1563 // Public methods that ignore delimiters 1564 1565 /** 1566 * Attempts to find the next occurrence of a pattern constructed from the 1567 * specified string, ignoring delimiters. 1568 * 1569 * <p>An invocation of this method of the form {@code findInLine(pattern)} 1570 * behaves in exactly the same way as the invocation 1571 * {@code findInLine(Pattern.compile(pattern))}. 1572 * 1573 * @param pattern a string specifying the pattern to search for 1574 * @return the text that matched the specified pattern 1575 * @throws IllegalStateException if this scanner is closed 1576 */ 1577 public String findInLine(String pattern) { 1578 return findInLine(patternCache.forName(pattern)); 1579 } 1580 1581 /** 1582 * Attempts to find the next occurrence of the specified pattern ignoring 1583 * delimiters. If the pattern is found before the next line separator, the 1584 * scanner advances past the input that matched and returns the string that 1585 * matched the pattern. 
1586 * If no such pattern is detected in the input up to the next line 1587 * separator, then {@code null} is returned and the scanner's 1588 * position is unchanged. This method may block waiting for input that 1589 * matches the pattern. 1590 * 1591 * <p>Since this method continues to search through the input looking 1592 * for the specified pattern, it may buffer all of the input searching for 1593 * the desired token if no line separators are present. 1594 * 1595 * @param pattern the pattern to scan for 1596 * @return the text that matched the specified pattern 1597 * @throws IllegalStateException if this scanner is closed 1598 */ 1599 public String findInLine(Pattern pattern) { 1600 ensureOpen(); 1601 if (pattern == null) 1602 throw new NullPointerException(); 1603 clearCaches(); 1604 modCount++; 1605 // Expand buffer to include the next newline or end of input 1606 int endPosition = 0; 1607 saveState(); 1608 while (true) { 1609 if (findPatternInBuffer(separatorPattern(), 0)) { 1610 endPosition = matcher.start(); 1611 break; // up to next newline 1612 } 1613 if (needInput) { 1614 readInput(); 1615 } else { 1616 endPosition = buf.limit(); 1617 break; // up to end of input 1618 } 1619 } 1620 revertState(); 1621 int horizonForLine = endPosition - position; 1622 // If there is nothing between the current pos and the next 1623 // newline simply return null, invoking findWithinHorizon 1624 // with "horizon=0" will scan beyond the line bound. 1625 if (horizonForLine == 0) 1626 return null; 1627 // Search for the pattern 1628 return findWithinHorizon(pattern, horizonForLine); 1629 } 1630 1631 /** 1632 * Attempts to find the next occurrence of a pattern constructed from the 1633 * specified string, ignoring delimiters. 1634 * 1635 * <p>An invocation of this method of the form 1636 * {@code findWithinHorizon(pattern)} behaves in exactly the same way as 1637 * the invocation 1638 * {@code findWithinHorizon(Pattern.compile(pattern), horizon)}. 
1639 * 1640 * @param pattern a string specifying the pattern to search for 1641 * @param horizon the search horizon 1642 * @return the text that matched the specified pattern 1643 * @throws IllegalStateException if this scanner is closed 1644 * @throws IllegalArgumentException if horizon is negative 1645 */ 1646 public String findWithinHorizon(String pattern, int horizon) { 1647 return findWithinHorizon(patternCache.forName(pattern), horizon); 1648 } 1649 1650 /** 1651 * Attempts to find the next occurrence of the specified pattern. 1652 * 1653 * <p>This method searches through the input up to the specified 1654 * search horizon, ignoring delimiters. If the pattern is found the 1655 * scanner advances past the input that matched and returns the string 1656 * that matched the pattern. If no such pattern is detected then the 1657 * null is returned and the scanner's position remains unchanged. This 1658 * method may block waiting for input that matches the pattern. 1659 * 1660 * <p>A scanner will never search more than {@code horizon} code 1661 * points beyond its current position. Note that a match may be clipped 1662 * by the horizon; that is, an arbitrary match result may have been 1663 * different if the horizon had been larger. The scanner treats the 1664 * horizon as a transparent, non-anchoring bound (see {@link 1665 * Matcher#useTransparentBounds} and {@link Matcher#useAnchoringBounds}). 1666 * 1667 * <p>If horizon is {@code 0}, then the horizon is ignored and 1668 * this method continues to search through the input looking for the 1669 * specified pattern without bound. In this case it may buffer all of 1670 * the input searching for the pattern. 1671 * 1672 * <p>If horizon is negative, then an IllegalArgumentException is 1673 * thrown. 
1674 * 1675 * @param pattern the pattern to scan for 1676 * @param horizon the search horizon 1677 * @return the text that matched the specified pattern 1678 * @throws IllegalStateException if this scanner is closed 1679 * @throws IllegalArgumentException if horizon is negative 1680 */ 1681 public String findWithinHorizon(Pattern pattern, int horizon) { 1682 ensureOpen(); 1683 if (pattern == null) 1684 throw new NullPointerException(); 1685 if (horizon < 0) 1686 throw new IllegalArgumentException("horizon < 0"); 1687 clearCaches(); 1688 modCount++; 1689 1690 // Search for the pattern 1691 while (true) { 1692 if (findPatternInBuffer(pattern, horizon)) { 1693 matchValid = true; 1694 return matcher.group(); 1695 } 1696 if (needInput) 1697 readInput(); 1698 else 1699 break; // up to end of input 1700 } 1701 return null; 1702 } 1703 1704 /** 1705 * Skips input that matches the specified pattern, ignoring delimiters. 1706 * This method will skip input if an anchored match of the specified 1707 * pattern succeeds. 1708 * 1709 * <p>If a match to the specified pattern is not found at the 1710 * current position, then no input is skipped and a 1711 * {@code NoSuchElementException} is thrown. 1712 * 1713 * <p>Since this method seeks to match the specified pattern starting at 1714 * the scanner's current position, patterns that can match a lot of 1715 * input (".*", for example) may cause the scanner to buffer a large 1716 * amount of input. 1717 * 1718 * <p>Note that it is possible to skip something without risking a 1719 * {@code NoSuchElementException} by using a pattern that can 1720 * match nothing, e.g., {@code sc.skip("[ \t]*")}. 
1721 * 1722 * @param pattern a string specifying the pattern to skip over 1723 * @return this scanner 1724 * @throws NoSuchElementException if the specified pattern is not found 1725 * @throws IllegalStateException if this scanner is closed 1726 */ 1727 public Scanner skip(Pattern pattern) { 1728 ensureOpen(); 1729 if (pattern == null) 1730 throw new NullPointerException(); 1731 clearCaches(); 1732 modCount++; 1733 1734 // Search for the pattern 1735 while (true) { 1736 if (matchPatternInBuffer(pattern)) { 1737 matchValid = true; 1738 position = matcher.end(); 1739 return this; 1740 } 1741 if (needInput) 1742 readInput(); 1743 else 1744 throw new NoSuchElementException(); 1745 } 1746 } 1747 1748 /** 1749 * Skips input that matches a pattern constructed from the specified 1750 * string. 1751 * 1752 * <p> An invocation of this method of the form {@code skip(pattern)} 1753 * behaves in exactly the same way as the invocation 1754 * {@code skip(Pattern.compile(pattern))}. 1755 * 1756 * @param pattern a string specifying the pattern to skip over 1757 * @return this scanner 1758 * @throws IllegalStateException if this scanner is closed 1759 */ 1760 public Scanner skip(String pattern) { 1761 return skip(patternCache.forName(pattern)); 1762 } 1763 1764 // Convenience methods for scanning primitives 1765 1766 /** 1767 * Returns true if the next token in this scanner's input can be 1768 * interpreted as a boolean value using a case insensitive pattern 1769 * created from the string "true|false". The scanner does not 1770 * advance past the input that matched. 1771 * 1772 * @return true if and only if this scanner's next token is a valid 1773 * boolean value 1774 * @throws IllegalStateException if this scanner is closed 1775 */ 1776 public boolean hasNextBoolean() { 1777 return hasNext(boolPattern()); 1778 } 1779 1780 /** 1781 * Scans the next token of the input into a boolean value and returns 1782 * that value. 
This method will throw {@code InputMismatchException} 1783 * if the next token cannot be translated into a valid boolean value. 1784 * If the match is successful, the scanner advances past the input that 1785 * matched. 1786 * 1787 * @return the boolean scanned from the input 1788 * @throws InputMismatchException if the next token is not a valid boolean 1789 * @throws NoSuchElementException if input is exhausted 1790 * @throws IllegalStateException if this scanner is closed 1791 */ 1792 public boolean nextBoolean() { 1793 clearCaches(); 1794 return Boolean.parseBoolean(next(boolPattern())); 1795 } 1796 1797 /** 1798 * Returns true if the next token in this scanner's input can be 1799 * interpreted as a byte value in the default radix using the 1800 * {@link #nextByte} method. The scanner does not advance past any input. 1801 * 1802 * @return true if and only if this scanner's next token is a valid 1803 * byte value 1804 * @throws IllegalStateException if this scanner is closed 1805 */ 1806 public boolean hasNextByte() { 1807 return hasNextByte(defaultRadix); 1808 } 1809 1810 /** 1811 * Returns true if the next token in this scanner's input can be 1812 * interpreted as a byte value in the specified radix using the 1813 * {@link #nextByte} method. The scanner does not advance past any input. 1814 * 1815 * @param radix the radix used to interpret the token as a byte value 1816 * @return true if and only if this scanner's next token is a valid 1817 * byte value 1818 * @throws IllegalStateException if this scanner is closed 1819 */ 1820 public boolean hasNextByte(int radix) { 1821 setRadix(radix); 1822 boolean result = hasNext(integerPattern()); 1823 if (result) { // Cache it 1824 try { 1825 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 
1826 processIntegerToken(hasNextResult) : 1827 hasNextResult; 1828 typeCache = Byte.parseByte(s, radix); 1829 } catch (NumberFormatException nfe) { 1830 result = false; 1831 } 1832 } 1833 return result; 1834 } 1835 1836 /** 1837 * Scans the next token of the input as a {@code byte}. 1838 * 1839 * <p> An invocation of this method of the form 1840 * {@code nextByte()} behaves in exactly the same way as the 1841 * invocation {@code nextByte(radix)}, where {@code radix} 1842 * is the default radix of this scanner. 1843 * 1844 * @return the {@code byte} scanned from the input 1845 * @throws InputMismatchException 1846 * if the next token does not match the <i>Integer</i> 1847 * regular expression, or is out of range 1848 * @throws NoSuchElementException if input is exhausted 1849 * @throws IllegalStateException if this scanner is closed 1850 */ 1851 public byte nextByte() { 1852 return nextByte(defaultRadix); 1853 } 1854 1855 /** 1856 * Scans the next token of the input as a {@code byte}. 1857 * This method will throw {@code InputMismatchException} 1858 * if the next token cannot be translated into a valid byte value as 1859 * described below. If the translation is successful, the scanner advances 1860 * past the input that matched. 1861 * 1862 * <p> If the next token matches the <a 1863 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1864 * above then the token is converted into a {@code byte} value as if by 1865 * removing all locale specific prefixes, group separators, and locale 1866 * specific suffixes, then mapping non-ASCII digits into ASCII 1867 * digits via {@link Character#digit Character.digit}, prepending a 1868 * negative sign (-) if the locale specific negative prefixes and suffixes 1869 * were present, and passing the resulting string to 1870 * {@link Byte#parseByte(String, int) Byte.parseByte} with the 1871 * specified radix. 
1872 * 1873 * @param radix the radix used to interpret the token as a byte value 1874 * @return the {@code byte} scanned from the input 1875 * @throws InputMismatchException 1876 * if the next token does not match the <i>Integer</i> 1877 * regular expression, or is out of range 1878 * @throws NoSuchElementException if input is exhausted 1879 * @throws IllegalStateException if this scanner is closed 1880 */ 1881 public byte nextByte(int radix) { 1882 // Check cached result 1883 if ((typeCache != null) && (typeCache instanceof Byte) 1884 && this.radix == radix) { 1885 byte val = ((Byte)typeCache).byteValue(); 1886 useTypeCache(); 1887 return val; 1888 } 1889 setRadix(radix); 1890 clearCaches(); 1891 // Search for next byte 1892 try { 1893 String s = next(integerPattern()); 1894 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 1895 s = processIntegerToken(s); 1896 return Byte.parseByte(s, radix); 1897 } catch (NumberFormatException nfe) { 1898 position = matcher.start(); // don't skip bad token 1899 throw new InputMismatchException(nfe.getMessage()); 1900 } 1901 } 1902 1903 /** 1904 * Returns true if the next token in this scanner's input can be 1905 * interpreted as a short value in the default radix using the 1906 * {@link #nextShort} method. The scanner does not advance past any input. 1907 * 1908 * @return true if and only if this scanner's next token is a valid 1909 * short value in the default radix 1910 * @throws IllegalStateException if this scanner is closed 1911 */ 1912 public boolean hasNextShort() { 1913 return hasNextShort(defaultRadix); 1914 } 1915 1916 /** 1917 * Returns true if the next token in this scanner's input can be 1918 * interpreted as a short value in the specified radix using the 1919 * {@link #nextShort} method. The scanner does not advance past any input. 
1920 * 1921 * @param radix the radix used to interpret the token as a short value 1922 * @return true if and only if this scanner's next token is a valid 1923 * short value in the specified radix 1924 * @throws IllegalStateException if this scanner is closed 1925 */ 1926 public boolean hasNextShort(int radix) { 1927 setRadix(radix); 1928 boolean result = hasNext(integerPattern()); 1929 if (result) { // Cache it 1930 try { 1931 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 1932 processIntegerToken(hasNextResult) : 1933 hasNextResult; 1934 typeCache = Short.parseShort(s, radix); 1935 } catch (NumberFormatException nfe) { 1936 result = false; 1937 } 1938 } 1939 return result; 1940 } 1941 1942 /** 1943 * Scans the next token of the input as a {@code short}. 1944 * 1945 * <p> An invocation of this method of the form 1946 * {@code nextShort()} behaves in exactly the same way as the 1947 * invocation {@link #nextShort(int) nextShort(radix)}, where {@code radix} 1948 * is the default radix of this scanner. 1949 * 1950 * @return the {@code short} scanned from the input 1951 * @throws InputMismatchException 1952 * if the next token does not match the <i>Integer</i> 1953 * regular expression, or is out of range 1954 * @throws NoSuchElementException if input is exhausted 1955 * @throws IllegalStateException if this scanner is closed 1956 */ 1957 public short nextShort() { 1958 return nextShort(defaultRadix); 1959 } 1960 1961 /** 1962 * Scans the next token of the input as a {@code short}. 1963 * This method will throw {@code InputMismatchException} 1964 * if the next token cannot be translated into a valid short value as 1965 * described below. If the translation is successful, the scanner advances 1966 * past the input that matched. 
1967 * 1968 * <p> If the next token matches the <a 1969 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1970 * above then the token is converted into a {@code short} value as if by 1971 * removing all locale specific prefixes, group separators, and locale 1972 * specific suffixes, then mapping non-ASCII digits into ASCII 1973 * digits via {@link Character#digit Character.digit}, prepending a 1974 * negative sign (-) if the locale specific negative prefixes and suffixes 1975 * were present, and passing the resulting string to 1976 * {@link Short#parseShort(String, int) Short.parseShort} with the 1977 * specified radix. 1978 * 1979 * @param radix the radix used to interpret the token as a short value 1980 * @return the {@code short} scanned from the input 1981 * @throws InputMismatchException 1982 * if the next token does not match the <i>Integer</i> 1983 * regular expression, or is out of range 1984 * @throws NoSuchElementException if input is exhausted 1985 * @throws IllegalStateException if this scanner is closed 1986 */ 1987 public short nextShort(int radix) { 1988 // Check cached result 1989 if ((typeCache != null) && (typeCache instanceof Short) 1990 && this.radix == radix) { 1991 short val = ((Short)typeCache).shortValue(); 1992 useTypeCache(); 1993 return val; 1994 } 1995 setRadix(radix); 1996 clearCaches(); 1997 // Search for next short 1998 try { 1999 String s = next(integerPattern()); 2000 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2001 s = processIntegerToken(s); 2002 return Short.parseShort(s, radix); 2003 } catch (NumberFormatException nfe) { 2004 position = matcher.start(); // don't skip bad token 2005 throw new InputMismatchException(nfe.getMessage()); 2006 } 2007 } 2008 2009 /** 2010 * Returns true if the next token in this scanner's input can be 2011 * interpreted as an int value in the default radix using the 2012 * {@link #nextInt} method. The scanner does not advance past any input. 
2013 * 2014 * @return true if and only if this scanner's next token is a valid 2015 * int value 2016 * @throws IllegalStateException if this scanner is closed 2017 */ 2018 public boolean hasNextInt() { 2019 return hasNextInt(defaultRadix); 2020 } 2021 2022 /** 2023 * Returns true if the next token in this scanner's input can be 2024 * interpreted as an int value in the specified radix using the 2025 * {@link #nextInt} method. The scanner does not advance past any input. 2026 * 2027 * @param radix the radix used to interpret the token as an int value 2028 * @return true if and only if this scanner's next token is a valid 2029 * int value 2030 * @throws IllegalStateException if this scanner is closed 2031 */ 2032 public boolean hasNextInt(int radix) { 2033 setRadix(radix); 2034 boolean result = hasNext(integerPattern()); 2035 if (result) { // Cache it 2036 try { 2037 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2038 processIntegerToken(hasNextResult) : 2039 hasNextResult; 2040 typeCache = Integer.parseInt(s, radix); 2041 } catch (NumberFormatException nfe) { 2042 result = false; 2043 } 2044 } 2045 return result; 2046 } 2047 2048 /** 2049 * The integer token must be stripped of prefixes, group separators, 2050 * and suffixes, non ascii digits must be converted into ascii digits 2051 * before parse will accept it. 
2052 */ 2053 private String processIntegerToken(String token) { 2054 String result = token.replaceAll(""+groupSeparator, ""); 2055 boolean isNegative = false; 2056 int preLen = negativePrefix.length(); 2057 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2058 isNegative = true; 2059 result = result.substring(preLen); 2060 } 2061 int sufLen = negativeSuffix.length(); 2062 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2063 isNegative = true; 2064 result = result.substring(result.length() - sufLen, 2065 result.length()); 2066 } 2067 if (isNegative) 2068 result = "-" + result; 2069 return result; 2070 } 2071 2072 /** 2073 * Scans the next token of the input as an {@code int}. 2074 * 2075 * <p> An invocation of this method of the form 2076 * {@code nextInt()} behaves in exactly the same way as the 2077 * invocation {@code nextInt(radix)}, where {@code radix} 2078 * is the default radix of this scanner. 2079 * 2080 * @return the {@code int} scanned from the input 2081 * @throws InputMismatchException 2082 * if the next token does not match the <i>Integer</i> 2083 * regular expression, or is out of range 2084 * @throws NoSuchElementException if input is exhausted 2085 * @throws IllegalStateException if this scanner is closed 2086 */ 2087 public int nextInt() { 2088 return nextInt(defaultRadix); 2089 } 2090 2091 /** 2092 * Scans the next token of the input as an {@code int}. 2093 * This method will throw {@code InputMismatchException} 2094 * if the next token cannot be translated into a valid int value as 2095 * described below. If the translation is successful, the scanner advances 2096 * past the input that matched. 
2097 * 2098 * <p> If the next token matches the <a 2099 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2100 * above then the token is converted into an {@code int} value as if by 2101 * removing all locale specific prefixes, group separators, and locale 2102 * specific suffixes, then mapping non-ASCII digits into ASCII 2103 * digits via {@link Character#digit Character.digit}, prepending a 2104 * negative sign (-) if the locale specific negative prefixes and suffixes 2105 * were present, and passing the resulting string to 2106 * {@link Integer#parseInt(String, int) Integer.parseInt} with the 2107 * specified radix. 2108 * 2109 * @param radix the radix used to interpret the token as an int value 2110 * @return the {@code int} scanned from the input 2111 * @throws InputMismatchException 2112 * if the next token does not match the <i>Integer</i> 2113 * regular expression, or is out of range 2114 * @throws NoSuchElementException if input is exhausted 2115 * @throws IllegalStateException if this scanner is closed 2116 */ 2117 public int nextInt(int radix) { 2118 // Check cached result 2119 if ((typeCache != null) && (typeCache instanceof Integer) 2120 && this.radix == radix) { 2121 int val = ((Integer)typeCache).intValue(); 2122 useTypeCache(); 2123 return val; 2124 } 2125 setRadix(radix); 2126 clearCaches(); 2127 // Search for next int 2128 try { 2129 String s = next(integerPattern()); 2130 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2131 s = processIntegerToken(s); 2132 return Integer.parseInt(s, radix); 2133 } catch (NumberFormatException nfe) { 2134 position = matcher.start(); // don't skip bad token 2135 throw new InputMismatchException(nfe.getMessage()); 2136 } 2137 } 2138 2139 /** 2140 * Returns true if the next token in this scanner's input can be 2141 * interpreted as a long value in the default radix using the 2142 * {@link #nextLong} method. The scanner does not advance past any input. 
2143 * 2144 * @return true if and only if this scanner's next token is a valid 2145 * long value 2146 * @throws IllegalStateException if this scanner is closed 2147 */ 2148 public boolean hasNextLong() { 2149 return hasNextLong(defaultRadix); 2150 } 2151 2152 /** 2153 * Returns true if the next token in this scanner's input can be 2154 * interpreted as a long value in the specified radix using the 2155 * {@link #nextLong} method. The scanner does not advance past any input. 2156 * 2157 * @param radix the radix used to interpret the token as a long value 2158 * @return true if and only if this scanner's next token is a valid 2159 * long value 2160 * @throws IllegalStateException if this scanner is closed 2161 */ 2162 public boolean hasNextLong(int radix) { 2163 setRadix(radix); 2164 boolean result = hasNext(integerPattern()); 2165 if (result) { // Cache it 2166 try { 2167 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2168 processIntegerToken(hasNextResult) : 2169 hasNextResult; 2170 typeCache = Long.parseLong(s, radix); 2171 } catch (NumberFormatException nfe) { 2172 result = false; 2173 } 2174 } 2175 return result; 2176 } 2177 2178 /** 2179 * Scans the next token of the input as a {@code long}. 2180 * 2181 * <p> An invocation of this method of the form 2182 * {@code nextLong()} behaves in exactly the same way as the 2183 * invocation {@code nextLong(radix)}, where {@code radix} 2184 * is the default radix of this scanner. 2185 * 2186 * @return the {@code long} scanned from the input 2187 * @throws InputMismatchException 2188 * if the next token does not match the <i>Integer</i> 2189 * regular expression, or is out of range 2190 * @throws NoSuchElementException if input is exhausted 2191 * @throws IllegalStateException if this scanner is closed 2192 */ 2193 public long nextLong() { 2194 return nextLong(defaultRadix); 2195 } 2196 2197 /** 2198 * Scans the next token of the input as a {@code long}. 
2199 * This method will throw {@code InputMismatchException} 2200 * if the next token cannot be translated into a valid long value as 2201 * described below. If the translation is successful, the scanner advances 2202 * past the input that matched. 2203 * 2204 * <p> If the next token matches the <a 2205 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2206 * above then the token is converted into a {@code long} value as if by 2207 * removing all locale specific prefixes, group separators, and locale 2208 * specific suffixes, then mapping non-ASCII digits into ASCII 2209 * digits via {@link Character#digit Character.digit}, prepending a 2210 * negative sign (-) if the locale specific negative prefixes and suffixes 2211 * were present, and passing the resulting string to 2212 * {@link Long#parseLong(String, int) Long.parseLong} with the 2213 * specified radix. 2214 * 2215 * @param radix the radix used to interpret the token as an int value 2216 * @return the {@code long} scanned from the input 2217 * @throws InputMismatchException 2218 * if the next token does not match the <i>Integer</i> 2219 * regular expression, or is out of range 2220 * @throws NoSuchElementException if input is exhausted 2221 * @throws IllegalStateException if this scanner is closed 2222 */ 2223 public long nextLong(int radix) { 2224 // Check cached result 2225 if ((typeCache != null) && (typeCache instanceof Long) 2226 && this.radix == radix) { 2227 long val = ((Long)typeCache).longValue(); 2228 useTypeCache(); 2229 return val; 2230 } 2231 setRadix(radix); 2232 clearCaches(); 2233 try { 2234 String s = next(integerPattern()); 2235 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2236 s = processIntegerToken(s); 2237 return Long.parseLong(s, radix); 2238 } catch (NumberFormatException nfe) { 2239 position = matcher.start(); // don't skip bad token 2240 throw new InputMismatchException(nfe.getMessage()); 2241 } 2242 } 2243 2244 /** 2245 * The float token must be stripped of 
prefixes, group separators, 2246 * and suffixes, non ascii digits must be converted into ascii digits 2247 * before parseFloat will accept it. 2248 * 2249 * If there are non-ascii digits in the token these digits must 2250 * be processed before the token is passed to parseFloat. 2251 */ 2252 private String processFloatToken(String token) { 2253 String result = token.replaceAll(groupSeparator, ""); 2254 if (!decimalSeparator.equals("\\.")) 2255 result = result.replaceAll(decimalSeparator, "."); 2256 boolean isNegative = false; 2257 int preLen = negativePrefix.length(); 2258 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2259 isNegative = true; 2260 result = result.substring(preLen); 2261 } 2262 int sufLen = negativeSuffix.length(); 2263 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2264 isNegative = true; 2265 result = result.substring(result.length() - sufLen, 2266 result.length()); 2267 } 2268 if (result.equals(nanString)) 2269 result = "NaN"; 2270 if (result.equals(infinityString)) 2271 result = "Infinity"; 2272 if (isNegative) 2273 result = "-" + result; 2274 2275 // Translate non-ASCII digits 2276 Matcher m = NON_ASCII_DIGIT.matcher(result); 2277 if (m.find()) { 2278 StringBuilder inASCII = new StringBuilder(); 2279 for (int i=0; i<result.length(); i++) { 2280 char nextChar = result.charAt(i); 2281 if (Character.isDigit(nextChar)) { 2282 int d = Character.digit(nextChar, 10); 2283 if (d != -1) 2284 inASCII.append(d); 2285 else 2286 inASCII.append(nextChar); 2287 } else { 2288 inASCII.append(nextChar); 2289 } 2290 } 2291 result = inASCII.toString(); 2292 } 2293 2294 return result; 2295 } 2296 2297 /** 2298 * Returns true if the next token in this scanner's input can be 2299 * interpreted as a float value using the {@link #nextFloat} 2300 * method. The scanner does not advance past any input. 
2301 * 2302 * @return true if and only if this scanner's next token is a valid 2303 * float value 2304 * @throws IllegalStateException if this scanner is closed 2305 */ 2306 public boolean hasNextFloat() { 2307 setRadix(10); 2308 boolean result = hasNext(floatPattern()); 2309 if (result) { // Cache it 2310 try { 2311 String s = processFloatToken(hasNextResult); 2312 typeCache = Float.valueOf(Float.parseFloat(s)); 2313 } catch (NumberFormatException nfe) { 2314 result = false; 2315 } 2316 } 2317 return result; 2318 } 2319 2320 /** 2321 * Scans the next token of the input as a {@code float}. 2322 * This method will throw {@code InputMismatchException} 2323 * if the next token cannot be translated into a valid float value as 2324 * described below. If the translation is successful, the scanner advances 2325 * past the input that matched. 2326 * 2327 * <p> If the next token matches the <a 2328 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2329 * then the token is converted into a {@code float} value as if by 2330 * removing all locale specific prefixes, group separators, and locale 2331 * specific suffixes, then mapping non-ASCII digits into ASCII 2332 * digits via {@link Character#digit Character.digit}, prepending a 2333 * negative sign (-) if the locale specific negative prefixes and suffixes 2334 * were present, and passing the resulting string to 2335 * {@link Float#parseFloat Float.parseFloat}. If the token matches 2336 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2337 * is passed to {@link Float#parseFloat(String) Float.parseFloat} as 2338 * appropriate. 
2339 * 2340 * @return the {@code float} scanned from the input 2341 * @throws InputMismatchException 2342 * if the next token does not match the <i>Float</i> 2343 * regular expression, or is out of range 2344 * @throws NoSuchElementException if input is exhausted 2345 * @throws IllegalStateException if this scanner is closed 2346 */ 2347 public float nextFloat() { 2348 // Check cached result 2349 if ((typeCache != null) && (typeCache instanceof Float)) { 2350 float val = ((Float)typeCache).floatValue(); 2351 useTypeCache(); 2352 return val; 2353 } 2354 setRadix(10); 2355 clearCaches(); 2356 try { 2357 return Float.parseFloat(processFloatToken(next(floatPattern()))); 2358 } catch (NumberFormatException nfe) { 2359 position = matcher.start(); // don't skip bad token 2360 throw new InputMismatchException(nfe.getMessage()); 2361 } 2362 } 2363 2364 /** 2365 * Returns true if the next token in this scanner's input can be 2366 * interpreted as a double value using the {@link #nextDouble} 2367 * method. The scanner does not advance past any input. 2368 * 2369 * @return true if and only if this scanner's next token is a valid 2370 * double value 2371 * @throws IllegalStateException if this scanner is closed 2372 */ 2373 public boolean hasNextDouble() { 2374 setRadix(10); 2375 boolean result = hasNext(floatPattern()); 2376 if (result) { // Cache it 2377 try { 2378 String s = processFloatToken(hasNextResult); 2379 typeCache = Double.valueOf(Double.parseDouble(s)); 2380 } catch (NumberFormatException nfe) { 2381 result = false; 2382 } 2383 } 2384 return result; 2385 } 2386 2387 /** 2388 * Scans the next token of the input as a {@code double}. 2389 * This method will throw {@code InputMismatchException} 2390 * if the next token cannot be translated into a valid double value. 2391 * If the translation is successful, the scanner advances past the input 2392 * that matched. 
2393 * 2394 * <p> If the next token matches the <a 2395 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2396 * then the token is converted into a {@code double} value as if by 2397 * removing all locale specific prefixes, group separators, and locale 2398 * specific suffixes, then mapping non-ASCII digits into ASCII 2399 * digits via {@link Character#digit Character.digit}, prepending a 2400 * negative sign (-) if the locale specific negative prefixes and suffixes 2401 * were present, and passing the resulting string to 2402 * {@link Double#parseDouble Double.parseDouble}. If the token matches 2403 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2404 * is passed to {@link Double#parseDouble(String) Double.parseDouble} as 2405 * appropriate. 2406 * 2407 * @return the {@code double} scanned from the input 2408 * @throws InputMismatchException 2409 * if the next token does not match the <i>Float</i> 2410 * regular expression, or is out of range 2411 * @throws NoSuchElementException if the input is exhausted 2412 * @throws IllegalStateException if this scanner is closed 2413 */ 2414 public double nextDouble() { 2415 // Check cached result 2416 if ((typeCache != null) && (typeCache instanceof Double)) { 2417 double val = ((Double)typeCache).doubleValue(); 2418 useTypeCache(); 2419 return val; 2420 } 2421 setRadix(10); 2422 clearCaches(); 2423 // Search for next float 2424 try { 2425 return Double.parseDouble(processFloatToken(next(floatPattern()))); 2426 } catch (NumberFormatException nfe) { 2427 position = matcher.start(); // don't skip bad token 2428 throw new InputMismatchException(nfe.getMessage()); 2429 } 2430 } 2431 2432 // Convenience methods for scanning multi precision numbers 2433 2434 /** 2435 * Returns true if the next token in this scanner's input can be 2436 * interpreted as a {@code BigInteger} in the default radix using the 2437 * {@link #nextBigInteger} method. 
The scanner does not advance past any 2438 * input. 2439 * 2440 * @return true if and only if this scanner's next token is a valid 2441 * {@code BigInteger} 2442 * @throws IllegalStateException if this scanner is closed 2443 */ 2444 public boolean hasNextBigInteger() { 2445 return hasNextBigInteger(defaultRadix); 2446 } 2447 2448 /** 2449 * Returns true if the next token in this scanner's input can be 2450 * interpreted as a {@code BigInteger} in the specified radix using 2451 * the {@link #nextBigInteger} method. The scanner does not advance past 2452 * any input. 2453 * 2454 * @param radix the radix used to interpret the token as an integer 2455 * @return true if and only if this scanner's next token is a valid 2456 * {@code BigInteger} 2457 * @throws IllegalStateException if this scanner is closed 2458 */ 2459 public boolean hasNextBigInteger(int radix) { 2460 setRadix(radix); 2461 boolean result = hasNext(integerPattern()); 2462 if (result) { // Cache it 2463 try { 2464 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2465 processIntegerToken(hasNextResult) : 2466 hasNextResult; 2467 typeCache = new BigInteger(s, radix); 2468 } catch (NumberFormatException nfe) { 2469 result = false; 2470 } 2471 } 2472 return result; 2473 } 2474 2475 /** 2476 * Scans the next token of the input as a {@link java.math.BigInteger 2477 * BigInteger}. 2478 * 2479 * <p> An invocation of this method of the form 2480 * {@code nextBigInteger()} behaves in exactly the same way as the 2481 * invocation {@code nextBigInteger(radix)}, where {@code radix} 2482 * is the default radix of this scanner. 
2483 * 2484 * @return the {@code BigInteger} scanned from the input 2485 * @throws InputMismatchException 2486 * if the next token does not match the <i>Integer</i> 2487 * regular expression, or is out of range 2488 * @throws NoSuchElementException if the input is exhausted 2489 * @throws IllegalStateException if this scanner is closed 2490 */ 2491 public BigInteger nextBigInteger() { 2492 return nextBigInteger(defaultRadix); 2493 } 2494 2495 /** 2496 * Scans the next token of the input as a {@link java.math.BigInteger 2497 * BigInteger}. 2498 * 2499 * <p> If the next token matches the <a 2500 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2501 * above then the token is converted into a {@code BigInteger} value as if 2502 * by removing all group separators, mapping non-ASCII digits into ASCII 2503 * digits via the {@link Character#digit Character.digit}, and passing the 2504 * resulting string to the {@link 2505 * java.math.BigInteger#BigInteger(java.lang.String) 2506 * BigInteger(String, int)} constructor with the specified radix. 
2507 * 2508 * @param radix the radix used to interpret the token 2509 * @return the {@code BigInteger} scanned from the input 2510 * @throws InputMismatchException 2511 * if the next token does not match the <i>Integer</i> 2512 * regular expression, or is out of range 2513 * @throws NoSuchElementException if the input is exhausted 2514 * @throws IllegalStateException if this scanner is closed 2515 */ 2516 public BigInteger nextBigInteger(int radix) { 2517 // Check cached result 2518 if ((typeCache != null) && (typeCache instanceof BigInteger) 2519 && this.radix == radix) { 2520 BigInteger val = (BigInteger)typeCache; 2521 useTypeCache(); 2522 return val; 2523 } 2524 setRadix(radix); 2525 clearCaches(); 2526 // Search for next int 2527 try { 2528 String s = next(integerPattern()); 2529 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2530 s = processIntegerToken(s); 2531 return new BigInteger(s, radix); 2532 } catch (NumberFormatException nfe) { 2533 position = matcher.start(); // don't skip bad token 2534 throw new InputMismatchException(nfe.getMessage()); 2535 } 2536 } 2537 2538 /** 2539 * Returns true if the next token in this scanner's input can be 2540 * interpreted as a {@code BigDecimal} using the 2541 * {@link #nextBigDecimal} method. The scanner does not advance past any 2542 * input. 2543 * 2544 * @return true if and only if this scanner's next token is a valid 2545 * {@code BigDecimal} 2546 * @throws IllegalStateException if this scanner is closed 2547 */ 2548 public boolean hasNextBigDecimal() { 2549 setRadix(10); 2550 boolean result = hasNext(decimalPattern()); 2551 if (result) { // Cache it 2552 try { 2553 String s = processFloatToken(hasNextResult); 2554 typeCache = new BigDecimal(s); 2555 } catch (NumberFormatException nfe) { 2556 result = false; 2557 } 2558 } 2559 return result; 2560 } 2561 2562 /** 2563 * Scans the next token of the input as a {@link java.math.BigDecimal 2564 * BigDecimal}. 
2565 * 2566 * <p> If the next token matches the <a 2567 * href="#Decimal-regex"><i>Decimal</i></a> regular expression defined 2568 * above then the token is converted into a {@code BigDecimal} value as if 2569 * by removing all group separators, mapping non-ASCII digits into ASCII 2570 * digits via the {@link Character#digit Character.digit}, and passing the 2571 * resulting string to the {@link 2572 * java.math.BigDecimal#BigDecimal(java.lang.String) BigDecimal(String)} 2573 * constructor. 2574 * 2575 * @return the {@code BigDecimal} scanned from the input 2576 * @throws InputMismatchException 2577 * if the next token does not match the <i>Decimal</i> 2578 * regular expression, or is out of range 2579 * @throws NoSuchElementException if the input is exhausted 2580 * @throws IllegalStateException if this scanner is closed 2581 */ 2582 public BigDecimal nextBigDecimal() { 2583 // Check cached result 2584 if ((typeCache != null) && (typeCache instanceof BigDecimal)) { 2585 BigDecimal val = (BigDecimal)typeCache; 2586 useTypeCache(); 2587 return val; 2588 } 2589 setRadix(10); 2590 clearCaches(); 2591 // Search for next float 2592 try { 2593 String s = processFloatToken(next(decimalPattern())); 2594 return new BigDecimal(s); 2595 } catch (NumberFormatException nfe) { 2596 position = matcher.start(); // don't skip bad token 2597 throw new InputMismatchException(nfe.getMessage()); 2598 } 2599 } 2600 2601 /** 2602 * Resets this scanner. 2603 * 2604 * <p> Resetting a scanner discards all of its explicit state 2605 * information which may have been changed by invocations of 2606 * {@link #useDelimiter useDelimiter()}, 2607 * {@link #useLocale useLocale()}, or 2608 * {@link #useRadix useRadix()}. 
2609 * 2610 * <p> An invocation of this method of the form 2611 * {@code scanner.reset()} behaves in exactly the same way as the 2612 * invocation 2613 * 2614 * <blockquote><pre>{@code 2615 * scanner.useDelimiter("\\p{javaWhitespace}+") 2616 * .useLocale(Locale.getDefault(Locale.Category.FORMAT)) 2617 * .useRadix(10); 2618 * }</pre></blockquote> 2619 * 2620 * @return this scanner 2621 * 2622 * @since 1.6 2623 */ 2624 public Scanner reset() { 2625 delimPattern = WHITESPACE_PATTERN; 2626 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 2627 useRadix(10); 2628 clearCaches(); 2629 modCount++; 2630 return this; 2631 } 2632 2633 /** 2634 * Returns a stream of delimiter-separated tokens from this scanner. The 2635 * stream contains the same tokens that would be returned, starting from 2636 * this scanner's current state, by calling the {@link #next} method 2637 * repeatedly until the {@link #hasNext} method returns false. 2638 * 2639 * <p>The resulting stream is sequential and ordered. All stream elements are 2640 * non-null. 2641 * 2642 * <p>Scanning starts upon initiation of the terminal stream operation, using the 2643 * current state of this scanner. Subsequent calls to any methods on this scanner 2644 * other than {@link #close} and {@link #ioException} may return undefined results 2645 * or may cause undefined effects on the returned stream. The returned stream's source 2646 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort basis, throw a 2647 * {@link java.util.ConcurrentModificationException} if any such calls are detected 2648 * during stream pipeline execution. 2649 * 2650 * <p>After stream pipeline execution completes, this scanner is left in an indeterminate 2651 * state and cannot be reused. 2652 * 2653 * <p>If this scanner contains a resource that must be released, this scanner 2654 * should be closed, either by calling its {@link #close} method, or by 2655 * closing the returned stream. 
Closing the stream will close the underlying scanner. 2656 * {@code IllegalStateException} is thrown if the scanner has been closed when this 2657 * method is called, or if this scanner is closed during stream pipeline execution. 2658 * 2659 * <p>This method might block waiting for more input. 2660 * 2661 * @apiNote 2662 * For example, the following code will create a list of 2663 * comma-delimited tokens from a string: 2664 * 2665 * <pre>{@code 2666 * List<String> result = new Scanner("abc,def,,ghi") 2667 * .useDelimiter(",") 2668 * .tokens() 2669 * .collect(Collectors.toList()); 2670 * }</pre> 2671 * 2672 * <p>The resulting list would contain {@code "abc"}, {@code "def"}, 2673 * the empty string, and {@code "ghi"}. 2674 * 2675 * @return a sequential stream of token strings 2676 * @throws IllegalStateException if this scanner is closed 2677 * @since 9 2678 */ 2679 public Stream<String> tokens() { 2680 ensureOpen(); 2681 Stream<String> stream = StreamSupport.stream(new TokenSpliterator(), false); 2682 return stream.onClose(this::close); 2683 } 2684 2685 class TokenSpliterator extends Spliterators.AbstractSpliterator<String> { 2686 int expectedCount = -1; 2687 2688 TokenSpliterator() { 2689 super(Long.MAX_VALUE, 2690 Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED); 2691 } 2692 2693 @Override 2694 public boolean tryAdvance(Consumer<? super String> cons) { 2695 if (expectedCount >= 0 && expectedCount != modCount) { 2696 throw new ConcurrentModificationException(); 2697 } 2698 2699 if (hasNext()) { 2700 String token = next(); 2701 expectedCount = modCount; 2702 cons.accept(token); 2703 if (expectedCount != modCount) { 2704 throw new ConcurrentModificationException(); 2705 } 2706 return true; 2707 } else { 2708 expectedCount = modCount; 2709 return false; 2710 } 2711 } 2712 } 2713 2714 /** 2715 * Returns a stream of match results from this scanner. 
The stream 2716 * contains the same results in the same order that would be returned by 2717 * calling {@code findWithinHorizon(pattern, 0)} and then {@link #match} 2718 * successively as long as {@link #findWithinHorizon findWithinHorizon()} 2719 * finds matches. 2720 * 2721 * <p>The resulting stream is sequential and ordered. All stream elements are 2722 * non-null. 2723 * 2724 * <p>Scanning starts upon initiation of the terminal stream operation, using the 2725 * current state of this scanner. Subsequent calls to any methods on this scanner 2726 * other than {@link #close} and {@link #ioException} may return undefined results 2727 * or may cause undefined effects on the returned stream. The returned stream's source 2728 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort basis, throw a 2729 * {@link java.util.ConcurrentModificationException} if any such calls are detected 2730 * during stream pipeline execution. 2731 * 2732 * <p>After stream pipeline execution completes, this scanner is left in an indeterminate 2733 * state and cannot be reused. 2734 * 2735 * <p>If this scanner contains a resource that must be released, this scanner 2736 * should be closed, either by calling its {@link #close} method, or by 2737 * closing the returned stream. Closing the stream will close the underlying scanner. 2738 * {@code IllegalStateException} is thrown if the scanner has been closed when this 2739 * method is called, or if this scanner is closed during stream pipeline execution. 2740 * 2741 * <p>As with the {@link #findWithinHorizon findWithinHorizon()} methods, this method 2742 * might block waiting for additional input, and it might buffer an unbounded amount of 2743 * input searching for a match. 
2744 * 2745 * @apiNote 2746 * For example, the following code will read a file and return a list 2747 * of all sequences of characters consisting of seven or more Latin capital 2748 * letters: 2749 * 2750 * <pre>{@code 2751 * try (Scanner sc = new Scanner(Paths.get("input.txt"))) { 2752 * Pattern pat = Pattern.compile("[A-Z]{7,}"); 2753 * List<String> capWords = sc.findAll(pat) 2754 * .map(MatchResult::group) 2755 * .collect(Collectors.toList()); 2756 * } 2757 * }</pre> 2758 * 2759 * @param pattern the pattern to be matched 2760 * @return a sequential stream of match results 2761 * @throws NullPointerException if pattern is null 2762 * @throws IllegalStateException if this scanner is closed 2763 * @since 9 2764 */ 2765 public Stream<MatchResult> findAll(Pattern pattern) { 2766 Objects.requireNonNull(pattern); 2767 ensureOpen(); 2768 Stream<MatchResult> stream = StreamSupport.stream(new FindSpliterator(pattern), false); 2769 return stream.onClose(this::close); 2770 } 2771 2772 /** 2773 * Returns a stream of match results that match the provided pattern string. 
2774 * The effect is equivalent to the following code: 2775 * 2776 * <pre>{@code 2777 * scanner.findAll(Pattern.compile(patString)) 2778 * }</pre> 2779 * 2780 * @param patString the pattern string 2781 * @return a sequential stream of match results 2782 * @throws NullPointerException if patString is null 2783 * @throws IllegalStateException if this scanner is closed 2784 * @throws PatternSyntaxException if the regular expression's syntax is invalid 2785 * @since 9 2786 * @see java.util.regex.Pattern 2787 */ 2788 public Stream<MatchResult> findAll(String patString) { 2789 Objects.requireNonNull(patString); 2790 ensureOpen(); 2791 return findAll(patternCache.forName(patString)); 2792 } 2793 2794 class FindSpliterator extends Spliterators.AbstractSpliterator<MatchResult> { 2795 final Pattern pattern; 2796 int expectedCount = -1; 2797 2798 FindSpliterator(Pattern pattern) { 2799 super(Long.MAX_VALUE, 2800 Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED); 2801 this.pattern = pattern; 2802 } 2803 2804 @Override 2805 public boolean tryAdvance(Consumer<? super MatchResult> cons) { 2806 ensureOpen(); 2807 if (expectedCount >= 0) { 2808 if (expectedCount != modCount) { 2809 throw new ConcurrentModificationException(); 2810 } 2811 } else { 2812 expectedCount = modCount; 2813 } 2814 2815 while (true) { 2816 // assert expectedCount == modCount 2817 if (findPatternInBuffer(pattern, 0)) { // doesn't increment modCount 2818 cons.accept(matcher.toMatchResult()); 2819 if (expectedCount != modCount) { 2820 throw new ConcurrentModificationException(); 2821 } 2822 return true; 2823 } 2824 if (needInput) 2825 readInput(); // doesn't increment modCount 2826 else 2827 return false; // reached end of input 2828 } 2829 } 2830 } 2831 2832 /** Utility class for small LRU caches. 
*/
    private static class PatternLRUCache {

        // Cached patterns, most recently used first; allocated on first lookup.
        private Pattern[] oa = null;
        // Number of slots to allocate for the cache array.
        private final int size;

        PatternLRUCache(int size) {
            this.size = size;
        }

        /** Tells whether {@code p} was compiled from exactly the string {@code s}. */
        boolean hasName(Pattern p, String s) {
            return p.pattern().equals(s);
        }

        /**
         * Moves the element at {@code index} to slot 0, shifting the elements
         * that were in front of it one slot towards the back.
         */
        void moveToFront(Object[] entries, int index) {
            Object promoted = entries[index];
            // Shift entries[0..index-1] into entries[1..index].
            System.arraycopy(entries, 0, entries, 1, index);
            entries[0] = promoted;
        }

        /**
         * Returns the cached pattern for {@code name}, promoting it to the
         * front on a hit. On a miss the string is compiled, stored over the
         * last slot, and then moved to the front.
         */
        Pattern forName(String name) {
            if (oa == null) {
                oa = new Pattern[size];
            } else {
                for (int idx = 0; idx < oa.length; idx++) {
                    Pattern cached = oa[idx];
                    if (cached != null && hasName(cached, name)) {
                        if (idx > 0) {
                            moveToFront(oa, idx);
                        }
                        return cached;
                    }
                }
            }

            // Miss: compile, overwrite the last slot, then promote to the front.
            Pattern fresh = Pattern.compile(name);
            int last = oa.length - 1;
            oa[last] = fresh;
            moveToFront(oa, last);
            return fresh;
        }
    }
}