1 /* 2 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util; 27 28 import java.io.*; 29 import java.math.*; 30 import java.nio.*; 31 import java.nio.channels.*; 32 import java.nio.charset.*; 33 import java.nio.file.Path; 34 import java.nio.file.Files; 35 import java.text.*; 36 import java.util.function.Consumer; 37 import java.util.regex.*; 38 import java.util.stream.Stream; 39 import java.util.stream.StreamSupport; 40 41 /** 42 * A simple text scanner which can parse primitive types and strings using 43 * regular expressions. 44 * 45 * <p>A {@code Scanner} breaks its input into tokens using a 46 * delimiter pattern, which by default matches whitespace. 
The resulting 47 * tokens may then be converted into values of different types using the 48 * various {@code next} methods. 49 * 50 * <p>For example, this code allows a user to read a number from 51 * {@code System.in}: 52 * <blockquote><pre>{@code 53 * Scanner sc = new Scanner(System.in); 54 * int i = sc.nextInt(); 55 * }</pre></blockquote> 56 * 57 * <p>As another example, this code allows {@code long} types to be 58 * assigned from entries in a file {@code myNumbers}: 59 * <blockquote><pre>{@code 60 * Scanner sc = new Scanner(new File("myNumbers")); 61 * while (sc.hasNextLong()) { 62 * long aLong = sc.nextLong(); 63 * } 64 * }</pre></blockquote> 65 * 66 * <p>The scanner can also use delimiters other than whitespace. This 67 * example reads several items in from a string: 68 * <blockquote><pre>{@code 69 * String input = "1 fish 2 fish red fish blue fish"; 70 * Scanner s = new Scanner(input).useDelimiter("\\s*fish\\s*"); 71 * System.out.println(s.nextInt()); 72 * System.out.println(s.nextInt()); 73 * System.out.println(s.next()); 74 * System.out.println(s.next()); 75 * s.close(); 76 * }</pre></blockquote> 77 * <p> 78 * prints the following output: 79 * <blockquote><pre>{@code 80 * 1 81 * 2 82 * red 83 * blue 84 * }</pre></blockquote> 85 * 86 * <p>The same output can be generated with this code, which uses a regular 87 * expression to parse all four tokens at once: 88 * <blockquote><pre>{@code 89 * String input = "1 fish 2 fish red fish blue fish"; 90 * Scanner s = new Scanner(input); 91 * s.findInLine("(\\d+) fish (\\d+) fish (\\w+) fish (\\w+)"); 92 * MatchResult result = s.match(); 93 * for (int i=1; i<=result.groupCount(); i++) 94 * System.out.println(result.group(i)); 95 * s.close(); 96 * }</pre></blockquote> 97 * 98 * <p>The <a name="default-delimiter">default whitespace delimiter</a> used 99 * by a scanner is as recognized by {@link Character#isWhitespace(char) 100 * Character.isWhitespace()}. 
The {@link #reset reset()} 101 * method will reset the value of the scanner's delimiter to the default 102 * whitespace delimiter regardless of whether it was previously changed. 103 * 104 * <p>A scanning operation may block waiting for input. 105 * 106 * <p>The {@link #next} and {@link #hasNext} methods and their 107 * companion methods (such as {@link #nextInt} and 108 * {@link #hasNextInt}) first skip any input that matches the delimiter 109 * pattern, and then attempt to return the next token. Both {@code hasNext()} 110 * and {@code next()} methods may block waiting for further input. Whether a 111 * {@code hasNext()} method blocks has no connection to whether or not its 112 * associated {@code next()} method will block. The {@link #tokens} method 113 * may also block waiting for input. 114 * 115 * <p>The {@link #findInLine findInLine()}, 116 * {@link #findWithinHorizon findWithinHorizon()}, 117 * {@link #skip skip()}, and {@link #findAll findAll()} 118 * methods operate independently of the delimiter pattern. These methods will 119 * attempt to match the specified pattern with no regard to delimiters in the 120 * input and thus can be used in special circumstances where delimiters are 121 * not relevant. These methods may block waiting for more input. 122 * 123 * <p>When a scanner throws an {@link InputMismatchException}, the scanner 124 * will not pass the token that caused the exception, so that it may be 125 * retrieved or skipped via some other method. 126 * 127 * <p>Depending upon the type of delimiting pattern, empty tokens may be 128 * returned. For example, the pattern {@code "\\s+"} will return no empty 129 * tokens since it matches multiple instances of the delimiter. The delimiting 130 * pattern {@code "\\s"} could return empty tokens since it only passes one 131 * space at a time. 132 * 133 * <p> A scanner can read text from any object which implements the {@link 134 * java.lang.Readable} interface. 
If an invocation of the underlying 135 * readable's {@link java.lang.Readable#read read()} method throws an {@link 136 * java.io.IOException} then the scanner assumes that the end of the input 137 * has been reached. The most recent {@code IOException} thrown by the 138 * underlying readable can be retrieved via the {@link #ioException} method. 139 * 140 * <p>When a {@code Scanner} is closed, it will close its input source 141 * if the source implements the {@link java.io.Closeable} interface. 142 * 143 * <p>A {@code Scanner} is not safe for multithreaded use without 144 * external synchronization. 145 * 146 * <p>Unless otherwise mentioned, passing a {@code null} parameter into 147 * any method of a {@code Scanner} will cause a 148 * {@code NullPointerException} to be thrown. 149 * 150 * <p>A scanner will default to interpreting numbers as decimal unless a 151 * different radix has been set by using the {@link #useRadix} method. The 152 * {@link #reset} method will reset the value of the scanner's radix to 153 * {@code 10} regardless of whether it was previously changed. 154 * 155 * <h3> <a name="localized-numbers">Localized numbers</a> </h3> 156 * 157 * <p> An instance of this class is capable of scanning numbers in the standard 158 * formats as well as in the formats of the scanner's locale. A scanner's 159 * <a name="initial-locale">initial locale </a>is the value returned by the {@link 160 * java.util.Locale#getDefault(Locale.Category) 161 * Locale.getDefault(Locale.Category.FORMAT)} method; it may be changed via the {@link 162 * #useLocale useLocale()} method. The {@link #reset} method will reset the value of the 163 * scanner's locale to the initial locale regardless of whether it was 164 * previously changed. 
165 * 166 * <p>The localized formats are defined in terms of the following parameters, 167 * which for a particular locale are taken from that locale's {@link 168 * java.text.DecimalFormat DecimalFormat} object, {@code df}, and its 169 * {@link java.text.DecimalFormatSymbols DecimalFormatSymbols} object, 170 * {@code dfs}. 171 * 172 * <blockquote><dl> 173 * <dt><i>LocalGroupSeparator </i> 174 * <dd>The character used to separate thousands groups, 175 * <i>i.e.,</i> {@code dfs.}{@link 176 * java.text.DecimalFormatSymbols#getGroupingSeparator 177 * getGroupingSeparator()} 178 * <dt><i>LocalDecimalSeparator </i> 179 * <dd>The character used for the decimal point, 180 * <i>i.e.,</i> {@code dfs.}{@link 181 * java.text.DecimalFormatSymbols#getDecimalSeparator 182 * getDecimalSeparator()} 183 * <dt><i>LocalPositivePrefix </i> 184 * <dd>The string that appears before a positive number (may 185 * be empty), <i>i.e.,</i> {@code df.}{@link 186 * java.text.DecimalFormat#getPositivePrefix 187 * getPositivePrefix()} 188 * <dt><i>LocalPositiveSuffix </i> 189 * <dd>The string that appears after a positive number (may be 190 * empty), <i>i.e.,</i> {@code df.}{@link 191 * java.text.DecimalFormat#getPositiveSuffix 192 * getPositiveSuffix()} 193 * <dt><i>LocalNegativePrefix </i> 194 * <dd>The string that appears before a negative number (may 195 * be empty), <i>i.e.,</i> {@code df.}{@link 196 * java.text.DecimalFormat#getNegativePrefix 197 * getNegativePrefix()} 198 * <dt><i>LocalNegativeSuffix </i> 199 * <dd>The string that appears after a negative number (may be 200 * empty), <i>i.e.,</i> {@code df.}{@link 201 * java.text.DecimalFormat#getNegativeSuffix 202 * getNegativeSuffix()} 203 * <dt><i>LocalNaN </i> 204 * <dd>The string that represents not-a-number for 205 * floating-point values, 206 * <i>i.e.,</i> {@code dfs.}{@link 207 * java.text.DecimalFormatSymbols#getNaN 208 * getNaN()} 209 * <dt><i>LocalInfinity </i> 210 * <dd>The string that represents infinity for floating-point 
211 * values, <i>i.e.,</i> {@code dfs.}{@link 212 * java.text.DecimalFormatSymbols#getInfinity 213 * getInfinity()} 214 * </dl></blockquote> 215 * 216 * <h4> <a name="number-syntax">Number syntax</a> </h4> 217 * 218 * <p> The strings that can be parsed as numbers by an instance of this class 219 * are specified in terms of the following regular-expression grammar, where 220 * Rmax is the highest digit in the radix being used (for example, Rmax is 9 in base 10). 221 * 222 * <dl> 223 * <dt><i>NonAsciiDigit</i>: 224 * <dd>A non-ASCII character c for which 225 * {@link java.lang.Character#isDigit Character.isDigit}{@code (c)} 226 * returns true 227 * 228 * <dt><i>Non0Digit</i>: 229 * <dd>{@code [1-}<i>Rmax</i>{@code ] | }<i>NonAsciiDigit</i> 230 * 231 * <dt><i>Digit</i>: 232 * <dd>{@code [0-}<i>Rmax</i>{@code ] | }<i>NonAsciiDigit</i> 233 * 234 * <dt><i>GroupedNumeral</i>: 235 * <dd><code>( </code><i>Non0Digit</i> 236 * <i>Digit</i>{@code ? 237 * }<i>Digit</i>{@code ?} 238 * <dd> <code>( </code><i>LocalGroupSeparator</i> 239 * <i>Digit</i> 240 * <i>Digit</i> 241 * <i>Digit</i>{@code )+ )} 242 * 243 * <dt><i>Numeral</i>: 244 * <dd>{@code ( ( }<i>Digit</i>{@code + ) 245 * | }<i>GroupedNumeral</i>{@code )} 246 * 247 * <dt><a name="Integer-regex"><i>Integer</i>:</a> 248 * <dd>{@code ( [-+]? ( }<i>Numeral</i>{@code 249 * ) )} 250 * <dd>{@code | }<i>LocalPositivePrefix</i> <i>Numeral</i> 251 * <i>LocalPositiveSuffix</i> 252 * <dd>{@code | }<i>LocalNegativePrefix</i> <i>Numeral</i> 253 * <i>LocalNegativeSuffix</i> 254 * 255 * <dt><i>DecimalNumeral</i>: 256 * <dd><i>Numeral</i> 257 * <dd>{@code | }<i>Numeral</i> 258 * <i>LocalDecimalSeparator</i> 259 * <i>Digit</i>{@code *} 260 * <dd>{@code | }<i>LocalDecimalSeparator</i> 261 * <i>Digit</i>{@code +} 262 * 263 * <dt><i>Exponent</i>: 264 * <dd>{@code ( [eE] [+-]? }<i>Digit</i>{@code + )} 265 * 266 * <dt><a name="Decimal-regex"><i>Decimal</i>:</a> 267 * <dd>{@code ( [-+]? }<i>DecimalNumeral</i> 268 * <i>Exponent</i>{@code ? 
)} 269 * <dd>{@code | }<i>LocalPositivePrefix</i> 270 * <i>DecimalNumeral</i> 271 * <i>LocalPositiveSuffix</i> 272 * <i>Exponent</i>{@code ?} 273 * <dd>{@code | }<i>LocalNegativePrefix</i> 274 * <i>DecimalNumeral</i> 275 * <i>LocalNegativeSuffix</i> 276 * <i>Exponent</i>{@code ?} 277 * 278 * <dt><i>HexFloat</i>: 279 * <dd>{@code [-+]? 0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+ 280 * ([pP][-+]?[0-9]+)?} 281 * 282 * <dt><i>NonNumber</i>: 283 * <dd>{@code NaN 284 * | }<i>LocalNaN</i>{@code 285 * | Infinity 286 * | }<i>LocalInfinity</i> 287 * 288 * <dt><i>SignedNonNumber</i>: 289 * <dd>{@code ( [-+]? }<i>NonNumber</i>{@code )} 290 * <dd>{@code | }<i>LocalPositivePrefix</i> 291 * <i>NonNumber</i> 292 * <i>LocalPositiveSuffix</i> 293 * <dd>{@code | }<i>LocalNegativePrefix</i> 294 * <i>NonNumber</i> 295 * <i>LocalNegativeSuffix</i> 296 * 297 * <dt><a name="Float-regex"><i>Float</i></a>: 298 * <dd><i>Decimal</i> 299 * {@code | }<i>HexFloat</i> 300 * {@code | }<i>SignedNonNumber</i> 301 * 302 * </dl> 303 * <p>Whitespace is not significant in the above regular expressions. 
*
* @since 1.5
*/
public final class Scanner implements Iterator<String>, Closeable {

    // Internal buffer used to hold input; only the region [0, limit)
    // contains characters that have been read from the source
    private CharBuffer buf;

    // Initial capacity, in chars, of the internal character buffer
    // (the buffer is reallocated at twice the capacity when it fills up)
    private static final int BUFFER_SIZE = 1024;

    // The index into the buffer currently held by the Scanner
    private int position;

    // Internal matcher used for finding delimiters (and, after a pattern
    // switch, for matching tokens) against the buffer
    private Matcher matcher;

    // Pattern used to delimit tokens
    private Pattern delimPattern;

    // Pattern found in last hasNext operation (null when nothing is cached)
    private Pattern hasNextPattern;

    // Position after last hasNext operation
    private int hasNextPosition;

    // Result after last hasNext operation
    private String hasNextResult;

    // The input source
    private Readable source;

    // True once the source has reported end of input or thrown an
    // IOException; no further characters will arrive after that
    private boolean sourceClosed = false;

    // Boolean indicating more input is required
    private boolean needInput = false;

    // Boolean indicating if a delim has been skipped this operation
    private boolean skipped = false;

    // A store of a position that the scanner may fall back to;
    // -1 means that no position is currently saved
    private int savedScannerPosition = -1;

    // A cache of the last primitive type scanned
    private Object typeCache = null;

    // Boolean indicating if a match result is available
    private boolean matchValid = false;

    // Boolean indicating if this scanner has been closed
    private boolean closed = false;

    // The current radix used by this scanner
    private int radix = 10;

    // The default radix for this scanner
    private int defaultRadix = 10;

    // The locale used by this scanner
    private Locale locale = null;

    // A cache of the last few recently used Patterns
    private PatternLRUCache patternCache = new PatternLRUCache(7);

    // A holder of the last IOException encountered
    private IOException lastException;

372 // Number of times this scanner's state has been modified. 373 // Generally incremented on most public APIs and checked 374 // within spliterator implementations. 375 int modCount; 376 377 // A pattern for java whitespace 378 private static Pattern WHITESPACE_PATTERN = Pattern.compile( 379 "\\p{javaWhitespace}+"); 380 381 // A pattern for any token 382 private static Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*"); 383 384 // A pattern for non-ASCII digits 385 private static Pattern NON_ASCII_DIGIT = Pattern.compile( 386 "[\\p{javaDigit}&&[^0-9]]"); 387 388 // Fields and methods to support scanning primitive types 389 390 /** 391 * Locale dependent values used to scan numbers 392 */ 393 private String groupSeparator = "\\,"; 394 private String decimalSeparator = "\\."; 395 private String nanString = "NaN"; 396 private String infinityString = "Infinity"; 397 private String positivePrefix = ""; 398 private String negativePrefix = "\\-"; 399 private String positiveSuffix = ""; 400 private String negativeSuffix = ""; 401 402 /** 403 * Fields and an accessor method to match booleans 404 */ 405 private static volatile Pattern boolPattern; 406 private static final String BOOLEAN_PATTERN = "true|false"; 407 private static Pattern boolPattern() { 408 Pattern bp = boolPattern; 409 if (bp == null) 410 boolPattern = bp = Pattern.compile(BOOLEAN_PATTERN, 411 Pattern.CASE_INSENSITIVE); 412 return bp; 413 } 414 415 /** 416 * Fields and methods to match bytes, shorts, ints, and longs 417 */ 418 private Pattern integerPattern; 419 private String digits = "0123456789abcdefghijklmnopqrstuvwxyz"; 420 private String non0Digit = "[\\p{javaDigit}&&[^0]]"; 421 private int SIMPLE_GROUP_INDEX = 5; 422 private String buildIntegerPatternString() { 423 String radixDigits = digits.substring(0, radix); 424 // \\p{javaDigit} is not guaranteed to be appropriate 425 // here but what can we do? 
The final authority will be 426 // whatever parse method is invoked, so ultimately the 427 // Scanner will do the right thing 428 String digit = "((?i)["+radixDigits+"]|\\p{javaDigit})"; 429 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 430 groupSeparator+digit+digit+digit+")+)"; 431 // digit++ is the possessive form which is necessary for reducing 432 // backtracking that would otherwise cause unacceptable performance 433 String numeral = "(("+ digit+"++)|"+groupedNumeral+")"; 434 String javaStyleInteger = "([-+]?(" + numeral + "))"; 435 String negativeInteger = negativePrefix + numeral + negativeSuffix; 436 String positiveInteger = positivePrefix + numeral + positiveSuffix; 437 return "("+ javaStyleInteger + ")|(" + 438 positiveInteger + ")|(" + 439 negativeInteger + ")"; 440 } 441 private Pattern integerPattern() { 442 if (integerPattern == null) { 443 integerPattern = patternCache.forName(buildIntegerPatternString()); 444 } 445 return integerPattern; 446 } 447 448 /** 449 * Fields and an accessor method to match line separators 450 */ 451 private static volatile Pattern separatorPattern; 452 private static volatile Pattern linePattern; 453 private static final String LINE_SEPARATOR_PATTERN = 454 "\r\n|[\n\r\u2028\u2029\u0085]"; 455 private static final String LINE_PATTERN = ".*("+LINE_SEPARATOR_PATTERN+")|.+$"; 456 457 private static Pattern separatorPattern() { 458 Pattern sp = separatorPattern; 459 if (sp == null) 460 separatorPattern = sp = Pattern.compile(LINE_SEPARATOR_PATTERN); 461 return sp; 462 } 463 464 private static Pattern linePattern() { 465 Pattern lp = linePattern; 466 if (lp == null) 467 linePattern = lp = Pattern.compile(LINE_PATTERN); 468 return lp; 469 } 470 471 /** 472 * Fields and methods to match floats and doubles 473 */ 474 private Pattern floatPattern; 475 private Pattern decimalPattern; 476 private void buildFloatAndDecimalPattern() { 477 // \\p{javaDigit} may not be perfect, see above 478 String digit = 
"([0-9]|(\\p{javaDigit}))"; 479 String exponent = "([eE][+-]?"+digit+"+)?"; 480 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 481 groupSeparator+digit+digit+digit+")+)"; 482 // Once again digit++ is used for performance, as above 483 String numeral = "(("+digit+"++)|"+groupedNumeral+")"; 484 String decimalNumeral = "("+numeral+"|"+numeral + 485 decimalSeparator + digit + "*+|"+ decimalSeparator + 486 digit + "++)"; 487 String nonNumber = "(NaN|"+nanString+"|Infinity|"+ 488 infinityString+")"; 489 String positiveFloat = "(" + positivePrefix + decimalNumeral + 490 positiveSuffix + exponent + ")"; 491 String negativeFloat = "(" + negativePrefix + decimalNumeral + 492 negativeSuffix + exponent + ")"; 493 String decimal = "(([-+]?" + decimalNumeral + exponent + ")|"+ 494 positiveFloat + "|" + negativeFloat + ")"; 495 String hexFloat = 496 "[-+]?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP][-+]?[0-9]+)?"; 497 String positiveNonNumber = "(" + positivePrefix + nonNumber + 498 positiveSuffix + ")"; 499 String negativeNonNumber = "(" + negativePrefix + nonNumber + 500 negativeSuffix + ")"; 501 String signedNonNumber = "(([-+]?"+nonNumber+")|" + 502 positiveNonNumber + "|" + 503 negativeNonNumber + ")"; 504 floatPattern = Pattern.compile(decimal + "|" + hexFloat + "|" + 505 signedNonNumber); 506 decimalPattern = Pattern.compile(decimal); 507 } 508 private Pattern floatPattern() { 509 if (floatPattern == null) { 510 buildFloatAndDecimalPattern(); 511 } 512 return floatPattern; 513 } 514 private Pattern decimalPattern() { 515 if (decimalPattern == null) { 516 buildFloatAndDecimalPattern(); 517 } 518 return decimalPattern; 519 } 520 521 // Constructors 522 523 /** 524 * Constructs a {@code Scanner} that returns values scanned 525 * from the specified source delimited by the specified pattern. 
526 * 527 * @param source A character source implementing the Readable interface 528 * @param pattern A delimiting pattern 529 */ 530 private Scanner(Readable source, Pattern pattern) { 531 assert source != null : "source should not be null"; 532 assert pattern != null : "pattern should not be null"; 533 this.source = source; 534 delimPattern = pattern; 535 buf = CharBuffer.allocate(BUFFER_SIZE); 536 buf.limit(0); 537 matcher = delimPattern.matcher(buf); 538 matcher.useTransparentBounds(true); 539 matcher.useAnchoringBounds(false); 540 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 541 } 542 543 /** 544 * Constructs a new {@code Scanner} that produces values scanned 545 * from the specified source. 546 * 547 * @param source A character source implementing the {@link Readable} 548 * interface 549 */ 550 public Scanner(Readable source) { 551 this(Objects.requireNonNull(source, "source"), WHITESPACE_PATTERN); 552 } 553 554 /** 555 * Constructs a new {@code Scanner} that produces values scanned 556 * from the specified input stream. Bytes from the stream are converted 557 * into characters using the underlying platform's 558 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 559 * 560 * @param source An input stream to be scanned 561 */ 562 public Scanner(InputStream source) { 563 this(new InputStreamReader(source), WHITESPACE_PATTERN); 564 } 565 566 /** 567 * Constructs a new {@code Scanner} that produces values scanned 568 * from the specified input stream. Bytes from the stream are converted 569 * into characters using the specified charset. 
*
* @param source An input stream to be scanned
* @param charsetName The encoding type used to convert bytes from the
*        stream into characters to be scanned
* @throws IllegalArgumentException if the specified character set
*         does not exist
*/
public Scanner(InputStream source, String charsetName) {
    // Arguments are evaluated left to right: the source null check runs
    // before the charset name is resolved
    this(makeReadable(Objects.requireNonNull(source, "source"), toCharset(charsetName)),
         WHITESPACE_PATTERN);
}

/**
 * Returns a charset object for the given charset name.
 *
 * @param csn the charset name to resolve
 * @return the charset registered under {@code csn}
 * @throws NullPointerException if csn is null
 * @throws IllegalArgumentException if the charset is not supported
 */
private static Charset toCharset(String csn) {
    Objects.requireNonNull(csn, "charsetName");
    try {
        return Charset.forName(csn);
    } catch (IllegalCharsetNameException|UnsupportedCharsetException e) {
        // IllegalArgumentException should be thrown; the original
        // exception is kept as its cause
        throw new IllegalArgumentException(e);
    }
}

/**
 * Wraps the given byte stream in a reader that decodes with the given
 * charset.
 */
private static Readable makeReadable(InputStream source, Charset charset) {
    return new InputStreamReader(source, charset);
}

/**
 * Constructs a new {@code Scanner} that produces values scanned
 * from the specified file. Bytes from the file are converted into
 * characters using the underlying platform's
 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}.
 *
 * @param source A file to be scanned
 * @throws FileNotFoundException if source is not found
 */
public Scanner(File source) throws FileNotFoundException {
    // Delegates to the channel constructor; the cast makes the targeted
    // overload explicit
    this((ReadableByteChannel)(new FileInputStream(source).getChannel()));
}

/**
 * Constructs a new {@code Scanner} that produces values scanned
 * from the specified file. Bytes from the file are converted into
 * characters using the specified charset.
618 * 619 * @param source A file to be scanned 620 * @param charsetName The encoding type used to convert bytes from the file 621 * into characters to be scanned 622 * @throws FileNotFoundException if source is not found 623 * @throws IllegalArgumentException if the specified encoding is 624 * not found 625 */ 626 public Scanner(File source, String charsetName) 627 throws FileNotFoundException 628 { 629 this(Objects.requireNonNull(source), toDecoder(charsetName)); 630 } 631 632 private Scanner(File source, CharsetDecoder dec) 633 throws FileNotFoundException 634 { 635 this(makeReadable((ReadableByteChannel)(new FileInputStream(source).getChannel()), dec)); 636 } 637 638 private static CharsetDecoder toDecoder(String charsetName) { 639 Objects.requireNonNull(charsetName, "charsetName"); 640 try { 641 return Charset.forName(charsetName).newDecoder(); 642 } catch (IllegalCharsetNameException|UnsupportedCharsetException unused) { 643 throw new IllegalArgumentException(charsetName); 644 } 645 } 646 647 private static Readable makeReadable(ReadableByteChannel source, 648 CharsetDecoder dec) { 649 return Channels.newReader(source, dec, -1); 650 } 651 652 /** 653 * Constructs a new {@code Scanner} that produces values scanned 654 * from the specified file. Bytes from the file are converted into 655 * characters using the underlying platform's 656 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 657 * 658 * @param source 659 * the path to the file to be scanned 660 * @throws IOException 661 * if an I/O error occurs opening source 662 * 663 * @since 1.7 664 */ 665 public Scanner(Path source) 666 throws IOException 667 { 668 this(Files.newInputStream(source)); 669 } 670 671 /** 672 * Constructs a new {@code Scanner} that produces values scanned 673 * from the specified file. Bytes from the file are converted into 674 * characters using the specified charset. 
675 * 676 * @param source 677 * the path to the file to be scanned 678 * @param charsetName 679 * The encoding type used to convert bytes from the file 680 * into characters to be scanned 681 * @throws IOException 682 * if an I/O error occurs opening source 683 * @throws IllegalArgumentException 684 * if the specified encoding is not found 685 * @since 1.7 686 */ 687 public Scanner(Path source, String charsetName) throws IOException { 688 this(Objects.requireNonNull(source), toCharset(charsetName)); 689 } 690 691 private Scanner(Path source, Charset charset) throws IOException { 692 this(makeReadable(Files.newInputStream(source), charset)); 693 } 694 695 /** 696 * Constructs a new {@code Scanner} that produces values scanned 697 * from the specified string. 698 * 699 * @param source A string to scan 700 */ 701 public Scanner(String source) { 702 this(new StringReader(source), WHITESPACE_PATTERN); 703 } 704 705 /** 706 * Constructs a new {@code Scanner} that produces values scanned 707 * from the specified channel. Bytes from the source are converted into 708 * characters using the underlying platform's 709 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 710 * 711 * @param source A channel to scan 712 */ 713 public Scanner(ReadableByteChannel source) { 714 this(makeReadable(Objects.requireNonNull(source, "source")), 715 WHITESPACE_PATTERN); 716 } 717 718 private static Readable makeReadable(ReadableByteChannel source) { 719 return makeReadable(source, Charset.defaultCharset().newDecoder()); 720 } 721 722 /** 723 * Constructs a new {@code Scanner} that produces values scanned 724 * from the specified channel. Bytes from the source are converted into 725 * characters using the specified charset. 
*
* @param source A channel to scan
* @param charsetName The encoding type used to convert bytes from the
*        channel into characters to be scanned
* @throws IllegalArgumentException if the specified character set
*         does not exist
*/
public Scanner(ReadableByteChannel source, String charsetName) {
    this(makeReadable(Objects.requireNonNull(source, "source"), toDecoder(charsetName)),
         WHITESPACE_PATTERN);
}

// Private primitives used to support scanning

// Remembers the current position so a failed operation can fall back to it
private void saveState() {
    savedScannerPosition = position;
}

// Falls back to the remembered position, forgets it, and allows leading
// delimiters to be skipped again
private void revertState() {
    position = savedScannerPosition;
    savedScannerPosition = -1;
    skipped = false;
}

// Same as revertState(), handing back b so callers can revert and
// return in a single statement
private boolean revertState(boolean b) {
    revertState();
    return b;
}

// Caches the matcher's current match text as the hasNext result
private void cacheResult() {
    cacheResult(matcher.group());
}

// Caches the given text as the hasNext result, together with the end
// position and pattern of the matcher's current match
private void cacheResult(String result) {
    hasNextResult = result;
    hasNextPosition = matcher.end();
    hasNextPattern = matcher.pattern();
}

// Drops the hasNext cache and the typed-value cache
private void clearCaches() {
    hasNextPattern = null;
    typeCache = null;
}

// Consumes the cached hasNext result: moves the position past it,
// drops both caches and returns the cached text
private String getCachedResult() {
    position = hasNextPosition;
    clearCaches();
    return hasNextResult;
}

// Consumes the cached typed value: moves the position past it and drops
// both caches; refuses to do so on a closed scanner
private void useTypeCache() {
    if (closed) {
        throw new IllegalStateException("Scanner closed");
    }
    position = hasNextPosition;
    clearCaches();
}

// Tries to read more input. May block.
793 private void readInput() { 794 if (buf.limit() == buf.capacity()) 795 makeSpace(); 796 // Prepare to receive data 797 int p = buf.position(); 798 buf.position(buf.limit()); 799 buf.limit(buf.capacity()); 800 801 int n = 0; 802 try { 803 n = source.read(buf); 804 } catch (IOException ioe) { 805 lastException = ioe; 806 n = -1; 807 } 808 if (n == -1) { 809 sourceClosed = true; 810 needInput = false; 811 } 812 if (n > 0) 813 needInput = false; 814 // Restore current position and limit for reading 815 buf.limit(buf.position()); 816 buf.position(p); 817 } 818 819 // After this method is called there will either be an exception 820 // or else there will be space in the buffer 821 private boolean makeSpace() { 822 clearCaches(); 823 int offset = savedScannerPosition == -1 ? 824 position : savedScannerPosition; 825 buf.position(offset); 826 // Gain space by compacting buffer 827 if (offset > 0) { 828 buf.compact(); 829 translateSavedIndexes(offset); 830 position -= offset; 831 buf.flip(); 832 return true; 833 } 834 // Gain space by growing buffer 835 int newSize = buf.capacity() * 2; 836 CharBuffer newBuf = CharBuffer.allocate(newSize); 837 newBuf.put(buf); 838 newBuf.flip(); 839 translateSavedIndexes(offset); 840 position -= offset; 841 buf = newBuf; 842 matcher.reset(buf); 843 return true; 844 } 845 846 // When a buffer compaction/reallocation occurs the saved indexes must 847 // be modified appropriately 848 private void translateSavedIndexes(int offset) { 849 if (savedScannerPosition != -1) 850 savedScannerPosition -= offset; 851 } 852 853 // If we are at the end of input then NoSuchElement; 854 // If there is still input left then InputMismatch 855 private void throwFor() { 856 skipped = false; 857 if ((sourceClosed) && (position == buf.limit())) 858 throw new NoSuchElementException(); 859 else 860 throw new InputMismatchException(); 861 } 862 863 // Returns true if a complete token or partial token is in the buffer. 
864 // It is not necessary to find a complete token since a partial token 865 // means that there will be another token with or without more input. 866 private boolean hasTokenInBuffer() { 867 matchValid = false; 868 matcher.usePattern(delimPattern); 869 matcher.region(position, buf.limit()); 870 // Skip delims first 871 if (matcher.lookingAt()) { 872 if (matcher.hitEnd() && !sourceClosed) { 873 // more input might change the match of delims, in which 874 // might change whether or not if there is token left in 875 // buffer (don't update the "position" in this case) 876 needInput = true; 877 return false; 878 } 879 position = matcher.end(); 880 } 881 // If we are sitting at the end, no more tokens in buffer 882 if (position == buf.limit()) 883 return false; 884 return true; 885 } 886 887 /* 888 * Returns a "complete token" that matches the specified pattern 889 * 890 * A token is complete if surrounded by delims; a partial token 891 * is prefixed by delims but not postfixed by them 892 * 893 * The position is advanced to the end of that complete token 894 * 895 * Pattern == null means accept any token at all 896 * 897 * Triple return: 898 * 1. valid string means it was found 899 * 2. null with needInput=false means we won't ever find it 900 * 3. 
null with needInput=true means try again after readInput 901 */ 902 private String getCompleteTokenInBuffer(Pattern pattern) { 903 matchValid = false; 904 // Skip delims first 905 matcher.usePattern(delimPattern); 906 if (!skipped) { // Enforcing only one skip of leading delims 907 matcher.region(position, buf.limit()); 908 if (matcher.lookingAt()) { 909 // If more input could extend the delimiters then we must wait 910 // for more input 911 if (matcher.hitEnd() && !sourceClosed) { 912 needInput = true; 913 return null; 914 } 915 // The delims were whole and the matcher should skip them 916 skipped = true; 917 position = matcher.end(); 918 } 919 } 920 921 // If we are sitting at the end, no more tokens in buffer 922 if (position == buf.limit()) { 923 if (sourceClosed) 924 return null; 925 needInput = true; 926 return null; 927 } 928 // Must look for next delims. Simply attempting to match the 929 // pattern at this point may find a match but it might not be 930 // the first longest match because of missing input, or it might 931 // match a partial token instead of the whole thing. 932 933 // Then look for next delims 934 matcher.region(position, buf.limit()); 935 boolean foundNextDelim = matcher.find(); 936 if (foundNextDelim && (matcher.end() == position)) { 937 // Zero length delimiter match; we should find the next one 938 // using the automatic advance past a zero length match; 939 // Otherwise we have just found the same one we just skipped 940 foundNextDelim = matcher.find(); 941 } 942 if (foundNextDelim) { 943 // In the rare case that more input could cause the match 944 // to be lost and there is more input coming we must wait 945 // for more input. Note that hitting the end is okay as long 946 // as the match cannot go away. It is the beginning of the 947 // next delims we want to be sure about, we don't care if 948 // they potentially extend further. 
949 if (matcher.requireEnd() && !sourceClosed) { 950 needInput = true; 951 return null; 952 } 953 int tokenEnd = matcher.start(); 954 // There is a complete token. 955 if (pattern == null) { 956 // Must continue with match to provide valid MatchResult 957 pattern = FIND_ANY_PATTERN; 958 } 959 // Attempt to match against the desired pattern 960 matcher.usePattern(pattern); 961 matcher.region(position, tokenEnd); 962 if (matcher.matches()) { 963 String s = matcher.group(); 964 position = matcher.end(); 965 return s; 966 } else { // Complete token but it does not match 967 return null; 968 } 969 } 970 971 // If we can't find the next delims but no more input is coming, 972 // then we can treat the remainder as a whole token 973 if (sourceClosed) { 974 if (pattern == null) { 975 // Must continue with match to provide valid MatchResult 976 pattern = FIND_ANY_PATTERN; 977 } 978 // Last token; Match the pattern here or throw 979 matcher.usePattern(pattern); 980 matcher.region(position, buf.limit()); 981 if (matcher.matches()) { 982 String s = matcher.group(); 983 position = matcher.end(); 984 return s; 985 } 986 // Last piece does not match 987 return null; 988 } 989 990 // There is a partial token in the buffer; must read more 991 // to complete it 992 needInput = true; 993 return null; 994 } 995 996 // Finds the specified pattern in the buffer up to horizon. 997 // Returns true if the specified input pattern was matched, 998 // and leaves the matcher field with the current match state. 
999 private boolean findPatternInBuffer(Pattern pattern, int horizon) { 1000 matchValid = false; 1001 matcher.usePattern(pattern); 1002 int bufferLimit = buf.limit(); 1003 int horizonLimit = -1; 1004 int searchLimit = bufferLimit; 1005 if (horizon > 0) { 1006 horizonLimit = position + horizon; 1007 if (horizonLimit < bufferLimit) 1008 searchLimit = horizonLimit; 1009 } 1010 matcher.region(position, searchLimit); 1011 if (matcher.find()) { 1012 if (matcher.hitEnd() && (!sourceClosed)) { 1013 // The match may be longer if didn't hit horizon or real end 1014 if (searchLimit != horizonLimit) { 1015 // Hit an artificial end; try to extend the match 1016 needInput = true; 1017 return false; 1018 } 1019 // The match could go away depending on what is next 1020 if ((searchLimit == horizonLimit) && matcher.requireEnd()) { 1021 // Rare case: we hit the end of input and it happens 1022 // that it is at the horizon and the end of input is 1023 // required for the match. 1024 needInput = true; 1025 return false; 1026 } 1027 } 1028 // Did not hit end, or hit real end, or hit horizon 1029 position = matcher.end(); 1030 return true; 1031 } 1032 1033 if (sourceClosed) 1034 return false; 1035 1036 // If there is no specified horizon, or if we have not searched 1037 // to the specified horizon yet, get more input 1038 if ((horizon == 0) || (searchLimit != horizonLimit)) 1039 needInput = true; 1040 return false; 1041 } 1042 1043 // Attempts to match a pattern anchored at the current position. 1044 // Returns true if the specified input pattern was matched, 1045 // and leaves the matcher field with the current match state. 
1046 private boolean matchPatternInBuffer(Pattern pattern) { 1047 matchValid = false; 1048 matcher.usePattern(pattern); 1049 matcher.region(position, buf.limit()); 1050 if (matcher.lookingAt()) { 1051 if (matcher.hitEnd() && (!sourceClosed)) { 1052 // Get more input and try again 1053 needInput = true; 1054 return false; 1055 } 1056 position = matcher.end(); 1057 return true; 1058 } 1059 1060 if (sourceClosed) 1061 return false; 1062 1063 // Read more to find pattern 1064 needInput = true; 1065 return false; 1066 } 1067 1068 // Throws if the scanner is closed 1069 private void ensureOpen() { 1070 if (closed) 1071 throw new IllegalStateException("Scanner closed"); 1072 } 1073 1074 // Public methods 1075 1076 /** 1077 * Closes this scanner. 1078 * 1079 * <p> If this scanner has not yet been closed then if its underlying 1080 * {@linkplain java.lang.Readable readable} also implements the {@link 1081 * java.io.Closeable} interface then the readable's {@code close} method 1082 * will be invoked. If this scanner is already closed then invoking this 1083 * method will have no effect. 1084 * 1085 * <p>Attempting to perform search operations after a scanner has 1086 * been closed will result in an {@link IllegalStateException}. 1087 * 1088 */ 1089 public void close() { 1090 if (closed) 1091 return; 1092 if (source instanceof Closeable) { 1093 try { 1094 ((Closeable)source).close(); 1095 } catch (IOException ioe) { 1096 lastException = ioe; 1097 } 1098 } 1099 sourceClosed = true; 1100 source = null; 1101 closed = true; 1102 } 1103 1104 /** 1105 * Returns the {@code IOException} last thrown by this 1106 * {@code Scanner}'s underlying {@code Readable}. This method 1107 * returns {@code null} if no such exception exists. 
1108 * 1109 * @return the last exception thrown by this scanner's readable 1110 */ 1111 public IOException ioException() { 1112 return lastException; 1113 } 1114 1115 /** 1116 * Returns the {@code Pattern} this {@code Scanner} is currently 1117 * using to match delimiters. 1118 * 1119 * @return this scanner's delimiting pattern. 1120 */ 1121 public Pattern delimiter() { 1122 return delimPattern; 1123 } 1124 1125 /** 1126 * Sets this scanner's delimiting pattern to the specified pattern. 1127 * 1128 * @param pattern A delimiting pattern 1129 * @return this scanner 1130 */ 1131 public Scanner useDelimiter(Pattern pattern) { 1132 modCount++; 1133 delimPattern = pattern; 1134 return this; 1135 } 1136 1137 /** 1138 * Sets this scanner's delimiting pattern to a pattern constructed from 1139 * the specified {@code String}. 1140 * 1141 * <p> An invocation of this method of the form 1142 * {@code useDelimiter(pattern)} behaves in exactly the same way as the 1143 * invocation {@code useDelimiter(Pattern.compile(pattern))}. 1144 * 1145 * <p> Invoking the {@link #reset} method will set the scanner's delimiter 1146 * to the <a href= "#default-delimiter">default</a>. 1147 * 1148 * @param pattern A string specifying a delimiting pattern 1149 * @return this scanner 1150 */ 1151 public Scanner useDelimiter(String pattern) { 1152 modCount++; 1153 delimPattern = patternCache.forName(pattern); 1154 return this; 1155 } 1156 1157 /** 1158 * Returns this scanner's locale. 1159 * 1160 * <p>A scanner's locale affects many elements of its default 1161 * primitive matching regular expressions; see 1162 * <a href= "#localized-numbers">localized numbers</a> above. 1163 * 1164 * @return this scanner's locale 1165 */ 1166 public Locale locale() { 1167 return this.locale; 1168 } 1169 1170 /** 1171 * Sets this scanner's locale to the specified locale. 
1172 * 1173 * <p>A scanner's locale affects many elements of its default 1174 * primitive matching regular expressions; see 1175 * <a href= "#localized-numbers">localized numbers</a> above. 1176 * 1177 * <p>Invoking the {@link #reset} method will set the scanner's locale to 1178 * the <a href= "#initial-locale">initial locale</a>. 1179 * 1180 * @param locale A string specifying the locale to use 1181 * @return this scanner 1182 */ 1183 public Scanner useLocale(Locale locale) { 1184 if (locale.equals(this.locale)) 1185 return this; 1186 1187 modCount++; 1188 this.locale = locale; 1189 DecimalFormat df = 1190 (DecimalFormat)NumberFormat.getNumberInstance(locale); 1191 DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale); 1192 1193 // These must be literalized to avoid collision with regex 1194 // metacharacters such as dot or parenthesis 1195 groupSeparator = "\\" + dfs.getGroupingSeparator(); 1196 decimalSeparator = "\\" + dfs.getDecimalSeparator(); 1197 1198 // Quoting the nonzero length locale-specific things 1199 // to avoid potential conflict with metacharacters 1200 nanString = "\\Q" + dfs.getNaN() + "\\E"; 1201 infinityString = "\\Q" + dfs.getInfinity() + "\\E"; 1202 positivePrefix = df.getPositivePrefix(); 1203 if (positivePrefix.length() > 0) 1204 positivePrefix = "\\Q" + positivePrefix + "\\E"; 1205 negativePrefix = df.getNegativePrefix(); 1206 if (negativePrefix.length() > 0) 1207 negativePrefix = "\\Q" + negativePrefix + "\\E"; 1208 positiveSuffix = df.getPositiveSuffix(); 1209 if (positiveSuffix.length() > 0) 1210 positiveSuffix = "\\Q" + positiveSuffix + "\\E"; 1211 negativeSuffix = df.getNegativeSuffix(); 1212 if (negativeSuffix.length() > 0) 1213 negativeSuffix = "\\Q" + negativeSuffix + "\\E"; 1214 1215 // Force rebuilding and recompilation of locale dependent 1216 // primitive patterns 1217 integerPattern = null; 1218 floatPattern = null; 1219 1220 return this; 1221 } 1222 1223 /** 1224 * Returns this scanner's default radix. 
1225 * 1226 * <p>A scanner's radix affects elements of its default 1227 * number matching regular expressions; see 1228 * <a href= "#localized-numbers">localized numbers</a> above. 1229 * 1230 * @return the default radix of this scanner 1231 */ 1232 public int radix() { 1233 return this.defaultRadix; 1234 } 1235 1236 /** 1237 * Sets this scanner's default radix to the specified radix. 1238 * 1239 * <p>A scanner's radix affects elements of its default 1240 * number matching regular expressions; see 1241 * <a href= "#localized-numbers">localized numbers</a> above. 1242 * 1243 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 1244 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 1245 * {@code IllegalArgumentException} is thrown. 1246 * 1247 * <p>Invoking the {@link #reset} method will set the scanner's radix to 1248 * {@code 10}. 1249 * 1250 * @param radix The radix to use when scanning numbers 1251 * @return this scanner 1252 * @throws IllegalArgumentException if radix is out of range 1253 */ 1254 public Scanner useRadix(int radix) { 1255 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) 1256 throw new IllegalArgumentException("radix:"+radix); 1257 1258 if (this.defaultRadix == radix) 1259 return this; 1260 modCount++; 1261 this.defaultRadix = radix; 1262 // Force rebuilding and recompilation of radix dependent patterns 1263 integerPattern = null; 1264 return this; 1265 } 1266 1267 // The next operation should occur in the specified radix but 1268 // the default is left untouched. 
1269 private void setRadix(int radix) { 1270 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) 1271 throw new IllegalArgumentException("radix:"+radix); 1272 1273 if (this.radix != radix) { 1274 // Force rebuilding and recompilation of radix dependent patterns 1275 integerPattern = null; 1276 this.radix = radix; 1277 } 1278 } 1279 1280 /** 1281 * Returns the match result of the last scanning operation performed 1282 * by this scanner. This method throws {@code IllegalStateException} 1283 * if no match has been performed, or if the last match was 1284 * not successful. 1285 * 1286 * <p>The various {@code next} methods of {@code Scanner} 1287 * make a match result available if they complete without throwing an 1288 * exception. For instance, after an invocation of the {@link #nextInt} 1289 * method that returned an int, this method returns a 1290 * {@code MatchResult} for the search of the 1291 * <a href="#Integer-regex"><i>Integer</i></a> regular expression 1292 * defined above. Similarly the {@link #findInLine findInLine()}, 1293 * {@link #findWithinHorizon findWithinHorizon()}, and {@link #skip skip()} 1294 * methods will make a match available if they succeed. 1295 * 1296 * @return a match result for the last match operation 1297 * @throws IllegalStateException If no match result is available 1298 */ 1299 public MatchResult match() { 1300 if (!matchValid) 1301 throw new IllegalStateException("No match result available"); 1302 return matcher.toMatchResult(); 1303 } 1304 1305 /** 1306 * <p>Returns the string representation of this {@code Scanner}. The 1307 * string representation of a {@code Scanner} contains information 1308 * that may be useful for debugging. The exact format is unspecified. 
1309 * 1310 * @return The string representation of this scanner 1311 */ 1312 public String toString() { 1313 StringBuilder sb = new StringBuilder(); 1314 sb.append("java.util.Scanner"); 1315 sb.append("[delimiters=" + delimPattern + "]"); 1316 sb.append("[position=" + position + "]"); 1317 sb.append("[match valid=" + matchValid + "]"); 1318 sb.append("[need input=" + needInput + "]"); 1319 sb.append("[source closed=" + sourceClosed + "]"); 1320 sb.append("[skipped=" + skipped + "]"); 1321 sb.append("[group separator=" + groupSeparator + "]"); 1322 sb.append("[decimal separator=" + decimalSeparator + "]"); 1323 sb.append("[positive prefix=" + positivePrefix + "]"); 1324 sb.append("[negative prefix=" + negativePrefix + "]"); 1325 sb.append("[positive suffix=" + positiveSuffix + "]"); 1326 sb.append("[negative suffix=" + negativeSuffix + "]"); 1327 sb.append("[NaN string=" + nanString + "]"); 1328 sb.append("[infinity string=" + infinityString + "]"); 1329 return sb.toString(); 1330 } 1331 1332 /** 1333 * Returns true if this scanner has another token in its input. 1334 * This method may block while waiting for input to scan. 1335 * The scanner does not advance past any input. 1336 * 1337 * @return true if and only if this scanner has another token 1338 * @throws IllegalStateException if this scanner is closed 1339 * @see java.util.Iterator 1340 */ 1341 public boolean hasNext() { 1342 ensureOpen(); 1343 saveState(); 1344 modCount++; 1345 while (!sourceClosed) { 1346 if (hasTokenInBuffer()) { 1347 return revertState(true); 1348 } 1349 readInput(); 1350 } 1351 boolean result = hasTokenInBuffer(); 1352 return revertState(result); 1353 } 1354 1355 /** 1356 * Finds and returns the next complete token from this scanner. 1357 * A complete token is preceded and followed by input that matches 1358 * the delimiter pattern. This method may block while waiting for input 1359 * to scan, even if a previous invocation of {@link #hasNext} returned 1360 * {@code true}. 
1361 * 1362 * @return the next token 1363 * @throws NoSuchElementException if no more tokens are available 1364 * @throws IllegalStateException if this scanner is closed 1365 * @see java.util.Iterator 1366 */ 1367 public String next() { 1368 ensureOpen(); 1369 clearCaches(); 1370 modCount++; 1371 while (true) { 1372 String token = getCompleteTokenInBuffer(null); 1373 if (token != null) { 1374 matchValid = true; 1375 skipped = false; 1376 return token; 1377 } 1378 if (needInput) 1379 readInput(); 1380 else 1381 throwFor(); 1382 } 1383 } 1384 1385 /** 1386 * The remove operation is not supported by this implementation of 1387 * {@code Iterator}. 1388 * 1389 * @throws UnsupportedOperationException if this method is invoked. 1390 * @see java.util.Iterator 1391 */ 1392 public void remove() { 1393 throw new UnsupportedOperationException(); 1394 } 1395 1396 /** 1397 * Returns true if the next token matches the pattern constructed from the 1398 * specified string. The scanner does not advance past any input. 1399 * 1400 * <p> An invocation of this method of the form {@code hasNext(pattern)} 1401 * behaves in exactly the same way as the invocation 1402 * {@code hasNext(Pattern.compile(pattern))}. 1403 * 1404 * @param pattern a string specifying the pattern to scan 1405 * @return true if and only if this scanner has another token matching 1406 * the specified pattern 1407 * @throws IllegalStateException if this scanner is closed 1408 */ 1409 public boolean hasNext(String pattern) { 1410 return hasNext(patternCache.forName(pattern)); 1411 } 1412 1413 /** 1414 * Returns the next token if it matches the pattern constructed from the 1415 * specified string. If the match is successful, the scanner advances 1416 * past the input that matched the pattern. 1417 * 1418 * <p> An invocation of this method of the form {@code next(pattern)} 1419 * behaves in exactly the same way as the invocation 1420 * {@code next(Pattern.compile(pattern))}. 
1421 * 1422 * @param pattern a string specifying the pattern to scan 1423 * @return the next token 1424 * @throws NoSuchElementException if no such tokens are available 1425 * @throws IllegalStateException if this scanner is closed 1426 */ 1427 public String next(String pattern) { 1428 return next(patternCache.forName(pattern)); 1429 } 1430 1431 /** 1432 * Returns true if the next complete token matches the specified pattern. 1433 * A complete token is prefixed and postfixed by input that matches 1434 * the delimiter pattern. This method may block while waiting for input. 1435 * The scanner does not advance past any input. 1436 * 1437 * @param pattern the pattern to scan for 1438 * @return true if and only if this scanner has another token matching 1439 * the specified pattern 1440 * @throws IllegalStateException if this scanner is closed 1441 */ 1442 public boolean hasNext(Pattern pattern) { 1443 ensureOpen(); 1444 if (pattern == null) 1445 throw new NullPointerException(); 1446 hasNextPattern = null; 1447 saveState(); 1448 modCount++; 1449 1450 while (true) { 1451 if (getCompleteTokenInBuffer(pattern) != null) { 1452 matchValid = true; 1453 cacheResult(); 1454 return revertState(true); 1455 } 1456 if (needInput) 1457 readInput(); 1458 else 1459 return revertState(false); 1460 } 1461 } 1462 1463 /** 1464 * Returns the next token if it matches the specified pattern. This 1465 * method may block while waiting for input to scan, even if a previous 1466 * invocation of {@link #hasNext(Pattern)} returned {@code true}. 1467 * If the match is successful, the scanner advances past the input that 1468 * matched the pattern. 
1469 * 1470 * @param pattern the pattern to scan for 1471 * @return the next token 1472 * @throws NoSuchElementException if no more tokens are available 1473 * @throws IllegalStateException if this scanner is closed 1474 */ 1475 public String next(Pattern pattern) { 1476 ensureOpen(); 1477 if (pattern == null) 1478 throw new NullPointerException(); 1479 1480 modCount++; 1481 // Did we already find this pattern? 1482 if (hasNextPattern == pattern) 1483 return getCachedResult(); 1484 clearCaches(); 1485 1486 // Search for the pattern 1487 while (true) { 1488 String token = getCompleteTokenInBuffer(pattern); 1489 if (token != null) { 1490 matchValid = true; 1491 skipped = false; 1492 return token; 1493 } 1494 if (needInput) 1495 readInput(); 1496 else 1497 throwFor(); 1498 } 1499 } 1500 1501 /** 1502 * Returns true if there is another line in the input of this scanner. 1503 * This method may block while waiting for input. The scanner does not 1504 * advance past any input. 1505 * 1506 * @return true if and only if this scanner has another line of input 1507 * @throws IllegalStateException if this scanner is closed 1508 */ 1509 public boolean hasNextLine() { 1510 saveState(); 1511 1512 modCount++; 1513 String result = findWithinHorizon(linePattern(), 0); 1514 if (result != null) { 1515 MatchResult mr = this.match(); 1516 String lineSep = mr.group(1); 1517 if (lineSep != null) { 1518 result = result.substring(0, result.length() - 1519 lineSep.length()); 1520 cacheResult(result); 1521 1522 } else { 1523 cacheResult(); 1524 } 1525 } 1526 revertState(); 1527 return (result != null); 1528 } 1529 1530 /** 1531 * Advances this scanner past the current line and returns the input 1532 * that was skipped. 1533 * 1534 * This method returns the rest of the current line, excluding any line 1535 * separator at the end. The position is set to the beginning of the next 1536 * line. 
1537 * 1538 * <p>Since this method continues to search through the input looking 1539 * for a line separator, it may buffer all of the input searching for 1540 * the line to skip if no line separators are present. 1541 * 1542 * @return the line that was skipped 1543 * @throws NoSuchElementException if no line was found 1544 * @throws IllegalStateException if this scanner is closed 1545 */ 1546 public String nextLine() { 1547 modCount++; 1548 if (hasNextPattern == linePattern()) 1549 return getCachedResult(); 1550 clearCaches(); 1551 1552 String result = findWithinHorizon(linePattern, 0); 1553 if (result == null) 1554 throw new NoSuchElementException("No line found"); 1555 MatchResult mr = this.match(); 1556 String lineSep = mr.group(1); 1557 if (lineSep != null) 1558 result = result.substring(0, result.length() - lineSep.length()); 1559 if (result == null) 1560 throw new NoSuchElementException(); 1561 else 1562 return result; 1563 } 1564 1565 // Public methods that ignore delimiters 1566 1567 /** 1568 * Attempts to find the next occurrence of a pattern constructed from the 1569 * specified string, ignoring delimiters. 1570 * 1571 * <p>An invocation of this method of the form {@code findInLine(pattern)} 1572 * behaves in exactly the same way as the invocation 1573 * {@code findInLine(Pattern.compile(pattern))}. 1574 * 1575 * @param pattern a string specifying the pattern to search for 1576 * @return the text that matched the specified pattern 1577 * @throws IllegalStateException if this scanner is closed 1578 */ 1579 public String findInLine(String pattern) { 1580 return findInLine(patternCache.forName(pattern)); 1581 } 1582 1583 /** 1584 * Attempts to find the next occurrence of the specified pattern ignoring 1585 * delimiters. If the pattern is found before the next line separator, the 1586 * scanner advances past the input that matched and returns the string that 1587 * matched the pattern. 
1588 * If no such pattern is detected in the input up to the next line 1589 * separator, then {@code null} is returned and the scanner's 1590 * position is unchanged. This method may block waiting for input that 1591 * matches the pattern. 1592 * 1593 * <p>Since this method continues to search through the input looking 1594 * for the specified pattern, it may buffer all of the input searching for 1595 * the desired token if no line separators are present. 1596 * 1597 * @param pattern the pattern to scan for 1598 * @return the text that matched the specified pattern 1599 * @throws IllegalStateException if this scanner is closed 1600 */ 1601 public String findInLine(Pattern pattern) { 1602 ensureOpen(); 1603 if (pattern == null) 1604 throw new NullPointerException(); 1605 clearCaches(); 1606 modCount++; 1607 // Expand buffer to include the next newline or end of input 1608 int endPosition = 0; 1609 saveState(); 1610 while (true) { 1611 if (findPatternInBuffer(separatorPattern(), 0)) { 1612 endPosition = matcher.start(); 1613 break; // up to next newline 1614 } 1615 if (needInput) { 1616 readInput(); 1617 } else { 1618 endPosition = buf.limit(); 1619 break; // up to end of input 1620 } 1621 } 1622 revertState(); 1623 int horizonForLine = endPosition - position; 1624 // If there is nothing between the current pos and the next 1625 // newline simply return null, invoking findWithinHorizon 1626 // with "horizon=0" will scan beyond the line bound. 1627 if (horizonForLine == 0) 1628 return null; 1629 // Search for the pattern 1630 return findWithinHorizon(pattern, horizonForLine); 1631 } 1632 1633 /** 1634 * Attempts to find the next occurrence of a pattern constructed from the 1635 * specified string, ignoring delimiters. 1636 * 1637 * <p>An invocation of this method of the form 1638 * {@code findWithinHorizon(pattern)} behaves in exactly the same way as 1639 * the invocation 1640 * {@code findWithinHorizon(Pattern.compile(pattern), horizon)}. 
1641 * 1642 * @param pattern a string specifying the pattern to search for 1643 * @param horizon the search horizon 1644 * @return the text that matched the specified pattern 1645 * @throws IllegalStateException if this scanner is closed 1646 * @throws IllegalArgumentException if horizon is negative 1647 */ 1648 public String findWithinHorizon(String pattern, int horizon) { 1649 return findWithinHorizon(patternCache.forName(pattern), horizon); 1650 } 1651 1652 /** 1653 * Attempts to find the next occurrence of the specified pattern. 1654 * 1655 * <p>This method searches through the input up to the specified 1656 * search horizon, ignoring delimiters. If the pattern is found the 1657 * scanner advances past the input that matched and returns the string 1658 * that matched the pattern. If no such pattern is detected then the 1659 * null is returned and the scanner's position remains unchanged. This 1660 * method may block waiting for input that matches the pattern. 1661 * 1662 * <p>A scanner will never search more than {@code horizon} code 1663 * points beyond its current position. Note that a match may be clipped 1664 * by the horizon; that is, an arbitrary match result may have been 1665 * different if the horizon had been larger. The scanner treats the 1666 * horizon as a transparent, non-anchoring bound (see {@link 1667 * Matcher#useTransparentBounds} and {@link Matcher#useAnchoringBounds}). 1668 * 1669 * <p>If horizon is {@code 0}, then the horizon is ignored and 1670 * this method continues to search through the input looking for the 1671 * specified pattern without bound. In this case it may buffer all of 1672 * the input searching for the pattern. 1673 * 1674 * <p>If horizon is negative, then an IllegalArgumentException is 1675 * thrown. 
1676 * 1677 * @param pattern the pattern to scan for 1678 * @param horizon the search horizon 1679 * @return the text that matched the specified pattern 1680 * @throws IllegalStateException if this scanner is closed 1681 * @throws IllegalArgumentException if horizon is negative 1682 */ 1683 public String findWithinHorizon(Pattern pattern, int horizon) { 1684 ensureOpen(); 1685 if (pattern == null) 1686 throw new NullPointerException(); 1687 if (horizon < 0) 1688 throw new IllegalArgumentException("horizon < 0"); 1689 clearCaches(); 1690 modCount++; 1691 1692 // Search for the pattern 1693 while (true) { 1694 if (findPatternInBuffer(pattern, horizon)) { 1695 matchValid = true; 1696 return matcher.group(); 1697 } 1698 if (needInput) 1699 readInput(); 1700 else 1701 break; // up to end of input 1702 } 1703 return null; 1704 } 1705 1706 /** 1707 * Skips input that matches the specified pattern, ignoring delimiters. 1708 * This method will skip input if an anchored match of the specified 1709 * pattern succeeds. 1710 * 1711 * <p>If a match to the specified pattern is not found at the 1712 * current position, then no input is skipped and a 1713 * {@code NoSuchElementException} is thrown. 1714 * 1715 * <p>Since this method seeks to match the specified pattern starting at 1716 * the scanner's current position, patterns that can match a lot of 1717 * input (".*", for example) may cause the scanner to buffer a large 1718 * amount of input. 1719 * 1720 * <p>Note that it is possible to skip something without risking a 1721 * {@code NoSuchElementException} by using a pattern that can 1722 * match nothing, e.g., {@code sc.skip("[ \t]*")}. 
1723 * 1724 * @param pattern a string specifying the pattern to skip over 1725 * @return this scanner 1726 * @throws NoSuchElementException if the specified pattern is not found 1727 * @throws IllegalStateException if this scanner is closed 1728 */ 1729 public Scanner skip(Pattern pattern) { 1730 ensureOpen(); 1731 if (pattern == null) 1732 throw new NullPointerException(); 1733 clearCaches(); 1734 modCount++; 1735 1736 // Search for the pattern 1737 while (true) { 1738 if (matchPatternInBuffer(pattern)) { 1739 matchValid = true; 1740 position = matcher.end(); 1741 return this; 1742 } 1743 if (needInput) 1744 readInput(); 1745 else 1746 throw new NoSuchElementException(); 1747 } 1748 } 1749 1750 /** 1751 * Skips input that matches a pattern constructed from the specified 1752 * string. 1753 * 1754 * <p> An invocation of this method of the form {@code skip(pattern)} 1755 * behaves in exactly the same way as the invocation 1756 * {@code skip(Pattern.compile(pattern))}. 1757 * 1758 * @param pattern a string specifying the pattern to skip over 1759 * @return this scanner 1760 * @throws IllegalStateException if this scanner is closed 1761 */ 1762 public Scanner skip(String pattern) { 1763 return skip(patternCache.forName(pattern)); 1764 } 1765 1766 // Convenience methods for scanning primitives 1767 1768 /** 1769 * Returns true if the next token in this scanner's input can be 1770 * interpreted as a boolean value using a case insensitive pattern 1771 * created from the string "true|false". The scanner does not 1772 * advance past the input that matched. 1773 * 1774 * @return true if and only if this scanner's next token is a valid 1775 * boolean value 1776 * @throws IllegalStateException if this scanner is closed 1777 */ 1778 public boolean hasNextBoolean() { 1779 return hasNext(boolPattern()); 1780 } 1781 1782 /** 1783 * Scans the next token of the input into a boolean value and returns 1784 * that value. 
This method will throw {@code InputMismatchException} 1785 * if the next token cannot be translated into a valid boolean value. 1786 * If the match is successful, the scanner advances past the input that 1787 * matched. 1788 * 1789 * @return the boolean scanned from the input 1790 * @throws InputMismatchException if the next token is not a valid boolean 1791 * @throws NoSuchElementException if input is exhausted 1792 * @throws IllegalStateException if this scanner is closed 1793 */ 1794 public boolean nextBoolean() { 1795 clearCaches(); 1796 return Boolean.parseBoolean(next(boolPattern())); 1797 } 1798 1799 /** 1800 * Returns true if the next token in this scanner's input can be 1801 * interpreted as a byte value in the default radix using the 1802 * {@link #nextByte} method. The scanner does not advance past any input. 1803 * 1804 * @return true if and only if this scanner's next token is a valid 1805 * byte value 1806 * @throws IllegalStateException if this scanner is closed 1807 */ 1808 public boolean hasNextByte() { 1809 return hasNextByte(defaultRadix); 1810 } 1811 1812 /** 1813 * Returns true if the next token in this scanner's input can be 1814 * interpreted as a byte value in the specified radix using the 1815 * {@link #nextByte} method. The scanner does not advance past any input. 1816 * 1817 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 1818 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 1819 * {@code IllegalArgumentException} is thrown. 
1820 * 1821 * @param radix the radix used to interpret the token as a byte value 1822 * @return true if and only if this scanner's next token is a valid 1823 * byte value 1824 * @throws IllegalStateException if this scanner is closed 1825 * @throws IllegalArgumentException if the radix is out of range 1826 */ 1827 public boolean hasNextByte(int radix) { 1828 setRadix(radix); 1829 boolean result = hasNext(integerPattern()); 1830 if (result) { // Cache it 1831 try { 1832 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 1833 processIntegerToken(hasNextResult) : 1834 hasNextResult; 1835 typeCache = Byte.parseByte(s, radix); 1836 } catch (NumberFormatException nfe) { 1837 result = false; 1838 } 1839 } 1840 return result; 1841 } 1842 1843 /** 1844 * Scans the next token of the input as a {@code byte}. 1845 * 1846 * <p> An invocation of this method of the form 1847 * {@code nextByte()} behaves in exactly the same way as the 1848 * invocation {@code nextByte(radix)}, where {@code radix} 1849 * is the default radix of this scanner. 1850 * 1851 * @return the {@code byte} scanned from the input 1852 * @throws InputMismatchException 1853 * if the next token does not match the <i>Integer</i> 1854 * regular expression, or is out of range 1855 * @throws NoSuchElementException if input is exhausted 1856 * @throws IllegalStateException if this scanner is closed 1857 */ 1858 public byte nextByte() { 1859 return nextByte(defaultRadix); 1860 } 1861 1862 /** 1863 * Scans the next token of the input as a {@code byte}. 1864 * This method will throw {@code InputMismatchException} 1865 * if the next token cannot be translated into a valid byte value as 1866 * described below. If the translation is successful, the scanner advances 1867 * past the input that matched. 
1868 * 1869 * <p> If the next token matches the <a 1870 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1871 * above then the token is converted into a {@code byte} value as if by 1872 * removing all locale specific prefixes, group separators, and locale 1873 * specific suffixes, then mapping non-ASCII digits into ASCII 1874 * digits via {@link Character#digit Character.digit}, prepending a 1875 * negative sign (-) if the locale specific negative prefixes and suffixes 1876 * were present, and passing the resulting string to 1877 * {@link Byte#parseByte(String, int) Byte.parseByte} with the 1878 * specified radix. 1879 * 1880 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 1881 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 1882 * {@code IllegalArgumentException} is thrown. 1883 * 1884 * @param radix the radix used to interpret the token as a byte value 1885 * @return the {@code byte} scanned from the input 1886 * @throws InputMismatchException 1887 * if the next token does not match the <i>Integer</i> 1888 * regular expression, or is out of range 1889 * @throws NoSuchElementException if input is exhausted 1890 * @throws IllegalStateException if this scanner is closed 1891 * @throws IllegalArgumentException if the radix is out of range 1892 */ 1893 public byte nextByte(int radix) { 1894 // Check cached result 1895 if ((typeCache != null) && (typeCache instanceof Byte) 1896 && this.radix == radix) { 1897 byte val = ((Byte)typeCache).byteValue(); 1898 useTypeCache(); 1899 return val; 1900 } 1901 setRadix(radix); 1902 clearCaches(); 1903 // Search for next byte 1904 try { 1905 String s = next(integerPattern()); 1906 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 1907 s = processIntegerToken(s); 1908 return Byte.parseByte(s, radix); 1909 } catch (NumberFormatException nfe) { 1910 position = matcher.start(); // don't skip bad token 1911 throw new InputMismatchException(nfe.getMessage()); 
1912 } 1913 } 1914 1915 /** 1916 * Returns true if the next token in this scanner's input can be 1917 * interpreted as a short value in the default radix using the 1918 * {@link #nextShort} method. The scanner does not advance past any input. 1919 * 1920 * @return true if and only if this scanner's next token is a valid 1921 * short value in the default radix 1922 * @throws IllegalStateException if this scanner is closed 1923 */ 1924 public boolean hasNextShort() { 1925 return hasNextShort(defaultRadix); 1926 } 1927 1928 /** 1929 * Returns true if the next token in this scanner's input can be 1930 * interpreted as a short value in the specified radix using the 1931 * {@link #nextShort} method. The scanner does not advance past any input. 1932 * 1933 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 1934 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 1935 * {@code IllegalArgumentException} is thrown. 1936 * 1937 * @param radix the radix used to interpret the token as a short value 1938 * @return true if and only if this scanner's next token is a valid 1939 * short value in the specified radix 1940 * @throws IllegalStateException if this scanner is closed 1941 * @throws IllegalArgumentException if the radix is out of range 1942 */ 1943 public boolean hasNextShort(int radix) { 1944 setRadix(radix); 1945 boolean result = hasNext(integerPattern()); 1946 if (result) { // Cache it 1947 try { 1948 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 1949 processIntegerToken(hasNextResult) : 1950 hasNextResult; 1951 typeCache = Short.parseShort(s, radix); 1952 } catch (NumberFormatException nfe) { 1953 result = false; 1954 } 1955 } 1956 return result; 1957 } 1958 1959 /** 1960 * Scans the next token of the input as a {@code short}. 
1961 * 1962 * <p> An invocation of this method of the form 1963 * {@code nextShort()} behaves in exactly the same way as the 1964 * invocation {@link #nextShort(int) nextShort(radix)}, where {@code radix} 1965 * is the default radix of this scanner. 1966 * 1967 * @return the {@code short} scanned from the input 1968 * @throws InputMismatchException 1969 * if the next token does not match the <i>Integer</i> 1970 * regular expression, or is out of range 1971 * @throws NoSuchElementException if input is exhausted 1972 * @throws IllegalStateException if this scanner is closed 1973 */ 1974 public short nextShort() { 1975 return nextShort(defaultRadix); 1976 } 1977 1978 /** 1979 * Scans the next token of the input as a {@code short}. 1980 * This method will throw {@code InputMismatchException} 1981 * if the next token cannot be translated into a valid short value as 1982 * described below. If the translation is successful, the scanner advances 1983 * past the input that matched. 1984 * 1985 * <p> If the next token matches the <a 1986 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1987 * above then the token is converted into a {@code short} value as if by 1988 * removing all locale specific prefixes, group separators, and locale 1989 * specific suffixes, then mapping non-ASCII digits into ASCII 1990 * digits via {@link Character#digit Character.digit}, prepending a 1991 * negative sign (-) if the locale specific negative prefixes and suffixes 1992 * were present, and passing the resulting string to 1993 * {@link Short#parseShort(String, int) Short.parseShort} with the 1994 * specified radix. 1995 * 1996 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 1997 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 1998 * {@code IllegalArgumentException} is thrown. 
1999 * 2000 * @param radix the radix used to interpret the token as a short value 2001 * @return the {@code short} scanned from the input 2002 * @throws InputMismatchException 2003 * if the next token does not match the <i>Integer</i> 2004 * regular expression, or is out of range 2005 * @throws NoSuchElementException if input is exhausted 2006 * @throws IllegalStateException if this scanner is closed 2007 * @throws IllegalArgumentException if the radix is out of range 2008 */ 2009 public short nextShort(int radix) { 2010 // Check cached result 2011 if ((typeCache != null) && (typeCache instanceof Short) 2012 && this.radix == radix) { 2013 short val = ((Short)typeCache).shortValue(); 2014 useTypeCache(); 2015 return val; 2016 } 2017 setRadix(radix); 2018 clearCaches(); 2019 // Search for next short 2020 try { 2021 String s = next(integerPattern()); 2022 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2023 s = processIntegerToken(s); 2024 return Short.parseShort(s, radix); 2025 } catch (NumberFormatException nfe) { 2026 position = matcher.start(); // don't skip bad token 2027 throw new InputMismatchException(nfe.getMessage()); 2028 } 2029 } 2030 2031 /** 2032 * Returns true if the next token in this scanner's input can be 2033 * interpreted as an int value in the default radix using the 2034 * {@link #nextInt} method. The scanner does not advance past any input. 2035 * 2036 * @return true if and only if this scanner's next token is a valid 2037 * int value 2038 * @throws IllegalStateException if this scanner is closed 2039 */ 2040 public boolean hasNextInt() { 2041 return hasNextInt(defaultRadix); 2042 } 2043 2044 /** 2045 * Returns true if the next token in this scanner's input can be 2046 * interpreted as an int value in the specified radix using the 2047 * {@link #nextInt} method. The scanner does not advance past any input. 
2048 * 2049 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2050 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2051 * {@code IllegalArgumentException} is thrown. 2052 * 2053 * @param radix the radix used to interpret the token as an int value 2054 * @return true if and only if this scanner's next token is a valid 2055 * int value 2056 * @throws IllegalStateException if this scanner is closed 2057 * @throws IllegalArgumentException if the radix is out of range 2058 */ 2059 public boolean hasNextInt(int radix) { 2060 setRadix(radix); 2061 boolean result = hasNext(integerPattern()); 2062 if (result) { // Cache it 2063 try { 2064 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2065 processIntegerToken(hasNextResult) : 2066 hasNextResult; 2067 typeCache = Integer.parseInt(s, radix); 2068 } catch (NumberFormatException nfe) { 2069 result = false; 2070 } 2071 } 2072 return result; 2073 } 2074 2075 /** 2076 * The integer token must be stripped of prefixes, group separators, 2077 * and suffixes, non ascii digits must be converted into ascii digits 2078 * before parse will accept it. 2079 */ 2080 private String processIntegerToken(String token) { 2081 String result = token.replaceAll(""+groupSeparator, ""); 2082 boolean isNegative = false; 2083 int preLen = negativePrefix.length(); 2084 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2085 isNegative = true; 2086 result = result.substring(preLen); 2087 } 2088 int sufLen = negativeSuffix.length(); 2089 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2090 isNegative = true; 2091 result = result.substring(result.length() - sufLen, 2092 result.length()); 2093 } 2094 if (isNegative) 2095 result = "-" + result; 2096 return result; 2097 } 2098 2099 /** 2100 * Scans the next token of the input as an {@code int}. 
2101 * 2102 * <p> An invocation of this method of the form 2103 * {@code nextInt()} behaves in exactly the same way as the 2104 * invocation {@code nextInt(radix)}, where {@code radix} 2105 * is the default radix of this scanner. 2106 * 2107 * @return the {@code int} scanned from the input 2108 * @throws InputMismatchException 2109 * if the next token does not match the <i>Integer</i> 2110 * regular expression, or is out of range 2111 * @throws NoSuchElementException if input is exhausted 2112 * @throws IllegalStateException if this scanner is closed 2113 */ 2114 public int nextInt() { 2115 return nextInt(defaultRadix); 2116 } 2117 2118 /** 2119 * Scans the next token of the input as an {@code int}. 2120 * This method will throw {@code InputMismatchException} 2121 * if the next token cannot be translated into a valid int value as 2122 * described below. If the translation is successful, the scanner advances 2123 * past the input that matched. 2124 * 2125 * <p> If the next token matches the <a 2126 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2127 * above then the token is converted into an {@code int} value as if by 2128 * removing all locale specific prefixes, group separators, and locale 2129 * specific suffixes, then mapping non-ASCII digits into ASCII 2130 * digits via {@link Character#digit Character.digit}, prepending a 2131 * negative sign (-) if the locale specific negative prefixes and suffixes 2132 * were present, and passing the resulting string to 2133 * {@link Integer#parseInt(String, int) Integer.parseInt} with the 2134 * specified radix. 2135 * 2136 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2137 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2138 * {@code IllegalArgumentException} is thrown. 
2139 * 2140 * @param radix the radix used to interpret the token as an int value 2141 * @return the {@code int} scanned from the input 2142 * @throws InputMismatchException 2143 * if the next token does not match the <i>Integer</i> 2144 * regular expression, or is out of range 2145 * @throws NoSuchElementException if input is exhausted 2146 * @throws IllegalStateException if this scanner is closed 2147 * @throws IllegalArgumentException if the radix is out of range 2148 */ 2149 public int nextInt(int radix) { 2150 // Check cached result 2151 if ((typeCache != null) && (typeCache instanceof Integer) 2152 && this.radix == radix) { 2153 int val = ((Integer)typeCache).intValue(); 2154 useTypeCache(); 2155 return val; 2156 } 2157 setRadix(radix); 2158 clearCaches(); 2159 // Search for next int 2160 try { 2161 String s = next(integerPattern()); 2162 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2163 s = processIntegerToken(s); 2164 return Integer.parseInt(s, radix); 2165 } catch (NumberFormatException nfe) { 2166 position = matcher.start(); // don't skip bad token 2167 throw new InputMismatchException(nfe.getMessage()); 2168 } 2169 } 2170 2171 /** 2172 * Returns true if the next token in this scanner's input can be 2173 * interpreted as a long value in the default radix using the 2174 * {@link #nextLong} method. The scanner does not advance past any input. 2175 * 2176 * @return true if and only if this scanner's next token is a valid 2177 * long value 2178 * @throws IllegalStateException if this scanner is closed 2179 */ 2180 public boolean hasNextLong() { 2181 return hasNextLong(defaultRadix); 2182 } 2183 2184 /** 2185 * Returns true if the next token in this scanner's input can be 2186 * interpreted as a long value in the specified radix using the 2187 * {@link #nextLong} method. The scanner does not advance past any input. 
2188 * 2189 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2190 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2191 * {@code IllegalArgumentException} is thrown. 2192 * 2193 * @param radix the radix used to interpret the token as a long value 2194 * @return true if and only if this scanner's next token is a valid 2195 * long value 2196 * @throws IllegalStateException if this scanner is closed 2197 * @throws IllegalArgumentException if the radix is out of range 2198 */ 2199 public boolean hasNextLong(int radix) { 2200 setRadix(radix); 2201 boolean result = hasNext(integerPattern()); 2202 if (result) { // Cache it 2203 try { 2204 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2205 processIntegerToken(hasNextResult) : 2206 hasNextResult; 2207 typeCache = Long.parseLong(s, radix); 2208 } catch (NumberFormatException nfe) { 2209 result = false; 2210 } 2211 } 2212 return result; 2213 } 2214 2215 /** 2216 * Scans the next token of the input as a {@code long}. 2217 * 2218 * <p> An invocation of this method of the form 2219 * {@code nextLong()} behaves in exactly the same way as the 2220 * invocation {@code nextLong(radix)}, where {@code radix} 2221 * is the default radix of this scanner. 2222 * 2223 * @return the {@code long} scanned from the input 2224 * @throws InputMismatchException 2225 * if the next token does not match the <i>Integer</i> 2226 * regular expression, or is out of range 2227 * @throws NoSuchElementException if input is exhausted 2228 * @throws IllegalStateException if this scanner is closed 2229 */ 2230 public long nextLong() { 2231 return nextLong(defaultRadix); 2232 } 2233 2234 /** 2235 * Scans the next token of the input as a {@code long}. 2236 * This method will throw {@code InputMismatchException} 2237 * if the next token cannot be translated into a valid long value as 2238 * described below. 
If the translation is successful, the scanner advances 2239 * past the input that matched. 2240 * 2241 * <p> If the next token matches the <a 2242 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2243 * above then the token is converted into a {@code long} value as if by 2244 * removing all locale specific prefixes, group separators, and locale 2245 * specific suffixes, then mapping non-ASCII digits into ASCII 2246 * digits via {@link Character#digit Character.digit}, prepending a 2247 * negative sign (-) if the locale specific negative prefixes and suffixes 2248 * were present, and passing the resulting string to 2249 * {@link Long#parseLong(String, int) Long.parseLong} with the 2250 * specified radix. 2251 * 2252 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2253 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2254 * {@code IllegalArgumentException} is thrown. 2255 * 2256 * @param radix the radix used to interpret the token as an int value 2257 * @return the {@code long} scanned from the input 2258 * @throws InputMismatchException 2259 * if the next token does not match the <i>Integer</i> 2260 * regular expression, or is out of range 2261 * @throws NoSuchElementException if input is exhausted 2262 * @throws IllegalStateException if this scanner is closed 2263 * @throws IllegalArgumentException if the radix is out of range 2264 */ 2265 public long nextLong(int radix) { 2266 // Check cached result 2267 if ((typeCache != null) && (typeCache instanceof Long) 2268 && this.radix == radix) { 2269 long val = ((Long)typeCache).longValue(); 2270 useTypeCache(); 2271 return val; 2272 } 2273 setRadix(radix); 2274 clearCaches(); 2275 try { 2276 String s = next(integerPattern()); 2277 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2278 s = processIntegerToken(s); 2279 return Long.parseLong(s, radix); 2280 } catch (NumberFormatException nfe) { 2281 position = matcher.start(); // don't skip bad 
token 2282 throw new InputMismatchException(nfe.getMessage()); 2283 } 2284 } 2285 2286 /** 2287 * The float token must be stripped of prefixes, group separators, 2288 * and suffixes, non ascii digits must be converted into ascii digits 2289 * before parseFloat will accept it. 2290 * 2291 * If there are non-ascii digits in the token these digits must 2292 * be processed before the token is passed to parseFloat. 2293 */ 2294 private String processFloatToken(String token) { 2295 String result = token.replaceAll(groupSeparator, ""); 2296 if (!decimalSeparator.equals("\\.")) 2297 result = result.replaceAll(decimalSeparator, "."); 2298 boolean isNegative = false; 2299 int preLen = negativePrefix.length(); 2300 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2301 isNegative = true; 2302 result = result.substring(preLen); 2303 } 2304 int sufLen = negativeSuffix.length(); 2305 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2306 isNegative = true; 2307 result = result.substring(result.length() - sufLen, 2308 result.length()); 2309 } 2310 if (result.equals(nanString)) 2311 result = "NaN"; 2312 if (result.equals(infinityString)) 2313 result = "Infinity"; 2314 if (isNegative) 2315 result = "-" + result; 2316 2317 // Translate non-ASCII digits 2318 Matcher m = NON_ASCII_DIGIT.matcher(result); 2319 if (m.find()) { 2320 StringBuilder inASCII = new StringBuilder(); 2321 for (int i=0; i<result.length(); i++) { 2322 char nextChar = result.charAt(i); 2323 if (Character.isDigit(nextChar)) { 2324 int d = Character.digit(nextChar, 10); 2325 if (d != -1) 2326 inASCII.append(d); 2327 else 2328 inASCII.append(nextChar); 2329 } else { 2330 inASCII.append(nextChar); 2331 } 2332 } 2333 result = inASCII.toString(); 2334 } 2335 2336 return result; 2337 } 2338 2339 /** 2340 * Returns true if the next token in this scanner's input can be 2341 * interpreted as a float value using the {@link #nextFloat} 2342 * method. The scanner does not advance past any input. 
2343 * 2344 * @return true if and only if this scanner's next token is a valid 2345 * float value 2346 * @throws IllegalStateException if this scanner is closed 2347 */ 2348 public boolean hasNextFloat() { 2349 setRadix(10); 2350 boolean result = hasNext(floatPattern()); 2351 if (result) { // Cache it 2352 try { 2353 String s = processFloatToken(hasNextResult); 2354 typeCache = Float.valueOf(Float.parseFloat(s)); 2355 } catch (NumberFormatException nfe) { 2356 result = false; 2357 } 2358 } 2359 return result; 2360 } 2361 2362 /** 2363 * Scans the next token of the input as a {@code float}. 2364 * This method will throw {@code InputMismatchException} 2365 * if the next token cannot be translated into a valid float value as 2366 * described below. If the translation is successful, the scanner advances 2367 * past the input that matched. 2368 * 2369 * <p> If the next token matches the <a 2370 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2371 * then the token is converted into a {@code float} value as if by 2372 * removing all locale specific prefixes, group separators, and locale 2373 * specific suffixes, then mapping non-ASCII digits into ASCII 2374 * digits via {@link Character#digit Character.digit}, prepending a 2375 * negative sign (-) if the locale specific negative prefixes and suffixes 2376 * were present, and passing the resulting string to 2377 * {@link Float#parseFloat Float.parseFloat}. If the token matches 2378 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2379 * is passed to {@link Float#parseFloat(String) Float.parseFloat} as 2380 * appropriate. 
2381 * 2382 * @return the {@code float} scanned from the input 2383 * @throws InputMismatchException 2384 * if the next token does not match the <i>Float</i> 2385 * regular expression, or is out of range 2386 * @throws NoSuchElementException if input is exhausted 2387 * @throws IllegalStateException if this scanner is closed 2388 */ 2389 public float nextFloat() { 2390 // Check cached result 2391 if ((typeCache != null) && (typeCache instanceof Float)) { 2392 float val = ((Float)typeCache).floatValue(); 2393 useTypeCache(); 2394 return val; 2395 } 2396 setRadix(10); 2397 clearCaches(); 2398 try { 2399 return Float.parseFloat(processFloatToken(next(floatPattern()))); 2400 } catch (NumberFormatException nfe) { 2401 position = matcher.start(); // don't skip bad token 2402 throw new InputMismatchException(nfe.getMessage()); 2403 } 2404 } 2405 2406 /** 2407 * Returns true if the next token in this scanner's input can be 2408 * interpreted as a double value using the {@link #nextDouble} 2409 * method. The scanner does not advance past any input. 2410 * 2411 * @return true if and only if this scanner's next token is a valid 2412 * double value 2413 * @throws IllegalStateException if this scanner is closed 2414 */ 2415 public boolean hasNextDouble() { 2416 setRadix(10); 2417 boolean result = hasNext(floatPattern()); 2418 if (result) { // Cache it 2419 try { 2420 String s = processFloatToken(hasNextResult); 2421 typeCache = Double.valueOf(Double.parseDouble(s)); 2422 } catch (NumberFormatException nfe) { 2423 result = false; 2424 } 2425 } 2426 return result; 2427 } 2428 2429 /** 2430 * Scans the next token of the input as a {@code double}. 2431 * This method will throw {@code InputMismatchException} 2432 * if the next token cannot be translated into a valid double value. 2433 * If the translation is successful, the scanner advances past the input 2434 * that matched. 
2435 * 2436 * <p> If the next token matches the <a 2437 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2438 * then the token is converted into a {@code double} value as if by 2439 * removing all locale specific prefixes, group separators, and locale 2440 * specific suffixes, then mapping non-ASCII digits into ASCII 2441 * digits via {@link Character#digit Character.digit}, prepending a 2442 * negative sign (-) if the locale specific negative prefixes and suffixes 2443 * were present, and passing the resulting string to 2444 * {@link Double#parseDouble Double.parseDouble}. If the token matches 2445 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2446 * is passed to {@link Double#parseDouble(String) Double.parseDouble} as 2447 * appropriate. 2448 * 2449 * @return the {@code double} scanned from the input 2450 * @throws InputMismatchException 2451 * if the next token does not match the <i>Float</i> 2452 * regular expression, or is out of range 2453 * @throws NoSuchElementException if the input is exhausted 2454 * @throws IllegalStateException if this scanner is closed 2455 */ 2456 public double nextDouble() { 2457 // Check cached result 2458 if ((typeCache != null) && (typeCache instanceof Double)) { 2459 double val = ((Double)typeCache).doubleValue(); 2460 useTypeCache(); 2461 return val; 2462 } 2463 setRadix(10); 2464 clearCaches(); 2465 // Search for next float 2466 try { 2467 return Double.parseDouble(processFloatToken(next(floatPattern()))); 2468 } catch (NumberFormatException nfe) { 2469 position = matcher.start(); // don't skip bad token 2470 throw new InputMismatchException(nfe.getMessage()); 2471 } 2472 } 2473 2474 // Convenience methods for scanning multi precision numbers 2475 2476 /** 2477 * Returns true if the next token in this scanner's input can be 2478 * interpreted as a {@code BigInteger} in the default radix using the 2479 * {@link #nextBigInteger} method. 
The scanner does not advance past any 2480 * input. 2481 * 2482 * @return true if and only if this scanner's next token is a valid 2483 * {@code BigInteger} 2484 * @throws IllegalStateException if this scanner is closed 2485 */ 2486 public boolean hasNextBigInteger() { 2487 return hasNextBigInteger(defaultRadix); 2488 } 2489 2490 /** 2491 * Returns true if the next token in this scanner's input can be 2492 * interpreted as a {@code BigInteger} in the specified radix using 2493 * the {@link #nextBigInteger} method. The scanner does not advance past 2494 * any input. 2495 * 2496 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2497 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2498 * {@code IllegalArgumentException} is thrown. 2499 * 2500 * @param radix the radix used to interpret the token as an integer 2501 * @return true if and only if this scanner's next token is a valid 2502 * {@code BigInteger} 2503 * @throws IllegalStateException if this scanner is closed 2504 * @throws IllegalArgumentException if the radix is out of range 2505 */ 2506 public boolean hasNextBigInteger(int radix) { 2507 setRadix(radix); 2508 boolean result = hasNext(integerPattern()); 2509 if (result) { // Cache it 2510 try { 2511 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2512 processIntegerToken(hasNextResult) : 2513 hasNextResult; 2514 typeCache = new BigInteger(s, radix); 2515 } catch (NumberFormatException nfe) { 2516 result = false; 2517 } 2518 } 2519 return result; 2520 } 2521 2522 /** 2523 * Scans the next token of the input as a {@link java.math.BigInteger 2524 * BigInteger}. 2525 * 2526 * <p> An invocation of this method of the form 2527 * {@code nextBigInteger()} behaves in exactly the same way as the 2528 * invocation {@code nextBigInteger(radix)}, where {@code radix} 2529 * is the default radix of this scanner. 
2530 * 2531 * @return the {@code BigInteger} scanned from the input 2532 * @throws InputMismatchException 2533 * if the next token does not match the <i>Integer</i> 2534 * regular expression, or is out of range 2535 * @throws NoSuchElementException if the input is exhausted 2536 * @throws IllegalStateException if this scanner is closed 2537 */ 2538 public BigInteger nextBigInteger() { 2539 return nextBigInteger(defaultRadix); 2540 } 2541 2542 /** 2543 * Scans the next token of the input as a {@link java.math.BigInteger 2544 * BigInteger}. 2545 * 2546 * <p> If the next token matches the <a 2547 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2548 * above then the token is converted into a {@code BigInteger} value as if 2549 * by removing all group separators, mapping non-ASCII digits into ASCII 2550 * digits via the {@link Character#digit Character.digit}, and passing the 2551 * resulting string to the {@link 2552 * java.math.BigInteger#BigInteger(java.lang.String) 2553 * BigInteger(String, int)} constructor with the specified radix. 2554 * 2555 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2556 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2557 * {@code IllegalArgumentException} is thrown. 
2558 * 2559 * @param radix the radix used to interpret the token 2560 * @return the {@code BigInteger} scanned from the input 2561 * @throws InputMismatchException 2562 * if the next token does not match the <i>Integer</i> 2563 * regular expression, or is out of range 2564 * @throws NoSuchElementException if the input is exhausted 2565 * @throws IllegalStateException if this scanner is closed 2566 * @throws IllegalArgumentException if the radix is out of range 2567 */ 2568 public BigInteger nextBigInteger(int radix) { 2569 // Check cached result 2570 if ((typeCache != null) && (typeCache instanceof BigInteger) 2571 && this.radix == radix) { 2572 BigInteger val = (BigInteger)typeCache; 2573 useTypeCache(); 2574 return val; 2575 } 2576 setRadix(radix); 2577 clearCaches(); 2578 // Search for next int 2579 try { 2580 String s = next(integerPattern()); 2581 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2582 s = processIntegerToken(s); 2583 return new BigInteger(s, radix); 2584 } catch (NumberFormatException nfe) { 2585 position = matcher.start(); // don't skip bad token 2586 throw new InputMismatchException(nfe.getMessage()); 2587 } 2588 } 2589 2590 /** 2591 * Returns true if the next token in this scanner's input can be 2592 * interpreted as a {@code BigDecimal} using the 2593 * {@link #nextBigDecimal} method. The scanner does not advance past any 2594 * input. 
2595 * 2596 * @return true if and only if this scanner's next token is a valid 2597 * {@code BigDecimal} 2598 * @throws IllegalStateException if this scanner is closed 2599 */ 2600 public boolean hasNextBigDecimal() { 2601 setRadix(10); 2602 boolean result = hasNext(decimalPattern()); 2603 if (result) { // Cache it 2604 try { 2605 String s = processFloatToken(hasNextResult); 2606 typeCache = new BigDecimal(s); 2607 } catch (NumberFormatException nfe) { 2608 result = false; 2609 } 2610 } 2611 return result; 2612 } 2613 2614 /** 2615 * Scans the next token of the input as a {@link java.math.BigDecimal 2616 * BigDecimal}. 2617 * 2618 * <p> If the next token matches the <a 2619 * href="#Decimal-regex"><i>Decimal</i></a> regular expression defined 2620 * above then the token is converted into a {@code BigDecimal} value as if 2621 * by removing all group separators, mapping non-ASCII digits into ASCII 2622 * digits via the {@link Character#digit Character.digit}, and passing the 2623 * resulting string to the {@link 2624 * java.math.BigDecimal#BigDecimal(java.lang.String) BigDecimal(String)} 2625 * constructor. 
2626 * 2627 * @return the {@code BigDecimal} scanned from the input 2628 * @throws InputMismatchException 2629 * if the next token does not match the <i>Decimal</i> 2630 * regular expression, or is out of range 2631 * @throws NoSuchElementException if the input is exhausted 2632 * @throws IllegalStateException if this scanner is closed 2633 */ 2634 public BigDecimal nextBigDecimal() { 2635 // Check cached result 2636 if ((typeCache != null) && (typeCache instanceof BigDecimal)) { 2637 BigDecimal val = (BigDecimal)typeCache; 2638 useTypeCache(); 2639 return val; 2640 } 2641 setRadix(10); 2642 clearCaches(); 2643 // Search for next float 2644 try { 2645 String s = processFloatToken(next(decimalPattern())); 2646 return new BigDecimal(s); 2647 } catch (NumberFormatException nfe) { 2648 position = matcher.start(); // don't skip bad token 2649 throw new InputMismatchException(nfe.getMessage()); 2650 } 2651 } 2652 2653 /** 2654 * Resets this scanner. 2655 * 2656 * <p> Resetting a scanner discards all of its explicit state 2657 * information which may have been changed by invocations of 2658 * {@link #useDelimiter useDelimiter()}, 2659 * {@link #useLocale useLocale()}, or 2660 * {@link #useRadix useRadix()}. 2661 * 2662 * <p> An invocation of this method of the form 2663 * {@code scanner.reset()} behaves in exactly the same way as the 2664 * invocation 2665 * 2666 * <blockquote><pre>{@code 2667 * scanner.useDelimiter("\\p{javaWhitespace}+") 2668 * .useLocale(Locale.getDefault(Locale.Category.FORMAT)) 2669 * .useRadix(10); 2670 * }</pre></blockquote> 2671 * 2672 * @return this scanner 2673 * 2674 * @since 1.6 2675 */ 2676 public Scanner reset() { 2677 delimPattern = WHITESPACE_PATTERN; 2678 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 2679 useRadix(10); 2680 clearCaches(); 2681 modCount++; 2682 return this; 2683 } 2684 2685 /** 2686 * Returns a stream of delimiter-separated tokens from this scanner. 
* The stream contains the same tokens that would be returned, starting from
     * this scanner's current state, by calling the {@link #next} method
     * repeatedly until the {@link #hasNext} method returns false.
     *
     * <p>The resulting stream is sequential and ordered. All stream elements are
     * non-null.
     *
     * <p>Scanning starts upon initiation of the terminal stream operation, using the
     * current state of this scanner. Subsequent calls to any methods on this scanner
     * other than {@link #close} and {@link #ioException} may return undefined results
     * or may cause undefined effects on the returned stream. The returned stream's source
     * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort basis, throw a
     * {@link java.util.ConcurrentModificationException} if any such calls are detected
     * during stream pipeline execution.
     *
     * <p>After stream pipeline execution completes, this scanner is left in an indeterminate
     * state and cannot be reused.
     *
     * <p>If this scanner contains a resource that must be released, this scanner
     * should be closed, either by calling its {@link #close} method, or by
     * closing the returned stream. Closing the stream will close the underlying scanner.
     * {@code IllegalStateException} is thrown if the scanner has been closed when this
     * method is called, or if this scanner is closed during stream pipeline execution.
     *
     * <p>This method might block waiting for more input.
     *
     * @apiNote
     * For example, the following code will create a list of
     * comma-delimited tokens from a string:
     *
     * <pre>{@code
     * List<String> result = new Scanner("abc,def,,ghi")
     *     .useDelimiter(",")
     *     .tokens()
     *     .collect(Collectors.toList());
     * }</pre>
     *
     * <p>The resulting list would contain {@code "abc"}, {@code "def"},
     * the empty string, and {@code "ghi"}.
2726 * 2727 * @return a sequential stream of token strings 2728 * @throws IllegalStateException if this scanner is closed 2729 * @since 9 2730 */ 2731 public Stream<String> tokens() { 2732 ensureOpen(); 2733 Stream<String> stream = StreamSupport.stream(new TokenSpliterator(), false); 2734 return stream.onClose(this::close); 2735 } 2736 2737 class TokenSpliterator extends Spliterators.AbstractSpliterator<String> { 2738 int expectedCount = -1; 2739 2740 TokenSpliterator() { 2741 super(Long.MAX_VALUE, 2742 Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED); 2743 } 2744 2745 @Override 2746 public boolean tryAdvance(Consumer<? super String> cons) { 2747 if (expectedCount >= 0 && expectedCount != modCount) { 2748 throw new ConcurrentModificationException(); 2749 } 2750 2751 if (hasNext()) { 2752 String token = next(); 2753 expectedCount = modCount; 2754 cons.accept(token); 2755 if (expectedCount != modCount) { 2756 throw new ConcurrentModificationException(); 2757 } 2758 return true; 2759 } else { 2760 expectedCount = modCount; 2761 return false; 2762 } 2763 } 2764 } 2765 2766 /** 2767 * Returns a stream of match results from this scanner. The stream 2768 * contains the same results in the same order that would be returned by 2769 * calling {@code findWithinHorizon(pattern, 0)} and then {@link #match} 2770 * successively as long as {@link #findWithinHorizon findWithinHorizon()} 2771 * finds matches. 2772 * 2773 * <p>The resulting stream is sequential and ordered. All stream elements are 2774 * non-null. 2775 * 2776 * <p>Scanning starts upon initiation of the terminal stream operation, using the 2777 * current state of this scanner. Subsequent calls to any methods on this scanner 2778 * other than {@link #close} and {@link #ioException} may return undefined results 2779 * or may cause undefined effects on the returned stream. 
* The returned stream's source
     * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort basis, throw a
     * {@link java.util.ConcurrentModificationException} if any such calls are detected
     * during stream pipeline execution.
     *
     * <p>After stream pipeline execution completes, this scanner is left in an indeterminate
     * state and cannot be reused.
     *
     * <p>If this scanner contains a resource that must be released, this scanner
     * should be closed, either by calling its {@link #close} method, or by
     * closing the returned stream. Closing the stream will close the underlying scanner.
     * {@code IllegalStateException} is thrown if the scanner has been closed when this
     * method is called, or if this scanner is closed during stream pipeline execution.
     *
     * <p>As with the {@link #findWithinHorizon findWithinHorizon()} methods, this method
     * might block waiting for additional input, and it might buffer an unbounded amount of
     * input searching for a match.
2796 * 2797 * @apiNote 2798 * For example, the following code will read a file and return a list 2799 * of all sequences of characters consisting of seven or more Latin capital 2800 * letters: 2801 * 2802 * <pre>{@code 2803 * try (Scanner sc = new Scanner(Paths.get("input.txt"))) { 2804 * Pattern pat = Pattern.compile("[A-Z]{7,}"); 2805 * List<String> capWords = sc.findAll(pat) 2806 * .map(MatchResult::group) 2807 * .collect(Collectors.toList()); 2808 * } 2809 * }</pre> 2810 * 2811 * @param pattern the pattern to be matched 2812 * @return a sequential stream of match results 2813 * @throws NullPointerException if pattern is null 2814 * @throws IllegalStateException if this scanner is closed 2815 * @since 9 2816 */ 2817 public Stream<MatchResult> findAll(Pattern pattern) { 2818 Objects.requireNonNull(pattern); 2819 ensureOpen(); 2820 Stream<MatchResult> stream = StreamSupport.stream(new FindSpliterator(pattern), false); 2821 return stream.onClose(this::close); 2822 } 2823 2824 /** 2825 * Returns a stream of match results that match the provided pattern string. 
2826 * The effect is equivalent to the following code: 2827 * 2828 * <pre>{@code 2829 * scanner.findAll(Pattern.compile(patString)) 2830 * }</pre> 2831 * 2832 * @param patString the pattern string 2833 * @return a sequential stream of match results 2834 * @throws NullPointerException if patString is null 2835 * @throws IllegalStateException if this scanner is closed 2836 * @throws PatternSyntaxException if the regular expression's syntax is invalid 2837 * @since 9 2838 * @see java.util.regex.Pattern 2839 */ 2840 public Stream<MatchResult> findAll(String patString) { 2841 Objects.requireNonNull(patString); 2842 ensureOpen(); 2843 return findAll(patternCache.forName(patString)); 2844 } 2845 2846 class FindSpliterator extends Spliterators.AbstractSpliterator<MatchResult> { 2847 final Pattern pattern; 2848 int expectedCount = -1; 2849 2850 FindSpliterator(Pattern pattern) { 2851 super(Long.MAX_VALUE, 2852 Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED); 2853 this.pattern = pattern; 2854 } 2855 2856 @Override 2857 public boolean tryAdvance(Consumer<? super MatchResult> cons) { 2858 ensureOpen(); 2859 if (expectedCount >= 0) { 2860 if (expectedCount != modCount) { 2861 throw new ConcurrentModificationException(); 2862 } 2863 } else { 2864 expectedCount = modCount; 2865 } 2866 2867 while (true) { 2868 // assert expectedCount == modCount 2869 if (findPatternInBuffer(pattern, 0)) { // doesn't increment modCount 2870 cons.accept(matcher.toMatchResult()); 2871 if (expectedCount != modCount) { 2872 throw new ConcurrentModificationException(); 2873 } 2874 return true; 2875 } 2876 if (needInput) 2877 readInput(); // doesn't increment modCount 2878 else 2879 return false; // reached end of input 2880 } 2881 } 2882 } 2883 2884 /** Small LRU cache of Patterns. 
*/
    private static class PatternLRUCache {

        // Backing array, most recently used entry first; allocated on first lookup
        private Pattern[] oa = null;
        private final int size;

        PatternLRUCache(int size) {
            this.size = size;
        }

        // True when the pattern was compiled from exactly the given string
        boolean hasName(Pattern p, String s) {
            String source = p.pattern();
            return source.equals(s);
        }

        // Moves element i to index 0, shifting elements 0..i-1 one slot right
        void moveToFront(Object[] oa, int i) {
            Object moved = oa[i];
            System.arraycopy(oa, 0, oa, 1, i);
            oa[0] = moved;
        }

        // Returns the cached Pattern for name, compiling and caching it on a miss
        Pattern forName(String name) {
            if (oa == null) {
                oa = new Pattern[size];
            } else {
                for (int idx = 0; idx < oa.length; idx++) {
                    Pattern cached = oa[idx];
                    if (cached != null && hasName(cached, name)) {
                        if (idx > 0) {
                            moveToFront(oa, idx);
                        }
                        return cached;
                    }
                }
            }

            // Miss: the new pattern overwrites the last slot and moves to the front
            Pattern fresh = Pattern.compile(name);
            int last = oa.length - 1;
            oa[last] = fresh;
            moveToFront(oa, last);
            return fresh;
        }
    }
}