1 /* 2 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util; 27 28 import java.io.*; 29 import java.math.*; 30 import java.nio.*; 31 import java.nio.channels.*; 32 import java.nio.charset.*; 33 import java.nio.file.Path; 34 import java.nio.file.Files; 35 import java.text.*; 36 import java.util.function.Consumer; 37 import java.util.regex.*; 38 import java.util.stream.Stream; 39 import java.util.stream.StreamSupport; 40 41 import sun.misc.LRUCache; 42 43 /** 44 * A simple text scanner which can parse primitive types and strings using 45 * regular expressions. 46 * 47 * <p>A <code>Scanner</code> breaks its input into tokens using a 48 * delimiter pattern, which by default matches whitespace. 
The resulting 49 * tokens may then be converted into values of different types using the 50 * various <tt>next</tt> methods. 51 * 52 * <p>For example, this code allows a user to read a number from 53 * <tt>System.in</tt>: 54 * <blockquote><pre>{@code 55 * Scanner sc = new Scanner(System.in); 56 * int i = sc.nextInt(); 57 * }</pre></blockquote> 58 * 59 * <p>As another example, this code allows <code>long</code> types to be 60 * assigned from entries in a file <code>myNumbers</code>: 61 * <blockquote><pre>{@code 62 * Scanner sc = new Scanner(new File("myNumbers")); 63 * while (sc.hasNextLong()) { 64 * long aLong = sc.nextLong(); 65 * } 66 * }</pre></blockquote> 67 * 68 * <p>The scanner can also use delimiters other than whitespace. This 69 * example reads several items in from a string: 70 * <blockquote><pre>{@code 71 * String input = "1 fish 2 fish red fish blue fish"; 72 * Scanner s = new Scanner(input).useDelimiter("\\s*fish\\s*"); 73 * System.out.println(s.nextInt()); 74 * System.out.println(s.nextInt()); 75 * System.out.println(s.next()); 76 * System.out.println(s.next()); 77 * s.close(); 78 * }</pre></blockquote> 79 * <p> 80 * prints the following output: 81 * <blockquote><pre>{@code 82 * 1 83 * 2 84 * red 85 * blue 86 * }</pre></blockquote> 87 * 88 * <p>The same output can be generated with this code, which uses a regular 89 * expression to parse all four tokens at once: 90 * <blockquote><pre>{@code 91 * String input = "1 fish 2 fish red fish blue fish"; 92 * Scanner s = new Scanner(input); 93 * s.findInLine("(\\d+) fish (\\d+) fish (\\w+) fish (\\w+)"); 94 * MatchResult result = s.match(); 95 * for (int i=1; i<=result.groupCount(); i++) 96 * System.out.println(result.group(i)); 97 * s.close(); 98 * }</pre></blockquote> 99 * 100 * <p>The <a name="default-delimiter">default whitespace delimiter</a> used 101 * by a scanner is as recognized by {@link java.lang.Character}.{@link 102 * java.lang.Character#isWhitespace(char) isWhitespace}. 
The {@link #reset} 103 * method will reset the value of the scanner's delimiter to the default 104 * whitespace delimiter regardless of whether it was previously changed. 105 * 106 * <p>A scanning operation may block waiting for input. 107 * 108 * <p>The {@link #next} and {@link #hasNext} methods and their 109 * primitive-type companion methods (such as {@link #nextInt} and 110 * {@link #hasNextInt}) first skip any input that matches the delimiter 111 * pattern, and then attempt to return the next token. Both <tt>hasNext</tt> 112 * and <tt>next</tt> methods may block waiting for further input. Whether a 113 * <tt>hasNext</tt> method blocks has no connection to whether or not its 114 * associated <tt>next</tt> method will block. 115 * 116 * <p> The {@link #findInLine}, {@link #findWithinHorizon}, and {@link #skip} 117 * methods operate independently of the delimiter pattern. These methods will 118 * attempt to match the specified pattern with no regard to delimiters in the 119 * input and thus can be used in special circumstances where delimiters are 120 * not relevant. These methods may block waiting for more input. 121 * 122 * <p>When a scanner throws an {@link InputMismatchException}, the scanner 123 * will not pass the token that caused the exception, so that it may be 124 * retrieved or skipped via some other method. 125 * 126 * <p>Depending upon the type of delimiting pattern, empty tokens may be 127 * returned. For example, the pattern <tt>"\\s+"</tt> will return no empty 128 * tokens since it matches multiple instances of the delimiter. The delimiting 129 * pattern <tt>"\\s"</tt> could return empty tokens since it only passes one 130 * space at a time. 131 * 132 * <p> A scanner can read text from any object which implements the {@link 133 * java.lang.Readable} interface. 
If an invocation of the underlying 134 * readable's {@link java.lang.Readable#read} method throws an {@link 135 * java.io.IOException} then the scanner assumes that the end of the input 136 * has been reached. The most recent <tt>IOException</tt> thrown by the 137 * underlying readable can be retrieved via the {@link #ioException} method. 138 * 139 * <p>When a <code>Scanner</code> is closed, it will close its input source 140 * if the source implements the {@link java.io.Closeable} interface. 141 * 142 * <p>A <code>Scanner</code> is not safe for multithreaded use without 143 * external synchronization. 144 * 145 * <p>Unless otherwise mentioned, passing a <code>null</code> parameter into 146 * any method of a <code>Scanner</code> will cause a 147 * <code>NullPointerException</code> to be thrown. 148 * 149 * <p>A scanner will default to interpreting numbers as decimal unless a 150 * different radix has been set by using the {@link #useRadix} method. The 151 * {@link #reset} method will reset the value of the scanner's radix to 152 * <code>10</code> regardless of whether it was previously changed. 153 * 154 * <h3> <a name="localized-numbers">Localized numbers</a> </h3> 155 * 156 * <p> An instance of this class is capable of scanning numbers in the standard 157 * formats as well as in the formats of the scanner's locale. A scanner's 158 * <a name="initial-locale">initial locale </a>is the value returned by the {@link 159 * java.util.Locale#getDefault(Locale.Category) 160 * Locale.getDefault(Locale.Category.FORMAT)} method; it may be changed via the {@link 161 * #useLocale} method. The {@link #reset} method will reset the value of the 162 * scanner's locale to the initial locale regardless of whether it was 163 * previously changed. 
 *
 * <p>The localized formats are defined in terms of the following parameters,
 * which for a particular locale are taken from that locale's {@link
 * java.text.DecimalFormat DecimalFormat} object, <tt>df</tt>, and its
 * {@link java.text.DecimalFormatSymbols DecimalFormatSymbols} object,
 * <tt>dfs</tt>.
 *
 * <blockquote><dl>
 *     <dt><i>LocalGroupSeparator </i>
 *         <dd>The character used to separate thousands groups,
 *         <i>i.e.,</i> <tt>dfs.</tt>{@link
 *         java.text.DecimalFormatSymbols#getGroupingSeparator
 *         getGroupingSeparator()}
 *     <dt><i>LocalDecimalSeparator </i>
 *         <dd>The character used for the decimal point,
 *         <i>i.e.,</i> <tt>dfs.</tt>{@link
 *         java.text.DecimalFormatSymbols#getDecimalSeparator
 *         getDecimalSeparator()}
 *     <dt><i>LocalPositivePrefix </i>
 *         <dd>The string that appears before a positive number (may
 *         be empty), <i>i.e.,</i> <tt>df.</tt>{@link
 *         java.text.DecimalFormat#getPositivePrefix
 *         getPositivePrefix()}
 *     <dt><i>LocalPositiveSuffix </i>
 *         <dd>The string that appears after a positive number (may be
 *         empty), <i>i.e.,</i> <tt>df.</tt>{@link
 *         java.text.DecimalFormat#getPositiveSuffix
 *         getPositiveSuffix()}
 *     <dt><i>LocalNegativePrefix </i>
 *         <dd>The string that appears before a negative number (may
 *         be empty), <i>i.e.,</i> <tt>df.</tt>{@link
 *         java.text.DecimalFormat#getNegativePrefix
 *         getNegativePrefix()}
 *     <dt><i>LocalNegativeSuffix </i>
 *         <dd>The string that appears after a negative number (may be
 *         empty), <i>i.e.,</i> <tt>df.</tt>{@link
 *         java.text.DecimalFormat#getNegativeSuffix
 *         getNegativeSuffix()}
 *     <dt><i>LocalNaN </i>
 *         <dd>The string that represents not-a-number for
 *         floating-point values,
 *         <i>i.e.,</i> <tt>dfs.</tt>{@link
 *         java.text.DecimalFormatSymbols#getNaN
 *         getNaN()}
 *     <dt><i>LocalInfinity </i>
 *         <dd>The string that represents infinity for floating-point
 *         values, <i>i.e.,</i> <tt>dfs.</tt>{@link
 *         java.text.DecimalFormatSymbols#getInfinity
 *         getInfinity()}
 * </dl></blockquote>
 *
 * <h4> <a name="number-syntax">Number syntax</a> </h4>
 *
 * <p> The strings that can be parsed as numbers by an instance of this class
 * are specified in terms of the following regular-expression grammar, where
 * Rmax is the highest digit in the radix being used (for example, Rmax is 9 in base 10).
 *
 * <dl>
 *   <dt><i>NonAsciiDigit</i>:
 *       <dd>A non-ASCII character c for which
 *            {@link java.lang.Character#isDigit Character.isDigit}<tt>(c)</tt>
 *                        returns true
 *
 *   <dt><i>Non0Digit</i>:
 *       <dd><tt>[1-</tt><i>Rmax</i><tt>] | </tt><i>NonAsciiDigit</i>
 *
 *   <dt><i>Digit</i>:
 *       <dd><tt>[0-</tt><i>Rmax</i><tt>] | </tt><i>NonAsciiDigit</i>
 *
 *   <dt><i>GroupedNumeral</i>:
 *       <dd><tt>( </tt><i>Non0Digit</i>
 *                   <i>Digit</i><tt>?
 *                   </tt><i>Digit</i><tt>?</tt>
 *       <dd>     <tt>( </tt><i>LocalGroupSeparator</i>
 *                         <i>Digit</i>
 *                         <i>Digit</i>
 *                         <i>Digit</i><tt> )+ )</tt>
 *
 *   <dt><i>Numeral</i>:
 *       <dd><tt>( ( </tt><i>Digit</i><tt>+ )
 *               | </tt><i>GroupedNumeral</i><tt> )</tt>
 *
 *   <dt><a name="Integer-regex"><i>Integer</i>:</a>
 *       <dd><tt>( [-+]? ( </tt><i>Numeral</i><tt>
 *                               ) )</tt>
 *       <dd><tt>| </tt><i>LocalPositivePrefix</i> <i>Numeral</i>
 *                      <i>LocalPositiveSuffix</i>
 *       <dd><tt>| </tt><i>LocalNegativePrefix</i> <i>Numeral</i>
 *                 <i>LocalNegativeSuffix</i>
 *
 *   <dt><i>DecimalNumeral</i>:
 *       <dd><i>Numeral</i>
 *       <dd><tt>| </tt><i>Numeral</i>
 *                 <i>LocalDecimalSeparator</i>
 *                 <i>Digit</i><tt>*</tt>
 *       <dd><tt>| </tt><i>LocalDecimalSeparator</i>
 *                 <i>Digit</i><tt>+</tt>
 *
 *   <dt><i>Exponent</i>:
 *       <dd><tt>( [eE] [+-]? </tt><i>Digit</i><tt>+ )</tt>
 *
 *   <dt><a name="Decimal-regex"><i>Decimal</i>:</a>
 *       <dd><tt>( [-+]? </tt><i>DecimalNumeral</i>
 *                         <i>Exponent</i><tt>? )</tt>
 *       <dd><tt>| </tt><i>LocalPositivePrefix</i>
 *                 <i>DecimalNumeral</i>
 *                 <i>LocalPositiveSuffix</i>
 *                 <i>Exponent</i><tt>?</tt>
 *       <dd><tt>| </tt><i>LocalNegativePrefix</i>
 *                 <i>DecimalNumeral</i>
 *                 <i>LocalNegativeSuffix</i>
 *                 <i>Exponent</i><tt>?</tt>
 *
 *   <dt><i>HexFloat</i>:
 *       <dd><tt>[-+]? 0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+
 *                 ([pP][-+]?[0-9]+)?</tt>
 *
 *   <dt><i>NonNumber</i>:
 *       <dd><tt>NaN
 *                          | </tt><i>LocalNaN</i><tt>
 *                          | Infinity
 *                          | </tt><i>LocalInfinity</i>
 *
 *   <dt><i>SignedNonNumber</i>:
 *       <dd><tt>( [-+]? </tt><i>NonNumber</i><tt> )</tt>
 *       <dd><tt>| </tt><i>LocalPositivePrefix</i>
 *                 <i>NonNumber</i>
 *                 <i>LocalPositiveSuffix</i>
 *       <dd><tt>| </tt><i>LocalNegativePrefix</i>
 *                 <i>NonNumber</i>
 *                 <i>LocalNegativeSuffix</i>
 *
 *   <dt><a name="Float-regex"><i>Float</i></a>:
 *       <dd><i>Decimal</i>
 *           <tt>| </tt><i>HexFloat</i>
 *           <tt>| </tt><i>SignedNonNumber</i>
 *
 * </dl>
 * <p>Whitespace is not significant in the above regular expressions.
 *
 * @since   1.5
 */
public final class Scanner implements Iterator<String>, Closeable {

    // Internal buffer used to hold input
    private CharBuffer buf;

    // Size, in chars, used when the internal character buffer is allocated
    private static final int BUFFER_SIZE = 1024;

    // The index into the buffer currently held by the Scanner
    private int position;

    // Internal matcher used for finding delimiters
    private Matcher matcher;

    // Pattern used to delimit tokens
    private Pattern delimPattern;

    // Pattern found in last hasNext operation
    private Pattern hasNextPattern;

    // Position after last hasNext operation
    private int hasNextPosition;

    // Result after last hasNext operation
    private String hasNextResult;

    // The input source
    private Readable source;

    // Boolean is true if source is done
    private boolean sourceClosed = false;

    // Boolean indicating more input is required
    private boolean needInput = false;

    // Boolean indicating if a delim has been skipped this operation
    private boolean skipped = false;

    // A store of a position that the scanner may fall back to;
    // -1 means that no position is currently saved
    private int savedScannerPosition = -1;

    // A cache of the last primitive type scanned
    private Object typeCache = null;

    // Boolean indicating if a match result is available
    private boolean matchValid = false;

    // Boolean indicating if this scanner has been closed
    private boolean closed = false;

    // The current radix used by this scanner
    private int radix = 10;

    // The default radix for this scanner
    private int defaultRadix = 10;

    // The locale used by this scanner
    private Locale locale = null;

    // A cache of the last few recently used Patterns, looked up by their
    // regex string (the constructor argument 7 is presumably the cache
    // size -- confirm against sun.misc.LRUCache)
    private LRUCache<String,Pattern> patternCache =
    new LRUCache<String,Pattern>(7) {
        // Builds the Pattern for the given regex string
        protected Pattern create(String s) {
            return Pattern.compile(s);
        }
        // A Pattern corresponds to a name when its regex string equals it
        protected boolean hasName(Pattern p, String s) {
            return p.pattern().equals(s);
        }
    };

    // A holder of the last IOException encountered
    private IOException lastException;

    // A pattern for java whitespace (one or more whitespace characters)
    private static Pattern WHITESPACE_PATTERN = Pattern.compile(
                                                "\\p{javaWhitespace}+");

    // A pattern for any token; DOTALL is enabled so that line terminators
    // are matched as well
    private static Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*");

    // A pattern for non-ASCII digits
    private static Pattern NON_ASCII_DIGIT = Pattern.compile(
        "[\\p{javaDigit}&&[^0-9]]");

    // Fields and methods to support scanning primitive types

    /**
     * Locale dependent values used to scan numbers. Each value is a regex
     * fragment that is spliced directly into the number patterns built
     * below, which is why the defaults are already regex-escaped.
     */
    private String groupSeparator = "\\,";
    private String decimalSeparator = "\\.";
    private String nanString = "NaN";
    private String infinityString = "Infinity";
    private String positivePrefix = "";
    private String negativePrefix = "\\-";
    private String positiveSuffix = "";
    private String negativeSuffix = "";

    /**
     * Fields and an accessor method to match booleans.
     *
     * The pattern is compiled lazily on first use and published through a
     * volatile field. There is no locking, so concurrent first calls may
     * each compile the pattern; every caller still gets a usable Pattern.
     */
    private static volatile Pattern boolPattern;
    private static final String BOOLEAN_PATTERN = "true|false";
    private static Pattern boolPattern() {
        Pattern bp = boolPattern;
        if (bp == null)
            boolPattern = bp = Pattern.compile(BOOLEAN_PATTERN,
                                          Pattern.CASE_INSENSITIVE);
        return bp;
    }

    /**
     * Fields and methods to match bytes, shorts, ints, and longs
     */
    // Lazily built integer pattern; null until first requested
    private Pattern integerPattern;
    // Digit characters in ascending value; the first 'radix' of them are the
    // ASCII digits accepted in the current radix
    private String digits = "0123456789abcdefghijklmnopqrstuvwxyz";
    private String non0Digit = "[\\p{javaDigit}&&[^0]]";
    // Counting opening parentheses in the string built below, capture group 5
    // is the plain (ungrouped) digit run of the Java-style integer
    // alternative. How this index is consumed is outside this view.
    private int SIMPLE_GROUP_INDEX = 5;

    // Assembles the regex source for integers in the current radix and
    // locale: a Java-style integer with optional sign, or a numeral wrapped
    // in the locale's positive or negative prefix/suffix
    private String buildIntegerPatternString() {
        String radixDigits = digits.substring(0, radix);
        // \\p{javaDigit} is not guaranteed to be appropriate
        // here but what can we do? The final authority will be
        // whatever parse method is invoked, so ultimately the
        // Scanner will do the right thing
        String digit = "((?i)["+radixDigits+"]|\\p{javaDigit})";
        String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+
                                groupSeparator+digit+digit+digit+")+)";
        // digit++ is the possessive form which is necessary for reducing
        // backtracking that would otherwise cause unacceptable performance
        String numeral = "(("+ digit+"++)|"+groupedNumeral+")";
        String javaStyleInteger = "([-+]?(" + numeral + "))";
        String negativeInteger = negativePrefix + numeral + negativeSuffix;
        String positiveInteger = positivePrefix + numeral + positiveSuffix;
        return "("+ javaStyleInteger + ")|(" +
            positiveInteger + ")|(" +
            negativeInteger + ")";
    }

    // Returns the integer pattern, building it through the pattern cache on
    // first use
    private Pattern integerPattern() {
        if (integerPattern == null) {
            integerPattern = patternCache.forName(buildIntegerPatternString());
        }
        return integerPattern;
    }

    /**
     * Fields and an accessor method to match line separators.
     *
     * Both patterns are compiled lazily without locking and published
     * through volatile fields, in the same way as the boolean pattern.
     */
    private static volatile Pattern separatorPattern;
    private static volatile Pattern linePattern;
    private static final String LINE_SEPARATOR_PATTERN =
                                           "\r\n|[\n\r\u2028\u2029\u0085]";
    // Either any text followed by a line separator, or a non-empty run of
    // text that reaches the end of input
    private static final String LINE_PATTERN = ".*("+LINE_SEPARATOR_PATTERN+")|.+$";

    private static Pattern separatorPattern() {
        Pattern sp = separatorPattern;
        if (sp == null)
            separatorPattern = sp = Pattern.compile(LINE_SEPARATOR_PATTERN);
        return sp;
    }

    private static Pattern linePattern() {
        Pattern lp = linePattern;
        if (lp == null)
            linePattern = lp = Pattern.compile(LINE_PATTERN);
        return lp;
    }

    /**
     * Fields and methods to match floats and doubles
     */
    private Pattern floatPattern;
    private Pattern decimalPattern;

    // Builds floatPattern and decimalPattern together from the current
    // locale-dependent fragments. Unlike the integer pattern, the ASCII
    // digits here are fixed to [0-9] rather than derived from the radix, and
    // the patterns are compiled directly instead of through patternCache.
    private void buildFloatAndDecimalPattern() {
        // \\p{javaDigit} may not be perfect, see above
        String digit = "([0-9]|(\\p{javaDigit}))";
        String exponent = "([eE][+-]?"+digit+"+)?";
        String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+
                                groupSeparator+digit+digit+digit+")+)";
        // Once again digit++ is used for performance, as above
        String numeral = "(("+digit+"++)|"+groupedNumeral+")";
        String decimalNumeral = "("+numeral+"|"+numeral +
            decimalSeparator + digit + "*+|"+ decimalSeparator +
            digit + "++)";
        String nonNumber = "(NaN|"+nanString+"|Infinity|"+
                               infinityString+")";
        String positiveFloat = "(" + positivePrefix + decimalNumeral +
                            positiveSuffix + exponent + ")";
        String negativeFloat = "(" + negativePrefix + decimalNumeral +
                            negativeSuffix + exponent + ")";
        String decimal = "(([-+]?" + decimalNumeral + exponent + ")|"+
            positiveFloat + "|" + negativeFloat + ")";
        String hexFloat =
            "[-+]?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP][-+]?[0-9]+)?";
        String positiveNonNumber = "(" + positivePrefix + nonNumber +
                            positiveSuffix + ")";
        String negativeNonNumber = "(" + negativePrefix + nonNumber +
                            negativeSuffix + ")";
        String signedNonNumber = "(([-+]?"+nonNumber+")|" +
                                 positiveNonNumber + "|" +
                                 negativeNonNumber + ")";
        // A float is a decimal, a hex float, or a signed NaN/Infinity
        floatPattern = Pattern.compile(decimal + "|" + hexFloat + "|" +
                                       signedNonNumber);
        decimalPattern = Pattern.compile(decimal);
    }

    // Returns the float pattern, building both number patterns on first use
    private Pattern floatPattern() {
        if (floatPattern == null) {
            buildFloatAndDecimalPattern();
        }
        return floatPattern;
    }

    // Returns the decimal pattern, building both number patterns on first use
    private Pattern decimalPattern() {
        if (decimalPattern == null) {
            buildFloatAndDecimalPattern();
        }
        return decimalPattern;
    }

    // Constructors

    /**
     * Constructs a <code>Scanner</code> that returns values scanned
     * from the specified source delimited by the specified pattern.
528 * 529 * @param source A character source implementing the Readable interface 530 * @param pattern A delimiting pattern 531 */ 532 private Scanner(Readable source, Pattern pattern) { 533 assert source != null : "source should not be null"; 534 assert pattern != null : "pattern should not be null"; 535 this.source = source; 536 delimPattern = pattern; 537 buf = CharBuffer.allocate(BUFFER_SIZE); 538 buf.limit(0); 539 matcher = delimPattern.matcher(buf); 540 matcher.useTransparentBounds(true); 541 matcher.useAnchoringBounds(false); 542 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 543 } 544 545 /** 546 * Constructs a new <code>Scanner</code> that produces values scanned 547 * from the specified source. 548 * 549 * @param source A character source implementing the {@link Readable} 550 * interface 551 */ 552 public Scanner(Readable source) { 553 this(Objects.requireNonNull(source, "source"), WHITESPACE_PATTERN); 554 } 555 556 /** 557 * Constructs a new <code>Scanner</code> that produces values scanned 558 * from the specified input stream. Bytes from the stream are converted 559 * into characters using the underlying platform's 560 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 561 * 562 * @param source An input stream to be scanned 563 */ 564 public Scanner(InputStream source) { 565 this(new InputStreamReader(source), WHITESPACE_PATTERN); 566 } 567 568 /** 569 * Constructs a new <code>Scanner</code> that produces values scanned 570 * from the specified input stream. Bytes from the stream are converted 571 * into characters using the specified charset. 
572 * 573 * @param source An input stream to be scanned 574 * @param charsetName The encoding type used to convert bytes from the 575 * stream into characters to be scanned 576 * @throws IllegalArgumentException if the specified character set 577 * does not exist 578 */ 579 public Scanner(InputStream source, String charsetName) { 580 this(makeReadable(Objects.requireNonNull(source, "source"), toCharset(charsetName)), 581 WHITESPACE_PATTERN); 582 } 583 584 /** 585 * Returns a charset object for the given charset name. 586 * @throws NullPointerException is csn is null 587 * @throws IllegalArgumentException if the charset is not supported 588 */ 589 private static Charset toCharset(String csn) { 590 Objects.requireNonNull(csn, "charsetName"); 591 try { 592 return Charset.forName(csn); 593 } catch (IllegalCharsetNameException|UnsupportedCharsetException e) { 594 // IllegalArgumentException should be thrown 595 throw new IllegalArgumentException(e); 596 } 597 } 598 599 private static Readable makeReadable(InputStream source, Charset charset) { 600 return new InputStreamReader(source, charset); 601 } 602 603 /** 604 * Constructs a new <code>Scanner</code> that produces values scanned 605 * from the specified file. Bytes from the file are converted into 606 * characters using the underlying platform's 607 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 608 * 609 * @param source A file to be scanned 610 * @throws FileNotFoundException if source is not found 611 */ 612 public Scanner(File source) throws FileNotFoundException { 613 this((ReadableByteChannel)(new FileInputStream(source).getChannel())); 614 } 615 616 /** 617 * Constructs a new <code>Scanner</code> that produces values scanned 618 * from the specified file. Bytes from the file are converted into 619 * characters using the specified charset. 
620 * 621 * @param source A file to be scanned 622 * @param charsetName The encoding type used to convert bytes from the file 623 * into characters to be scanned 624 * @throws FileNotFoundException if source is not found 625 * @throws IllegalArgumentException if the specified encoding is 626 * not found 627 */ 628 public Scanner(File source, String charsetName) 629 throws FileNotFoundException 630 { 631 this(Objects.requireNonNull(source), toDecoder(charsetName)); 632 } 633 634 private Scanner(File source, CharsetDecoder dec) 635 throws FileNotFoundException 636 { 637 this(makeReadable((ReadableByteChannel)(new FileInputStream(source).getChannel()), dec)); 638 } 639 640 private static CharsetDecoder toDecoder(String charsetName) { 641 Objects.requireNonNull(charsetName, "charsetName"); 642 try { 643 return Charset.forName(charsetName).newDecoder(); 644 } catch (IllegalCharsetNameException|UnsupportedCharsetException unused) { 645 throw new IllegalArgumentException(charsetName); 646 } 647 } 648 649 private static Readable makeReadable(ReadableByteChannel source, 650 CharsetDecoder dec) { 651 return Channels.newReader(source, dec, -1); 652 } 653 654 /** 655 * Constructs a new <code>Scanner</code> that produces values scanned 656 * from the specified file. Bytes from the file are converted into 657 * characters using the underlying platform's 658 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 659 * 660 * @param source 661 * the path to the file to be scanned 662 * @throws IOException 663 * if an I/O error occurs opening source 664 * 665 * @since 1.7 666 */ 667 public Scanner(Path source) 668 throws IOException 669 { 670 this(Files.newInputStream(source)); 671 } 672 673 /** 674 * Constructs a new <code>Scanner</code> that produces values scanned 675 * from the specified file. Bytes from the file are converted into 676 * characters using the specified charset. 
677 * 678 * @param source 679 * the path to the file to be scanned 680 * @param charsetName 681 * The encoding type used to convert bytes from the file 682 * into characters to be scanned 683 * @throws IOException 684 * if an I/O error occurs opening source 685 * @throws IllegalArgumentException 686 * if the specified encoding is not found 687 * @since 1.7 688 */ 689 public Scanner(Path source, String charsetName) throws IOException { 690 this(Objects.requireNonNull(source), toCharset(charsetName)); 691 } 692 693 private Scanner(Path source, Charset charset) throws IOException { 694 this(makeReadable(Files.newInputStream(source), charset)); 695 } 696 697 /** 698 * Constructs a new <code>Scanner</code> that produces values scanned 699 * from the specified string. 700 * 701 * @param source A string to scan 702 */ 703 public Scanner(String source) { 704 this(new StringReader(source), WHITESPACE_PATTERN); 705 } 706 707 /** 708 * Constructs a new <code>Scanner</code> that produces values scanned 709 * from the specified channel. Bytes from the source are converted into 710 * characters using the underlying platform's 711 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 712 * 713 * @param source A channel to scan 714 */ 715 public Scanner(ReadableByteChannel source) { 716 this(makeReadable(Objects.requireNonNull(source, "source")), 717 WHITESPACE_PATTERN); 718 } 719 720 private static Readable makeReadable(ReadableByteChannel source) { 721 return makeReadable(source, Charset.defaultCharset().newDecoder()); 722 } 723 724 /** 725 * Constructs a new <code>Scanner</code> that produces values scanned 726 * from the specified channel. Bytes from the source are converted into 727 * characters using the specified charset. 
728 * 729 * @param source A channel to scan 730 * @param charsetName The encoding type used to convert bytes from the 731 * channel into characters to be scanned 732 * @throws IllegalArgumentException if the specified character set 733 * does not exist 734 */ 735 public Scanner(ReadableByteChannel source, String charsetName) { 736 this(makeReadable(Objects.requireNonNull(source, "source"), toDecoder(charsetName)), 737 WHITESPACE_PATTERN); 738 } 739 740 // Private primitives used to support scanning 741 742 private void saveState() { 743 savedScannerPosition = position; 744 } 745 746 private void revertState() { 747 this.position = savedScannerPosition; 748 savedScannerPosition = -1; 749 skipped = false; 750 } 751 752 private boolean revertState(boolean b) { 753 this.position = savedScannerPosition; 754 savedScannerPosition = -1; 755 skipped = false; 756 return b; 757 } 758 759 private void cacheResult() { 760 hasNextResult = matcher.group(); 761 hasNextPosition = matcher.end(); 762 hasNextPattern = matcher.pattern(); 763 } 764 765 private void cacheResult(String result) { 766 hasNextResult = result; 767 hasNextPosition = matcher.end(); 768 hasNextPattern = matcher.pattern(); 769 } 770 771 // Clears both regular cache and type cache 772 private void clearCaches() { 773 hasNextPattern = null; 774 typeCache = null; 775 } 776 777 // Also clears both the regular cache and the type cache 778 private String getCachedResult() { 779 position = hasNextPosition; 780 hasNextPattern = null; 781 typeCache = null; 782 return hasNextResult; 783 } 784 785 // Also clears both the regular cache and the type cache 786 private void useTypeCache() { 787 if (closed) 788 throw new IllegalStateException("Scanner closed"); 789 position = hasNextPosition; 790 hasNextPattern = null; 791 typeCache = null; 792 } 793 794 // Tries to read more input. May block. 
795 private void readInput() { 796 if (buf.limit() == buf.capacity()) 797 makeSpace(); 798 799 // Prepare to receive data 800 int p = buf.position(); 801 buf.position(buf.limit()); 802 buf.limit(buf.capacity()); 803 804 int n = 0; 805 try { 806 n = source.read(buf); 807 } catch (IOException ioe) { 808 lastException = ioe; 809 n = -1; 810 } 811 812 if (n == -1) { 813 sourceClosed = true; 814 needInput = false; 815 } 816 817 if (n > 0) 818 needInput = false; 819 820 // Restore current position and limit for reading 821 buf.limit(buf.position()); 822 buf.position(p); 823 } 824 825 // After this method is called there will either be an exception 826 // or else there will be space in the buffer 827 private boolean makeSpace() { 828 clearCaches(); 829 int offset = savedScannerPosition == -1 ? 830 position : savedScannerPosition; 831 buf.position(offset); 832 // Gain space by compacting buffer 833 if (offset > 0) { 834 buf.compact(); 835 translateSavedIndexes(offset); 836 position -= offset; 837 buf.flip(); 838 return true; 839 } 840 // Gain space by growing buffer 841 int newSize = buf.capacity() * 2; 842 CharBuffer newBuf = CharBuffer.allocate(newSize); 843 newBuf.put(buf); 844 newBuf.flip(); 845 translateSavedIndexes(offset); 846 position -= offset; 847 buf = newBuf; 848 matcher.reset(buf); 849 return true; 850 } 851 852 // When a buffer compaction/reallocation occurs the saved indexes must 853 // be modified appropriately 854 private void translateSavedIndexes(int offset) { 855 if (savedScannerPosition != -1) 856 savedScannerPosition -= offset; 857 } 858 859 // If we are at the end of input then NoSuchElement; 860 // If there is still input left then InputMismatch 861 private void throwFor() { 862 skipped = false; 863 if ((sourceClosed) && (position == buf.limit())) 864 throw new NoSuchElementException(); 865 else 866 throw new InputMismatchException(); 867 } 868 869 // Returns true if a complete token or partial token is in the buffer. 
    // It is not necessary to find a complete token since a partial token
    // means that there will be another token with or without more input.
    private boolean hasTokenInBuffer() {
        // Any previous match result is invalidated by this operation
        matchValid = false;
        matcher.usePattern(delimPattern);
        matcher.region(position, buf.limit());

        // Skip delims first
        if (matcher.lookingAt())
            position = matcher.end();

        // If we are sitting at the end, no more tokens in buffer
        if (position == buf.limit())
            return false;

        return true;
    }

    /*
     * Returns a "complete token" that matches the specified pattern
     *
     * A token is complete if surrounded by delims; a partial token
     * is prefixed by delims but not postfixed by them
     *
     * The position is advanced to the end of that complete token
     *
     * Pattern == null means accept any token at all
     *
     * Triple return:
     * 1. valid string means it was found
     * 2. null with needInput=false means we won't ever find it
     * 3. null with needInput=true means try again after readInput
     *
     * This method only ever sets needInput to true; it never resets it.
     * Leading delimiters are skipped at most once per operation, which is
     * tracked by the skipped flag.
     */
    private String getCompleteTokenInBuffer(Pattern pattern) {
        // Any previous match result is invalidated by this operation
        matchValid = false;

        // Skip delims first
        matcher.usePattern(delimPattern);
        if (!skipped) { // Enforcing only one skip of leading delims
            matcher.region(position, buf.limit());
            if (matcher.lookingAt()) {
                // If more input could extend the delimiters then we must wait
                // for more input
                if (matcher.hitEnd() && !sourceClosed) {
                    needInput = true;
                    return null;
                }
                // The delims were whole and the matcher should skip them
                skipped = true;
                position = matcher.end();
            }
        }

        // If we are sitting at the end, no more tokens in buffer
        if (position == buf.limit()) {
            // With the source done there is nothing left to find; otherwise
            // the caller has to read more input and try again
            if (sourceClosed)
                return null;
            needInput = true;
            return null;
        }

        // Must look for next delims. Simply attempting to match the
        // pattern at this point may find a match but it might not be
        // the first longest match because of missing input, or it might
        // match a partial token instead of the whole thing.

        // Then look for next delims
        matcher.region(position, buf.limit());
        boolean foundNextDelim = matcher.find();
        if (foundNextDelim && (matcher.end() == position)) {
            // Zero length delimiter match; we should find the next one
            // using the automatic advance past a zero length match;
            // Otherwise we have just found the same one we just skipped
            foundNextDelim = matcher.find();
        }
        if (foundNextDelim) {
            // In the rare case that more input could cause the match
            // to be lost and there is more input coming we must wait
            // for more input. Note that hitting the end is okay as long
            // as the match cannot go away. It is the beginning of the
            // next delims we want to be sure about, we don't care if
            // they potentially extend further.
            if (matcher.requireEnd() && !sourceClosed) {
                needInput = true;
                return null;
            }
            // The token ends where the next delimiter begins
            int tokenEnd = matcher.start();
            // There is a complete token.
            if (pattern == null) {
                // Must continue with match to provide valid MatchResult
                pattern = FIND_ANY_PATTERN;
            }
            //  Attempt to match against the desired pattern
            matcher.usePattern(pattern);
            matcher.region(position, tokenEnd);
            if (matcher.matches()) {
                String s = matcher.group();
                position = matcher.end();
                return s;
            } else { // Complete token but it does not match
                return null;
            }
        }

        // If we can't find the next delims but no more input is coming,
        // then we can treat the remainder as a whole token
        if (sourceClosed) {
            if (pattern == null) {
                // Must continue with match to provide valid MatchResult
                pattern = FIND_ANY_PATTERN;
            }
            // Last token; match the pattern against everything that is
            // left, or report failure by returning null
            matcher.usePattern(pattern);
            matcher.region(position, buf.limit());
            if (matcher.matches()) {
                String s = matcher.group();
                position = matcher.end();
                return s;
            }
            // Last piece does not match
            return null;
        }

        // There is a partial token in the buffer; must read more
        // to complete it
        needInput = true;
        return null;
    }

    // Finds the specified pattern in the buffer up to horizon.
    // Returns true if the specified input pattern was matched,
    // and leaves the matcher field with the current match state.
1002 private boolean findPatternInBuffer(Pattern pattern, int horizon) { 1003 matchValid = false; 1004 matcher.usePattern(pattern); 1005 int bufferLimit = buf.limit(); 1006 int horizonLimit = -1; 1007 int searchLimit = bufferLimit; 1008 if (horizon > 0) { 1009 horizonLimit = position + horizon; 1010 if (horizonLimit < bufferLimit) 1011 searchLimit = horizonLimit; 1012 } 1013 matcher.region(position, searchLimit); 1014 if (matcher.find()) { 1015 if (matcher.hitEnd() && (!sourceClosed)) { 1016 // The match may be longer if didn't hit horizon or real end 1017 if (searchLimit != horizonLimit) { 1018 // Hit an artificial end; try to extend the match 1019 needInput = true; 1020 return false; 1021 } 1022 // The match could go away depending on what is next 1023 if ((searchLimit == horizonLimit) && matcher.requireEnd()) { 1024 // Rare case: we hit the end of input and it happens 1025 // that it is at the horizon and the end of input is 1026 // required for the match. 1027 needInput = true; 1028 return false; 1029 } 1030 } 1031 // Did not hit end, or hit real end, or hit horizon 1032 position = matcher.end(); 1033 return true; 1034 } 1035 1036 if (sourceClosed) 1037 return false; 1038 1039 // If there is no specified horizon, or if we have not searched 1040 // to the specified horizon yet, get more input 1041 if ((horizon == 0) || (searchLimit != horizonLimit)) 1042 needInput = true; 1043 return false; 1044 } 1045 1046 // Attempts to match a pattern anchored at the current position. 1047 // Returns true if the specified input pattern was matched, 1048 // and leaves the matcher field with the current match state. 
1049 private boolean matchPatternInBuffer(Pattern pattern) { 1050 matchValid = false; 1051 matcher.usePattern(pattern); 1052 matcher.region(position, buf.limit()); 1053 if (matcher.lookingAt()) { 1054 if (matcher.hitEnd() && (!sourceClosed)) { 1055 // Get more input and try again 1056 needInput = true; 1057 return false; 1058 } 1059 position = matcher.end(); 1060 return true; 1061 } 1062 1063 if (sourceClosed) 1064 return false; 1065 1066 // Read more to find pattern 1067 needInput = true; 1068 return false; 1069 } 1070 1071 // Throws if the scanner is closed 1072 private void ensureOpen() { 1073 if (closed) 1074 throw new IllegalStateException("Scanner closed"); 1075 } 1076 1077 // Public methods 1078 1079 /** 1080 * Closes this scanner. 1081 * 1082 * <p> If this scanner has not yet been closed then if its underlying 1083 * {@linkplain java.lang.Readable readable} also implements the {@link 1084 * java.io.Closeable} interface then the readable's <tt>close</tt> method 1085 * will be invoked. If this scanner is already closed then invoking this 1086 * method will have no effect. 1087 * 1088 * <p>Attempting to perform search operations after a scanner has 1089 * been closed will result in an {@link IllegalStateException}. 1090 * 1091 */ 1092 public void close() { 1093 if (closed) 1094 return; 1095 if (source instanceof Closeable) { 1096 try { 1097 ((Closeable)source).close(); 1098 } catch (IOException ioe) { 1099 lastException = ioe; 1100 } 1101 } 1102 sourceClosed = true; 1103 source = null; 1104 closed = true; 1105 } 1106 1107 /** 1108 * Returns the <code>IOException</code> last thrown by this 1109 * <code>Scanner</code>'s underlying <code>Readable</code>. This method 1110 * returns <code>null</code> if no such exception exists. 
1111 * 1112 * @return the last exception thrown by this scanner's readable 1113 */ 1114 public IOException ioException() { 1115 return lastException; 1116 } 1117 1118 /** 1119 * Returns the <code>Pattern</code> this <code>Scanner</code> is currently 1120 * using to match delimiters. 1121 * 1122 * @return this scanner's delimiting pattern. 1123 */ 1124 public Pattern delimiter() { 1125 return delimPattern; 1126 } 1127 1128 /** 1129 * Sets this scanner's delimiting pattern to the specified pattern. 1130 * 1131 * @param pattern A delimiting pattern 1132 * @return this scanner 1133 */ 1134 public Scanner useDelimiter(Pattern pattern) { 1135 delimPattern = pattern; 1136 return this; 1137 } 1138 1139 /** 1140 * Sets this scanner's delimiting pattern to a pattern constructed from 1141 * the specified <code>String</code>. 1142 * 1143 * <p> An invocation of this method of the form 1144 * <tt>useDelimiter(pattern)</tt> behaves in exactly the same way as the 1145 * invocation <tt>useDelimiter(Pattern.compile(pattern))</tt>. 1146 * 1147 * <p> Invoking the {@link #reset} method will set the scanner's delimiter 1148 * to the <a href= "#default-delimiter">default</a>. 1149 * 1150 * @param pattern A string specifying a delimiting pattern 1151 * @return this scanner 1152 */ 1153 public Scanner useDelimiter(String pattern) { 1154 delimPattern = patternCache.forName(pattern); 1155 return this; 1156 } 1157 1158 /** 1159 * Returns this scanner's locale. 1160 * 1161 * <p>A scanner's locale affects many elements of its default 1162 * primitive matching regular expressions; see 1163 * <a href= "#localized-numbers">localized numbers</a> above. 1164 * 1165 * @return this scanner's locale 1166 */ 1167 public Locale locale() { 1168 return this.locale; 1169 } 1170 1171 /** 1172 * Sets this scanner's locale to the specified locale. 
1173 * 1174 * <p>A scanner's locale affects many elements of its default 1175 * primitive matching regular expressions; see 1176 * <a href= "#localized-numbers">localized numbers</a> above. 1177 * 1178 * <p>Invoking the {@link #reset} method will set the scanner's locale to 1179 * the <a href= "#initial-locale">initial locale</a>. 1180 * 1181 * @param locale A string specifying the locale to use 1182 * @return this scanner 1183 */ 1184 public Scanner useLocale(Locale locale) { 1185 if (locale.equals(this.locale)) 1186 return this; 1187 1188 this.locale = locale; 1189 DecimalFormat df = 1190 (DecimalFormat)NumberFormat.getNumberInstance(locale); 1191 DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale); 1192 1193 // These must be literalized to avoid collision with regex 1194 // metacharacters such as dot or parenthesis 1195 groupSeparator = "\\" + dfs.getGroupingSeparator(); 1196 decimalSeparator = "\\" + dfs.getDecimalSeparator(); 1197 1198 // Quoting the nonzero length locale-specific things 1199 // to avoid potential conflict with metacharacters 1200 nanString = "\\Q" + dfs.getNaN() + "\\E"; 1201 infinityString = "\\Q" + dfs.getInfinity() + "\\E"; 1202 positivePrefix = df.getPositivePrefix(); 1203 if (positivePrefix.length() > 0) 1204 positivePrefix = "\\Q" + positivePrefix + "\\E"; 1205 negativePrefix = df.getNegativePrefix(); 1206 if (negativePrefix.length() > 0) 1207 negativePrefix = "\\Q" + negativePrefix + "\\E"; 1208 positiveSuffix = df.getPositiveSuffix(); 1209 if (positiveSuffix.length() > 0) 1210 positiveSuffix = "\\Q" + positiveSuffix + "\\E"; 1211 negativeSuffix = df.getNegativeSuffix(); 1212 if (negativeSuffix.length() > 0) 1213 negativeSuffix = "\\Q" + negativeSuffix + "\\E"; 1214 1215 // Force rebuilding and recompilation of locale dependent 1216 // primitive patterns 1217 integerPattern = null; 1218 floatPattern = null; 1219 1220 return this; 1221 } 1222 1223 /** 1224 * Returns this scanner's default radix. 
1225 * 1226 * <p>A scanner's radix affects elements of its default 1227 * number matching regular expressions; see 1228 * <a href= "#localized-numbers">localized numbers</a> above. 1229 * 1230 * @return the default radix of this scanner 1231 */ 1232 public int radix() { 1233 return this.defaultRadix; 1234 } 1235 1236 /** 1237 * Sets this scanner's default radix to the specified radix. 1238 * 1239 * <p>A scanner's radix affects elements of its default 1240 * number matching regular expressions; see 1241 * <a href= "#localized-numbers">localized numbers</a> above. 1242 * 1243 * <p>If the radix is less than <code>Character.MIN_RADIX</code> 1244 * or greater than <code>Character.MAX_RADIX</code>, then an 1245 * <code>IllegalArgumentException</code> is thrown. 1246 * 1247 * <p>Invoking the {@link #reset} method will set the scanner's radix to 1248 * <code>10</code>. 1249 * 1250 * @param radix The radix to use when scanning numbers 1251 * @return this scanner 1252 * @throws IllegalArgumentException if radix is out of range 1253 */ 1254 public Scanner useRadix(int radix) { 1255 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) 1256 throw new IllegalArgumentException("radix:"+radix); 1257 1258 if (this.defaultRadix == radix) 1259 return this; 1260 this.defaultRadix = radix; 1261 // Force rebuilding and recompilation of radix dependent patterns 1262 integerPattern = null; 1263 return this; 1264 } 1265 1266 // The next operation should occur in the specified radix but 1267 // the default is left untouched. 1268 private void setRadix(int radix) { 1269 if (this.radix != radix) { 1270 // Force rebuilding and recompilation of radix dependent patterns 1271 integerPattern = null; 1272 this.radix = radix; 1273 } 1274 } 1275 1276 /** 1277 * Returns the match result of the last scanning operation performed 1278 * by this scanner. 
This method throws <code>IllegalStateException</code> 1279 * if no match has been performed, or if the last match was 1280 * not successful. 1281 * 1282 * <p>The various <code>next</code> methods of <code>Scanner</code> 1283 * make a match result available if they complete without throwing an 1284 * exception. For instance, after an invocation of the {@link #nextInt} 1285 * method that returned an int, this method returns a 1286 * <code>MatchResult</code> for the search of the 1287 * <a href="#Integer-regex"><i>Integer</i></a> regular expression 1288 * defined above. Similarly the {@link #findInLine}, 1289 * {@link #findWithinHorizon}, and {@link #skip} methods will make a 1290 * match available if they succeed. 1291 * 1292 * @apiNote 1293 * <p>The offset values reported by the {@link MatchResult#start()}, 1294 * {@link MatchResult#start(int)}, {@link MatchResult#end()}, and 1295 * {@link MatchResult#end(int)} methods are generally not useful, as they 1296 * are offsets within the Scanner's internal buffer, which might contain 1297 * an arbitrary portion of the input. The returned {@link MatchResult} is 1298 * useful for extracting portions of the match using capturing groups, 1299 * via the {@link MatchResult#group()}, {@link MatchResult#group(int)}, 1300 * and {@link MatchResult#groupCount()} methods. 1301 * 1302 * @return a match result for the last match operation 1303 * @throws IllegalStateException If no match result is available 1304 */ 1305 public MatchResult match() { 1306 if (!matchValid) 1307 throw new IllegalStateException("No match result available"); 1308 return matcher.toMatchResult(); 1309 } 1310 1311 /** 1312 * <p>Returns the string representation of this <code>Scanner</code>. The 1313 * string representation of a <code>Scanner</code> contains information 1314 * that may be useful for debugging. The exact format is unspecified. 
1315 * 1316 * @return The string representation of this scanner 1317 */ 1318 public String toString() { 1319 StringBuilder sb = new StringBuilder(); 1320 sb.append("java.util.Scanner"); 1321 sb.append("[delimiters=" + delimPattern + "]"); 1322 sb.append("[position=" + position + "]"); 1323 sb.append("[match valid=" + matchValid + "]"); 1324 sb.append("[need input=" + needInput + "]"); 1325 sb.append("[source closed=" + sourceClosed + "]"); 1326 sb.append("[skipped=" + skipped + "]"); 1327 sb.append("[group separator=" + groupSeparator + "]"); 1328 sb.append("[decimal separator=" + decimalSeparator + "]"); 1329 sb.append("[positive prefix=" + positivePrefix + "]"); 1330 sb.append("[negative prefix=" + negativePrefix + "]"); 1331 sb.append("[positive suffix=" + positiveSuffix + "]"); 1332 sb.append("[negative suffix=" + negativeSuffix + "]"); 1333 sb.append("[NaN string=" + nanString + "]"); 1334 sb.append("[infinity string=" + infinityString + "]"); 1335 return sb.toString(); 1336 } 1337 1338 /** 1339 * Returns true if this scanner has another token in its input. 1340 * This method may block while waiting for input to scan. 1341 * The scanner does not advance past any input. 1342 * 1343 * @return true if and only if this scanner has another token 1344 * @throws IllegalStateException if this scanner is closed 1345 * @see java.util.Iterator 1346 */ 1347 public boolean hasNext() { 1348 ensureOpen(); 1349 saveState(); 1350 while (!sourceClosed) { 1351 if (hasTokenInBuffer()) 1352 return revertState(true); 1353 readInput(); 1354 } 1355 boolean result = hasTokenInBuffer(); 1356 return revertState(result); 1357 } 1358 1359 /** 1360 * Finds and returns the next complete token from this scanner. 1361 * A complete token is preceded and followed by input that matches 1362 * the delimiter pattern. This method may block while waiting for input 1363 * to scan, even if a previous invocation of {@link #hasNext} returned 1364 * <code>true</code>. 
1365 * 1366 * @return the next token 1367 * @throws NoSuchElementException if no more tokens are available 1368 * @throws IllegalStateException if this scanner is closed 1369 * @see java.util.Iterator 1370 */ 1371 public String next() { 1372 ensureOpen(); 1373 clearCaches(); 1374 1375 while (true) { 1376 String token = getCompleteTokenInBuffer(null); 1377 if (token != null) { 1378 matchValid = true; 1379 skipped = false; 1380 return token; 1381 } 1382 if (needInput) 1383 readInput(); 1384 else 1385 throwFor(); 1386 } 1387 } 1388 1389 /** 1390 * The remove operation is not supported by this implementation of 1391 * <code>Iterator</code>. 1392 * 1393 * @throws UnsupportedOperationException if this method is invoked. 1394 * @see java.util.Iterator 1395 */ 1396 public void remove() { 1397 throw new UnsupportedOperationException(); 1398 } 1399 1400 /** 1401 * Returns true if the next token matches the pattern constructed from the 1402 * specified string. The scanner does not advance past any input. 1403 * 1404 * <p> An invocation of this method of the form <tt>hasNext(pattern)</tt> 1405 * behaves in exactly the same way as the invocation 1406 * <tt>hasNext(Pattern.compile(pattern))</tt>. 1407 * 1408 * @param pattern a string specifying the pattern to scan 1409 * @return true if and only if this scanner has another token matching 1410 * the specified pattern 1411 * @throws IllegalStateException if this scanner is closed 1412 */ 1413 public boolean hasNext(String pattern) { 1414 return hasNext(patternCache.forName(pattern)); 1415 } 1416 1417 /** 1418 * Returns the next token if it matches the pattern constructed from the 1419 * specified string. If the match is successful, the scanner advances 1420 * past the input that matched the pattern. 1421 * 1422 * <p> An invocation of this method of the form <tt>next(pattern)</tt> 1423 * behaves in exactly the same way as the invocation 1424 * <tt>next(Pattern.compile(pattern))</tt>. 
1425 * 1426 * @param pattern a string specifying the pattern to scan 1427 * @return the next token 1428 * @throws NoSuchElementException if no such tokens are available 1429 * @throws IllegalStateException if this scanner is closed 1430 */ 1431 public String next(String pattern) { 1432 return next(patternCache.forName(pattern)); 1433 } 1434 1435 /** 1436 * Returns true if the next complete token matches the specified pattern. 1437 * A complete token is prefixed and postfixed by input that matches 1438 * the delimiter pattern. This method may block while waiting for input. 1439 * The scanner does not advance past any input. 1440 * 1441 * @param pattern the pattern to scan for 1442 * @return true if and only if this scanner has another token matching 1443 * the specified pattern 1444 * @throws IllegalStateException if this scanner is closed 1445 */ 1446 public boolean hasNext(Pattern pattern) { 1447 ensureOpen(); 1448 if (pattern == null) 1449 throw new NullPointerException(); 1450 hasNextPattern = null; 1451 saveState(); 1452 1453 while (true) { 1454 if (getCompleteTokenInBuffer(pattern) != null) { 1455 matchValid = true; 1456 cacheResult(); 1457 return revertState(true); 1458 } 1459 if (needInput) 1460 readInput(); 1461 else 1462 return revertState(false); 1463 } 1464 } 1465 1466 /** 1467 * Returns the next token if it matches the specified pattern. This 1468 * method may block while waiting for input to scan, even if a previous 1469 * invocation of {@link #hasNext(Pattern)} returned <code>true</code>. 1470 * If the match is successful, the scanner advances past the input that 1471 * matched the pattern. 
1472 * 1473 * @param pattern the pattern to scan for 1474 * @return the next token 1475 * @throws NoSuchElementException if no more tokens are available 1476 * @throws IllegalStateException if this scanner is closed 1477 */ 1478 public String next(Pattern pattern) { 1479 ensureOpen(); 1480 if (pattern == null) 1481 throw new NullPointerException(); 1482 1483 // Did we already find this pattern? 1484 if (hasNextPattern == pattern) 1485 return getCachedResult(); 1486 clearCaches(); 1487 1488 // Search for the pattern 1489 while (true) { 1490 String token = getCompleteTokenInBuffer(pattern); 1491 if (token != null) { 1492 matchValid = true; 1493 skipped = false; 1494 return token; 1495 } 1496 if (needInput) 1497 readInput(); 1498 else 1499 throwFor(); 1500 } 1501 } 1502 1503 /** 1504 * Returns true if there is another line in the input of this scanner. 1505 * This method may block while waiting for input. The scanner does not 1506 * advance past any input. 1507 * 1508 * @return true if and only if this scanner has another line of input 1509 * @throws IllegalStateException if this scanner is closed 1510 */ 1511 public boolean hasNextLine() { 1512 saveState(); 1513 1514 String result = findWithinHorizon(linePattern(), 0); 1515 if (result != null) { 1516 MatchResult mr = this.match(); 1517 String lineSep = mr.group(1); 1518 if (lineSep != null) { 1519 result = result.substring(0, result.length() - 1520 lineSep.length()); 1521 cacheResult(result); 1522 1523 } else { 1524 cacheResult(); 1525 } 1526 } 1527 revertState(); 1528 return (result != null); 1529 } 1530 1531 /** 1532 * Advances this scanner past the current line and returns the input 1533 * that was skipped. 1534 * 1535 * This method returns the rest of the current line, excluding any line 1536 * separator at the end. The position is set to the beginning of the next 1537 * line. 
1538 * 1539 * <p>Since this method continues to search through the input looking 1540 * for a line separator, it may buffer all of the input searching for 1541 * the line to skip if no line separators are present. 1542 * 1543 * @return the line that was skipped 1544 * @throws NoSuchElementException if no line was found 1545 * @throws IllegalStateException if this scanner is closed 1546 */ 1547 public String nextLine() { 1548 if (hasNextPattern == linePattern()) 1549 return getCachedResult(); 1550 clearCaches(); 1551 1552 String result = findWithinHorizon(linePattern, 0); 1553 if (result == null) 1554 throw new NoSuchElementException("No line found"); 1555 MatchResult mr = this.match(); 1556 String lineSep = mr.group(1); 1557 if (lineSep != null) 1558 result = result.substring(0, result.length() - lineSep.length()); 1559 if (result == null) 1560 throw new NoSuchElementException(); 1561 else 1562 return result; 1563 } 1564 1565 // Public methods that ignore delimiters 1566 1567 /** 1568 * Attempts to find the next occurrence of a pattern constructed from the 1569 * specified string, ignoring delimiters. 1570 * 1571 * <p>An invocation of this method of the form <tt>findInLine(pattern)</tt> 1572 * behaves in exactly the same way as the invocation 1573 * <tt>findInLine(Pattern.compile(pattern))</tt>. 1574 * 1575 * @param pattern a string specifying the pattern to search for 1576 * @return the text that matched the specified pattern 1577 * @throws IllegalStateException if this scanner is closed 1578 */ 1579 public String findInLine(String pattern) { 1580 return findInLine(patternCache.forName(pattern)); 1581 } 1582 1583 /** 1584 * Attempts to find the next occurrence of the specified pattern ignoring 1585 * delimiters. If the pattern is found before the next line separator, the 1586 * scanner advances past the input that matched and returns the string that 1587 * matched the pattern. 
1588 * If no such pattern is detected in the input up to the next line 1589 * separator, then <code>null</code> is returned and the scanner's 1590 * position is unchanged. This method may block waiting for input that 1591 * matches the pattern. 1592 * 1593 * <p>Since this method continues to search through the input looking 1594 * for the specified pattern, it may buffer all of the input searching for 1595 * the desired token if no line separators are present. 1596 * 1597 * @param pattern the pattern to scan for 1598 * @return the text that matched the specified pattern 1599 * @throws IllegalStateException if this scanner is closed 1600 */ 1601 public String findInLine(Pattern pattern) { 1602 ensureOpen(); 1603 if (pattern == null) 1604 throw new NullPointerException(); 1605 clearCaches(); 1606 // Expand buffer to include the next newline or end of input 1607 int endPosition = 0; 1608 saveState(); 1609 while (true) { 1610 if (findPatternInBuffer(separatorPattern(), 0)) { 1611 endPosition = matcher.start(); 1612 break; // up to next newline 1613 } 1614 if (needInput) { 1615 readInput(); 1616 } else { 1617 endPosition = buf.limit(); 1618 break; // up to end of input 1619 } 1620 } 1621 revertState(); 1622 int horizonForLine = endPosition - position; 1623 // If there is nothing between the current pos and the next 1624 // newline simply return null, invoking findWithinHorizon 1625 // with "horizon=0" will scan beyond the line bound. 1626 if (horizonForLine == 0) 1627 return null; 1628 // Search for the pattern 1629 return findWithinHorizon(pattern, horizonForLine); 1630 } 1631 1632 /** 1633 * Attempts to find the next occurrence of a pattern constructed from the 1634 * specified string, ignoring delimiters. 1635 * 1636 * <p>An invocation of this method of the form 1637 * <tt>findWithinHorizon(pattern)</tt> behaves in exactly the same way as 1638 * the invocation 1639 * <tt>findWithinHorizon(Pattern.compile(pattern), horizon)</tt>. 
1640 * 1641 * @param pattern a string specifying the pattern to search for 1642 * @param horizon the search horizon 1643 * @return the text that matched the specified pattern 1644 * @throws IllegalStateException if this scanner is closed 1645 * @throws IllegalArgumentException if horizon is negative 1646 */ 1647 public String findWithinHorizon(String pattern, int horizon) { 1648 return findWithinHorizon(patternCache.forName(pattern), horizon); 1649 } 1650 1651 /** 1652 * Attempts to find the next occurrence of the specified pattern. 1653 * 1654 * <p>This method searches through the input up to the specified 1655 * search horizon, ignoring delimiters. If the pattern is found the 1656 * scanner advances past the input that matched and returns the string 1657 * that matched the pattern. If no such pattern is detected then the 1658 * null is returned and the scanner's position remains unchanged. This 1659 * method may block waiting for input that matches the pattern. 1660 * 1661 * <p>A scanner will never search more than <code>horizon</code> code 1662 * points beyond its current position. Note that a match may be clipped 1663 * by the horizon; that is, an arbitrary match result may have been 1664 * different if the horizon had been larger. The scanner treats the 1665 * horizon as a transparent, non-anchoring bound (see {@link 1666 * Matcher#useTransparentBounds} and {@link Matcher#useAnchoringBounds}). 1667 * 1668 * <p>If horizon is <code>0</code>, then the horizon is ignored and 1669 * this method continues to search through the input looking for the 1670 * specified pattern without bound. In this case it may buffer all of 1671 * the input searching for the pattern. 1672 * 1673 * <p>If horizon is negative, then an IllegalArgumentException is 1674 * thrown. 
1675 * 1676 * @param pattern the pattern to scan for 1677 * @param horizon the search horizon 1678 * @return the text that matched the specified pattern 1679 * @throws IllegalStateException if this scanner is closed 1680 * @throws IllegalArgumentException if horizon is negative 1681 */ 1682 public String findWithinHorizon(Pattern pattern, int horizon) { 1683 ensureOpen(); 1684 if (pattern == null) 1685 throw new NullPointerException(); 1686 if (horizon < 0) 1687 throw new IllegalArgumentException("horizon < 0"); 1688 clearCaches(); 1689 1690 // Search for the pattern 1691 while (true) { 1692 if (findPatternInBuffer(pattern, horizon)) { 1693 matchValid = true; 1694 return matcher.group(); 1695 } 1696 if (needInput) 1697 readInput(); 1698 else 1699 break; // up to end of input 1700 } 1701 return null; 1702 } 1703 1704 /** 1705 * Skips input that matches the specified pattern, ignoring delimiters. 1706 * This method will skip input if an anchored match of the specified 1707 * pattern succeeds. 1708 * 1709 * <p>If a match to the specified pattern is not found at the 1710 * current position, then no input is skipped and a 1711 * <tt>NoSuchElementException</tt> is thrown. 1712 * 1713 * <p>Since this method seeks to match the specified pattern starting at 1714 * the scanner's current position, patterns that can match a lot of 1715 * input (".*", for example) may cause the scanner to buffer a large 1716 * amount of input. 1717 * 1718 * <p>Note that it is possible to skip something without risking a 1719 * <code>NoSuchElementException</code> by using a pattern that can 1720 * match nothing, e.g., <code>sc.skip("[ \t]*")</code>. 
1721 * 1722 * @param pattern a string specifying the pattern to skip over 1723 * @return this scanner 1724 * @throws NoSuchElementException if the specified pattern is not found 1725 * @throws IllegalStateException if this scanner is closed 1726 */ 1727 public Scanner skip(Pattern pattern) { 1728 ensureOpen(); 1729 if (pattern == null) 1730 throw new NullPointerException(); 1731 clearCaches(); 1732 1733 // Search for the pattern 1734 while (true) { 1735 if (matchPatternInBuffer(pattern)) { 1736 matchValid = true; 1737 position = matcher.end(); 1738 return this; 1739 } 1740 if (needInput) 1741 readInput(); 1742 else 1743 throw new NoSuchElementException(); 1744 } 1745 } 1746 1747 /** 1748 * Skips input that matches a pattern constructed from the specified 1749 * string. 1750 * 1751 * <p> An invocation of this method of the form <tt>skip(pattern)</tt> 1752 * behaves in exactly the same way as the invocation 1753 * <tt>skip(Pattern.compile(pattern))</tt>. 1754 * 1755 * @param pattern a string specifying the pattern to skip over 1756 * @return this scanner 1757 * @throws IllegalStateException if this scanner is closed 1758 */ 1759 public Scanner skip(String pattern) { 1760 return skip(patternCache.forName(pattern)); 1761 } 1762 1763 // Convenience methods for scanning primitives 1764 1765 /** 1766 * Returns true if the next token in this scanner's input can be 1767 * interpreted as a boolean value using a case insensitive pattern 1768 * created from the string "true|false". The scanner does not 1769 * advance past the input that matched. 1770 * 1771 * @return true if and only if this scanner's next token is a valid 1772 * boolean value 1773 * @throws IllegalStateException if this scanner is closed 1774 */ 1775 public boolean hasNextBoolean() { 1776 return hasNext(boolPattern()); 1777 } 1778 1779 /** 1780 * Scans the next token of the input into a boolean value and returns 1781 * that value. 
This method will throw <code>InputMismatchException</code> 1782 * if the next token cannot be translated into a valid boolean value. 1783 * If the match is successful, the scanner advances past the input that 1784 * matched. 1785 * 1786 * @return the boolean scanned from the input 1787 * @throws InputMismatchException if the next token is not a valid boolean 1788 * @throws NoSuchElementException if input is exhausted 1789 * @throws IllegalStateException if this scanner is closed 1790 */ 1791 public boolean nextBoolean() { 1792 clearCaches(); 1793 return Boolean.parseBoolean(next(boolPattern())); 1794 } 1795 1796 /** 1797 * Returns true if the next token in this scanner's input can be 1798 * interpreted as a byte value in the default radix using the 1799 * {@link #nextByte} method. The scanner does not advance past any input. 1800 * 1801 * @return true if and only if this scanner's next token is a valid 1802 * byte value 1803 * @throws IllegalStateException if this scanner is closed 1804 */ 1805 public boolean hasNextByte() { 1806 return hasNextByte(defaultRadix); 1807 } 1808 1809 /** 1810 * Returns true if the next token in this scanner's input can be 1811 * interpreted as a byte value in the specified radix using the 1812 * {@link #nextByte} method. The scanner does not advance past any input. 1813 * 1814 * @param radix the radix used to interpret the token as a byte value 1815 * @return true if and only if this scanner's next token is a valid 1816 * byte value 1817 * @throws IllegalStateException if this scanner is closed 1818 */ 1819 public boolean hasNextByte(int radix) { 1820 setRadix(radix); 1821 boolean result = hasNext(integerPattern()); 1822 if (result) { // Cache it 1823 try { 1824 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 
1825 processIntegerToken(hasNextResult) : 1826 hasNextResult; 1827 typeCache = Byte.parseByte(s, radix); 1828 } catch (NumberFormatException nfe) { 1829 result = false; 1830 } 1831 } 1832 return result; 1833 } 1834 1835 /** 1836 * Scans the next token of the input as a <tt>byte</tt>. 1837 * 1838 * <p> An invocation of this method of the form 1839 * <tt>nextByte()</tt> behaves in exactly the same way as the 1840 * invocation <tt>nextByte(radix)</tt>, where <code>radix</code> 1841 * is the default radix of this scanner. 1842 * 1843 * @return the <tt>byte</tt> scanned from the input 1844 * @throws InputMismatchException 1845 * if the next token does not match the <i>Integer</i> 1846 * regular expression, or is out of range 1847 * @throws NoSuchElementException if input is exhausted 1848 * @throws IllegalStateException if this scanner is closed 1849 */ 1850 public byte nextByte() { 1851 return nextByte(defaultRadix); 1852 } 1853 1854 /** 1855 * Scans the next token of the input as a <tt>byte</tt>. 1856 * This method will throw <code>InputMismatchException</code> 1857 * if the next token cannot be translated into a valid byte value as 1858 * described below. If the translation is successful, the scanner advances 1859 * past the input that matched. 1860 * 1861 * <p> If the next token matches the <a 1862 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1863 * above then the token is converted into a <tt>byte</tt> value as if by 1864 * removing all locale specific prefixes, group separators, and locale 1865 * specific suffixes, then mapping non-ASCII digits into ASCII 1866 * digits via {@link Character#digit Character.digit}, prepending a 1867 * negative sign (-) if the locale specific negative prefixes and suffixes 1868 * were present, and passing the resulting string to 1869 * {@link Byte#parseByte(String, int) Byte.parseByte} with the 1870 * specified radix. 
1871 * 1872 * @param radix the radix used to interpret the token as a byte value 1873 * @return the <tt>byte</tt> scanned from the input 1874 * @throws InputMismatchException 1875 * if the next token does not match the <i>Integer</i> 1876 * regular expression, or is out of range 1877 * @throws NoSuchElementException if input is exhausted 1878 * @throws IllegalStateException if this scanner is closed 1879 */ 1880 public byte nextByte(int radix) { 1881 // Check cached result 1882 if ((typeCache != null) && (typeCache instanceof Byte) 1883 && this.radix == radix) { 1884 byte val = ((Byte)typeCache).byteValue(); 1885 useTypeCache(); 1886 return val; 1887 } 1888 setRadix(radix); 1889 clearCaches(); 1890 // Search for next byte 1891 try { 1892 String s = next(integerPattern()); 1893 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 1894 s = processIntegerToken(s); 1895 return Byte.parseByte(s, radix); 1896 } catch (NumberFormatException nfe) { 1897 position = matcher.start(); // don't skip bad token 1898 throw new InputMismatchException(nfe.getMessage()); 1899 } 1900 } 1901 1902 /** 1903 * Returns true if the next token in this scanner's input can be 1904 * interpreted as a short value in the default radix using the 1905 * {@link #nextShort} method. The scanner does not advance past any input. 1906 * 1907 * @return true if and only if this scanner's next token is a valid 1908 * short value in the default radix 1909 * @throws IllegalStateException if this scanner is closed 1910 */ 1911 public boolean hasNextShort() { 1912 return hasNextShort(defaultRadix); 1913 } 1914 1915 /** 1916 * Returns true if the next token in this scanner's input can be 1917 * interpreted as a short value in the specified radix using the 1918 * {@link #nextShort} method. The scanner does not advance past any input. 
1919 * 1920 * @param radix the radix used to interpret the token as a short value 1921 * @return true if and only if this scanner's next token is a valid 1922 * short value in the specified radix 1923 * @throws IllegalStateException if this scanner is closed 1924 */ 1925 public boolean hasNextShort(int radix) { 1926 setRadix(radix); 1927 boolean result = hasNext(integerPattern()); 1928 if (result) { // Cache it 1929 try { 1930 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 1931 processIntegerToken(hasNextResult) : 1932 hasNextResult; 1933 typeCache = Short.parseShort(s, radix); 1934 } catch (NumberFormatException nfe) { 1935 result = false; 1936 } 1937 } 1938 return result; 1939 } 1940 1941 /** 1942 * Scans the next token of the input as a <tt>short</tt>. 1943 * 1944 * <p> An invocation of this method of the form 1945 * <tt>nextShort()</tt> behaves in exactly the same way as the 1946 * invocation <tt>nextShort(radix)</tt>, where <code>radix</code> 1947 * is the default radix of this scanner. 1948 * 1949 * @return the <tt>short</tt> scanned from the input 1950 * @throws InputMismatchException 1951 * if the next token does not match the <i>Integer</i> 1952 * regular expression, or is out of range 1953 * @throws NoSuchElementException if input is exhausted 1954 * @throws IllegalStateException if this scanner is closed 1955 */ 1956 public short nextShort() { 1957 return nextShort(defaultRadix); 1958 } 1959 1960 /** 1961 * Scans the next token of the input as a <tt>short</tt>. 1962 * This method will throw <code>InputMismatchException</code> 1963 * if the next token cannot be translated into a valid short value as 1964 * described below. If the translation is successful, the scanner advances 1965 * past the input that matched. 
1966 * 1967 * <p> If the next token matches the <a 1968 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1969 * above then the token is converted into a <tt>short</tt> value as if by 1970 * removing all locale specific prefixes, group separators, and locale 1971 * specific suffixes, then mapping non-ASCII digits into ASCII 1972 * digits via {@link Character#digit Character.digit}, prepending a 1973 * negative sign (-) if the locale specific negative prefixes and suffixes 1974 * were present, and passing the resulting string to 1975 * {@link Short#parseShort(String, int) Short.parseShort} with the 1976 * specified radix. 1977 * 1978 * @param radix the radix used to interpret the token as a short value 1979 * @return the <tt>short</tt> scanned from the input 1980 * @throws InputMismatchException 1981 * if the next token does not match the <i>Integer</i> 1982 * regular expression, or is out of range 1983 * @throws NoSuchElementException if input is exhausted 1984 * @throws IllegalStateException if this scanner is closed 1985 */ 1986 public short nextShort(int radix) { 1987 // Check cached result 1988 if ((typeCache != null) && (typeCache instanceof Short) 1989 && this.radix == radix) { 1990 short val = ((Short)typeCache).shortValue(); 1991 useTypeCache(); 1992 return val; 1993 } 1994 setRadix(radix); 1995 clearCaches(); 1996 // Search for next short 1997 try { 1998 String s = next(integerPattern()); 1999 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2000 s = processIntegerToken(s); 2001 return Short.parseShort(s, radix); 2002 } catch (NumberFormatException nfe) { 2003 position = matcher.start(); // don't skip bad token 2004 throw new InputMismatchException(nfe.getMessage()); 2005 } 2006 } 2007 2008 /** 2009 * Returns true if the next token in this scanner's input can be 2010 * interpreted as an int value in the default radix using the 2011 * {@link #nextInt} method. The scanner does not advance past any input. 
2012 * 2013 * @return true if and only if this scanner's next token is a valid 2014 * int value 2015 * @throws IllegalStateException if this scanner is closed 2016 */ 2017 public boolean hasNextInt() { 2018 return hasNextInt(defaultRadix); 2019 } 2020 2021 /** 2022 * Returns true if the next token in this scanner's input can be 2023 * interpreted as an int value in the specified radix using the 2024 * {@link #nextInt} method. The scanner does not advance past any input. 2025 * 2026 * @param radix the radix used to interpret the token as an int value 2027 * @return true if and only if this scanner's next token is a valid 2028 * int value 2029 * @throws IllegalStateException if this scanner is closed 2030 */ 2031 public boolean hasNextInt(int radix) { 2032 setRadix(radix); 2033 boolean result = hasNext(integerPattern()); 2034 if (result) { // Cache it 2035 try { 2036 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2037 processIntegerToken(hasNextResult) : 2038 hasNextResult; 2039 typeCache = Integer.parseInt(s, radix); 2040 } catch (NumberFormatException nfe) { 2041 result = false; 2042 } 2043 } 2044 return result; 2045 } 2046 2047 /** 2048 * The integer token must be stripped of prefixes, group separators, 2049 * and suffixes, non ascii digits must be converted into ascii digits 2050 * before parse will accept it. 
2051 */ 2052 private String processIntegerToken(String token) { 2053 String result = token.replaceAll(""+groupSeparator, ""); 2054 boolean isNegative = false; 2055 int preLen = negativePrefix.length(); 2056 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2057 isNegative = true; 2058 result = result.substring(preLen); 2059 } 2060 int sufLen = negativeSuffix.length(); 2061 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2062 isNegative = true; 2063 result = result.substring(result.length() - sufLen, 2064 result.length()); 2065 } 2066 if (isNegative) 2067 result = "-" + result; 2068 return result; 2069 } 2070 2071 /** 2072 * Scans the next token of the input as an <tt>int</tt>. 2073 * 2074 * <p> An invocation of this method of the form 2075 * <tt>nextInt()</tt> behaves in exactly the same way as the 2076 * invocation <tt>nextInt(radix)</tt>, where <code>radix</code> 2077 * is the default radix of this scanner. 2078 * 2079 * @return the <tt>int</tt> scanned from the input 2080 * @throws InputMismatchException 2081 * if the next token does not match the <i>Integer</i> 2082 * regular expression, or is out of range 2083 * @throws NoSuchElementException if input is exhausted 2084 * @throws IllegalStateException if this scanner is closed 2085 */ 2086 public int nextInt() { 2087 return nextInt(defaultRadix); 2088 } 2089 2090 /** 2091 * Scans the next token of the input as an <tt>int</tt>. 2092 * This method will throw <code>InputMismatchException</code> 2093 * if the next token cannot be translated into a valid int value as 2094 * described below. If the translation is successful, the scanner advances 2095 * past the input that matched. 
2096 * 2097 * <p> If the next token matches the <a 2098 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2099 * above then the token is converted into an <tt>int</tt> value as if by 2100 * removing all locale specific prefixes, group separators, and locale 2101 * specific suffixes, then mapping non-ASCII digits into ASCII 2102 * digits via {@link Character#digit Character.digit}, prepending a 2103 * negative sign (-) if the locale specific negative prefixes and suffixes 2104 * were present, and passing the resulting string to 2105 * {@link Integer#parseInt(String, int) Integer.parseInt} with the 2106 * specified radix. 2107 * 2108 * @param radix the radix used to interpret the token as an int value 2109 * @return the <tt>int</tt> scanned from the input 2110 * @throws InputMismatchException 2111 * if the next token does not match the <i>Integer</i> 2112 * regular expression, or is out of range 2113 * @throws NoSuchElementException if input is exhausted 2114 * @throws IllegalStateException if this scanner is closed 2115 */ 2116 public int nextInt(int radix) { 2117 // Check cached result 2118 if ((typeCache != null) && (typeCache instanceof Integer) 2119 && this.radix == radix) { 2120 int val = ((Integer)typeCache).intValue(); 2121 useTypeCache(); 2122 return val; 2123 } 2124 setRadix(radix); 2125 clearCaches(); 2126 // Search for next int 2127 try { 2128 String s = next(integerPattern()); 2129 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2130 s = processIntegerToken(s); 2131 return Integer.parseInt(s, radix); 2132 } catch (NumberFormatException nfe) { 2133 position = matcher.start(); // don't skip bad token 2134 throw new InputMismatchException(nfe.getMessage()); 2135 } 2136 } 2137 2138 /** 2139 * Returns true if the next token in this scanner's input can be 2140 * interpreted as a long value in the default radix using the 2141 * {@link #nextLong} method. The scanner does not advance past any input. 
2142 * 2143 * @return true if and only if this scanner's next token is a valid 2144 * long value 2145 * @throws IllegalStateException if this scanner is closed 2146 */ 2147 public boolean hasNextLong() { 2148 return hasNextLong(defaultRadix); 2149 } 2150 2151 /** 2152 * Returns true if the next token in this scanner's input can be 2153 * interpreted as a long value in the specified radix using the 2154 * {@link #nextLong} method. The scanner does not advance past any input. 2155 * 2156 * @param radix the radix used to interpret the token as a long value 2157 * @return true if and only if this scanner's next token is a valid 2158 * long value 2159 * @throws IllegalStateException if this scanner is closed 2160 */ 2161 public boolean hasNextLong(int radix) { 2162 setRadix(radix); 2163 boolean result = hasNext(integerPattern()); 2164 if (result) { // Cache it 2165 try { 2166 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2167 processIntegerToken(hasNextResult) : 2168 hasNextResult; 2169 typeCache = Long.parseLong(s, radix); 2170 } catch (NumberFormatException nfe) { 2171 result = false; 2172 } 2173 } 2174 return result; 2175 } 2176 2177 /** 2178 * Scans the next token of the input as a <tt>long</tt>. 2179 * 2180 * <p> An invocation of this method of the form 2181 * <tt>nextLong()</tt> behaves in exactly the same way as the 2182 * invocation <tt>nextLong(radix)</tt>, where <code>radix</code> 2183 * is the default radix of this scanner. 2184 * 2185 * @return the <tt>long</tt> scanned from the input 2186 * @throws InputMismatchException 2187 * if the next token does not match the <i>Integer</i> 2188 * regular expression, or is out of range 2189 * @throws NoSuchElementException if input is exhausted 2190 * @throws IllegalStateException if this scanner is closed 2191 */ 2192 public long nextLong() { 2193 return nextLong(defaultRadix); 2194 } 2195 2196 /** 2197 * Scans the next token of the input as a <tt>long</tt>. 
2198 * This method will throw <code>InputMismatchException</code> 2199 * if the next token cannot be translated into a valid long value as 2200 * described below. If the translation is successful, the scanner advances 2201 * past the input that matched. 2202 * 2203 * <p> If the next token matches the <a 2204 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2205 * above then the token is converted into a <tt>long</tt> value as if by 2206 * removing all locale specific prefixes, group separators, and locale 2207 * specific suffixes, then mapping non-ASCII digits into ASCII 2208 * digits via {@link Character#digit Character.digit}, prepending a 2209 * negative sign (-) if the locale specific negative prefixes and suffixes 2210 * were present, and passing the resulting string to 2211 * {@link Long#parseLong(String, int) Long.parseLong} with the 2212 * specified radix. 2213 * 2214 * @param radix the radix used to interpret the token as an int value 2215 * @return the <tt>long</tt> scanned from the input 2216 * @throws InputMismatchException 2217 * if the next token does not match the <i>Integer</i> 2218 * regular expression, or is out of range 2219 * @throws NoSuchElementException if input is exhausted 2220 * @throws IllegalStateException if this scanner is closed 2221 */ 2222 public long nextLong(int radix) { 2223 // Check cached result 2224 if ((typeCache != null) && (typeCache instanceof Long) 2225 && this.radix == radix) { 2226 long val = ((Long)typeCache).longValue(); 2227 useTypeCache(); 2228 return val; 2229 } 2230 setRadix(radix); 2231 clearCaches(); 2232 try { 2233 String s = next(integerPattern()); 2234 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2235 s = processIntegerToken(s); 2236 return Long.parseLong(s, radix); 2237 } catch (NumberFormatException nfe) { 2238 position = matcher.start(); // don't skip bad token 2239 throw new InputMismatchException(nfe.getMessage()); 2240 } 2241 } 2242 2243 /** 2244 * The float token must be stripped 
of prefixes, group separators, 2245 * and suffixes, non ascii digits must be converted into ascii digits 2246 * before parseFloat will accept it. 2247 * 2248 * If there are non-ascii digits in the token these digits must 2249 * be processed before the token is passed to parseFloat. 2250 */ 2251 private String processFloatToken(String token) { 2252 String result = token.replaceAll(groupSeparator, ""); 2253 if (!decimalSeparator.equals("\\.")) 2254 result = result.replaceAll(decimalSeparator, "."); 2255 boolean isNegative = false; 2256 int preLen = negativePrefix.length(); 2257 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2258 isNegative = true; 2259 result = result.substring(preLen); 2260 } 2261 int sufLen = negativeSuffix.length(); 2262 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2263 isNegative = true; 2264 result = result.substring(result.length() - sufLen, 2265 result.length()); 2266 } 2267 if (result.equals(nanString)) 2268 result = "NaN"; 2269 if (result.equals(infinityString)) 2270 result = "Infinity"; 2271 if (isNegative) 2272 result = "-" + result; 2273 2274 // Translate non-ASCII digits 2275 Matcher m = NON_ASCII_DIGIT.matcher(result); 2276 if (m.find()) { 2277 StringBuilder inASCII = new StringBuilder(); 2278 for (int i=0; i<result.length(); i++) { 2279 char nextChar = result.charAt(i); 2280 if (Character.isDigit(nextChar)) { 2281 int d = Character.digit(nextChar, 10); 2282 if (d != -1) 2283 inASCII.append(d); 2284 else 2285 inASCII.append(nextChar); 2286 } else { 2287 inASCII.append(nextChar); 2288 } 2289 } 2290 result = inASCII.toString(); 2291 } 2292 2293 return result; 2294 } 2295 2296 /** 2297 * Returns true if the next token in this scanner's input can be 2298 * interpreted as a float value using the {@link #nextFloat} 2299 * method. The scanner does not advance past any input. 
2300 * 2301 * @return true if and only if this scanner's next token is a valid 2302 * float value 2303 * @throws IllegalStateException if this scanner is closed 2304 */ 2305 public boolean hasNextFloat() { 2306 setRadix(10); 2307 boolean result = hasNext(floatPattern()); 2308 if (result) { // Cache it 2309 try { 2310 String s = processFloatToken(hasNextResult); 2311 typeCache = Float.valueOf(Float.parseFloat(s)); 2312 } catch (NumberFormatException nfe) { 2313 result = false; 2314 } 2315 } 2316 return result; 2317 } 2318 2319 /** 2320 * Scans the next token of the input as a <tt>float</tt>. 2321 * This method will throw <code>InputMismatchException</code> 2322 * if the next token cannot be translated into a valid float value as 2323 * described below. If the translation is successful, the scanner advances 2324 * past the input that matched. 2325 * 2326 * <p> If the next token matches the <a 2327 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2328 * then the token is converted into a <tt>float</tt> value as if by 2329 * removing all locale specific prefixes, group separators, and locale 2330 * specific suffixes, then mapping non-ASCII digits into ASCII 2331 * digits via {@link Character#digit Character.digit}, prepending a 2332 * negative sign (-) if the locale specific negative prefixes and suffixes 2333 * were present, and passing the resulting string to 2334 * {@link Float#parseFloat Float.parseFloat}. If the token matches 2335 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2336 * is passed to {@link Float#parseFloat(String) Float.parseFloat} as 2337 * appropriate. 
2338 * 2339 * @return the <tt>float</tt> scanned from the input 2340 * @throws InputMismatchException 2341 * if the next token does not match the <i>Float</i> 2342 * regular expression, or is out of range 2343 * @throws NoSuchElementException if input is exhausted 2344 * @throws IllegalStateException if this scanner is closed 2345 */ 2346 public float nextFloat() { 2347 // Check cached result 2348 if ((typeCache != null) && (typeCache instanceof Float)) { 2349 float val = ((Float)typeCache).floatValue(); 2350 useTypeCache(); 2351 return val; 2352 } 2353 setRadix(10); 2354 clearCaches(); 2355 try { 2356 return Float.parseFloat(processFloatToken(next(floatPattern()))); 2357 } catch (NumberFormatException nfe) { 2358 position = matcher.start(); // don't skip bad token 2359 throw new InputMismatchException(nfe.getMessage()); 2360 } 2361 } 2362 2363 /** 2364 * Returns true if the next token in this scanner's input can be 2365 * interpreted as a double value using the {@link #nextDouble} 2366 * method. The scanner does not advance past any input. 2367 * 2368 * @return true if and only if this scanner's next token is a valid 2369 * double value 2370 * @throws IllegalStateException if this scanner is closed 2371 */ 2372 public boolean hasNextDouble() { 2373 setRadix(10); 2374 boolean result = hasNext(floatPattern()); 2375 if (result) { // Cache it 2376 try { 2377 String s = processFloatToken(hasNextResult); 2378 typeCache = Double.valueOf(Double.parseDouble(s)); 2379 } catch (NumberFormatException nfe) { 2380 result = false; 2381 } 2382 } 2383 return result; 2384 } 2385 2386 /** 2387 * Scans the next token of the input as a <tt>double</tt>. 2388 * This method will throw <code>InputMismatchException</code> 2389 * if the next token cannot be translated into a valid double value. 2390 * If the translation is successful, the scanner advances past the input 2391 * that matched. 
2392 * 2393 * <p> If the next token matches the <a 2394 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2395 * then the token is converted into a <tt>double</tt> value as if by 2396 * removing all locale specific prefixes, group separators, and locale 2397 * specific suffixes, then mapping non-ASCII digits into ASCII 2398 * digits via {@link Character#digit Character.digit}, prepending a 2399 * negative sign (-) if the locale specific negative prefixes and suffixes 2400 * were present, and passing the resulting string to 2401 * {@link Double#parseDouble Double.parseDouble}. If the token matches 2402 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2403 * is passed to {@link Double#parseDouble(String) Double.parseDouble} as 2404 * appropriate. 2405 * 2406 * @return the <tt>double</tt> scanned from the input 2407 * @throws InputMismatchException 2408 * if the next token does not match the <i>Float</i> 2409 * regular expression, or is out of range 2410 * @throws NoSuchElementException if the input is exhausted 2411 * @throws IllegalStateException if this scanner is closed 2412 */ 2413 public double nextDouble() { 2414 // Check cached result 2415 if ((typeCache != null) && (typeCache instanceof Double)) { 2416 double val = ((Double)typeCache).doubleValue(); 2417 useTypeCache(); 2418 return val; 2419 } 2420 setRadix(10); 2421 clearCaches(); 2422 // Search for next float 2423 try { 2424 return Double.parseDouble(processFloatToken(next(floatPattern()))); 2425 } catch (NumberFormatException nfe) { 2426 position = matcher.start(); // don't skip bad token 2427 throw new InputMismatchException(nfe.getMessage()); 2428 } 2429 } 2430 2431 // Convenience methods for scanning multi precision numbers 2432 2433 /** 2434 * Returns true if the next token in this scanner's input can be 2435 * interpreted as a <code>BigInteger</code> in the default radix using the 2436 * {@link #nextBigInteger} method. 
The scanner does not advance past any 2437 * input. 2438 * 2439 * @return true if and only if this scanner's next token is a valid 2440 * <code>BigInteger</code> 2441 * @throws IllegalStateException if this scanner is closed 2442 */ 2443 public boolean hasNextBigInteger() { 2444 return hasNextBigInteger(defaultRadix); 2445 } 2446 2447 /** 2448 * Returns true if the next token in this scanner's input can be 2449 * interpreted as a <code>BigInteger</code> in the specified radix using 2450 * the {@link #nextBigInteger} method. The scanner does not advance past 2451 * any input. 2452 * 2453 * @param radix the radix used to interpret the token as an integer 2454 * @return true if and only if this scanner's next token is a valid 2455 * <code>BigInteger</code> 2456 * @throws IllegalStateException if this scanner is closed 2457 */ 2458 public boolean hasNextBigInteger(int radix) { 2459 setRadix(radix); 2460 boolean result = hasNext(integerPattern()); 2461 if (result) { // Cache it 2462 try { 2463 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2464 processIntegerToken(hasNextResult) : 2465 hasNextResult; 2466 typeCache = new BigInteger(s, radix); 2467 } catch (NumberFormatException nfe) { 2468 result = false; 2469 } 2470 } 2471 return result; 2472 } 2473 2474 /** 2475 * Scans the next token of the input as a {@link java.math.BigInteger 2476 * BigInteger}. 2477 * 2478 * <p> An invocation of this method of the form 2479 * <tt>nextBigInteger()</tt> behaves in exactly the same way as the 2480 * invocation <tt>nextBigInteger(radix)</tt>, where <code>radix</code> 2481 * is the default radix of this scanner. 
2482 * 2483 * @return the <tt>BigInteger</tt> scanned from the input 2484 * @throws InputMismatchException 2485 * if the next token does not match the <i>Integer</i> 2486 * regular expression, or is out of range 2487 * @throws NoSuchElementException if the input is exhausted 2488 * @throws IllegalStateException if this scanner is closed 2489 */ 2490 public BigInteger nextBigInteger() { 2491 return nextBigInteger(defaultRadix); 2492 } 2493 2494 /** 2495 * Scans the next token of the input as a {@link java.math.BigInteger 2496 * BigInteger}. 2497 * 2498 * <p> If the next token matches the <a 2499 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2500 * above then the token is converted into a <tt>BigInteger</tt> value as if 2501 * by removing all group separators, mapping non-ASCII digits into ASCII 2502 * digits via the {@link Character#digit Character.digit}, and passing the 2503 * resulting string to the {@link 2504 * java.math.BigInteger#BigInteger(java.lang.String) 2505 * BigInteger(String, int)} constructor with the specified radix. 
2506 * 2507 * @param radix the radix used to interpret the token 2508 * @return the <tt>BigInteger</tt> scanned from the input 2509 * @throws InputMismatchException 2510 * if the next token does not match the <i>Integer</i> 2511 * regular expression, or is out of range 2512 * @throws NoSuchElementException if the input is exhausted 2513 * @throws IllegalStateException if this scanner is closed 2514 */ 2515 public BigInteger nextBigInteger(int radix) { 2516 // Check cached result 2517 if ((typeCache != null) && (typeCache instanceof BigInteger) 2518 && this.radix == radix) { 2519 BigInteger val = (BigInteger)typeCache; 2520 useTypeCache(); 2521 return val; 2522 } 2523 setRadix(radix); 2524 clearCaches(); 2525 // Search for next int 2526 try { 2527 String s = next(integerPattern()); 2528 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2529 s = processIntegerToken(s); 2530 return new BigInteger(s, radix); 2531 } catch (NumberFormatException nfe) { 2532 position = matcher.start(); // don't skip bad token 2533 throw new InputMismatchException(nfe.getMessage()); 2534 } 2535 } 2536 2537 /** 2538 * Returns true if the next token in this scanner's input can be 2539 * interpreted as a <code>BigDecimal</code> using the 2540 * {@link #nextBigDecimal} method. The scanner does not advance past any 2541 * input. 2542 * 2543 * @return true if and only if this scanner's next token is a valid 2544 * <code>BigDecimal</code> 2545 * @throws IllegalStateException if this scanner is closed 2546 */ 2547 public boolean hasNextBigDecimal() { 2548 setRadix(10); 2549 boolean result = hasNext(decimalPattern()); 2550 if (result) { // Cache it 2551 try { 2552 String s = processFloatToken(hasNextResult); 2553 typeCache = new BigDecimal(s); 2554 } catch (NumberFormatException nfe) { 2555 result = false; 2556 } 2557 } 2558 return result; 2559 } 2560 2561 /** 2562 * Scans the next token of the input as a {@link java.math.BigDecimal 2563 * BigDecimal}. 
2564 * 2565 * <p> If the next token matches the <a 2566 * href="#Decimal-regex"><i>Decimal</i></a> regular expression defined 2567 * above then the token is converted into a <tt>BigDecimal</tt> value as if 2568 * by removing all group separators, mapping non-ASCII digits into ASCII 2569 * digits via the {@link Character#digit Character.digit}, and passing the 2570 * resulting string to the {@link 2571 * java.math.BigDecimal#BigDecimal(java.lang.String) BigDecimal(String)} 2572 * constructor. 2573 * 2574 * @return the <tt>BigDecimal</tt> scanned from the input 2575 * @throws InputMismatchException 2576 * if the next token does not match the <i>Decimal</i> 2577 * regular expression, or is out of range 2578 * @throws NoSuchElementException if the input is exhausted 2579 * @throws IllegalStateException if this scanner is closed 2580 */ 2581 public BigDecimal nextBigDecimal() { 2582 // Check cached result 2583 if ((typeCache != null) && (typeCache instanceof BigDecimal)) { 2584 BigDecimal val = (BigDecimal)typeCache; 2585 useTypeCache(); 2586 return val; 2587 } 2588 setRadix(10); 2589 clearCaches(); 2590 // Search for next float 2591 try { 2592 String s = processFloatToken(next(decimalPattern())); 2593 return new BigDecimal(s); 2594 } catch (NumberFormatException nfe) { 2595 position = matcher.start(); // don't skip bad token 2596 throw new InputMismatchException(nfe.getMessage()); 2597 } 2598 } 2599 2600 /** 2601 * Resets this scanner. 2602 * 2603 * <p> Resetting a scanner discards all of its explicit state 2604 * information which may have been changed by invocations of {@link 2605 * #useDelimiter}, {@link #useLocale}, or {@link #useRadix}. 
2606 * 2607 * <p> An invocation of this method of the form 2608 * <tt>scanner.reset()</tt> behaves in exactly the same way as the 2609 * invocation 2610 * 2611 * <blockquote><pre>{@code 2612 * scanner.useDelimiter("\\p{javaWhitespace}+") 2613 * .useLocale(Locale.getDefault(Locale.Category.FORMAT)) 2614 * .useRadix(10); 2615 * }</pre></blockquote> 2616 * 2617 * @return this scanner 2618 * 2619 * @since 1.6 2620 */ 2621 public Scanner reset() { 2622 delimPattern = WHITESPACE_PATTERN; 2623 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 2624 useRadix(10); 2625 clearCaches(); 2626 return this; 2627 } 2628 2629 /** 2630 * Returns a stream of delimiter-separated tokens from this scanner. The 2631 * stream contains the same tokens that would be returned, starting from 2632 * this scanner's current state, by calling the {@link #next} method 2633 * repeatedly until the {@link #hasNext} returns false. 2634 * 2635 * <p>The resulting stream is ordered, and all stream elements are 2636 * non-null. Closing the stream will close the underlying scanner. 2637 * 2638 * <p>After the {@code tokens()} method has been called, the scanner 2639 * should be considered to be under the control of the returned stream 2640 * object. Subsequent calls to any methods on this scanner other than 2641 * {@link #close} and {@link #ioException} may return undefined results or 2642 * may cause undefined effects on the returned stream. 2643 * 2644 * <p>If this scanner contains a resource that must be released, this scanner 2645 * should be closed, either by calling its {@link #close} method, or by 2646 * closing the returned stream. After the scanner has been closed, the 2647 * results of operating on the returned stream are undefined. 2648 * 2649 * <p>For example, the following code will create a list of 2650 * comma-delimited tokens from a string: 2651 * 2652 * <pre>{@code 2653 * List<String> result = new Scanner("abc,def,,ghi").useDelimiter(","). 
2654 * .tokens().collect(Collectors.toList()); 2655 * }</pre> 2656 * 2657 * <p>The resulting list would contain {@code "abc"}, {@code "def"}, 2658 * the empty string, and {@code "ghi"}. 2659 * 2660 * @return the stream of token strings 2661 * @throws IllegalStateException if this scanner is closed 2662 * @since 1.9 2663 */ 2664 public Stream<String> tokens() { 2665 ensureOpen(); 2666 Stream<String> stream = StreamSupport.stream( 2667 Spliterators.spliteratorUnknownSize(this, 2668 Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED), false); 2669 return stream.onClose(this::close); 2670 } 2671 2672 /** 2673 * Returns a stream of match results from this scanner. The stream 2674 * contains the same results in the same order that would be returned by 2675 * calling {@code findWithinHorizon(pattern, 0)} and then {@link #match} 2676 * repeatedly as long as {@link #findWithinHorizon} finds matches. 2677 * 2678 * <p>The resulting stream is ordered, and all stream elements are 2679 * non-null. Closing the stream will close the underlying scanner. 2680 * 2681 * <p>After the {@code findAll()} method has been called, the scanner 2682 * should be considered to be under the control of the returned stream 2683 * object. Subsequent calls to any methods on this scanner other than 2684 * {@link #close} and {@link #ioException} may return undefined results or 2685 * may cause undefined effects on the returned stream. 2686 * 2687 * <p>If this scanner contains a resource that must be released, this scanner 2688 * should be closed, either by calling its {@link #close} method, or by 2689 * closing the returned stream. After the scanner has been closed, the 2690 * results of operating on the returned stream are undefined. 
2691 * 2692 * <p>For example, the following code will read a file and return a list 2693 * of all sequences of characters consisting of seven or more Latin capital 2694 * letters: 2695 * 2696 * <pre>{@code 2697 * try (Scanner sc = new Scanner(Paths.get("input.txt"))) { 2698 * Pattern pat = Pattern.compile("[A-Z]{7,}"); 2699 * List<String> capWords = sc.findAll(pat) 2700 * .map(MatchResult::group) 2701 * .collect(Collectors.toList()); 2702 * } 2703 * }</pre> 2704 * 2705 * @apiNote 2706 * As with the {@link #findWithinHorizon} methods, this method might block 2707 * waiting for additional input, and it may buffer an unbounded amount of input 2708 * searching for a match. The offset values in the {@link MatchResult} stream 2709 * elements might not be useful; see the API Note for the {@link #match} method. 2710 * 2711 * @param pattern the pattern to be matched 2712 * @return a stream of match results 2713 * @throws NullPointerException if pattern is null 2714 * @throws IllegalStateException if this scanner is closed 2715 * @since 1.9 2716 */ 2717 public Stream<MatchResult> findAll(Pattern pattern) { 2718 Objects.requireNonNull(pattern); 2719 ensureOpen(); 2720 Stream<MatchResult> stream = StreamSupport.stream(new ScanSpliterator(pattern), false); 2721 return stream.onClose(this::close); 2722 } 2723 2724 /** 2725 * Returns a stream of match results that match the provided pattern string. 
2726 * The effect is the same as the code: 2727 * 2728 * <pre>{@code 2729 * scanner.findAll(Pattern.compile(patString)) 2730 * }</pre> 2731 * 2732 * @param patString the pattern string 2733 * @return a stream of match results 2734 * @throws NullPointerException if patString is null 2735 * @throws IllegalStateException if this scanner is closed 2736 * @throws PatternSyntaxException if the regular expression's syntax is invalid 2737 * @since 1.9 2738 * @see java.util.regex.Pattern 2739 */ 2740 public Stream<MatchResult> findAll(String patString) { 2741 Objects.requireNonNull(patString); 2742 ensureOpen(); 2743 return findAll(patternCache.forName(patString)); 2744 } 2745 2746 2747 class ScanSpliterator extends Spliterators.AbstractSpliterator<MatchResult> { 2748 final Pattern pattern; 2749 2750 ScanSpliterator(Pattern pattern) { 2751 super(Long.MAX_VALUE, 2752 Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED); 2753 this.pattern = pattern; 2754 } 2755 2756 @Override 2757 public boolean tryAdvance(Consumer<? super MatchResult> cons) { 2758 while (true) { 2759 if (findPatternInBuffer(pattern, 0)) { 2760 cons.accept(matcher.toMatchResult()); 2761 return true; 2762 } 2763 if (needInput) 2764 readInput(); 2765 else 2766 return false; // reached end of input 2767 } 2768 } 2769 } 2770 2771 }