1 /* 2 * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util; 27 28 import java.io.*; 29 import java.math.*; 30 import java.nio.*; 31 import java.nio.channels.*; 32 import java.nio.charset.*; 33 import java.nio.file.Path; 34 import java.nio.file.Files; 35 import java.text.*; 36 import java.text.spi.NumberFormatProvider; 37 import java.util.function.Consumer; 38 import java.util.regex.*; 39 import java.util.stream.Stream; 40 import java.util.stream.StreamSupport; 41 import sun.util.locale.provider.LocaleProviderAdapter; 42 import sun.util.locale.provider.ResourceBundleBasedAdapter; 43 44 /** 45 * A simple text scanner which can parse primitive types and strings using 46 * regular expressions. 
47 * 48 * <p>A {@code Scanner} breaks its input into tokens using a 49 * delimiter pattern, which by default matches whitespace. The resulting 50 * tokens may then be converted into values of different types using the 51 * various {@code next} methods. 52 * 53 * <p>For example, this code allows a user to read a number from 54 * {@code System.in}: 55 * <blockquote><pre>{@code 56 * Scanner sc = new Scanner(System.in); 57 * int i = sc.nextInt(); 58 * }</pre></blockquote> 59 * 60 * <p>As another example, this code allows {@code long} types to be 61 * assigned from entries in a file {@code myNumbers}: 62 * <blockquote><pre>{@code 63 * Scanner sc = new Scanner(new File("myNumbers")); 64 * while (sc.hasNextLong()) { 65 * long aLong = sc.nextLong(); 66 * } 67 * }</pre></blockquote> 68 * 69 * <p>The scanner can also use delimiters other than whitespace. This 70 * example reads several items in from a string: 71 * <blockquote><pre>{@code 72 * String input = "1 fish 2 fish red fish blue fish"; 73 * Scanner s = new Scanner(input).useDelimiter("\\s*fish\\s*"); 74 * System.out.println(s.nextInt()); 75 * System.out.println(s.nextInt()); 76 * System.out.println(s.next()); 77 * System.out.println(s.next()); 78 * s.close(); 79 * }</pre></blockquote> 80 * <p> 81 * prints the following output: 82 * <blockquote><pre>{@code 83 * 1 84 * 2 85 * red 86 * blue 87 * }</pre></blockquote> 88 * 89 * <p>The same output can be generated with this code, which uses a regular 90 * expression to parse all four tokens at once: 91 * <blockquote><pre>{@code 92 * String input = "1 fish 2 fish red fish blue fish"; 93 * Scanner s = new Scanner(input); 94 * s.findInLine("(\\d+) fish (\\d+) fish (\\w+) fish (\\w+)"); 95 * MatchResult result = s.match(); 96 * for (int i=1; i<=result.groupCount(); i++) 97 * System.out.println(result.group(i)); 98 * s.close(); 99 * }</pre></blockquote> 100 * 101 * <p>The <a id="default-delimiter">default whitespace delimiter</a> used 102 * by a scanner is as recognized by 
{@link Character#isWhitespace(char) 103 * Character.isWhitespace()}. The {@link #reset reset()} 104 * method will reset the value of the scanner's delimiter to the default 105 * whitespace delimiter regardless of whether it was previously changed. 106 * 107 * <p>A scanning operation may block waiting for input. 108 * 109 * <p>The {@link #next} and {@link #hasNext} methods and their 110 * companion methods (such as {@link #nextInt} and 111 * {@link #hasNextInt}) first skip any input that matches the delimiter 112 * pattern, and then attempt to return the next token. Both {@code hasNext()} 113 * and {@code next()} methods may block waiting for further input. Whether a 114 * {@code hasNext()} method blocks has no connection to whether or not its 115 * associated {@code next()} method will block. The {@link #tokens} method 116 * may also block waiting for input. 117 * 118 * <p>The {@link #findInLine findInLine()}, 119 * {@link #findWithinHorizon findWithinHorizon()}, 120 * {@link #skip skip()}, and {@link #findAll findAll()} 121 * methods operate independently of the delimiter pattern. These methods will 122 * attempt to match the specified pattern with no regard to delimiters in the 123 * input and thus can be used in special circumstances where delimiters are 124 * not relevant. These methods may block waiting for more input. 125 * 126 * <p>When a scanner throws an {@link InputMismatchException}, the scanner 127 * will not pass the token that caused the exception, so that it may be 128 * retrieved or skipped via some other method. 129 * 130 * <p>Depending upon the type of delimiting pattern, empty tokens may be 131 * returned. For example, the pattern {@code "\\s+"} will return no empty 132 * tokens since it matches multiple instances of the delimiter. The delimiting 133 * pattern {@code "\\s"} could return empty tokens since it only passes one 134 * space at a time. 
135 * 136 * <p> A scanner can read text from any object which implements the {@link 137 * java.lang.Readable} interface. If an invocation of the underlying 138 * readable's {@link java.lang.Readable#read read()} method throws an {@link 139 * java.io.IOException} then the scanner assumes that the end of the input 140 * has been reached. The most recent {@code IOException} thrown by the 141 * underlying readable can be retrieved via the {@link #ioException} method. 142 * 143 * <p>When a {@code Scanner} is closed, it will close its input source 144 * if the source implements the {@link java.io.Closeable} interface. 145 * 146 * <p>A {@code Scanner} is not safe for multithreaded use without 147 * external synchronization. 148 * 149 * <p>Unless otherwise mentioned, passing a {@code null} parameter into 150 * any method of a {@code Scanner} will cause a 151 * {@code NullPointerException} to be thrown. 152 * 153 * <p>A scanner will default to interpreting numbers as decimal unless a 154 * different radix has been set by using the {@link #useRadix} method. The 155 * {@link #reset} method will reset the value of the scanner's radix to 156 * {@code 10} regardless of whether it was previously changed. 157 * 158 * <h3> <a id="localized-numbers">Localized numbers</a> </h3> 159 * 160 * <p> An instance of this class is capable of scanning numbers in the standard 161 * formats as well as in the formats of the scanner's locale. A scanner's 162 * <a id="initial-locale">initial locale </a>is the value returned by the {@link 163 * java.util.Locale#getDefault(Locale.Category) 164 * Locale.getDefault(Locale.Category.FORMAT)} method; it may be changed via the {@link 165 * #useLocale useLocale()} method. The {@link #reset} method will reset the value of the 166 * scanner's locale to the initial locale regardless of whether it was 167 * previously changed. 
168 * 169 * <p>The localized formats are defined in terms of the following parameters, 170 * which for a particular locale are taken from that locale's {@link 171 * java.text.DecimalFormat DecimalFormat} object, {@code df}, and its 172 * {@link java.text.DecimalFormatSymbols DecimalFormatSymbols} object, 173 * {@code dfs}. 174 * 175 * <blockquote><dl> 176 * <dt><i>LocalGroupSeparator </i> 177 * <dd>The character used to separate thousands groups, 178 * <i>i.e.,</i> {@code dfs.}{@link 179 * java.text.DecimalFormatSymbols#getGroupingSeparator 180 * getGroupingSeparator()} 181 * <dt><i>LocalDecimalSeparator </i> 182 * <dd>The character used for the decimal point, 183 * <i>i.e.,</i> {@code dfs.}{@link 184 * java.text.DecimalFormatSymbols#getDecimalSeparator 185 * getDecimalSeparator()} 186 * <dt><i>LocalPositivePrefix </i> 187 * <dd>The string that appears before a positive number (may 188 * be empty), <i>i.e.,</i> {@code df.}{@link 189 * java.text.DecimalFormat#getPositivePrefix 190 * getPositivePrefix()} 191 * <dt><i>LocalPositiveSuffix </i> 192 * <dd>The string that appears after a positive number (may be 193 * empty), <i>i.e.,</i> {@code df.}{@link 194 * java.text.DecimalFormat#getPositiveSuffix 195 * getPositiveSuffix()} 196 * <dt><i>LocalNegativePrefix </i> 197 * <dd>The string that appears before a negative number (may 198 * be empty), <i>i.e.,</i> {@code df.}{@link 199 * java.text.DecimalFormat#getNegativePrefix 200 * getNegativePrefix()} 201 * <dt><i>LocalNegativeSuffix </i> 202 * <dd>The string that appears after a negative number (may be 203 * empty), <i>i.e.,</i> {@code df.}{@link 204 * java.text.DecimalFormat#getNegativeSuffix 205 * getNegativeSuffix()} 206 * <dt><i>LocalNaN </i> 207 * <dd>The string that represents not-a-number for 208 * floating-point values, 209 * <i>i.e.,</i> {@code dfs.}{@link 210 * java.text.DecimalFormatSymbols#getNaN 211 * getNaN()} 212 * <dt><i>LocalInfinity </i> 213 * <dd>The string that represents infinity for floating-point 
214 * values, <i>i.e.,</i> {@code dfs.}{@link 215 * java.text.DecimalFormatSymbols#getInfinity 216 * getInfinity()} 217 * </dl></blockquote> 218 * 219 * <h4> <a id="number-syntax">Number syntax</a> </h4> 220 * 221 * <p> The strings that can be parsed as numbers by an instance of this class 222 * are specified in terms of the following regular-expression grammar, where 223 * Rmax is the highest digit in the radix being used (for example, Rmax is 9 in base 10). 224 * 225 * <dl> 226 * <dt><i>NonAsciiDigit</i>: 227 * <dd>A non-ASCII character c for which 228 * {@link java.lang.Character#isDigit Character.isDigit}{@code (c)} 229 * returns true 230 * 231 * <dt><i>Non0Digit</i>: 232 * <dd>{@code [1-}<i>Rmax</i>{@code ] | }<i>NonAsciiDigit</i> 233 * 234 * <dt><i>Digit</i>: 235 * <dd>{@code [0-}<i>Rmax</i>{@code ] | }<i>NonAsciiDigit</i> 236 * 237 * <dt><i>GroupedNumeral</i>: 238 * <dd><code>( </code><i>Non0Digit</i> 239 * <i>Digit</i>{@code ? 240 * }<i>Digit</i>{@code ?} 241 * <dd> <code>( </code><i>LocalGroupSeparator</i> 242 * <i>Digit</i> 243 * <i>Digit</i> 244 * <i>Digit</i>{@code )+ )} 245 * 246 * <dt><i>Numeral</i>: 247 * <dd>{@code ( ( }<i>Digit</i>{@code + ) 248 * | }<i>GroupedNumeral</i>{@code )} 249 * 250 * <dt><a id="Integer-regex"><i>Integer</i>:</a> 251 * <dd>{@code ( [-+]? ( }<i>Numeral</i>{@code 252 * ) )} 253 * <dd>{@code | }<i>LocalPositivePrefix</i> <i>Numeral</i> 254 * <i>LocalPositiveSuffix</i> 255 * <dd>{@code | }<i>LocalNegativePrefix</i> <i>Numeral</i> 256 * <i>LocalNegativeSuffix</i> 257 * 258 * <dt><i>DecimalNumeral</i>: 259 * <dd><i>Numeral</i> 260 * <dd>{@code | }<i>Numeral</i> 261 * <i>LocalDecimalSeparator</i> 262 * <i>Digit</i>{@code *} 263 * <dd>{@code | }<i>LocalDecimalSeparator</i> 264 * <i>Digit</i>{@code +} 265 * 266 * <dt><i>Exponent</i>: 267 * <dd>{@code ( [eE] [+-]? }<i>Digit</i>{@code + )} 268 * 269 * <dt><a id="Decimal-regex"><i>Decimal</i>:</a> 270 * <dd>{@code ( [-+]? }<i>DecimalNumeral</i> 271 * <i>Exponent</i>{@code ? 
)} 272 * <dd>{@code | }<i>LocalPositivePrefix</i> 273 * <i>DecimalNumeral</i> 274 * <i>LocalPositiveSuffix</i> 275 * <i>Exponent</i>{@code ?} 276 * <dd>{@code | }<i>LocalNegativePrefix</i> 277 * <i>DecimalNumeral</i> 278 * <i>LocalNegativeSuffix</i> 279 * <i>Exponent</i>{@code ?} 280 * 281 * <dt><i>HexFloat</i>: 282 * <dd>{@code [-+]? 0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+ 283 * ([pP][-+]?[0-9]+)?} 284 * 285 * <dt><i>NonNumber</i>: 286 * <dd>{@code NaN 287 * | }<i>LocalNaN</i>{@code 288 * | Infinity 289 * | }<i>LocalInfinity</i> 290 * 291 * <dt><i>SignedNonNumber</i>: 292 * <dd>{@code ( [-+]? }<i>NonNumber</i>{@code )} 293 * <dd>{@code | }<i>LocalPositivePrefix</i> 294 * <i>NonNumber</i> 295 * <i>LocalPositiveSuffix</i> 296 * <dd>{@code | }<i>LocalNegativePrefix</i> 297 * <i>NonNumber</i> 298 * <i>LocalNegativeSuffix</i> 299 * 300 * <dt><a id="Float-regex"><i>Float</i></a>: 301 * <dd><i>Decimal</i> 302 * {@code | }<i>HexFloat</i> 303 * {@code | }<i>SignedNonNumber</i> 304 * 305 * </dl> 306 * <p>Whitespace is not significant in the above regular expressions. 
307 * 308 * @since 1.5 309 */ 310 public final class Scanner implements Iterator<String>, Closeable, IterableOnce<String> { 311 312 // Internal buffer used to hold input 313 private CharBuffer buf; 314 315 // Size of internal character buffer 316 private static final int BUFFER_SIZE = 1024; // change to 1024; 317 318 // The index into the buffer currently held by the Scanner 319 private int position; 320 321 // Internal matcher used for finding delimiters 322 private Matcher matcher; 323 324 // Pattern used to delimit tokens 325 private Pattern delimPattern; 326 327 // Pattern found in last hasNext operation 328 private Pattern hasNextPattern; 329 330 // Position after last hasNext operation 331 private int hasNextPosition; 332 333 // Result after last hasNext operation 334 private String hasNextResult; 335 336 // The input source 337 private Readable source; 338 339 // Boolean is true if source is done 340 private boolean sourceClosed = false; 341 342 // Boolean indicating more input is required 343 private boolean needInput = false; 344 345 // Boolean indicating if a delim has been skipped this operation 346 private boolean skipped = false; 347 348 // A store of a position that the scanner may fall back to 349 private int savedScannerPosition = -1; 350 351 // A cache of the last primitive type scanned 352 private Object typeCache = null; 353 354 // Boolean indicating if a match result is available 355 private boolean matchValid = false; 356 357 // Boolean indicating if this scanner has been closed 358 private boolean closed = false; 359 360 // The current radix used by this scanner 361 private int radix = 10; 362 363 // The default radix for this scanner 364 private int defaultRadix = 10; 365 366 // The locale used by this scanner 367 private Locale locale = null; 368 369 // A cache of the last few recently used Patterns 370 private PatternLRUCache patternCache = new PatternLRUCache(7); 371 372 // A holder of the last IOException encountered 373 private 
IOException lastException; 374 375 // Whether this Scanner, as IterableOnce, has been consumed 376 private boolean consumed = false; 377 378 // Number of times this scanner's state has been modified. 379 // Generally incremented on most public APIs and checked 380 // within spliterator implementations. 381 int modCount; 382 383 // A pattern for java whitespace 384 private static Pattern WHITESPACE_PATTERN = Pattern.compile( 385 "\\p{javaWhitespace}+"); 386 387 // A pattern for any token 388 private static Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*"); 389 390 // A pattern for non-ASCII digits 391 private static Pattern NON_ASCII_DIGIT = Pattern.compile( 392 "[\\p{javaDigit}&&[^0-9]]"); 393 394 // Fields and methods to support scanning primitive types 395 396 /** 397 * Locale dependent values used to scan numbers 398 */ 399 private String groupSeparator = "\\,"; 400 private String decimalSeparator = "\\."; 401 private String nanString = "NaN"; 402 private String infinityString = "Infinity"; 403 private String positivePrefix = ""; 404 private String negativePrefix = "\\-"; 405 private String positiveSuffix = ""; 406 private String negativeSuffix = ""; 407 408 /** 409 * Fields and an accessor method to match booleans 410 */ 411 private static volatile Pattern boolPattern; 412 private static final String BOOLEAN_PATTERN = "true|false"; 413 private static Pattern boolPattern() { 414 Pattern bp = boolPattern; 415 if (bp == null) 416 boolPattern = bp = Pattern.compile(BOOLEAN_PATTERN, 417 Pattern.CASE_INSENSITIVE); 418 return bp; 419 } 420 421 /** 422 * Fields and methods to match bytes, shorts, ints, and longs 423 */ 424 private Pattern integerPattern; 425 private String digits = "0123456789abcdefghijklmnopqrstuvwxyz"; 426 private String non0Digit = "[\\p{javaDigit}&&[^0]]"; 427 private int SIMPLE_GROUP_INDEX = 5; 428 private String buildIntegerPatternString() { 429 String radixDigits = digits.substring(0, radix); 430 // \\p{javaDigit} is not guaranteed to be 
appropriate 431 // here but what can we do? The final authority will be 432 // whatever parse method is invoked, so ultimately the 433 // Scanner will do the right thing 434 String digit = "((?i)["+radixDigits+"]|\\p{javaDigit})"; 435 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 436 groupSeparator+digit+digit+digit+")+)"; 437 // digit++ is the possessive form which is necessary for reducing 438 // backtracking that would otherwise cause unacceptable performance 439 String numeral = "(("+ digit+"++)|"+groupedNumeral+")"; 440 String javaStyleInteger = "([-+]?(" + numeral + "))"; 441 String negativeInteger = negativePrefix + numeral + negativeSuffix; 442 String positiveInteger = positivePrefix + numeral + positiveSuffix; 443 return "("+ javaStyleInteger + ")|(" + 444 positiveInteger + ")|(" + 445 negativeInteger + ")"; 446 } 447 private Pattern integerPattern() { 448 if (integerPattern == null) { 449 integerPattern = patternCache.forName(buildIntegerPatternString()); 450 } 451 return integerPattern; 452 } 453 454 /** 455 * Fields and an accessor method to match line separators 456 */ 457 private static volatile Pattern separatorPattern; 458 private static volatile Pattern linePattern; 459 private static final String LINE_SEPARATOR_PATTERN = 460 "\r\n|[\n\r\u2028\u2029\u0085]"; 461 private static final String LINE_PATTERN = ".*("+LINE_SEPARATOR_PATTERN+")|.+$"; 462 463 private static Pattern separatorPattern() { 464 Pattern sp = separatorPattern; 465 if (sp == null) 466 separatorPattern = sp = Pattern.compile(LINE_SEPARATOR_PATTERN); 467 return sp; 468 } 469 470 private static Pattern linePattern() { 471 Pattern lp = linePattern; 472 if (lp == null) 473 linePattern = lp = Pattern.compile(LINE_PATTERN); 474 return lp; 475 } 476 477 /** 478 * Fields and methods to match floats and doubles 479 */ 480 private Pattern floatPattern; 481 private Pattern decimalPattern; 482 private void buildFloatAndDecimalPattern() { 483 // \\p{javaDigit} may not be perfect, 
see above 484 String digit = "([0-9]|(\\p{javaDigit}))"; 485 String exponent = "([eE][+-]?"+digit+"+)?"; 486 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 487 groupSeparator+digit+digit+digit+")+)"; 488 // Once again digit++ is used for performance, as above 489 String numeral = "(("+digit+"++)|"+groupedNumeral+")"; 490 String decimalNumeral = "("+numeral+"|"+numeral + 491 decimalSeparator + digit + "*+|"+ decimalSeparator + 492 digit + "++)"; 493 String nonNumber = "(NaN|"+nanString+"|Infinity|"+ 494 infinityString+")"; 495 String positiveFloat = "(" + positivePrefix + decimalNumeral + 496 positiveSuffix + exponent + ")"; 497 String negativeFloat = "(" + negativePrefix + decimalNumeral + 498 negativeSuffix + exponent + ")"; 499 String decimal = "(([-+]?" + decimalNumeral + exponent + ")|"+ 500 positiveFloat + "|" + negativeFloat + ")"; 501 String hexFloat = 502 "[-+]?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP][-+]?[0-9]+)?"; 503 String positiveNonNumber = "(" + positivePrefix + nonNumber + 504 positiveSuffix + ")"; 505 String negativeNonNumber = "(" + negativePrefix + nonNumber + 506 negativeSuffix + ")"; 507 String signedNonNumber = "(([-+]?"+nonNumber+")|" + 508 positiveNonNumber + "|" + 509 negativeNonNumber + ")"; 510 floatPattern = Pattern.compile(decimal + "|" + hexFloat + "|" + 511 signedNonNumber); 512 decimalPattern = Pattern.compile(decimal); 513 } 514 private Pattern floatPattern() { 515 if (floatPattern == null) { 516 buildFloatAndDecimalPattern(); 517 } 518 return floatPattern; 519 } 520 private Pattern decimalPattern() { 521 if (decimalPattern == null) { 522 buildFloatAndDecimalPattern(); 523 } 524 return decimalPattern; 525 } 526 527 // Constructors 528 529 /** 530 * Constructs a {@code Scanner} that returns values scanned 531 * from the specified source delimited by the specified pattern. 
532 * 533 * @param source A character source implementing the Readable interface 534 * @param pattern A delimiting pattern 535 */ 536 private Scanner(Readable source, Pattern pattern) { 537 assert source != null : "source should not be null"; 538 assert pattern != null : "pattern should not be null"; 539 this.source = source; 540 delimPattern = pattern; 541 buf = CharBuffer.allocate(BUFFER_SIZE); 542 buf.limit(0); 543 matcher = delimPattern.matcher(buf); 544 matcher.useTransparentBounds(true); 545 matcher.useAnchoringBounds(false); 546 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 547 } 548 549 /** 550 * Constructs a new {@code Scanner} that produces values scanned 551 * from the specified source. 552 * 553 * @param source A character source implementing the {@link Readable} 554 * interface 555 */ 556 public Scanner(Readable source) { 557 this(Objects.requireNonNull(source, "source"), WHITESPACE_PATTERN); 558 } 559 560 /** 561 * Constructs a new {@code Scanner} that produces values scanned 562 * from the specified input stream. Bytes from the stream are converted 563 * into characters using the underlying platform's 564 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 565 * 566 * @param source An input stream to be scanned 567 */ 568 public Scanner(InputStream source) { 569 this(new InputStreamReader(source), WHITESPACE_PATTERN); 570 } 571 572 /** 573 * Constructs a new {@code Scanner} that produces values scanned 574 * from the specified input stream. Bytes from the stream are converted 575 * into characters using the specified charset. 
576 * 577 * @param source An input stream to be scanned 578 * @param charsetName The encoding type used to convert bytes from the 579 * stream into characters to be scanned 580 * @throws IllegalArgumentException if the specified character set 581 * does not exist 582 */ 583 public Scanner(InputStream source, String charsetName) { 584 this(source, toCharset(charsetName)); 585 } 586 587 /** 588 * Constructs a new {@code Scanner} that produces values scanned 589 * from the specified input stream. Bytes from the stream are converted 590 * into characters using the specified charset. 591 * 592 * @param source an input stream to be scanned 593 * @param charset the charset used to convert bytes from the file 594 * into characters to be scanned 595 * @since 10 596 */ 597 public Scanner(InputStream source, Charset charset) { 598 this(makeReadable(Objects.requireNonNull(source, "source"), charset), 599 WHITESPACE_PATTERN); 600 } 601 602 /** 603 * Returns a charset object for the given charset name. 604 * @throws NullPointerException is csn is null 605 * @throws IllegalArgumentException if the charset is not supported 606 */ 607 private static Charset toCharset(String csn) { 608 Objects.requireNonNull(csn, "charsetName"); 609 try { 610 return Charset.forName(csn); 611 } catch (IllegalCharsetNameException|UnsupportedCharsetException e) { 612 // IllegalArgumentException should be thrown 613 throw new IllegalArgumentException(e); 614 } 615 } 616 617 /* 618 * This method is added so that null-check on charset can be performed before 619 * creating InputStream as an existing test required it. 
620 */ 621 private static Readable makeReadable(Path source, Charset charset) 622 throws IOException { 623 Objects.requireNonNull(charset, "charset"); 624 return makeReadable(Files.newInputStream(source), charset); 625 } 626 627 private static Readable makeReadable(InputStream source, Charset charset) { 628 Objects.requireNonNull(charset, "charset"); 629 return new InputStreamReader(source, charset); 630 } 631 632 /** 633 * Constructs a new {@code Scanner} that produces values scanned 634 * from the specified file. Bytes from the file are converted into 635 * characters using the underlying platform's 636 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 637 * 638 * @param source A file to be scanned 639 * @throws FileNotFoundException if source is not found 640 */ 641 public Scanner(File source) throws FileNotFoundException { 642 this((ReadableByteChannel)(new FileInputStream(source).getChannel())); 643 } 644 645 /** 646 * Constructs a new {@code Scanner} that produces values scanned 647 * from the specified file. Bytes from the file are converted into 648 * characters using the specified charset. 649 * 650 * @param source A file to be scanned 651 * @param charsetName The encoding type used to convert bytes from the file 652 * into characters to be scanned 653 * @throws FileNotFoundException if source is not found 654 * @throws IllegalArgumentException if the specified encoding is 655 * not found 656 */ 657 public Scanner(File source, String charsetName) 658 throws FileNotFoundException 659 { 660 this(Objects.requireNonNull(source), toDecoder(charsetName)); 661 } 662 663 /** 664 * Constructs a new {@code Scanner} that produces values scanned 665 * from the specified file. Bytes from the file are converted into 666 * characters using the specified charset. 
667 * 668 * @param source A file to be scanned 669 * @param charset The charset used to convert bytes from the file 670 * into characters to be scanned 671 * @throws IOException 672 * if an I/O error occurs opening the source 673 * @since 10 674 */ 675 public Scanner(File source, Charset charset) throws IOException { 676 this(Objects.requireNonNull(source), charset.newDecoder()); 677 } 678 679 private Scanner(File source, CharsetDecoder dec) 680 throws FileNotFoundException 681 { 682 this(makeReadable((ReadableByteChannel)(new FileInputStream(source).getChannel()), dec)); 683 } 684 685 private static CharsetDecoder toDecoder(String charsetName) { 686 Objects.requireNonNull(charsetName, "charsetName"); 687 try { 688 return Charset.forName(charsetName).newDecoder(); 689 } catch (IllegalCharsetNameException|UnsupportedCharsetException unused) { 690 throw new IllegalArgumentException(charsetName); 691 } 692 } 693 694 private static Readable makeReadable(ReadableByteChannel source, 695 CharsetDecoder dec) { 696 return Channels.newReader(source, dec, -1); 697 } 698 699 private static Readable makeReadable(ReadableByteChannel source, 700 Charset charset) { 701 Objects.requireNonNull(charset, "charset"); 702 return Channels.newReader(source, charset); 703 } 704 705 /** 706 * Constructs a new {@code Scanner} that produces values scanned 707 * from the specified file. Bytes from the file are converted into 708 * characters using the underlying platform's 709 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 710 * 711 * @param source 712 * the path to the file to be scanned 713 * @throws IOException 714 * if an I/O error occurs opening source 715 * 716 * @since 1.7 717 */ 718 public Scanner(Path source) 719 throws IOException 720 { 721 this(Files.newInputStream(source)); 722 } 723 724 /** 725 * Constructs a new {@code Scanner} that produces values scanned 726 * from the specified file. 
Bytes from the file are converted into 727 * characters using the specified charset. 728 * 729 * @param source 730 * the path to the file to be scanned 731 * @param charsetName 732 * The encoding type used to convert bytes from the file 733 * into characters to be scanned 734 * @throws IOException 735 * if an I/O error occurs opening source 736 * @throws IllegalArgumentException 737 * if the specified encoding is not found 738 * @since 1.7 739 */ 740 public Scanner(Path source, String charsetName) throws IOException { 741 this(Objects.requireNonNull(source), toCharset(charsetName)); 742 } 743 744 /** 745 * Constructs a new {@code Scanner} that produces values scanned 746 * from the specified file. Bytes from the file are converted into 747 * characters using the specified charset. 748 * 749 * @param source 750 * the path to the file to be scanned 751 * @param charset 752 * the charset used to convert bytes from the file 753 * into characters to be scanned 754 * @throws IOException 755 * if an I/O error occurs opening the source 756 * @since 10 757 */ 758 public Scanner(Path source, Charset charset) throws IOException { 759 this(makeReadable(source, charset)); 760 } 761 762 /** 763 * Constructs a new {@code Scanner} that produces values scanned 764 * from the specified string. 765 * 766 * @param source A string to scan 767 */ 768 public Scanner(String source) { 769 this(new StringReader(source), WHITESPACE_PATTERN); 770 } 771 772 /** 773 * Constructs a new {@code Scanner} that produces values scanned 774 * from the specified channel. Bytes from the source are converted into 775 * characters using the underlying platform's 776 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 
777 * 778 * @param source A channel to scan 779 */ 780 public Scanner(ReadableByteChannel source) { 781 this(makeReadable(Objects.requireNonNull(source, "source")), 782 WHITESPACE_PATTERN); 783 } 784 785 private static Readable makeReadable(ReadableByteChannel source) { 786 return makeReadable(source, Charset.defaultCharset().newDecoder()); 787 } 788 789 /** 790 * Constructs a new {@code Scanner} that produces values scanned 791 * from the specified channel. Bytes from the source are converted into 792 * characters using the specified charset. 793 * 794 * @param source A channel to scan 795 * @param charsetName The encoding type used to convert bytes from the 796 * channel into characters to be scanned 797 * @throws IllegalArgumentException if the specified character set 798 * does not exist 799 */ 800 public Scanner(ReadableByteChannel source, String charsetName) { 801 this(makeReadable(Objects.requireNonNull(source, "source"), toDecoder(charsetName)), 802 WHITESPACE_PATTERN); 803 } 804 805 /** 806 * Constructs a new {@code Scanner} that produces values scanned 807 * from the specified channel. Bytes from the source are converted into 808 * characters using the specified charset. 
*
* @param source a channel to scan
* @param charset the encoding type used to convert bytes from the
*        channel into characters to be scanned
* @since 10
*/
public Scanner(ReadableByteChannel source, Charset charset) {
    this(makeReadable(Objects.requireNonNull(source, "source"), charset),
         WHITESPACE_PATTERN);
}

// Private primitives used to support scanning

// Remembers the current position so that it can be restored later.
private void saveState() {
    savedScannerPosition = position;
}

// Restores the remembered position, forgets it, and clears the
// "delimiter skipped" flag.
private void revertState() {
    position = savedScannerPosition;
    savedScannerPosition = -1;
    skipped = false;
}

// Same as revertState(), handing back the given value so that a caller
// can revert and return in one expression.
private boolean revertState(boolean b) {
    revertState();
    return b;
}

// Caches the matcher's current match: its text, end index and pattern.
private void cacheResult() {
    cacheResult(matcher.group());
}

// Caches the given result together with the matcher's current end index
// and pattern.
private void cacheResult(String result) {
    hasNextResult = result;
    hasNextPosition = matcher.end();
    hasNextPattern = matcher.pattern();
}

// Clears both regular cache and type cache
private void clearCaches() {
    hasNextPattern = null;
    typeCache = null;
}

// Moves to the cached end position and returns the cached result.
// Also clears both the regular cache and the type cache
private String getCachedResult() {
    position = hasNextPosition;
    clearCaches();
    return hasNextResult;
}

// Moves to the cached end position, failing if the scanner is closed.
// Also clears both the regular cache and the type cache
private void useTypeCache() {
    if (closed) {
        throw new IllegalStateException("Scanner closed");
    }
    position = hasNextPosition;
    clearCaches();
}

// Tries to read more input. May block.
875 private void readInput() { 876 if (buf.limit() == buf.capacity()) 877 makeSpace(); 878 // Prepare to receive data 879 int p = buf.position(); 880 buf.position(buf.limit()); 881 buf.limit(buf.capacity()); 882 883 int n = 0; 884 try { 885 n = source.read(buf); 886 } catch (IOException ioe) { 887 lastException = ioe; 888 n = -1; 889 } 890 if (n == -1) { 891 sourceClosed = true; 892 needInput = false; 893 } 894 if (n > 0) 895 needInput = false; 896 // Restore current position and limit for reading 897 buf.limit(buf.position()); 898 buf.position(p); 899 } 900 901 // After this method is called there will either be an exception 902 // or else there will be space in the buffer 903 private boolean makeSpace() { 904 clearCaches(); 905 int offset = savedScannerPosition == -1 ? 906 position : savedScannerPosition; 907 buf.position(offset); 908 // Gain space by compacting buffer 909 if (offset > 0) { 910 buf.compact(); 911 translateSavedIndexes(offset); 912 position -= offset; 913 buf.flip(); 914 return true; 915 } 916 // Gain space by growing buffer 917 int newSize = buf.capacity() * 2; 918 CharBuffer newBuf = CharBuffer.allocate(newSize); 919 newBuf.put(buf); 920 newBuf.flip(); 921 translateSavedIndexes(offset); 922 position -= offset; 923 buf = newBuf; 924 matcher.reset(buf); 925 return true; 926 } 927 928 // When a buffer compaction/reallocation occurs the saved indexes must 929 // be modified appropriately 930 private void translateSavedIndexes(int offset) { 931 if (savedScannerPosition != -1) 932 savedScannerPosition -= offset; 933 } 934 935 // If we are at the end of input then NoSuchElement; 936 // If there is still input left then InputMismatch 937 private void throwFor() { 938 skipped = false; 939 if ((sourceClosed) && (position == buf.limit())) 940 throw new NoSuchElementException(); 941 else 942 throw new InputMismatchException(); 943 } 944 945 // Returns true if a complete token or partial token is in the buffer. 
946 // It is not necessary to find a complete token since a partial token 947 // means that there will be another token with or without more input. 948 private boolean hasTokenInBuffer() { 949 matchValid = false; 950 matcher.usePattern(delimPattern); 951 matcher.region(position, buf.limit()); 952 // Skip delims first 953 if (matcher.lookingAt()) { 954 if (matcher.hitEnd() && !sourceClosed) { 955 // more input might change the match of delims, in which 956 // might change whether or not if there is token left in 957 // buffer (don't update the "position" in this case) 958 needInput = true; 959 return false; 960 } 961 position = matcher.end(); 962 } 963 // If we are sitting at the end, no more tokens in buffer 964 if (position == buf.limit()) 965 return false; 966 return true; 967 } 968 969 /* 970 * Returns a "complete token" that matches the specified pattern 971 * 972 * A token is complete if surrounded by delims; a partial token 973 * is prefixed by delims but not postfixed by them 974 * 975 * The position is advanced to the end of that complete token 976 * 977 * Pattern == null means accept any token at all 978 * 979 * Triple return: 980 * 1. valid string means it was found 981 * 2. null with needInput=false means we won't ever find it 982 * 3. 
null with needInput=true means try again after readInput 983 */ 984 private String getCompleteTokenInBuffer(Pattern pattern) { 985 matchValid = false; 986 // Skip delims first 987 matcher.usePattern(delimPattern); 988 if (!skipped) { // Enforcing only one skip of leading delims 989 matcher.region(position, buf.limit()); 990 if (matcher.lookingAt()) { 991 // If more input could extend the delimiters then we must wait 992 // for more input 993 if (matcher.hitEnd() && !sourceClosed) { 994 needInput = true; 995 return null; 996 } 997 // The delims were whole and the matcher should skip them 998 skipped = true; 999 position = matcher.end(); 1000 } 1001 } 1002 1003 // If we are sitting at the end, no more tokens in buffer 1004 if (position == buf.limit()) { 1005 if (sourceClosed) 1006 return null; 1007 needInput = true; 1008 return null; 1009 } 1010 // Must look for next delims. Simply attempting to match the 1011 // pattern at this point may find a match but it might not be 1012 // the first longest match because of missing input, or it might 1013 // match a partial token instead of the whole thing. 1014 1015 // Then look for next delims 1016 matcher.region(position, buf.limit()); 1017 boolean foundNextDelim = matcher.find(); 1018 if (foundNextDelim && (matcher.end() == position)) { 1019 // Zero length delimiter match; we should find the next one 1020 // using the automatic advance past a zero length match; 1021 // Otherwise we have just found the same one we just skipped 1022 foundNextDelim = matcher.find(); 1023 } 1024 if (foundNextDelim) { 1025 // In the rare case that more input could cause the match 1026 // to be lost and there is more input coming we must wait 1027 // for more input. Note that hitting the end is okay as long 1028 // as the match cannot go away. It is the beginning of the 1029 // next delims we want to be sure about, we don't care if 1030 // they potentially extend further. 
1031 if (matcher.requireEnd() && !sourceClosed) { 1032 needInput = true; 1033 return null; 1034 } 1035 int tokenEnd = matcher.start(); 1036 // There is a complete token. 1037 if (pattern == null) { 1038 // Must continue with match to provide valid MatchResult 1039 pattern = FIND_ANY_PATTERN; 1040 } 1041 // Attempt to match against the desired pattern 1042 matcher.usePattern(pattern); 1043 matcher.region(position, tokenEnd); 1044 if (matcher.matches()) { 1045 String s = matcher.group(); 1046 position = matcher.end(); 1047 return s; 1048 } else { // Complete token but it does not match 1049 return null; 1050 } 1051 } 1052 1053 // If we can't find the next delims but no more input is coming, 1054 // then we can treat the remainder as a whole token 1055 if (sourceClosed) { 1056 if (pattern == null) { 1057 // Must continue with match to provide valid MatchResult 1058 pattern = FIND_ANY_PATTERN; 1059 } 1060 // Last token; Match the pattern here or throw 1061 matcher.usePattern(pattern); 1062 matcher.region(position, buf.limit()); 1063 if (matcher.matches()) { 1064 String s = matcher.group(); 1065 position = matcher.end(); 1066 return s; 1067 } 1068 // Last piece does not match 1069 return null; 1070 } 1071 1072 // There is a partial token in the buffer; must read more 1073 // to complete it 1074 needInput = true; 1075 return null; 1076 } 1077 1078 // Finds the specified pattern in the buffer up to horizon. 1079 // Returns true if the specified input pattern was matched, 1080 // and leaves the matcher field with the current match state. 
1081 private boolean findPatternInBuffer(Pattern pattern, int horizon) { 1082 matchValid = false; 1083 matcher.usePattern(pattern); 1084 int bufferLimit = buf.limit(); 1085 int horizonLimit = -1; 1086 int searchLimit = bufferLimit; 1087 if (horizon > 0) { 1088 horizonLimit = position + horizon; 1089 if (horizonLimit < bufferLimit) 1090 searchLimit = horizonLimit; 1091 } 1092 matcher.region(position, searchLimit); 1093 if (matcher.find()) { 1094 if (matcher.hitEnd() && (!sourceClosed)) { 1095 // The match may be longer if didn't hit horizon or real end 1096 if (searchLimit != horizonLimit) { 1097 // Hit an artificial end; try to extend the match 1098 needInput = true; 1099 return false; 1100 } 1101 // The match could go away depending on what is next 1102 if ((searchLimit == horizonLimit) && matcher.requireEnd()) { 1103 // Rare case: we hit the end of input and it happens 1104 // that it is at the horizon and the end of input is 1105 // required for the match. 1106 needInput = true; 1107 return false; 1108 } 1109 } 1110 // Did not hit end, or hit real end, or hit horizon 1111 position = matcher.end(); 1112 return true; 1113 } 1114 1115 if (sourceClosed) 1116 return false; 1117 1118 // If there is no specified horizon, or if we have not searched 1119 // to the specified horizon yet, get more input 1120 if ((horizon == 0) || (searchLimit != horizonLimit)) 1121 needInput = true; 1122 return false; 1123 } 1124 1125 // Attempts to match a pattern anchored at the current position. 1126 // Returns true if the specified input pattern was matched, 1127 // and leaves the matcher field with the current match state. 
1128 private boolean matchPatternInBuffer(Pattern pattern) { 1129 matchValid = false; 1130 matcher.usePattern(pattern); 1131 matcher.region(position, buf.limit()); 1132 if (matcher.lookingAt()) { 1133 if (matcher.hitEnd() && (!sourceClosed)) { 1134 // Get more input and try again 1135 needInput = true; 1136 return false; 1137 } 1138 position = matcher.end(); 1139 return true; 1140 } 1141 1142 if (sourceClosed) 1143 return false; 1144 1145 // Read more to find pattern 1146 needInput = true; 1147 return false; 1148 } 1149 1150 // Throws if the scanner is closed 1151 private void ensureOpen() { 1152 if (closed) 1153 throw new IllegalStateException("Scanner closed"); 1154 } 1155 1156 // Public methods 1157 1158 /** 1159 * Closes this scanner. 1160 * 1161 * <p> If this scanner has not yet been closed then if its underlying 1162 * {@linkplain java.lang.Readable readable} also implements the {@link 1163 * java.io.Closeable} interface then the readable's {@code close} method 1164 * will be invoked. If this scanner is already closed then invoking this 1165 * method will have no effect. 1166 * 1167 * <p>Attempting to perform search operations after a scanner has 1168 * been closed will result in an {@link IllegalStateException}. 1169 * 1170 */ 1171 public void close() { 1172 if (closed) 1173 return; 1174 if (source instanceof Closeable) { 1175 try { 1176 ((Closeable)source).close(); 1177 } catch (IOException ioe) { 1178 lastException = ioe; 1179 } 1180 } 1181 sourceClosed = true; 1182 source = null; 1183 closed = true; 1184 } 1185 1186 /** 1187 * Returns the {@code IOException} last thrown by this 1188 * {@code Scanner}'s underlying {@code Readable}. This method 1189 * returns {@code null} if no such exception exists. 
1190 * 1191 * @return the last exception thrown by this scanner's readable 1192 */ 1193 public IOException ioException() { 1194 return lastException; 1195 } 1196 1197 /** 1198 * Returns the {@code Pattern} this {@code Scanner} is currently 1199 * using to match delimiters. 1200 * 1201 * @return this scanner's delimiting pattern. 1202 */ 1203 public Pattern delimiter() { 1204 return delimPattern; 1205 } 1206 1207 /** 1208 * Sets this scanner's delimiting pattern to the specified pattern. 1209 * 1210 * @param pattern A delimiting pattern 1211 * @return this scanner 1212 */ 1213 public Scanner useDelimiter(Pattern pattern) { 1214 modCount++; 1215 delimPattern = pattern; 1216 return this; 1217 } 1218 1219 /** 1220 * Sets this scanner's delimiting pattern to a pattern constructed from 1221 * the specified {@code String}. 1222 * 1223 * <p> An invocation of this method of the form 1224 * {@code useDelimiter(pattern)} behaves in exactly the same way as the 1225 * invocation {@code useDelimiter(Pattern.compile(pattern))}. 1226 * 1227 * <p> Invoking the {@link #reset} method will set the scanner's delimiter 1228 * to the <a href= "#default-delimiter">default</a>. 1229 * 1230 * @param pattern A string specifying a delimiting pattern 1231 * @return this scanner 1232 */ 1233 public Scanner useDelimiter(String pattern) { 1234 modCount++; 1235 delimPattern = patternCache.forName(pattern); 1236 return this; 1237 } 1238 1239 /** 1240 * Returns this scanner's locale. 1241 * 1242 * <p>A scanner's locale affects many elements of its default 1243 * primitive matching regular expressions; see 1244 * <a href= "#localized-numbers">localized numbers</a> above. 1245 * 1246 * @return this scanner's locale 1247 */ 1248 public Locale locale() { 1249 return this.locale; 1250 } 1251 1252 /** 1253 * Sets this scanner's locale to the specified locale. 
1254 * 1255 * <p>A scanner's locale affects many elements of its default 1256 * primitive matching regular expressions; see 1257 * <a href= "#localized-numbers">localized numbers</a> above. 1258 * 1259 * <p>Invoking the {@link #reset} method will set the scanner's locale to 1260 * the <a href= "#initial-locale">initial locale</a>. 1261 * 1262 * @param locale A string specifying the locale to use 1263 * @return this scanner 1264 */ 1265 public Scanner useLocale(Locale locale) { 1266 if (locale.equals(this.locale)) 1267 return this; 1268 1269 modCount++; 1270 this.locale = locale; 1271 1272 DecimalFormat df = null; 1273 NumberFormat nf = NumberFormat.getNumberInstance(locale); 1274 DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale); 1275 if (nf instanceof DecimalFormat) { 1276 df = (DecimalFormat) nf; 1277 } else { 1278 1279 // In case where NumberFormat.getNumberInstance() returns 1280 // other instance (non DecimalFormat) based on the provider 1281 // used and java.text.spi.NumberFormatProvider implementations, 1282 // DecimalFormat constructor is used to obtain the instance 1283 LocaleProviderAdapter adapter = LocaleProviderAdapter 1284 .getAdapter(NumberFormatProvider.class, locale); 1285 if (!(adapter instanceof ResourceBundleBasedAdapter)) { 1286 adapter = LocaleProviderAdapter.getResourceBundleBased(); 1287 } 1288 String[] all = adapter.getLocaleResources(locale) 1289 .getNumberPatterns(); 1290 df = new DecimalFormat(all[0], dfs); 1291 } 1292 1293 // These must be literalized to avoid collision with regex 1294 // metacharacters such as dot or parenthesis 1295 groupSeparator = "\\" + dfs.getGroupingSeparator(); 1296 decimalSeparator = "\\" + dfs.getDecimalSeparator(); 1297 1298 // Quoting the nonzero length locale-specific things 1299 // to avoid potential conflict with metacharacters 1300 nanString = "\\Q" + dfs.getNaN() + "\\E"; 1301 infinityString = "\\Q" + dfs.getInfinity() + "\\E"; 1302 positivePrefix = df.getPositivePrefix(); 1303 if 
(!positivePrefix.isEmpty()) 1304 positivePrefix = "\\Q" + positivePrefix + "\\E"; 1305 negativePrefix = df.getNegativePrefix(); 1306 if (!negativePrefix.isEmpty()) 1307 negativePrefix = "\\Q" + negativePrefix + "\\E"; 1308 positiveSuffix = df.getPositiveSuffix(); 1309 if (!positiveSuffix.isEmpty()) 1310 positiveSuffix = "\\Q" + positiveSuffix + "\\E"; 1311 negativeSuffix = df.getNegativeSuffix(); 1312 if (!negativeSuffix.isEmpty()) 1313 negativeSuffix = "\\Q" + negativeSuffix + "\\E"; 1314 1315 // Force rebuilding and recompilation of locale dependent 1316 // primitive patterns 1317 integerPattern = null; 1318 floatPattern = null; 1319 1320 return this; 1321 } 1322 1323 /** 1324 * Returns this scanner's default radix. 1325 * 1326 * <p>A scanner's radix affects elements of its default 1327 * number matching regular expressions; see 1328 * <a href= "#localized-numbers">localized numbers</a> above. 1329 * 1330 * @return the default radix of this scanner 1331 */ 1332 public int radix() { 1333 return this.defaultRadix; 1334 } 1335 1336 /** 1337 * Sets this scanner's default radix to the specified radix. 1338 * 1339 * <p>A scanner's radix affects elements of its default 1340 * number matching regular expressions; see 1341 * <a href= "#localized-numbers">localized numbers</a> above. 1342 * 1343 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 1344 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 1345 * {@code IllegalArgumentException} is thrown. 1346 * 1347 * <p>Invoking the {@link #reset} method will set the scanner's radix to 1348 * {@code 10}. 
1349 * 1350 * @param radix The radix to use when scanning numbers 1351 * @return this scanner 1352 * @throws IllegalArgumentException if radix is out of range 1353 */ 1354 public Scanner useRadix(int radix) { 1355 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) 1356 throw new IllegalArgumentException("radix:"+radix); 1357 1358 if (this.defaultRadix == radix) 1359 return this; 1360 modCount++; 1361 this.defaultRadix = radix; 1362 // Force rebuilding and recompilation of radix dependent patterns 1363 integerPattern = null; 1364 return this; 1365 } 1366 1367 // The next operation should occur in the specified radix but 1368 // the default is left untouched. 1369 private void setRadix(int radix) { 1370 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) 1371 throw new IllegalArgumentException("radix:"+radix); 1372 1373 if (this.radix != radix) { 1374 // Force rebuilding and recompilation of radix dependent patterns 1375 integerPattern = null; 1376 this.radix = radix; 1377 } 1378 } 1379 1380 /** 1381 * Returns the match result of the last scanning operation performed 1382 * by this scanner. This method throws {@code IllegalStateException} 1383 * if no match has been performed, or if the last match was 1384 * not successful. 1385 * 1386 * <p>The various {@code next} methods of {@code Scanner} 1387 * make a match result available if they complete without throwing an 1388 * exception. For instance, after an invocation of the {@link #nextInt} 1389 * method that returned an int, this method returns a 1390 * {@code MatchResult} for the search of the 1391 * <a href="#Integer-regex"><i>Integer</i></a> regular expression 1392 * defined above. Similarly the {@link #findInLine findInLine()}, 1393 * {@link #findWithinHorizon findWithinHorizon()}, and {@link #skip skip()} 1394 * methods will make a match available if they succeed. 
1395 * 1396 * @return a match result for the last match operation 1397 * @throws IllegalStateException If no match result is available 1398 */ 1399 public MatchResult match() { 1400 if (!matchValid) 1401 throw new IllegalStateException("No match result available"); 1402 return matcher.toMatchResult(); 1403 } 1404 1405 /** 1406 * <p>Returns the string representation of this {@code Scanner}. The 1407 * string representation of a {@code Scanner} contains information 1408 * that may be useful for debugging. The exact format is unspecified. 1409 * 1410 * @return The string representation of this scanner 1411 */ 1412 public String toString() { 1413 StringBuilder sb = new StringBuilder(); 1414 sb.append("java.util.Scanner"); 1415 sb.append("[delimiters=" + delimPattern + "]"); 1416 sb.append("[position=" + position + "]"); 1417 sb.append("[match valid=" + matchValid + "]"); 1418 sb.append("[need input=" + needInput + "]"); 1419 sb.append("[source closed=" + sourceClosed + "]"); 1420 sb.append("[skipped=" + skipped + "]"); 1421 sb.append("[group separator=" + groupSeparator + "]"); 1422 sb.append("[decimal separator=" + decimalSeparator + "]"); 1423 sb.append("[positive prefix=" + positivePrefix + "]"); 1424 sb.append("[negative prefix=" + negativePrefix + "]"); 1425 sb.append("[positive suffix=" + positiveSuffix + "]"); 1426 sb.append("[negative suffix=" + negativeSuffix + "]"); 1427 sb.append("[NaN string=" + nanString + "]"); 1428 sb.append("[infinity string=" + infinityString + "]"); 1429 return sb.toString(); 1430 } 1431 1432 /** 1433 * Returns true if this scanner has another token in its input. 1434 * This method may block while waiting for input to scan. 1435 * The scanner does not advance past any input. 
1436 * 1437 * @return true if and only if this scanner has another token 1438 * @throws IllegalStateException if this scanner is closed 1439 * @see java.util.Iterator 1440 */ 1441 public boolean hasNext() { 1442 ensureOpen(); 1443 saveState(); 1444 modCount++; 1445 while (!sourceClosed) { 1446 if (hasTokenInBuffer()) { 1447 return revertState(true); 1448 } 1449 readInput(); 1450 } 1451 boolean result = hasTokenInBuffer(); 1452 return revertState(result); 1453 } 1454 1455 /** 1456 * Finds and returns the next complete token from this scanner. 1457 * A complete token is preceded and followed by input that matches 1458 * the delimiter pattern. This method may block while waiting for input 1459 * to scan, even if a previous invocation of {@link #hasNext} returned 1460 * {@code true}. 1461 * 1462 * @return the next token 1463 * @throws NoSuchElementException if no more tokens are available 1464 * @throws IllegalStateException if this scanner is closed 1465 * @see java.util.Iterator 1466 */ 1467 public String next() { 1468 ensureOpen(); 1469 clearCaches(); 1470 modCount++; 1471 while (true) { 1472 String token = getCompleteTokenInBuffer(null); 1473 if (token != null) { 1474 matchValid = true; 1475 skipped = false; 1476 return token; 1477 } 1478 if (needInput) 1479 readInput(); 1480 else 1481 throwFor(); 1482 } 1483 } 1484 1485 /** 1486 * Returns this Scanner instance. This method exists to implement the 1487 * {@link IterableOnce} interface, allowing a Scanner instance to be 1488 * used in an enhanced-for ("for each") statement. This method can be 1489 * called no more than once on any Scanner instance. The second and 1490 * subsequent calls result in an exception. 
1491 * 1492 * @throws IllegalStateException if this method had been called previously 1493 * @return this Scanner instance 1494 */ 1495 public Scanner iterator() { 1496 if (consumed) { 1497 throw new IllegalStateException(); 1498 } 1499 consumed = true; 1500 return this; 1501 } 1502 1503 /** 1504 * The remove operation is not supported by this implementation of 1505 * {@code Iterator}. 1506 * 1507 * @throws UnsupportedOperationException if this method is invoked. 1508 * @see java.util.Iterator 1509 */ 1510 public void remove() { 1511 throw new UnsupportedOperationException(); 1512 } 1513 1514 /** 1515 * Returns true if the next token matches the pattern constructed from the 1516 * specified string. The scanner does not advance past any input. 1517 * 1518 * <p> An invocation of this method of the form {@code hasNext(pattern)} 1519 * behaves in exactly the same way as the invocation 1520 * {@code hasNext(Pattern.compile(pattern))}. 1521 * 1522 * @param pattern a string specifying the pattern to scan 1523 * @return true if and only if this scanner has another token matching 1524 * the specified pattern 1525 * @throws IllegalStateException if this scanner is closed 1526 */ 1527 public boolean hasNext(String pattern) { 1528 return hasNext(patternCache.forName(pattern)); 1529 } 1530 1531 /** 1532 * Returns the next token if it matches the pattern constructed from the 1533 * specified string. If the match is successful, the scanner advances 1534 * past the input that matched the pattern. 1535 * 1536 * <p> An invocation of this method of the form {@code next(pattern)} 1537 * behaves in exactly the same way as the invocation 1538 * {@code next(Pattern.compile(pattern))}. 
1539 * 1540 * @param pattern a string specifying the pattern to scan 1541 * @return the next token 1542 * @throws NoSuchElementException if no such tokens are available 1543 * @throws IllegalStateException if this scanner is closed 1544 */ 1545 public String next(String pattern) { 1546 return next(patternCache.forName(pattern)); 1547 } 1548 1549 /** 1550 * Returns true if the next complete token matches the specified pattern. 1551 * A complete token is prefixed and postfixed by input that matches 1552 * the delimiter pattern. This method may block while waiting for input. 1553 * The scanner does not advance past any input. 1554 * 1555 * @param pattern the pattern to scan for 1556 * @return true if and only if this scanner has another token matching 1557 * the specified pattern 1558 * @throws IllegalStateException if this scanner is closed 1559 */ 1560 public boolean hasNext(Pattern pattern) { 1561 ensureOpen(); 1562 if (pattern == null) 1563 throw new NullPointerException(); 1564 hasNextPattern = null; 1565 saveState(); 1566 modCount++; 1567 1568 while (true) { 1569 if (getCompleteTokenInBuffer(pattern) != null) { 1570 matchValid = true; 1571 cacheResult(); 1572 return revertState(true); 1573 } 1574 if (needInput) 1575 readInput(); 1576 else 1577 return revertState(false); 1578 } 1579 } 1580 1581 /** 1582 * Returns the next token if it matches the specified pattern. This 1583 * method may block while waiting for input to scan, even if a previous 1584 * invocation of {@link #hasNext(Pattern)} returned {@code true}. 1585 * If the match is successful, the scanner advances past the input that 1586 * matched the pattern. 
1587 * 1588 * @param pattern the pattern to scan for 1589 * @return the next token 1590 * @throws NoSuchElementException if no more tokens are available 1591 * @throws IllegalStateException if this scanner is closed 1592 */ 1593 public String next(Pattern pattern) { 1594 ensureOpen(); 1595 if (pattern == null) 1596 throw new NullPointerException(); 1597 1598 modCount++; 1599 // Did we already find this pattern? 1600 if (hasNextPattern == pattern) 1601 return getCachedResult(); 1602 clearCaches(); 1603 1604 // Search for the pattern 1605 while (true) { 1606 String token = getCompleteTokenInBuffer(pattern); 1607 if (token != null) { 1608 matchValid = true; 1609 skipped = false; 1610 return token; 1611 } 1612 if (needInput) 1613 readInput(); 1614 else 1615 throwFor(); 1616 } 1617 } 1618 1619 /** 1620 * Returns true if there is another line in the input of this scanner. 1621 * This method may block while waiting for input. The scanner does not 1622 * advance past any input. 1623 * 1624 * @return true if and only if this scanner has another line of input 1625 * @throws IllegalStateException if this scanner is closed 1626 */ 1627 public boolean hasNextLine() { 1628 saveState(); 1629 1630 modCount++; 1631 String result = findWithinHorizon(linePattern(), 0); 1632 if (result != null) { 1633 MatchResult mr = this.match(); 1634 String lineSep = mr.group(1); 1635 if (lineSep != null) { 1636 result = result.substring(0, result.length() - 1637 lineSep.length()); 1638 cacheResult(result); 1639 1640 } else { 1641 cacheResult(); 1642 } 1643 } 1644 revertState(); 1645 return (result != null); 1646 } 1647 1648 /** 1649 * Advances this scanner past the current line and returns the input 1650 * that was skipped. 1651 * 1652 * This method returns the rest of the current line, excluding any line 1653 * separator at the end. The position is set to the beginning of the next 1654 * line. 
1655 * 1656 * <p>Since this method continues to search through the input looking 1657 * for a line separator, it may buffer all of the input searching for 1658 * the line to skip if no line separators are present. 1659 * 1660 * @return the line that was skipped 1661 * @throws NoSuchElementException if no line was found 1662 * @throws IllegalStateException if this scanner is closed 1663 */ 1664 public String nextLine() { 1665 modCount++; 1666 if (hasNextPattern == linePattern()) 1667 return getCachedResult(); 1668 clearCaches(); 1669 1670 String result = findWithinHorizon(linePattern, 0); 1671 if (result == null) 1672 throw new NoSuchElementException("No line found"); 1673 MatchResult mr = this.match(); 1674 String lineSep = mr.group(1); 1675 if (lineSep != null) 1676 result = result.substring(0, result.length() - lineSep.length()); 1677 if (result == null) 1678 throw new NoSuchElementException(); 1679 else 1680 return result; 1681 } 1682 1683 // Public methods that ignore delimiters 1684 1685 /** 1686 * Attempts to find the next occurrence of a pattern constructed from the 1687 * specified string, ignoring delimiters. 1688 * 1689 * <p>An invocation of this method of the form {@code findInLine(pattern)} 1690 * behaves in exactly the same way as the invocation 1691 * {@code findInLine(Pattern.compile(pattern))}. 1692 * 1693 * @param pattern a string specifying the pattern to search for 1694 * @return the text that matched the specified pattern 1695 * @throws IllegalStateException if this scanner is closed 1696 */ 1697 public String findInLine(String pattern) { 1698 return findInLine(patternCache.forName(pattern)); 1699 } 1700 1701 /** 1702 * Attempts to find the next occurrence of the specified pattern ignoring 1703 * delimiters. If the pattern is found before the next line separator, the 1704 * scanner advances past the input that matched and returns the string that 1705 * matched the pattern. 
1706 * If no such pattern is detected in the input up to the next line 1707 * separator, then {@code null} is returned and the scanner's 1708 * position is unchanged. This method may block waiting for input that 1709 * matches the pattern. 1710 * 1711 * <p>Since this method continues to search through the input looking 1712 * for the specified pattern, it may buffer all of the input searching for 1713 * the desired token if no line separators are present. 1714 * 1715 * @param pattern the pattern to scan for 1716 * @return the text that matched the specified pattern 1717 * @throws IllegalStateException if this scanner is closed 1718 */ 1719 public String findInLine(Pattern pattern) { 1720 ensureOpen(); 1721 if (pattern == null) 1722 throw new NullPointerException(); 1723 clearCaches(); 1724 modCount++; 1725 // Expand buffer to include the next newline or end of input 1726 int endPosition = 0; 1727 saveState(); 1728 while (true) { 1729 if (findPatternInBuffer(separatorPattern(), 0)) { 1730 endPosition = matcher.start(); 1731 break; // up to next newline 1732 } 1733 if (needInput) { 1734 readInput(); 1735 } else { 1736 endPosition = buf.limit(); 1737 break; // up to end of input 1738 } 1739 } 1740 revertState(); 1741 int horizonForLine = endPosition - position; 1742 // If there is nothing between the current pos and the next 1743 // newline simply return null, invoking findWithinHorizon 1744 // with "horizon=0" will scan beyond the line bound. 1745 if (horizonForLine == 0) 1746 return null; 1747 // Search for the pattern 1748 return findWithinHorizon(pattern, horizonForLine); 1749 } 1750 1751 /** 1752 * Attempts to find the next occurrence of a pattern constructed from the 1753 * specified string, ignoring delimiters. 1754 * 1755 * <p>An invocation of this method of the form 1756 * {@code findWithinHorizon(pattern)} behaves in exactly the same way as 1757 * the invocation 1758 * {@code findWithinHorizon(Pattern.compile(pattern), horizon)}. 
1759 * 1760 * @param pattern a string specifying the pattern to search for 1761 * @param horizon the search horizon 1762 * @return the text that matched the specified pattern 1763 * @throws IllegalStateException if this scanner is closed 1764 * @throws IllegalArgumentException if horizon is negative 1765 */ 1766 public String findWithinHorizon(String pattern, int horizon) { 1767 return findWithinHorizon(patternCache.forName(pattern), horizon); 1768 } 1769 1770 /** 1771 * Attempts to find the next occurrence of the specified pattern. 1772 * 1773 * <p>This method searches through the input up to the specified 1774 * search horizon, ignoring delimiters. If the pattern is found the 1775 * scanner advances past the input that matched and returns the string 1776 * that matched the pattern. If no such pattern is detected then the 1777 * null is returned and the scanner's position remains unchanged. This 1778 * method may block waiting for input that matches the pattern. 1779 * 1780 * <p>A scanner will never search more than {@code horizon} code 1781 * points beyond its current position. Note that a match may be clipped 1782 * by the horizon; that is, an arbitrary match result may have been 1783 * different if the horizon had been larger. The scanner treats the 1784 * horizon as a transparent, non-anchoring bound (see {@link 1785 * Matcher#useTransparentBounds} and {@link Matcher#useAnchoringBounds}). 1786 * 1787 * <p>If horizon is {@code 0}, then the horizon is ignored and 1788 * this method continues to search through the input looking for the 1789 * specified pattern without bound. In this case it may buffer all of 1790 * the input searching for the pattern. 1791 * 1792 * <p>If horizon is negative, then an IllegalArgumentException is 1793 * thrown. 
1794 * 1795 * @param pattern the pattern to scan for 1796 * @param horizon the search horizon 1797 * @return the text that matched the specified pattern 1798 * @throws IllegalStateException if this scanner is closed 1799 * @throws IllegalArgumentException if horizon is negative 1800 */ 1801 public String findWithinHorizon(Pattern pattern, int horizon) { 1802 ensureOpen(); 1803 if (pattern == null) 1804 throw new NullPointerException(); 1805 if (horizon < 0) 1806 throw new IllegalArgumentException("horizon < 0"); 1807 clearCaches(); 1808 modCount++; 1809 1810 // Search for the pattern 1811 while (true) { 1812 if (findPatternInBuffer(pattern, horizon)) { 1813 matchValid = true; 1814 return matcher.group(); 1815 } 1816 if (needInput) 1817 readInput(); 1818 else 1819 break; // up to end of input 1820 } 1821 return null; 1822 } 1823 1824 /** 1825 * Skips input that matches the specified pattern, ignoring delimiters. 1826 * This method will skip input if an anchored match of the specified 1827 * pattern succeeds. 1828 * 1829 * <p>If a match to the specified pattern is not found at the 1830 * current position, then no input is skipped and a 1831 * {@code NoSuchElementException} is thrown. 1832 * 1833 * <p>Since this method seeks to match the specified pattern starting at 1834 * the scanner's current position, patterns that can match a lot of 1835 * input (".*", for example) may cause the scanner to buffer a large 1836 * amount of input. 1837 * 1838 * <p>Note that it is possible to skip something without risking a 1839 * {@code NoSuchElementException} by using a pattern that can 1840 * match nothing, e.g., {@code sc.skip("[ \t]*")}. 
1841 * 1842 * @param pattern a string specifying the pattern to skip over 1843 * @return this scanner 1844 * @throws NoSuchElementException if the specified pattern is not found 1845 * @throws IllegalStateException if this scanner is closed 1846 */ 1847 public Scanner skip(Pattern pattern) { 1848 ensureOpen(); 1849 if (pattern == null) 1850 throw new NullPointerException(); 1851 clearCaches(); 1852 modCount++; 1853 1854 // Search for the pattern 1855 while (true) { 1856 if (matchPatternInBuffer(pattern)) { 1857 matchValid = true; 1858 position = matcher.end(); 1859 return this; 1860 } 1861 if (needInput) 1862 readInput(); 1863 else 1864 throw new NoSuchElementException(); 1865 } 1866 } 1867 1868 /** 1869 * Skips input that matches a pattern constructed from the specified 1870 * string. 1871 * 1872 * <p> An invocation of this method of the form {@code skip(pattern)} 1873 * behaves in exactly the same way as the invocation 1874 * {@code skip(Pattern.compile(pattern))}. 1875 * 1876 * @param pattern a string specifying the pattern to skip over 1877 * @return this scanner 1878 * @throws IllegalStateException if this scanner is closed 1879 */ 1880 public Scanner skip(String pattern) { 1881 return skip(patternCache.forName(pattern)); 1882 } 1883 1884 // Convenience methods for scanning primitives 1885 1886 /** 1887 * Returns true if the next token in this scanner's input can be 1888 * interpreted as a boolean value using a case insensitive pattern 1889 * created from the string "true|false". The scanner does not 1890 * advance past the input that matched. 1891 * 1892 * @return true if and only if this scanner's next token is a valid 1893 * boolean value 1894 * @throws IllegalStateException if this scanner is closed 1895 */ 1896 public boolean hasNextBoolean() { 1897 return hasNext(boolPattern()); 1898 } 1899 1900 /** 1901 * Scans the next token of the input into a boolean value and returns 1902 * that value. 
This method will throw {@code InputMismatchException} 1903 * if the next token cannot be translated into a valid boolean value. 1904 * If the match is successful, the scanner advances past the input that 1905 * matched. 1906 * 1907 * @return the boolean scanned from the input 1908 * @throws InputMismatchException if the next token is not a valid boolean 1909 * @throws NoSuchElementException if input is exhausted 1910 * @throws IllegalStateException if this scanner is closed 1911 */ 1912 public boolean nextBoolean() { 1913 clearCaches(); 1914 return Boolean.parseBoolean(next(boolPattern())); 1915 } 1916 1917 /** 1918 * Returns true if the next token in this scanner's input can be 1919 * interpreted as a byte value in the default radix using the 1920 * {@link #nextByte} method. The scanner does not advance past any input. 1921 * 1922 * @return true if and only if this scanner's next token is a valid 1923 * byte value 1924 * @throws IllegalStateException if this scanner is closed 1925 */ 1926 public boolean hasNextByte() { 1927 return hasNextByte(defaultRadix); 1928 } 1929 1930 /** 1931 * Returns true if the next token in this scanner's input can be 1932 * interpreted as a byte value in the specified radix using the 1933 * {@link #nextByte} method. The scanner does not advance past any input. 1934 * 1935 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 1936 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 1937 * {@code IllegalArgumentException} is thrown. 
1938 * 1939 * @param radix the radix used to interpret the token as a byte value 1940 * @return true if and only if this scanner's next token is a valid 1941 * byte value 1942 * @throws IllegalStateException if this scanner is closed 1943 * @throws IllegalArgumentException if the radix is out of range 1944 */ 1945 public boolean hasNextByte(int radix) { 1946 setRadix(radix); 1947 boolean result = hasNext(integerPattern()); 1948 if (result) { // Cache it 1949 try { 1950 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 1951 processIntegerToken(hasNextResult) : 1952 hasNextResult; 1953 typeCache = Byte.parseByte(s, radix); 1954 } catch (NumberFormatException nfe) { 1955 result = false; 1956 } 1957 } 1958 return result; 1959 } 1960 1961 /** 1962 * Scans the next token of the input as a {@code byte}. 1963 * 1964 * <p> An invocation of this method of the form 1965 * {@code nextByte()} behaves in exactly the same way as the 1966 * invocation {@code nextByte(radix)}, where {@code radix} 1967 * is the default radix of this scanner. 1968 * 1969 * @return the {@code byte} scanned from the input 1970 * @throws InputMismatchException 1971 * if the next token does not match the <i>Integer</i> 1972 * regular expression, or is out of range 1973 * @throws NoSuchElementException if input is exhausted 1974 * @throws IllegalStateException if this scanner is closed 1975 */ 1976 public byte nextByte() { 1977 return nextByte(defaultRadix); 1978 } 1979 1980 /** 1981 * Scans the next token of the input as a {@code byte}. 1982 * This method will throw {@code InputMismatchException} 1983 * if the next token cannot be translated into a valid byte value as 1984 * described below. If the translation is successful, the scanner advances 1985 * past the input that matched. 
1986 * 1987 * <p> If the next token matches the <a 1988 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1989 * above then the token is converted into a {@code byte} value as if by 1990 * removing all locale specific prefixes, group separators, and locale 1991 * specific suffixes, then mapping non-ASCII digits into ASCII 1992 * digits via {@link Character#digit Character.digit}, prepending a 1993 * negative sign (-) if the locale specific negative prefixes and suffixes 1994 * were present, and passing the resulting string to 1995 * {@link Byte#parseByte(String, int) Byte.parseByte} with the 1996 * specified radix. 1997 * 1998 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 1999 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2000 * {@code IllegalArgumentException} is thrown. 2001 * 2002 * @param radix the radix used to interpret the token as a byte value 2003 * @return the {@code byte} scanned from the input 2004 * @throws InputMismatchException 2005 * if the next token does not match the <i>Integer</i> 2006 * regular expression, or is out of range 2007 * @throws NoSuchElementException if input is exhausted 2008 * @throws IllegalStateException if this scanner is closed 2009 * @throws IllegalArgumentException if the radix is out of range 2010 */ 2011 public byte nextByte(int radix) { 2012 // Check cached result 2013 if ((typeCache != null) && (typeCache instanceof Byte) 2014 && this.radix == radix) { 2015 byte val = ((Byte)typeCache).byteValue(); 2016 useTypeCache(); 2017 return val; 2018 } 2019 setRadix(radix); 2020 clearCaches(); 2021 // Search for next byte 2022 try { 2023 String s = next(integerPattern()); 2024 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2025 s = processIntegerToken(s); 2026 return Byte.parseByte(s, radix); 2027 } catch (NumberFormatException nfe) { 2028 position = matcher.start(); // don't skip bad token 2029 throw new InputMismatchException(nfe.getMessage()); 
2030 } 2031 } 2032 2033 /** 2034 * Returns true if the next token in this scanner's input can be 2035 * interpreted as a short value in the default radix using the 2036 * {@link #nextShort} method. The scanner does not advance past any input. 2037 * 2038 * @return true if and only if this scanner's next token is a valid 2039 * short value in the default radix 2040 * @throws IllegalStateException if this scanner is closed 2041 */ 2042 public boolean hasNextShort() { 2043 return hasNextShort(defaultRadix); 2044 } 2045 2046 /** 2047 * Returns true if the next token in this scanner's input can be 2048 * interpreted as a short value in the specified radix using the 2049 * {@link #nextShort} method. The scanner does not advance past any input. 2050 * 2051 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2052 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2053 * {@code IllegalArgumentException} is thrown. 2054 * 2055 * @param radix the radix used to interpret the token as a short value 2056 * @return true if and only if this scanner's next token is a valid 2057 * short value in the specified radix 2058 * @throws IllegalStateException if this scanner is closed 2059 * @throws IllegalArgumentException if the radix is out of range 2060 */ 2061 public boolean hasNextShort(int radix) { 2062 setRadix(radix); 2063 boolean result = hasNext(integerPattern()); 2064 if (result) { // Cache it 2065 try { 2066 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2067 processIntegerToken(hasNextResult) : 2068 hasNextResult; 2069 typeCache = Short.parseShort(s, radix); 2070 } catch (NumberFormatException nfe) { 2071 result = false; 2072 } 2073 } 2074 return result; 2075 } 2076 2077 /** 2078 * Scans the next token of the input as a {@code short}. 
2079 * 2080 * <p> An invocation of this method of the form 2081 * {@code nextShort()} behaves in exactly the same way as the 2082 * invocation {@link #nextShort(int) nextShort(radix)}, where {@code radix} 2083 * is the default radix of this scanner. 2084 * 2085 * @return the {@code short} scanned from the input 2086 * @throws InputMismatchException 2087 * if the next token does not match the <i>Integer</i> 2088 * regular expression, or is out of range 2089 * @throws NoSuchElementException if input is exhausted 2090 * @throws IllegalStateException if this scanner is closed 2091 */ 2092 public short nextShort() { 2093 return nextShort(defaultRadix); 2094 } 2095 2096 /** 2097 * Scans the next token of the input as a {@code short}. 2098 * This method will throw {@code InputMismatchException} 2099 * if the next token cannot be translated into a valid short value as 2100 * described below. If the translation is successful, the scanner advances 2101 * past the input that matched. 2102 * 2103 * <p> If the next token matches the <a 2104 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2105 * above then the token is converted into a {@code short} value as if by 2106 * removing all locale specific prefixes, group separators, and locale 2107 * specific suffixes, then mapping non-ASCII digits into ASCII 2108 * digits via {@link Character#digit Character.digit}, prepending a 2109 * negative sign (-) if the locale specific negative prefixes and suffixes 2110 * were present, and passing the resulting string to 2111 * {@link Short#parseShort(String, int) Short.parseShort} with the 2112 * specified radix. 2113 * 2114 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2115 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2116 * {@code IllegalArgumentException} is thrown. 
2117 * 2118 * @param radix the radix used to interpret the token as a short value 2119 * @return the {@code short} scanned from the input 2120 * @throws InputMismatchException 2121 * if the next token does not match the <i>Integer</i> 2122 * regular expression, or is out of range 2123 * @throws NoSuchElementException if input is exhausted 2124 * @throws IllegalStateException if this scanner is closed 2125 * @throws IllegalArgumentException if the radix is out of range 2126 */ 2127 public short nextShort(int radix) { 2128 // Check cached result 2129 if ((typeCache != null) && (typeCache instanceof Short) 2130 && this.radix == radix) { 2131 short val = ((Short)typeCache).shortValue(); 2132 useTypeCache(); 2133 return val; 2134 } 2135 setRadix(radix); 2136 clearCaches(); 2137 // Search for next short 2138 try { 2139 String s = next(integerPattern()); 2140 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2141 s = processIntegerToken(s); 2142 return Short.parseShort(s, radix); 2143 } catch (NumberFormatException nfe) { 2144 position = matcher.start(); // don't skip bad token 2145 throw new InputMismatchException(nfe.getMessage()); 2146 } 2147 } 2148 2149 /** 2150 * Returns true if the next token in this scanner's input can be 2151 * interpreted as an int value in the default radix using the 2152 * {@link #nextInt} method. The scanner does not advance past any input. 2153 * 2154 * @return true if and only if this scanner's next token is a valid 2155 * int value 2156 * @throws IllegalStateException if this scanner is closed 2157 */ 2158 public boolean hasNextInt() { 2159 return hasNextInt(defaultRadix); 2160 } 2161 2162 /** 2163 * Returns true if the next token in this scanner's input can be 2164 * interpreted as an int value in the specified radix using the 2165 * {@link #nextInt} method. The scanner does not advance past any input. 
2166 * 2167 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2168 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2169 * {@code IllegalArgumentException} is thrown. 2170 * 2171 * @param radix the radix used to interpret the token as an int value 2172 * @return true if and only if this scanner's next token is a valid 2173 * int value 2174 * @throws IllegalStateException if this scanner is closed 2175 * @throws IllegalArgumentException if the radix is out of range 2176 */ 2177 public boolean hasNextInt(int radix) { 2178 setRadix(radix); 2179 boolean result = hasNext(integerPattern()); 2180 if (result) { // Cache it 2181 try { 2182 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2183 processIntegerToken(hasNextResult) : 2184 hasNextResult; 2185 typeCache = Integer.parseInt(s, radix); 2186 } catch (NumberFormatException nfe) { 2187 result = false; 2188 } 2189 } 2190 return result; 2191 } 2192 2193 /** 2194 * The integer token must be stripped of prefixes, group separators, 2195 * and suffixes, non ascii digits must be converted into ascii digits 2196 * before parse will accept it. 2197 */ 2198 private String processIntegerToken(String token) { 2199 String result = token.replaceAll(""+groupSeparator, ""); 2200 boolean isNegative = false; 2201 int preLen = negativePrefix.length(); 2202 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2203 isNegative = true; 2204 result = result.substring(preLen); 2205 } 2206 int sufLen = negativeSuffix.length(); 2207 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2208 isNegative = true; 2209 result = result.substring(result.length() - sufLen, 2210 result.length()); 2211 } 2212 if (isNegative) 2213 result = "-" + result; 2214 return result; 2215 } 2216 2217 /** 2218 * Scans the next token of the input as an {@code int}. 
2219 * 2220 * <p> An invocation of this method of the form 2221 * {@code nextInt()} behaves in exactly the same way as the 2222 * invocation {@code nextInt(radix)}, where {@code radix} 2223 * is the default radix of this scanner. 2224 * 2225 * @return the {@code int} scanned from the input 2226 * @throws InputMismatchException 2227 * if the next token does not match the <i>Integer</i> 2228 * regular expression, or is out of range 2229 * @throws NoSuchElementException if input is exhausted 2230 * @throws IllegalStateException if this scanner is closed 2231 */ 2232 public int nextInt() { 2233 return nextInt(defaultRadix); 2234 } 2235 2236 /** 2237 * Scans the next token of the input as an {@code int}. 2238 * This method will throw {@code InputMismatchException} 2239 * if the next token cannot be translated into a valid int value as 2240 * described below. If the translation is successful, the scanner advances 2241 * past the input that matched. 2242 * 2243 * <p> If the next token matches the <a 2244 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2245 * above then the token is converted into an {@code int} value as if by 2246 * removing all locale specific prefixes, group separators, and locale 2247 * specific suffixes, then mapping non-ASCII digits into ASCII 2248 * digits via {@link Character#digit Character.digit}, prepending a 2249 * negative sign (-) if the locale specific negative prefixes and suffixes 2250 * were present, and passing the resulting string to 2251 * {@link Integer#parseInt(String, int) Integer.parseInt} with the 2252 * specified radix. 2253 * 2254 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2255 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2256 * {@code IllegalArgumentException} is thrown. 
2257 * 2258 * @param radix the radix used to interpret the token as an int value 2259 * @return the {@code int} scanned from the input 2260 * @throws InputMismatchException 2261 * if the next token does not match the <i>Integer</i> 2262 * regular expression, or is out of range 2263 * @throws NoSuchElementException if input is exhausted 2264 * @throws IllegalStateException if this scanner is closed 2265 * @throws IllegalArgumentException if the radix is out of range 2266 */ 2267 public int nextInt(int radix) { 2268 // Check cached result 2269 if ((typeCache != null) && (typeCache instanceof Integer) 2270 && this.radix == radix) { 2271 int val = ((Integer)typeCache).intValue(); 2272 useTypeCache(); 2273 return val; 2274 } 2275 setRadix(radix); 2276 clearCaches(); 2277 // Search for next int 2278 try { 2279 String s = next(integerPattern()); 2280 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2281 s = processIntegerToken(s); 2282 return Integer.parseInt(s, radix); 2283 } catch (NumberFormatException nfe) { 2284 position = matcher.start(); // don't skip bad token 2285 throw new InputMismatchException(nfe.getMessage()); 2286 } 2287 } 2288 2289 /** 2290 * Returns true if the next token in this scanner's input can be 2291 * interpreted as a long value in the default radix using the 2292 * {@link #nextLong} method. The scanner does not advance past any input. 2293 * 2294 * @return true if and only if this scanner's next token is a valid 2295 * long value 2296 * @throws IllegalStateException if this scanner is closed 2297 */ 2298 public boolean hasNextLong() { 2299 return hasNextLong(defaultRadix); 2300 } 2301 2302 /** 2303 * Returns true if the next token in this scanner's input can be 2304 * interpreted as a long value in the specified radix using the 2305 * {@link #nextLong} method. The scanner does not advance past any input. 
2306 * 2307 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2308 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2309 * {@code IllegalArgumentException} is thrown. 2310 * 2311 * @param radix the radix used to interpret the token as a long value 2312 * @return true if and only if this scanner's next token is a valid 2313 * long value 2314 * @throws IllegalStateException if this scanner is closed 2315 * @throws IllegalArgumentException if the radix is out of range 2316 */ 2317 public boolean hasNextLong(int radix) { 2318 setRadix(radix); 2319 boolean result = hasNext(integerPattern()); 2320 if (result) { // Cache it 2321 try { 2322 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2323 processIntegerToken(hasNextResult) : 2324 hasNextResult; 2325 typeCache = Long.parseLong(s, radix); 2326 } catch (NumberFormatException nfe) { 2327 result = false; 2328 } 2329 } 2330 return result; 2331 } 2332 2333 /** 2334 * Scans the next token of the input as a {@code long}. 2335 * 2336 * <p> An invocation of this method of the form 2337 * {@code nextLong()} behaves in exactly the same way as the 2338 * invocation {@code nextLong(radix)}, where {@code radix} 2339 * is the default radix of this scanner. 2340 * 2341 * @return the {@code long} scanned from the input 2342 * @throws InputMismatchException 2343 * if the next token does not match the <i>Integer</i> 2344 * regular expression, or is out of range 2345 * @throws NoSuchElementException if input is exhausted 2346 * @throws IllegalStateException if this scanner is closed 2347 */ 2348 public long nextLong() { 2349 return nextLong(defaultRadix); 2350 } 2351 2352 /** 2353 * Scans the next token of the input as a {@code long}. 2354 * This method will throw {@code InputMismatchException} 2355 * if the next token cannot be translated into a valid long value as 2356 * described below. 
If the translation is successful, the scanner advances 2357 * past the input that matched. 2358 * 2359 * <p> If the next token matches the <a 2360 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2361 * above then the token is converted into a {@code long} value as if by 2362 * removing all locale specific prefixes, group separators, and locale 2363 * specific suffixes, then mapping non-ASCII digits into ASCII 2364 * digits via {@link Character#digit Character.digit}, prepending a 2365 * negative sign (-) if the locale specific negative prefixes and suffixes 2366 * were present, and passing the resulting string to 2367 * {@link Long#parseLong(String, int) Long.parseLong} with the 2368 * specified radix. 2369 * 2370 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2371 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2372 * {@code IllegalArgumentException} is thrown. 2373 * 2374 * @param radix the radix used to interpret the token as an int value 2375 * @return the {@code long} scanned from the input 2376 * @throws InputMismatchException 2377 * if the next token does not match the <i>Integer</i> 2378 * regular expression, or is out of range 2379 * @throws NoSuchElementException if input is exhausted 2380 * @throws IllegalStateException if this scanner is closed 2381 * @throws IllegalArgumentException if the radix is out of range 2382 */ 2383 public long nextLong(int radix) { 2384 // Check cached result 2385 if ((typeCache != null) && (typeCache instanceof Long) 2386 && this.radix == radix) { 2387 long val = ((Long)typeCache).longValue(); 2388 useTypeCache(); 2389 return val; 2390 } 2391 setRadix(radix); 2392 clearCaches(); 2393 try { 2394 String s = next(integerPattern()); 2395 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2396 s = processIntegerToken(s); 2397 return Long.parseLong(s, radix); 2398 } catch (NumberFormatException nfe) { 2399 position = matcher.start(); // don't skip bad 
token 2400 throw new InputMismatchException(nfe.getMessage()); 2401 } 2402 } 2403 2404 /** 2405 * The float token must be stripped of prefixes, group separators, 2406 * and suffixes, non ascii digits must be converted into ascii digits 2407 * before parseFloat will accept it. 2408 * 2409 * If there are non-ascii digits in the token these digits must 2410 * be processed before the token is passed to parseFloat. 2411 */ 2412 private String processFloatToken(String token) { 2413 String result = token.replaceAll(groupSeparator, ""); 2414 if (!decimalSeparator.equals("\\.")) 2415 result = result.replaceAll(decimalSeparator, "."); 2416 boolean isNegative = false; 2417 int preLen = negativePrefix.length(); 2418 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2419 isNegative = true; 2420 result = result.substring(preLen); 2421 } 2422 int sufLen = negativeSuffix.length(); 2423 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2424 isNegative = true; 2425 result = result.substring(result.length() - sufLen, 2426 result.length()); 2427 } 2428 if (result.equals(nanString)) 2429 result = "NaN"; 2430 if (result.equals(infinityString)) 2431 result = "Infinity"; 2432 if (isNegative) 2433 result = "-" + result; 2434 2435 // Translate non-ASCII digits 2436 Matcher m = NON_ASCII_DIGIT.matcher(result); 2437 if (m.find()) { 2438 StringBuilder inASCII = new StringBuilder(); 2439 for (int i=0; i<result.length(); i++) { 2440 char nextChar = result.charAt(i); 2441 if (Character.isDigit(nextChar)) { 2442 int d = Character.digit(nextChar, 10); 2443 if (d != -1) 2444 inASCII.append(d); 2445 else 2446 inASCII.append(nextChar); 2447 } else { 2448 inASCII.append(nextChar); 2449 } 2450 } 2451 result = inASCII.toString(); 2452 } 2453 2454 return result; 2455 } 2456 2457 /** 2458 * Returns true if the next token in this scanner's input can be 2459 * interpreted as a float value using the {@link #nextFloat} 2460 * method. The scanner does not advance past any input. 
2461 * 2462 * @return true if and only if this scanner's next token is a valid 2463 * float value 2464 * @throws IllegalStateException if this scanner is closed 2465 */ 2466 public boolean hasNextFloat() { 2467 setRadix(10); 2468 boolean result = hasNext(floatPattern()); 2469 if (result) { // Cache it 2470 try { 2471 String s = processFloatToken(hasNextResult); 2472 typeCache = Float.valueOf(Float.parseFloat(s)); 2473 } catch (NumberFormatException nfe) { 2474 result = false; 2475 } 2476 } 2477 return result; 2478 } 2479 2480 /** 2481 * Scans the next token of the input as a {@code float}. 2482 * This method will throw {@code InputMismatchException} 2483 * if the next token cannot be translated into a valid float value as 2484 * described below. If the translation is successful, the scanner advances 2485 * past the input that matched. 2486 * 2487 * <p> If the next token matches the <a 2488 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2489 * then the token is converted into a {@code float} value as if by 2490 * removing all locale specific prefixes, group separators, and locale 2491 * specific suffixes, then mapping non-ASCII digits into ASCII 2492 * digits via {@link Character#digit Character.digit}, prepending a 2493 * negative sign (-) if the locale specific negative prefixes and suffixes 2494 * were present, and passing the resulting string to 2495 * {@link Float#parseFloat Float.parseFloat}. If the token matches 2496 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2497 * is passed to {@link Float#parseFloat(String) Float.parseFloat} as 2498 * appropriate. 
2499 * 2500 * @return the {@code float} scanned from the input 2501 * @throws InputMismatchException 2502 * if the next token does not match the <i>Float</i> 2503 * regular expression, or is out of range 2504 * @throws NoSuchElementException if input is exhausted 2505 * @throws IllegalStateException if this scanner is closed 2506 */ 2507 public float nextFloat() { 2508 // Check cached result 2509 if ((typeCache != null) && (typeCache instanceof Float)) { 2510 float val = ((Float)typeCache).floatValue(); 2511 useTypeCache(); 2512 return val; 2513 } 2514 setRadix(10); 2515 clearCaches(); 2516 try { 2517 return Float.parseFloat(processFloatToken(next(floatPattern()))); 2518 } catch (NumberFormatException nfe) { 2519 position = matcher.start(); // don't skip bad token 2520 throw new InputMismatchException(nfe.getMessage()); 2521 } 2522 } 2523 2524 /** 2525 * Returns true if the next token in this scanner's input can be 2526 * interpreted as a double value using the {@link #nextDouble} 2527 * method. The scanner does not advance past any input. 2528 * 2529 * @return true if and only if this scanner's next token is a valid 2530 * double value 2531 * @throws IllegalStateException if this scanner is closed 2532 */ 2533 public boolean hasNextDouble() { 2534 setRadix(10); 2535 boolean result = hasNext(floatPattern()); 2536 if (result) { // Cache it 2537 try { 2538 String s = processFloatToken(hasNextResult); 2539 typeCache = Double.valueOf(Double.parseDouble(s)); 2540 } catch (NumberFormatException nfe) { 2541 result = false; 2542 } 2543 } 2544 return result; 2545 } 2546 2547 /** 2548 * Scans the next token of the input as a {@code double}. 2549 * This method will throw {@code InputMismatchException} 2550 * if the next token cannot be translated into a valid double value. 2551 * If the translation is successful, the scanner advances past the input 2552 * that matched. 
2553 * 2554 * <p> If the next token matches the <a 2555 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2556 * then the token is converted into a {@code double} value as if by 2557 * removing all locale specific prefixes, group separators, and locale 2558 * specific suffixes, then mapping non-ASCII digits into ASCII 2559 * digits via {@link Character#digit Character.digit}, prepending a 2560 * negative sign (-) if the locale specific negative prefixes and suffixes 2561 * were present, and passing the resulting string to 2562 * {@link Double#parseDouble Double.parseDouble}. If the token matches 2563 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2564 * is passed to {@link Double#parseDouble(String) Double.parseDouble} as 2565 * appropriate. 2566 * 2567 * @return the {@code double} scanned from the input 2568 * @throws InputMismatchException 2569 * if the next token does not match the <i>Float</i> 2570 * regular expression, or is out of range 2571 * @throws NoSuchElementException if the input is exhausted 2572 * @throws IllegalStateException if this scanner is closed 2573 */ 2574 public double nextDouble() { 2575 // Check cached result 2576 if ((typeCache != null) && (typeCache instanceof Double)) { 2577 double val = ((Double)typeCache).doubleValue(); 2578 useTypeCache(); 2579 return val; 2580 } 2581 setRadix(10); 2582 clearCaches(); 2583 // Search for next float 2584 try { 2585 return Double.parseDouble(processFloatToken(next(floatPattern()))); 2586 } catch (NumberFormatException nfe) { 2587 position = matcher.start(); // don't skip bad token 2588 throw new InputMismatchException(nfe.getMessage()); 2589 } 2590 } 2591 2592 // Convenience methods for scanning multi precision numbers 2593 2594 /** 2595 * Returns true if the next token in this scanner's input can be 2596 * interpreted as a {@code BigInteger} in the default radix using the 2597 * {@link #nextBigInteger} method. 
The scanner does not advance past any 2598 * input. 2599 * 2600 * @return true if and only if this scanner's next token is a valid 2601 * {@code BigInteger} 2602 * @throws IllegalStateException if this scanner is closed 2603 */ 2604 public boolean hasNextBigInteger() { 2605 return hasNextBigInteger(defaultRadix); 2606 } 2607 2608 /** 2609 * Returns true if the next token in this scanner's input can be 2610 * interpreted as a {@code BigInteger} in the specified radix using 2611 * the {@link #nextBigInteger} method. The scanner does not advance past 2612 * any input. 2613 * 2614 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2615 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2616 * {@code IllegalArgumentException} is thrown. 2617 * 2618 * @param radix the radix used to interpret the token as an integer 2619 * @return true if and only if this scanner's next token is a valid 2620 * {@code BigInteger} 2621 * @throws IllegalStateException if this scanner is closed 2622 * @throws IllegalArgumentException if the radix is out of range 2623 */ 2624 public boolean hasNextBigInteger(int radix) { 2625 setRadix(radix); 2626 boolean result = hasNext(integerPattern()); 2627 if (result) { // Cache it 2628 try { 2629 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2630 processIntegerToken(hasNextResult) : 2631 hasNextResult; 2632 typeCache = new BigInteger(s, radix); 2633 } catch (NumberFormatException nfe) { 2634 result = false; 2635 } 2636 } 2637 return result; 2638 } 2639 2640 /** 2641 * Scans the next token of the input as a {@link java.math.BigInteger 2642 * BigInteger}. 2643 * 2644 * <p> An invocation of this method of the form 2645 * {@code nextBigInteger()} behaves in exactly the same way as the 2646 * invocation {@code nextBigInteger(radix)}, where {@code radix} 2647 * is the default radix of this scanner. 
2648 * 2649 * @return the {@code BigInteger} scanned from the input 2650 * @throws InputMismatchException 2651 * if the next token does not match the <i>Integer</i> 2652 * regular expression, or is out of range 2653 * @throws NoSuchElementException if the input is exhausted 2654 * @throws IllegalStateException if this scanner is closed 2655 */ 2656 public BigInteger nextBigInteger() { 2657 return nextBigInteger(defaultRadix); 2658 } 2659 2660 /** 2661 * Scans the next token of the input as a {@link java.math.BigInteger 2662 * BigInteger}. 2663 * 2664 * <p> If the next token matches the <a 2665 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2666 * above then the token is converted into a {@code BigInteger} value as if 2667 * by removing all group separators, mapping non-ASCII digits into ASCII 2668 * digits via the {@link Character#digit Character.digit}, and passing the 2669 * resulting string to the {@link 2670 * java.math.BigInteger#BigInteger(java.lang.String) 2671 * BigInteger(String, int)} constructor with the specified radix. 2672 * 2673 * <p>If the radix is less than {@link Character#MIN_RADIX Character.MIN_RADIX} 2674 * or greater than {@link Character#MAX_RADIX Character.MAX_RADIX}, then an 2675 * {@code IllegalArgumentException} is thrown. 
2676 * 2677 * @param radix the radix used to interpret the token 2678 * @return the {@code BigInteger} scanned from the input 2679 * @throws InputMismatchException 2680 * if the next token does not match the <i>Integer</i> 2681 * regular expression, or is out of range 2682 * @throws NoSuchElementException if the input is exhausted 2683 * @throws IllegalStateException if this scanner is closed 2684 * @throws IllegalArgumentException if the radix is out of range 2685 */ 2686 public BigInteger nextBigInteger(int radix) { 2687 // Check cached result 2688 if ((typeCache != null) && (typeCache instanceof BigInteger) 2689 && this.radix == radix) { 2690 BigInteger val = (BigInteger)typeCache; 2691 useTypeCache(); 2692 return val; 2693 } 2694 setRadix(radix); 2695 clearCaches(); 2696 // Search for next int 2697 try { 2698 String s = next(integerPattern()); 2699 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2700 s = processIntegerToken(s); 2701 return new BigInteger(s, radix); 2702 } catch (NumberFormatException nfe) { 2703 position = matcher.start(); // don't skip bad token 2704 throw new InputMismatchException(nfe.getMessage()); 2705 } 2706 } 2707 2708 /** 2709 * Returns true if the next token in this scanner's input can be 2710 * interpreted as a {@code BigDecimal} using the 2711 * {@link #nextBigDecimal} method. The scanner does not advance past any 2712 * input. 
2713 * 2714 * @return true if and only if this scanner's next token is a valid 2715 * {@code BigDecimal} 2716 * @throws IllegalStateException if this scanner is closed 2717 */ 2718 public boolean hasNextBigDecimal() { 2719 setRadix(10); 2720 boolean result = hasNext(decimalPattern()); 2721 if (result) { // Cache it 2722 try { 2723 String s = processFloatToken(hasNextResult); 2724 typeCache = new BigDecimal(s); 2725 } catch (NumberFormatException nfe) { 2726 result = false; 2727 } 2728 } 2729 return result; 2730 } 2731 2732 /** 2733 * Scans the next token of the input as a {@link java.math.BigDecimal 2734 * BigDecimal}. 2735 * 2736 * <p> If the next token matches the <a 2737 * href="#Decimal-regex"><i>Decimal</i></a> regular expression defined 2738 * above then the token is converted into a {@code BigDecimal} value as if 2739 * by removing all group separators, mapping non-ASCII digits into ASCII 2740 * digits via the {@link Character#digit Character.digit}, and passing the 2741 * resulting string to the {@link 2742 * java.math.BigDecimal#BigDecimal(java.lang.String) BigDecimal(String)} 2743 * constructor. 
2744 * 2745 * @return the {@code BigDecimal} scanned from the input 2746 * @throws InputMismatchException 2747 * if the next token does not match the <i>Decimal</i> 2748 * regular expression, or is out of range 2749 * @throws NoSuchElementException if the input is exhausted 2750 * @throws IllegalStateException if this scanner is closed 2751 */ 2752 public BigDecimal nextBigDecimal() { 2753 // Check cached result 2754 if ((typeCache != null) && (typeCache instanceof BigDecimal)) { 2755 BigDecimal val = (BigDecimal)typeCache; 2756 useTypeCache(); 2757 return val; 2758 } 2759 setRadix(10); 2760 clearCaches(); 2761 // Search for next float 2762 try { 2763 String s = processFloatToken(next(decimalPattern())); 2764 return new BigDecimal(s); 2765 } catch (NumberFormatException nfe) { 2766 position = matcher.start(); // don't skip bad token 2767 throw new InputMismatchException(nfe.getMessage()); 2768 } 2769 } 2770 2771 /** 2772 * Resets this scanner. 2773 * 2774 * <p> Resetting a scanner discards all of its explicit state 2775 * information which may have been changed by invocations of 2776 * {@link #useDelimiter useDelimiter()}, 2777 * {@link #useLocale useLocale()}, or 2778 * {@link #useRadix useRadix()}. 2779 * 2780 * <p> An invocation of this method of the form 2781 * {@code scanner.reset()} behaves in exactly the same way as the 2782 * invocation 2783 * 2784 * <blockquote><pre>{@code 2785 * scanner.useDelimiter("\\p{javaWhitespace}+") 2786 * .useLocale(Locale.getDefault(Locale.Category.FORMAT)) 2787 * .useRadix(10); 2788 * }</pre></blockquote> 2789 * 2790 * @return this scanner 2791 * 2792 * @since 1.6 2793 */ 2794 public Scanner reset() { 2795 delimPattern = WHITESPACE_PATTERN; 2796 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 2797 useRadix(10); 2798 clearCaches(); 2799 modCount++; 2800 return this; 2801 } 2802 2803 /** 2804 * Returns a stream of delimiter-separated tokens from this scanner. 
The 2805 * stream contains the same tokens that would be returned, starting from 2806 * this scanner's current state, by calling the {@link #next} method 2807 * repeatedly until the {@link #hasNext} method returns false. 2808 * 2809 * <p>The resulting stream is sequential and ordered. All stream elements are 2810 * non-null. 2811 * 2812 * <p>Scanning starts upon initiation of the terminal stream operation, using the 2813 * current state of this scanner. Subsequent calls to any methods on this scanner 2814 * other than {@link #close} and {@link #ioException} may return undefined results 2815 * or may cause undefined effects on the returned stream. The returned stream's source 2816 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort basis, throw a 2817 * {@link java.util.ConcurrentModificationException} if any such calls are detected 2818 * during stream pipeline execution. 2819 * 2820 * <p>After stream pipeline execution completes, this scanner is left in an indeterminate 2821 * state and cannot be reused. 2822 * 2823 * <p>If this scanner contains a resource that must be released, this scanner 2824 * should be closed, either by calling its {@link #close} method, or by 2825 * closing the returned stream. Closing the stream will close the underlying scanner. 2826 * {@code IllegalStateException} is thrown if the scanner has been closed when this 2827 * method is called, or if this scanner is closed during stream pipeline execution. 2828 * 2829 * <p>This method might block waiting for more input. 2830 * 2831 * @apiNote 2832 * For example, the following code will create a list of 2833 * comma-delimited tokens from a string: 2834 * 2835 * <pre>{@code 2836 * List<String> result = new Scanner("abc,def,,ghi") 2837 * .useDelimiter(",") 2838 * .tokens() 2839 * .collect(Collectors.toList()); 2840 * }</pre> 2841 * 2842 * <p>The resulting list would contain {@code "abc"}, {@code "def"}, 2843 * the empty string, and {@code "ghi"}. 
2844 * 2845 * @return a sequential stream of token strings 2846 * @throws IllegalStateException if this scanner is closed 2847 * @since 9 2848 */ 2849 public Stream<String> tokens() { 2850 ensureOpen(); 2851 Stream<String> stream = StreamSupport.stream(new TokenSpliterator(), false); 2852 return stream.onClose(this::close); 2853 } 2854 2855 class TokenSpliterator extends Spliterators.AbstractSpliterator<String> { 2856 int expectedCount = -1; 2857 2858 TokenSpliterator() { 2859 super(Long.MAX_VALUE, 2860 Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED); 2861 } 2862 2863 @Override 2864 public boolean tryAdvance(Consumer<? super String> cons) { 2865 if (expectedCount >= 0 && expectedCount != modCount) { 2866 throw new ConcurrentModificationException(); 2867 } 2868 2869 if (hasNext()) { 2870 String token = next(); 2871 expectedCount = modCount; 2872 cons.accept(token); 2873 if (expectedCount != modCount) { 2874 throw new ConcurrentModificationException(); 2875 } 2876 return true; 2877 } else { 2878 expectedCount = modCount; 2879 return false; 2880 } 2881 } 2882 } 2883 2884 /** 2885 * Returns a stream of match results from this scanner. The stream 2886 * contains the same results in the same order that would be returned by 2887 * calling {@code findWithinHorizon(pattern, 0)} and then {@link #match} 2888 * successively as long as {@link #findWithinHorizon findWithinHorizon()} 2889 * finds matches. 2890 * 2891 * <p>The resulting stream is sequential and ordered. All stream elements are 2892 * non-null. 2893 * 2894 * <p>Scanning starts upon initiation of the terminal stream operation, using the 2895 * current state of this scanner. Subsequent calls to any methods on this scanner 2896 * other than {@link #close} and {@link #ioException} may return undefined results 2897 * or may cause undefined effects on the returned stream. 
The returned stream's source 2898 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort basis, throw a 2899 * {@link java.util.ConcurrentModificationException} if any such calls are detected 2900 * during stream pipeline execution. 2901 * 2902 * <p>After stream pipeline execution completes, this scanner is left in an indeterminate 2903 * state and cannot be reused. 2904 * 2905 * <p>If this scanner contains a resource that must be released, this scanner 2906 * should be closed, either by calling its {@link #close} method, or by 2907 * closing the returned stream. Closing the stream will close the underlying scanner. 2908 * {@code IllegalStateException} is thrown if the scanner has been closed when this 2909 * method is called, or if this scanner is closed during stream pipeline execution. 2910 * 2911 * <p>As with the {@link #findWithinHorizon findWithinHorizon()} methods, this method 2912 * might block waiting for additional input, and it might buffer an unbounded amount of 2913 * input searching for a match. 
2914 * 2915 * @apiNote 2916 * For example, the following code will read a file and return a list 2917 * of all sequences of characters consisting of seven or more Latin capital 2918 * letters: 2919 * 2920 * <pre>{@code 2921 * try (Scanner sc = new Scanner(Path.of("input.txt"))) { 2922 * Pattern pat = Pattern.compile("[A-Z]{7,}"); 2923 * List<String> capWords = sc.findAll(pat) 2924 * .map(MatchResult::group) 2925 * .collect(Collectors.toList()); 2926 * } 2927 * }</pre> 2928 * 2929 * @param pattern the pattern to be matched 2930 * @return a sequential stream of match results 2931 * @throws NullPointerException if pattern is null 2932 * @throws IllegalStateException if this scanner is closed 2933 * @since 9 2934 */ 2935 public Stream<MatchResult> findAll(Pattern pattern) { 2936 Objects.requireNonNull(pattern); 2937 ensureOpen(); 2938 Stream<MatchResult> stream = StreamSupport.stream(new FindSpliterator(pattern), false); 2939 return stream.onClose(this::close); 2940 } 2941 2942 /** 2943 * Returns a stream of match results that match the provided pattern string. 
2944 * The effect is equivalent to the following code: 2945 * 2946 * <pre>{@code 2947 * scanner.findAll(Pattern.compile(patString)) 2948 * }</pre> 2949 * 2950 * @param patString the pattern string 2951 * @return a sequential stream of match results 2952 * @throws NullPointerException if patString is null 2953 * @throws IllegalStateException if this scanner is closed 2954 * @throws PatternSyntaxException if the regular expression's syntax is invalid 2955 * @since 9 2956 * @see java.util.regex.Pattern 2957 */ 2958 public Stream<MatchResult> findAll(String patString) { 2959 Objects.requireNonNull(patString); 2960 ensureOpen(); 2961 return findAll(patternCache.forName(patString)); 2962 } 2963 2964 class FindSpliterator extends Spliterators.AbstractSpliterator<MatchResult> { 2965 final Pattern pattern; 2966 int expectedCount = -1; 2967 private boolean advance = false; // true if we need to auto-advance 2968 2969 FindSpliterator(Pattern pattern) { 2970 super(Long.MAX_VALUE, 2971 Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED); 2972 this.pattern = pattern; 2973 } 2974 2975 @Override 2976 public boolean tryAdvance(Consumer<? 
super MatchResult> cons) { 2977 ensureOpen(); 2978 if (expectedCount >= 0) { 2979 if (expectedCount != modCount) { 2980 throw new ConcurrentModificationException(); 2981 } 2982 } else { 2983 // init 2984 matchValid = false; 2985 matcher.usePattern(pattern); 2986 expectedCount = modCount; 2987 } 2988 2989 while (true) { 2990 // assert expectedCount == modCount 2991 if (nextInBuffer()) { // doesn't increment modCount 2992 cons.accept(matcher.toMatchResult()); 2993 if (expectedCount != modCount) { 2994 throw new ConcurrentModificationException(); 2995 } 2996 return true; 2997 } 2998 if (needInput) 2999 readInput(); // doesn't increment modCount 3000 else 3001 return false; // reached end of input 3002 } 3003 } 3004 3005 // reimplementation of findPatternInBuffer with auto-advance on zero-length matches 3006 private boolean nextInBuffer() { 3007 if (advance) { 3008 if (position + 1 > buf.limit()) { 3009 if (!sourceClosed) 3010 needInput = true; 3011 return false; 3012 } 3013 position++; 3014 advance = false; 3015 } 3016 matcher.region(position, buf.limit()); 3017 if (matcher.find() && (!matcher.hitEnd() || sourceClosed)) { 3018 // Did not hit end, or hit real end 3019 position = matcher.end(); 3020 advance = matcher.start() == position; 3021 return true; 3022 } 3023 if (!sourceClosed) 3024 needInput = true; 3025 return false; 3026 } 3027 } 3028 3029 /** Small LRU cache of Patterns. 
*/
    private static class PatternLRUCache {

        // Slots ordered from most recently used (index 0) to least; allocated on first lookup
        private Pattern[] oa = null;
        private final int size;

        PatternLRUCache(int size) {
            this.size = size;
        }

        // True when the pattern's source string equals s.
        boolean hasName(Pattern p, String s) {
            return p.pattern().equals(s);
        }

        // Moves oa[i] to index 0, shifting oa[0..i-1] one slot to the right.
        void moveToFront(Object[] oa, int i) {
            Object moved = oa[i];
            System.arraycopy(oa, 0, oa, 1, i);
            oa[0] = moved;
        }

        // Returns the cached Pattern for name, compiling and caching it on a miss.
        Pattern forName(String name) {
            if (oa == null) {
                oa = new Pattern[size];
            } else {
                for (int i = 0; i < oa.length; i++) {
                    Pattern candidate = oa[i];
                    if (candidate != null && hasName(candidate, name)) {
                        if (i > 0) {
                            moveToFront(oa, i);
                        }
                        return candidate;
                    }
                }
            }

            // Miss: overwrite the last slot with the new pattern, then promote it.
            Pattern compiled = Pattern.compile(name);
            int last = oa.length - 1;
            oa[last] = compiled;
            moveToFront(oa, last);
            return compiled;
        }
    }
}