1 /* 2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util; 27 28 import java.io.*; 29 import java.math.*; 30 import java.nio.*; 31 import java.nio.channels.*; 32 import java.nio.charset.*; 33 import java.nio.file.Path; 34 import java.nio.file.Files; 35 import java.text.*; 36 import java.util.regex.*; 37 38 import sun.misc.LRUCache; 39 40 /** 41 * A simple text scanner which can parse primitive types and strings using 42 * regular expressions. 43 * 44 * <p>A <code>Scanner</code> breaks its input into tokens using a 45 * delimiter pattern, which by default matches whitespace. The resulting 46 * tokens may then be converted into values of different types using the 47 * various <tt>next</tt> methods. 
48 * 49 * <p>For example, this code allows a user to read a number from 50 * <tt>System.in</tt>: 51 * <blockquote><pre>{@code 52 * Scanner sc = new Scanner(System.in); 53 * int i = sc.nextInt(); 54 * }</pre></blockquote> 55 * 56 * <p>As another example, this code allows <code>long</code> types to be 57 * assigned from entries in a file <code>myNumbers</code>: 58 * <blockquote><pre>{@code 59 * Scanner sc = new Scanner(new File("myNumbers")); 60 * while (sc.hasNextLong()) { 61 * long aLong = sc.nextLong(); 62 * } 63 * }</pre></blockquote> 64 * 65 * <p>The scanner can also use delimiters other than whitespace. This 66 * example reads several items in from a string: 67 * <blockquote><pre>{@code 68 * String input = "1 fish 2 fish red fish blue fish"; 69 * Scanner s = new Scanner(input).useDelimiter("\\s*fish\\s*"); 70 * System.out.println(s.nextInt()); 71 * System.out.println(s.nextInt()); 72 * System.out.println(s.next()); 73 * System.out.println(s.next()); 74 * s.close(); 75 * }</pre></blockquote> 76 * <p> 77 * prints the following output: 78 * <blockquote><pre>{@code 79 * 1 80 * 2 81 * red 82 * blue 83 * }</pre></blockquote> 84 * 85 * <p>The same output can be generated with this code, which uses a regular 86 * expression to parse all four tokens at once: 87 * <blockquote><pre>{@code 88 * String input = "1 fish 2 fish red fish blue fish"; 89 * Scanner s = new Scanner(input); 90 * s.findInLine("(\\d+) fish (\\d+) fish (\\w+) fish (\\w+)"); 91 * MatchResult result = s.match(); 92 * for (int i=1; i<=result.groupCount(); i++) 93 * System.out.println(result.group(i)); 94 * s.close(); 95 * }</pre></blockquote> 96 * 97 * <p>The <a name="default-delimiter">default whitespace delimiter</a> used 98 * by a scanner is as recognized by {@link java.lang.Character}.{@link 99 * java.lang.Character#isWhitespace(char) isWhitespace}. 
The {@link #reset} 100 * method will reset the value of the scanner's delimiter to the default 101 * whitespace delimiter regardless of whether it was previously changed. 102 * 103 * <p>A scanning operation may block waiting for input. 104 * 105 * <p>The {@link #next} and {@link #hasNext} methods and their 106 * primitive-type companion methods (such as {@link #nextInt} and 107 * {@link #hasNextInt}) first skip any input that matches the delimiter 108 * pattern, and then attempt to return the next token. Both <tt>hasNext</tt> 109 * and <tt>next</tt> methods may block waiting for further input. Whether a 110 * <tt>hasNext</tt> method blocks has no connection to whether or not its 111 * associated <tt>next</tt> method will block. 112 * 113 * <p> The {@link #findInLine}, {@link #findWithinHorizon}, and {@link #skip} 114 * methods operate independently of the delimiter pattern. These methods will 115 * attempt to match the specified pattern with no regard to delimiters in the 116 * input and thus can be used in special circumstances where delimiters are 117 * not relevant. These methods may block waiting for more input. 118 * 119 * <p>When a scanner throws an {@link InputMismatchException}, the scanner 120 * will not pass the token that caused the exception, so that it may be 121 * retrieved or skipped via some other method. 122 * 123 * <p>Depending upon the type of delimiting pattern, empty tokens may be 124 * returned. For example, the pattern <tt>"\\s+"</tt> will return no empty 125 * tokens since it matches multiple instances of the delimiter. The delimiting 126 * pattern <tt>"\\s"</tt> could return empty tokens since it only passes one 127 * space at a time. 128 * 129 * <p> A scanner can read text from any object which implements the {@link 130 * java.lang.Readable} interface. 
If an invocation of the underlying 131 * readable's {@link java.lang.Readable#read} method throws an {@link 132 * java.io.IOException} then the scanner assumes that the end of the input 133 * has been reached. The most recent <tt>IOException</tt> thrown by the 134 * underlying readable can be retrieved via the {@link #ioException} method. 135 * 136 * <p>When a <code>Scanner</code> is closed, it will close its input source 137 * if the source implements the {@link java.io.Closeable} interface. 138 * 139 * <p>A <code>Scanner</code> is not safe for multithreaded use without 140 * external synchronization. 141 * 142 * <p>Unless otherwise mentioned, passing a <code>null</code> parameter into 143 * any method of a <code>Scanner</code> will cause a 144 * <code>NullPointerException</code> to be thrown. 145 * 146 * <p>A scanner will default to interpreting numbers as decimal unless a 147 * different radix has been set by using the {@link #useRadix} method. The 148 * {@link #reset} method will reset the value of the scanner's radix to 149 * <code>10</code> regardless of whether it was previously changed. 150 * 151 * <h3> <a name="localized-numbers">Localized numbers</a> </h3> 152 * 153 * <p> An instance of this class is capable of scanning numbers in the standard 154 * formats as well as in the formats of the scanner's locale. A scanner's 155 * <a name="initial-locale">initial locale </a>is the value returned by the {@link 156 * java.util.Locale#getDefault(Locale.Category) 157 * Locale.getDefault(Locale.Category.FORMAT)} method; it may be changed via the {@link 158 * #useLocale} method. The {@link #reset} method will reset the value of the 159 * scanner's locale to the initial locale regardless of whether it was 160 * previously changed. 
161 * 162 * <p>The localized formats are defined in terms of the following parameters, 163 * which for a particular locale are taken from that locale's {@link 164 * java.text.DecimalFormat DecimalFormat} object, <tt>df</tt>, and its 165 * {@link java.text.DecimalFormatSymbols DecimalFormatSymbols} object, 166 * <tt>dfs</tt>. 167 * 168 * <blockquote><dl> 169 * <dt><i>LocalGroupSeparator </i> 170 * <dd>The character used to separate thousands groups, 171 * <i>i.e.,</i> <tt>dfs.</tt>{@link 172 * java.text.DecimalFormatSymbols#getGroupingSeparator 173 * getGroupingSeparator()} 174 * <dt><i>LocalDecimalSeparator </i> 175 * <dd>The character used for the decimal point, 176 * <i>i.e.,</i> <tt>dfs.</tt>{@link 177 * java.text.DecimalFormatSymbols#getDecimalSeparator 178 * getDecimalSeparator()} 179 * <dt><i>LocalPositivePrefix </i> 180 * <dd>The string that appears before a positive number (may 181 * be empty), <i>i.e.,</i> <tt>df.</tt>{@link 182 * java.text.DecimalFormat#getPositivePrefix 183 * getPositivePrefix()} 184 * <dt><i>LocalPositiveSuffix </i> 185 * <dd>The string that appears after a positive number (may be 186 * empty), <i>i.e.,</i> <tt>df.</tt>{@link 187 * java.text.DecimalFormat#getPositiveSuffix 188 * getPositiveSuffix()} 189 * <dt><i>LocalNegativePrefix </i> 190 * <dd>The string that appears before a negative number (may 191 * be empty), <i>i.e.,</i> <tt>df.</tt>{@link 192 * java.text.DecimalFormat#getNegativePrefix 193 * getNegativePrefix()} 194 * <dt><i>LocalNegativeSuffix </i> 195 * <dd>The string that appears after a negative number (may be 196 * empty), <i>i.e.,</i> <tt>df.</tt>{@link 197 * java.text.DecimalFormat#getNegativeSuffix 198 * getNegativeSuffix()} 199 * <dt><i>LocalNaN </i> 200 * <dd>The string that represents not-a-number for 201 * floating-point values, 202 * <i>i.e.,</i> <tt>dfs.</tt>{@link 203 * java.text.DecimalFormatSymbols#getNaN 204 * getNaN()} 205 * <dt><i>LocalInfinity </i> 206 * <dd>The string that represents infinity for 
floating-point 207 * values, <i>i.e.,</i> <tt>dfs.</tt>{@link 208 * java.text.DecimalFormatSymbols#getInfinity 209 * getInfinity()} 210 * </dl></blockquote> 211 * 212 * <h4> <a name="number-syntax">Number syntax</a> </h4> 213 * 214 * <p> The strings that can be parsed as numbers by an instance of this class 215 * are specified in terms of the following regular-expression grammar, where 216 * Rmax is the highest digit in the radix being used (for example, Rmax is 9 in base 10). 217 * 218 * <dl> 219 * <dt><i>NonAsciiDigit</i>: 220 * <dd>A non-ASCII character c for which 221 * {@link java.lang.Character#isDigit Character.isDigit}<tt>(c)</tt> 222 * returns true 223 * 224 * <dt><i>Non0Digit</i>: 225 * <dd><tt>[1-</tt><i>Rmax</i><tt>] | </tt><i>NonASCIIDigit</i> 226 * 227 * <dt><i>Digit</i>: 228 * <dd><tt>[0-</tt><i>Rmax</i><tt>] | </tt><i>NonASCIIDigit</i> 229 * 230 * <dt><i>GroupedNumeral</i>: 231 * <dd><tt>( </tt><i>Non0Digit</i> 232 * <i>Digit</i><tt>? 233 * </tt><i>Digit</i><tt>?</tt> 234 * <dd> <tt>( </tt><i>LocalGroupSeparator</i> 235 * <i>Digit</i> 236 * <i>Digit</i> 237 * <i>Digit</i><tt> )+ )</tt> 238 * 239 * <dt><i>Numeral</i>: 240 * <dd><tt>( ( </tt><i>Digit</i><tt>+ ) 241 * | </tt><i>GroupedNumeral</i><tt> )</tt> 242 * 243 * <dt><a name="Integer-regex"><i>Integer</i>:</a> 244 * <dd><tt>( [-+]? ( </tt><i>Numeral</i><tt> 245 * ) )</tt> 246 * <dd><tt>| </tt><i>LocalPositivePrefix</i> <i>Numeral</i> 247 * <i>LocalPositiveSuffix</i> 248 * <dd><tt>| </tt><i>LocalNegativePrefix</i> <i>Numeral</i> 249 * <i>LocalNegativeSuffix</i> 250 * 251 * <dt><i>DecimalNumeral</i>: 252 * <dd><i>Numeral</i> 253 * <dd><tt>| </tt><i>Numeral</i> 254 * <i>LocalDecimalSeparator</i> 255 * <i>Digit</i><tt>*</tt> 256 * <dd><tt>| </tt><i>LocalDecimalSeparator</i> 257 * <i>Digit</i><tt>+</tt> 258 * 259 * <dt><i>Exponent</i>: 260 * <dd><tt>( [eE] [+-]? </tt><i>Digit</i><tt>+ )</tt> 261 * 262 * <dt><a name="Decimal-regex"><i>Decimal</i>:</a> 263 * <dd><tt>( [-+]? 
</tt><i>DecimalNumeral</i> 264 * <i>Exponent</i><tt>? )</tt> 265 * <dd><tt>| </tt><i>LocalPositivePrefix</i> 266 * <i>DecimalNumeral</i> 267 * <i>LocalPositiveSuffix</i> 268 * <i>Exponent</i><tt>?</tt> 269 * <dd><tt>| </tt><i>LocalNegativePrefix</i> 270 * <i>DecimalNumeral</i> 271 * <i>LocalNegativeSuffix</i> 272 * <i>Exponent</i><tt>?</tt> 273 * 274 * <dt><i>HexFloat</i>: 275 * <dd><tt>[-+]? 0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+ 276 * ([pP][-+]?[0-9]+)?</tt> 277 * 278 * <dt><i>NonNumber</i>: 279 * <dd><tt>NaN 280 * | </tt><i>LocalNaN</i><tt> 281 * | Infinity 282 * | </tt><i>LocalInfinity</i> 283 * 284 * <dt><i>SignedNonNumber</i>: 285 * <dd><tt>( [-+]? </tt><i>NonNumber</i><tt> )</tt> 286 * <dd><tt>| </tt><i>LocalPositivePrefix</i> 287 * <i>NonNumber</i> 288 * <i>LocalPositiveSuffix</i> 289 * <dd><tt>| </tt><i>LocalNegativePrefix</i> 290 * <i>NonNumber</i> 291 * <i>LocalNegativeSuffix</i> 292 * 293 * <dt><a name="Float-regex"><i>Float</i></a>: 294 * <dd><i>Decimal</i> 295 * <tt>| </tt><i>HexFloat</i> 296 * <tt>| </tt><i>SignedNonNumber</i> 297 * 298 * </dl> 299 * <p>Whitespace is not significant in the above regular expressions. 
300 * 301 * @since 1.5 302 */ 303 public final class Scanner implements Iterator<String>, Closeable { 304 305 // Internal buffer used to hold input 306 private CharBuffer buf; 307 308 // Size of internal character buffer 309 private static final int BUFFER_SIZE = 1024; // change to 1024; 310 311 // The index into the buffer currently held by the Scanner 312 private int position; 313 314 // Internal matcher used for finding delimiters 315 private Matcher matcher; 316 317 // Pattern used to delimit tokens 318 private Pattern delimPattern; 319 320 // Pattern found in last hasNext operation 321 private Pattern hasNextPattern; 322 323 // Position after last hasNext operation 324 private int hasNextPosition; 325 326 // Result after last hasNext operation 327 private String hasNextResult; 328 329 // The input source 330 private Readable source; 331 332 // Boolean is true if source is done 333 private boolean sourceClosed = false; 334 335 // Boolean indicating more input is required 336 private boolean needInput = false; 337 338 // Boolean indicating if a delim has been skipped this operation 339 private boolean skipped = false; 340 341 // A store of a position that the scanner may fall back to 342 private int savedScannerPosition = -1; 343 344 // A cache of the last primitive type scanned 345 private Object typeCache = null; 346 347 // Boolean indicating if a match result is available 348 private boolean matchValid = false; 349 350 // Boolean indicating if this scanner has been closed 351 private boolean closed = false; 352 353 // The current radix used by this scanner 354 private int radix = 10; 355 356 // The default radix for this scanner 357 private int defaultRadix = 10; 358 359 // The locale used by this scanner 360 private Locale locale = null; 361 362 // A cache of the last few recently used Patterns 363 private LRUCache<String,Pattern> patternCache = 364 new LRUCache<String,Pattern>(7) { 365 protected Pattern create(String s) { 366 return Pattern.compile(s); 367 
} 368 protected boolean hasName(Pattern p, String s) { 369 return p.pattern().equals(s); 370 } 371 }; 372 373 // A holder of the last IOException encountered 374 private IOException lastException; 375 376 // A pattern for java whitespace 377 private static Pattern WHITESPACE_PATTERN = Pattern.compile( 378 "\\p{javaWhitespace}+"); 379 380 // A pattern for any token 381 private static Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*"); 382 383 // A pattern for non-ASCII digits 384 private static Pattern NON_ASCII_DIGIT = Pattern.compile( 385 "[\\p{javaDigit}&&[^0-9]]"); 386 387 // Fields and methods to support scanning primitive types 388 389 /** 390 * Locale dependent values used to scan numbers 391 */ 392 private String groupSeparator = "\\,"; 393 private String decimalSeparator = "\\."; 394 private String nanString = "NaN"; 395 private String infinityString = "Infinity"; 396 private String positivePrefix = ""; 397 private String negativePrefix = "\\-"; 398 private String positiveSuffix = ""; 399 private String negativeSuffix = ""; 400 401 /** 402 * Fields and an accessor method to match booleans 403 */ 404 private static volatile Pattern boolPattern; 405 private static final String BOOLEAN_PATTERN = "true|false"; 406 private static Pattern boolPattern() { 407 Pattern bp = boolPattern; 408 if (bp == null) 409 boolPattern = bp = Pattern.compile(BOOLEAN_PATTERN, 410 Pattern.CASE_INSENSITIVE); 411 return bp; 412 } 413 414 /** 415 * Fields and methods to match bytes, shorts, ints, and longs 416 */ 417 private Pattern integerPattern; 418 private String digits = "0123456789abcdefghijklmnopqrstuvwxyz"; 419 private String non0Digit = "[\\p{javaDigit}&&[^0]]"; 420 private int SIMPLE_GROUP_INDEX = 5; 421 private String buildIntegerPatternString() { 422 String radixDigits = digits.substring(0, radix); 423 // \\p{javaDigit} is not guaranteed to be appropriate 424 // here but what can we do? 
The final authority will be 425 // whatever parse method is invoked, so ultimately the 426 // Scanner will do the right thing 427 String digit = "((?i)["+radixDigits+"\\p{javaDigit}])"; 428 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 429 groupSeparator+digit+digit+digit+")+)"; 430 // digit++ is the possessive form which is necessary for reducing 431 // backtracking that would otherwise cause unacceptable performance 432 String numeral = "(("+ digit+"++)|"+groupedNumeral+")"; 433 String javaStyleInteger = "([-+]?(" + numeral + "))"; 434 String negativeInteger = negativePrefix + numeral + negativeSuffix; 435 String positiveInteger = positivePrefix + numeral + positiveSuffix; 436 return "("+ javaStyleInteger + ")|(" + 437 positiveInteger + ")|(" + 438 negativeInteger + ")"; 439 } 440 private Pattern integerPattern() { 441 if (integerPattern == null) { 442 integerPattern = patternCache.forName(buildIntegerPatternString()); 443 } 444 return integerPattern; 445 } 446 447 /** 448 * Fields and an accessor method to match line separators 449 */ 450 private static volatile Pattern separatorPattern; 451 private static volatile Pattern linePattern; 452 private static final String LINE_SEPARATOR_PATTERN = 453 "\r\n|[\n\r\u2028\u2029\u0085]"; 454 private static final String LINE_PATTERN = ".*("+LINE_SEPARATOR_PATTERN+")|.+$"; 455 456 private static Pattern separatorPattern() { 457 Pattern sp = separatorPattern; 458 if (sp == null) 459 separatorPattern = sp = Pattern.compile(LINE_SEPARATOR_PATTERN); 460 return sp; 461 } 462 463 private static Pattern linePattern() { 464 Pattern lp = linePattern; 465 if (lp == null) 466 linePattern = lp = Pattern.compile(LINE_PATTERN); 467 return lp; 468 } 469 470 /** 471 * Fields and methods to match floats and doubles 472 */ 473 private Pattern floatPattern; 474 private Pattern decimalPattern; 475 private void buildFloatAndDecimalPattern() { 476 // \\p{javaDigit} may not be perfect, see above 477 String digit = 
"(([0-9\\p{javaDigit}]))"; 478 String exponent = "([eE][+-]?"+digit+"+)?"; 479 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 480 groupSeparator+digit+digit+digit+")+)"; 481 // Once again digit++ is used for performance, as above 482 String numeral = "(("+digit+"++)|"+groupedNumeral+")"; 483 String decimalNumeral = "("+numeral+"|"+numeral + 484 decimalSeparator + digit + "*+|"+ decimalSeparator + 485 digit + "++)"; 486 String nonNumber = "(NaN|"+nanString+"|Infinity|"+ 487 infinityString+")"; 488 String positiveFloat = "(" + positivePrefix + decimalNumeral + 489 positiveSuffix + exponent + ")"; 490 String negativeFloat = "(" + negativePrefix + decimalNumeral + 491 negativeSuffix + exponent + ")"; 492 String decimal = "(([-+]?" + decimalNumeral + exponent + ")|"+ 493 positiveFloat + "|" + negativeFloat + ")"; 494 String hexFloat = 495 "[-+]?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP][-+]?[0-9]+)?"; 496 String positiveNonNumber = "(" + positivePrefix + nonNumber + 497 positiveSuffix + ")"; 498 String negativeNonNumber = "(" + negativePrefix + nonNumber + 499 negativeSuffix + ")"; 500 String signedNonNumber = "(([-+]?"+nonNumber+")|" + 501 positiveNonNumber + "|" + 502 negativeNonNumber + ")"; 503 floatPattern = Pattern.compile(decimal + "|" + hexFloat + "|" + 504 signedNonNumber); 505 decimalPattern = Pattern.compile(decimal); 506 } 507 private Pattern floatPattern() { 508 if (floatPattern == null) { 509 buildFloatAndDecimalPattern(); 510 } 511 return floatPattern; 512 } 513 private Pattern decimalPattern() { 514 if (decimalPattern == null) { 515 buildFloatAndDecimalPattern(); 516 } 517 return decimalPattern; 518 } 519 520 // Constructors 521 522 /** 523 * Constructs a <code>Scanner</code> that returns values scanned 524 * from the specified source delimited by the specified pattern. 
525 * 526 * @param source A character source implementing the Readable interface 527 * @param pattern A delimiting pattern 528 */ 529 private Scanner(Readable source, Pattern pattern) { 530 assert source != null : "source should not be null"; 531 assert pattern != null : "pattern should not be null"; 532 this.source = source; 533 delimPattern = pattern; 534 buf = CharBuffer.allocate(BUFFER_SIZE); 535 buf.limit(0); 536 matcher = delimPattern.matcher(buf); 537 matcher.useTransparentBounds(true); 538 matcher.useAnchoringBounds(false); 539 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 540 } 541 542 /** 543 * Constructs a new <code>Scanner</code> that produces values scanned 544 * from the specified source. 545 * 546 * @param source A character source implementing the {@link Readable} 547 * interface 548 */ 549 public Scanner(Readable source) { 550 this(Objects.requireNonNull(source, "source"), WHITESPACE_PATTERN); 551 } 552 553 /** 554 * Constructs a new <code>Scanner</code> that produces values scanned 555 * from the specified input stream. Bytes from the stream are converted 556 * into characters using the underlying platform's 557 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 558 * 559 * @param source An input stream to be scanned 560 */ 561 public Scanner(InputStream source) { 562 this(new InputStreamReader(source), WHITESPACE_PATTERN); 563 } 564 565 /** 566 * Constructs a new <code>Scanner</code> that produces values scanned 567 * from the specified input stream. Bytes from the stream are converted 568 * into characters using the specified charset. 
569 * 570 * @param source An input stream to be scanned 571 * @param charsetName The encoding type used to convert bytes from the 572 * stream into characters to be scanned 573 * @throws IllegalArgumentException if the specified character set 574 * does not exist 575 */ 576 public Scanner(InputStream source, String charsetName) { 577 this(makeReadable(Objects.requireNonNull(source, "source"), toCharset(charsetName)), 578 WHITESPACE_PATTERN); 579 } 580 581 /** 582 * Returns a charset object for the given charset name. 583 * @throws NullPointerException is csn is null 584 * @throws IllegalArgumentException if the charset is not supported 585 */ 586 private static Charset toCharset(String csn) { 587 Objects.requireNonNull(csn, "charsetName"); 588 try { 589 return Charset.forName(csn); 590 } catch (IllegalCharsetNameException|UnsupportedCharsetException e) { 591 // IllegalArgumentException should be thrown 592 throw new IllegalArgumentException(e); 593 } 594 } 595 596 private static Readable makeReadable(InputStream source, Charset charset) { 597 return new InputStreamReader(source, charset); 598 } 599 600 /** 601 * Constructs a new <code>Scanner</code> that produces values scanned 602 * from the specified file. Bytes from the file are converted into 603 * characters using the underlying platform's 604 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 605 * 606 * @param source A file to be scanned 607 * @throws FileNotFoundException if source is not found 608 */ 609 public Scanner(File source) throws FileNotFoundException { 610 this((ReadableByteChannel)(new FileInputStream(source).getChannel())); 611 } 612 613 /** 614 * Constructs a new <code>Scanner</code> that produces values scanned 615 * from the specified file. Bytes from the file are converted into 616 * characters using the specified charset. 
617 * 618 * @param source A file to be scanned 619 * @param charsetName The encoding type used to convert bytes from the file 620 * into characters to be scanned 621 * @throws FileNotFoundException if source is not found 622 * @throws IllegalArgumentException if the specified encoding is 623 * not found 624 */ 625 public Scanner(File source, String charsetName) 626 throws FileNotFoundException 627 { 628 this(Objects.requireNonNull(source), toDecoder(charsetName)); 629 } 630 631 private Scanner(File source, CharsetDecoder dec) 632 throws FileNotFoundException 633 { 634 this(makeReadable((ReadableByteChannel)(new FileInputStream(source).getChannel()), dec)); 635 } 636 637 private static CharsetDecoder toDecoder(String charsetName) { 638 Objects.requireNonNull(charsetName, "charsetName"); 639 try { 640 return Charset.forName(charsetName).newDecoder(); 641 } catch (IllegalCharsetNameException|UnsupportedCharsetException unused) { 642 throw new IllegalArgumentException(charsetName); 643 } 644 } 645 646 private static Readable makeReadable(ReadableByteChannel source, 647 CharsetDecoder dec) { 648 return Channels.newReader(source, dec, -1); 649 } 650 651 /** 652 * Constructs a new <code>Scanner</code> that produces values scanned 653 * from the specified file. Bytes from the file are converted into 654 * characters using the underlying platform's 655 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 656 * 657 * @param source 658 * the path to the file to be scanned 659 * @throws IOException 660 * if an I/O error occurs opening source 661 * 662 * @since 1.7 663 */ 664 public Scanner(Path source) 665 throws IOException 666 { 667 this(Files.newInputStream(source)); 668 } 669 670 /** 671 * Constructs a new <code>Scanner</code> that produces values scanned 672 * from the specified file. Bytes from the file are converted into 673 * characters using the specified charset. 
674 * 675 * @param source 676 * the path to the file to be scanned 677 * @param charsetName 678 * The encoding type used to convert bytes from the file 679 * into characters to be scanned 680 * @throws IOException 681 * if an I/O error occurs opening source 682 * @throws IllegalArgumentException 683 * if the specified encoding is not found 684 * @since 1.7 685 */ 686 public Scanner(Path source, String charsetName) throws IOException { 687 this(Objects.requireNonNull(source), toCharset(charsetName)); 688 } 689 690 private Scanner(Path source, Charset charset) throws IOException { 691 this(makeReadable(Files.newInputStream(source), charset)); 692 } 693 694 /** 695 * Constructs a new <code>Scanner</code> that produces values scanned 696 * from the specified string. 697 * 698 * @param source A string to scan 699 */ 700 public Scanner(String source) { 701 this(new StringReader(source), WHITESPACE_PATTERN); 702 } 703 704 /** 705 * Constructs a new <code>Scanner</code> that produces values scanned 706 * from the specified channel. Bytes from the source are converted into 707 * characters using the underlying platform's 708 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 709 * 710 * @param source A channel to scan 711 */ 712 public Scanner(ReadableByteChannel source) { 713 this(makeReadable(Objects.requireNonNull(source, "source")), 714 WHITESPACE_PATTERN); 715 } 716 717 private static Readable makeReadable(ReadableByteChannel source) { 718 return makeReadable(source, Charset.defaultCharset().newDecoder()); 719 } 720 721 /** 722 * Constructs a new <code>Scanner</code> that produces values scanned 723 * from the specified channel. Bytes from the source are converted into 724 * characters using the specified charset. 
*
     * @param  source A channel to scan
     * @param  charsetName The encoding type used to convert bytes from the
     *         channel into characters to be scanned
     * @throws IllegalArgumentException if the specified character set
     *         does not exist
     */
    public Scanner(ReadableByteChannel source, String charsetName) {
        this(makeReadable(Objects.requireNonNull(source, "source"), toDecoder(charsetName)),
             WHITESPACE_PATTERN);
    }

    // ---- Private primitives used to support scanning ----

    // Remembers the current position so a failed operation can fall back to it
    private void saveState() {
        savedScannerPosition = position;
    }

    // Falls back to the remembered position and forgets it; also re-arms
    // delimiter skipping for the next operation
    private void revertState() {
        position = savedScannerPosition;
        savedScannerPosition = -1;
        skipped = false;
    }

    // Same as revertState(), handing b back so callers can revert and
    // return in a single statement
    private boolean revertState(boolean b) {
        revertState();
        return b;
    }

    // Caches the matcher's current match as the pending hasNext result
    private void cacheResult() {
        cacheResult(matcher.group());
    }

    // Caches the given text as the pending hasNext result, together with the
    // matcher's current end position and pattern
    private void cacheResult(String result) {
        hasNextResult = result;
        hasNextPosition = matcher.end();
        hasNextPattern = matcher.pattern();
    }

    // Invalidates both the regular (hasNext) cache and the type cache
    private void clearCaches() {
        hasNextPattern = null;
        typeCache = null;
    }

    // Consumes the cached hasNext result: advances past it, invalidates
    // both caches and returns the cached text
    private String getCachedResult() {
        position = hasNextPosition;
        clearCaches();
        return hasNextResult;
    }

    // Advances past the cached token and invalidates both caches; fails
    // if the scanner has already been closed
    private void useTypeCache() {
        if (closed) {
            throw new IllegalStateException("Scanner closed");
        }
        position = hasNextPosition;
        clearCaches();
    }

    // Tries to read more input. May block.
792 private void readInput() { 793 if (buf.limit() == buf.capacity()) 794 makeSpace(); 795 796 // Prepare to receive data 797 int p = buf.position(); 798 buf.position(buf.limit()); 799 buf.limit(buf.capacity()); 800 801 int n = 0; 802 try { 803 n = source.read(buf); 804 } catch (IOException ioe) { 805 lastException = ioe; 806 n = -1; 807 } 808 809 if (n == -1) { 810 sourceClosed = true; 811 needInput = false; 812 } 813 814 if (n > 0) 815 needInput = false; 816 817 // Restore current position and limit for reading 818 buf.limit(buf.position()); 819 buf.position(p); 820 } 821 822 // After this method is called there will either be an exception 823 // or else there will be space in the buffer 824 private boolean makeSpace() { 825 clearCaches(); 826 int offset = savedScannerPosition == -1 ? 827 position : savedScannerPosition; 828 buf.position(offset); 829 // Gain space by compacting buffer 830 if (offset > 0) { 831 buf.compact(); 832 translateSavedIndexes(offset); 833 position -= offset; 834 buf.flip(); 835 return true; 836 } 837 // Gain space by growing buffer 838 int newSize = buf.capacity() * 2; 839 CharBuffer newBuf = CharBuffer.allocate(newSize); 840 newBuf.put(buf); 841 newBuf.flip(); 842 translateSavedIndexes(offset); 843 position -= offset; 844 buf = newBuf; 845 matcher.reset(buf); 846 return true; 847 } 848 849 // When a buffer compaction/reallocation occurs the saved indexes must 850 // be modified appropriately 851 private void translateSavedIndexes(int offset) { 852 if (savedScannerPosition != -1) 853 savedScannerPosition -= offset; 854 } 855 856 // If we are at the end of input then NoSuchElement; 857 // If there is still input left then InputMismatch 858 private void throwFor() { 859 skipped = false; 860 if ((sourceClosed) && (position == buf.limit())) 861 throw new NoSuchElementException(); 862 else 863 throw new InputMismatchException(); 864 } 865 866 // Returns true if a complete token or partial token is in the buffer. 
867 // It is not necessary to find a complete token since a partial token 868 // means that there will be another token with or without more input. 869 private boolean hasTokenInBuffer() { 870 matchValid = false; 871 matcher.usePattern(delimPattern); 872 matcher.region(position, buf.limit()); 873 874 // Skip delims first 875 if (matcher.lookingAt()) 876 position = matcher.end(); 877 878 // If we are sitting at the end, no more tokens in buffer 879 if (position == buf.limit()) 880 return false; 881 882 return true; 883 } 884 885 /* 886 * Returns a "complete token" that matches the specified pattern 887 * 888 * A token is complete if surrounded by delims; a partial token 889 * is prefixed by delims but not postfixed by them 890 * 891 * The position is advanced to the end of that complete token 892 * 893 * Pattern == null means accept any token at all 894 * 895 * Triple return: 896 * 1. valid string means it was found 897 * 2. null with needInput=false means we won't ever find it 898 * 3. null with needInput=true means try again after readInput 899 */ 900 private String getCompleteTokenInBuffer(Pattern pattern) { 901 matchValid = false; 902 903 // Skip delims first 904 matcher.usePattern(delimPattern); 905 if (!skipped) { // Enforcing only one skip of leading delims 906 matcher.region(position, buf.limit()); 907 if (matcher.lookingAt()) { 908 // If more input could extend the delimiters then we must wait 909 // for more input 910 if (matcher.hitEnd() && !sourceClosed) { 911 needInput = true; 912 return null; 913 } 914 // The delims were whole and the matcher should skip them 915 skipped = true; 916 position = matcher.end(); 917 } 918 } 919 920 // If we are sitting at the end, no more tokens in buffer 921 if (position == buf.limit()) { 922 if (sourceClosed) 923 return null; 924 needInput = true; 925 return null; 926 } 927 928 // Must look for next delims. 
Simply attempting to match the 929 // pattern at this point may find a match but it might not be 930 // the first longest match because of missing input, or it might 931 // match a partial token instead of the whole thing. 932 933 // Then look for next delims 934 matcher.region(position, buf.limit()); 935 boolean foundNextDelim = matcher.find(); 936 if (foundNextDelim && (matcher.end() == position)) { 937 // Zero length delimiter match; we should find the next one 938 // using the automatic advance past a zero length match; 939 // Otherwise we have just found the same one we just skipped 940 foundNextDelim = matcher.find(); 941 } 942 if (foundNextDelim) { 943 // In the rare case that more input could cause the match 944 // to be lost and there is more input coming we must wait 945 // for more input. Note that hitting the end is okay as long 946 // as the match cannot go away. It is the beginning of the 947 // next delims we want to be sure about, we don't care if 948 // they potentially extend further. 949 if (matcher.requireEnd() && !sourceClosed) { 950 needInput = true; 951 return null; 952 } 953 int tokenEnd = matcher.start(); 954 // There is a complete token. 
955 if (pattern == null) { 956 // Must continue with match to provide valid MatchResult 957 pattern = FIND_ANY_PATTERN; 958 } 959 // Attempt to match against the desired pattern 960 matcher.usePattern(pattern); 961 matcher.region(position, tokenEnd); 962 if (matcher.matches()) { 963 String s = matcher.group(); 964 position = matcher.end(); 965 return s; 966 } else { // Complete token but it does not match 967 return null; 968 } 969 } 970 971 // If we can't find the next delims but no more input is coming, 972 // then we can treat the remainder as a whole token 973 if (sourceClosed) { 974 if (pattern == null) { 975 // Must continue with match to provide valid MatchResult 976 pattern = FIND_ANY_PATTERN; 977 } 978 // Last token; Match the pattern here or throw 979 matcher.usePattern(pattern); 980 matcher.region(position, buf.limit()); 981 if (matcher.matches()) { 982 String s = matcher.group(); 983 position = matcher.end(); 984 return s; 985 } 986 // Last piece does not match 987 return null; 988 } 989 990 // There is a partial token in the buffer; must read more 991 // to complete it 992 needInput = true; 993 return null; 994 } 995 996 // Finds the specified pattern in the buffer up to horizon. 997 // Returns true if the specified input pattern was matched, 998 // and leaves the matcher field with the current match state. 
999 private boolean findPatternInBuffer(Pattern pattern, int horizon) { 1000 matchValid = false; 1001 matcher.usePattern(pattern); 1002 int bufferLimit = buf.limit(); 1003 int horizonLimit = -1; 1004 int searchLimit = bufferLimit; 1005 if (horizon > 0) { 1006 horizonLimit = position + horizon; 1007 if (horizonLimit < bufferLimit) 1008 searchLimit = horizonLimit; 1009 } 1010 matcher.region(position, searchLimit); 1011 if (matcher.find()) { 1012 if (matcher.hitEnd() && (!sourceClosed)) { 1013 // The match may be longer if didn't hit horizon or real end 1014 if (searchLimit != horizonLimit) { 1015 // Hit an artificial end; try to extend the match 1016 needInput = true; 1017 return false; 1018 } 1019 // The match could go away depending on what is next 1020 if ((searchLimit == horizonLimit) && matcher.requireEnd()) { 1021 // Rare case: we hit the end of input and it happens 1022 // that it is at the horizon and the end of input is 1023 // required for the match. 1024 needInput = true; 1025 return false; 1026 } 1027 } 1028 // Did not hit end, or hit real end, or hit horizon 1029 position = matcher.end(); 1030 return true; 1031 } 1032 1033 if (sourceClosed) 1034 return false; 1035 1036 // If there is no specified horizon, or if we have not searched 1037 // to the specified horizon yet, get more input 1038 if ((horizon == 0) || (searchLimit != horizonLimit)) 1039 needInput = true; 1040 return false; 1041 } 1042 1043 // Attempts to match a pattern anchored at the current position. 1044 // Returns true if the specified input pattern was matched, 1045 // and leaves the matcher field with the current match state. 
1046 private boolean matchPatternInBuffer(Pattern pattern) { 1047 matchValid = false; 1048 matcher.usePattern(pattern); 1049 matcher.region(position, buf.limit()); 1050 if (matcher.lookingAt()) { 1051 if (matcher.hitEnd() && (!sourceClosed)) { 1052 // Get more input and try again 1053 needInput = true; 1054 return false; 1055 } 1056 position = matcher.end(); 1057 return true; 1058 } 1059 1060 if (sourceClosed) 1061 return false; 1062 1063 // Read more to find pattern 1064 needInput = true; 1065 return false; 1066 } 1067 1068 // Throws if the scanner is closed 1069 private void ensureOpen() { 1070 if (closed) 1071 throw new IllegalStateException("Scanner closed"); 1072 } 1073 1074 // Public methods 1075 1076 /** 1077 * Closes this scanner. 1078 * 1079 * <p> If this scanner has not yet been closed then if its underlying 1080 * {@linkplain java.lang.Readable readable} also implements the {@link 1081 * java.io.Closeable} interface then the readable's <tt>close</tt> method 1082 * will be invoked. If this scanner is already closed then invoking this 1083 * method will have no effect. 1084 * 1085 * <p>Attempting to perform search operations after a scanner has 1086 * been closed will result in an {@link IllegalStateException}. 1087 * 1088 */ 1089 public void close() { 1090 if (closed) 1091 return; 1092 if (source instanceof Closeable) { 1093 try { 1094 ((Closeable)source).close(); 1095 } catch (IOException ioe) { 1096 lastException = ioe; 1097 } 1098 } 1099 sourceClosed = true; 1100 source = null; 1101 closed = true; 1102 } 1103 1104 /** 1105 * Returns the <code>IOException</code> last thrown by this 1106 * <code>Scanner</code>'s underlying <code>Readable</code>. This method 1107 * returns <code>null</code> if no such exception exists. 
1108 * 1109 * @return the last exception thrown by this scanner's readable 1110 */ 1111 public IOException ioException() { 1112 return lastException; 1113 } 1114 1115 /** 1116 * Returns the <code>Pattern</code> this <code>Scanner</code> is currently 1117 * using to match delimiters. 1118 * 1119 * @return this scanner's delimiting pattern. 1120 */ 1121 public Pattern delimiter() { 1122 return delimPattern; 1123 } 1124 1125 /** 1126 * Sets this scanner's delimiting pattern to the specified pattern. 1127 * 1128 * @param pattern A delimiting pattern 1129 * @return this scanner 1130 */ 1131 public Scanner useDelimiter(Pattern pattern) { 1132 delimPattern = pattern; 1133 return this; 1134 } 1135 1136 /** 1137 * Sets this scanner's delimiting pattern to a pattern constructed from 1138 * the specified <code>String</code>. 1139 * 1140 * <p> An invocation of this method of the form 1141 * <tt>useDelimiter(pattern)</tt> behaves in exactly the same way as the 1142 * invocation <tt>useDelimiter(Pattern.compile(pattern))</tt>. 1143 * 1144 * <p> Invoking the {@link #reset} method will set the scanner's delimiter 1145 * to the <a href= "#default-delimiter">default</a>. 1146 * 1147 * @param pattern A string specifying a delimiting pattern 1148 * @return this scanner 1149 */ 1150 public Scanner useDelimiter(String pattern) { 1151 delimPattern = patternCache.forName(pattern); 1152 return this; 1153 } 1154 1155 /** 1156 * Returns this scanner's locale. 1157 * 1158 * <p>A scanner's locale affects many elements of its default 1159 * primitive matching regular expressions; see 1160 * <a href= "#localized-numbers">localized numbers</a> above. 1161 * 1162 * @return this scanner's locale 1163 */ 1164 public Locale locale() { 1165 return this.locale; 1166 } 1167 1168 /** 1169 * Sets this scanner's locale to the specified locale. 
1170 * 1171 * <p>A scanner's locale affects many elements of its default 1172 * primitive matching regular expressions; see 1173 * <a href= "#localized-numbers">localized numbers</a> above. 1174 * 1175 * <p>Invoking the {@link #reset} method will set the scanner's locale to 1176 * the <a href= "#initial-locale">initial locale</a>. 1177 * 1178 * @param locale A string specifying the locale to use 1179 * @return this scanner 1180 */ 1181 public Scanner useLocale(Locale locale) { 1182 if (locale.equals(this.locale)) 1183 return this; 1184 1185 this.locale = locale; 1186 DecimalFormat df = 1187 (DecimalFormat)NumberFormat.getNumberInstance(locale); 1188 DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale); 1189 1190 // These must be literalized to avoid collision with regex 1191 // metacharacters such as dot or parenthesis 1192 groupSeparator = "\\x{" + Integer.toHexString(dfs.getGroupingSeparator()) + "}"; 1193 decimalSeparator = "\\x{" + Integer.toHexString(dfs.getDecimalSeparator()) + "}"; 1194 1195 // Quoting the nonzero length locale-specific things 1196 // to avoid potential conflict with metacharacters 1197 nanString = Pattern.quote(dfs.getNaN()); 1198 infinityString = Pattern.quote(dfs.getInfinity()); 1199 positivePrefix = df.getPositivePrefix(); 1200 if (positivePrefix.length() > 0) 1201 positivePrefix = Pattern.quote(positivePrefix); 1202 negativePrefix = df.getNegativePrefix(); 1203 if (negativePrefix.length() > 0) 1204 negativePrefix = Pattern.quote(negativePrefix); 1205 positiveSuffix = df.getPositiveSuffix(); 1206 if (positiveSuffix.length() > 0) 1207 positiveSuffix = Pattern.quote(positiveSuffix); 1208 negativeSuffix = df.getNegativeSuffix(); 1209 if (negativeSuffix.length() > 0) 1210 negativeSuffix = Pattern.quote(negativeSuffix); 1211 1212 // Force rebuilding and recompilation of locale dependent 1213 // primitive patterns 1214 integerPattern = null; 1215 floatPattern = null; 1216 1217 return this; 1218 } 1219 1220 /** 1221 * Returns 
this scanner's default radix. 1222 * 1223 * <p>A scanner's radix affects elements of its default 1224 * number matching regular expressions; see 1225 * <a href= "#localized-numbers">localized numbers</a> above. 1226 * 1227 * @return the default radix of this scanner 1228 */ 1229 public int radix() { 1230 return this.defaultRadix; 1231 } 1232 1233 /** 1234 * Sets this scanner's default radix to the specified radix. 1235 * 1236 * <p>A scanner's radix affects elements of its default 1237 * number matching regular expressions; see 1238 * <a href= "#localized-numbers">localized numbers</a> above. 1239 * 1240 * <p>If the radix is less than <code>Character.MIN_RADIX</code> 1241 * or greater than <code>Character.MAX_RADIX</code>, then an 1242 * <code>IllegalArgumentException</code> is thrown. 1243 * 1244 * <p>Invoking the {@link #reset} method will set the scanner's radix to 1245 * <code>10</code>. 1246 * 1247 * @param radix The radix to use when scanning numbers 1248 * @return this scanner 1249 * @throws IllegalArgumentException if radix is out of range 1250 */ 1251 public Scanner useRadix(int radix) { 1252 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) 1253 throw new IllegalArgumentException("radix:"+radix); 1254 1255 if (this.defaultRadix == radix) 1256 return this; 1257 this.defaultRadix = radix; 1258 // Force rebuilding and recompilation of radix dependent patterns 1259 integerPattern = null; 1260 return this; 1261 } 1262 1263 // The next operation should occur in the specified radix but 1264 // the default is left untouched. 1265 private void setRadix(int radix) { 1266 if (this.radix != radix) { 1267 // Force rebuilding and recompilation of radix dependent patterns 1268 integerPattern = null; 1269 this.radix = radix; 1270 } 1271 } 1272 1273 /** 1274 * Returns the match result of the last scanning operation performed 1275 * by this scanner. 
This method throws <code>IllegalStateException</code> 1276 * if no match has been performed, or if the last match was 1277 * not successful. 1278 * 1279 * <p>The various <code>next</code>methods of <code>Scanner</code> 1280 * make a match result available if they complete without throwing an 1281 * exception. For instance, after an invocation of the {@link #nextInt} 1282 * method that returned an int, this method returns a 1283 * <code>MatchResult</code> for the search of the 1284 * <a href="#Integer-regex"><i>Integer</i></a> regular expression 1285 * defined above. Similarly the {@link #findInLine}, 1286 * {@link #findWithinHorizon}, and {@link #skip} methods will make a 1287 * match available if they succeed. 1288 * 1289 * @return a match result for the last match operation 1290 * @throws IllegalStateException If no match result is available 1291 */ 1292 public MatchResult match() { 1293 if (!matchValid) 1294 throw new IllegalStateException("No match result available"); 1295 return matcher.toMatchResult(); 1296 } 1297 1298 /** 1299 * <p>Returns the string representation of this <code>Scanner</code>. The 1300 * string representation of a <code>Scanner</code> contains information 1301 * that may be useful for debugging. The exact format is unspecified. 
1302 * 1303 * @return The string representation of this scanner 1304 */ 1305 public String toString() { 1306 StringBuilder sb = new StringBuilder(); 1307 sb.append("java.util.Scanner"); 1308 sb.append("[delimiters=" + delimPattern + "]"); 1309 sb.append("[position=" + position + "]"); 1310 sb.append("[match valid=" + matchValid + "]"); 1311 sb.append("[need input=" + needInput + "]"); 1312 sb.append("[source closed=" + sourceClosed + "]"); 1313 sb.append("[skipped=" + skipped + "]"); 1314 sb.append("[group separator=" + groupSeparator + "]"); 1315 sb.append("[decimal separator=" + decimalSeparator + "]"); 1316 sb.append("[positive prefix=" + positivePrefix + "]"); 1317 sb.append("[negative prefix=" + negativePrefix + "]"); 1318 sb.append("[positive suffix=" + positiveSuffix + "]"); 1319 sb.append("[negative suffix=" + negativeSuffix + "]"); 1320 sb.append("[NaN string=" + nanString + "]"); 1321 sb.append("[infinity string=" + infinityString + "]"); 1322 return sb.toString(); 1323 } 1324 1325 /** 1326 * Returns true if this scanner has another token in its input. 1327 * This method may block while waiting for input to scan. 1328 * The scanner does not advance past any input. 1329 * 1330 * @return true if and only if this scanner has another token 1331 * @throws IllegalStateException if this scanner is closed 1332 * @see java.util.Iterator 1333 */ 1334 public boolean hasNext() { 1335 ensureOpen(); 1336 saveState(); 1337 while (!sourceClosed) { 1338 if (hasTokenInBuffer()) 1339 return revertState(true); 1340 readInput(); 1341 } 1342 boolean result = hasTokenInBuffer(); 1343 return revertState(result); 1344 } 1345 1346 /** 1347 * Finds and returns the next complete token from this scanner. 1348 * A complete token is preceded and followed by input that matches 1349 * the delimiter pattern. This method may block while waiting for input 1350 * to scan, even if a previous invocation of {@link #hasNext} returned 1351 * <code>true</code>. 
1352 * 1353 * @return the next token 1354 * @throws NoSuchElementException if no more tokens are available 1355 * @throws IllegalStateException if this scanner is closed 1356 * @see java.util.Iterator 1357 */ 1358 public String next() { 1359 ensureOpen(); 1360 clearCaches(); 1361 1362 while (true) { 1363 String token = getCompleteTokenInBuffer(null); 1364 if (token != null) { 1365 matchValid = true; 1366 skipped = false; 1367 return token; 1368 } 1369 if (needInput) 1370 readInput(); 1371 else 1372 throwFor(); 1373 } 1374 } 1375 1376 /** 1377 * The remove operation is not supported by this implementation of 1378 * <code>Iterator</code>. 1379 * 1380 * @throws UnsupportedOperationException if this method is invoked. 1381 * @see java.util.Iterator 1382 */ 1383 public void remove() { 1384 throw new UnsupportedOperationException(); 1385 } 1386 1387 /** 1388 * Returns true if the next token matches the pattern constructed from the 1389 * specified string. The scanner does not advance past any input. 1390 * 1391 * <p> An invocation of this method of the form <tt>hasNext(pattern)</tt> 1392 * behaves in exactly the same way as the invocation 1393 * <tt>hasNext(Pattern.compile(pattern))</tt>. 1394 * 1395 * @param pattern a string specifying the pattern to scan 1396 * @return true if and only if this scanner has another token matching 1397 * the specified pattern 1398 * @throws IllegalStateException if this scanner is closed 1399 */ 1400 public boolean hasNext(String pattern) { 1401 return hasNext(patternCache.forName(pattern)); 1402 } 1403 1404 /** 1405 * Returns the next token if it matches the pattern constructed from the 1406 * specified string. If the match is successful, the scanner advances 1407 * past the input that matched the pattern. 1408 * 1409 * <p> An invocation of this method of the form <tt>next(pattern)</tt> 1410 * behaves in exactly the same way as the invocation 1411 * <tt>next(Pattern.compile(pattern))</tt>. 
1412 * 1413 * @param pattern a string specifying the pattern to scan 1414 * @return the next token 1415 * @throws NoSuchElementException if no such tokens are available 1416 * @throws IllegalStateException if this scanner is closed 1417 */ 1418 public String next(String pattern) { 1419 return next(patternCache.forName(pattern)); 1420 } 1421 1422 /** 1423 * Returns true if the next complete token matches the specified pattern. 1424 * A complete token is prefixed and postfixed by input that matches 1425 * the delimiter pattern. This method may block while waiting for input. 1426 * The scanner does not advance past any input. 1427 * 1428 * @param pattern the pattern to scan for 1429 * @return true if and only if this scanner has another token matching 1430 * the specified pattern 1431 * @throws IllegalStateException if this scanner is closed 1432 */ 1433 public boolean hasNext(Pattern pattern) { 1434 ensureOpen(); 1435 if (pattern == null) 1436 throw new NullPointerException(); 1437 hasNextPattern = null; 1438 saveState(); 1439 1440 while (true) { 1441 if (getCompleteTokenInBuffer(pattern) != null) { 1442 matchValid = true; 1443 cacheResult(); 1444 return revertState(true); 1445 } 1446 if (needInput) 1447 readInput(); 1448 else 1449 return revertState(false); 1450 } 1451 } 1452 1453 /** 1454 * Returns the next token if it matches the specified pattern. This 1455 * method may block while waiting for input to scan, even if a previous 1456 * invocation of {@link #hasNext(Pattern)} returned <code>true</code>. 1457 * If the match is successful, the scanner advances past the input that 1458 * matched the pattern. 
1459 * 1460 * @param pattern the pattern to scan for 1461 * @return the next token 1462 * @throws NoSuchElementException if no more tokens are available 1463 * @throws IllegalStateException if this scanner is closed 1464 */ 1465 public String next(Pattern pattern) { 1466 ensureOpen(); 1467 if (pattern == null) 1468 throw new NullPointerException(); 1469 1470 // Did we already find this pattern? 1471 if (hasNextPattern == pattern) 1472 return getCachedResult(); 1473 clearCaches(); 1474 1475 // Search for the pattern 1476 while (true) { 1477 String token = getCompleteTokenInBuffer(pattern); 1478 if (token != null) { 1479 matchValid = true; 1480 skipped = false; 1481 return token; 1482 } 1483 if (needInput) 1484 readInput(); 1485 else 1486 throwFor(); 1487 } 1488 } 1489 1490 /** 1491 * Returns true if there is another line in the input of this scanner. 1492 * This method may block while waiting for input. The scanner does not 1493 * advance past any input. 1494 * 1495 * @return true if and only if this scanner has another line of input 1496 * @throws IllegalStateException if this scanner is closed 1497 */ 1498 public boolean hasNextLine() { 1499 saveState(); 1500 1501 String result = findWithinHorizon(linePattern(), 0); 1502 if (result != null) { 1503 MatchResult mr = this.match(); 1504 String lineSep = mr.group(1); 1505 if (lineSep != null) { 1506 result = result.substring(0, result.length() - 1507 lineSep.length()); 1508 cacheResult(result); 1509 1510 } else { 1511 cacheResult(); 1512 } 1513 } 1514 revertState(); 1515 return (result != null); 1516 } 1517 1518 /** 1519 * Advances this scanner past the current line and returns the input 1520 * that was skipped. 1521 * 1522 * This method returns the rest of the current line, excluding any line 1523 * separator at the end. The position is set to the beginning of the next 1524 * line. 
1525 * 1526 * <p>Since this method continues to search through the input looking 1527 * for a line separator, it may buffer all of the input searching for 1528 * the line to skip if no line separators are present. 1529 * 1530 * @return the line that was skipped 1531 * @throws NoSuchElementException if no line was found 1532 * @throws IllegalStateException if this scanner is closed 1533 */ 1534 public String nextLine() { 1535 if (hasNextPattern == linePattern()) 1536 return getCachedResult(); 1537 clearCaches(); 1538 1539 String result = findWithinHorizon(linePattern, 0); 1540 if (result == null) 1541 throw new NoSuchElementException("No line found"); 1542 MatchResult mr = this.match(); 1543 String lineSep = mr.group(1); 1544 if (lineSep != null) 1545 result = result.substring(0, result.length() - lineSep.length()); 1546 if (result == null) 1547 throw new NoSuchElementException(); 1548 else 1549 return result; 1550 } 1551 1552 // Public methods that ignore delimiters 1553 1554 /** 1555 * Attempts to find the next occurrence of a pattern constructed from the 1556 * specified string, ignoring delimiters. 1557 * 1558 * <p>An invocation of this method of the form <tt>findInLine(pattern)</tt> 1559 * behaves in exactly the same way as the invocation 1560 * <tt>findInLine(Pattern.compile(pattern))</tt>. 1561 * 1562 * @param pattern a string specifying the pattern to search for 1563 * @return the text that matched the specified pattern 1564 * @throws IllegalStateException if this scanner is closed 1565 */ 1566 public String findInLine(String pattern) { 1567 return findInLine(patternCache.forName(pattern)); 1568 } 1569 1570 /** 1571 * Attempts to find the next occurrence of the specified pattern ignoring 1572 * delimiters. If the pattern is found before the next line separator, the 1573 * scanner advances past the input that matched and returns the string that 1574 * matched the pattern. 
1575 * If no such pattern is detected in the input up to the next line 1576 * separator, then <code>null</code> is returned and the scanner's 1577 * position is unchanged. This method may block waiting for input that 1578 * matches the pattern. 1579 * 1580 * <p>Since this method continues to search through the input looking 1581 * for the specified pattern, it may buffer all of the input searching for 1582 * the desired token if no line separators are present. 1583 * 1584 * @param pattern the pattern to scan for 1585 * @return the text that matched the specified pattern 1586 * @throws IllegalStateException if this scanner is closed 1587 */ 1588 public String findInLine(Pattern pattern) { 1589 ensureOpen(); 1590 if (pattern == null) 1591 throw new NullPointerException(); 1592 clearCaches(); 1593 // Expand buffer to include the next newline or end of input 1594 int endPosition = 0; 1595 saveState(); 1596 while (true) { 1597 if (findPatternInBuffer(separatorPattern(), 0)) { 1598 endPosition = matcher.start(); 1599 break; // up to next newline 1600 } 1601 if (needInput) { 1602 readInput(); 1603 } else { 1604 endPosition = buf.limit(); 1605 break; // up to end of input 1606 } 1607 } 1608 revertState(); 1609 int horizonForLine = endPosition - position; 1610 // If there is nothing between the current pos and the next 1611 // newline simply return null, invoking findWithinHorizon 1612 // with "horizon=0" will scan beyond the line bound. 1613 if (horizonForLine == 0) 1614 return null; 1615 // Search for the pattern 1616 return findWithinHorizon(pattern, horizonForLine); 1617 } 1618 1619 /** 1620 * Attempts to find the next occurrence of a pattern constructed from the 1621 * specified string, ignoring delimiters. 1622 * 1623 * <p>An invocation of this method of the form 1624 * <tt>findWithinHorizon(pattern)</tt> behaves in exactly the same way as 1625 * the invocation 1626 * <tt>findWithinHorizon(Pattern.compile(pattern, horizon))</tt>. 
1627 * 1628 * @param pattern a string specifying the pattern to search for 1629 * @param horizon the search horizon 1630 * @return the text that matched the specified pattern 1631 * @throws IllegalStateException if this scanner is closed 1632 * @throws IllegalArgumentException if horizon is negative 1633 */ 1634 public String findWithinHorizon(String pattern, int horizon) { 1635 return findWithinHorizon(patternCache.forName(pattern), horizon); 1636 } 1637 1638 /** 1639 * Attempts to find the next occurrence of the specified pattern. 1640 * 1641 * <p>This method searches through the input up to the specified 1642 * search horizon, ignoring delimiters. If the pattern is found the 1643 * scanner advances past the input that matched and returns the string 1644 * that matched the pattern. If no such pattern is detected then the 1645 * null is returned and the scanner's position remains unchanged. This 1646 * method may block waiting for input that matches the pattern. 1647 * 1648 * <p>A scanner will never search more than <code>horizon</code> code 1649 * points beyond its current position. Note that a match may be clipped 1650 * by the horizon; that is, an arbitrary match result may have been 1651 * different if the horizon had been larger. The scanner treats the 1652 * horizon as a transparent, non-anchoring bound (see {@link 1653 * Matcher#useTransparentBounds} and {@link Matcher#useAnchoringBounds}). 1654 * 1655 * <p>If horizon is <code>0</code>, then the horizon is ignored and 1656 * this method continues to search through the input looking for the 1657 * specified pattern without bound. In this case it may buffer all of 1658 * the input searching for the pattern. 1659 * 1660 * <p>If horizon is negative, then an IllegalArgumentException is 1661 * thrown. 
1662 * 1663 * @param pattern the pattern to scan for 1664 * @param horizon the search horizon 1665 * @return the text that matched the specified pattern 1666 * @throws IllegalStateException if this scanner is closed 1667 * @throws IllegalArgumentException if horizon is negative 1668 */ 1669 public String findWithinHorizon(Pattern pattern, int horizon) { 1670 ensureOpen(); 1671 if (pattern == null) 1672 throw new NullPointerException(); 1673 if (horizon < 0) 1674 throw new IllegalArgumentException("horizon < 0"); 1675 clearCaches(); 1676 1677 // Search for the pattern 1678 while (true) { 1679 if (findPatternInBuffer(pattern, horizon)) { 1680 matchValid = true; 1681 return matcher.group(); 1682 } 1683 if (needInput) 1684 readInput(); 1685 else 1686 break; // up to end of input 1687 } 1688 return null; 1689 } 1690 1691 /** 1692 * Skips input that matches the specified pattern, ignoring delimiters. 1693 * This method will skip input if an anchored match of the specified 1694 * pattern succeeds. 1695 * 1696 * <p>If a match to the specified pattern is not found at the 1697 * current position, then no input is skipped and a 1698 * <tt>NoSuchElementException</tt> is thrown. 1699 * 1700 * <p>Since this method seeks to match the specified pattern starting at 1701 * the scanner's current position, patterns that can match a lot of 1702 * input (".*", for example) may cause the scanner to buffer a large 1703 * amount of input. 1704 * 1705 * <p>Note that it is possible to skip something without risking a 1706 * <code>NoSuchElementException</code> by using a pattern that can 1707 * match nothing, e.g., <code>sc.skip("[ \t]*")</code>. 
1708 * 1709 * @param pattern a string specifying the pattern to skip over 1710 * @return this scanner 1711 * @throws NoSuchElementException if the specified pattern is not found 1712 * @throws IllegalStateException if this scanner is closed 1713 */ 1714 public Scanner skip(Pattern pattern) { 1715 ensureOpen(); 1716 if (pattern == null) 1717 throw new NullPointerException(); 1718 clearCaches(); 1719 1720 // Search for the pattern 1721 while (true) { 1722 if (matchPatternInBuffer(pattern)) { 1723 matchValid = true; 1724 position = matcher.end(); 1725 return this; 1726 } 1727 if (needInput) 1728 readInput(); 1729 else 1730 throw new NoSuchElementException(); 1731 } 1732 } 1733 1734 /** 1735 * Skips input that matches a pattern constructed from the specified 1736 * string. 1737 * 1738 * <p> An invocation of this method of the form <tt>skip(pattern)</tt> 1739 * behaves in exactly the same way as the invocation 1740 * <tt>skip(Pattern.compile(pattern))</tt>. 1741 * 1742 * @param pattern a string specifying the pattern to skip over 1743 * @return this scanner 1744 * @throws IllegalStateException if this scanner is closed 1745 */ 1746 public Scanner skip(String pattern) { 1747 return skip(patternCache.forName(pattern)); 1748 } 1749 1750 // Convenience methods for scanning primitives 1751 1752 /** 1753 * Returns true if the next token in this scanner's input can be 1754 * interpreted as a boolean value using a case insensitive pattern 1755 * created from the string "true|false". The scanner does not 1756 * advance past the input that matched. 1757 * 1758 * @return true if and only if this scanner's next token is a valid 1759 * boolean value 1760 * @throws IllegalStateException if this scanner is closed 1761 */ 1762 public boolean hasNextBoolean() { 1763 return hasNext(boolPattern()); 1764 } 1765 1766 /** 1767 * Scans the next token of the input into a boolean value and returns 1768 * that value. 
This method will throw <code>InputMismatchException</code> 1769 * if the next token cannot be translated into a valid boolean value. 1770 * If the match is successful, the scanner advances past the input that 1771 * matched. 1772 * 1773 * @return the boolean scanned from the input 1774 * @throws InputMismatchException if the next token is not a valid boolean 1775 * @throws NoSuchElementException if input is exhausted 1776 * @throws IllegalStateException if this scanner is closed 1777 */ 1778 public boolean nextBoolean() { 1779 clearCaches(); 1780 return Boolean.parseBoolean(next(boolPattern())); 1781 } 1782 1783 /** 1784 * Returns true if the next token in this scanner's input can be 1785 * interpreted as a byte value in the default radix using the 1786 * {@link #nextByte} method. The scanner does not advance past any input. 1787 * 1788 * @return true if and only if this scanner's next token is a valid 1789 * byte value 1790 * @throws IllegalStateException if this scanner is closed 1791 */ 1792 public boolean hasNextByte() { 1793 return hasNextByte(defaultRadix); 1794 } 1795 1796 /** 1797 * Returns true if the next token in this scanner's input can be 1798 * interpreted as a byte value in the specified radix using the 1799 * {@link #nextByte} method. The scanner does not advance past any input. 1800 * 1801 * @param radix the radix used to interpret the token as a byte value 1802 * @return true if and only if this scanner's next token is a valid 1803 * byte value 1804 * @throws IllegalStateException if this scanner is closed 1805 */ 1806 public boolean hasNextByte(int radix) { 1807 setRadix(radix); 1808 boolean result = hasNext(integerPattern()); 1809 if (result) { // Cache it 1810 try { 1811 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 
1812 processIntegerToken(hasNextResult) : 1813 hasNextResult; 1814 typeCache = Byte.parseByte(s, radix); 1815 } catch (NumberFormatException nfe) { 1816 result = false; 1817 } 1818 } 1819 return result; 1820 } 1821 1822 /** 1823 * Scans the next token of the input as a <tt>byte</tt>. 1824 * 1825 * <p> An invocation of this method of the form 1826 * <tt>nextByte()</tt> behaves in exactly the same way as the 1827 * invocation <tt>nextByte(radix)</tt>, where <code>radix</code> 1828 * is the default radix of this scanner. 1829 * 1830 * @return the <tt>byte</tt> scanned from the input 1831 * @throws InputMismatchException 1832 * if the next token does not match the <i>Integer</i> 1833 * regular expression, or is out of range 1834 * @throws NoSuchElementException if input is exhausted 1835 * @throws IllegalStateException if this scanner is closed 1836 */ 1837 public byte nextByte() { 1838 return nextByte(defaultRadix); 1839 } 1840 1841 /** 1842 * Scans the next token of the input as a <tt>byte</tt>. 1843 * This method will throw <code>InputMismatchException</code> 1844 * if the next token cannot be translated into a valid byte value as 1845 * described below. If the translation is successful, the scanner advances 1846 * past the input that matched. 1847 * 1848 * <p> If the next token matches the <a 1849 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1850 * above then the token is converted into a <tt>byte</tt> value as if by 1851 * removing all locale specific prefixes, group separators, and locale 1852 * specific suffixes, then mapping non-ASCII digits into ASCII 1853 * digits via {@link Character#digit Character.digit}, prepending a 1854 * negative sign (-) if the locale specific negative prefixes and suffixes 1855 * were present, and passing the resulting string to 1856 * {@link Byte#parseByte(String, int) Byte.parseByte} with the 1857 * specified radix. 
1858 * 1859 * @param radix the radix used to interpret the token as a byte value 1860 * @return the <tt>byte</tt> scanned from the input 1861 * @throws InputMismatchException 1862 * if the next token does not match the <i>Integer</i> 1863 * regular expression, or is out of range 1864 * @throws NoSuchElementException if input is exhausted 1865 * @throws IllegalStateException if this scanner is closed 1866 */ 1867 public byte nextByte(int radix) { 1868 // Check cached result 1869 if ((typeCache != null) && (typeCache instanceof Byte) 1870 && this.radix == radix) { 1871 byte val = ((Byte)typeCache).byteValue(); 1872 useTypeCache(); 1873 return val; 1874 } 1875 setRadix(radix); 1876 clearCaches(); 1877 // Search for next byte 1878 try { 1879 String s = next(integerPattern()); 1880 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 1881 s = processIntegerToken(s); 1882 return Byte.parseByte(s, radix); 1883 } catch (NumberFormatException nfe) { 1884 position = matcher.start(); // don't skip bad token 1885 throw new InputMismatchException(nfe.getMessage()); 1886 } 1887 } 1888 1889 /** 1890 * Returns true if the next token in this scanner's input can be 1891 * interpreted as a short value in the default radix using the 1892 * {@link #nextShort} method. The scanner does not advance past any input. 1893 * 1894 * @return true if and only if this scanner's next token is a valid 1895 * short value in the default radix 1896 * @throws IllegalStateException if this scanner is closed 1897 */ 1898 public boolean hasNextShort() { 1899 return hasNextShort(defaultRadix); 1900 } 1901 1902 /** 1903 * Returns true if the next token in this scanner's input can be 1904 * interpreted as a short value in the specified radix using the 1905 * {@link #nextShort} method. The scanner does not advance past any input. 
1906 * 1907 * @param radix the radix used to interpret the token as a short value 1908 * @return true if and only if this scanner's next token is a valid 1909 * short value in the specified radix 1910 * @throws IllegalStateException if this scanner is closed 1911 */ 1912 public boolean hasNextShort(int radix) { 1913 setRadix(radix); 1914 boolean result = hasNext(integerPattern()); 1915 if (result) { // Cache it 1916 try { 1917 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 1918 processIntegerToken(hasNextResult) : 1919 hasNextResult; 1920 typeCache = Short.parseShort(s, radix); 1921 } catch (NumberFormatException nfe) { 1922 result = false; 1923 } 1924 } 1925 return result; 1926 } 1927 1928 /** 1929 * Scans the next token of the input as a <tt>short</tt>. 1930 * 1931 * <p> An invocation of this method of the form 1932 * <tt>nextShort()</tt> behaves in exactly the same way as the 1933 * invocation <tt>nextShort(radix)</tt>, where <code>radix</code> 1934 * is the default radix of this scanner. 1935 * 1936 * @return the <tt>short</tt> scanned from the input 1937 * @throws InputMismatchException 1938 * if the next token does not match the <i>Integer</i> 1939 * regular expression, or is out of range 1940 * @throws NoSuchElementException if input is exhausted 1941 * @throws IllegalStateException if this scanner is closed 1942 */ 1943 public short nextShort() { 1944 return nextShort(defaultRadix); 1945 } 1946 1947 /** 1948 * Scans the next token of the input as a <tt>short</tt>. 1949 * This method will throw <code>InputMismatchException</code> 1950 * if the next token cannot be translated into a valid short value as 1951 * described below. If the translation is successful, the scanner advances 1952 * past the input that matched. 
1953 * 1954 * <p> If the next token matches the <a 1955 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1956 * above then the token is converted into a <tt>short</tt> value as if by 1957 * removing all locale specific prefixes, group separators, and locale 1958 * specific suffixes, then mapping non-ASCII digits into ASCII 1959 * digits via {@link Character#digit Character.digit}, prepending a 1960 * negative sign (-) if the locale specific negative prefixes and suffixes 1961 * were present, and passing the resulting string to 1962 * {@link Short#parseShort(String, int) Short.parseShort} with the 1963 * specified radix. 1964 * 1965 * @param radix the radix used to interpret the token as a short value 1966 * @return the <tt>short</tt> scanned from the input 1967 * @throws InputMismatchException 1968 * if the next token does not match the <i>Integer</i> 1969 * regular expression, or is out of range 1970 * @throws NoSuchElementException if input is exhausted 1971 * @throws IllegalStateException if this scanner is closed 1972 */ 1973 public short nextShort(int radix) { 1974 // Check cached result 1975 if ((typeCache != null) && (typeCache instanceof Short) 1976 && this.radix == radix) { 1977 short val = ((Short)typeCache).shortValue(); 1978 useTypeCache(); 1979 return val; 1980 } 1981 setRadix(radix); 1982 clearCaches(); 1983 // Search for next short 1984 try { 1985 String s = next(integerPattern()); 1986 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 1987 s = processIntegerToken(s); 1988 return Short.parseShort(s, radix); 1989 } catch (NumberFormatException nfe) { 1990 position = matcher.start(); // don't skip bad token 1991 throw new InputMismatchException(nfe.getMessage()); 1992 } 1993 } 1994 1995 /** 1996 * Returns true if the next token in this scanner's input can be 1997 * interpreted as an int value in the default radix using the 1998 * {@link #nextInt} method. The scanner does not advance past any input. 
1999 * 2000 * @return true if and only if this scanner's next token is a valid 2001 * int value 2002 * @throws IllegalStateException if this scanner is closed 2003 */ 2004 public boolean hasNextInt() { 2005 return hasNextInt(defaultRadix); 2006 } 2007 2008 /** 2009 * Returns true if the next token in this scanner's input can be 2010 * interpreted as an int value in the specified radix using the 2011 * {@link #nextInt} method. The scanner does not advance past any input. 2012 * 2013 * @param radix the radix used to interpret the token as an int value 2014 * @return true if and only if this scanner's next token is a valid 2015 * int value 2016 * @throws IllegalStateException if this scanner is closed 2017 */ 2018 public boolean hasNextInt(int radix) { 2019 setRadix(radix); 2020 boolean result = hasNext(integerPattern()); 2021 if (result) { // Cache it 2022 try { 2023 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2024 processIntegerToken(hasNextResult) : 2025 hasNextResult; 2026 typeCache = Integer.parseInt(s, radix); 2027 } catch (NumberFormatException nfe) { 2028 result = false; 2029 } 2030 } 2031 return result; 2032 } 2033 2034 /** 2035 * The integer token must be stripped of prefixes, group separators, 2036 * and suffixes, non ascii digits must be converted into ascii digits 2037 * before parse will accept it. 
2038 */ 2039 private String processIntegerToken(String token) { 2040 String result = token.replaceAll(""+groupSeparator, ""); 2041 boolean isNegative = false; 2042 int preLen = negativePrefix.length(); 2043 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2044 isNegative = true; 2045 result = result.substring(preLen); 2046 } 2047 int sufLen = negativeSuffix.length(); 2048 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2049 isNegative = true; 2050 result = result.substring(result.length() - sufLen, 2051 result.length()); 2052 } 2053 if (isNegative) 2054 result = "-" + result; 2055 return result; 2056 } 2057 2058 /** 2059 * Scans the next token of the input as an <tt>int</tt>. 2060 * 2061 * <p> An invocation of this method of the form 2062 * <tt>nextInt()</tt> behaves in exactly the same way as the 2063 * invocation <tt>nextInt(radix)</tt>, where <code>radix</code> 2064 * is the default radix of this scanner. 2065 * 2066 * @return the <tt>int</tt> scanned from the input 2067 * @throws InputMismatchException 2068 * if the next token does not match the <i>Integer</i> 2069 * regular expression, or is out of range 2070 * @throws NoSuchElementException if input is exhausted 2071 * @throws IllegalStateException if this scanner is closed 2072 */ 2073 public int nextInt() { 2074 return nextInt(defaultRadix); 2075 } 2076 2077 /** 2078 * Scans the next token of the input as an <tt>int</tt>. 2079 * This method will throw <code>InputMismatchException</code> 2080 * if the next token cannot be translated into a valid int value as 2081 * described below. If the translation is successful, the scanner advances 2082 * past the input that matched. 
2083 * 2084 * <p> If the next token matches the <a 2085 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2086 * above then the token is converted into an <tt>int</tt> value as if by 2087 * removing all locale specific prefixes, group separators, and locale 2088 * specific suffixes, then mapping non-ASCII digits into ASCII 2089 * digits via {@link Character#digit Character.digit}, prepending a 2090 * negative sign (-) if the locale specific negative prefixes and suffixes 2091 * were present, and passing the resulting string to 2092 * {@link Integer#parseInt(String, int) Integer.parseInt} with the 2093 * specified radix. 2094 * 2095 * @param radix the radix used to interpret the token as an int value 2096 * @return the <tt>int</tt> scanned from the input 2097 * @throws InputMismatchException 2098 * if the next token does not match the <i>Integer</i> 2099 * regular expression, or is out of range 2100 * @throws NoSuchElementException if input is exhausted 2101 * @throws IllegalStateException if this scanner is closed 2102 */ 2103 public int nextInt(int radix) { 2104 // Check cached result 2105 if ((typeCache != null) && (typeCache instanceof Integer) 2106 && this.radix == radix) { 2107 int val = ((Integer)typeCache).intValue(); 2108 useTypeCache(); 2109 return val; 2110 } 2111 setRadix(radix); 2112 clearCaches(); 2113 // Search for next int 2114 try { 2115 String s = next(integerPattern()); 2116 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2117 s = processIntegerToken(s); 2118 return Integer.parseInt(s, radix); 2119 } catch (NumberFormatException nfe) { 2120 position = matcher.start(); // don't skip bad token 2121 throw new InputMismatchException(nfe.getMessage()); 2122 } 2123 } 2124 2125 /** 2126 * Returns true if the next token in this scanner's input can be 2127 * interpreted as a long value in the default radix using the 2128 * {@link #nextLong} method. The scanner does not advance past any input. 
2129 * 2130 * @return true if and only if this scanner's next token is a valid 2131 * long value 2132 * @throws IllegalStateException if this scanner is closed 2133 */ 2134 public boolean hasNextLong() { 2135 return hasNextLong(defaultRadix); 2136 } 2137 2138 /** 2139 * Returns true if the next token in this scanner's input can be 2140 * interpreted as a long value in the specified radix using the 2141 * {@link #nextLong} method. The scanner does not advance past any input. 2142 * 2143 * @param radix the radix used to interpret the token as a long value 2144 * @return true if and only if this scanner's next token is a valid 2145 * long value 2146 * @throws IllegalStateException if this scanner is closed 2147 */ 2148 public boolean hasNextLong(int radix) { 2149 setRadix(radix); 2150 boolean result = hasNext(integerPattern()); 2151 if (result) { // Cache it 2152 try { 2153 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2154 processIntegerToken(hasNextResult) : 2155 hasNextResult; 2156 typeCache = Long.parseLong(s, radix); 2157 } catch (NumberFormatException nfe) { 2158 result = false; 2159 } 2160 } 2161 return result; 2162 } 2163 2164 /** 2165 * Scans the next token of the input as a <tt>long</tt>. 2166 * 2167 * <p> An invocation of this method of the form 2168 * <tt>nextLong()</tt> behaves in exactly the same way as the 2169 * invocation <tt>nextLong(radix)</tt>, where <code>radix</code> 2170 * is the default radix of this scanner. 2171 * 2172 * @return the <tt>long</tt> scanned from the input 2173 * @throws InputMismatchException 2174 * if the next token does not match the <i>Integer</i> 2175 * regular expression, or is out of range 2176 * @throws NoSuchElementException if input is exhausted 2177 * @throws IllegalStateException if this scanner is closed 2178 */ 2179 public long nextLong() { 2180 return nextLong(defaultRadix); 2181 } 2182 2183 /** 2184 * Scans the next token of the input as a <tt>long</tt>. 
2185 * This method will throw <code>InputMismatchException</code> 2186 * if the next token cannot be translated into a valid long value as 2187 * described below. If the translation is successful, the scanner advances 2188 * past the input that matched. 2189 * 2190 * <p> If the next token matches the <a 2191 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2192 * above then the token is converted into a <tt>long</tt> value as if by 2193 * removing all locale specific prefixes, group separators, and locale 2194 * specific suffixes, then mapping non-ASCII digits into ASCII 2195 * digits via {@link Character#digit Character.digit}, prepending a 2196 * negative sign (-) if the locale specific negative prefixes and suffixes 2197 * were present, and passing the resulting string to 2198 * {@link Long#parseLong(String, int) Long.parseLong} with the 2199 * specified radix. 2200 * 2201 * @param radix the radix used to interpret the token as an int value 2202 * @return the <tt>long</tt> scanned from the input 2203 * @throws InputMismatchException 2204 * if the next token does not match the <i>Integer</i> 2205 * regular expression, or is out of range 2206 * @throws NoSuchElementException if input is exhausted 2207 * @throws IllegalStateException if this scanner is closed 2208 */ 2209 public long nextLong(int radix) { 2210 // Check cached result 2211 if ((typeCache != null) && (typeCache instanceof Long) 2212 && this.radix == radix) { 2213 long val = ((Long)typeCache).longValue(); 2214 useTypeCache(); 2215 return val; 2216 } 2217 setRadix(radix); 2218 clearCaches(); 2219 try { 2220 String s = next(integerPattern()); 2221 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2222 s = processIntegerToken(s); 2223 return Long.parseLong(s, radix); 2224 } catch (NumberFormatException nfe) { 2225 position = matcher.start(); // don't skip bad token 2226 throw new InputMismatchException(nfe.getMessage()); 2227 } 2228 } 2229 2230 /** 2231 * The float token must be stripped 
of prefixes, group separators, 2232 * and suffixes, non ascii digits must be converted into ascii digits 2233 * before parseFloat will accept it. 2234 * 2235 * If there are non-ascii digits in the token these digits must 2236 * be processed before the token is passed to parseFloat. 2237 */ 2238 private String processFloatToken(String token) { 2239 String result = token.replaceAll(groupSeparator, ""); 2240 if (!decimalSeparator.equals("\\.")) 2241 result = result.replaceAll(decimalSeparator, "."); 2242 boolean isNegative = false; 2243 int preLen = negativePrefix.length(); 2244 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2245 isNegative = true; 2246 result = result.substring(preLen); 2247 } 2248 int sufLen = negativeSuffix.length(); 2249 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2250 isNegative = true; 2251 result = result.substring(result.length() - sufLen, 2252 result.length()); 2253 } 2254 if (result.equals(nanString)) 2255 result = "NaN"; 2256 if (result.equals(infinityString)) 2257 result = "Infinity"; 2258 if (isNegative) 2259 result = "-" + result; 2260 2261 // Translate non-ASCII digits 2262 Matcher m = NON_ASCII_DIGIT.matcher(result); 2263 if (m.find()) { 2264 StringBuilder inASCII = new StringBuilder(); 2265 for (int i=0; i<result.length(); i++) { 2266 char nextChar = result.charAt(i); 2267 if (Character.isDigit(nextChar)) { 2268 int d = Character.digit(nextChar, 10); 2269 if (d != -1) 2270 inASCII.append(d); 2271 else 2272 inASCII.append(nextChar); 2273 } else { 2274 inASCII.append(nextChar); 2275 } 2276 } 2277 result = inASCII.toString(); 2278 } 2279 2280 return result; 2281 } 2282 2283 /** 2284 * Returns true if the next token in this scanner's input can be 2285 * interpreted as a float value using the {@link #nextFloat} 2286 * method. The scanner does not advance past any input. 
2287 * 2288 * @return true if and only if this scanner's next token is a valid 2289 * float value 2290 * @throws IllegalStateException if this scanner is closed 2291 */ 2292 public boolean hasNextFloat() { 2293 setRadix(10); 2294 boolean result = hasNext(floatPattern()); 2295 if (result) { // Cache it 2296 try { 2297 String s = processFloatToken(hasNextResult); 2298 typeCache = Float.valueOf(Float.parseFloat(s)); 2299 } catch (NumberFormatException nfe) { 2300 result = false; 2301 } 2302 } 2303 return result; 2304 } 2305 2306 /** 2307 * Scans the next token of the input as a <tt>float</tt>. 2308 * This method will throw <code>InputMismatchException</code> 2309 * if the next token cannot be translated into a valid float value as 2310 * described below. If the translation is successful, the scanner advances 2311 * past the input that matched. 2312 * 2313 * <p> If the next token matches the <a 2314 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2315 * then the token is converted into a <tt>float</tt> value as if by 2316 * removing all locale specific prefixes, group separators, and locale 2317 * specific suffixes, then mapping non-ASCII digits into ASCII 2318 * digits via {@link Character#digit Character.digit}, prepending a 2319 * negative sign (-) if the locale specific negative prefixes and suffixes 2320 * were present, and passing the resulting string to 2321 * {@link Float#parseFloat Float.parseFloat}. If the token matches 2322 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2323 * is passed to {@link Float#parseFloat(String) Float.parseFloat} as 2324 * appropriate. 
2325 * 2326 * @return the <tt>float</tt> scanned from the input 2327 * @throws InputMismatchException 2328 * if the next token does not match the <i>Float</i> 2329 * regular expression, or is out of range 2330 * @throws NoSuchElementException if input is exhausted 2331 * @throws IllegalStateException if this scanner is closed 2332 */ 2333 public float nextFloat() { 2334 // Check cached result 2335 if ((typeCache != null) && (typeCache instanceof Float)) { 2336 float val = ((Float)typeCache).floatValue(); 2337 useTypeCache(); 2338 return val; 2339 } 2340 setRadix(10); 2341 clearCaches(); 2342 try { 2343 return Float.parseFloat(processFloatToken(next(floatPattern()))); 2344 } catch (NumberFormatException nfe) { 2345 position = matcher.start(); // don't skip bad token 2346 throw new InputMismatchException(nfe.getMessage()); 2347 } 2348 } 2349 2350 /** 2351 * Returns true if the next token in this scanner's input can be 2352 * interpreted as a double value using the {@link #nextDouble} 2353 * method. The scanner does not advance past any input. 2354 * 2355 * @return true if and only if this scanner's next token is a valid 2356 * double value 2357 * @throws IllegalStateException if this scanner is closed 2358 */ 2359 public boolean hasNextDouble() { 2360 setRadix(10); 2361 boolean result = hasNext(floatPattern()); 2362 if (result) { // Cache it 2363 try { 2364 String s = processFloatToken(hasNextResult); 2365 typeCache = Double.valueOf(Double.parseDouble(s)); 2366 } catch (NumberFormatException nfe) { 2367 result = false; 2368 } 2369 } 2370 return result; 2371 } 2372 2373 /** 2374 * Scans the next token of the input as a <tt>double</tt>. 2375 * This method will throw <code>InputMismatchException</code> 2376 * if the next token cannot be translated into a valid double value. 2377 * If the translation is successful, the scanner advances past the input 2378 * that matched. 
2379 * 2380 * <p> If the next token matches the <a 2381 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2382 * then the token is converted into a <tt>double</tt> value as if by 2383 * removing all locale specific prefixes, group separators, and locale 2384 * specific suffixes, then mapping non-ASCII digits into ASCII 2385 * digits via {@link Character#digit Character.digit}, prepending a 2386 * negative sign (-) if the locale specific negative prefixes and suffixes 2387 * were present, and passing the resulting string to 2388 * {@link Double#parseDouble Double.parseDouble}. If the token matches 2389 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2390 * is passed to {@link Double#parseDouble(String) Double.parseDouble} as 2391 * appropriate. 2392 * 2393 * @return the <tt>double</tt> scanned from the input 2394 * @throws InputMismatchException 2395 * if the next token does not match the <i>Float</i> 2396 * regular expression, or is out of range 2397 * @throws NoSuchElementException if the input is exhausted 2398 * @throws IllegalStateException if this scanner is closed 2399 */ 2400 public double nextDouble() { 2401 // Check cached result 2402 if ((typeCache != null) && (typeCache instanceof Double)) { 2403 double val = ((Double)typeCache).doubleValue(); 2404 useTypeCache(); 2405 return val; 2406 } 2407 setRadix(10); 2408 clearCaches(); 2409 // Search for next float 2410 try { 2411 return Double.parseDouble(processFloatToken(next(floatPattern()))); 2412 } catch (NumberFormatException nfe) { 2413 position = matcher.start(); // don't skip bad token 2414 throw new InputMismatchException(nfe.getMessage()); 2415 } 2416 } 2417 2418 // Convenience methods for scanning multi precision numbers 2419 2420 /** 2421 * Returns true if the next token in this scanner's input can be 2422 * interpreted as a <code>BigInteger</code> in the default radix using the 2423 * {@link #nextBigInteger} method. 
The scanner does not advance past any 2424 * input. 2425 * 2426 * @return true if and only if this scanner's next token is a valid 2427 * <code>BigInteger</code> 2428 * @throws IllegalStateException if this scanner is closed 2429 */ 2430 public boolean hasNextBigInteger() { 2431 return hasNextBigInteger(defaultRadix); 2432 } 2433 2434 /** 2435 * Returns true if the next token in this scanner's input can be 2436 * interpreted as a <code>BigInteger</code> in the specified radix using 2437 * the {@link #nextBigInteger} method. The scanner does not advance past 2438 * any input. 2439 * 2440 * @param radix the radix used to interpret the token as an integer 2441 * @return true if and only if this scanner's next token is a valid 2442 * <code>BigInteger</code> 2443 * @throws IllegalStateException if this scanner is closed 2444 */ 2445 public boolean hasNextBigInteger(int radix) { 2446 setRadix(radix); 2447 boolean result = hasNext(integerPattern()); 2448 if (result) { // Cache it 2449 try { 2450 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2451 processIntegerToken(hasNextResult) : 2452 hasNextResult; 2453 typeCache = new BigInteger(s, radix); 2454 } catch (NumberFormatException nfe) { 2455 result = false; 2456 } 2457 } 2458 return result; 2459 } 2460 2461 /** 2462 * Scans the next token of the input as a {@link java.math.BigInteger 2463 * BigInteger}. 2464 * 2465 * <p> An invocation of this method of the form 2466 * <tt>nextBigInteger()</tt> behaves in exactly the same way as the 2467 * invocation <tt>nextBigInteger(radix)</tt>, where <code>radix</code> 2468 * is the default radix of this scanner. 
2469 * 2470 * @return the <tt>BigInteger</tt> scanned from the input 2471 * @throws InputMismatchException 2472 * if the next token does not match the <i>Integer</i> 2473 * regular expression, or is out of range 2474 * @throws NoSuchElementException if the input is exhausted 2475 * @throws IllegalStateException if this scanner is closed 2476 */ 2477 public BigInteger nextBigInteger() { 2478 return nextBigInteger(defaultRadix); 2479 } 2480 2481 /** 2482 * Scans the next token of the input as a {@link java.math.BigInteger 2483 * BigInteger}. 2484 * 2485 * <p> If the next token matches the <a 2486 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2487 * above then the token is converted into a <tt>BigInteger</tt> value as if 2488 * by removing all group separators, mapping non-ASCII digits into ASCII 2489 * digits via the {@link Character#digit Character.digit}, and passing the 2490 * resulting string to the {@link 2491 * java.math.BigInteger#BigInteger(java.lang.String) 2492 * BigInteger(String, int)} constructor with the specified radix. 
2493 * 2494 * @param radix the radix used to interpret the token 2495 * @return the <tt>BigInteger</tt> scanned from the input 2496 * @throws InputMismatchException 2497 * if the next token does not match the <i>Integer</i> 2498 * regular expression, or is out of range 2499 * @throws NoSuchElementException if the input is exhausted 2500 * @throws IllegalStateException if this scanner is closed 2501 */ 2502 public BigInteger nextBigInteger(int radix) { 2503 // Check cached result 2504 if ((typeCache != null) && (typeCache instanceof BigInteger) 2505 && this.radix == radix) { 2506 BigInteger val = (BigInteger)typeCache; 2507 useTypeCache(); 2508 return val; 2509 } 2510 setRadix(radix); 2511 clearCaches(); 2512 // Search for next int 2513 try { 2514 String s = next(integerPattern()); 2515 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2516 s = processIntegerToken(s); 2517 return new BigInteger(s, radix); 2518 } catch (NumberFormatException nfe) { 2519 position = matcher.start(); // don't skip bad token 2520 throw new InputMismatchException(nfe.getMessage()); 2521 } 2522 } 2523 2524 /** 2525 * Returns true if the next token in this scanner's input can be 2526 * interpreted as a <code>BigDecimal</code> using the 2527 * {@link #nextBigDecimal} method. The scanner does not advance past any 2528 * input. 2529 * 2530 * @return true if and only if this scanner's next token is a valid 2531 * <code>BigDecimal</code> 2532 * @throws IllegalStateException if this scanner is closed 2533 */ 2534 public boolean hasNextBigDecimal() { 2535 setRadix(10); 2536 boolean result = hasNext(decimalPattern()); 2537 if (result) { // Cache it 2538 try { 2539 String s = processFloatToken(hasNextResult); 2540 typeCache = new BigDecimal(s); 2541 } catch (NumberFormatException nfe) { 2542 result = false; 2543 } 2544 } 2545 return result; 2546 } 2547 2548 /** 2549 * Scans the next token of the input as a {@link java.math.BigDecimal 2550 * BigDecimal}. 
2551 * 2552 * <p> If the next token matches the <a 2553 * href="#Decimal-regex"><i>Decimal</i></a> regular expression defined 2554 * above then the token is converted into a <tt>BigDecimal</tt> value as if 2555 * by removing all group separators, mapping non-ASCII digits into ASCII 2556 * digits via the {@link Character#digit Character.digit}, and passing the 2557 * resulting string to the {@link 2558 * java.math.BigDecimal#BigDecimal(java.lang.String) BigDecimal(String)} 2559 * constructor. 2560 * 2561 * @return the <tt>BigDecimal</tt> scanned from the input 2562 * @throws InputMismatchException 2563 * if the next token does not match the <i>Decimal</i> 2564 * regular expression, or is out of range 2565 * @throws NoSuchElementException if the input is exhausted 2566 * @throws IllegalStateException if this scanner is closed 2567 */ 2568 public BigDecimal nextBigDecimal() { 2569 // Check cached result 2570 if ((typeCache != null) && (typeCache instanceof BigDecimal)) { 2571 BigDecimal val = (BigDecimal)typeCache; 2572 useTypeCache(); 2573 return val; 2574 } 2575 setRadix(10); 2576 clearCaches(); 2577 // Search for next float 2578 try { 2579 String s = processFloatToken(next(decimalPattern())); 2580 return new BigDecimal(s); 2581 } catch (NumberFormatException nfe) { 2582 position = matcher.start(); // don't skip bad token 2583 throw new InputMismatchException(nfe.getMessage()); 2584 } 2585 } 2586 2587 /** 2588 * Resets this scanner. 2589 * 2590 * <p> Resetting a scanner discards all of its explicit state 2591 * information which may have been changed by invocations of {@link 2592 * #useDelimiter}, {@link #useLocale}, or {@link #useRadix}. 
2593 * 2594 * <p> An invocation of this method of the form 2595 * <tt>scanner.reset()</tt> behaves in exactly the same way as the 2596 * invocation 2597 * 2598 * <blockquote><pre>{@code 2599 * scanner.useDelimiter("\\p{javaWhitespace}+") 2600 * .useLocale(Locale.getDefault(Locale.Category.FORMAT)) 2601 * .useRadix(10); 2602 * }</pre></blockquote> 2603 * 2604 * @return this scanner 2605 * 2606 * @since 1.6 2607 */ 2608 public Scanner reset() { 2609 delimPattern = WHITESPACE_PATTERN; 2610 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 2611 useRadix(10); 2612 clearCaches(); 2613 return this; 2614 } 2615 }