1 /* 2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util; 27 28 import java.nio.file.Path; 29 import java.nio.file.Files; 30 import java.util.regex.*; 31 import java.io.*; 32 import java.math.*; 33 import java.nio.*; 34 import java.nio.channels.*; 35 import java.nio.charset.*; 36 import java.text.*; 37 import java.util.Locale; 38 39 import sun.misc.LRUCache; 40 41 /** 42 * A simple text scanner which can parse primitive types and strings using 43 * regular expressions. 44 * 45 * <p>A <code>Scanner</code> breaks its input into tokens using a 46 * delimiter pattern, which by default matches whitespace. The resulting 47 * tokens may then be converted into values of different types using the 48 * various <tt>next</tt> methods. 
49 * 50 * <p>For example, this code allows a user to read a number from 51 * <tt>System.in</tt>: 52 * <blockquote><pre>{@code 53 * Scanner sc = new Scanner(System.in); 54 * int i = sc.nextInt(); 55 * }</pre></blockquote> 56 * 57 * <p>As another example, this code allows <code>long</code> types to be 58 * assigned from entries in a file <code>myNumbers</code>: 59 * <blockquote><pre>{@code 60 * Scanner sc = new Scanner(new File("myNumbers")); 61 * while (sc.hasNextLong()) { 62 * long aLong = sc.nextLong(); 63 * } 64 * }</pre></blockquote> 65 * 66 * <p>The scanner can also use delimiters other than whitespace. This 67 * example reads several items in from a string: 68 * <blockquote><pre>{@code 69 * String input = "1 fish 2 fish red fish blue fish"; 70 * Scanner s = new Scanner(input).useDelimiter("\\s*fish\\s*"); 71 * System.out.println(s.nextInt()); 72 * System.out.println(s.nextInt()); 73 * System.out.println(s.next()); 74 * System.out.println(s.next()); 75 * s.close(); 76 * }</pre></blockquote> 77 * <p> 78 * prints the following output: 79 * <blockquote><pre>{@code 80 * 1 81 * 2 82 * red 83 * blue 84 * }</pre></blockquote> 85 * 86 * <p>The same output can be generated with this code, which uses a regular 87 * expression to parse all four tokens at once: 88 * <blockquote><pre>{@code 89 * String input = "1 fish 2 fish red fish blue fish"; 90 * Scanner s = new Scanner(input); 91 * s.findInLine("(\\d+) fish (\\d+) fish (\\w+) fish (\\w+)"); 92 * MatchResult result = s.match(); 93 * for (int i=1; i<=result.groupCount(); i++) 94 * System.out.println(result.group(i)); 95 * s.close(); 96 * }</pre></blockquote> 97 * 98 * <p>The <a name="default-delimiter">default whitespace delimiter</a> used 99 * by a scanner is as recognized by {@link java.lang.Character}.{@link 100 * java.lang.Character#isWhitespace(char) isWhitespace}. 
The {@link #reset} 101 * method will reset the value of the scanner's delimiter to the default 102 * whitespace delimiter regardless of whether it was previously changed. 103 * 104 * <p>A scanning operation may block waiting for input. 105 * 106 * <p>The {@link #next} and {@link #hasNext} methods and their 107 * primitive-type companion methods (such as {@link #nextInt} and 108 * {@link #hasNextInt}) first skip any input that matches the delimiter 109 * pattern, and then attempt to return the next token. Both <tt>hasNext</tt> 110 * and <tt>next</tt> methods may block waiting for further input. Whether a 111 * <tt>hasNext</tt> method blocks has no connection to whether or not its 112 * associated <tt>next</tt> method will block. 113 * 114 * <p> The {@link #findInLine}, {@link #findWithinHorizon}, and {@link #skip} 115 * methods operate independently of the delimiter pattern. These methods will 116 * attempt to match the specified pattern with no regard to delimiters in the 117 * input and thus can be used in special circumstances where delimiters are 118 * not relevant. These methods may block waiting for more input. 119 * 120 * <p>When a scanner throws an {@link InputMismatchException}, the scanner 121 * will not pass the token that caused the exception, so that it may be 122 * retrieved or skipped via some other method. 123 * 124 * <p>Depending upon the type of delimiting pattern, empty tokens may be 125 * returned. For example, the pattern <tt>"\\s+"</tt> will return no empty 126 * tokens since it matches multiple instances of the delimiter. The delimiting 127 * pattern <tt>"\\s"</tt> could return empty tokens since it only passes one 128 * space at a time. 129 * 130 * <p> A scanner can read text from any object which implements the {@link 131 * java.lang.Readable} interface. 
If an invocation of the underlying 132 * readable's {@link java.lang.Readable#read} method throws an {@link 133 * java.io.IOException} then the scanner assumes that the end of the input 134 * has been reached. The most recent <tt>IOException</tt> thrown by the 135 * underlying readable can be retrieved via the {@link #ioException} method. 136 * 137 * <p>When a <code>Scanner</code> is closed, it will close its input source 138 * if the source implements the {@link java.io.Closeable} interface. 139 * 140 * <p>A <code>Scanner</code> is not safe for multithreaded use without 141 * external synchronization. 142 * 143 * <p>Unless otherwise mentioned, passing a <code>null</code> parameter into 144 * any method of a <code>Scanner</code> will cause a 145 * <code>NullPointerException</code> to be thrown. 146 * 147 * <p>A scanner will default to interpreting numbers as decimal unless a 148 * different radix has been set by using the {@link #useRadix} method. The 149 * {@link #reset} method will reset the value of the scanner's radix to 150 * <code>10</code> regardless of whether it was previously changed. 151 * 152 * <h3> <a name="localized-numbers">Localized numbers</a> </h3> 153 * 154 * <p> An instance of this class is capable of scanning numbers in the standard 155 * formats as well as in the formats of the scanner's locale. A scanner's 156 * <a name="initial-locale">initial locale </a>is the value returned by the {@link 157 * java.util.Locale#getDefault(Locale.Category) 158 * Locale.getDefault(Locale.Category.FORMAT)} method; it may be changed via the {@link 159 * #useLocale} method. The {@link #reset} method will reset the value of the 160 * scanner's locale to the initial locale regardless of whether it was 161 * previously changed. 
162 * 163 * <p>The localized formats are defined in terms of the following parameters, 164 * which for a particular locale are taken from that locale's {@link 165 * java.text.DecimalFormat DecimalFormat} object, <tt>df</tt>, and its 166 * {@link java.text.DecimalFormatSymbols DecimalFormatSymbols} object, 167 * <tt>dfs</tt>. 168 * 169 * <blockquote><dl> 170 * <dt><i>LocalGroupSeparator </i> 171 * <dd>The character used to separate thousands groups, 172 * <i>i.e.,</i> <tt>dfs.</tt>{@link 173 * java.text.DecimalFormatSymbols#getGroupingSeparator 174 * getGroupingSeparator()} 175 * <dt><i>LocalDecimalSeparator </i> 176 * <dd>The character used for the decimal point, 177 * <i>i.e.,</i> <tt>dfs.</tt>{@link 178 * java.text.DecimalFormatSymbols#getDecimalSeparator 179 * getDecimalSeparator()} 180 * <dt><i>LocalPositivePrefix </i> 181 * <dd>The string that appears before a positive number (may 182 * be empty), <i>i.e.,</i> <tt>df.</tt>{@link 183 * java.text.DecimalFormat#getPositivePrefix 184 * getPositivePrefix()} 185 * <dt><i>LocalPositiveSuffix </i> 186 * <dd>The string that appears after a positive number (may be 187 * empty), <i>i.e.,</i> <tt>df.</tt>{@link 188 * java.text.DecimalFormat#getPositiveSuffix 189 * getPositiveSuffix()} 190 * <dt><i>LocalNegativePrefix </i> 191 * <dd>The string that appears before a negative number (may 192 * be empty), <i>i.e.,</i> <tt>df.</tt>{@link 193 * java.text.DecimalFormat#getNegativePrefix 194 * getNegativePrefix()} 195 * <dt><i>LocalNegativeSuffix </i> 196 * <dd>The string that appears after a negative number (may be 197 * empty), <i>i.e.,</i> <tt>df.</tt>{@link 198 * java.text.DecimalFormat#getNegativeSuffix 199 * getNegativeSuffix()} 200 * <dt><i>LocalNaN </i> 201 * <dd>The string that represents not-a-number for 202 * floating-point values, 203 * <i>i.e.,</i> <tt>dfs.</tt>{@link 204 * java.text.DecimalFormatSymbols#getNaN 205 * getNaN()} 206 * <dt><i>LocalInfinity </i> 207 * <dd>The string that represents infinity for 
floating-point 208 * values, <i>i.e.,</i> <tt>dfs.</tt>{@link 209 * java.text.DecimalFormatSymbols#getInfinity 210 * getInfinity()} 211 * </dl></blockquote> 212 * 213 * <h4> <a name="number-syntax">Number syntax</a> </h4> 214 * 215 * <p> The strings that can be parsed as numbers by an instance of this class 216 * are specified in terms of the following regular-expression grammar, where 217 * Rmax is the highest digit in the radix being used (for example, Rmax is 9 in base 10). 218 * 219 * <dl> 220 * <dt><i>NonAsciiDigit</i>: 221 * <dd>A non-ASCII character c for which 222 * {@link java.lang.Character#isDigit Character.isDigit}<tt>(c)</tt> 223 * returns true 224 * 225 * <dt><i>Non0Digit</i>: 226 * <dd><tt>[1-</tt><i>Rmax</i><tt>] | </tt><i>NonAsciiDigit</i> 227 * 228 * <dt><i>Digit</i>: 229 * <dd><tt>[0-</tt><i>Rmax</i><tt>] | </tt><i>NonAsciiDigit</i> 230 * 231 * <dt><i>GroupedNumeral</i>: 232 * <dd><tt>( </tt><i>Non0Digit</i> 233 * <i>Digit</i><tt>? 234 * </tt><i>Digit</i><tt>?</tt> 235 * <dd> <tt>( </tt><i>LocalGroupSeparator</i> 236 * <i>Digit</i> 237 * <i>Digit</i> 238 * <i>Digit</i><tt> )+ )</tt> 239 * 240 * <dt><i>Numeral</i>: 241 * <dd><tt>( ( </tt><i>Digit</i><tt>+ ) 242 * | </tt><i>GroupedNumeral</i><tt> )</tt> 243 * 244 * <dt><a name="Integer-regex"><i>Integer</i>:</a> 245 * <dd><tt>( [-+]? ( </tt><i>Numeral</i><tt> 246 * ) )</tt> 247 * <dd><tt>| </tt><i>LocalPositivePrefix</i> <i>Numeral</i> 248 * <i>LocalPositiveSuffix</i> 249 * <dd><tt>| </tt><i>LocalNegativePrefix</i> <i>Numeral</i> 250 * <i>LocalNegativeSuffix</i> 251 * 252 * <dt><i>DecimalNumeral</i>: 253 * <dd><i>Numeral</i> 254 * <dd><tt>| </tt><i>Numeral</i> 255 * <i>LocalDecimalSeparator</i> 256 * <i>Digit</i><tt>*</tt> 257 * <dd><tt>| </tt><i>LocalDecimalSeparator</i> 258 * <i>Digit</i><tt>+</tt> 259 * 260 * <dt><i>Exponent</i>: 261 * <dd><tt>( [eE] [+-]? </tt><i>Digit</i><tt>+ )</tt> 262 * 263 * <dt><a name="Decimal-regex"><i>Decimal</i>:</a> 264 * <dd><tt>( [-+]? 
</tt><i>DecimalNumeral</i> 265 * <i>Exponent</i><tt>? )</tt> 266 * <dd><tt>| </tt><i>LocalPositivePrefix</i> 267 * <i>DecimalNumeral</i> 268 * <i>LocalPositiveSuffix</i> 269 * <i>Exponent</i><tt>?</tt> 270 * <dd><tt>| </tt><i>LocalNegativePrefix</i> 271 * <i>DecimalNumeral</i> 272 * <i>LocalNegativeSuffix</i> 273 * <i>Exponent</i><tt>?</tt> 274 * 275 * <dt><i>HexFloat</i>: 276 * <dd><tt>[-+]? 0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+ 277 * ([pP][-+]?[0-9]+)?</tt> 278 * 279 * <dt><i>NonNumber</i>: 280 * <dd><tt>NaN 281 * | </tt><i>LocalNaN</i><tt> 282 * | Infinity 283 * | </tt><i>LocalInfinity</i> 284 * 285 * <dt><i>SignedNonNumber</i>: 286 * <dd><tt>( [-+]? </tt><i>NonNumber</i><tt> )</tt> 287 * <dd><tt>| </tt><i>LocalPositivePrefix</i> 288 * <i>NonNumber</i> 289 * <i>LocalPositiveSuffix</i> 290 * <dd><tt>| </tt><i>LocalNegativePrefix</i> 291 * <i>NonNumber</i> 292 * <i>LocalNegativeSuffix</i> 293 * 294 * <dt><a name="Float-regex"><i>Float</i></a>: 295 * <dd><i>Decimal</i> 296 * <tt>| </tt><i>HexFloat</i> 297 * <tt>| </tt><i>SignedNonNumber</i> 298 * 299 * </dl> 300 * <p>Whitespace is not significant in the above regular expressions. 
301 * 302 * @since 1.5 303 */ 304 public final class Scanner implements Iterator<String>, Closeable { 305 306 // Internal buffer used to hold input 307 private CharBuffer buf; 308 309 // Size of internal character buffer 310 private static final int BUFFER_SIZE = 1024; // change to 1024; 311 312 // The index into the buffer currently held by the Scanner 313 private int position; 314 315 // Internal matcher used for finding delimiters 316 private Matcher matcher; 317 318 // Pattern used to delimit tokens 319 private Pattern delimPattern; 320 321 // Pattern found in last hasNext operation 322 private Pattern hasNextPattern; 323 324 // Position after last hasNext operation 325 private int hasNextPosition; 326 327 // Result after last hasNext operation 328 private String hasNextResult; 329 330 // The input source 331 private Readable source; 332 333 // Boolean is true if source is done 334 private boolean sourceClosed = false; 335 336 // Boolean indicating more input is required 337 private boolean needInput = false; 338 339 // Boolean indicating if a delim has been skipped this operation 340 private boolean skipped = false; 341 342 // A store of a position that the scanner may fall back to 343 private int savedScannerPosition = -1; 344 345 // A cache of the last primitive type scanned 346 private Object typeCache = null; 347 348 // Boolean indicating if a match result is available 349 private boolean matchValid = false; 350 351 // Boolean indicating if this scanner has been closed 352 private boolean closed = false; 353 354 // The current radix used by this scanner 355 private int radix = 10; 356 357 // The default radix for this scanner 358 private int defaultRadix = 10; 359 360 // The locale used by this scanner 361 private Locale locale = null; 362 363 // A cache of the last few recently used Patterns 364 private LRUCache<String,Pattern> patternCache = 365 new LRUCache<String,Pattern>(7) { 366 protected Pattern create(String s) { 367 return Pattern.compile(s); 368 
} 369 protected boolean hasName(Pattern p, String s) { 370 return p.pattern().equals(s); 371 } 372 }; 373 374 // A holder of the last IOException encountered 375 private IOException lastException; 376 377 // A pattern for java whitespace 378 private static Pattern WHITESPACE_PATTERN = Pattern.compile( 379 "\\p{javaWhitespace}+"); 380 381 // A pattern for any token 382 private static Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*"); 383 384 // A pattern for non-ASCII digits 385 private static Pattern NON_ASCII_DIGIT = Pattern.compile( 386 "[\\p{javaDigit}&&[^0-9]]"); 387 388 // Fields and methods to support scanning primitive types 389 390 /** 391 * Locale dependent values used to scan numbers 392 */ 393 private String groupSeparator = "\\,"; 394 private String decimalSeparator = "\\."; 395 private String nanString = "NaN"; 396 private String infinityString = "Infinity"; 397 private String positivePrefix = ""; 398 private String negativePrefix = "\\-"; 399 private String positiveSuffix = ""; 400 private String negativeSuffix = ""; 401 402 /** 403 * Fields and an accessor method to match booleans 404 */ 405 private static volatile Pattern boolPattern; 406 private static final String BOOLEAN_PATTERN = "true|false"; 407 private static Pattern boolPattern() { 408 Pattern bp = boolPattern; 409 if (bp == null) 410 boolPattern = bp = Pattern.compile(BOOLEAN_PATTERN, 411 Pattern.CASE_INSENSITIVE); 412 return bp; 413 } 414 415 /** 416 * Fields and methods to match bytes, shorts, ints, and longs 417 */ 418 private Pattern integerPattern; 419 private String digits = "0123456789abcdefghijklmnopqrstuvwxyz"; 420 private String non0Digit = "[\\p{javaDigit}&&[^0]]"; 421 private int SIMPLE_GROUP_INDEX = 5; 422 private String buildIntegerPatternString() { 423 String radixDigits = digits.substring(0, radix); 424 // \\p{javaDigit} is not guaranteed to be appropriate 425 // here but what can we do? 
The final authority will be 426 // whatever parse method is invoked, so ultimately the 427 // Scanner will do the right thing 428 String digit = "((?i)["+radixDigits+"]|\\p{javaDigit})"; 429 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 430 groupSeparator+digit+digit+digit+")+)"; 431 // digit++ is the possessive form which is necessary for reducing 432 // backtracking that would otherwise cause unacceptable performance 433 String numeral = "(("+ digit+"++)|"+groupedNumeral+")"; 434 String javaStyleInteger = "([-+]?(" + numeral + "))"; 435 String negativeInteger = negativePrefix + numeral + negativeSuffix; 436 String positiveInteger = positivePrefix + numeral + positiveSuffix; 437 return "("+ javaStyleInteger + ")|(" + 438 positiveInteger + ")|(" + 439 negativeInteger + ")"; 440 } 441 private Pattern integerPattern() { 442 if (integerPattern == null) { 443 integerPattern = patternCache.forName(buildIntegerPatternString()); 444 } 445 return integerPattern; 446 } 447 448 /** 449 * Fields and an accessor method to match line separators 450 */ 451 private static volatile Pattern separatorPattern; 452 private static volatile Pattern linePattern; 453 private static final String LINE_SEPARATOR_PATTERN = 454 "\r\n|[\n\r\u2028\u2029\u0085]"; 455 private static final String LINE_PATTERN = ".*("+LINE_SEPARATOR_PATTERN+")|.+$"; 456 457 private static Pattern separatorPattern() { 458 Pattern sp = separatorPattern; 459 if (sp == null) 460 separatorPattern = sp = Pattern.compile(LINE_SEPARATOR_PATTERN); 461 return sp; 462 } 463 464 private static Pattern linePattern() { 465 Pattern lp = linePattern; 466 if (lp == null) 467 linePattern = lp = Pattern.compile(LINE_PATTERN); 468 return lp; 469 } 470 471 /** 472 * Fields and methods to match floats and doubles 473 */ 474 private Pattern floatPattern; 475 private Pattern decimalPattern; 476 private void buildFloatAndDecimalPattern() { 477 // \\p{javaDigit} may not be perfect, see above 478 String digit = 
"([0-9]|(\\p{javaDigit}))"; 479 String exponent = "([eE][+-]?"+digit+"+)?"; 480 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 481 groupSeparator+digit+digit+digit+")+)"; 482 // Once again digit++ is used for performance, as above 483 String numeral = "(("+digit+"++)|"+groupedNumeral+")"; 484 String decimalNumeral = "("+numeral+"|"+numeral + 485 decimalSeparator + digit + "*+|"+ decimalSeparator + 486 digit + "++)"; 487 String nonNumber = "(NaN|"+nanString+"|Infinity|"+ 488 infinityString+")"; 489 String positiveFloat = "(" + positivePrefix + decimalNumeral + 490 positiveSuffix + exponent + ")"; 491 String negativeFloat = "(" + negativePrefix + decimalNumeral + 492 negativeSuffix + exponent + ")"; 493 String decimal = "(([-+]?" + decimalNumeral + exponent + ")|"+ 494 positiveFloat + "|" + negativeFloat + ")"; 495 String hexFloat = 496 "[-+]?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP][-+]?[0-9]+)?"; 497 String positiveNonNumber = "(" + positivePrefix + nonNumber + 498 positiveSuffix + ")"; 499 String negativeNonNumber = "(" + negativePrefix + nonNumber + 500 negativeSuffix + ")"; 501 String signedNonNumber = "(([-+]?"+nonNumber+")|" + 502 positiveNonNumber + "|" + 503 negativeNonNumber + ")"; 504 floatPattern = Pattern.compile(decimal + "|" + hexFloat + "|" + 505 signedNonNumber); 506 decimalPattern = Pattern.compile(decimal); 507 } 508 private Pattern floatPattern() { 509 if (floatPattern == null) { 510 buildFloatAndDecimalPattern(); 511 } 512 return floatPattern; 513 } 514 private Pattern decimalPattern() { 515 if (decimalPattern == null) { 516 buildFloatAndDecimalPattern(); 517 } 518 return decimalPattern; 519 } 520 521 // Constructors 522 523 /** 524 * Constructs a <code>Scanner</code> that returns values scanned 525 * from the specified source delimited by the specified pattern. 
526 * 527 * @param source A character source implementing the Readable interface 528 * @param pattern A delimiting pattern 529 */ 530 private Scanner(Readable source, Pattern pattern) { 531 assert source != null : "source should not be null"; 532 assert pattern != null : "pattern should not be null"; 533 this.source = source; 534 delimPattern = pattern; 535 buf = CharBuffer.allocate(BUFFER_SIZE); 536 buf.limit(0); 537 matcher = delimPattern.matcher(buf); 538 matcher.useTransparentBounds(true); 539 matcher.useAnchoringBounds(false); 540 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 541 } 542 543 /** 544 * Constructs a new <code>Scanner</code> that produces values scanned 545 * from the specified source. 546 * 547 * @param source A character source implementing the {@link Readable} 548 * interface 549 */ 550 public Scanner(Readable source) { 551 this(Objects.requireNonNull(source, "source"), WHITESPACE_PATTERN); 552 } 553 554 /** 555 * Constructs a new <code>Scanner</code> that produces values scanned 556 * from the specified input stream. Bytes from the stream are converted 557 * into characters using the underlying platform's 558 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 559 * 560 * @param source An input stream to be scanned 561 */ 562 public Scanner(InputStream source) { 563 this(new InputStreamReader(source), WHITESPACE_PATTERN); 564 } 565 566 /** 567 * Constructs a new <code>Scanner</code> that produces values scanned 568 * from the specified input stream. Bytes from the stream are converted 569 * into characters using the specified charset. 
570 * 571 * @param source An input stream to be scanned 572 * @param charsetName The encoding type used to convert bytes from the 573 * stream into characters to be scanned 574 * @throws IllegalArgumentException if the specified character set 575 * does not exist 576 */ 577 public Scanner(InputStream source, String charsetName) { 578 this(makeReadable(Objects.requireNonNull(source, "source"), toCharset(charsetName)), 579 WHITESPACE_PATTERN); 580 } 581 582 /** 583 * Returns a charset object for the given charset name. 584 * @throws NullPointerException is csn is null 585 * @throws IllegalArgumentException if the charset is not supported 586 */ 587 private static Charset toCharset(String csn) { 588 Objects.requireNonNull(csn, "charsetName"); 589 try { 590 return Charset.forName(csn); 591 } catch (IllegalCharsetNameException|UnsupportedCharsetException e) { 592 // IllegalArgumentException should be thrown 593 throw new IllegalArgumentException(e); 594 } 595 } 596 597 private static Readable makeReadable(InputStream source, Charset charset) { 598 return new InputStreamReader(source, charset); 599 } 600 601 /** 602 * Constructs a new <code>Scanner</code> that produces values scanned 603 * from the specified file. Bytes from the file are converted into 604 * characters using the underlying platform's 605 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 606 * 607 * @param source A file to be scanned 608 * @throws FileNotFoundException if source is not found 609 */ 610 public Scanner(File source) throws FileNotFoundException { 611 this((ReadableByteChannel)(new FileInputStream(source).getChannel())); 612 } 613 614 /** 615 * Constructs a new <code>Scanner</code> that produces values scanned 616 * from the specified file. Bytes from the file are converted into 617 * characters using the specified charset. 
618 * 619 * @param source A file to be scanned 620 * @param charsetName The encoding type used to convert bytes from the file 621 * into characters to be scanned 622 * @throws FileNotFoundException if source is not found 623 * @throws IllegalArgumentException if the specified encoding is 624 * not found 625 */ 626 public Scanner(File source, String charsetName) 627 throws FileNotFoundException 628 { 629 this(Objects.requireNonNull(source), toDecoder(charsetName)); 630 } 631 632 private Scanner(File source, CharsetDecoder dec) 633 throws FileNotFoundException 634 { 635 this(makeReadable((ReadableByteChannel)(new FileInputStream(source).getChannel()), dec)); 636 } 637 638 private static CharsetDecoder toDecoder(String charsetName) { 639 Objects.requireNonNull(charsetName, "charsetName"); 640 try { 641 return Charset.forName(charsetName).newDecoder(); 642 } catch (IllegalCharsetNameException|UnsupportedCharsetException unused) { 643 throw new IllegalArgumentException(charsetName); 644 } 645 } 646 647 private static Readable makeReadable(ReadableByteChannel source, 648 CharsetDecoder dec) { 649 return Channels.newReader(source, dec, -1); 650 } 651 652 /** 653 * Constructs a new <code>Scanner</code> that produces values scanned 654 * from the specified file. Bytes from the file are converted into 655 * characters using the underlying platform's 656 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 657 * 658 * @param source 659 * the path to the file to be scanned 660 * @throws IOException 661 * if an I/O error occurs opening source 662 * 663 * @since 1.7 664 */ 665 public Scanner(Path source) 666 throws IOException 667 { 668 this(Files.newInputStream(source)); 669 } 670 671 /** 672 * Constructs a new <code>Scanner</code> that produces values scanned 673 * from the specified file. Bytes from the file are converted into 674 * characters using the specified charset. 
675 * 676 * @param source 677 * the path to the file to be scanned 678 * @param charsetName 679 * The encoding type used to convert bytes from the file 680 * into characters to be scanned 681 * @throws IOException 682 * if an I/O error occurs opening source 683 * @throws IllegalArgumentException 684 * if the specified encoding is not found 685 * @since 1.7 686 */ 687 public Scanner(Path source, String charsetName) throws IOException { 688 this(Objects.requireNonNull(source), toCharset(charsetName)); 689 } 690 691 private Scanner(Path source, Charset charset) throws IOException { 692 this(makeReadable(Files.newInputStream(source), charset)); 693 } 694 695 /** 696 * Constructs a new <code>Scanner</code> that produces values scanned 697 * from the specified string. 698 * 699 * @param source A string to scan 700 */ 701 public Scanner(String source) { 702 this(new StringReader(source), WHITESPACE_PATTERN); 703 } 704 705 /** 706 * Constructs a new <code>Scanner</code> that produces values scanned 707 * from the specified channel. Bytes from the source are converted into 708 * characters using the underlying platform's 709 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 710 * 711 * @param source A channel to scan 712 */ 713 public Scanner(ReadableByteChannel source) { 714 this(makeReadable(Objects.requireNonNull(source, "source")), 715 WHITESPACE_PATTERN); 716 } 717 718 private static Readable makeReadable(ReadableByteChannel source) { 719 return makeReadable(source, Charset.defaultCharset().newDecoder()); 720 } 721 722 /** 723 * Constructs a new <code>Scanner</code> that produces values scanned 724 * from the specified channel. Bytes from the source are converted into 725 * characters using the specified charset. 
*
     * @param source A channel to scan
     * @param charsetName The encoding type used to convert bytes from the
     *        channel into characters to be scanned
     * @throws IllegalArgumentException if the specified character set
     *         does not exist
     */
    public Scanner(ReadableByteChannel source, String charsetName) {
        this(makeReadable(Objects.requireNonNull(source, "source"), toDecoder(charsetName)),
             WHITESPACE_PATTERN);
    }

    // Private primitives used to support scanning

    // Records the current position as the fallback position
    private void saveState() {
        savedScannerPosition = position;
    }

    // Moves back to the fallback position, discards it, and clears the
    // delimiter-skipped flag
    private void revertState() {
        position = savedScannerPosition;
        savedScannerPosition = -1;
        skipped = false;
    }

    // Same as revertState(), then hands back the given value unchanged so
    // the call can be used directly in a return statement
    private boolean revertState(boolean b) {
        revertState();
        return b;
    }

    // Caches the matcher's current match: its text, end index and pattern
    private void cacheResult() {
        cacheResult(matcher.group());
    }

    // Caches the given result text together with the matcher's current
    // end index and pattern
    private void cacheResult(String result) {
        hasNextResult = result;
        hasNextPosition = matcher.end();
        hasNextPattern = matcher.pattern();
    }

    // Clears both regular cache and type cache
    private void clearCaches() {
        hasNextPattern = null;
        typeCache = null;
    }

    // Consumes the cached result: advances to the cached position, clears
    // both the regular cache and the type cache, and returns the cached text
    private String getCachedResult() {
        position = hasNextPosition;
        clearCaches();
        return hasNextResult;
    }

    // Advances to the cached position and clears both the regular cache and
    // the type cache; fails if the scanner has been closed
    private void useTypeCache() {
        if (closed) {
            throw new IllegalStateException("Scanner closed");
        }
        position = hasNextPosition;
        clearCaches();
    }

    // Tries to read more input. May block.
793 private void readInput() { 794 if (buf.limit() == buf.capacity()) 795 makeSpace(); 796 797 // Prepare to receive data 798 int p = buf.position(); 799 buf.position(buf.limit()); 800 buf.limit(buf.capacity()); 801 802 int n = 0; 803 try { 804 n = source.read(buf); 805 } catch (IOException ioe) { 806 lastException = ioe; 807 n = -1; 808 } 809 810 if (n == -1) { 811 sourceClosed = true; 812 needInput = false; 813 } 814 815 if (n > 0) 816 needInput = false; 817 818 // Restore current position and limit for reading 819 buf.limit(buf.position()); 820 buf.position(p); 821 } 822 823 // After this method is called there will either be an exception 824 // or else there will be space in the buffer 825 private boolean makeSpace() { 826 clearCaches(); 827 int offset = savedScannerPosition == -1 ? 828 position : savedScannerPosition; 829 buf.position(offset); 830 // Gain space by compacting buffer 831 if (offset > 0) { 832 buf.compact(); 833 translateSavedIndexes(offset); 834 position -= offset; 835 buf.flip(); 836 return true; 837 } 838 // Gain space by growing buffer 839 int newSize = buf.capacity() * 2; 840 CharBuffer newBuf = CharBuffer.allocate(newSize); 841 newBuf.put(buf); 842 newBuf.flip(); 843 translateSavedIndexes(offset); 844 position -= offset; 845 buf = newBuf; 846 matcher.reset(buf); 847 return true; 848 } 849 850 // When a buffer compaction/reallocation occurs the saved indexes must 851 // be modified appropriately 852 private void translateSavedIndexes(int offset) { 853 if (savedScannerPosition != -1) 854 savedScannerPosition -= offset; 855 } 856 857 // If we are at the end of input then NoSuchElement; 858 // If there is still input left then InputMismatch 859 private void throwFor() { 860 skipped = false; 861 if ((sourceClosed) && (position == buf.limit())) 862 throw new NoSuchElementException(); 863 else 864 throw new InputMismatchException(); 865 } 866 867 // Returns true if a complete token or partial token is in the buffer. 
868 // It is not necessary to find a complete token since a partial token 869 // means that there will be another token with or without more input. 870 private boolean hasTokenInBuffer() { 871 matchValid = false; 872 matcher.usePattern(delimPattern); 873 matcher.region(position, buf.limit()); 874 875 // Skip delims first 876 if (matcher.lookingAt()) 877 position = matcher.end(); 878 879 // If we are sitting at the end, no more tokens in buffer 880 if (position == buf.limit()) 881 return false; 882 883 return true; 884 } 885 886 /* 887 * Returns a "complete token" that matches the specified pattern 888 * 889 * A token is complete if surrounded by delims; a partial token 890 * is prefixed by delims but not postfixed by them 891 * 892 * The position is advanced to the end of that complete token 893 * 894 * Pattern == null means accept any token at all 895 * 896 * Triple return: 897 * 1. valid string means it was found 898 * 2. null with needInput=false means we won't ever find it 899 * 3. null with needInput=true means try again after readInput 900 */ 901 private String getCompleteTokenInBuffer(Pattern pattern) { 902 matchValid = false; 903 904 // Skip delims first 905 matcher.usePattern(delimPattern); 906 if (!skipped) { // Enforcing only one skip of leading delims 907 matcher.region(position, buf.limit()); 908 if (matcher.lookingAt()) { 909 // If more input could extend the delimiters then we must wait 910 // for more input 911 if (matcher.hitEnd() && !sourceClosed) { 912 needInput = true; 913 return null; 914 } 915 // The delims were whole and the matcher should skip them 916 skipped = true; 917 position = matcher.end(); 918 } 919 } 920 921 // If we are sitting at the end, no more tokens in buffer 922 if (position == buf.limit()) { 923 if (sourceClosed) 924 return null; 925 needInput = true; 926 return null; 927 } 928 929 // Must look for next delims. 
Simply attempting to match the 930 // pattern at this point may find a match but it might not be 931 // the first longest match because of missing input, or it might 932 // match a partial token instead of the whole thing. 933 934 // Then look for next delims 935 matcher.region(position, buf.limit()); 936 boolean foundNextDelim = matcher.find(); 937 if (foundNextDelim && (matcher.end() == position)) { 938 // Zero length delimiter match; we should find the next one 939 // using the automatic advance past a zero length match; 940 // Otherwise we have just found the same one we just skipped 941 foundNextDelim = matcher.find(); 942 } 943 if (foundNextDelim) { 944 // In the rare case that more input could cause the match 945 // to be lost and there is more input coming we must wait 946 // for more input. Note that hitting the end is okay as long 947 // as the match cannot go away. It is the beginning of the 948 // next delims we want to be sure about, we don't care if 949 // they potentially extend further. 950 if (matcher.requireEnd() && !sourceClosed) { 951 needInput = true; 952 return null; 953 } 954 int tokenEnd = matcher.start(); 955 // There is a complete token. 
956 if (pattern == null) { 957 // Must continue with match to provide valid MatchResult 958 pattern = FIND_ANY_PATTERN; 959 } 960 // Attempt to match against the desired pattern 961 matcher.usePattern(pattern); 962 matcher.region(position, tokenEnd); 963 if (matcher.matches()) { 964 String s = matcher.group(); 965 position = matcher.end(); 966 return s; 967 } else { // Complete token but it does not match 968 return null; 969 } 970 } 971 972 // If we can't find the next delims but no more input is coming, 973 // then we can treat the remainder as a whole token 974 if (sourceClosed) { 975 if (pattern == null) { 976 // Must continue with match to provide valid MatchResult 977 pattern = FIND_ANY_PATTERN; 978 } 979 // Last token; Match the pattern here or throw 980 matcher.usePattern(pattern); 981 matcher.region(position, buf.limit()); 982 if (matcher.matches()) { 983 String s = matcher.group(); 984 position = matcher.end(); 985 return s; 986 } 987 // Last piece does not match 988 return null; 989 } 990 991 // There is a partial token in the buffer; must read more 992 // to complete it 993 needInput = true; 994 return null; 995 } 996 997 // Finds the specified pattern in the buffer up to horizon. 998 // Returns a match for the specified input pattern. 
999 private String findPatternInBuffer(Pattern pattern, int horizon) { 1000 matchValid = false; 1001 matcher.usePattern(pattern); 1002 int bufferLimit = buf.limit(); 1003 int horizonLimit = -1; 1004 int searchLimit = bufferLimit; 1005 if (horizon > 0) { 1006 horizonLimit = position + horizon; 1007 if (horizonLimit < bufferLimit) 1008 searchLimit = horizonLimit; 1009 } 1010 matcher.region(position, searchLimit); 1011 if (matcher.find()) { 1012 if (matcher.hitEnd() && (!sourceClosed)) { 1013 // The match may be longer if didn't hit horizon or real end 1014 if (searchLimit != horizonLimit) { 1015 // Hit an artificial end; try to extend the match 1016 needInput = true; 1017 return null; 1018 } 1019 // The match could go away depending on what is next 1020 if ((searchLimit == horizonLimit) && matcher.requireEnd()) { 1021 // Rare case: we hit the end of input and it happens 1022 // that it is at the horizon and the end of input is 1023 // required for the match. 1024 needInput = true; 1025 return null; 1026 } 1027 } 1028 // Did not hit end, or hit real end, or hit horizon 1029 position = matcher.end(); 1030 return matcher.group(); 1031 } 1032 1033 if (sourceClosed) 1034 return null; 1035 1036 // If there is no specified horizon, or if we have not searched 1037 // to the specified horizon yet, get more input 1038 if ((horizon == 0) || (searchLimit != horizonLimit)) 1039 needInput = true; 1040 return null; 1041 } 1042 1043 // Returns a match for the specified input pattern anchored at 1044 // the current position 1045 private String matchPatternInBuffer(Pattern pattern) { 1046 matchValid = false; 1047 matcher.usePattern(pattern); 1048 matcher.region(position, buf.limit()); 1049 if (matcher.lookingAt()) { 1050 if (matcher.hitEnd() && (!sourceClosed)) { 1051 // Get more input and try again 1052 needInput = true; 1053 return null; 1054 } 1055 position = matcher.end(); 1056 return matcher.group(); 1057 } 1058 1059 if (sourceClosed) 1060 return null; 1061 1062 // Read more to 
find pattern 1063 needInput = true; 1064 return null; 1065 } 1066 1067 // Throws if the scanner is closed 1068 private void ensureOpen() { 1069 if (closed) 1070 throw new IllegalStateException("Scanner closed"); 1071 } 1072 1073 // Public methods 1074 1075 /** 1076 * Closes this scanner. 1077 * 1078 * <p> If this scanner has not yet been closed then if its underlying 1079 * {@linkplain java.lang.Readable readable} also implements the {@link 1080 * java.io.Closeable} interface then the readable's <tt>close</tt> method 1081 * will be invoked. If this scanner is already closed then invoking this 1082 * method will have no effect. 1083 * 1084 * <p>Attempting to perform search operations after a scanner has 1085 * been closed will result in an {@link IllegalStateException}. 1086 * 1087 */ 1088 public void close() { 1089 if (closed) 1090 return; 1091 if (source instanceof Closeable) { 1092 try { 1093 ((Closeable)source).close(); 1094 } catch (IOException ioe) { 1095 lastException = ioe; 1096 } 1097 } 1098 sourceClosed = true; 1099 source = null; 1100 closed = true; 1101 } 1102 1103 /** 1104 * Returns the <code>IOException</code> last thrown by this 1105 * <code>Scanner</code>'s underlying <code>Readable</code>. This method 1106 * returns <code>null</code> if no such exception exists. 1107 * 1108 * @return the last exception thrown by this scanner's readable 1109 */ 1110 public IOException ioException() { 1111 return lastException; 1112 } 1113 1114 /** 1115 * Returns the <code>Pattern</code> this <code>Scanner</code> is currently 1116 * using to match delimiters. 1117 * 1118 * @return this scanner's delimiting pattern. 1119 */ 1120 public Pattern delimiter() { 1121 return delimPattern; 1122 } 1123 1124 /** 1125 * Sets this scanner's delimiting pattern to the specified pattern. 
1126 * 1127 * @param pattern A delimiting pattern 1128 * @return this scanner 1129 */ 1130 public Scanner useDelimiter(Pattern pattern) { 1131 delimPattern = pattern; 1132 return this; 1133 } 1134 1135 /** 1136 * Sets this scanner's delimiting pattern to a pattern constructed from 1137 * the specified <code>String</code>. 1138 * 1139 * <p> An invocation of this method of the form 1140 * <tt>useDelimiter(pattern)</tt> behaves in exactly the same way as the 1141 * invocation <tt>useDelimiter(Pattern.compile(pattern))</tt>. 1142 * 1143 * <p> Invoking the {@link #reset} method will set the scanner's delimiter 1144 * to the <a href= "#default-delimiter">default</a>. 1145 * 1146 * @param pattern A string specifying a delimiting pattern 1147 * @return this scanner 1148 */ 1149 public Scanner useDelimiter(String pattern) { 1150 delimPattern = patternCache.forName(pattern); 1151 return this; 1152 } 1153 1154 /** 1155 * Returns this scanner's locale. 1156 * 1157 * <p>A scanner's locale affects many elements of its default 1158 * primitive matching regular expressions; see 1159 * <a href= "#localized-numbers">localized numbers</a> above. 1160 * 1161 * @return this scanner's locale 1162 */ 1163 public Locale locale() { 1164 return this.locale; 1165 } 1166 1167 /** 1168 * Sets this scanner's locale to the specified locale. 1169 * 1170 * <p>A scanner's locale affects many elements of its default 1171 * primitive matching regular expressions; see 1172 * <a href= "#localized-numbers">localized numbers</a> above. 1173 * 1174 * <p>Invoking the {@link #reset} method will set the scanner's locale to 1175 * the <a href= "#initial-locale">initial locale</a>. 
1176 * 1177 * @param locale A string specifying the locale to use 1178 * @return this scanner 1179 */ 1180 public Scanner useLocale(Locale locale) { 1181 if (locale.equals(this.locale)) 1182 return this; 1183 1184 this.locale = locale; 1185 DecimalFormat df = 1186 (DecimalFormat)NumberFormat.getNumberInstance(locale); 1187 DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale); 1188 1189 // These must be literalized to avoid collision with regex 1190 // metacharacters such as dot or parenthesis 1191 groupSeparator = "\\" + dfs.getGroupingSeparator(); 1192 decimalSeparator = "\\" + dfs.getDecimalSeparator(); 1193 1194 // Quoting the nonzero length locale-specific things 1195 // to avoid potential conflict with metacharacters 1196 nanString = "\\Q" + dfs.getNaN() + "\\E"; 1197 infinityString = "\\Q" + dfs.getInfinity() + "\\E"; 1198 positivePrefix = df.getPositivePrefix(); 1199 if (positivePrefix.length() > 0) 1200 positivePrefix = "\\Q" + positivePrefix + "\\E"; 1201 negativePrefix = df.getNegativePrefix(); 1202 if (negativePrefix.length() > 0) 1203 negativePrefix = "\\Q" + negativePrefix + "\\E"; 1204 positiveSuffix = df.getPositiveSuffix(); 1205 if (positiveSuffix.length() > 0) 1206 positiveSuffix = "\\Q" + positiveSuffix + "\\E"; 1207 negativeSuffix = df.getNegativeSuffix(); 1208 if (negativeSuffix.length() > 0) 1209 negativeSuffix = "\\Q" + negativeSuffix + "\\E"; 1210 1211 // Force rebuilding and recompilation of locale dependent 1212 // primitive patterns 1213 integerPattern = null; 1214 floatPattern = null; 1215 1216 return this; 1217 } 1218 1219 /** 1220 * Returns this scanner's default radix. 1221 * 1222 * <p>A scanner's radix affects elements of its default 1223 * number matching regular expressions; see 1224 * <a href= "#localized-numbers">localized numbers</a> above. 
1225 * 1226 * @return the default radix of this scanner 1227 */ 1228 public int radix() { 1229 return this.defaultRadix; 1230 } 1231 1232 /** 1233 * Sets this scanner's default radix to the specified radix. 1234 * 1235 * <p>A scanner's radix affects elements of its default 1236 * number matching regular expressions; see 1237 * <a href= "#localized-numbers">localized numbers</a> above. 1238 * 1239 * <p>If the radix is less than <code>Character.MIN_RADIX</code> 1240 * or greater than <code>Character.MAX_RADIX</code>, then an 1241 * <code>IllegalArgumentException</code> is thrown. 1242 * 1243 * <p>Invoking the {@link #reset} method will set the scanner's radix to 1244 * <code>10</code>. 1245 * 1246 * @param radix The radix to use when scanning numbers 1247 * @return this scanner 1248 * @throws IllegalArgumentException if radix is out of range 1249 */ 1250 public Scanner useRadix(int radix) { 1251 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) 1252 throw new IllegalArgumentException("radix:"+radix); 1253 1254 if (this.defaultRadix == radix) 1255 return this; 1256 this.defaultRadix = radix; 1257 // Force rebuilding and recompilation of radix dependent patterns 1258 integerPattern = null; 1259 return this; 1260 } 1261 1262 // The next operation should occur in the specified radix but 1263 // the default is left untouched. 1264 private void setRadix(int radix) { 1265 if (this.radix != radix) { 1266 // Force rebuilding and recompilation of radix dependent patterns 1267 integerPattern = null; 1268 this.radix = radix; 1269 } 1270 } 1271 1272 /** 1273 * Returns the match result of the last scanning operation performed 1274 * by this scanner. This method throws <code>IllegalStateException</code> 1275 * if no match has been performed, or if the last match was 1276 * not successful. 1277 * 1278 * <p>The various <code>next</code>methods of <code>Scanner</code> 1279 * make a match result available if they complete without throwing an 1280 * exception. 
For instance, after an invocation of the {@link #nextInt} 1281 * method that returned an int, this method returns a 1282 * <code>MatchResult</code> for the search of the 1283 * <a href="#Integer-regex"><i>Integer</i></a> regular expression 1284 * defined above. Similarly the {@link #findInLine}, 1285 * {@link #findWithinHorizon}, and {@link #skip} methods will make a 1286 * match available if they succeed. 1287 * 1288 * @return a match result for the last match operation 1289 * @throws IllegalStateException If no match result is available 1290 */ 1291 public MatchResult match() { 1292 if (!matchValid) 1293 throw new IllegalStateException("No match result available"); 1294 return matcher.toMatchResult(); 1295 } 1296 1297 /** 1298 * <p>Returns the string representation of this <code>Scanner</code>. The 1299 * string representation of a <code>Scanner</code> contains information 1300 * that may be useful for debugging. The exact format is unspecified. 1301 * 1302 * @return The string representation of this scanner 1303 */ 1304 public String toString() { 1305 StringBuilder sb = new StringBuilder(); 1306 sb.append("java.util.Scanner"); 1307 sb.append("[delimiters=").append(delimPattern).append(']'); 1308 sb.append("[position=").append(position).append(']'); 1309 sb.append("[match valid=").append(matchValid).append(']'); 1310 sb.append("[need input=").append(needInput).append(']'); 1311 sb.append("[source closed=").append(sourceClosed).append(']'); 1312 sb.append("[skipped=").append(skipped).append(']'); 1313 sb.append("[group separator=").append(groupSeparator).append(']'); 1314 sb.append("[decimal separator=").append(decimalSeparator).append(']'); 1315 sb.append("[positive prefix=").append(positivePrefix).append(']'); 1316 sb.append("[negative prefix=").append(negativePrefix).append(']'); 1317 sb.append("[positive suffix=").append(positiveSuffix).append(']'); 1318 sb.append("[negative suffix=").append(negativeSuffix).append(']'); 1319 sb.append("[NaN 
string=").append(nanString).append(']'); 1320 sb.append("[infinity string=").append(infinityString).append(']'); 1321 return sb.toString(); 1322 } 1323 1324 /** 1325 * Returns true if this scanner has another token in its input. 1326 * This method may block while waiting for input to scan. 1327 * The scanner does not advance past any input. 1328 * 1329 * @return true if and only if this scanner has another token 1330 * @throws IllegalStateException if this scanner is closed 1331 * @see java.util.Iterator 1332 */ 1333 public boolean hasNext() { 1334 ensureOpen(); 1335 saveState(); 1336 while (!sourceClosed) { 1337 if (hasTokenInBuffer()) 1338 return revertState(true); 1339 readInput(); 1340 } 1341 boolean result = hasTokenInBuffer(); 1342 return revertState(result); 1343 } 1344 1345 /** 1346 * Finds and returns the next complete token from this scanner. 1347 * A complete token is preceded and followed by input that matches 1348 * the delimiter pattern. This method may block while waiting for input 1349 * to scan, even if a previous invocation of {@link #hasNext} returned 1350 * <code>true</code>. 1351 * 1352 * @return the next token 1353 * @throws NoSuchElementException if no more tokens are available 1354 * @throws IllegalStateException if this scanner is closed 1355 * @see java.util.Iterator 1356 */ 1357 public String next() { 1358 ensureOpen(); 1359 clearCaches(); 1360 1361 while (true) { 1362 String token = getCompleteTokenInBuffer(null); 1363 if (token != null) { 1364 matchValid = true; 1365 skipped = false; 1366 return token; 1367 } 1368 if (needInput) 1369 readInput(); 1370 else 1371 throwFor(); 1372 } 1373 } 1374 1375 /** 1376 * The remove operation is not supported by this implementation of 1377 * <code>Iterator</code>. 1378 * 1379 * @throws UnsupportedOperationException if this method is invoked. 
1380 * @see java.util.Iterator 1381 */ 1382 public void remove() { 1383 throw new UnsupportedOperationException(); 1384 } 1385 1386 /** 1387 * Returns true if the next token matches the pattern constructed from the 1388 * specified string. The scanner does not advance past any input. 1389 * 1390 * <p> An invocation of this method of the form <tt>hasNext(pattern)</tt> 1391 * behaves in exactly the same way as the invocation 1392 * <tt>hasNext(Pattern.compile(pattern))</tt>. 1393 * 1394 * @param pattern a string specifying the pattern to scan 1395 * @return true if and only if this scanner has another token matching 1396 * the specified pattern 1397 * @throws IllegalStateException if this scanner is closed 1398 */ 1399 public boolean hasNext(String pattern) { 1400 return hasNext(patternCache.forName(pattern)); 1401 } 1402 1403 /** 1404 * Returns the next token if it matches the pattern constructed from the 1405 * specified string. If the match is successful, the scanner advances 1406 * past the input that matched the pattern. 1407 * 1408 * <p> An invocation of this method of the form <tt>next(pattern)</tt> 1409 * behaves in exactly the same way as the invocation 1410 * <tt>next(Pattern.compile(pattern))</tt>. 1411 * 1412 * @param pattern a string specifying the pattern to scan 1413 * @return the next token 1414 * @throws NoSuchElementException if no such tokens are available 1415 * @throws IllegalStateException if this scanner is closed 1416 */ 1417 public String next(String pattern) { 1418 return next(patternCache.forName(pattern)); 1419 } 1420 1421 /** 1422 * Returns true if the next complete token matches the specified pattern. 1423 * A complete token is prefixed and postfixed by input that matches 1424 * the delimiter pattern. This method may block while waiting for input. 1425 * The scanner does not advance past any input. 
1426 * 1427 * @param pattern the pattern to scan for 1428 * @return true if and only if this scanner has another token matching 1429 * the specified pattern 1430 * @throws IllegalStateException if this scanner is closed 1431 */ 1432 public boolean hasNext(Pattern pattern) { 1433 ensureOpen(); 1434 if (pattern == null) 1435 throw new NullPointerException(); 1436 hasNextPattern = null; 1437 saveState(); 1438 1439 while (true) { 1440 if (getCompleteTokenInBuffer(pattern) != null) { 1441 matchValid = true; 1442 cacheResult(); 1443 return revertState(true); 1444 } 1445 if (needInput) 1446 readInput(); 1447 else 1448 return revertState(false); 1449 } 1450 } 1451 1452 /** 1453 * Returns the next token if it matches the specified pattern. This 1454 * method may block while waiting for input to scan, even if a previous 1455 * invocation of {@link #hasNext(Pattern)} returned <code>true</code>. 1456 * If the match is successful, the scanner advances past the input that 1457 * matched the pattern. 1458 * 1459 * @param pattern the pattern to scan for 1460 * @return the next token 1461 * @throws NoSuchElementException if no more tokens are available 1462 * @throws IllegalStateException if this scanner is closed 1463 */ 1464 public String next(Pattern pattern) { 1465 ensureOpen(); 1466 if (pattern == null) 1467 throw new NullPointerException(); 1468 1469 // Did we already find this pattern? 1470 if (hasNextPattern == pattern) 1471 return getCachedResult(); 1472 clearCaches(); 1473 1474 // Search for the pattern 1475 while (true) { 1476 String token = getCompleteTokenInBuffer(pattern); 1477 if (token != null) { 1478 matchValid = true; 1479 skipped = false; 1480 return token; 1481 } 1482 if (needInput) 1483 readInput(); 1484 else 1485 throwFor(); 1486 } 1487 } 1488 1489 /** 1490 * Returns true if there is another line in the input of this scanner. 1491 * This method may block while waiting for input. The scanner does not 1492 * advance past any input. 
1493 * 1494 * @return true if and only if this scanner has another line of input 1495 * @throws IllegalStateException if this scanner is closed 1496 */ 1497 public boolean hasNextLine() { 1498 saveState(); 1499 1500 String result = findWithinHorizon(linePattern(), 0); 1501 if (result != null) { 1502 MatchResult mr = this.match(); 1503 String lineSep = mr.group(1); 1504 if (lineSep != null) { 1505 result = result.substring(0, result.length() - 1506 lineSep.length()); 1507 cacheResult(result); 1508 1509 } else { 1510 cacheResult(); 1511 } 1512 } 1513 revertState(); 1514 return (result != null); 1515 } 1516 1517 /** 1518 * Advances this scanner past the current line and returns the input 1519 * that was skipped. 1520 * 1521 * This method returns the rest of the current line, excluding any line 1522 * separator at the end. The position is set to the beginning of the next 1523 * line. 1524 * 1525 * <p>Since this method continues to search through the input looking 1526 * for a line separator, it may buffer all of the input searching for 1527 * the line to skip if no line separators are present. 
1528 * 1529 * @return the line that was skipped 1530 * @throws NoSuchElementException if no line was found 1531 * @throws IllegalStateException if this scanner is closed 1532 */ 1533 public String nextLine() { 1534 if (hasNextPattern == linePattern()) 1535 return getCachedResult(); 1536 clearCaches(); 1537 1538 String result = findWithinHorizon(linePattern, 0); 1539 if (result == null) 1540 throw new NoSuchElementException("No line found"); 1541 MatchResult mr = this.match(); 1542 String lineSep = mr.group(1); 1543 if (lineSep != null) 1544 result = result.substring(0, result.length() - lineSep.length()); 1545 if (result == null) 1546 throw new NoSuchElementException(); 1547 else 1548 return result; 1549 } 1550 1551 // Public methods that ignore delimiters 1552 1553 /** 1554 * Attempts to find the next occurrence of a pattern constructed from the 1555 * specified string, ignoring delimiters. 1556 * 1557 * <p>An invocation of this method of the form <tt>findInLine(pattern)</tt> 1558 * behaves in exactly the same way as the invocation 1559 * <tt>findInLine(Pattern.compile(pattern))</tt>. 1560 * 1561 * @param pattern a string specifying the pattern to search for 1562 * @return the text that matched the specified pattern 1563 * @throws IllegalStateException if this scanner is closed 1564 */ 1565 public String findInLine(String pattern) { 1566 return findInLine(patternCache.forName(pattern)); 1567 } 1568 1569 /** 1570 * Attempts to find the next occurrence of the specified pattern ignoring 1571 * delimiters. If the pattern is found before the next line separator, the 1572 * scanner advances past the input that matched and returns the string that 1573 * matched the pattern. 1574 * If no such pattern is detected in the input up to the next line 1575 * separator, then <code>null</code> is returned and the scanner's 1576 * position is unchanged. This method may block waiting for input that 1577 * matches the pattern. 
1578 * 1579 * <p>Since this method continues to search through the input looking 1580 * for the specified pattern, it may buffer all of the input searching for 1581 * the desired token if no line separators are present. 1582 * 1583 * @param pattern the pattern to scan for 1584 * @return the text that matched the specified pattern 1585 * @throws IllegalStateException if this scanner is closed 1586 */ 1587 public String findInLine(Pattern pattern) { 1588 ensureOpen(); 1589 if (pattern == null) 1590 throw new NullPointerException(); 1591 clearCaches(); 1592 // Expand buffer to include the next newline or end of input 1593 int endPosition = 0; 1594 saveState(); 1595 while (true) { 1596 String token = findPatternInBuffer(separatorPattern(), 0); 1597 if (token != null) { 1598 endPosition = matcher.start(); 1599 break; // up to next newline 1600 } 1601 if (needInput) { 1602 readInput(); 1603 } else { 1604 endPosition = buf.limit(); 1605 break; // up to end of input 1606 } 1607 } 1608 revertState(); 1609 int horizonForLine = endPosition - position; 1610 // If there is nothing between the current pos and the next 1611 // newline simply return null, invoking findWithinHorizon 1612 // with "horizon=0" will scan beyond the line bound. 1613 if (horizonForLine == 0) 1614 return null; 1615 // Search for the pattern 1616 return findWithinHorizon(pattern, horizonForLine); 1617 } 1618 1619 /** 1620 * Attempts to find the next occurrence of a pattern constructed from the 1621 * specified string, ignoring delimiters. 1622 * 1623 * <p>An invocation of this method of the form 1624 * <tt>findWithinHorizon(pattern)</tt> behaves in exactly the same way as 1625 * the invocation 1626 * <tt>findWithinHorizon(Pattern.compile(pattern, horizon))</tt>. 
1627 * 1628 * @param pattern a string specifying the pattern to search for 1629 * @param horizon the search horizon 1630 * @return the text that matched the specified pattern 1631 * @throws IllegalStateException if this scanner is closed 1632 * @throws IllegalArgumentException if horizon is negative 1633 */ 1634 public String findWithinHorizon(String pattern, int horizon) { 1635 return findWithinHorizon(patternCache.forName(pattern), horizon); 1636 } 1637 1638 /** 1639 * Attempts to find the next occurrence of the specified pattern. 1640 * 1641 * <p>This method searches through the input up to the specified 1642 * search horizon, ignoring delimiters. If the pattern is found the 1643 * scanner advances past the input that matched and returns the string 1644 * that matched the pattern. If no such pattern is detected then the 1645 * null is returned and the scanner's position remains unchanged. This 1646 * method may block waiting for input that matches the pattern. 1647 * 1648 * <p>A scanner will never search more than <code>horizon</code> code 1649 * points beyond its current position. Note that a match may be clipped 1650 * by the horizon; that is, an arbitrary match result may have been 1651 * different if the horizon had been larger. The scanner treats the 1652 * horizon as a transparent, non-anchoring bound (see {@link 1653 * Matcher#useTransparentBounds} and {@link Matcher#useAnchoringBounds}). 1654 * 1655 * <p>If horizon is <code>0</code>, then the horizon is ignored and 1656 * this method continues to search through the input looking for the 1657 * specified pattern without bound. In this case it may buffer all of 1658 * the input searching for the pattern. 1659 * 1660 * <p>If horizon is negative, then an IllegalArgumentException is 1661 * thrown. 
1662 * 1663 * @param pattern the pattern to scan for 1664 * @param horizon the search horizon 1665 * @return the text that matched the specified pattern 1666 * @throws IllegalStateException if this scanner is closed 1667 * @throws IllegalArgumentException if horizon is negative 1668 */ 1669 public String findWithinHorizon(Pattern pattern, int horizon) { 1670 ensureOpen(); 1671 if (pattern == null) 1672 throw new NullPointerException(); 1673 if (horizon < 0) 1674 throw new IllegalArgumentException("horizon < 0"); 1675 clearCaches(); 1676 1677 // Search for the pattern 1678 while (true) { 1679 String token = findPatternInBuffer(pattern, horizon); 1680 if (token != null) { 1681 matchValid = true; 1682 return token; 1683 } 1684 if (needInput) 1685 readInput(); 1686 else 1687 break; // up to end of input 1688 } 1689 return null; 1690 } 1691 1692 /** 1693 * Skips input that matches the specified pattern, ignoring delimiters. 1694 * This method will skip input if an anchored match of the specified 1695 * pattern succeeds. 1696 * 1697 * <p>If a match to the specified pattern is not found at the 1698 * current position, then no input is skipped and a 1699 * <tt>NoSuchElementException</tt> is thrown. 1700 * 1701 * <p>Since this method seeks to match the specified pattern starting at 1702 * the scanner's current position, patterns that can match a lot of 1703 * input (".*", for example) may cause the scanner to buffer a large 1704 * amount of input. 1705 * 1706 * <p>Note that it is possible to skip something without risking a 1707 * <code>NoSuchElementException</code> by using a pattern that can 1708 * match nothing, e.g., <code>sc.skip("[ \t]*")</code>. 
1709 * 1710 * @param pattern a string specifying the pattern to skip over 1711 * @return this scanner 1712 * @throws NoSuchElementException if the specified pattern is not found 1713 * @throws IllegalStateException if this scanner is closed 1714 */ 1715 public Scanner skip(Pattern pattern) { 1716 ensureOpen(); 1717 if (pattern == null) 1718 throw new NullPointerException(); 1719 clearCaches(); 1720 1721 // Search for the pattern 1722 while (true) { 1723 String token = matchPatternInBuffer(pattern); 1724 if (token != null) { 1725 matchValid = true; 1726 position = matcher.end(); 1727 return this; 1728 } 1729 if (needInput) 1730 readInput(); 1731 else 1732 throw new NoSuchElementException(); 1733 } 1734 } 1735 1736 /** 1737 * Skips input that matches a pattern constructed from the specified 1738 * string. 1739 * 1740 * <p> An invocation of this method of the form <tt>skip(pattern)</tt> 1741 * behaves in exactly the same way as the invocation 1742 * <tt>skip(Pattern.compile(pattern))</tt>. 1743 * 1744 * @param pattern a string specifying the pattern to skip over 1745 * @return this scanner 1746 * @throws IllegalStateException if this scanner is closed 1747 */ 1748 public Scanner skip(String pattern) { 1749 return skip(patternCache.forName(pattern)); 1750 } 1751 1752 // Convenience methods for scanning primitives 1753 1754 /** 1755 * Returns true if the next token in this scanner's input can be 1756 * interpreted as a boolean value using a case insensitive pattern 1757 * created from the string "true|false". The scanner does not 1758 * advance past the input that matched. 1759 * 1760 * @return true if and only if this scanner's next token is a valid 1761 * boolean value 1762 * @throws IllegalStateException if this scanner is closed 1763 */ 1764 public boolean hasNextBoolean() { 1765 return hasNext(boolPattern()); 1766 } 1767 1768 /** 1769 * Scans the next token of the input into a boolean value and returns 1770 * that value. 
This method will throw <code>InputMismatchException</code> 1771 * if the next token cannot be translated into a valid boolean value. 1772 * If the match is successful, the scanner advances past the input that 1773 * matched. 1774 * 1775 * @return the boolean scanned from the input 1776 * @throws InputMismatchException if the next token is not a valid boolean 1777 * @throws NoSuchElementException if input is exhausted 1778 * @throws IllegalStateException if this scanner is closed 1779 */ 1780 public boolean nextBoolean() { 1781 clearCaches(); 1782 return Boolean.parseBoolean(next(boolPattern())); 1783 } 1784 1785 /** 1786 * Returns true if the next token in this scanner's input can be 1787 * interpreted as a byte value in the default radix using the 1788 * {@link #nextByte} method. The scanner does not advance past any input. 1789 * 1790 * @return true if and only if this scanner's next token is a valid 1791 * byte value 1792 * @throws IllegalStateException if this scanner is closed 1793 */ 1794 public boolean hasNextByte() { 1795 return hasNextByte(defaultRadix); 1796 } 1797 1798 /** 1799 * Returns true if the next token in this scanner's input can be 1800 * interpreted as a byte value in the specified radix using the 1801 * {@link #nextByte} method. The scanner does not advance past any input. 1802 * 1803 * @param radix the radix used to interpret the token as a byte value 1804 * @return true if and only if this scanner's next token is a valid 1805 * byte value 1806 * @throws IllegalStateException if this scanner is closed 1807 */ 1808 public boolean hasNextByte(int radix) { 1809 setRadix(radix); 1810 boolean result = hasNext(integerPattern()); 1811 if (result) { // Cache it 1812 try { 1813 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 
1814 processIntegerToken(hasNextResult) : 1815 hasNextResult; 1816 typeCache = Byte.parseByte(s, radix); 1817 } catch (NumberFormatException nfe) { 1818 result = false; 1819 } 1820 } 1821 return result; 1822 } 1823 1824 /** 1825 * Scans the next token of the input as a <tt>byte</tt>. 1826 * 1827 * <p> An invocation of this method of the form 1828 * <tt>nextByte()</tt> behaves in exactly the same way as the 1829 * invocation <tt>nextByte(radix)</tt>, where <code>radix</code> 1830 * is the default radix of this scanner. 1831 * 1832 * @return the <tt>byte</tt> scanned from the input 1833 * @throws InputMismatchException 1834 * if the next token does not match the <i>Integer</i> 1835 * regular expression, or is out of range 1836 * @throws NoSuchElementException if input is exhausted 1837 * @throws IllegalStateException if this scanner is closed 1838 */ 1839 public byte nextByte() { 1840 return nextByte(defaultRadix); 1841 } 1842 1843 /** 1844 * Scans the next token of the input as a <tt>byte</tt>. 1845 * This method will throw <code>InputMismatchException</code> 1846 * if the next token cannot be translated into a valid byte value as 1847 * described below. If the translation is successful, the scanner advances 1848 * past the input that matched. 1849 * 1850 * <p> If the next token matches the <a 1851 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1852 * above then the token is converted into a <tt>byte</tt> value as if by 1853 * removing all locale specific prefixes, group separators, and locale 1854 * specific suffixes, then mapping non-ASCII digits into ASCII 1855 * digits via {@link Character#digit Character.digit}, prepending a 1856 * negative sign (-) if the locale specific negative prefixes and suffixes 1857 * were present, and passing the resulting string to 1858 * {@link Byte#parseByte(String, int) Byte.parseByte} with the 1859 * specified radix. 
1860 * 1861 * @param radix the radix used to interpret the token as a byte value 1862 * @return the <tt>byte</tt> scanned from the input 1863 * @throws InputMismatchException 1864 * if the next token does not match the <i>Integer</i> 1865 * regular expression, or is out of range 1866 * @throws NoSuchElementException if input is exhausted 1867 * @throws IllegalStateException if this scanner is closed 1868 */ 1869 public byte nextByte(int radix) { 1870 // Check cached result 1871 if ((typeCache != null) && (typeCache instanceof Byte) 1872 && this.radix == radix) { 1873 byte val = ((Byte)typeCache).byteValue(); 1874 useTypeCache(); 1875 return val; 1876 } 1877 setRadix(radix); 1878 clearCaches(); 1879 // Search for next byte 1880 try { 1881 String s = next(integerPattern()); 1882 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 1883 s = processIntegerToken(s); 1884 return Byte.parseByte(s, radix); 1885 } catch (NumberFormatException nfe) { 1886 position = matcher.start(); // don't skip bad token 1887 throw new InputMismatchException(nfe.getMessage()); 1888 } 1889 } 1890 1891 /** 1892 * Returns true if the next token in this scanner's input can be 1893 * interpreted as a short value in the default radix using the 1894 * {@link #nextShort} method. The scanner does not advance past any input. 1895 * 1896 * @return true if and only if this scanner's next token is a valid 1897 * short value in the default radix 1898 * @throws IllegalStateException if this scanner is closed 1899 */ 1900 public boolean hasNextShort() { 1901 return hasNextShort(defaultRadix); 1902 } 1903 1904 /** 1905 * Returns true if the next token in this scanner's input can be 1906 * interpreted as a short value in the specified radix using the 1907 * {@link #nextShort} method. The scanner does not advance past any input. 
1908 * 1909 * @param radix the radix used to interpret the token as a short value 1910 * @return true if and only if this scanner's next token is a valid 1911 * short value in the specified radix 1912 * @throws IllegalStateException if this scanner is closed 1913 */ 1914 public boolean hasNextShort(int radix) { 1915 setRadix(radix); 1916 boolean result = hasNext(integerPattern()); 1917 if (result) { // Cache it 1918 try { 1919 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 1920 processIntegerToken(hasNextResult) : 1921 hasNextResult; 1922 typeCache = Short.parseShort(s, radix); 1923 } catch (NumberFormatException nfe) { 1924 result = false; 1925 } 1926 } 1927 return result; 1928 } 1929 1930 /** 1931 * Scans the next token of the input as a <tt>short</tt>. 1932 * 1933 * <p> An invocation of this method of the form 1934 * <tt>nextShort()</tt> behaves in exactly the same way as the 1935 * invocation <tt>nextShort(radix)</tt>, where <code>radix</code> 1936 * is the default radix of this scanner. 1937 * 1938 * @return the <tt>short</tt> scanned from the input 1939 * @throws InputMismatchException 1940 * if the next token does not match the <i>Integer</i> 1941 * regular expression, or is out of range 1942 * @throws NoSuchElementException if input is exhausted 1943 * @throws IllegalStateException if this scanner is closed 1944 */ 1945 public short nextShort() { 1946 return nextShort(defaultRadix); 1947 } 1948 1949 /** 1950 * Scans the next token of the input as a <tt>short</tt>. 1951 * This method will throw <code>InputMismatchException</code> 1952 * if the next token cannot be translated into a valid short value as 1953 * described below. If the translation is successful, the scanner advances 1954 * past the input that matched. 
1955 * 1956 * <p> If the next token matches the <a 1957 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1958 * above then the token is converted into a <tt>short</tt> value as if by 1959 * removing all locale specific prefixes, group separators, and locale 1960 * specific suffixes, then mapping non-ASCII digits into ASCII 1961 * digits via {@link Character#digit Character.digit}, prepending a 1962 * negative sign (-) if the locale specific negative prefixes and suffixes 1963 * were present, and passing the resulting string to 1964 * {@link Short#parseShort(String, int) Short.parseShort} with the 1965 * specified radix. 1966 * 1967 * @param radix the radix used to interpret the token as a short value 1968 * @return the <tt>short</tt> scanned from the input 1969 * @throws InputMismatchException 1970 * if the next token does not match the <i>Integer</i> 1971 * regular expression, or is out of range 1972 * @throws NoSuchElementException if input is exhausted 1973 * @throws IllegalStateException if this scanner is closed 1974 */ 1975 public short nextShort(int radix) { 1976 // Check cached result 1977 if ((typeCache != null) && (typeCache instanceof Short) 1978 && this.radix == radix) { 1979 short val = ((Short)typeCache).shortValue(); 1980 useTypeCache(); 1981 return val; 1982 } 1983 setRadix(radix); 1984 clearCaches(); 1985 // Search for next short 1986 try { 1987 String s = next(integerPattern()); 1988 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 1989 s = processIntegerToken(s); 1990 return Short.parseShort(s, radix); 1991 } catch (NumberFormatException nfe) { 1992 position = matcher.start(); // don't skip bad token 1993 throw new InputMismatchException(nfe.getMessage()); 1994 } 1995 } 1996 1997 /** 1998 * Returns true if the next token in this scanner's input can be 1999 * interpreted as an int value in the default radix using the 2000 * {@link #nextInt} method. The scanner does not advance past any input. 
2001 * 2002 * @return true if and only if this scanner's next token is a valid 2003 * int value 2004 * @throws IllegalStateException if this scanner is closed 2005 */ 2006 public boolean hasNextInt() { 2007 return hasNextInt(defaultRadix); 2008 } 2009 2010 /** 2011 * Returns true if the next token in this scanner's input can be 2012 * interpreted as an int value in the specified radix using the 2013 * {@link #nextInt} method. The scanner does not advance past any input. 2014 * 2015 * @param radix the radix used to interpret the token as an int value 2016 * @return true if and only if this scanner's next token is a valid 2017 * int value 2018 * @throws IllegalStateException if this scanner is closed 2019 */ 2020 public boolean hasNextInt(int radix) { 2021 setRadix(radix); 2022 boolean result = hasNext(integerPattern()); 2023 if (result) { // Cache it 2024 try { 2025 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2026 processIntegerToken(hasNextResult) : 2027 hasNextResult; 2028 typeCache = Integer.parseInt(s, radix); 2029 } catch (NumberFormatException nfe) { 2030 result = false; 2031 } 2032 } 2033 return result; 2034 } 2035 2036 /** 2037 * The integer token must be stripped of prefixes, group separators, 2038 * and suffixes, non ascii digits must be converted into ascii digits 2039 * before parse will accept it. 
2040 */ 2041 private String processIntegerToken(String token) { 2042 String result = token.replaceAll(""+groupSeparator, ""); 2043 boolean isNegative = false; 2044 int preLen = negativePrefix.length(); 2045 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2046 isNegative = true; 2047 result = result.substring(preLen); 2048 } 2049 int sufLen = negativeSuffix.length(); 2050 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2051 isNegative = true; 2052 result = result.substring(result.length() - sufLen, 2053 result.length()); 2054 } 2055 if (isNegative) 2056 result = "-" + result; 2057 return result; 2058 } 2059 2060 /** 2061 * Scans the next token of the input as an <tt>int</tt>. 2062 * 2063 * <p> An invocation of this method of the form 2064 * <tt>nextInt()</tt> behaves in exactly the same way as the 2065 * invocation <tt>nextInt(radix)</tt>, where <code>radix</code> 2066 * is the default radix of this scanner. 2067 * 2068 * @return the <tt>int</tt> scanned from the input 2069 * @throws InputMismatchException 2070 * if the next token does not match the <i>Integer</i> 2071 * regular expression, or is out of range 2072 * @throws NoSuchElementException if input is exhausted 2073 * @throws IllegalStateException if this scanner is closed 2074 */ 2075 public int nextInt() { 2076 return nextInt(defaultRadix); 2077 } 2078 2079 /** 2080 * Scans the next token of the input as an <tt>int</tt>. 2081 * This method will throw <code>InputMismatchException</code> 2082 * if the next token cannot be translated into a valid int value as 2083 * described below. If the translation is successful, the scanner advances 2084 * past the input that matched. 
2085 * 2086 * <p> If the next token matches the <a 2087 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2088 * above then the token is converted into an <tt>int</tt> value as if by 2089 * removing all locale specific prefixes, group separators, and locale 2090 * specific suffixes, then mapping non-ASCII digits into ASCII 2091 * digits via {@link Character#digit Character.digit}, prepending a 2092 * negative sign (-) if the locale specific negative prefixes and suffixes 2093 * were present, and passing the resulting string to 2094 * {@link Integer#parseInt(String, int) Integer.parseInt} with the 2095 * specified radix. 2096 * 2097 * @param radix the radix used to interpret the token as an int value 2098 * @return the <tt>int</tt> scanned from the input 2099 * @throws InputMismatchException 2100 * if the next token does not match the <i>Integer</i> 2101 * regular expression, or is out of range 2102 * @throws NoSuchElementException if input is exhausted 2103 * @throws IllegalStateException if this scanner is closed 2104 */ 2105 public int nextInt(int radix) { 2106 // Check cached result 2107 if ((typeCache != null) && (typeCache instanceof Integer) 2108 && this.radix == radix) { 2109 int val = ((Integer)typeCache).intValue(); 2110 useTypeCache(); 2111 return val; 2112 } 2113 setRadix(radix); 2114 clearCaches(); 2115 // Search for next int 2116 try { 2117 String s = next(integerPattern()); 2118 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2119 s = processIntegerToken(s); 2120 return Integer.parseInt(s, radix); 2121 } catch (NumberFormatException nfe) { 2122 position = matcher.start(); // don't skip bad token 2123 throw new InputMismatchException(nfe.getMessage()); 2124 } 2125 } 2126 2127 /** 2128 * Returns true if the next token in this scanner's input can be 2129 * interpreted as a long value in the default radix using the 2130 * {@link #nextLong} method. The scanner does not advance past any input. 
2131 * 2132 * @return true if and only if this scanner's next token is a valid 2133 * long value 2134 * @throws IllegalStateException if this scanner is closed 2135 */ 2136 public boolean hasNextLong() { 2137 return hasNextLong(defaultRadix); 2138 } 2139 2140 /** 2141 * Returns true if the next token in this scanner's input can be 2142 * interpreted as a long value in the specified radix using the 2143 * {@link #nextLong} method. The scanner does not advance past any input. 2144 * 2145 * @param radix the radix used to interpret the token as a long value 2146 * @return true if and only if this scanner's next token is a valid 2147 * long value 2148 * @throws IllegalStateException if this scanner is closed 2149 */ 2150 public boolean hasNextLong(int radix) { 2151 setRadix(radix); 2152 boolean result = hasNext(integerPattern()); 2153 if (result) { // Cache it 2154 try { 2155 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2156 processIntegerToken(hasNextResult) : 2157 hasNextResult; 2158 typeCache = Long.parseLong(s, radix); 2159 } catch (NumberFormatException nfe) { 2160 result = false; 2161 } 2162 } 2163 return result; 2164 } 2165 2166 /** 2167 * Scans the next token of the input as a <tt>long</tt>. 2168 * 2169 * <p> An invocation of this method of the form 2170 * <tt>nextLong()</tt> behaves in exactly the same way as the 2171 * invocation <tt>nextLong(radix)</tt>, where <code>radix</code> 2172 * is the default radix of this scanner. 2173 * 2174 * @return the <tt>long</tt> scanned from the input 2175 * @throws InputMismatchException 2176 * if the next token does not match the <i>Integer</i> 2177 * regular expression, or is out of range 2178 * @throws NoSuchElementException if input is exhausted 2179 * @throws IllegalStateException if this scanner is closed 2180 */ 2181 public long nextLong() { 2182 return nextLong(defaultRadix); 2183 } 2184 2185 /** 2186 * Scans the next token of the input as a <tt>long</tt>. 
2187 * This method will throw <code>InputMismatchException</code> 2188 * if the next token cannot be translated into a valid long value as 2189 * described below. If the translation is successful, the scanner advances 2190 * past the input that matched. 2191 * 2192 * <p> If the next token matches the <a 2193 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2194 * above then the token is converted into a <tt>long</tt> value as if by 2195 * removing all locale specific prefixes, group separators, and locale 2196 * specific suffixes, then mapping non-ASCII digits into ASCII 2197 * digits via {@link Character#digit Character.digit}, prepending a 2198 * negative sign (-) if the locale specific negative prefixes and suffixes 2199 * were present, and passing the resulting string to 2200 * {@link Long#parseLong(String, int) Long.parseLong} with the 2201 * specified radix. 2202 * 2203 * @param radix the radix used to interpret the token as an int value 2204 * @return the <tt>long</tt> scanned from the input 2205 * @throws InputMismatchException 2206 * if the next token does not match the <i>Integer</i> 2207 * regular expression, or is out of range 2208 * @throws NoSuchElementException if input is exhausted 2209 * @throws IllegalStateException if this scanner is closed 2210 */ 2211 public long nextLong(int radix) { 2212 // Check cached result 2213 if ((typeCache != null) && (typeCache instanceof Long) 2214 && this.radix == radix) { 2215 long val = ((Long)typeCache).longValue(); 2216 useTypeCache(); 2217 return val; 2218 } 2219 setRadix(radix); 2220 clearCaches(); 2221 try { 2222 String s = next(integerPattern()); 2223 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2224 s = processIntegerToken(s); 2225 return Long.parseLong(s, radix); 2226 } catch (NumberFormatException nfe) { 2227 position = matcher.start(); // don't skip bad token 2228 throw new InputMismatchException(nfe.getMessage()); 2229 } 2230 } 2231 2232 /** 2233 * The float token must be stripped 
of prefixes, group separators, 2234 * and suffixes, non ascii digits must be converted into ascii digits 2235 * before parseFloat will accept it. 2236 * 2237 * If there are non-ascii digits in the token these digits must 2238 * be processed before the token is passed to parseFloat. 2239 */ 2240 private String processFloatToken(String token) { 2241 String result = token.replaceAll(groupSeparator, ""); 2242 if (!decimalSeparator.equals("\\.")) 2243 result = result.replaceAll(decimalSeparator, "."); 2244 boolean isNegative = false; 2245 int preLen = negativePrefix.length(); 2246 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2247 isNegative = true; 2248 result = result.substring(preLen); 2249 } 2250 int sufLen = negativeSuffix.length(); 2251 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2252 isNegative = true; 2253 result = result.substring(result.length() - sufLen, 2254 result.length()); 2255 } 2256 if (result.equals(nanString)) 2257 result = "NaN"; 2258 if (result.equals(infinityString)) 2259 result = "Infinity"; 2260 if (isNegative) 2261 result = "-" + result; 2262 2263 // Translate non-ASCII digits 2264 Matcher m = NON_ASCII_DIGIT.matcher(result); 2265 if (m.find()) { 2266 StringBuilder inASCII = new StringBuilder(); 2267 for (int i=0; i<result.length(); i++) { 2268 char nextChar = result.charAt(i); 2269 if (Character.isDigit(nextChar)) { 2270 int d = Character.digit(nextChar, 10); 2271 if (d != -1) 2272 inASCII.append(d); 2273 else 2274 inASCII.append(nextChar); 2275 } else { 2276 inASCII.append(nextChar); 2277 } 2278 } 2279 result = inASCII.toString(); 2280 } 2281 2282 return result; 2283 } 2284 2285 /** 2286 * Returns true if the next token in this scanner's input can be 2287 * interpreted as a float value using the {@link #nextFloat} 2288 * method. The scanner does not advance past any input. 
2289 * 2290 * @return true if and only if this scanner's next token is a valid 2291 * float value 2292 * @throws IllegalStateException if this scanner is closed 2293 */ 2294 public boolean hasNextFloat() { 2295 setRadix(10); 2296 boolean result = hasNext(floatPattern()); 2297 if (result) { // Cache it 2298 try { 2299 String s = processFloatToken(hasNextResult); 2300 typeCache = Float.valueOf(Float.parseFloat(s)); 2301 } catch (NumberFormatException nfe) { 2302 result = false; 2303 } 2304 } 2305 return result; 2306 } 2307 2308 /** 2309 * Scans the next token of the input as a <tt>float</tt>. 2310 * This method will throw <code>InputMismatchException</code> 2311 * if the next token cannot be translated into a valid float value as 2312 * described below. If the translation is successful, the scanner advances 2313 * past the input that matched. 2314 * 2315 * <p> If the next token matches the <a 2316 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2317 * then the token is converted into a <tt>float</tt> value as if by 2318 * removing all locale specific prefixes, group separators, and locale 2319 * specific suffixes, then mapping non-ASCII digits into ASCII 2320 * digits via {@link Character#digit Character.digit}, prepending a 2321 * negative sign (-) if the locale specific negative prefixes and suffixes 2322 * were present, and passing the resulting string to 2323 * {@link Float#parseFloat Float.parseFloat}. If the token matches 2324 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2325 * is passed to {@link Float#parseFloat(String) Float.parseFloat} as 2326 * appropriate. 
2327 * 2328 * @return the <tt>float</tt> scanned from the input 2329 * @throws InputMismatchException 2330 * if the next token does not match the <i>Float</i> 2331 * regular expression, or is out of range 2332 * @throws NoSuchElementException if input is exhausted 2333 * @throws IllegalStateException if this scanner is closed 2334 */ 2335 public float nextFloat() { 2336 // Check cached result 2337 if ((typeCache != null) && (typeCache instanceof Float)) { 2338 float val = ((Float)typeCache).floatValue(); 2339 useTypeCache(); 2340 return val; 2341 } 2342 setRadix(10); 2343 clearCaches(); 2344 try { 2345 return Float.parseFloat(processFloatToken(next(floatPattern()))); 2346 } catch (NumberFormatException nfe) { 2347 position = matcher.start(); // don't skip bad token 2348 throw new InputMismatchException(nfe.getMessage()); 2349 } 2350 } 2351 2352 /** 2353 * Returns true if the next token in this scanner's input can be 2354 * interpreted as a double value using the {@link #nextDouble} 2355 * method. The scanner does not advance past any input. 2356 * 2357 * @return true if and only if this scanner's next token is a valid 2358 * double value 2359 * @throws IllegalStateException if this scanner is closed 2360 */ 2361 public boolean hasNextDouble() { 2362 setRadix(10); 2363 boolean result = hasNext(floatPattern()); 2364 if (result) { // Cache it 2365 try { 2366 String s = processFloatToken(hasNextResult); 2367 typeCache = Double.valueOf(Double.parseDouble(s)); 2368 } catch (NumberFormatException nfe) { 2369 result = false; 2370 } 2371 } 2372 return result; 2373 } 2374 2375 /** 2376 * Scans the next token of the input as a <tt>double</tt>. 2377 * This method will throw <code>InputMismatchException</code> 2378 * if the next token cannot be translated into a valid double value. 2379 * If the translation is successful, the scanner advances past the input 2380 * that matched. 
2381 * 2382 * <p> If the next token matches the <a 2383 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2384 * then the token is converted into a <tt>double</tt> value as if by 2385 * removing all locale specific prefixes, group separators, and locale 2386 * specific suffixes, then mapping non-ASCII digits into ASCII 2387 * digits via {@link Character#digit Character.digit}, prepending a 2388 * negative sign (-) if the locale specific negative prefixes and suffixes 2389 * were present, and passing the resulting string to 2390 * {@link Double#parseDouble Double.parseDouble}. If the token matches 2391 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2392 * is passed to {@link Double#parseDouble(String) Double.parseDouble} as 2393 * appropriate. 2394 * 2395 * @return the <tt>double</tt> scanned from the input 2396 * @throws InputMismatchException 2397 * if the next token does not match the <i>Float</i> 2398 * regular expression, or is out of range 2399 * @throws NoSuchElementException if the input is exhausted 2400 * @throws IllegalStateException if this scanner is closed 2401 */ 2402 public double nextDouble() { 2403 // Check cached result 2404 if ((typeCache != null) && (typeCache instanceof Double)) { 2405 double val = ((Double)typeCache).doubleValue(); 2406 useTypeCache(); 2407 return val; 2408 } 2409 setRadix(10); 2410 clearCaches(); 2411 // Search for next float 2412 try { 2413 return Double.parseDouble(processFloatToken(next(floatPattern()))); 2414 } catch (NumberFormatException nfe) { 2415 position = matcher.start(); // don't skip bad token 2416 throw new InputMismatchException(nfe.getMessage()); 2417 } 2418 } 2419 2420 // Convenience methods for scanning multi precision numbers 2421 2422 /** 2423 * Returns true if the next token in this scanner's input can be 2424 * interpreted as a <code>BigInteger</code> in the default radix using the 2425 * {@link #nextBigInteger} method. 
The scanner does not advance past any 2426 * input. 2427 * 2428 * @return true if and only if this scanner's next token is a valid 2429 * <code>BigInteger</code> 2430 * @throws IllegalStateException if this scanner is closed 2431 */ 2432 public boolean hasNextBigInteger() { 2433 return hasNextBigInteger(defaultRadix); 2434 } 2435 2436 /** 2437 * Returns true if the next token in this scanner's input can be 2438 * interpreted as a <code>BigInteger</code> in the specified radix using 2439 * the {@link #nextBigInteger} method. The scanner does not advance past 2440 * any input. 2441 * 2442 * @param radix the radix used to interpret the token as an integer 2443 * @return true if and only if this scanner's next token is a valid 2444 * <code>BigInteger</code> 2445 * @throws IllegalStateException if this scanner is closed 2446 */ 2447 public boolean hasNextBigInteger(int radix) { 2448 setRadix(radix); 2449 boolean result = hasNext(integerPattern()); 2450 if (result) { // Cache it 2451 try { 2452 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2453 processIntegerToken(hasNextResult) : 2454 hasNextResult; 2455 typeCache = new BigInteger(s, radix); 2456 } catch (NumberFormatException nfe) { 2457 result = false; 2458 } 2459 } 2460 return result; 2461 } 2462 2463 /** 2464 * Scans the next token of the input as a {@link java.math.BigInteger 2465 * BigInteger}. 2466 * 2467 * <p> An invocation of this method of the form 2468 * <tt>nextBigInteger()</tt> behaves in exactly the same way as the 2469 * invocation <tt>nextBigInteger(radix)</tt>, where <code>radix</code> 2470 * is the default radix of this scanner. 
2471 * 2472 * @return the <tt>BigInteger</tt> scanned from the input 2473 * @throws InputMismatchException 2474 * if the next token does not match the <i>Integer</i> 2475 * regular expression, or is out of range 2476 * @throws NoSuchElementException if the input is exhausted 2477 * @throws IllegalStateException if this scanner is closed 2478 */ 2479 public BigInteger nextBigInteger() { 2480 return nextBigInteger(defaultRadix); 2481 } 2482 2483 /** 2484 * Scans the next token of the input as a {@link java.math.BigInteger 2485 * BigInteger}. 2486 * 2487 * <p> If the next token matches the <a 2488 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2489 * above then the token is converted into a <tt>BigInteger</tt> value as if 2490 * by removing all group separators, mapping non-ASCII digits into ASCII 2491 * digits via the {@link Character#digit Character.digit}, and passing the 2492 * resulting string to the {@link 2493 * java.math.BigInteger#BigInteger(java.lang.String) 2494 * BigInteger(String, int)} constructor with the specified radix. 
2495 * 2496 * @param radix the radix used to interpret the token 2497 * @return the <tt>BigInteger</tt> scanned from the input 2498 * @throws InputMismatchException 2499 * if the next token does not match the <i>Integer</i> 2500 * regular expression, or is out of range 2501 * @throws NoSuchElementException if the input is exhausted 2502 * @throws IllegalStateException if this scanner is closed 2503 */ 2504 public BigInteger nextBigInteger(int radix) { 2505 // Check cached result 2506 if ((typeCache != null) && (typeCache instanceof BigInteger) 2507 && this.radix == radix) { 2508 BigInteger val = (BigInteger)typeCache; 2509 useTypeCache(); 2510 return val; 2511 } 2512 setRadix(radix); 2513 clearCaches(); 2514 // Search for next int 2515 try { 2516 String s = next(integerPattern()); 2517 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2518 s = processIntegerToken(s); 2519 return new BigInteger(s, radix); 2520 } catch (NumberFormatException nfe) { 2521 position = matcher.start(); // don't skip bad token 2522 throw new InputMismatchException(nfe.getMessage()); 2523 } 2524 } 2525 2526 /** 2527 * Returns true if the next token in this scanner's input can be 2528 * interpreted as a <code>BigDecimal</code> using the 2529 * {@link #nextBigDecimal} method. The scanner does not advance past any 2530 * input. 2531 * 2532 * @return true if and only if this scanner's next token is a valid 2533 * <code>BigDecimal</code> 2534 * @throws IllegalStateException if this scanner is closed 2535 */ 2536 public boolean hasNextBigDecimal() { 2537 setRadix(10); 2538 boolean result = hasNext(decimalPattern()); 2539 if (result) { // Cache it 2540 try { 2541 String s = processFloatToken(hasNextResult); 2542 typeCache = new BigDecimal(s); 2543 } catch (NumberFormatException nfe) { 2544 result = false; 2545 } 2546 } 2547 return result; 2548 } 2549 2550 /** 2551 * Scans the next token of the input as a {@link java.math.BigDecimal 2552 * BigDecimal}. 
2553 * 2554 * <p> If the next token matches the <a 2555 * href="#Decimal-regex"><i>Decimal</i></a> regular expression defined 2556 * above then the token is converted into a <tt>BigDecimal</tt> value as if 2557 * by removing all group separators, mapping non-ASCII digits into ASCII 2558 * digits via the {@link Character#digit Character.digit}, and passing the 2559 * resulting string to the {@link 2560 * java.math.BigDecimal#BigDecimal(java.lang.String) BigDecimal(String)} 2561 * constructor. 2562 * 2563 * @return the <tt>BigDecimal</tt> scanned from the input 2564 * @throws InputMismatchException 2565 * if the next token does not match the <i>Decimal</i> 2566 * regular expression, or is out of range 2567 * @throws NoSuchElementException if the input is exhausted 2568 * @throws IllegalStateException if this scanner is closed 2569 */ 2570 public BigDecimal nextBigDecimal() { 2571 // Check cached result 2572 if ((typeCache != null) && (typeCache instanceof BigDecimal)) { 2573 BigDecimal val = (BigDecimal)typeCache; 2574 useTypeCache(); 2575 return val; 2576 } 2577 setRadix(10); 2578 clearCaches(); 2579 // Search for next float 2580 try { 2581 String s = processFloatToken(next(decimalPattern())); 2582 return new BigDecimal(s); 2583 } catch (NumberFormatException nfe) { 2584 position = matcher.start(); // don't skip bad token 2585 throw new InputMismatchException(nfe.getMessage()); 2586 } 2587 } 2588 2589 /** 2590 * Resets this scanner. 2591 * 2592 * <p> Resetting a scanner discards all of its explicit state 2593 * information which may have been changed by invocations of {@link 2594 * #useDelimiter}, {@link #useLocale}, or {@link #useRadix}. 
2595 * 2596 * <p> An invocation of this method of the form 2597 * <tt>scanner.reset()</tt> behaves in exactly the same way as the 2598 * invocation 2599 * 2600 * <blockquote><pre>{@code 2601 * scanner.useDelimiter("\\p{javaWhitespace}+") 2602 * .useLocale(Locale.getDefault(Locale.Category.FORMAT)) 2603 * .useRadix(10); 2604 * }</pre></blockquote> 2605 * 2606 * @return this scanner 2607 * 2608 * @since 1.6 2609 */ 2610 public Scanner reset() { 2611 delimPattern = WHITESPACE_PATTERN; 2612 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 2613 useRadix(10); 2614 clearCaches(); 2615 return this; 2616 } 2617 }