1 /* 2 * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util; 27 28 import java.nio.file.Path; 29 import java.nio.file.Files; 30 import java.util.regex.*; 31 import java.io.*; 32 import java.math.*; 33 import java.nio.*; 34 import java.nio.channels.*; 35 import java.nio.charset.*; 36 import java.text.*; 37 import java.util.Locale; 38 39 import sun.misc.LRUCache; 40 41 /** 42 * A simple text scanner which can parse primitive types and strings using 43 * regular expressions. 44 * 45 * <p>A <code>Scanner</code> breaks its input into tokens using a 46 * delimiter pattern, which by default matches whitespace. The resulting 47 * tokens may then be converted into values of different types using the 48 * various <tt>next</tt> methods. 
49 * 50 * <p>For example, this code allows a user to read a number from 51 * <tt>System.in</tt>: 52 * <blockquote><pre>{@code 53 * Scanner sc = new Scanner(System.in); 54 * int i = sc.nextInt(); 55 * }</pre></blockquote> 56 * 57 * <p>As another example, this code allows <code>long</code> types to be 58 * assigned from entries in a file <code>myNumbers</code>: 59 * <blockquote><pre>{@code 60 * Scanner sc = new Scanner(new File("myNumbers")); 61 * while (sc.hasNextLong()) { 62 * long aLong = sc.nextLong(); 63 * } 64 * }</pre></blockquote> 65 * 66 * <p>The scanner can also use delimiters other than whitespace. This 67 * example reads several items in from a string: 68 * <blockquote><pre>{@code 69 * String input = "1 fish 2 fish red fish blue fish"; 70 * Scanner s = new Scanner(input).useDelimiter("\\s*fish\\s*"); 71 * System.out.println(s.nextInt()); 72 * System.out.println(s.nextInt()); 73 * System.out.println(s.next()); 74 * System.out.println(s.next()); 75 * s.close(); 76 * }</pre></blockquote> 77 * <p> 78 * prints the following output: 79 * <blockquote><pre>{@code 80 * 1 81 * 2 82 * red 83 * blue 84 * }</pre></blockquote> 85 * 86 * <p>The same output can be generated with this code, which uses a regular 87 * expression to parse all four tokens at once: 88 * <blockquote><pre>{@code 89 * String input = "1 fish 2 fish red fish blue fish"; 90 * Scanner s = new Scanner(input); 91 * s.findInLine("(\\d+) fish (\\d+) fish (\\w+) fish (\\w+)"); 92 * MatchResult result = s.match(); 93 * for (int i=1; i<=result.groupCount(); i++) 94 * System.out.println(result.group(i)); 95 * s.close(); 96 * }</pre></blockquote> 97 * 98 * <p>The <a name="default-delimiter">default whitespace delimiter</a> used 99 * by a scanner is as recognized by {@link java.lang.Character}.{@link 100 * java.lang.Character#isWhitespace(char) isWhitespace}. 
The {@link #reset} 101 * method will reset the value of the scanner's delimiter to the default 102 * whitespace delimiter regardless of whether it was previously changed. 103 * 104 * <p>A scanning operation may block waiting for input. 105 * 106 * <p>The {@link #next} and {@link #hasNext} methods and their 107 * primitive-type companion methods (such as {@link #nextInt} and 108 * {@link #hasNextInt}) first skip any input that matches the delimiter 109 * pattern, and then attempt to return the next token. Both <tt>hasNext</tt> 110 * and <tt>next</tt> methods may block waiting for further input. Whether a 111 * <tt>hasNext</tt> method blocks has no connection to whether or not its 112 * associated <tt>next</tt> method will block. 113 * 114 * <p> The {@link #findInLine}, {@link #findWithinHorizon}, and {@link #skip} 115 * methods operate independently of the delimiter pattern. These methods will 116 * attempt to match the specified pattern with no regard to delimiters in the 117 * input and thus can be used in special circumstances where delimiters are 118 * not relevant. These methods may block waiting for more input. 119 * 120 * <p>When a scanner throws an {@link InputMismatchException}, the scanner 121 * will not pass the token that caused the exception, so that it may be 122 * retrieved or skipped via some other method. 123 * 124 * <p>Depending upon the type of delimiting pattern, empty tokens may be 125 * returned. For example, the pattern <tt>"\\s+"</tt> will return no empty 126 * tokens since it matches multiple instances of the delimiter. The delimiting 127 * pattern <tt>"\\s"</tt> could return empty tokens since it only passes one 128 * space at a time. 129 * 130 * <p> A scanner can read text from any object which implements the {@link 131 * java.lang.Readable} interface. 
If an invocation of the underlying 132 * readable's {@link java.lang.Readable#read} method throws an {@link 133 * java.io.IOException} then the scanner assumes that the end of the input 134 * has been reached. The most recent <tt>IOException</tt> thrown by the 135 * underlying readable can be retrieved via the {@link #ioException} method. 136 * 137 * <p>When a <code>Scanner</code> is closed, it will close its input source 138 * if the source implements the {@link java.io.Closeable} interface. 139 * 140 * <p>A <code>Scanner</code> is not safe for multithreaded use without 141 * external synchronization. 142 * 143 * <p>Unless otherwise mentioned, passing a <code>null</code> parameter into 144 * any method of a <code>Scanner</code> will cause a 145 * <code>NullPointerException</code> to be thrown. 146 * 147 * <p>A scanner will default to interpreting numbers as decimal unless a 148 * different radix has been set by using the {@link #useRadix} method. The 149 * {@link #reset} method will reset the value of the scanner's radix to 150 * <code>10</code> regardless of whether it was previously changed. 151 * 152 * <h3> <a name="localized-numbers">Localized numbers</a> </h3> 153 * 154 * <p> An instance of this class is capable of scanning numbers in the standard 155 * formats as well as in the formats of the scanner's locale. A scanner's 156 * <a name="initial-locale">initial locale </a>is the value returned by the {@link 157 * java.util.Locale#getDefault(Locale.Category) 158 * Locale.getDefault(Locale.Category.FORMAT)} method; it may be changed via the {@link 159 * #useLocale} method. The {@link #reset} method will reset the value of the 160 * scanner's locale to the initial locale regardless of whether it was 161 * previously changed. 
 *
 * <p>The localized formats are defined in terms of the following parameters,
 * which for a particular locale are taken from that locale's {@link
 * java.text.DecimalFormat DecimalFormat} object, <tt>df</tt>, and its
 * {@link java.text.DecimalFormatSymbols DecimalFormatSymbols} object,
 * <tt>dfs</tt>.
 *
 * <blockquote><dl>
 *     <dt><i>LocalGroupSeparator </i>
 *         <dd>The character used to separate thousands groups,
 *         <i>i.e.,</i> <tt>dfs.</tt>{@link
 *         java.text.DecimalFormatSymbols#getGroupingSeparator
 *         getGroupingSeparator()}
 *     <dt><i>LocalDecimalSeparator </i>
 *         <dd>The character used for the decimal point,
 *         <i>i.e.,</i> <tt>dfs.</tt>{@link
 *         java.text.DecimalFormatSymbols#getDecimalSeparator
 *         getDecimalSeparator()}
 *     <dt><i>LocalPositivePrefix </i>
 *         <dd>The string that appears before a positive number (may
 *         be empty), <i>i.e.,</i> <tt>df.</tt>{@link
 *         java.text.DecimalFormat#getPositivePrefix
 *         getPositivePrefix()}
 *     <dt><i>LocalPositiveSuffix </i>
 *         <dd>The string that appears after a positive number (may be
 *         empty), <i>i.e.,</i> <tt>df.</tt>{@link
 *         java.text.DecimalFormat#getPositiveSuffix
 *         getPositiveSuffix()}
 *     <dt><i>LocalNegativePrefix </i>
 *         <dd>The string that appears before a negative number (may
 *         be empty), <i>i.e.,</i> <tt>df.</tt>{@link
 *         java.text.DecimalFormat#getNegativePrefix
 *         getNegativePrefix()}
 *     <dt><i>LocalNegativeSuffix </i>
 *         <dd>The string that appears after a negative number (may be
 *         empty), <i>i.e.,</i> <tt>df.</tt>{@link
 *         java.text.DecimalFormat#getNegativeSuffix
 *         getNegativeSuffix()}
 *     <dt><i>LocalNaN </i>
 *         <dd>The string that represents not-a-number for
 *         floating-point values,
 *         <i>i.e.,</i> <tt>dfs.</tt>{@link
 *         java.text.DecimalFormatSymbols#getNaN
 *         getNaN()}
 *     <dt><i>LocalInfinity </i>
 *         <dd>The string that represents infinity for
floating-point
 *         values, <i>i.e.,</i> <tt>dfs.</tt>{@link
 *         java.text.DecimalFormatSymbols#getInfinity
 *         getInfinity()}
 * </dl></blockquote>
 *
 * <h4> <a name="number-syntax">Number syntax</a> </h4>
 *
 * <p> The strings that can be parsed as numbers by an instance of this class
 * are specified in terms of the following regular-expression grammar, where
 * Rmax is the highest digit in the radix being used (for example, Rmax is 9 in base 10).
 *
 * <p>
 * <dl>
 *   <dt><i>NonAsciiDigit</i>:
 *       <dd>A non-ASCII character c for which
 *            {@link java.lang.Character#isDigit Character.isDigit}<tt>(c)</tt>
 *            returns true
 *
 *   <dt><i>Non0Digit</i>:
 *       <dd><tt>[1-</tt><i>Rmax</i><tt>] | </tt><i>NonAsciiDigit</i>
 *
 *   <dt><i>Digit</i>:
 *       <dd><tt>[0-</tt><i>Rmax</i><tt>] | </tt><i>NonAsciiDigit</i>
 *
 *   <dt><i>GroupedNumeral</i>:
 *       <dd><tt>( </tt><i>Non0Digit</i>
 *                   <i>Digit</i><tt>?
 *                   </tt><i>Digit</i><tt>?</tt>
 *       <dd> <tt>( </tt><i>LocalGroupSeparator</i>
 *                         <i>Digit</i>
 *                         <i>Digit</i>
 *                         <i>Digit</i><tt> )+ )</tt>
 *
 *   <dt><i>Numeral</i>:
 *       <dd><tt>( ( </tt><i>Digit</i><tt>+ )
 *               | </tt><i>GroupedNumeral</i><tt> )</tt>
 *
 *   <dt><a name="Integer-regex"><i>Integer</i>:</a>
 *       <dd><tt>( [-+]? ( </tt><i>Numeral</i><tt>
 *                               ) )</tt>
 *       <dd><tt>| </tt><i>LocalPositivePrefix</i> <i>Numeral</i>
 *                      <i>LocalPositiveSuffix</i>
 *       <dd><tt>| </tt><i>LocalNegativePrefix</i> <i>Numeral</i>
 *                 <i>LocalNegativeSuffix</i>
 *
 *   <dt><i>DecimalNumeral</i>:
 *       <dd><i>Numeral</i>
 *       <dd><tt>| </tt><i>Numeral</i>
 *                 <i>LocalDecimalSeparator</i>
 *                 <i>Digit</i><tt>*</tt>
 *       <dd><tt>| </tt><i>LocalDecimalSeparator</i>
 *                 <i>Digit</i><tt>+</tt>
 *
 *   <dt><i>Exponent</i>:
 *       <dd><tt>( [eE] [+-]? </tt><i>Digit</i><tt>+ )</tt>
 *
 *   <dt><a name="Decimal-regex"><i>Decimal</i>:</a>
 *       <dd><tt>( [-+]? </tt><i>DecimalNumeral</i>
 *                         <i>Exponent</i><tt>? )</tt>
 *       <dd><tt>| </tt><i>LocalPositivePrefix</i>
 *                 <i>DecimalNumeral</i>
 *                 <i>LocalPositiveSuffix</i>
 *                 <i>Exponent</i><tt>?</tt>
 *       <dd><tt>| </tt><i>LocalNegativePrefix</i>
 *                 <i>DecimalNumeral</i>
 *                 <i>LocalNegativeSuffix</i>
 *                 <i>Exponent</i><tt>?</tt>
 *
 *   <dt><i>HexFloat</i>:
 *       <dd><tt>[-+]? 0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+
 *                 ([pP][-+]?[0-9]+)?</tt>
 *
 *   <dt><i>NonNumber</i>:
 *       <dd><tt>NaN
 *                          | </tt><i>LocalNaN</i><tt>
 *                          | Infinity
 *                          | </tt><i>LocalInfinity</i>
 *
 *   <dt><i>SignedNonNumber</i>:
 *       <dd><tt>( [-+]? </tt><i>NonNumber</i><tt> )</tt>
 *       <dd><tt>| </tt><i>LocalPositivePrefix</i>
 *                 <i>NonNumber</i>
 *                 <i>LocalPositiveSuffix</i>
 *       <dd><tt>| </tt><i>LocalNegativePrefix</i>
 *                 <i>NonNumber</i>
 *                 <i>LocalNegativeSuffix</i>
 *
 *   <dt><a name="Float-regex"><i>Float</i></a>:
 *       <dd><i>Decimal</i>
 *           <tt>| </tt><i>HexFloat</i>
 *           <tt>| </tt><i>SignedNonNumber</i>
 *
 * </dl>
 * <p>Whitespace is not significant in the above regular expressions.
302 * 303 * @since 1.5 304 */ 305 public final class Scanner implements Iterator<String>, Closeable { 306 307 // Internal buffer used to hold input 308 private CharBuffer buf; 309 310 // Size of internal character buffer 311 private static final int BUFFER_SIZE = 1024; // change to 1024; 312 313 // The index into the buffer currently held by the Scanner 314 private int position; 315 316 // Internal matcher used for finding delimiters 317 private Matcher matcher; 318 319 // Pattern used to delimit tokens 320 private Pattern delimPattern; 321 322 // Pattern found in last hasNext operation 323 private Pattern hasNextPattern; 324 325 // Position after last hasNext operation 326 private int hasNextPosition; 327 328 // Result after last hasNext operation 329 private String hasNextResult; 330 331 // The input source 332 private Readable source; 333 334 // Boolean is true if source is done 335 private boolean sourceClosed = false; 336 337 // Boolean indicating more input is required 338 private boolean needInput = false; 339 340 // Boolean indicating if a delim has been skipped this operation 341 private boolean skipped = false; 342 343 // A store of a position that the scanner may fall back to 344 private int savedScannerPosition = -1; 345 346 // A cache of the last primitive type scanned 347 private Object typeCache = null; 348 349 // Boolean indicating if a match result is available 350 private boolean matchValid = false; 351 352 // Boolean indicating if this scanner has been closed 353 private boolean closed = false; 354 355 // The current radix used by this scanner 356 private int radix = 10; 357 358 // The default radix for this scanner 359 private int defaultRadix = 10; 360 361 // The locale used by this scanner 362 private Locale locale = null; 363 364 // A cache of the last few recently used Patterns 365 private LRUCache<String,Pattern> patternCache = 366 new LRUCache<String,Pattern>(7) { 367 protected Pattern create(String s) { 368 return Pattern.compile(s); 369 
} 370 protected boolean hasName(Pattern p, String s) { 371 return p.pattern().equals(s); 372 } 373 }; 374 375 // A holder of the last IOException encountered 376 private IOException lastException; 377 378 // A pattern for java whitespace 379 private static Pattern WHITESPACE_PATTERN = Pattern.compile( 380 "\\p{javaWhitespace}+"); 381 382 // A pattern for any token 383 private static Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*"); 384 385 // A pattern for non-ASCII digits 386 private static Pattern NON_ASCII_DIGIT = Pattern.compile( 387 "[\\p{javaDigit}&&[^0-9]]"); 388 389 // Fields and methods to support scanning primitive types 390 391 /** 392 * Locale dependent values used to scan numbers 393 */ 394 private String groupSeparator = "\\,"; 395 private String decimalSeparator = "\\."; 396 private String nanString = "NaN"; 397 private String infinityString = "Infinity"; 398 private String positivePrefix = ""; 399 private String negativePrefix = "\\-"; 400 private String positiveSuffix = ""; 401 private String negativeSuffix = ""; 402 403 /** 404 * Fields and an accessor method to match booleans 405 */ 406 private static volatile Pattern boolPattern; 407 private static final String BOOLEAN_PATTERN = "true|false"; 408 private static Pattern boolPattern() { 409 Pattern bp = boolPattern; 410 if (bp == null) 411 boolPattern = bp = Pattern.compile(BOOLEAN_PATTERN, 412 Pattern.CASE_INSENSITIVE); 413 return bp; 414 } 415 416 /** 417 * Fields and methods to match bytes, shorts, ints, and longs 418 */ 419 private Pattern integerPattern; 420 private String digits = "0123456789abcdefghijklmnopqrstuvwxyz"; 421 private String non0Digit = "[\\p{javaDigit}&&[^0]]"; 422 private int SIMPLE_GROUP_INDEX = 5; 423 private String buildIntegerPatternString() { 424 String radixDigits = digits.substring(0, radix); 425 // \\p{javaDigit} is not guaranteed to be appropriate 426 // here but what can we do? 
The final authority will be 427 // whatever parse method is invoked, so ultimately the 428 // Scanner will do the right thing 429 String digit = "((?i)["+radixDigits+"]|\\p{javaDigit})"; 430 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 431 groupSeparator+digit+digit+digit+")+)"; 432 // digit++ is the possessive form which is necessary for reducing 433 // backtracking that would otherwise cause unacceptable performance 434 String numeral = "(("+ digit+"++)|"+groupedNumeral+")"; 435 String javaStyleInteger = "([-+]?(" + numeral + "))"; 436 String negativeInteger = negativePrefix + numeral + negativeSuffix; 437 String positiveInteger = positivePrefix + numeral + positiveSuffix; 438 return "("+ javaStyleInteger + ")|(" + 439 positiveInteger + ")|(" + 440 negativeInteger + ")"; 441 } 442 private Pattern integerPattern() { 443 if (integerPattern == null) { 444 integerPattern = patternCache.forName(buildIntegerPatternString()); 445 } 446 return integerPattern; 447 } 448 449 /** 450 * Fields and an accessor method to match line separators 451 */ 452 private static volatile Pattern separatorPattern; 453 private static volatile Pattern linePattern; 454 private static final String LINE_SEPARATOR_PATTERN = 455 "\r\n|[\n\r\u2028\u2029\u0085]"; 456 private static final String LINE_PATTERN = ".*("+LINE_SEPARATOR_PATTERN+")|.+$"; 457 458 private static Pattern separatorPattern() { 459 Pattern sp = separatorPattern; 460 if (sp == null) 461 separatorPattern = sp = Pattern.compile(LINE_SEPARATOR_PATTERN); 462 return sp; 463 } 464 465 private static Pattern linePattern() { 466 Pattern lp = linePattern; 467 if (lp == null) 468 linePattern = lp = Pattern.compile(LINE_PATTERN); 469 return lp; 470 } 471 472 /** 473 * Fields and methods to match floats and doubles 474 */ 475 private Pattern floatPattern; 476 private Pattern decimalPattern; 477 private void buildFloatAndDecimalPattern() { 478 // \\p{javaDigit} may not be perfect, see above 479 String digit = 
"([0-9]|(\\p{javaDigit}))"; 480 String exponent = "([eE][+-]?"+digit+"+)?"; 481 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 482 groupSeparator+digit+digit+digit+")+)"; 483 // Once again digit++ is used for performance, as above 484 String numeral = "(("+digit+"++)|"+groupedNumeral+")"; 485 String decimalNumeral = "("+numeral+"|"+numeral + 486 decimalSeparator + digit + "*+|"+ decimalSeparator + 487 digit + "++)"; 488 String nonNumber = "(NaN|"+nanString+"|Infinity|"+ 489 infinityString+")"; 490 String positiveFloat = "(" + positivePrefix + decimalNumeral + 491 positiveSuffix + exponent + ")"; 492 String negativeFloat = "(" + negativePrefix + decimalNumeral + 493 negativeSuffix + exponent + ")"; 494 String decimal = "(([-+]?" + decimalNumeral + exponent + ")|"+ 495 positiveFloat + "|" + negativeFloat + ")"; 496 String hexFloat = 497 "[-+]?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP][-+]?[0-9]+)?"; 498 String positiveNonNumber = "(" + positivePrefix + nonNumber + 499 positiveSuffix + ")"; 500 String negativeNonNumber = "(" + negativePrefix + nonNumber + 501 negativeSuffix + ")"; 502 String signedNonNumber = "(([-+]?"+nonNumber+")|" + 503 positiveNonNumber + "|" + 504 negativeNonNumber + ")"; 505 floatPattern = Pattern.compile(decimal + "|" + hexFloat + "|" + 506 signedNonNumber); 507 decimalPattern = Pattern.compile(decimal); 508 } 509 private Pattern floatPattern() { 510 if (floatPattern == null) { 511 buildFloatAndDecimalPattern(); 512 } 513 return floatPattern; 514 } 515 private Pattern decimalPattern() { 516 if (decimalPattern == null) { 517 buildFloatAndDecimalPattern(); 518 } 519 return decimalPattern; 520 } 521 522 // Constructors 523 524 /** 525 * Constructs a <code>Scanner</code> that returns values scanned 526 * from the specified source delimited by the specified pattern. 
527 * 528 * @param source A character source implementing the Readable interface 529 * @param pattern A delimiting pattern 530 */ 531 private Scanner(Readable source, Pattern pattern) { 532 assert source != null : "source should not be null"; 533 assert pattern != null : "pattern should not be null"; 534 this.source = source; 535 delimPattern = pattern; 536 buf = CharBuffer.allocate(BUFFER_SIZE); 537 buf.limit(0); 538 matcher = delimPattern.matcher(buf); 539 matcher.useTransparentBounds(true); 540 matcher.useAnchoringBounds(false); 541 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 542 } 543 544 /** 545 * Constructs a new <code>Scanner</code> that produces values scanned 546 * from the specified source. 547 * 548 * @param source A character source implementing the {@link Readable} 549 * interface 550 */ 551 public Scanner(Readable source) { 552 this(Objects.requireNonNull(source, "source"), WHITESPACE_PATTERN); 553 } 554 555 /** 556 * Constructs a new <code>Scanner</code> that produces values scanned 557 * from the specified input stream. Bytes from the stream are converted 558 * into characters using the underlying platform's 559 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 560 * 561 * @param source An input stream to be scanned 562 */ 563 public Scanner(InputStream source) { 564 this(new InputStreamReader(source), WHITESPACE_PATTERN); 565 } 566 567 /** 568 * Constructs a new <code>Scanner</code> that produces values scanned 569 * from the specified input stream. Bytes from the stream are converted 570 * into characters using the specified charset. 
571 * 572 * @param source An input stream to be scanned 573 * @param charsetName The encoding type used to convert bytes from the 574 * stream into characters to be scanned 575 * @throws IllegalArgumentException if the specified character set 576 * does not exist 577 */ 578 public Scanner(InputStream source, String charsetName) { 579 this(makeReadable(Objects.requireNonNull(source, "source"), toCharset(charsetName)), 580 WHITESPACE_PATTERN); 581 } 582 583 /** 584 * Returns a charset object for the given charset name. 585 * @throws NullPointerException is csn is null 586 * @throws IllegalArgumentException if the charset is not supported 587 */ 588 private static Charset toCharset(String csn) { 589 Objects.requireNonNull(csn, "charsetName"); 590 try { 591 return Charset.forName(csn); 592 } catch (IllegalCharsetNameException|UnsupportedCharsetException e) { 593 // IllegalArgumentException should be thrown 594 throw new IllegalArgumentException(e); 595 } 596 } 597 598 private static Readable makeReadable(InputStream source, Charset charset) { 599 return new InputStreamReader(source, charset); 600 } 601 602 /** 603 * Constructs a new <code>Scanner</code> that produces values scanned 604 * from the specified file. Bytes from the file are converted into 605 * characters using the underlying platform's 606 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 607 * 608 * @param source A file to be scanned 609 * @throws FileNotFoundException if source is not found 610 */ 611 public Scanner(File source) throws FileNotFoundException { 612 this((ReadableByteChannel)(new FileInputStream(source).getChannel())); 613 } 614 615 /** 616 * Constructs a new <code>Scanner</code> that produces values scanned 617 * from the specified file. Bytes from the file are converted into 618 * characters using the specified charset. 
619 * 620 * @param source A file to be scanned 621 * @param charsetName The encoding type used to convert bytes from the file 622 * into characters to be scanned 623 * @throws FileNotFoundException if source is not found 624 * @throws IllegalArgumentException if the specified encoding is 625 * not found 626 */ 627 public Scanner(File source, String charsetName) 628 throws FileNotFoundException 629 { 630 this(Objects.requireNonNull(source), toDecoder(charsetName)); 631 } 632 633 private Scanner(File source, CharsetDecoder dec) 634 throws FileNotFoundException 635 { 636 this(makeReadable((ReadableByteChannel)(new FileInputStream(source).getChannel()), dec)); 637 } 638 639 private static CharsetDecoder toDecoder(String charsetName) { 640 Objects.requireNonNull(charsetName, "charsetName"); 641 try { 642 return Charset.forName(charsetName).newDecoder(); 643 } catch (IllegalCharsetNameException|UnsupportedCharsetException unused) { 644 throw new IllegalArgumentException(charsetName); 645 } 646 } 647 648 private static Readable makeReadable(ReadableByteChannel source, 649 CharsetDecoder dec) { 650 return Channels.newReader(source, dec, -1); 651 } 652 653 /** 654 * Constructs a new <code>Scanner</code> that produces values scanned 655 * from the specified file. Bytes from the file are converted into 656 * characters using the underlying platform's 657 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 658 * 659 * @param source 660 * the path to the file to be scanned 661 * @throws IOException 662 * if an I/O error occurs opening source 663 * 664 * @since 1.7 665 */ 666 public Scanner(Path source) 667 throws IOException 668 { 669 this(Files.newInputStream(source)); 670 } 671 672 /** 673 * Constructs a new <code>Scanner</code> that produces values scanned 674 * from the specified file. Bytes from the file are converted into 675 * characters using the specified charset. 
676 * 677 * @param source 678 * the path to the file to be scanned 679 * @param charsetName 680 * The encoding type used to convert bytes from the file 681 * into characters to be scanned 682 * @throws IOException 683 * if an I/O error occurs opening source 684 * @throws IllegalArgumentException 685 * if the specified encoding is not found 686 * @since 1.7 687 */ 688 public Scanner(Path source, String charsetName) throws IOException { 689 this(Objects.requireNonNull(source), toCharset(charsetName)); 690 } 691 692 private Scanner(Path source, Charset charset) throws IOException { 693 this(makeReadable(Files.newInputStream(source), charset)); 694 } 695 696 /** 697 * Constructs a new <code>Scanner</code> that produces values scanned 698 * from the specified string. 699 * 700 * @param source A string to scan 701 */ 702 public Scanner(String source) { 703 this(new StringReader(source), WHITESPACE_PATTERN); 704 } 705 706 /** 707 * Constructs a new <code>Scanner</code> that produces values scanned 708 * from the specified channel. Bytes from the source are converted into 709 * characters using the underlying platform's 710 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 711 * 712 * @param source A channel to scan 713 */ 714 public Scanner(ReadableByteChannel source) { 715 this(makeReadable(Objects.requireNonNull(source, "source")), 716 WHITESPACE_PATTERN); 717 } 718 719 private static Readable makeReadable(ReadableByteChannel source) { 720 return makeReadable(source, Charset.defaultCharset().newDecoder()); 721 } 722 723 /** 724 * Constructs a new <code>Scanner</code> that produces values scanned 725 * from the specified channel. Bytes from the source are converted into 726 * characters using the specified charset. 
*
 * @param  source A channel to scan
 * @param charsetName The encoding type used to convert bytes from the
 *        channel into characters to be scanned
 * @throws IllegalArgumentException if the specified character set
 *         does not exist
 */
public Scanner(ReadableByteChannel source, String charsetName) {
    this(makeReadable(Objects.requireNonNull(source, "source"),
                      toDecoder(charsetName)),
         WHITESPACE_PATTERN);
}

// Private primitives used to support scanning

// Remembers the current position so that a later revertState() can
// return to it
private void saveState() {
    savedScannerPosition = position;
}

// Returns to the saved position, discards the saved position (-1) and
// clears the "delimiters already skipped" flag
private void revertState() {
    position = savedScannerPosition;
    savedScannerPosition = -1;
    skipped = false;
}

// Same as revertState(), then hands back b unchanged so that a caller
// can revert and return in a single expression
private boolean revertState(boolean b) {
    revertState();
    return b;
}

// Caches the whole text of the matcher's current match
private void cacheResult() {
    cacheResult(matcher.group());
}

// Caches the given result together with the end position and pattern of
// the matcher's current match
private void cacheResult(String result) {
    hasNextResult = result;
    hasNextPosition = matcher.end();
    hasNextPattern = matcher.pattern();
}

// Clears both regular cache and type cache
private void clearCaches() {
    hasNextPattern = null;
    typeCache = null;
}

// Consumes the cached result: moves to the cached position, clears both
// the regular cache and the type cache, and returns the cached string
private String getCachedResult() {
    position = hasNextPosition;
    clearCaches();
    return hasNextResult;
}

// Consumes the cached typed value: fails if the scanner is closed,
// otherwise moves to the cached position and clears both the regular
// cache and the type cache
private void useTypeCache() {
    if (closed) {
        throw new IllegalStateException("Scanner closed");
    }
    position = hasNextPosition;
    clearCaches();
}

// Tries to read more input. May block.
794 private void readInput() { 795 if (buf.limit() == buf.capacity()) 796 makeSpace(); 797 798 // Prepare to receive data 799 int p = buf.position(); 800 buf.position(buf.limit()); 801 buf.limit(buf.capacity()); 802 803 int n = 0; 804 try { 805 n = source.read(buf); 806 } catch (IOException ioe) { 807 lastException = ioe; 808 n = -1; 809 } 810 811 if (n == -1) { 812 sourceClosed = true; 813 needInput = false; 814 } 815 816 if (n > 0) 817 needInput = false; 818 819 // Restore current position and limit for reading 820 buf.limit(buf.position()); 821 buf.position(p); 822 } 823 824 // After this method is called there will either be an exception 825 // or else there will be space in the buffer 826 private boolean makeSpace() { 827 clearCaches(); 828 int offset = savedScannerPosition == -1 ? 829 position : savedScannerPosition; 830 buf.position(offset); 831 // Gain space by compacting buffer 832 if (offset > 0) { 833 buf.compact(); 834 translateSavedIndexes(offset); 835 position -= offset; 836 buf.flip(); 837 return true; 838 } 839 // Gain space by growing buffer 840 int newSize = buf.capacity() * 2; 841 CharBuffer newBuf = CharBuffer.allocate(newSize); 842 newBuf.put(buf); 843 newBuf.flip(); 844 translateSavedIndexes(offset); 845 position -= offset; 846 buf = newBuf; 847 matcher.reset(buf); 848 return true; 849 } 850 851 // When a buffer compaction/reallocation occurs the saved indexes must 852 // be modified appropriately 853 private void translateSavedIndexes(int offset) { 854 if (savedScannerPosition != -1) 855 savedScannerPosition -= offset; 856 } 857 858 // If we are at the end of input then NoSuchElement; 859 // If there is still input left then InputMismatch 860 private void throwFor() { 861 skipped = false; 862 if ((sourceClosed) && (position == buf.limit())) 863 throw new NoSuchElementException(); 864 else 865 throw new InputMismatchException(); 866 } 867 868 // Returns true if a complete token or partial token is in the buffer. 
// It is not necessary to find a complete token since a partial token
// means that there will be another token with or without more input.
// Side effects: invalidates the last match result and moves position
// past any delimiters matched at the current position.
private boolean hasTokenInBuffer() {
    matchValid = false;
    matcher.usePattern(delimPattern);
    matcher.region(position, buf.limit());

    // Skip delims first
    if (matcher.lookingAt())
        position = matcher.end();

    // If we are sitting at the end, no more tokens in buffer
    if (position == buf.limit())
        return false;

    return true;
}

/*
 * Returns a "complete token" that matches the specified pattern
 *
 * A token is complete if surrounded by delims; a partial token
 * is prefixed by delims but not postfixed by them
 *
 * The position is advanced to the end of that complete token
 *
 * Pattern == null means accept any token at all
 *
 * Triple return:
 * 1. valid string means it was found
 * 2. null with needInput=false means we won't ever find it
 * 3. null with needInput=true means try again after readInput
 *
 * This method only ever sets needInput to true; it never clears it.
 * It always resets matchValid to false, so callers must set it again
 * after a successful return.
 */
private String getCompleteTokenInBuffer(Pattern pattern) {
    matchValid = false;

    // Skip delims first
    matcher.usePattern(delimPattern);
    if (!skipped) { // Enforcing only one skip of leading delims
        matcher.region(position, buf.limit());
        if (matcher.lookingAt()) {
            // If more input could extend the delimiters then we must wait
            // for more input
            if (matcher.hitEnd() && !sourceClosed) {
                needInput = true;
                return null;
            }
            // The delims were whole and the matcher should skip them
            skipped = true;
            position = matcher.end();
        }
    }

    // If we are sitting at the end, no more tokens in buffer
    if (position == buf.limit()) {
        if (sourceClosed)
            return null;
        needInput = true;
        return null;
    }

    // Must look for next delims. Simply attempting to match the
    // pattern at this point may find a match but it might not be
    // the first longest match because of missing input, or it might
    // match a partial token instead of the whole thing.

    // Then look for next delims
    matcher.region(position, buf.limit());
    boolean foundNextDelim = matcher.find();
    if (foundNextDelim && (matcher.end() == position)) {
        // Zero length delimiter match; we should find the next one
        // using the automatic advance past a zero length match;
        // Otherwise we have just found the same one we just skipped
        foundNextDelim = matcher.find();
    }
    if (foundNextDelim) {
        // In the rare case that more input could cause the match
        // to be lost and there is more input coming we must wait
        // for more input. Note that hitting the end is okay as long
        // as the match cannot go away. It is the beginning of the
        // next delims we want to be sure about, we don't care if
        // they potentially extend further.
        if (matcher.requireEnd() && !sourceClosed) {
            needInput = true;
            return null;
        }
        // The token spans [position, start of the delimiter just found)
        int tokenEnd = matcher.start();
        // There is a complete token.
        if (pattern == null) {
            // Must continue with match to provide valid MatchResult
            pattern = FIND_ANY_PATTERN;
        }
        //  Attempt to match against the desired pattern
        matcher.usePattern(pattern);
        matcher.region(position, tokenEnd);
        if (matcher.matches()) {
            String s = matcher.group();
            position = matcher.end();
            return s;
        } else { // Complete token but it does not match
            return null;
        }
    }

    // If we can't find the next delims but no more input is coming,
    // then we can treat the remainder as a whole token
    if (sourceClosed) {
        if (pattern == null) {
            // Must continue with match to provide valid MatchResult
            pattern = FIND_ANY_PATTERN;
        }
        // Last token; Match the pattern here or throw
        matcher.usePattern(pattern);
        matcher.region(position, buf.limit());
        if (matcher.matches()) {
            String s = matcher.group();
            position = matcher.end();
            return s;
        }
        // Last piece does not match
        return null;
    }

    // There is a partial token in the buffer; must read more
    // to complete it
    needInput = true;
    return null;
}

// Finds the specified pattern in the buffer up to horizon.
// Returns a match for the specified input pattern.
1000 private String findPatternInBuffer(Pattern pattern, int horizon) { 1001 matchValid = false; 1002 matcher.usePattern(pattern); 1003 int bufferLimit = buf.limit(); 1004 int horizonLimit = -1; 1005 int searchLimit = bufferLimit; 1006 if (horizon > 0) { 1007 horizonLimit = position + horizon; 1008 if (horizonLimit < bufferLimit) 1009 searchLimit = horizonLimit; 1010 } 1011 matcher.region(position, searchLimit); 1012 if (matcher.find()) { 1013 if (matcher.hitEnd() && (!sourceClosed)) { 1014 // The match may be longer if didn't hit horizon or real end 1015 if (searchLimit != horizonLimit) { 1016 // Hit an artificial end; try to extend the match 1017 needInput = true; 1018 return null; 1019 } 1020 // The match could go away depending on what is next 1021 if ((searchLimit == horizonLimit) && matcher.requireEnd()) { 1022 // Rare case: we hit the end of input and it happens 1023 // that it is at the horizon and the end of input is 1024 // required for the match. 1025 needInput = true; 1026 return null; 1027 } 1028 } 1029 // Did not hit end, or hit real end, or hit horizon 1030 position = matcher.end(); 1031 return matcher.group(); 1032 } 1033 1034 if (sourceClosed) 1035 return null; 1036 1037 // If there is no specified horizon, or if we have not searched 1038 // to the specified horizon yet, get more input 1039 if ((horizon == 0) || (searchLimit != horizonLimit)) 1040 needInput = true; 1041 return null; 1042 } 1043 1044 // Returns a match for the specified input pattern anchored at 1045 // the current position 1046 private String matchPatternInBuffer(Pattern pattern) { 1047 matchValid = false; 1048 matcher.usePattern(pattern); 1049 matcher.region(position, buf.limit()); 1050 if (matcher.lookingAt()) { 1051 if (matcher.hitEnd() && (!sourceClosed)) { 1052 // Get more input and try again 1053 needInput = true; 1054 return null; 1055 } 1056 position = matcher.end(); 1057 return matcher.group(); 1058 } 1059 1060 if (sourceClosed) 1061 return null; 1062 1063 // Read more to 
find pattern 1064 needInput = true; 1065 return null; 1066 } 1067 1068 // Throws if the scanner is closed 1069 private void ensureOpen() { 1070 if (closed) 1071 throw new IllegalStateException("Scanner closed"); 1072 } 1073 1074 // Public methods 1075 1076 /** 1077 * Closes this scanner. 1078 * 1079 * <p> If this scanner has not yet been closed then if its underlying 1080 * {@linkplain java.lang.Readable readable} also implements the {@link 1081 * java.io.Closeable} interface then the readable's <tt>close</tt> method 1082 * will be invoked. If this scanner is already closed then invoking this 1083 * method will have no effect. 1084 * 1085 * <p>Attempting to perform search operations after a scanner has 1086 * been closed will result in an {@link IllegalStateException}. 1087 * 1088 */ 1089 public void close() { 1090 if (closed) 1091 return; 1092 if (source instanceof Closeable) { 1093 try { 1094 ((Closeable)source).close(); 1095 } catch (IOException ioe) { 1096 lastException = ioe; 1097 } 1098 } 1099 sourceClosed = true; 1100 source = null; 1101 closed = true; 1102 } 1103 1104 /** 1105 * Returns the <code>IOException</code> last thrown by this 1106 * <code>Scanner</code>'s underlying <code>Readable</code>. This method 1107 * returns <code>null</code> if no such exception exists. 1108 * 1109 * @return the last exception thrown by this scanner's readable 1110 */ 1111 public IOException ioException() { 1112 return lastException; 1113 } 1114 1115 /** 1116 * Returns the <code>Pattern</code> this <code>Scanner</code> is currently 1117 * using to match delimiters. 1118 * 1119 * @return this scanner's delimiting pattern. 1120 */ 1121 public Pattern delimiter() { 1122 return delimPattern; 1123 } 1124 1125 /** 1126 * Sets this scanner's delimiting pattern to the specified pattern. 
1127 * 1128 * @param pattern A delimiting pattern 1129 * @return this scanner 1130 */ 1131 public Scanner useDelimiter(Pattern pattern) { 1132 delimPattern = pattern; 1133 return this; 1134 } 1135 1136 /** 1137 * Sets this scanner's delimiting pattern to a pattern constructed from 1138 * the specified <code>String</code>. 1139 * 1140 * <p> An invocation of this method of the form 1141 * <tt>useDelimiter(pattern)</tt> behaves in exactly the same way as the 1142 * invocation <tt>useDelimiter(Pattern.compile(pattern))</tt>. 1143 * 1144 * <p> Invoking the {@link #reset} method will set the scanner's delimiter 1145 * to the <a href= "#default-delimiter">default</a>. 1146 * 1147 * @param pattern A string specifying a delimiting pattern 1148 * @return this scanner 1149 */ 1150 public Scanner useDelimiter(String pattern) { 1151 delimPattern = patternCache.forName(pattern); 1152 return this; 1153 } 1154 1155 /** 1156 * Returns this scanner's locale. 1157 * 1158 * <p>A scanner's locale affects many elements of its default 1159 * primitive matching regular expressions; see 1160 * <a href= "#localized-numbers">localized numbers</a> above. 1161 * 1162 * @return this scanner's locale 1163 */ 1164 public Locale locale() { 1165 return this.locale; 1166 } 1167 1168 /** 1169 * Sets this scanner's locale to the specified locale. 1170 * 1171 * <p>A scanner's locale affects many elements of its default 1172 * primitive matching regular expressions; see 1173 * <a href= "#localized-numbers">localized numbers</a> above. 1174 * 1175 * <p>Invoking the {@link #reset} method will set the scanner's locale to 1176 * the <a href= "#initial-locale">initial locale</a>. 
1177 * 1178 * @param locale A string specifying the locale to use 1179 * @return this scanner 1180 */ 1181 public Scanner useLocale(Locale locale) { 1182 if (locale.equals(this.locale)) 1183 return this; 1184 1185 this.locale = locale; 1186 DecimalFormat df = 1187 (DecimalFormat)NumberFormat.getNumberInstance(locale); 1188 DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale); 1189 1190 // These must be literalized to avoid collision with regex 1191 // metacharacters such as dot or parenthesis 1192 groupSeparator = "\\" + dfs.getGroupingSeparator(); 1193 decimalSeparator = "\\" + dfs.getDecimalSeparator(); 1194 1195 // Quoting the nonzero length locale-specific things 1196 // to avoid potential conflict with metacharacters 1197 nanString = "\\Q" + dfs.getNaN() + "\\E"; 1198 infinityString = "\\Q" + dfs.getInfinity() + "\\E"; 1199 positivePrefix = df.getPositivePrefix(); 1200 if (positivePrefix.length() > 0) 1201 positivePrefix = "\\Q" + positivePrefix + "\\E"; 1202 negativePrefix = df.getNegativePrefix(); 1203 if (negativePrefix.length() > 0) 1204 negativePrefix = "\\Q" + negativePrefix + "\\E"; 1205 positiveSuffix = df.getPositiveSuffix(); 1206 if (positiveSuffix.length() > 0) 1207 positiveSuffix = "\\Q" + positiveSuffix + "\\E"; 1208 negativeSuffix = df.getNegativeSuffix(); 1209 if (negativeSuffix.length() > 0) 1210 negativeSuffix = "\\Q" + negativeSuffix + "\\E"; 1211 1212 // Force rebuilding and recompilation of locale dependent 1213 // primitive patterns 1214 integerPattern = null; 1215 floatPattern = null; 1216 1217 return this; 1218 } 1219 1220 /** 1221 * Returns this scanner's default radix. 1222 * 1223 * <p>A scanner's radix affects elements of its default 1224 * number matching regular expressions; see 1225 * <a href= "#localized-numbers">localized numbers</a> above. 
1226 * 1227 * @return the default radix of this scanner 1228 */ 1229 public int radix() { 1230 return this.defaultRadix; 1231 } 1232 1233 /** 1234 * Sets this scanner's default radix to the specified radix. 1235 * 1236 * <p>A scanner's radix affects elements of its default 1237 * number matching regular expressions; see 1238 * <a href= "#localized-numbers">localized numbers</a> above. 1239 * 1240 * <p>If the radix is less than <code>Character.MIN_RADIX</code> 1241 * or greater than <code>Character.MAX_RADIX</code>, then an 1242 * <code>IllegalArgumentException</code> is thrown. 1243 * 1244 * <p>Invoking the {@link #reset} method will set the scanner's radix to 1245 * <code>10</code>. 1246 * 1247 * @param radix The radix to use when scanning numbers 1248 * @return this scanner 1249 * @throws IllegalArgumentException if radix is out of range 1250 */ 1251 public Scanner useRadix(int radix) { 1252 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) 1253 throw new IllegalArgumentException("radix:"+radix); 1254 1255 if (this.defaultRadix == radix) 1256 return this; 1257 this.defaultRadix = radix; 1258 // Force rebuilding and recompilation of radix dependent patterns 1259 integerPattern = null; 1260 return this; 1261 } 1262 1263 // The next operation should occur in the specified radix but 1264 // the default is left untouched. 1265 private void setRadix(int radix) { 1266 if (this.radix != radix) { 1267 // Force rebuilding and recompilation of radix dependent patterns 1268 integerPattern = null; 1269 this.radix = radix; 1270 } 1271 } 1272 1273 /** 1274 * Returns the match result of the last scanning operation performed 1275 * by this scanner. This method throws <code>IllegalStateException</code> 1276 * if no match has been performed, or if the last match was 1277 * not successful. 1278 * 1279 * <p>The various <code>next</code>methods of <code>Scanner</code> 1280 * make a match result available if they complete without throwing an 1281 * exception. 
For instance, after an invocation of the {@link #nextInt} 1282 * method that returned an int, this method returns a 1283 * <code>MatchResult</code> for the search of the 1284 * <a href="#Integer-regex"><i>Integer</i></a> regular expression 1285 * defined above. Similarly the {@link #findInLine}, 1286 * {@link #findWithinHorizon}, and {@link #skip} methods will make a 1287 * match available if they succeed. 1288 * 1289 * @return a match result for the last match operation 1290 * @throws IllegalStateException If no match result is available 1291 */ 1292 public MatchResult match() { 1293 if (!matchValid) 1294 throw new IllegalStateException("No match result available"); 1295 return matcher.toMatchResult(); 1296 } 1297 1298 /** 1299 * <p>Returns the string representation of this <code>Scanner</code>. The 1300 * string representation of a <code>Scanner</code> contains information 1301 * that may be useful for debugging. The exact format is unspecified. 1302 * 1303 * @return The string representation of this scanner 1304 */ 1305 public String toString() { 1306 StringBuilder sb = new StringBuilder(); 1307 sb.append("java.util.Scanner"); 1308 sb.append("[delimiters=" + delimPattern + "]"); 1309 sb.append("[position=" + position + "]"); 1310 sb.append("[match valid=" + matchValid + "]"); 1311 sb.append("[need input=" + needInput + "]"); 1312 sb.append("[source closed=" + sourceClosed + "]"); 1313 sb.append("[skipped=" + skipped + "]"); 1314 sb.append("[group separator=" + groupSeparator + "]"); 1315 sb.append("[decimal separator=" + decimalSeparator + "]"); 1316 sb.append("[positive prefix=" + positivePrefix + "]"); 1317 sb.append("[negative prefix=" + negativePrefix + "]"); 1318 sb.append("[positive suffix=" + positiveSuffix + "]"); 1319 sb.append("[negative suffix=" + negativeSuffix + "]"); 1320 sb.append("[NaN string=" + nanString + "]"); 1321 sb.append("[infinity string=" + infinityString + "]"); 1322 return sb.toString(); 1323 } 1324 1325 /** 1326 * Returns true if 
this scanner has another token in its input. 1327 * This method may block while waiting for input to scan. 1328 * The scanner does not advance past any input. 1329 * 1330 * @return true if and only if this scanner has another token 1331 * @throws IllegalStateException if this scanner is closed 1332 * @see java.util.Iterator 1333 */ 1334 public boolean hasNext() { 1335 ensureOpen(); 1336 saveState(); 1337 while (!sourceClosed) { 1338 if (hasTokenInBuffer()) 1339 return revertState(true); 1340 readInput(); 1341 } 1342 boolean result = hasTokenInBuffer(); 1343 return revertState(result); 1344 } 1345 1346 /** 1347 * Finds and returns the next complete token from this scanner. 1348 * A complete token is preceded and followed by input that matches 1349 * the delimiter pattern. This method may block while waiting for input 1350 * to scan, even if a previous invocation of {@link #hasNext} returned 1351 * <code>true</code>. 1352 * 1353 * @return the next token 1354 * @throws NoSuchElementException if no more tokens are available 1355 * @throws IllegalStateException if this scanner is closed 1356 * @see java.util.Iterator 1357 */ 1358 public String next() { 1359 ensureOpen(); 1360 clearCaches(); 1361 1362 while (true) { 1363 String token = getCompleteTokenInBuffer(null); 1364 if (token != null) { 1365 matchValid = true; 1366 skipped = false; 1367 return token; 1368 } 1369 if (needInput) 1370 readInput(); 1371 else 1372 throwFor(); 1373 } 1374 } 1375 1376 /** 1377 * The remove operation is not supported by this implementation of 1378 * <code>Iterator</code>. 1379 * 1380 * @throws UnsupportedOperationException if this method is invoked. 1381 * @see java.util.Iterator 1382 */ 1383 public void remove() { 1384 throw new UnsupportedOperationException(); 1385 } 1386 1387 /** 1388 * Returns true if the next token matches the pattern constructed from the 1389 * specified string. The scanner does not advance past any input. 
1390 * 1391 * <p> An invocation of this method of the form <tt>hasNext(pattern)</tt> 1392 * behaves in exactly the same way as the invocation 1393 * <tt>hasNext(Pattern.compile(pattern))</tt>. 1394 * 1395 * @param pattern a string specifying the pattern to scan 1396 * @return true if and only if this scanner has another token matching 1397 * the specified pattern 1398 * @throws IllegalStateException if this scanner is closed 1399 */ 1400 public boolean hasNext(String pattern) { 1401 return hasNext(patternCache.forName(pattern)); 1402 } 1403 1404 /** 1405 * Returns the next token if it matches the pattern constructed from the 1406 * specified string. If the match is successful, the scanner advances 1407 * past the input that matched the pattern. 1408 * 1409 * <p> An invocation of this method of the form <tt>next(pattern)</tt> 1410 * behaves in exactly the same way as the invocation 1411 * <tt>next(Pattern.compile(pattern))</tt>. 1412 * 1413 * @param pattern a string specifying the pattern to scan 1414 * @return the next token 1415 * @throws NoSuchElementException if no such tokens are available 1416 * @throws IllegalStateException if this scanner is closed 1417 */ 1418 public String next(String pattern) { 1419 return next(patternCache.forName(pattern)); 1420 } 1421 1422 /** 1423 * Returns true if the next complete token matches the specified pattern. 1424 * A complete token is prefixed and postfixed by input that matches 1425 * the delimiter pattern. This method may block while waiting for input. 1426 * The scanner does not advance past any input. 
1427 * 1428 * @param pattern the pattern to scan for 1429 * @return true if and only if this scanner has another token matching 1430 * the specified pattern 1431 * @throws IllegalStateException if this scanner is closed 1432 */ 1433 public boolean hasNext(Pattern pattern) { 1434 ensureOpen(); 1435 if (pattern == null) 1436 throw new NullPointerException(); 1437 hasNextPattern = null; 1438 saveState(); 1439 1440 while (true) { 1441 if (getCompleteTokenInBuffer(pattern) != null) { 1442 matchValid = true; 1443 cacheResult(); 1444 return revertState(true); 1445 } 1446 if (needInput) 1447 readInput(); 1448 else 1449 return revertState(false); 1450 } 1451 } 1452 1453 /** 1454 * Returns the next token if it matches the specified pattern. This 1455 * method may block while waiting for input to scan, even if a previous 1456 * invocation of {@link #hasNext(Pattern)} returned <code>true</code>. 1457 * If the match is successful, the scanner advances past the input that 1458 * matched the pattern. 1459 * 1460 * @param pattern the pattern to scan for 1461 * @return the next token 1462 * @throws NoSuchElementException if no more tokens are available 1463 * @throws IllegalStateException if this scanner is closed 1464 */ 1465 public String next(Pattern pattern) { 1466 ensureOpen(); 1467 if (pattern == null) 1468 throw new NullPointerException(); 1469 1470 // Did we already find this pattern? 1471 if (hasNextPattern == pattern) 1472 return getCachedResult(); 1473 clearCaches(); 1474 1475 // Search for the pattern 1476 while (true) { 1477 String token = getCompleteTokenInBuffer(pattern); 1478 if (token != null) { 1479 matchValid = true; 1480 skipped = false; 1481 return token; 1482 } 1483 if (needInput) 1484 readInput(); 1485 else 1486 throwFor(); 1487 } 1488 } 1489 1490 /** 1491 * Returns true if there is another line in the input of this scanner. 1492 * This method may block while waiting for input. The scanner does not 1493 * advance past any input. 
1494 * 1495 * @return true if and only if this scanner has another line of input 1496 * @throws IllegalStateException if this scanner is closed 1497 */ 1498 public boolean hasNextLine() { 1499 saveState(); 1500 1501 String result = findWithinHorizon(linePattern(), 0); 1502 if (result != null) { 1503 MatchResult mr = this.match(); 1504 String lineSep = mr.group(1); 1505 if (lineSep != null) { 1506 result = result.substring(0, result.length() - 1507 lineSep.length()); 1508 cacheResult(result); 1509 1510 } else { 1511 cacheResult(); 1512 } 1513 } 1514 revertState(); 1515 return (result != null); 1516 } 1517 1518 /** 1519 * Advances this scanner past the current line and returns the input 1520 * that was skipped. 1521 * 1522 * This method returns the rest of the current line, excluding any line 1523 * separator at the end. The position is set to the beginning of the next 1524 * line. 1525 * 1526 * <p>Since this method continues to search through the input looking 1527 * for a line separator, it may buffer all of the input searching for 1528 * the line to skip if no line separators are present. 
1529 * 1530 * @return the line that was skipped 1531 * @throws NoSuchElementException if no line was found 1532 * @throws IllegalStateException if this scanner is closed 1533 */ 1534 public String nextLine() { 1535 if (hasNextPattern == linePattern()) 1536 return getCachedResult(); 1537 clearCaches(); 1538 1539 String result = findWithinHorizon(linePattern, 0); 1540 if (result == null) 1541 throw new NoSuchElementException("No line found"); 1542 MatchResult mr = this.match(); 1543 String lineSep = mr.group(1); 1544 if (lineSep != null) 1545 result = result.substring(0, result.length() - lineSep.length()); 1546 if (result == null) 1547 throw new NoSuchElementException(); 1548 else 1549 return result; 1550 } 1551 1552 // Public methods that ignore delimiters 1553 1554 /** 1555 * Attempts to find the next occurrence of a pattern constructed from the 1556 * specified string, ignoring delimiters. 1557 * 1558 * <p>An invocation of this method of the form <tt>findInLine(pattern)</tt> 1559 * behaves in exactly the same way as the invocation 1560 * <tt>findInLine(Pattern.compile(pattern))</tt>. 1561 * 1562 * @param pattern a string specifying the pattern to search for 1563 * @return the text that matched the specified pattern 1564 * @throws IllegalStateException if this scanner is closed 1565 */ 1566 public String findInLine(String pattern) { 1567 return findInLine(patternCache.forName(pattern)); 1568 } 1569 1570 /** 1571 * Attempts to find the next occurrence of the specified pattern ignoring 1572 * delimiters. If the pattern is found before the next line separator, the 1573 * scanner advances past the input that matched and returns the string that 1574 * matched the pattern. 1575 * If no such pattern is detected in the input up to the next line 1576 * separator, then <code>null</code> is returned and the scanner's 1577 * position is unchanged. This method may block waiting for input that 1578 * matches the pattern. 
1579 * 1580 * <p>Since this method continues to search through the input looking 1581 * for the specified pattern, it may buffer all of the input searching for 1582 * the desired token if no line separators are present. 1583 * 1584 * @param pattern the pattern to scan for 1585 * @return the text that matched the specified pattern 1586 * @throws IllegalStateException if this scanner is closed 1587 */ 1588 public String findInLine(Pattern pattern) { 1589 ensureOpen(); 1590 if (pattern == null) 1591 throw new NullPointerException(); 1592 clearCaches(); 1593 // Expand buffer to include the next newline or end of input 1594 int endPosition = 0; 1595 saveState(); 1596 while (true) { 1597 String token = findPatternInBuffer(separatorPattern(), 0); 1598 if (token != null) { 1599 endPosition = matcher.start(); 1600 break; // up to next newline 1601 } 1602 if (needInput) { 1603 readInput(); 1604 } else { 1605 endPosition = buf.limit(); 1606 break; // up to end of input 1607 } 1608 } 1609 revertState(); 1610 int horizonForLine = endPosition - position; 1611 // If there is nothing between the current pos and the next 1612 // newline simply return null, invoking findWithinHorizon 1613 // with "horizon=0" will scan beyond the line bound. 1614 if (horizonForLine == 0) 1615 return null; 1616 // Search for the pattern 1617 return findWithinHorizon(pattern, horizonForLine); 1618 } 1619 1620 /** 1621 * Attempts to find the next occurrence of a pattern constructed from the 1622 * specified string, ignoring delimiters. 1623 * 1624 * <p>An invocation of this method of the form 1625 * <tt>findWithinHorizon(pattern)</tt> behaves in exactly the same way as 1626 * the invocation 1627 * <tt>findWithinHorizon(Pattern.compile(pattern, horizon))</tt>. 
1628 * 1629 * @param pattern a string specifying the pattern to search for 1630 * @param horizon the search horizon 1631 * @return the text that matched the specified pattern 1632 * @throws IllegalStateException if this scanner is closed 1633 * @throws IllegalArgumentException if horizon is negative 1634 */ 1635 public String findWithinHorizon(String pattern, int horizon) { 1636 return findWithinHorizon(patternCache.forName(pattern), horizon); 1637 } 1638 1639 /** 1640 * Attempts to find the next occurrence of the specified pattern. 1641 * 1642 * <p>This method searches through the input up to the specified 1643 * search horizon, ignoring delimiters. If the pattern is found the 1644 * scanner advances past the input that matched and returns the string 1645 * that matched the pattern. If no such pattern is detected then the 1646 * null is returned and the scanner's position remains unchanged. This 1647 * method may block waiting for input that matches the pattern. 1648 * 1649 * <p>A scanner will never search more than <code>horizon</code> code 1650 * points beyond its current position. Note that a match may be clipped 1651 * by the horizon; that is, an arbitrary match result may have been 1652 * different if the horizon had been larger. The scanner treats the 1653 * horizon as a transparent, non-anchoring bound (see {@link 1654 * Matcher#useTransparentBounds} and {@link Matcher#useAnchoringBounds}). 1655 * 1656 * <p>If horizon is <code>0</code>, then the horizon is ignored and 1657 * this method continues to search through the input looking for the 1658 * specified pattern without bound. In this case it may buffer all of 1659 * the input searching for the pattern. 1660 * 1661 * <p>If horizon is negative, then an IllegalArgumentException is 1662 * thrown. 
1663 * 1664 * @param pattern the pattern to scan for 1665 * @param horizon the search horizon 1666 * @return the text that matched the specified pattern 1667 * @throws IllegalStateException if this scanner is closed 1668 * @throws IllegalArgumentException if horizon is negative 1669 */ 1670 public String findWithinHorizon(Pattern pattern, int horizon) { 1671 ensureOpen(); 1672 if (pattern == null) 1673 throw new NullPointerException(); 1674 if (horizon < 0) 1675 throw new IllegalArgumentException("horizon < 0"); 1676 clearCaches(); 1677 1678 // Search for the pattern 1679 while (true) { 1680 String token = findPatternInBuffer(pattern, horizon); 1681 if (token != null) { 1682 matchValid = true; 1683 return token; 1684 } 1685 if (needInput) 1686 readInput(); 1687 else 1688 break; // up to end of input 1689 } 1690 return null; 1691 } 1692 1693 /** 1694 * Skips input that matches the specified pattern, ignoring delimiters. 1695 * This method will skip input if an anchored match of the specified 1696 * pattern succeeds. 1697 * 1698 * <p>If a match to the specified pattern is not found at the 1699 * current position, then no input is skipped and a 1700 * <tt>NoSuchElementException</tt> is thrown. 1701 * 1702 * <p>Since this method seeks to match the specified pattern starting at 1703 * the scanner's current position, patterns that can match a lot of 1704 * input (".*", for example) may cause the scanner to buffer a large 1705 * amount of input. 1706 * 1707 * <p>Note that it is possible to skip something without risking a 1708 * <code>NoSuchElementException</code> by using a pattern that can 1709 * match nothing, e.g., <code>sc.skip("[ \t]*")</code>. 
1710 * 1711 * @param pattern a string specifying the pattern to skip over 1712 * @return this scanner 1713 * @throws NoSuchElementException if the specified pattern is not found 1714 * @throws IllegalStateException if this scanner is closed 1715 */ 1716 public Scanner skip(Pattern pattern) { 1717 ensureOpen(); 1718 if (pattern == null) 1719 throw new NullPointerException(); 1720 clearCaches(); 1721 1722 // Search for the pattern 1723 while (true) { 1724 String token = matchPatternInBuffer(pattern); 1725 if (token != null) { 1726 matchValid = true; 1727 position = matcher.end(); 1728 return this; 1729 } 1730 if (needInput) 1731 readInput(); 1732 else 1733 throw new NoSuchElementException(); 1734 } 1735 } 1736 1737 /** 1738 * Skips input that matches a pattern constructed from the specified 1739 * string. 1740 * 1741 * <p> An invocation of this method of the form <tt>skip(pattern)</tt> 1742 * behaves in exactly the same way as the invocation 1743 * <tt>skip(Pattern.compile(pattern))</tt>. 1744 * 1745 * @param pattern a string specifying the pattern to skip over 1746 * @return this scanner 1747 * @throws IllegalStateException if this scanner is closed 1748 */ 1749 public Scanner skip(String pattern) { 1750 return skip(patternCache.forName(pattern)); 1751 } 1752 1753 // Convenience methods for scanning primitives 1754 1755 /** 1756 * Returns true if the next token in this scanner's input can be 1757 * interpreted as a boolean value using a case insensitive pattern 1758 * created from the string "true|false". The scanner does not 1759 * advance past the input that matched. 1760 * 1761 * @return true if and only if this scanner's next token is a valid 1762 * boolean value 1763 * @throws IllegalStateException if this scanner is closed 1764 */ 1765 public boolean hasNextBoolean() { 1766 return hasNext(boolPattern()); 1767 } 1768 1769 /** 1770 * Scans the next token of the input into a boolean value and returns 1771 * that value. 
This method will throw <code>InputMismatchException</code> 1772 * if the next token cannot be translated into a valid boolean value. 1773 * If the match is successful, the scanner advances past the input that 1774 * matched. 1775 * 1776 * @return the boolean scanned from the input 1777 * @throws InputMismatchException if the next token is not a valid boolean 1778 * @throws NoSuchElementException if input is exhausted 1779 * @throws IllegalStateException if this scanner is closed 1780 */ 1781 public boolean nextBoolean() { 1782 clearCaches(); 1783 return Boolean.parseBoolean(next(boolPattern())); 1784 } 1785 1786 /** 1787 * Returns true if the next token in this scanner's input can be 1788 * interpreted as a byte value in the default radix using the 1789 * {@link #nextByte} method. The scanner does not advance past any input. 1790 * 1791 * @return true if and only if this scanner's next token is a valid 1792 * byte value 1793 * @throws IllegalStateException if this scanner is closed 1794 */ 1795 public boolean hasNextByte() { 1796 return hasNextByte(defaultRadix); 1797 } 1798 1799 /** 1800 * Returns true if the next token in this scanner's input can be 1801 * interpreted as a byte value in the specified radix using the 1802 * {@link #nextByte} method. The scanner does not advance past any input. 1803 * 1804 * @param radix the radix used to interpret the token as a byte value 1805 * @return true if and only if this scanner's next token is a valid 1806 * byte value 1807 * @throws IllegalStateException if this scanner is closed 1808 */ 1809 public boolean hasNextByte(int radix) { 1810 setRadix(radix); 1811 boolean result = hasNext(integerPattern()); 1812 if (result) { // Cache it 1813 try { 1814 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 
1815 processIntegerToken(hasNextResult) : 1816 hasNextResult; 1817 typeCache = Byte.parseByte(s, radix); 1818 } catch (NumberFormatException nfe) { 1819 result = false; 1820 } 1821 } 1822 return result; 1823 } 1824 1825 /** 1826 * Scans the next token of the input as a <tt>byte</tt>. 1827 * 1828 * <p> An invocation of this method of the form 1829 * <tt>nextByte()</tt> behaves in exactly the same way as the 1830 * invocation <tt>nextByte(radix)</tt>, where <code>radix</code> 1831 * is the default radix of this scanner. 1832 * 1833 * @return the <tt>byte</tt> scanned from the input 1834 * @throws InputMismatchException 1835 * if the next token does not match the <i>Integer</i> 1836 * regular expression, or is out of range 1837 * @throws NoSuchElementException if input is exhausted 1838 * @throws IllegalStateException if this scanner is closed 1839 */ 1840 public byte nextByte() { 1841 return nextByte(defaultRadix); 1842 } 1843 1844 /** 1845 * Scans the next token of the input as a <tt>byte</tt>. 1846 * This method will throw <code>InputMismatchException</code> 1847 * if the next token cannot be translated into a valid byte value as 1848 * described below. If the translation is successful, the scanner advances 1849 * past the input that matched. 1850 * 1851 * <p> If the next token matches the <a 1852 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1853 * above then the token is converted into a <tt>byte</tt> value as if by 1854 * removing all locale specific prefixes, group separators, and locale 1855 * specific suffixes, then mapping non-ASCII digits into ASCII 1856 * digits via {@link Character#digit Character.digit}, prepending a 1857 * negative sign (-) if the locale specific negative prefixes and suffixes 1858 * were present, and passing the resulting string to 1859 * {@link Byte#parseByte(String, int) Byte.parseByte} with the 1860 * specified radix. 
1861 * 1862 * @param radix the radix used to interpret the token as a byte value 1863 * @return the <tt>byte</tt> scanned from the input 1864 * @throws InputMismatchException 1865 * if the next token does not match the <i>Integer</i> 1866 * regular expression, or is out of range 1867 * @throws NoSuchElementException if input is exhausted 1868 * @throws IllegalStateException if this scanner is closed 1869 */ 1870 public byte nextByte(int radix) { 1871 // Check cached result 1872 if ((typeCache != null) && (typeCache instanceof Byte) 1873 && this.radix == radix) { 1874 byte val = ((Byte)typeCache).byteValue(); 1875 useTypeCache(); 1876 return val; 1877 } 1878 setRadix(radix); 1879 clearCaches(); 1880 // Search for next byte 1881 try { 1882 String s = next(integerPattern()); 1883 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 1884 s = processIntegerToken(s); 1885 return Byte.parseByte(s, radix); 1886 } catch (NumberFormatException nfe) { 1887 position = matcher.start(); // don't skip bad token 1888 throw new InputMismatchException(nfe.getMessage()); 1889 } 1890 } 1891 1892 /** 1893 * Returns true if the next token in this scanner's input can be 1894 * interpreted as a short value in the default radix using the 1895 * {@link #nextShort} method. The scanner does not advance past any input. 1896 * 1897 * @return true if and only if this scanner's next token is a valid 1898 * short value in the default radix 1899 * @throws IllegalStateException if this scanner is closed 1900 */ 1901 public boolean hasNextShort() { 1902 return hasNextShort(defaultRadix); 1903 } 1904 1905 /** 1906 * Returns true if the next token in this scanner's input can be 1907 * interpreted as a short value in the specified radix using the 1908 * {@link #nextShort} method. The scanner does not advance past any input. 
1909 * 1910 * @param radix the radix used to interpret the token as a short value 1911 * @return true if and only if this scanner's next token is a valid 1912 * short value in the specified radix 1913 * @throws IllegalStateException if this scanner is closed 1914 */ 1915 public boolean hasNextShort(int radix) { 1916 setRadix(radix); 1917 boolean result = hasNext(integerPattern()); 1918 if (result) { // Cache it 1919 try { 1920 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 1921 processIntegerToken(hasNextResult) : 1922 hasNextResult; 1923 typeCache = Short.parseShort(s, radix); 1924 } catch (NumberFormatException nfe) { 1925 result = false; 1926 } 1927 } 1928 return result; 1929 } 1930 1931 /** 1932 * Scans the next token of the input as a <tt>short</tt>. 1933 * 1934 * <p> An invocation of this method of the form 1935 * <tt>nextShort()</tt> behaves in exactly the same way as the 1936 * invocation <tt>nextShort(radix)</tt>, where <code>radix</code> 1937 * is the default radix of this scanner. 1938 * 1939 * @return the <tt>short</tt> scanned from the input 1940 * @throws InputMismatchException 1941 * if the next token does not match the <i>Integer</i> 1942 * regular expression, or is out of range 1943 * @throws NoSuchElementException if input is exhausted 1944 * @throws IllegalStateException if this scanner is closed 1945 */ 1946 public short nextShort() { 1947 return nextShort(defaultRadix); 1948 } 1949 1950 /** 1951 * Scans the next token of the input as a <tt>short</tt>. 1952 * This method will throw <code>InputMismatchException</code> 1953 * if the next token cannot be translated into a valid short value as 1954 * described below. If the translation is successful, the scanner advances 1955 * past the input that matched. 
1956 * 1957 * <p> If the next token matches the <a 1958 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1959 * above then the token is converted into a <tt>short</tt> value as if by 1960 * removing all locale specific prefixes, group separators, and locale 1961 * specific suffixes, then mapping non-ASCII digits into ASCII 1962 * digits via {@link Character#digit Character.digit}, prepending a 1963 * negative sign (-) if the locale specific negative prefixes and suffixes 1964 * were present, and passing the resulting string to 1965 * {@link Short#parseShort(String, int) Short.parseShort} with the 1966 * specified radix. 1967 * 1968 * @param radix the radix used to interpret the token as a short value 1969 * @return the <tt>short</tt> scanned from the input 1970 * @throws InputMismatchException 1971 * if the next token does not match the <i>Integer</i> 1972 * regular expression, or is out of range 1973 * @throws NoSuchElementException if input is exhausted 1974 * @throws IllegalStateException if this scanner is closed 1975 */ 1976 public short nextShort(int radix) { 1977 // Check cached result 1978 if ((typeCache != null) && (typeCache instanceof Short) 1979 && this.radix == radix) { 1980 short val = ((Short)typeCache).shortValue(); 1981 useTypeCache(); 1982 return val; 1983 } 1984 setRadix(radix); 1985 clearCaches(); 1986 // Search for next short 1987 try { 1988 String s = next(integerPattern()); 1989 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 1990 s = processIntegerToken(s); 1991 return Short.parseShort(s, radix); 1992 } catch (NumberFormatException nfe) { 1993 position = matcher.start(); // don't skip bad token 1994 throw new InputMismatchException(nfe.getMessage()); 1995 } 1996 } 1997 1998 /** 1999 * Returns true if the next token in this scanner's input can be 2000 * interpreted as an int value in the default radix using the 2001 * {@link #nextInt} method. The scanner does not advance past any input. 
2002 * 2003 * @return true if and only if this scanner's next token is a valid 2004 * int value 2005 * @throws IllegalStateException if this scanner is closed 2006 */ 2007 public boolean hasNextInt() { 2008 return hasNextInt(defaultRadix); 2009 } 2010 2011 /** 2012 * Returns true if the next token in this scanner's input can be 2013 * interpreted as an int value in the specified radix using the 2014 * {@link #nextInt} method. The scanner does not advance past any input. 2015 * 2016 * @param radix the radix used to interpret the token as an int value 2017 * @return true if and only if this scanner's next token is a valid 2018 * int value 2019 * @throws IllegalStateException if this scanner is closed 2020 */ 2021 public boolean hasNextInt(int radix) { 2022 setRadix(radix); 2023 boolean result = hasNext(integerPattern()); 2024 if (result) { // Cache it 2025 try { 2026 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2027 processIntegerToken(hasNextResult) : 2028 hasNextResult; 2029 typeCache = Integer.parseInt(s, radix); 2030 } catch (NumberFormatException nfe) { 2031 result = false; 2032 } 2033 } 2034 return result; 2035 } 2036 2037 /** 2038 * The integer token must be stripped of prefixes, group separators, 2039 * and suffixes, non ascii digits must be converted into ascii digits 2040 * before parse will accept it. 
2041 */ 2042 private String processIntegerToken(String token) { 2043 String result = token.replaceAll(""+groupSeparator, ""); 2044 boolean isNegative = false; 2045 int preLen = negativePrefix.length(); 2046 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2047 isNegative = true; 2048 result = result.substring(preLen); 2049 } 2050 int sufLen = negativeSuffix.length(); 2051 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2052 isNegative = true; 2053 result = result.substring(result.length() - sufLen, 2054 result.length()); 2055 } 2056 if (isNegative) 2057 result = "-" + result; 2058 return result; 2059 } 2060 2061 /** 2062 * Scans the next token of the input as an <tt>int</tt>. 2063 * 2064 * <p> An invocation of this method of the form 2065 * <tt>nextInt()</tt> behaves in exactly the same way as the 2066 * invocation <tt>nextInt(radix)</tt>, where <code>radix</code> 2067 * is the default radix of this scanner. 2068 * 2069 * @return the <tt>int</tt> scanned from the input 2070 * @throws InputMismatchException 2071 * if the next token does not match the <i>Integer</i> 2072 * regular expression, or is out of range 2073 * @throws NoSuchElementException if input is exhausted 2074 * @throws IllegalStateException if this scanner is closed 2075 */ 2076 public int nextInt() { 2077 return nextInt(defaultRadix); 2078 } 2079 2080 /** 2081 * Scans the next token of the input as an <tt>int</tt>. 2082 * This method will throw <code>InputMismatchException</code> 2083 * if the next token cannot be translated into a valid int value as 2084 * described below. If the translation is successful, the scanner advances 2085 * past the input that matched. 
2086 * 2087 * <p> If the next token matches the <a 2088 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2089 * above then the token is converted into an <tt>int</tt> value as if by 2090 * removing all locale specific prefixes, group separators, and locale 2091 * specific suffixes, then mapping non-ASCII digits into ASCII 2092 * digits via {@link Character#digit Character.digit}, prepending a 2093 * negative sign (-) if the locale specific negative prefixes and suffixes 2094 * were present, and passing the resulting string to 2095 * {@link Integer#parseInt(String, int) Integer.parseInt} with the 2096 * specified radix. 2097 * 2098 * @param radix the radix used to interpret the token as an int value 2099 * @return the <tt>int</tt> scanned from the input 2100 * @throws InputMismatchException 2101 * if the next token does not match the <i>Integer</i> 2102 * regular expression, or is out of range 2103 * @throws NoSuchElementException if input is exhausted 2104 * @throws IllegalStateException if this scanner is closed 2105 */ 2106 public int nextInt(int radix) { 2107 // Check cached result 2108 if ((typeCache != null) && (typeCache instanceof Integer) 2109 && this.radix == radix) { 2110 int val = ((Integer)typeCache).intValue(); 2111 useTypeCache(); 2112 return val; 2113 } 2114 setRadix(radix); 2115 clearCaches(); 2116 // Search for next int 2117 try { 2118 String s = next(integerPattern()); 2119 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2120 s = processIntegerToken(s); 2121 return Integer.parseInt(s, radix); 2122 } catch (NumberFormatException nfe) { 2123 position = matcher.start(); // don't skip bad token 2124 throw new InputMismatchException(nfe.getMessage()); 2125 } 2126 } 2127 2128 /** 2129 * Returns true if the next token in this scanner's input can be 2130 * interpreted as a long value in the default radix using the 2131 * {@link #nextLong} method. The scanner does not advance past any input. 
2132 * 2133 * @return true if and only if this scanner's next token is a valid 2134 * long value 2135 * @throws IllegalStateException if this scanner is closed 2136 */ 2137 public boolean hasNextLong() { 2138 return hasNextLong(defaultRadix); 2139 } 2140 2141 /** 2142 * Returns true if the next token in this scanner's input can be 2143 * interpreted as a long value in the specified radix using the 2144 * {@link #nextLong} method. The scanner does not advance past any input. 2145 * 2146 * @param radix the radix used to interpret the token as a long value 2147 * @return true if and only if this scanner's next token is a valid 2148 * long value 2149 * @throws IllegalStateException if this scanner is closed 2150 */ 2151 public boolean hasNextLong(int radix) { 2152 setRadix(radix); 2153 boolean result = hasNext(integerPattern()); 2154 if (result) { // Cache it 2155 try { 2156 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2157 processIntegerToken(hasNextResult) : 2158 hasNextResult; 2159 typeCache = Long.parseLong(s, radix); 2160 } catch (NumberFormatException nfe) { 2161 result = false; 2162 } 2163 } 2164 return result; 2165 } 2166 2167 /** 2168 * Scans the next token of the input as a <tt>long</tt>. 2169 * 2170 * <p> An invocation of this method of the form 2171 * <tt>nextLong()</tt> behaves in exactly the same way as the 2172 * invocation <tt>nextLong(radix)</tt>, where <code>radix</code> 2173 * is the default radix of this scanner. 2174 * 2175 * @return the <tt>long</tt> scanned from the input 2176 * @throws InputMismatchException 2177 * if the next token does not match the <i>Integer</i> 2178 * regular expression, or is out of range 2179 * @throws NoSuchElementException if input is exhausted 2180 * @throws IllegalStateException if this scanner is closed 2181 */ 2182 public long nextLong() { 2183 return nextLong(defaultRadix); 2184 } 2185 2186 /** 2187 * Scans the next token of the input as a <tt>long</tt>. 
2188 * This method will throw <code>InputMismatchException</code> 2189 * if the next token cannot be translated into a valid long value as 2190 * described below. If the translation is successful, the scanner advances 2191 * past the input that matched. 2192 * 2193 * <p> If the next token matches the <a 2194 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2195 * above then the token is converted into a <tt>long</tt> value as if by 2196 * removing all locale specific prefixes, group separators, and locale 2197 * specific suffixes, then mapping non-ASCII digits into ASCII 2198 * digits via {@link Character#digit Character.digit}, prepending a 2199 * negative sign (-) if the locale specific negative prefixes and suffixes 2200 * were present, and passing the resulting string to 2201 * {@link Long#parseLong(String, int) Long.parseLong} with the 2202 * specified radix. 2203 * 2204 * @param radix the radix used to interpret the token as an int value 2205 * @return the <tt>long</tt> scanned from the input 2206 * @throws InputMismatchException 2207 * if the next token does not match the <i>Integer</i> 2208 * regular expression, or is out of range 2209 * @throws NoSuchElementException if input is exhausted 2210 * @throws IllegalStateException if this scanner is closed 2211 */ 2212 public long nextLong(int radix) { 2213 // Check cached result 2214 if ((typeCache != null) && (typeCache instanceof Long) 2215 && this.radix == radix) { 2216 long val = ((Long)typeCache).longValue(); 2217 useTypeCache(); 2218 return val; 2219 } 2220 setRadix(radix); 2221 clearCaches(); 2222 try { 2223 String s = next(integerPattern()); 2224 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2225 s = processIntegerToken(s); 2226 return Long.parseLong(s, radix); 2227 } catch (NumberFormatException nfe) { 2228 position = matcher.start(); // don't skip bad token 2229 throw new InputMismatchException(nfe.getMessage()); 2230 } 2231 } 2232 2233 /** 2234 * The float token must be stripped 
of prefixes, group separators, 2235 * and suffixes, non ascii digits must be converted into ascii digits 2236 * before parseFloat will accept it. 2237 * 2238 * If there are non-ascii digits in the token these digits must 2239 * be processed before the token is passed to parseFloat. 2240 */ 2241 private String processFloatToken(String token) { 2242 String result = token.replaceAll(groupSeparator, ""); 2243 if (!decimalSeparator.equals("\\.")) 2244 result = result.replaceAll(decimalSeparator, "."); 2245 boolean isNegative = false; 2246 int preLen = negativePrefix.length(); 2247 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2248 isNegative = true; 2249 result = result.substring(preLen); 2250 } 2251 int sufLen = negativeSuffix.length(); 2252 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2253 isNegative = true; 2254 result = result.substring(result.length() - sufLen, 2255 result.length()); 2256 } 2257 if (result.equals(nanString)) 2258 result = "NaN"; 2259 if (result.equals(infinityString)) 2260 result = "Infinity"; 2261 if (isNegative) 2262 result = "-" + result; 2263 2264 // Translate non-ASCII digits 2265 Matcher m = NON_ASCII_DIGIT.matcher(result); 2266 if (m.find()) { 2267 StringBuilder inASCII = new StringBuilder(); 2268 for (int i=0; i<result.length(); i++) { 2269 char nextChar = result.charAt(i); 2270 if (Character.isDigit(nextChar)) { 2271 int d = Character.digit(nextChar, 10); 2272 if (d != -1) 2273 inASCII.append(d); 2274 else 2275 inASCII.append(nextChar); 2276 } else { 2277 inASCII.append(nextChar); 2278 } 2279 } 2280 result = inASCII.toString(); 2281 } 2282 2283 return result; 2284 } 2285 2286 /** 2287 * Returns true if the next token in this scanner's input can be 2288 * interpreted as a float value using the {@link #nextFloat} 2289 * method. The scanner does not advance past any input. 
2290 * 2291 * @return true if and only if this scanner's next token is a valid 2292 * float value 2293 * @throws IllegalStateException if this scanner is closed 2294 */ 2295 public boolean hasNextFloat() { 2296 setRadix(10); 2297 boolean result = hasNext(floatPattern()); 2298 if (result) { // Cache it 2299 try { 2300 String s = processFloatToken(hasNextResult); 2301 typeCache = Float.valueOf(Float.parseFloat(s)); 2302 } catch (NumberFormatException nfe) { 2303 result = false; 2304 } 2305 } 2306 return result; 2307 } 2308 2309 /** 2310 * Scans the next token of the input as a <tt>float</tt>. 2311 * This method will throw <code>InputMismatchException</code> 2312 * if the next token cannot be translated into a valid float value as 2313 * described below. If the translation is successful, the scanner advances 2314 * past the input that matched. 2315 * 2316 * <p> If the next token matches the <a 2317 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2318 * then the token is converted into a <tt>float</tt> value as if by 2319 * removing all locale specific prefixes, group separators, and locale 2320 * specific suffixes, then mapping non-ASCII digits into ASCII 2321 * digits via {@link Character#digit Character.digit}, prepending a 2322 * negative sign (-) if the locale specific negative prefixes and suffixes 2323 * were present, and passing the resulting string to 2324 * {@link Float#parseFloat Float.parseFloat}. If the token matches 2325 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2326 * is passed to {@link Float#parseFloat(String) Float.parseFloat} as 2327 * appropriate. 
2328 * 2329 * @return the <tt>float</tt> scanned from the input 2330 * @throws InputMismatchException 2331 * if the next token does not match the <i>Float</i> 2332 * regular expression, or is out of range 2333 * @throws NoSuchElementException if input is exhausted 2334 * @throws IllegalStateException if this scanner is closed 2335 */ 2336 public float nextFloat() { 2337 // Check cached result 2338 if ((typeCache != null) && (typeCache instanceof Float)) { 2339 float val = ((Float)typeCache).floatValue(); 2340 useTypeCache(); 2341 return val; 2342 } 2343 setRadix(10); 2344 clearCaches(); 2345 try { 2346 return Float.parseFloat(processFloatToken(next(floatPattern()))); 2347 } catch (NumberFormatException nfe) { 2348 position = matcher.start(); // don't skip bad token 2349 throw new InputMismatchException(nfe.getMessage()); 2350 } 2351 } 2352 2353 /** 2354 * Returns true if the next token in this scanner's input can be 2355 * interpreted as a double value using the {@link #nextDouble} 2356 * method. The scanner does not advance past any input. 2357 * 2358 * @return true if and only if this scanner's next token is a valid 2359 * double value 2360 * @throws IllegalStateException if this scanner is closed 2361 */ 2362 public boolean hasNextDouble() { 2363 setRadix(10); 2364 boolean result = hasNext(floatPattern()); 2365 if (result) { // Cache it 2366 try { 2367 String s = processFloatToken(hasNextResult); 2368 typeCache = Double.valueOf(Double.parseDouble(s)); 2369 } catch (NumberFormatException nfe) { 2370 result = false; 2371 } 2372 } 2373 return result; 2374 } 2375 2376 /** 2377 * Scans the next token of the input as a <tt>double</tt>. 2378 * This method will throw <code>InputMismatchException</code> 2379 * if the next token cannot be translated into a valid double value. 2380 * If the translation is successful, the scanner advances past the input 2381 * that matched. 
2382 * 2383 * <p> If the next token matches the <a 2384 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2385 * then the token is converted into a <tt>double</tt> value as if by 2386 * removing all locale specific prefixes, group separators, and locale 2387 * specific suffixes, then mapping non-ASCII digits into ASCII 2388 * digits via {@link Character#digit Character.digit}, prepending a 2389 * negative sign (-) if the locale specific negative prefixes and suffixes 2390 * were present, and passing the resulting string to 2391 * {@link Double#parseDouble Double.parseDouble}. If the token matches 2392 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2393 * is passed to {@link Double#parseDouble(String) Double.parseDouble} as 2394 * appropriate. 2395 * 2396 * @return the <tt>double</tt> scanned from the input 2397 * @throws InputMismatchException 2398 * if the next token does not match the <i>Float</i> 2399 * regular expression, or is out of range 2400 * @throws NoSuchElementException if the input is exhausted 2401 * @throws IllegalStateException if this scanner is closed 2402 */ 2403 public double nextDouble() { 2404 // Check cached result 2405 if ((typeCache != null) && (typeCache instanceof Double)) { 2406 double val = ((Double)typeCache).doubleValue(); 2407 useTypeCache(); 2408 return val; 2409 } 2410 setRadix(10); 2411 clearCaches(); 2412 // Search for next float 2413 try { 2414 return Double.parseDouble(processFloatToken(next(floatPattern()))); 2415 } catch (NumberFormatException nfe) { 2416 position = matcher.start(); // don't skip bad token 2417 throw new InputMismatchException(nfe.getMessage()); 2418 } 2419 } 2420 2421 // Convenience methods for scanning multi precision numbers 2422 2423 /** 2424 * Returns true if the next token in this scanner's input can be 2425 * interpreted as a <code>BigInteger</code> in the default radix using the 2426 * {@link #nextBigInteger} method. 
The scanner does not advance past any 2427 * input. 2428 * 2429 * @return true if and only if this scanner's next token is a valid 2430 * <code>BigInteger</code> 2431 * @throws IllegalStateException if this scanner is closed 2432 */ 2433 public boolean hasNextBigInteger() { 2434 return hasNextBigInteger(defaultRadix); 2435 } 2436 2437 /** 2438 * Returns true if the next token in this scanner's input can be 2439 * interpreted as a <code>BigInteger</code> in the specified radix using 2440 * the {@link #nextBigInteger} method. The scanner does not advance past 2441 * any input. 2442 * 2443 * @param radix the radix used to interpret the token as an integer 2444 * @return true if and only if this scanner's next token is a valid 2445 * <code>BigInteger</code> 2446 * @throws IllegalStateException if this scanner is closed 2447 */ 2448 public boolean hasNextBigInteger(int radix) { 2449 setRadix(radix); 2450 boolean result = hasNext(integerPattern()); 2451 if (result) { // Cache it 2452 try { 2453 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2454 processIntegerToken(hasNextResult) : 2455 hasNextResult; 2456 typeCache = new BigInteger(s, radix); 2457 } catch (NumberFormatException nfe) { 2458 result = false; 2459 } 2460 } 2461 return result; 2462 } 2463 2464 /** 2465 * Scans the next token of the input as a {@link java.math.BigInteger 2466 * BigInteger}. 2467 * 2468 * <p> An invocation of this method of the form 2469 * <tt>nextBigInteger()</tt> behaves in exactly the same way as the 2470 * invocation <tt>nextBigInteger(radix)</tt>, where <code>radix</code> 2471 * is the default radix of this scanner. 
2472 * 2473 * @return the <tt>BigInteger</tt> scanned from the input 2474 * @throws InputMismatchException 2475 * if the next token does not match the <i>Integer</i> 2476 * regular expression, or is out of range 2477 * @throws NoSuchElementException if the input is exhausted 2478 * @throws IllegalStateException if this scanner is closed 2479 */ 2480 public BigInteger nextBigInteger() { 2481 return nextBigInteger(defaultRadix); 2482 } 2483 2484 /** 2485 * Scans the next token of the input as a {@link java.math.BigInteger 2486 * BigInteger}. 2487 * 2488 * <p> If the next token matches the <a 2489 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2490 * above then the token is converted into a <tt>BigInteger</tt> value as if 2491 * by removing all group separators, mapping non-ASCII digits into ASCII 2492 * digits via the {@link Character#digit Character.digit}, and passing the 2493 * resulting string to the {@link 2494 * java.math.BigInteger#BigInteger(java.lang.String) 2495 * BigInteger(String, int)} constructor with the specified radix. 
2496 * 2497 * @param radix the radix used to interpret the token 2498 * @return the <tt>BigInteger</tt> scanned from the input 2499 * @throws InputMismatchException 2500 * if the next token does not match the <i>Integer</i> 2501 * regular expression, or is out of range 2502 * @throws NoSuchElementException if the input is exhausted 2503 * @throws IllegalStateException if this scanner is closed 2504 */ 2505 public BigInteger nextBigInteger(int radix) { 2506 // Check cached result 2507 if ((typeCache != null) && (typeCache instanceof BigInteger) 2508 && this.radix == radix) { 2509 BigInteger val = (BigInteger)typeCache; 2510 useTypeCache(); 2511 return val; 2512 } 2513 setRadix(radix); 2514 clearCaches(); 2515 // Search for next int 2516 try { 2517 String s = next(integerPattern()); 2518 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2519 s = processIntegerToken(s); 2520 return new BigInteger(s, radix); 2521 } catch (NumberFormatException nfe) { 2522 position = matcher.start(); // don't skip bad token 2523 throw new InputMismatchException(nfe.getMessage()); 2524 } 2525 } 2526 2527 /** 2528 * Returns true if the next token in this scanner's input can be 2529 * interpreted as a <code>BigDecimal</code> using the 2530 * {@link #nextBigDecimal} method. The scanner does not advance past any 2531 * input. 2532 * 2533 * @return true if and only if this scanner's next token is a valid 2534 * <code>BigDecimal</code> 2535 * @throws IllegalStateException if this scanner is closed 2536 */ 2537 public boolean hasNextBigDecimal() { 2538 setRadix(10); 2539 boolean result = hasNext(decimalPattern()); 2540 if (result) { // Cache it 2541 try { 2542 String s = processFloatToken(hasNextResult); 2543 typeCache = new BigDecimal(s); 2544 } catch (NumberFormatException nfe) { 2545 result = false; 2546 } 2547 } 2548 return result; 2549 } 2550 2551 /** 2552 * Scans the next token of the input as a {@link java.math.BigDecimal 2553 * BigDecimal}. 
2554 * 2555 * <p> If the next token matches the <a 2556 * href="#Decimal-regex"><i>Decimal</i></a> regular expression defined 2557 * above then the token is converted into a <tt>BigDecimal</tt> value as if 2558 * by removing all group separators, mapping non-ASCII digits into ASCII 2559 * digits via the {@link Character#digit Character.digit}, and passing the 2560 * resulting string to the {@link 2561 * java.math.BigDecimal#BigDecimal(java.lang.String) BigDecimal(String)} 2562 * constructor. 2563 * 2564 * @return the <tt>BigDecimal</tt> scanned from the input 2565 * @throws InputMismatchException 2566 * if the next token does not match the <i>Decimal</i> 2567 * regular expression, or is out of range 2568 * @throws NoSuchElementException if the input is exhausted 2569 * @throws IllegalStateException if this scanner is closed 2570 */ 2571 public BigDecimal nextBigDecimal() { 2572 // Check cached result 2573 if ((typeCache != null) && (typeCache instanceof BigDecimal)) { 2574 BigDecimal val = (BigDecimal)typeCache; 2575 useTypeCache(); 2576 return val; 2577 } 2578 setRadix(10); 2579 clearCaches(); 2580 // Search for next float 2581 try { 2582 String s = processFloatToken(next(decimalPattern())); 2583 return new BigDecimal(s); 2584 } catch (NumberFormatException nfe) { 2585 position = matcher.start(); // don't skip bad token 2586 throw new InputMismatchException(nfe.getMessage()); 2587 } 2588 } 2589 2590 /** 2591 * Resets this scanner. 2592 * 2593 * <p> Resetting a scanner discards all of its explicit state 2594 * information which may have been changed by invocations of {@link 2595 * #useDelimiter}, {@link #useLocale}, or {@link #useRadix}. 
2596 * 2597 * <p> An invocation of this method of the form 2598 * <tt>scanner.reset()</tt> behaves in exactly the same way as the 2599 * invocation 2600 * 2601 * <blockquote><pre>{@code 2602 * scanner.useDelimiter("\\p{javaWhitespace}+") 2603 * .useLocale(Locale.getDefault(Locale.Category.FORMAT)) 2604 * .useRadix(10); 2605 * }</pre></blockquote> 2606 * 2607 * @return this scanner 2608 * 2609 * @since 1.6 2610 */ 2611 public Scanner reset() { 2612 delimPattern = WHITESPACE_PATTERN; 2613 useLocale(Locale.getDefault(Locale.Category.FORMAT)); 2614 useRadix(10); 2615 clearCaches(); 2616 return this; 2617 } 2618 }