1 /* 2 * Copyright 2003-2009 Sun Microsystems, Inc. All Rights Reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Sun designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Sun in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 22 * CA 95054 USA or visit www.sun.com if you need additional information or 23 * have any questions. 24 */ 25 26 package java.util; 27 28 import java.nio.file.FileRef; 29 import java.util.regex.*; 30 import java.io.*; 31 import java.math.*; 32 import java.nio.*; 33 import java.nio.channels.*; 34 import java.nio.charset.*; 35 import java.text.*; 36 import java.util.Locale; 37 import sun.misc.LRUCache; 38 39 /** 40 * A simple text scanner which can parse primitive types and strings using 41 * regular expressions. 42 * 43 * <p>A <code>Scanner</code> breaks its input into tokens using a 44 * delimiter pattern, which by default matches whitespace. The resulting 45 * tokens may then be converted into values of different types using the 46 * various <tt>next</tt> methods. 
47 * 48 * <p>For example, this code allows a user to read a number from 49 * <tt>System.in</tt>: 50 * <blockquote><pre> 51 * Scanner sc = new Scanner(System.in); 52 * int i = sc.nextInt(); 53 * </pre></blockquote> 54 * 55 * <p>As another example, this code allows <code>long</code> types to be 56 * assigned from entries in a file <code>myNumbers</code>: 57 * <blockquote><pre> 58 * Scanner sc = new Scanner(new File("myNumbers")); 59 * while (sc.hasNextLong()) { 60 * long aLong = sc.nextLong(); 61 * }</pre></blockquote> 62 * 63 * <p>The scanner can also use delimiters other than whitespace. This 64 * example reads several items in from a string: 65 *<blockquote><pre> 66 * String input = "1 fish 2 fish red fish blue fish"; 67 * Scanner s = new Scanner(input).useDelimiter("\\s*fish\\s*"); 68 * System.out.println(s.nextInt()); 69 * System.out.println(s.nextInt()); 70 * System.out.println(s.next()); 71 * System.out.println(s.next()); 72 * s.close(); </pre></blockquote> 73 * <p> 74 * prints the following output: 75 * <blockquote><pre> 76 * 1 77 * 2 78 * red 79 * blue </pre></blockquote> 80 * 81 * <p>The same output can be generated with this code, which uses a regular 82 * expression to parse all four tokens at once: 83 *<blockquote><pre> 84 * String input = "1 fish 2 fish red fish blue fish"; 85 * Scanner s = new Scanner(input); 86 * s.findInLine("(\\d+) fish (\\d+) fish (\\w+) fish (\\w+)"); 87 * MatchResult result = s.match(); 88 * for (int i=1; i<=result.groupCount(); i++) 89 * System.out.println(result.group(i)); 90 * s.close(); </pre></blockquote> 91 * 92 * <p>The <a name="default-delimiter">default whitespace delimiter</a> used 93 * by a scanner is as recognized by {@link java.lang.Character}.{@link 94 * java.lang.Character#isWhitespace(char) isWhitespace}. The {@link #reset} 95 * method will reset the value of the scanner's delimiter to the default 96 * whitespace delimiter regardless of whether it was previously changed. 
97 * 98 * <p>A scanning operation may block waiting for input. 99 * 100 * <p>The {@link #next} and {@link #hasNext} methods and their 101 * primitive-type companion methods (such as {@link #nextInt} and 102 * {@link #hasNextInt}) first skip any input that matches the delimiter 103 * pattern, and then attempt to return the next token. Both <tt>hasNext</tt> 104 * and <tt>next</tt> methods may block waiting for further input. Whether a 105 * <tt>hasNext</tt> method blocks has no connection to whether or not its 106 * associated <tt>next</tt> method will block. 107 * 108 * <p> The {@link #findInLine}, {@link #findWithinHorizon}, and {@link #skip} 109 * methods operate independently of the delimiter pattern. These methods will 110 * attempt to match the specified pattern with no regard to delimiters in the 111 * input and thus can be used in special circumstances where delimiters are 112 * not relevant. These methods may block waiting for more input. 113 * 114 * <p>When a scanner throws an {@link InputMismatchException}, the scanner 115 * will not pass the token that caused the exception, so that it may be 116 * retrieved or skipped via some other method. 117 * 118 * <p>Depending upon the type of delimiting pattern, empty tokens may be 119 * returned. For example, the pattern <tt>"\\s+"</tt> will return no empty 120 * tokens since it matches multiple instances of the delimiter. The delimiting 121 * pattern <tt>"\\s"</tt> could return empty tokens since it only passes one 122 * space at a time. 123 * 124 * <p> A scanner can read text from any object which implements the {@link 125 * java.lang.Readable} interface. If an invocation of the underlying 126 * readable's {@link java.lang.Readable#read} method throws an {@link 127 * java.io.IOException} then the scanner assumes that the end of the input 128 * has been reached. The most recent <tt>IOException</tt> thrown by the 129 * underlying readable can be retrieved via the {@link #ioException} method. 
 *
 * <p>When a <code>Scanner</code> is closed, it will close its input source
 * if the source implements the {@link java.io.Closeable} interface.
 *
 * <p>A <code>Scanner</code> is not safe for multithreaded use without
 * external synchronization.
 *
 * <p>Unless otherwise mentioned, passing a <code>null</code> parameter into
 * any method of a <code>Scanner</code> will cause a
 * <code>NullPointerException</code> to be thrown.
 *
 * <p>A scanner will default to interpreting numbers as decimal unless a
 * different radix has been set by using the {@link #useRadix} method. The
 * {@link #reset} method will reset the value of the scanner's radix to
 * <code>10</code> regardless of whether it was previously changed.
 *
 * <a name="localized-numbers"></a>
 * <h4> Localized numbers </h4>
 *
 * <p> An instance of this class is capable of scanning numbers in the standard
 * formats as well as in the formats of the scanner's locale. A scanner's
 * <a name="initial-locale">initial locale</a> is the value returned by the {@link
 * java.util.Locale#getDefault} method; it may be changed via the {@link
 * #useLocale} method. The {@link #reset} method will reset the value of the
 * scanner's locale to the initial locale regardless of whether it was
 * previously changed.
 *
 * <p>The localized formats are defined in terms of the following parameters,
 * which for a particular locale are taken from that locale's {@link
 * java.text.DecimalFormat DecimalFormat} object, <tt>df</tt>, and its
 * {@link java.text.DecimalFormatSymbols DecimalFormatSymbols} object,
 * <tt>dfs</tt>.
162 * 163 * <blockquote><table> 164 * <tr><td valign="top"><i>LocalGroupSeparator </i></td> 165 * <td valign="top">The character used to separate thousands groups, 166 * <i>i.e.,</i> <tt>dfs.</tt>{@link 167 * java.text.DecimalFormatSymbols#getGroupingSeparator 168 * getGroupingSeparator()}</td></tr> 169 * <tr><td valign="top"><i>LocalDecimalSeparator </i></td> 170 * <td valign="top">The character used for the decimal point, 171 * <i>i.e.,</i> <tt>dfs.</tt>{@link 172 * java.text.DecimalFormatSymbols#getDecimalSeparator 173 * getDecimalSeparator()}</td></tr> 174 * <tr><td valign="top"><i>LocalPositivePrefix </i></td> 175 * <td valign="top">The string that appears before a positive number (may 176 * be empty), <i>i.e.,</i> <tt>df.</tt>{@link 177 * java.text.DecimalFormat#getPositivePrefix 178 * getPositivePrefix()}</td></tr> 179 * <tr><td valign="top"><i>LocalPositiveSuffix </i></td> 180 * <td valign="top">The string that appears after a positive number (may be 181 * empty), <i>i.e.,</i> <tt>df.</tt>{@link 182 * java.text.DecimalFormat#getPositiveSuffix 183 * getPositiveSuffix()}</td></tr> 184 * <tr><td valign="top"><i>LocalNegativePrefix </i></td> 185 * <td valign="top">The string that appears before a negative number (may 186 * be empty), <i>i.e.,</i> <tt>df.</tt>{@link 187 * java.text.DecimalFormat#getNegativePrefix 188 * getNegativePrefix()}</td></tr> 189 * <tr><td valign="top"><i>LocalNegativeSuffix </i></td> 190 * <td valign="top">The string that appears after a negative number (may be 191 * empty), <i>i.e.,</i> <tt>df.</tt>{@link 192 * java.text.DecimalFormat#getNegativeSuffix 193 * getNegativeSuffix()}</td></tr> 194 * <tr><td valign="top"><i>LocalNaN </i></td> 195 * <td valign="top">The string that represents not-a-number for 196 * floating-point values, 197 * <i>i.e.,</i> <tt>dfs.</tt>{@link 198 * java.text.DecimalFormatSymbols#getNaN 199 * getNaN()}</td></tr> 200 * <tr><td valign="top"><i>LocalInfinity </i></td> 201 * <td valign="top">The string that 
represents infinity for floating-point 202 * values, <i>i.e.,</i> <tt>dfs.</tt>{@link 203 * java.text.DecimalFormatSymbols#getInfinity 204 * getInfinity()}</td></tr> 205 * </table></blockquote> 206 * 207 * <a name="number-syntax"> 208 * <h4> Number syntax </h4> 209 * 210 * <p> The strings that can be parsed as numbers by an instance of this class 211 * are specified in terms of the following regular-expression grammar, where 212 * Rmax is the highest digit in the radix being used (for example, Rmax is 9 213 * in base 10). 214 * 215 * <p> 216 * <table cellspacing=0 cellpadding=0 align=center> 217 * 218 * <tr><td valign=top align=right><i>NonASCIIDigit</i> ::</td> 219 * <td valign=top>= A non-ASCII character c for which 220 * {@link java.lang.Character#isDigit Character.isDigit}<tt>(c)</tt> 221 * returns true</td></tr> 222 * 223 * <tr><td> </td></tr> 224 * 225 * <tr><td align=right><i>Non0Digit</i> ::</td> 226 * <td><tt>= [1-</tt><i>Rmax</i><tt>] | </tt><i>NonASCIIDigit</i></td></tr> 227 * 228 * <tr><td> </td></tr> 229 * 230 * <tr><td align=right><i>Digit</i> ::</td> 231 * <td><tt>= [0-</tt><i>Rmax</i><tt>] | </tt><i>NonASCIIDigit</i></td></tr> 232 * 233 * <tr><td> </td></tr> 234 * 235 * <tr><td valign=top align=right><i>GroupedNumeral</i> ::</td> 236 * <td valign=top> 237 * <table cellpadding=0 cellspacing=0> 238 * <tr><td><tt>= ( </tt></td> 239 * <td><i>Non0Digit</i><tt> 240 * </tt><i>Digit</i><tt>? 
241 * </tt><i>Digit</i><tt>?</tt></td></tr> 242 * <tr><td></td> 243 * <td><tt>( </tt><i>LocalGroupSeparator</i><tt> 244 * </tt><i>Digit</i><tt> 245 * </tt><i>Digit</i><tt> 246 * </tt><i>Digit</i><tt> )+ )</tt></td></tr> 247 * </table></td></tr> 248 * 249 * <tr><td> </td></tr> 250 * 251 * <tr><td align=right><i>Numeral</i> ::</td> 252 * <td><tt>= ( ( </tt><i>Digit</i><tt>+ ) 253 * | </tt><i>GroupedNumeral</i><tt> )</tt></td></tr> 254 * 255 * <tr><td> </td></tr> 256 * 257 * <tr><td valign=top align=right> 258 * <a name="Integer-regex"><i>Integer</i> ::</td> 259 * <td valign=top><tt>= ( [-+]? ( </tt><i>Numeral</i><tt> 260 * ) )</tt></td></tr> 261 * <tr><td></td> 262 * <td><tt>| </tt><i>LocalPositivePrefix</i><tt> </tt><i>Numeral</i><tt> 263 * </tt><i>LocalPositiveSuffix</i></td></tr> 264 * <tr><td></td> 265 * <td><tt>| </tt><i>LocalNegativePrefix</i><tt> </tt><i>Numeral</i><tt> 266 * </tt><i>LocalNegativeSuffix</i></td></tr> 267 * 268 * <tr><td> </td></tr> 269 * 270 * <tr><td align=right><i>DecimalNumeral</i> ::</td> 271 * <td><tt>= </tt><i>Numeral</i></td></tr> 272 * <tr><td></td> 273 * <td><tt>| </tt><i>Numeral</i><tt> 274 * </tt><i>LocalDecimalSeparator</i><tt> 275 * </tt><i>Digit</i><tt>*</tt></td></tr> 276 * <tr><td></td> 277 * <td><tt>| </tt><i>LocalDecimalSeparator</i><tt> 278 * </tt><i>Digit</i><tt>+</tt></td></tr> 279 * 280 * <tr><td> </td></tr> 281 * 282 * <tr><td align=right><i>Exponent</i> ::</td> 283 * <td><tt>= ( [eE] [+-]? </tt><i>Digit</i><tt>+ )</tt></td></tr> 284 * 285 * <tr><td> </td></tr> 286 * 287 * <tr><td align=right> 288 * <a name="Decimal-regex"><i>Decimal</i> ::</td> 289 * <td><tt>= ( [-+]? </tt><i>DecimalNumeral</i><tt> 290 * </tt><i>Exponent</i><tt>? 
 * )</tt></td></tr>
 *   <tr><td></td>
 *       <td><tt>| </tt><i>LocalPositivePrefix</i><tt>
 *                 </tt><i>DecimalNumeral</i><tt>
 *                 </tt><i>LocalPositiveSuffix</i><tt>
 *                 </tt><i>Exponent</i><tt>?</tt></td></tr>
 *   <tr><td></td>
 *       <td><tt>| </tt><i>LocalNegativePrefix</i><tt>
 *                 </tt><i>DecimalNumeral</i><tt>
 *                 </tt><i>LocalNegativeSuffix</i><tt>
 *                 </tt><i>Exponent</i><tt>?</tt></td></tr>
 *
 *   <tr><td>&nbsp;</td></tr>
 *
 *   <tr><td align=right><i>HexFloat</i>&nbsp;&nbsp;::</td>
 *       <td><tt>= [-+]? 0[xX][0-9a-fA-F]*\.[0-9a-fA-F]+
 *                 ([pP][-+]?[0-9]+)?</tt></td></tr>
 *
 *   <tr><td>&nbsp;</td></tr>
 *
 *   <tr><td align=right><i>NonNumber</i>&nbsp;&nbsp;::</td>
 *       <td valign=top><tt>= NaN
 *                          | </tt><i>LocalNaN</i><tt>
 *                          | Infinity
 *                          | </tt><i>LocalInfinity</i></td></tr>
 *
 *   <tr><td>&nbsp;</td></tr>
 *
 *   <tr><td align=right><i>SignedNonNumber</i>&nbsp;&nbsp;::</td>
 *       <td><tt>= ( [-+]? </tt><i>NonNumber</i><tt> )</tt></td></tr>
 *   <tr><td></td>
 *       <td><tt>| </tt><i>LocalPositivePrefix</i><tt>
 *                 </tt><i>NonNumber</i><tt>
 *                 </tt><i>LocalPositiveSuffix</i></td></tr>
 *   <tr><td></td>
 *       <td><tt>| </tt><i>LocalNegativePrefix</i><tt>
 *                 </tt><i>NonNumber</i><tt>
 *                 </tt><i>LocalNegativeSuffix</i></td></tr>
 *
 *   <tr><td>&nbsp;</td></tr>
 *
 *   <tr><td valign=top align=right>
 *         <a name="Float-regex"><i>Float</i></a>&nbsp;&nbsp;::</td>
 *       <td valign=top><tt>= </tt><i>Decimal</i></td></tr>
 *       <tr><td></td>
 *           <td><tt>| </tt><i>HexFloat</i></td></tr>
 *       <tr><td></td>
 *           <td><tt>| </tt><i>SignedNonNumber</i></td></tr>
 *
 * </table>
 *
 * <p> Whitespace is not significant in the above regular expressions.
343 * 344 * @since 1.5 345 */ 346 public final class Scanner implements Iterator<String> { 347 348 // Internal buffer used to hold input 349 private CharBuffer buf; 350 351 // Size of internal character buffer 352 private static final int BUFFER_SIZE = 1024; // change to 1024; 353 354 // The index into the buffer currently held by the Scanner 355 private int position; 356 357 // Internal matcher used for finding delimiters 358 private Matcher matcher; 359 360 // Pattern used to delimit tokens 361 private Pattern delimPattern; 362 363 // Pattern found in last hasNext operation 364 private Pattern hasNextPattern; 365 366 // Position after last hasNext operation 367 private int hasNextPosition; 368 369 // Result after last hasNext operation 370 private String hasNextResult; 371 372 // The input source 373 private Readable source; 374 375 // Boolean is true if source is done 376 private boolean sourceClosed = false; 377 378 // Boolean indicating more input is required 379 private boolean needInput = false; 380 381 // Boolean indicating if a delim has been skipped this operation 382 private boolean skipped = false; 383 384 // A store of a position that the scanner may fall back to 385 private int savedScannerPosition = -1; 386 387 // A cache of the last primitive type scanned 388 private Object typeCache = null; 389 390 // Boolean indicating if a match result is available 391 private boolean matchValid = false; 392 393 // Boolean indicating if this scanner has been closed 394 private boolean closed = false; 395 396 // The current radix used by this scanner 397 private int radix = 10; 398 399 // The default radix for this scanner 400 private int defaultRadix = 10; 401 402 // The locale used by this scanner 403 private Locale locale = null; 404 405 // A cache of the last few recently used Patterns 406 private LRUCache<String,Pattern> patternCache = 407 new LRUCache<String,Pattern>(7) { 408 protected Pattern create(String s) { 409 return Pattern.compile(s); 410 } 411 
protected boolean hasName(Pattern p, String s) { 412 return p.pattern().equals(s); 413 } 414 }; 415 416 // A holder of the last IOException encountered 417 private IOException lastException; 418 419 // A pattern for java whitespace 420 private static Pattern WHITESPACE_PATTERN = Pattern.compile( 421 "\\p{javaWhitespace}+"); 422 423 // A pattern for any token 424 private static Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*"); 425 426 // A pattern for non-ASCII digits 427 private static Pattern NON_ASCII_DIGIT = Pattern.compile( 428 "[\\p{javaDigit}&&[^0-9]]"); 429 430 // Fields and methods to support scanning primitive types 431 432 /** 433 * Locale dependent values used to scan numbers 434 */ 435 private String groupSeparator = "\\,"; 436 private String decimalSeparator = "\\."; 437 private String nanString = "NaN"; 438 private String infinityString = "Infinity"; 439 private String positivePrefix = ""; 440 private String negativePrefix = "\\-"; 441 private String positiveSuffix = ""; 442 private String negativeSuffix = ""; 443 444 /** 445 * Fields and an accessor method to match booleans 446 */ 447 private static volatile Pattern boolPattern; 448 private static final String BOOLEAN_PATTERN = "true|false"; 449 private static Pattern boolPattern() { 450 Pattern bp = boolPattern; 451 if (bp == null) 452 boolPattern = bp = Pattern.compile(BOOLEAN_PATTERN, 453 Pattern.CASE_INSENSITIVE); 454 return bp; 455 } 456 457 /** 458 * Fields and methods to match bytes, shorts, ints, and longs 459 */ 460 private Pattern integerPattern; 461 private String digits = "0123456789abcdefghijklmnopqrstuvwxyz"; 462 private String non0Digit = "[\\p{javaDigit}&&[^0]]"; 463 private int SIMPLE_GROUP_INDEX = 5; 464 private String buildIntegerPatternString() { 465 String radixDigits = digits.substring(0, radix); 466 // \\p{javaDigit} is not guaranteed to be appropriate 467 // here but what can we do? 
The final authority will be 468 // whatever parse method is invoked, so ultimately the 469 // Scanner will do the right thing 470 String digit = "((?i)["+radixDigits+"]|\\p{javaDigit})"; 471 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 472 groupSeparator+digit+digit+digit+")+)"; 473 // digit++ is the possessive form which is necessary for reducing 474 // backtracking that would otherwise cause unacceptable performance 475 String numeral = "(("+ digit+"++)|"+groupedNumeral+")"; 476 String javaStyleInteger = "([-+]?(" + numeral + "))"; 477 String negativeInteger = negativePrefix + numeral + negativeSuffix; 478 String positiveInteger = positivePrefix + numeral + positiveSuffix; 479 return "("+ javaStyleInteger + ")|(" + 480 positiveInteger + ")|(" + 481 negativeInteger + ")"; 482 } 483 private Pattern integerPattern() { 484 if (integerPattern == null) { 485 integerPattern = patternCache.forName(buildIntegerPatternString()); 486 } 487 return integerPattern; 488 } 489 490 /** 491 * Fields and an accessor method to match line separators 492 */ 493 private static volatile Pattern separatorPattern; 494 private static volatile Pattern linePattern; 495 private static final String LINE_SEPARATOR_PATTERN = 496 "\r\n|[\n\r\u2028\u2029\u0085]"; 497 private static final String LINE_PATTERN = ".*("+LINE_SEPARATOR_PATTERN+")|.+$"; 498 499 private static Pattern separatorPattern() { 500 Pattern sp = separatorPattern; 501 if (sp == null) 502 separatorPattern = sp = Pattern.compile(LINE_SEPARATOR_PATTERN); 503 return sp; 504 } 505 506 private static Pattern linePattern() { 507 Pattern lp = linePattern; 508 if (lp == null) 509 linePattern = lp = Pattern.compile(LINE_PATTERN); 510 return lp; 511 } 512 513 /** 514 * Fields and methods to match floats and doubles 515 */ 516 private Pattern floatPattern; 517 private Pattern decimalPattern; 518 private void buildFloatAndDecimalPattern() { 519 // \\p{javaDigit} may not be perfect, see above 520 String digit = 
"([0-9]|(\\p{javaDigit}))"; 521 String exponent = "([eE][+-]?"+digit+"+)?"; 522 String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+ 523 groupSeparator+digit+digit+digit+")+)"; 524 // Once again digit++ is used for performance, as above 525 String numeral = "(("+digit+"++)|"+groupedNumeral+")"; 526 String decimalNumeral = "("+numeral+"|"+numeral + 527 decimalSeparator + digit + "*+|"+ decimalSeparator + 528 digit + "++)"; 529 String nonNumber = "(NaN|"+nanString+"|Infinity|"+ 530 infinityString+")"; 531 String positiveFloat = "(" + positivePrefix + decimalNumeral + 532 positiveSuffix + exponent + ")"; 533 String negativeFloat = "(" + negativePrefix + decimalNumeral + 534 negativeSuffix + exponent + ")"; 535 String decimal = "(([-+]?" + decimalNumeral + exponent + ")|"+ 536 positiveFloat + "|" + negativeFloat + ")"; 537 String hexFloat = 538 "[-+]?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP][-+]?[0-9]+)?"; 539 String positiveNonNumber = "(" + positivePrefix + nonNumber + 540 positiveSuffix + ")"; 541 String negativeNonNumber = "(" + negativePrefix + nonNumber + 542 negativeSuffix + ")"; 543 String signedNonNumber = "(([-+]?"+nonNumber+")|" + 544 positiveNonNumber + "|" + 545 negativeNonNumber + ")"; 546 floatPattern = Pattern.compile(decimal + "|" + hexFloat + "|" + 547 signedNonNumber); 548 decimalPattern = Pattern.compile(decimal); 549 } 550 private Pattern floatPattern() { 551 if (floatPattern == null) { 552 buildFloatAndDecimalPattern(); 553 } 554 return floatPattern; 555 } 556 private Pattern decimalPattern() { 557 if (decimalPattern == null) { 558 buildFloatAndDecimalPattern(); 559 } 560 return decimalPattern; 561 } 562 563 // Constructors 564 565 /** 566 * Constructs a <code>Scanner</code> that returns values scanned 567 * from the specified source delimited by the specified pattern. 
568 * 569 * @param source A character source implementing the Readable interface 570 * @param pattern A delimiting pattern 571 * @return A scanner with the specified source and pattern 572 */ 573 private Scanner(Readable source, Pattern pattern) { 574 if (source == null) 575 throw new NullPointerException("source"); 576 if (pattern == null) 577 throw new NullPointerException("pattern"); 578 this.source = source; 579 delimPattern = pattern; 580 buf = CharBuffer.allocate(BUFFER_SIZE); 581 buf.limit(0); 582 matcher = delimPattern.matcher(buf); 583 matcher.useTransparentBounds(true); 584 matcher.useAnchoringBounds(false); 585 useLocale(Locale.getDefault()); 586 } 587 588 /** 589 * Constructs a new <code>Scanner</code> that produces values scanned 590 * from the specified source. 591 * 592 * @param source A character source implementing the {@link Readable} 593 * interface 594 */ 595 public Scanner(Readable source) { 596 this(source, WHITESPACE_PATTERN); 597 } 598 599 /** 600 * Constructs a new <code>Scanner</code> that produces values scanned 601 * from the specified input stream. Bytes from the stream are converted 602 * into characters using the underlying platform's 603 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 604 * 605 * @param source An input stream to be scanned 606 */ 607 public Scanner(InputStream source) { 608 this(new InputStreamReader(source), WHITESPACE_PATTERN); 609 } 610 611 /** 612 * Constructs a new <code>Scanner</code> that produces values scanned 613 * from the specified input stream. Bytes from the stream are converted 614 * into characters using the specified charset. 
615 * 616 * @param source An input stream to be scanned 617 * @param charsetName The encoding type used to convert bytes from the 618 * stream into characters to be scanned 619 * @throws IllegalArgumentException if the specified character set 620 * does not exist 621 */ 622 public Scanner(InputStream source, String charsetName) { 623 this(makeReadable(source, charsetName), WHITESPACE_PATTERN); 624 } 625 626 private static Readable makeReadable(InputStream source, 627 String charsetName) 628 { 629 if (source == null) 630 throw new NullPointerException("source"); 631 InputStreamReader isr = null; 632 try { 633 isr = new InputStreamReader(source, charsetName); 634 } catch (UnsupportedEncodingException uee) { 635 IllegalArgumentException iae = new IllegalArgumentException(); 636 iae.initCause(uee); 637 throw iae; 638 } 639 return isr; 640 } 641 642 /** 643 * Constructs a new <code>Scanner</code> that produces values scanned 644 * from the specified file. Bytes from the file are converted into 645 * characters using the underlying platform's 646 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 647 * 648 * @param source A file to be scanned 649 * @throws FileNotFoundException if source is not found 650 */ 651 public Scanner(File source) 652 throws FileNotFoundException 653 { 654 this((ReadableByteChannel)(new FileInputStream(source).getChannel())); 655 } 656 657 /** 658 * Constructs a new <code>Scanner</code> that produces values scanned 659 * from the specified file. Bytes from the file are converted into 660 * characters using the specified charset. 
661 * 662 * @param source A file to be scanned 663 * @param charsetName The encoding type used to convert bytes from the file 664 * into characters to be scanned 665 * @throws FileNotFoundException if source is not found 666 * @throws IllegalArgumentException if the specified encoding is 667 * not found 668 */ 669 public Scanner(File source, String charsetName) 670 throws FileNotFoundException 671 { 672 this((ReadableByteChannel)(new FileInputStream(source).getChannel()), 673 charsetName); 674 } 675 676 /** 677 * Constructs a new <code>Scanner</code> that produces values scanned 678 * from the specified file. Bytes from the file are converted into 679 * characters using the underlying platform's 680 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 681 * 682 * @param source 683 * A file to be scanned 684 * @throws IOException 685 * if an I/O error occurs opening source 686 * 687 * @since 1.7 688 */ 689 public Scanner(FileRef source) 690 throws IOException 691 { 692 this(source.newInputStream()); 693 } 694 695 /** 696 * Constructs a new <code>Scanner</code> that produces values scanned 697 * from the specified file. Bytes from the file are converted into 698 * characters using the specified charset. 699 * 700 * @param source 701 * A file to be scanned 702 * @param charsetName 703 * The encoding type used to convert bytes from the file 704 * into characters to be scanned 705 * @throws IOException 706 * if an I/O error occurs opening source 707 * @throws IllegalArgumentException 708 * if the specified encoding is not found 709 * @since 1.7 710 */ 711 public Scanner(FileRef source, String charsetName) 712 throws IOException 713 { 714 this(source.newInputStream(), charsetName); 715 } 716 717 /** 718 * Constructs a new <code>Scanner</code> that produces values scanned 719 * from the specified string. 
720 * 721 * @param source A string to scan 722 */ 723 public Scanner(String source) { 724 this(new StringReader(source), WHITESPACE_PATTERN); 725 } 726 727 /** 728 * Constructs a new <code>Scanner</code> that produces values scanned 729 * from the specified channel. Bytes from the source are converted into 730 * characters using the underlying platform's 731 * {@linkplain java.nio.charset.Charset#defaultCharset() default charset}. 732 * 733 * @param source A channel to scan 734 */ 735 public Scanner(ReadableByteChannel source) { 736 this(makeReadable(source), WHITESPACE_PATTERN); 737 } 738 739 private static Readable makeReadable(ReadableByteChannel source) { 740 if (source == null) 741 throw new NullPointerException("source"); 742 String defaultCharsetName = 743 java.nio.charset.Charset.defaultCharset().name(); 744 return Channels.newReader(source, 745 java.nio.charset.Charset.defaultCharset().name()); 746 } 747 748 /** 749 * Constructs a new <code>Scanner</code> that produces values scanned 750 * from the specified channel. Bytes from the source are converted into 751 * characters using the specified charset. 
752 * 753 * @param source A channel to scan 754 * @param charsetName The encoding type used to convert bytes from the 755 * channel into characters to be scanned 756 * @throws IllegalArgumentException if the specified character set 757 * does not exist 758 */ 759 public Scanner(ReadableByteChannel source, String charsetName) { 760 this(makeReadable(source, charsetName), WHITESPACE_PATTERN); 761 } 762 763 private static Readable makeReadable(ReadableByteChannel source, 764 String charsetName) 765 { 766 if (source == null) 767 throw new NullPointerException("source"); 768 if (!Charset.isSupported(charsetName)) 769 throw new IllegalArgumentException(charsetName); 770 return Channels.newReader(source, charsetName); 771 } 772 773 // Private primitives used to support scanning 774 775 private void saveState() { 776 savedScannerPosition = position; 777 } 778 779 private void revertState() { 780 this.position = savedScannerPosition; 781 savedScannerPosition = -1; 782 skipped = false; 783 } 784 785 private boolean revertState(boolean b) { 786 this.position = savedScannerPosition; 787 savedScannerPosition = -1; 788 skipped = false; 789 return b; 790 } 791 792 private void cacheResult() { 793 hasNextResult = matcher.group(); 794 hasNextPosition = matcher.end(); 795 hasNextPattern = matcher.pattern(); 796 } 797 798 private void cacheResult(String result) { 799 hasNextResult = result; 800 hasNextPosition = matcher.end(); 801 hasNextPattern = matcher.pattern(); 802 } 803 804 // Clears both regular cache and type cache 805 private void clearCaches() { 806 hasNextPattern = null; 807 typeCache = null; 808 } 809 810 // Also clears both the regular cache and the type cache 811 private String getCachedResult() { 812 position = hasNextPosition; 813 hasNextPattern = null; 814 typeCache = null; 815 return hasNextResult; 816 } 817 818 // Also clears both the regular cache and the type cache 819 private void useTypeCache() { 820 if (closed) 821 throw new IllegalStateException("Scanner 
closed"); 822 position = hasNextPosition; 823 hasNextPattern = null; 824 typeCache = null; 825 } 826 827 // Tries to read more input. May block. 828 private void readInput() { 829 if (buf.limit() == buf.capacity()) 830 makeSpace(); 831 832 // Prepare to receive data 833 int p = buf.position(); 834 buf.position(buf.limit()); 835 buf.limit(buf.capacity()); 836 837 int n = 0; 838 try { 839 n = source.read(buf); 840 } catch (IOException ioe) { 841 lastException = ioe; 842 n = -1; 843 } 844 845 if (n == -1) { 846 sourceClosed = true; 847 needInput = false; 848 } 849 850 if (n > 0) 851 needInput = false; 852 853 // Restore current position and limit for reading 854 buf.limit(buf.position()); 855 buf.position(p); 856 } 857 858 // After this method is called there will either be an exception 859 // or else there will be space in the buffer 860 private boolean makeSpace() { 861 clearCaches(); 862 int offset = savedScannerPosition == -1 ? 863 position : savedScannerPosition; 864 buf.position(offset); 865 // Gain space by compacting buffer 866 if (offset > 0) { 867 buf.compact(); 868 translateSavedIndexes(offset); 869 position -= offset; 870 buf.flip(); 871 return true; 872 } 873 // Gain space by growing buffer 874 int newSize = buf.capacity() * 2; 875 CharBuffer newBuf = CharBuffer.allocate(newSize); 876 newBuf.put(buf); 877 newBuf.flip(); 878 translateSavedIndexes(offset); 879 position -= offset; 880 buf = newBuf; 881 matcher.reset(buf); 882 return true; 883 } 884 885 // When a buffer compaction/reallocation occurs the saved indexes must 886 // be modified appropriately 887 private void translateSavedIndexes(int offset) { 888 if (savedScannerPosition != -1) 889 savedScannerPosition -= offset; 890 } 891 892 // If we are at the end of input then NoSuchElement; 893 // If there is still input left then InputMismatch 894 private void throwFor() { 895 skipped = false; 896 if ((sourceClosed) && (position == buf.limit())) 897 throw new NoSuchElementException(); 898 else 899 throw 
new InputMismatchException(); 900 } 901 902 // Returns true if a complete token or partial token is in the buffer. 903 // It is not necessary to find a complete token since a partial token 904 // means that there will be another token with or without more input. 905 private boolean hasTokenInBuffer() { 906 matchValid = false; 907 matcher.usePattern(delimPattern); 908 matcher.region(position, buf.limit()); 909 910 // Skip delims first 911 if (matcher.lookingAt()) 912 position = matcher.end(); 913 914 // If we are sitting at the end, no more tokens in buffer 915 if (position == buf.limit()) 916 return false; 917 918 return true; 919 } 920 921 /* 922 * Returns a "complete token" that matches the specified pattern 923 * 924 * A token is complete if surrounded by delims; a partial token 925 * is prefixed by delims but not postfixed by them 926 * 927 * The position is advanced to the end of that complete token 928 * 929 * Pattern == null means accept any token at all 930 * 931 * Triple return: 932 * 1. valid string means it was found 933 * 2. null with needInput=false means we won't ever find it 934 * 3. null with needInput=true means try again after readInput 935 */ 936 private String getCompleteTokenInBuffer(Pattern pattern) { 937 matchValid = false; 938 939 // Skip delims first 940 matcher.usePattern(delimPattern); 941 if (!skipped) { // Enforcing only one skip of leading delims 942 matcher.region(position, buf.limit()); 943 if (matcher.lookingAt()) { 944 // If more input could extend the delimiters then we must wait 945 // for more input 946 if (matcher.hitEnd() && !sourceClosed) { 947 needInput = true; 948 return null; 949 } 950 // The delims were whole and the matcher should skip them 951 skipped = true; 952 position = matcher.end(); 953 } 954 } 955 956 // If we are sitting at the end, no more tokens in buffer 957 if (position == buf.limit()) { 958 if (sourceClosed) 959 return null; 960 needInput = true; 961 return null; 962 } 963 964 // Must look for next delims. 
Simply attempting to match the 965 // pattern at this point may find a match but it might not be 966 // the first longest match because of missing input, or it might 967 // match a partial token instead of the whole thing. 968 969 // Then look for next delims 970 matcher.region(position, buf.limit()); 971 boolean foundNextDelim = matcher.find(); 972 if (foundNextDelim && (matcher.end() == position)) { 973 // Zero length delimiter match; we should find the next one 974 // using the automatic advance past a zero length match; 975 // Otherwise we have just found the same one we just skipped 976 foundNextDelim = matcher.find(); 977 } 978 if (foundNextDelim) { 979 // In the rare case that more input could cause the match 980 // to be lost and there is more input coming we must wait 981 // for more input. Note that hitting the end is okay as long 982 // as the match cannot go away. It is the beginning of the 983 // next delims we want to be sure about, we don't care if 984 // they potentially extend further. 985 if (matcher.requireEnd() && !sourceClosed) { 986 needInput = true; 987 return null; 988 } 989 int tokenEnd = matcher.start(); 990 // There is a complete token. 
991 if (pattern == null) { 992 // Must continue with match to provide valid MatchResult 993 pattern = FIND_ANY_PATTERN; 994 } 995 // Attempt to match against the desired pattern 996 matcher.usePattern(pattern); 997 matcher.region(position, tokenEnd); 998 if (matcher.matches()) { 999 String s = matcher.group(); 1000 position = matcher.end(); 1001 return s; 1002 } else { // Complete token but it does not match 1003 return null; 1004 } 1005 } 1006 1007 // If we can't find the next delims but no more input is coming, 1008 // then we can treat the remainder as a whole token 1009 if (sourceClosed) { 1010 if (pattern == null) { 1011 // Must continue with match to provide valid MatchResult 1012 pattern = FIND_ANY_PATTERN; 1013 } 1014 // Last token; Match the pattern here or throw 1015 matcher.usePattern(pattern); 1016 matcher.region(position, buf.limit()); 1017 if (matcher.matches()) { 1018 String s = matcher.group(); 1019 position = matcher.end(); 1020 return s; 1021 } 1022 // Last piece does not match 1023 return null; 1024 } 1025 1026 // There is a partial token in the buffer; must read more 1027 // to complete it 1028 needInput = true; 1029 return null; 1030 } 1031 1032 // Finds the specified pattern in the buffer up to horizon. 1033 // Returns a match for the specified input pattern. 
1034 private String findPatternInBuffer(Pattern pattern, int horizon) { 1035 matchValid = false; 1036 matcher.usePattern(pattern); 1037 int bufferLimit = buf.limit(); 1038 int horizonLimit = -1; 1039 int searchLimit = bufferLimit; 1040 if (horizon > 0) { 1041 horizonLimit = position + horizon; 1042 if (horizonLimit < bufferLimit) 1043 searchLimit = horizonLimit; 1044 } 1045 matcher.region(position, searchLimit); 1046 if (matcher.find()) { 1047 if (matcher.hitEnd() && (!sourceClosed)) { 1048 // The match may be longer if didn't hit horizon or real end 1049 if (searchLimit != horizonLimit) { 1050 // Hit an artificial end; try to extend the match 1051 needInput = true; 1052 return null; 1053 } 1054 // The match could go away depending on what is next 1055 if ((searchLimit == horizonLimit) && matcher.requireEnd()) { 1056 // Rare case: we hit the end of input and it happens 1057 // that it is at the horizon and the end of input is 1058 // required for the match. 1059 needInput = true; 1060 return null; 1061 } 1062 } 1063 // Did not hit end, or hit real end, or hit horizon 1064 position = matcher.end(); 1065 return matcher.group(); 1066 } 1067 1068 if (sourceClosed) 1069 return null; 1070 1071 // If there is no specified horizon, or if we have not searched 1072 // to the specified horizon yet, get more input 1073 if ((horizon == 0) || (searchLimit != horizonLimit)) 1074 needInput = true; 1075 return null; 1076 } 1077 1078 // Returns a match for the specified input pattern anchored at 1079 // the current position 1080 private String matchPatternInBuffer(Pattern pattern) { 1081 matchValid = false; 1082 matcher.usePattern(pattern); 1083 matcher.region(position, buf.limit()); 1084 if (matcher.lookingAt()) { 1085 if (matcher.hitEnd() && (!sourceClosed)) { 1086 // Get more input and try again 1087 needInput = true; 1088 return null; 1089 } 1090 position = matcher.end(); 1091 return matcher.group(); 1092 } 1093 1094 if (sourceClosed) 1095 return null; 1096 1097 // Read more to 
find pattern 1098 needInput = true; 1099 return null; 1100 } 1101 1102 // Throws if the scanner is closed 1103 private void ensureOpen() { 1104 if (closed) 1105 throw new IllegalStateException("Scanner closed"); 1106 } 1107 1108 // Public methods 1109 1110 /** 1111 * Closes this scanner. 1112 * 1113 * <p> If this scanner has not yet been closed then if its underlying 1114 * {@linkplain java.lang.Readable readable} also implements the {@link 1115 * java.io.Closeable} interface then the readable's <tt>close</tt> method 1116 * will be invoked. If this scanner is already closed then invoking this 1117 * method will have no effect. 1118 * 1119 * <p>Attempting to perform search operations after a scanner has 1120 * been closed will result in an {@link IllegalStateException}. 1121 * 1122 */ 1123 public void close() { 1124 if (closed) 1125 return; 1126 if (source instanceof Closeable) { 1127 try { 1128 ((Closeable)source).close(); 1129 } catch (IOException ioe) { 1130 lastException = ioe; 1131 } 1132 } 1133 sourceClosed = true; 1134 source = null; 1135 closed = true; 1136 } 1137 1138 /** 1139 * Returns the <code>IOException</code> last thrown by this 1140 * <code>Scanner</code>'s underlying <code>Readable</code>. This method 1141 * returns <code>null</code> if no such exception exists. 1142 * 1143 * @return the last exception thrown by this scanner's readable 1144 */ 1145 public IOException ioException() { 1146 return lastException; 1147 } 1148 1149 /** 1150 * Returns the <code>Pattern</code> this <code>Scanner</code> is currently 1151 * using to match delimiters. 1152 * 1153 * @return this scanner's delimiting pattern. 1154 */ 1155 public Pattern delimiter() { 1156 return delimPattern; 1157 } 1158 1159 /** 1160 * Sets this scanner's delimiting pattern to the specified pattern. 
1161 * 1162 * @param pattern A delimiting pattern 1163 * @return this scanner 1164 */ 1165 public Scanner useDelimiter(Pattern pattern) { 1166 delimPattern = pattern; 1167 return this; 1168 } 1169 1170 /** 1171 * Sets this scanner's delimiting pattern to a pattern constructed from 1172 * the specified <code>String</code>. 1173 * 1174 * <p> An invocation of this method of the form 1175 * <tt>useDelimiter(pattern)</tt> behaves in exactly the same way as the 1176 * invocation <tt>useDelimiter(Pattern.compile(pattern))</tt>. 1177 * 1178 * <p> Invoking the {@link #reset} method will set the scanner's delimiter 1179 * to the <a href= "#default-delimiter">default</a>. 1180 * 1181 * @param pattern A string specifying a delimiting pattern 1182 * @return this scanner 1183 */ 1184 public Scanner useDelimiter(String pattern) { 1185 delimPattern = patternCache.forName(pattern); 1186 return this; 1187 } 1188 1189 /** 1190 * Returns this scanner's locale. 1191 * 1192 * <p>A scanner's locale affects many elements of its default 1193 * primitive matching regular expressions; see 1194 * <a href= "#localized-numbers">localized numbers</a> above. 1195 * 1196 * @return this scanner's locale 1197 */ 1198 public Locale locale() { 1199 return this.locale; 1200 } 1201 1202 /** 1203 * Sets this scanner's locale to the specified locale. 1204 * 1205 * <p>A scanner's locale affects many elements of its default 1206 * primitive matching regular expressions; see 1207 * <a href= "#localized-numbers">localized numbers</a> above. 1208 * 1209 * <p>Invoking the {@link #reset} method will set the scanner's locale to 1210 * the <a href= "#initial-locale">initial locale</a>. 
1211 * 1212 * @param locale A string specifying the locale to use 1213 * @return this scanner 1214 */ 1215 public Scanner useLocale(Locale locale) { 1216 if (locale.equals(this.locale)) 1217 return this; 1218 1219 this.locale = locale; 1220 DecimalFormat df = 1221 (DecimalFormat)NumberFormat.getNumberInstance(locale); 1222 DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale); 1223 1224 // These must be literalized to avoid collision with regex 1225 // metacharacters such as dot or parenthesis 1226 groupSeparator = "\\" + dfs.getGroupingSeparator(); 1227 decimalSeparator = "\\" + dfs.getDecimalSeparator(); 1228 1229 // Quoting the nonzero length locale-specific things 1230 // to avoid potential conflict with metacharacters 1231 nanString = "\\Q" + dfs.getNaN() + "\\E"; 1232 infinityString = "\\Q" + dfs.getInfinity() + "\\E"; 1233 positivePrefix = df.getPositivePrefix(); 1234 if (positivePrefix.length() > 0) 1235 positivePrefix = "\\Q" + positivePrefix + "\\E"; 1236 negativePrefix = df.getNegativePrefix(); 1237 if (negativePrefix.length() > 0) 1238 negativePrefix = "\\Q" + negativePrefix + "\\E"; 1239 positiveSuffix = df.getPositiveSuffix(); 1240 if (positiveSuffix.length() > 0) 1241 positiveSuffix = "\\Q" + positiveSuffix + "\\E"; 1242 negativeSuffix = df.getNegativeSuffix(); 1243 if (negativeSuffix.length() > 0) 1244 negativeSuffix = "\\Q" + negativeSuffix + "\\E"; 1245 1246 // Force rebuilding and recompilation of locale dependent 1247 // primitive patterns 1248 integerPattern = null; 1249 floatPattern = null; 1250 1251 return this; 1252 } 1253 1254 /** 1255 * Returns this scanner's default radix. 1256 * 1257 * <p>A scanner's radix affects elements of its default 1258 * number matching regular expressions; see 1259 * <a href= "#localized-numbers">localized numbers</a> above. 
1260 * 1261 * @return the default radix of this scanner 1262 */ 1263 public int radix() { 1264 return this.defaultRadix; 1265 } 1266 1267 /** 1268 * Sets this scanner's default radix to the specified radix. 1269 * 1270 * <p>A scanner's radix affects elements of its default 1271 * number matching regular expressions; see 1272 * <a href= "#localized-numbers">localized numbers</a> above. 1273 * 1274 * <p>If the radix is less than <code>Character.MIN_RADIX</code> 1275 * or greater than <code>Character.MAX_RADIX</code>, then an 1276 * <code>IllegalArgumentException</code> is thrown. 1277 * 1278 * <p>Invoking the {@link #reset} method will set the scanner's radix to 1279 * <code>10</code>. 1280 * 1281 * @param radix The radix to use when scanning numbers 1282 * @return this scanner 1283 * @throws IllegalArgumentException if radix is out of range 1284 */ 1285 public Scanner useRadix(int radix) { 1286 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) 1287 throw new IllegalArgumentException("radix:"+radix); 1288 1289 if (this.defaultRadix == radix) 1290 return this; 1291 this.defaultRadix = radix; 1292 // Force rebuilding and recompilation of radix dependent patterns 1293 integerPattern = null; 1294 return this; 1295 } 1296 1297 // The next operation should occur in the specified radix but 1298 // the default is left untouched. 1299 private void setRadix(int radix) { 1300 if (this.radix != radix) { 1301 // Force rebuilding and recompilation of radix dependent patterns 1302 integerPattern = null; 1303 this.radix = radix; 1304 } 1305 } 1306 1307 /** 1308 * Returns the match result of the last scanning operation performed 1309 * by this scanner. This method throws <code>IllegalStateException</code> 1310 * if no match has been performed, or if the last match was 1311 * not successful. 1312 * 1313 * <p>The various <code>next</code>methods of <code>Scanner</code> 1314 * make a match result available if they complete without throwing an 1315 * exception. 
For instance, after an invocation of the {@link #nextInt} 1316 * method that returned an int, this method returns a 1317 * <code>MatchResult</code> for the search of the 1318 * <a href="#Integer-regex"><i>Integer</i></a> regular expression 1319 * defined above. Similarly the {@link #findInLine}, 1320 * {@link #findWithinHorizon}, and {@link #skip} methods will make a 1321 * match available if they succeed. 1322 * 1323 * @return a match result for the last match operation 1324 * @throws IllegalStateException If no match result is available 1325 */ 1326 public MatchResult match() { 1327 if (!matchValid) 1328 throw new IllegalStateException("No match result available"); 1329 return matcher.toMatchResult(); 1330 } 1331 1332 /** 1333 * <p>Returns the string representation of this <code>Scanner</code>. The 1334 * string representation of a <code>Scanner</code> contains information 1335 * that may be useful for debugging. The exact format is unspecified. 1336 * 1337 * @return The string representation of this scanner 1338 */ 1339 public String toString() { 1340 StringBuilder sb = new StringBuilder(); 1341 sb.append("java.util.Scanner"); 1342 sb.append("[delimiters=" + delimPattern + "]"); 1343 sb.append("[position=" + position + "]"); 1344 sb.append("[match valid=" + matchValid + "]"); 1345 sb.append("[need input=" + needInput + "]"); 1346 sb.append("[source closed=" + sourceClosed + "]"); 1347 sb.append("[skipped=" + skipped + "]"); 1348 sb.append("[group separator=" + groupSeparator + "]"); 1349 sb.append("[decimal separator=" + decimalSeparator + "]"); 1350 sb.append("[positive prefix=" + positivePrefix + "]"); 1351 sb.append("[negative prefix=" + negativePrefix + "]"); 1352 sb.append("[positive suffix=" + positiveSuffix + "]"); 1353 sb.append("[negative suffix=" + negativeSuffix + "]"); 1354 sb.append("[NaN string=" + nanString + "]"); 1355 sb.append("[infinity string=" + infinityString + "]"); 1356 return sb.toString(); 1357 } 1358 1359 /** 1360 * Returns true if 
this scanner has another token in its input. 1361 * This method may block while waiting for input to scan. 1362 * The scanner does not advance past any input. 1363 * 1364 * @return true if and only if this scanner has another token 1365 * @throws IllegalStateException if this scanner is closed 1366 * @see java.util.Iterator 1367 */ 1368 public boolean hasNext() { 1369 ensureOpen(); 1370 saveState(); 1371 while (!sourceClosed) { 1372 if (hasTokenInBuffer()) 1373 return revertState(true); 1374 readInput(); 1375 } 1376 boolean result = hasTokenInBuffer(); 1377 return revertState(result); 1378 } 1379 1380 /** 1381 * Finds and returns the next complete token from this scanner. 1382 * A complete token is preceded and followed by input that matches 1383 * the delimiter pattern. This method may block while waiting for input 1384 * to scan, even if a previous invocation of {@link #hasNext} returned 1385 * <code>true</code>. 1386 * 1387 * @return the next token 1388 * @throws NoSuchElementException if no more tokens are available 1389 * @throws IllegalStateException if this scanner is closed 1390 * @see java.util.Iterator 1391 */ 1392 public String next() { 1393 ensureOpen(); 1394 clearCaches(); 1395 1396 while (true) { 1397 String token = getCompleteTokenInBuffer(null); 1398 if (token != null) { 1399 matchValid = true; 1400 skipped = false; 1401 return token; 1402 } 1403 if (needInput) 1404 readInput(); 1405 else 1406 throwFor(); 1407 } 1408 } 1409 1410 /** 1411 * The remove operation is not supported by this implementation of 1412 * <code>Iterator</code>. 1413 * 1414 * @throws UnsupportedOperationException if this method is invoked. 1415 * @see java.util.Iterator 1416 */ 1417 public void remove() { 1418 throw new UnsupportedOperationException(); 1419 } 1420 1421 /** 1422 * Returns true if the next token matches the pattern constructed from the 1423 * specified string. The scanner does not advance past any input. 
1424 * 1425 * <p> An invocation of this method of the form <tt>hasNext(pattern)</tt> 1426 * behaves in exactly the same way as the invocation 1427 * <tt>hasNext(Pattern.compile(pattern))</tt>. 1428 * 1429 * @param pattern a string specifying the pattern to scan 1430 * @return true if and only if this scanner has another token matching 1431 * the specified pattern 1432 * @throws IllegalStateException if this scanner is closed 1433 */ 1434 public boolean hasNext(String pattern) { 1435 return hasNext(patternCache.forName(pattern)); 1436 } 1437 1438 /** 1439 * Returns the next token if it matches the pattern constructed from the 1440 * specified string. If the match is successful, the scanner advances 1441 * past the input that matched the pattern. 1442 * 1443 * <p> An invocation of this method of the form <tt>next(pattern)</tt> 1444 * behaves in exactly the same way as the invocation 1445 * <tt>next(Pattern.compile(pattern))</tt>. 1446 * 1447 * @param pattern a string specifying the pattern to scan 1448 * @return the next token 1449 * @throws NoSuchElementException if no such tokens are available 1450 * @throws IllegalStateException if this scanner is closed 1451 */ 1452 public String next(String pattern) { 1453 return next(patternCache.forName(pattern)); 1454 } 1455 1456 /** 1457 * Returns true if the next complete token matches the specified pattern. 1458 * A complete token is prefixed and postfixed by input that matches 1459 * the delimiter pattern. This method may block while waiting for input. 1460 * The scanner does not advance past any input. 
1461 * 1462 * @param pattern the pattern to scan for 1463 * @return true if and only if this scanner has another token matching 1464 * the specified pattern 1465 * @throws IllegalStateException if this scanner is closed 1466 */ 1467 public boolean hasNext(Pattern pattern) { 1468 ensureOpen(); 1469 if (pattern == null) 1470 throw new NullPointerException(); 1471 hasNextPattern = null; 1472 saveState(); 1473 1474 while (true) { 1475 if (getCompleteTokenInBuffer(pattern) != null) { 1476 matchValid = true; 1477 cacheResult(); 1478 return revertState(true); 1479 } 1480 if (needInput) 1481 readInput(); 1482 else 1483 return revertState(false); 1484 } 1485 } 1486 1487 /** 1488 * Returns the next token if it matches the specified pattern. This 1489 * method may block while waiting for input to scan, even if a previous 1490 * invocation of {@link #hasNext(Pattern)} returned <code>true</code>. 1491 * If the match is successful, the scanner advances past the input that 1492 * matched the pattern. 1493 * 1494 * @param pattern the pattern to scan for 1495 * @return the next token 1496 * @throws NoSuchElementException if no more tokens are available 1497 * @throws IllegalStateException if this scanner is closed 1498 */ 1499 public String next(Pattern pattern) { 1500 ensureOpen(); 1501 if (pattern == null) 1502 throw new NullPointerException(); 1503 1504 // Did we already find this pattern? 1505 if (hasNextPattern == pattern) 1506 return getCachedResult(); 1507 clearCaches(); 1508 1509 // Search for the pattern 1510 while (true) { 1511 String token = getCompleteTokenInBuffer(pattern); 1512 if (token != null) { 1513 matchValid = true; 1514 skipped = false; 1515 return token; 1516 } 1517 if (needInput) 1518 readInput(); 1519 else 1520 throwFor(); 1521 } 1522 } 1523 1524 /** 1525 * Returns true if there is another line in the input of this scanner. 1526 * This method may block while waiting for input. The scanner does not 1527 * advance past any input. 
1528 * 1529 * @return true if and only if this scanner has another line of input 1530 * @throws IllegalStateException if this scanner is closed 1531 */ 1532 public boolean hasNextLine() { 1533 saveState(); 1534 1535 String result = findWithinHorizon(linePattern(), 0); 1536 if (result != null) { 1537 MatchResult mr = this.match(); 1538 String lineSep = mr.group(1); 1539 if (lineSep != null) { 1540 result = result.substring(0, result.length() - 1541 lineSep.length()); 1542 cacheResult(result); 1543 1544 } else { 1545 cacheResult(); 1546 } 1547 } 1548 revertState(); 1549 return (result != null); 1550 } 1551 1552 /** 1553 * Advances this scanner past the current line and returns the input 1554 * that was skipped. 1555 * 1556 * This method returns the rest of the current line, excluding any line 1557 * separator at the end. The position is set to the beginning of the next 1558 * line. 1559 * 1560 * <p>Since this method continues to search through the input looking 1561 * for a line separator, it may buffer all of the input searching for 1562 * the line to skip if no line separators are present. 
1563 * 1564 * @return the line that was skipped 1565 * @throws NoSuchElementException if no line was found 1566 * @throws IllegalStateException if this scanner is closed 1567 */ 1568 public String nextLine() { 1569 if (hasNextPattern == linePattern()) 1570 return getCachedResult(); 1571 clearCaches(); 1572 1573 String result = findWithinHorizon(linePattern, 0); 1574 if (result == null) 1575 throw new NoSuchElementException("No line found"); 1576 MatchResult mr = this.match(); 1577 String lineSep = mr.group(1); 1578 if (lineSep != null) 1579 result = result.substring(0, result.length() - lineSep.length()); 1580 if (result == null) 1581 throw new NoSuchElementException(); 1582 else 1583 return result; 1584 } 1585 1586 // Public methods that ignore delimiters 1587 1588 /** 1589 * Attempts to find the next occurrence of a pattern constructed from the 1590 * specified string, ignoring delimiters. 1591 * 1592 * <p>An invocation of this method of the form <tt>findInLine(pattern)</tt> 1593 * behaves in exactly the same way as the invocation 1594 * <tt>findInLine(Pattern.compile(pattern))</tt>. 1595 * 1596 * @param pattern a string specifying the pattern to search for 1597 * @return the text that matched the specified pattern 1598 * @throws IllegalStateException if this scanner is closed 1599 */ 1600 public String findInLine(String pattern) { 1601 return findInLine(patternCache.forName(pattern)); 1602 } 1603 1604 /** 1605 * Attempts to find the next occurrence of the specified pattern ignoring 1606 * delimiters. If the pattern is found before the next line separator, the 1607 * scanner advances past the input that matched and returns the string that 1608 * matched the pattern. 1609 * If no such pattern is detected in the input up to the next line 1610 * separator, then <code>null</code> is returned and the scanner's 1611 * position is unchanged. This method may block waiting for input that 1612 * matches the pattern. 
1613 * 1614 * <p>Since this method continues to search through the input looking 1615 * for the specified pattern, it may buffer all of the input searching for 1616 * the desired token if no line separators are present. 1617 * 1618 * @param pattern the pattern to scan for 1619 * @return the text that matched the specified pattern 1620 * @throws IllegalStateException if this scanner is closed 1621 */ 1622 public String findInLine(Pattern pattern) { 1623 ensureOpen(); 1624 if (pattern == null) 1625 throw new NullPointerException(); 1626 clearCaches(); 1627 // Expand buffer to include the next newline or end of input 1628 int endPosition = 0; 1629 saveState(); 1630 while (true) { 1631 String token = findPatternInBuffer(separatorPattern(), 0); 1632 if (token != null) { 1633 endPosition = matcher.start(); 1634 break; // up to next newline 1635 } 1636 if (needInput) { 1637 readInput(); 1638 } else { 1639 endPosition = buf.limit(); 1640 break; // up to end of input 1641 } 1642 } 1643 revertState(); 1644 int horizonForLine = endPosition - position; 1645 // If there is nothing between the current pos and the next 1646 // newline simply return null, invoking findWithinHorizon 1647 // with "horizon=0" will scan beyond the line bound. 1648 if (horizonForLine == 0) 1649 return null; 1650 // Search for the pattern 1651 return findWithinHorizon(pattern, horizonForLine); 1652 } 1653 1654 /** 1655 * Attempts to find the next occurrence of a pattern constructed from the 1656 * specified string, ignoring delimiters. 1657 * 1658 * <p>An invocation of this method of the form 1659 * <tt>findWithinHorizon(pattern)</tt> behaves in exactly the same way as 1660 * the invocation 1661 * <tt>findWithinHorizon(Pattern.compile(pattern, horizon))</tt>. 
1662 * 1663 * @param pattern a string specifying the pattern to search for 1664 * @return the text that matched the specified pattern 1665 * @throws IllegalStateException if this scanner is closed 1666 * @throws IllegalArgumentException if horizon is negative 1667 */ 1668 public String findWithinHorizon(String pattern, int horizon) { 1669 return findWithinHorizon(patternCache.forName(pattern), horizon); 1670 } 1671 1672 /** 1673 * Attempts to find the next occurrence of the specified pattern. 1674 * 1675 * <p>This method searches through the input up to the specified 1676 * search horizon, ignoring delimiters. If the pattern is found the 1677 * scanner advances past the input that matched and returns the string 1678 * that matched the pattern. If no such pattern is detected then the 1679 * null is returned and the scanner's position remains unchanged. This 1680 * method may block waiting for input that matches the pattern. 1681 * 1682 * <p>A scanner will never search more than <code>horizon</code> code 1683 * points beyond its current position. Note that a match may be clipped 1684 * by the horizon; that is, an arbitrary match result may have been 1685 * different if the horizon had been larger. The scanner treats the 1686 * horizon as a transparent, non-anchoring bound (see {@link 1687 * Matcher#useTransparentBounds} and {@link Matcher#useAnchoringBounds}). 1688 * 1689 * <p>If horizon is <code>0</code>, then the horizon is ignored and 1690 * this method continues to search through the input looking for the 1691 * specified pattern without bound. In this case it may buffer all of 1692 * the input searching for the pattern. 1693 * 1694 * <p>If horizon is negative, then an IllegalArgumentException is 1695 * thrown. 
1696 * 1697 * @param pattern the pattern to scan for 1698 * @return the text that matched the specified pattern 1699 * @throws IllegalStateException if this scanner is closed 1700 * @throws IllegalArgumentException if horizon is negative 1701 */ 1702 public String findWithinHorizon(Pattern pattern, int horizon) { 1703 ensureOpen(); 1704 if (pattern == null) 1705 throw new NullPointerException(); 1706 if (horizon < 0) 1707 throw new IllegalArgumentException("horizon < 0"); 1708 clearCaches(); 1709 1710 // Search for the pattern 1711 while (true) { 1712 String token = findPatternInBuffer(pattern, horizon); 1713 if (token != null) { 1714 matchValid = true; 1715 return token; 1716 } 1717 if (needInput) 1718 readInput(); 1719 else 1720 break; // up to end of input 1721 } 1722 return null; 1723 } 1724 1725 /** 1726 * Skips input that matches the specified pattern, ignoring delimiters. 1727 * This method will skip input if an anchored match of the specified 1728 * pattern succeeds. 1729 * 1730 * <p>If a match to the specified pattern is not found at the 1731 * current position, then no input is skipped and a 1732 * <tt>NoSuchElementException</tt> is thrown. 1733 * 1734 * <p>Since this method seeks to match the specified pattern starting at 1735 * the scanner's current position, patterns that can match a lot of 1736 * input (".*", for example) may cause the scanner to buffer a large 1737 * amount of input. 1738 * 1739 * <p>Note that it is possible to skip something without risking a 1740 * <code>NoSuchElementException</code> by using a pattern that can 1741 * match nothing, e.g., <code>sc.skip("[ \t]*")</code>. 
1742 * 1743 * @param pattern a string specifying the pattern to skip over 1744 * @return this scanner 1745 * @throws NoSuchElementException if the specified pattern is not found 1746 * @throws IllegalStateException if this scanner is closed 1747 */ 1748 public Scanner skip(Pattern pattern) { 1749 ensureOpen(); 1750 if (pattern == null) 1751 throw new NullPointerException(); 1752 clearCaches(); 1753 1754 // Search for the pattern 1755 while (true) { 1756 String token = matchPatternInBuffer(pattern); 1757 if (token != null) { 1758 matchValid = true; 1759 position = matcher.end(); 1760 return this; 1761 } 1762 if (needInput) 1763 readInput(); 1764 else 1765 throw new NoSuchElementException(); 1766 } 1767 } 1768 1769 /** 1770 * Skips input that matches a pattern constructed from the specified 1771 * string. 1772 * 1773 * <p> An invocation of this method of the form <tt>skip(pattern)</tt> 1774 * behaves in exactly the same way as the invocation 1775 * <tt>skip(Pattern.compile(pattern))</tt>. 1776 * 1777 * @param pattern a string specifying the pattern to skip over 1778 * @return this scanner 1779 * @throws IllegalStateException if this scanner is closed 1780 */ 1781 public Scanner skip(String pattern) { 1782 return skip(patternCache.forName(pattern)); 1783 } 1784 1785 // Convenience methods for scanning primitives 1786 1787 /** 1788 * Returns true if the next token in this scanner's input can be 1789 * interpreted as a boolean value using a case insensitive pattern 1790 * created from the string "true|false". The scanner does not 1791 * advance past the input that matched. 1792 * 1793 * @return true if and only if this scanner's next token is a valid 1794 * boolean value 1795 * @throws IllegalStateException if this scanner is closed 1796 */ 1797 public boolean hasNextBoolean() { 1798 return hasNext(boolPattern()); 1799 } 1800 1801 /** 1802 * Scans the next token of the input into a boolean value and returns 1803 * that value. 
This method will throw <code>InputMismatchException</code> 1804 * if the next token cannot be translated into a valid boolean value. 1805 * If the match is successful, the scanner advances past the input that 1806 * matched. 1807 * 1808 * @return the boolean scanned from the input 1809 * @throws InputMismatchException if the next token is not a valid boolean 1810 * @throws NoSuchElementException if input is exhausted 1811 * @throws IllegalStateException if this scanner is closed 1812 */ 1813 public boolean nextBoolean() { 1814 clearCaches(); 1815 return Boolean.parseBoolean(next(boolPattern())); 1816 } 1817 1818 /** 1819 * Returns true if the next token in this scanner's input can be 1820 * interpreted as a byte value in the default radix using the 1821 * {@link #nextByte} method. The scanner does not advance past any input. 1822 * 1823 * @return true if and only if this scanner's next token is a valid 1824 * byte value 1825 * @throws IllegalStateException if this scanner is closed 1826 */ 1827 public boolean hasNextByte() { 1828 return hasNextByte(defaultRadix); 1829 } 1830 1831 /** 1832 * Returns true if the next token in this scanner's input can be 1833 * interpreted as a byte value in the specified radix using the 1834 * {@link #nextByte} method. The scanner does not advance past any input. 1835 * 1836 * @param radix the radix used to interpret the token as a byte value 1837 * @return true if and only if this scanner's next token is a valid 1838 * byte value 1839 * @throws IllegalStateException if this scanner is closed 1840 */ 1841 public boolean hasNextByte(int radix) { 1842 setRadix(radix); 1843 boolean result = hasNext(integerPattern()); 1844 if (result) { // Cache it 1845 try { 1846 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 
1847 processIntegerToken(hasNextResult) : 1848 hasNextResult; 1849 typeCache = Byte.parseByte(s, radix); 1850 } catch (NumberFormatException nfe) { 1851 result = false; 1852 } 1853 } 1854 return result; 1855 } 1856 1857 /** 1858 * Scans the next token of the input as a <tt>byte</tt>. 1859 * 1860 * <p> An invocation of this method of the form 1861 * <tt>nextByte()</tt> behaves in exactly the same way as the 1862 * invocation <tt>nextByte(radix)</tt>, where <code>radix</code> 1863 * is the default radix of this scanner. 1864 * 1865 * @return the <tt>byte</tt> scanned from the input 1866 * @throws InputMismatchException 1867 * if the next token does not match the <i>Integer</i> 1868 * regular expression, or is out of range 1869 * @throws NoSuchElementException if input is exhausted 1870 * @throws IllegalStateException if this scanner is closed 1871 */ 1872 public byte nextByte() { 1873 return nextByte(defaultRadix); 1874 } 1875 1876 /** 1877 * Scans the next token of the input as a <tt>byte</tt>. 1878 * This method will throw <code>InputMismatchException</code> 1879 * if the next token cannot be translated into a valid byte value as 1880 * described below. If the translation is successful, the scanner advances 1881 * past the input that matched. 1882 * 1883 * <p> If the next token matches the <a 1884 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1885 * above then the token is converted into a <tt>byte</tt> value as if by 1886 * removing all locale specific prefixes, group separators, and locale 1887 * specific suffixes, then mapping non-ASCII digits into ASCII 1888 * digits via {@link Character#digit Character.digit}, prepending a 1889 * negative sign (-) if the locale specific negative prefixes and suffixes 1890 * were present, and passing the resulting string to 1891 * {@link Byte#parseByte(String, int) Byte.parseByte} with the 1892 * specified radix. 
1893 * 1894 * @param radix the radix used to interpret the token as a byte value 1895 * @return the <tt>byte</tt> scanned from the input 1896 * @throws InputMismatchException 1897 * if the next token does not match the <i>Integer</i> 1898 * regular expression, or is out of range 1899 * @throws NoSuchElementException if input is exhausted 1900 * @throws IllegalStateException if this scanner is closed 1901 */ 1902 public byte nextByte(int radix) { 1903 // Check cached result 1904 if ((typeCache != null) && (typeCache instanceof Byte) 1905 && this.radix == radix) { 1906 byte val = ((Byte)typeCache).byteValue(); 1907 useTypeCache(); 1908 return val; 1909 } 1910 setRadix(radix); 1911 clearCaches(); 1912 // Search for next byte 1913 try { 1914 String s = next(integerPattern()); 1915 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 1916 s = processIntegerToken(s); 1917 return Byte.parseByte(s, radix); 1918 } catch (NumberFormatException nfe) { 1919 position = matcher.start(); // don't skip bad token 1920 throw new InputMismatchException(nfe.getMessage()); 1921 } 1922 } 1923 1924 /** 1925 * Returns true if the next token in this scanner's input can be 1926 * interpreted as a short value in the default radix using the 1927 * {@link #nextShort} method. The scanner does not advance past any input. 1928 * 1929 * @return true if and only if this scanner's next token is a valid 1930 * short value in the default radix 1931 * @throws IllegalStateException if this scanner is closed 1932 */ 1933 public boolean hasNextShort() { 1934 return hasNextShort(defaultRadix); 1935 } 1936 1937 /** 1938 * Returns true if the next token in this scanner's input can be 1939 * interpreted as a short value in the specified radix using the 1940 * {@link #nextShort} method. The scanner does not advance past any input. 
1941 * 1942 * @param radix the radix used to interpret the token as a short value 1943 * @return true if and only if this scanner's next token is a valid 1944 * short value in the specified radix 1945 * @throws IllegalStateException if this scanner is closed 1946 */ 1947 public boolean hasNextShort(int radix) { 1948 setRadix(radix); 1949 boolean result = hasNext(integerPattern()); 1950 if (result) { // Cache it 1951 try { 1952 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 1953 processIntegerToken(hasNextResult) : 1954 hasNextResult; 1955 typeCache = Short.parseShort(s, radix); 1956 } catch (NumberFormatException nfe) { 1957 result = false; 1958 } 1959 } 1960 return result; 1961 } 1962 1963 /** 1964 * Scans the next token of the input as a <tt>short</tt>. 1965 * 1966 * <p> An invocation of this method of the form 1967 * <tt>nextShort()</tt> behaves in exactly the same way as the 1968 * invocation <tt>nextShort(radix)</tt>, where <code>radix</code> 1969 * is the default radix of this scanner. 1970 * 1971 * @return the <tt>short</tt> scanned from the input 1972 * @throws InputMismatchException 1973 * if the next token does not match the <i>Integer</i> 1974 * regular expression, or is out of range 1975 * @throws NoSuchElementException if input is exhausted 1976 * @throws IllegalStateException if this scanner is closed 1977 */ 1978 public short nextShort() { 1979 return nextShort(defaultRadix); 1980 } 1981 1982 /** 1983 * Scans the next token of the input as a <tt>short</tt>. 1984 * This method will throw <code>InputMismatchException</code> 1985 * if the next token cannot be translated into a valid short value as 1986 * described below. If the translation is successful, the scanner advances 1987 * past the input that matched. 
1988 * 1989 * <p> If the next token matches the <a 1990 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 1991 * above then the token is converted into a <tt>short</tt> value as if by 1992 * removing all locale specific prefixes, group separators, and locale 1993 * specific suffixes, then mapping non-ASCII digits into ASCII 1994 * digits via {@link Character#digit Character.digit}, prepending a 1995 * negative sign (-) if the locale specific negative prefixes and suffixes 1996 * were present, and passing the resulting string to 1997 * {@link Short#parseShort(String, int) Short.parseShort} with the 1998 * specified radix. 1999 * 2000 * @param radix the radix used to interpret the token as a short value 2001 * @return the <tt>short</tt> scanned from the input 2002 * @throws InputMismatchException 2003 * if the next token does not match the <i>Integer</i> 2004 * regular expression, or is out of range 2005 * @throws NoSuchElementException if input is exhausted 2006 * @throws IllegalStateException if this scanner is closed 2007 */ 2008 public short nextShort(int radix) { 2009 // Check cached result 2010 if ((typeCache != null) && (typeCache instanceof Short) 2011 && this.radix == radix) { 2012 short val = ((Short)typeCache).shortValue(); 2013 useTypeCache(); 2014 return val; 2015 } 2016 setRadix(radix); 2017 clearCaches(); 2018 // Search for next short 2019 try { 2020 String s = next(integerPattern()); 2021 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2022 s = processIntegerToken(s); 2023 return Short.parseShort(s, radix); 2024 } catch (NumberFormatException nfe) { 2025 position = matcher.start(); // don't skip bad token 2026 throw new InputMismatchException(nfe.getMessage()); 2027 } 2028 } 2029 2030 /** 2031 * Returns true if the next token in this scanner's input can be 2032 * interpreted as an int value in the default radix using the 2033 * {@link #nextInt} method. The scanner does not advance past any input. 
2034 * 2035 * @return true if and only if this scanner's next token is a valid 2036 * int value 2037 * @throws IllegalStateException if this scanner is closed 2038 */ 2039 public boolean hasNextInt() { 2040 return hasNextInt(defaultRadix); 2041 } 2042 2043 /** 2044 * Returns true if the next token in this scanner's input can be 2045 * interpreted as an int value in the specified radix using the 2046 * {@link #nextInt} method. The scanner does not advance past any input. 2047 * 2048 * @param radix the radix used to interpret the token as an int value 2049 * @return true if and only if this scanner's next token is a valid 2050 * int value 2051 * @throws IllegalStateException if this scanner is closed 2052 */ 2053 public boolean hasNextInt(int radix) { 2054 setRadix(radix); 2055 boolean result = hasNext(integerPattern()); 2056 if (result) { // Cache it 2057 try { 2058 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2059 processIntegerToken(hasNextResult) : 2060 hasNextResult; 2061 typeCache = Integer.parseInt(s, radix); 2062 } catch (NumberFormatException nfe) { 2063 result = false; 2064 } 2065 } 2066 return result; 2067 } 2068 2069 /** 2070 * The integer token must be stripped of prefixes, group separators, 2071 * and suffixes, non ascii digits must be converted into ascii digits 2072 * before parse will accept it. 
2073 */ 2074 private String processIntegerToken(String token) { 2075 String result = token.replaceAll(""+groupSeparator, ""); 2076 boolean isNegative = false; 2077 int preLen = negativePrefix.length(); 2078 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2079 isNegative = true; 2080 result = result.substring(preLen); 2081 } 2082 int sufLen = negativeSuffix.length(); 2083 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2084 isNegative = true; 2085 result = result.substring(result.length() - sufLen, 2086 result.length()); 2087 } 2088 if (isNegative) 2089 result = "-" + result; 2090 return result; 2091 } 2092 2093 /** 2094 * Scans the next token of the input as an <tt>int</tt>. 2095 * 2096 * <p> An invocation of this method of the form 2097 * <tt>nextInt()</tt> behaves in exactly the same way as the 2098 * invocation <tt>nextInt(radix)</tt>, where <code>radix</code> 2099 * is the default radix of this scanner. 2100 * 2101 * @return the <tt>int</tt> scanned from the input 2102 * @throws InputMismatchException 2103 * if the next token does not match the <i>Integer</i> 2104 * regular expression, or is out of range 2105 * @throws NoSuchElementException if input is exhausted 2106 * @throws IllegalStateException if this scanner is closed 2107 */ 2108 public int nextInt() { 2109 return nextInt(defaultRadix); 2110 } 2111 2112 /** 2113 * Scans the next token of the input as an <tt>int</tt>. 2114 * This method will throw <code>InputMismatchException</code> 2115 * if the next token cannot be translated into a valid int value as 2116 * described below. If the translation is successful, the scanner advances 2117 * past the input that matched. 
2118 * 2119 * <p> If the next token matches the <a 2120 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2121 * above then the token is converted into an <tt>int</tt> value as if by 2122 * removing all locale specific prefixes, group separators, and locale 2123 * specific suffixes, then mapping non-ASCII digits into ASCII 2124 * digits via {@link Character#digit Character.digit}, prepending a 2125 * negative sign (-) if the locale specific negative prefixes and suffixes 2126 * were present, and passing the resulting string to 2127 * {@link Integer#parseInt(String, int) Integer.parseInt} with the 2128 * specified radix. 2129 * 2130 * @param radix the radix used to interpret the token as an int value 2131 * @return the <tt>int</tt> scanned from the input 2132 * @throws InputMismatchException 2133 * if the next token does not match the <i>Integer</i> 2134 * regular expression, or is out of range 2135 * @throws NoSuchElementException if input is exhausted 2136 * @throws IllegalStateException if this scanner is closed 2137 */ 2138 public int nextInt(int radix) { 2139 // Check cached result 2140 if ((typeCache != null) && (typeCache instanceof Integer) 2141 && this.radix == radix) { 2142 int val = ((Integer)typeCache).intValue(); 2143 useTypeCache(); 2144 return val; 2145 } 2146 setRadix(radix); 2147 clearCaches(); 2148 // Search for next int 2149 try { 2150 String s = next(integerPattern()); 2151 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2152 s = processIntegerToken(s); 2153 return Integer.parseInt(s, radix); 2154 } catch (NumberFormatException nfe) { 2155 position = matcher.start(); // don't skip bad token 2156 throw new InputMismatchException(nfe.getMessage()); 2157 } 2158 } 2159 2160 /** 2161 * Returns true if the next token in this scanner's input can be 2162 * interpreted as a long value in the default radix using the 2163 * {@link #nextLong} method. The scanner does not advance past any input. 
2164 * 2165 * @return true if and only if this scanner's next token is a valid 2166 * long value 2167 * @throws IllegalStateException if this scanner is closed 2168 */ 2169 public boolean hasNextLong() { 2170 return hasNextLong(defaultRadix); 2171 } 2172 2173 /** 2174 * Returns true if the next token in this scanner's input can be 2175 * interpreted as a long value in the specified radix using the 2176 * {@link #nextLong} method. The scanner does not advance past any input. 2177 * 2178 * @param radix the radix used to interpret the token as a long value 2179 * @return true if and only if this scanner's next token is a valid 2180 * long value 2181 * @throws IllegalStateException if this scanner is closed 2182 */ 2183 public boolean hasNextLong(int radix) { 2184 setRadix(radix); 2185 boolean result = hasNext(integerPattern()); 2186 if (result) { // Cache it 2187 try { 2188 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2189 processIntegerToken(hasNextResult) : 2190 hasNextResult; 2191 typeCache = Long.parseLong(s, radix); 2192 } catch (NumberFormatException nfe) { 2193 result = false; 2194 } 2195 } 2196 return result; 2197 } 2198 2199 /** 2200 * Scans the next token of the input as a <tt>long</tt>. 2201 * 2202 * <p> An invocation of this method of the form 2203 * <tt>nextLong()</tt> behaves in exactly the same way as the 2204 * invocation <tt>nextLong(radix)</tt>, where <code>radix</code> 2205 * is the default radix of this scanner. 2206 * 2207 * @return the <tt>long</tt> scanned from the input 2208 * @throws InputMismatchException 2209 * if the next token does not match the <i>Integer</i> 2210 * regular expression, or is out of range 2211 * @throws NoSuchElementException if input is exhausted 2212 * @throws IllegalStateException if this scanner is closed 2213 */ 2214 public long nextLong() { 2215 return nextLong(defaultRadix); 2216 } 2217 2218 /** 2219 * Scans the next token of the input as a <tt>long</tt>. 
2220 * This method will throw <code>InputMismatchException</code> 2221 * if the next token cannot be translated into a valid long value as 2222 * described below. If the translation is successful, the scanner advances 2223 * past the input that matched. 2224 * 2225 * <p> If the next token matches the <a 2226 * href="#Integer-regex"><i>Integer</i></a> regular expression defined 2227 * above then the token is converted into a <tt>long</tt> value as if by 2228 * removing all locale specific prefixes, group separators, and locale 2229 * specific suffixes, then mapping non-ASCII digits into ASCII 2230 * digits via {@link Character#digit Character.digit}, prepending a 2231 * negative sign (-) if the locale specific negative prefixes and suffixes 2232 * were present, and passing the resulting string to 2233 * {@link Long#parseLong(String, int) Long.parseLong} with the 2234 * specified radix. 2235 * 2236 * @param radix the radix used to interpret the token as an int value 2237 * @return the <tt>long</tt> scanned from the input 2238 * @throws InputMismatchException 2239 * if the next token does not match the <i>Integer</i> 2240 * regular expression, or is out of range 2241 * @throws NoSuchElementException if input is exhausted 2242 * @throws IllegalStateException if this scanner is closed 2243 */ 2244 public long nextLong(int radix) { 2245 // Check cached result 2246 if ((typeCache != null) && (typeCache instanceof Long) 2247 && this.radix == radix) { 2248 long val = ((Long)typeCache).longValue(); 2249 useTypeCache(); 2250 return val; 2251 } 2252 setRadix(radix); 2253 clearCaches(); 2254 try { 2255 String s = next(integerPattern()); 2256 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2257 s = processIntegerToken(s); 2258 return Long.parseLong(s, radix); 2259 } catch (NumberFormatException nfe) { 2260 position = matcher.start(); // don't skip bad token 2261 throw new InputMismatchException(nfe.getMessage()); 2262 } 2263 } 2264 2265 /** 2266 * The float token must be stripped 
of prefixes, group separators, 2267 * and suffixes, non ascii digits must be converted into ascii digits 2268 * before parseFloat will accept it. 2269 * 2270 * If there are non-ascii digits in the token these digits must 2271 * be processed before the token is passed to parseFloat. 2272 */ 2273 private String processFloatToken(String token) { 2274 String result = token.replaceAll(groupSeparator, ""); 2275 if (!decimalSeparator.equals("\\.")) 2276 result = result.replaceAll(decimalSeparator, "."); 2277 boolean isNegative = false; 2278 int preLen = negativePrefix.length(); 2279 if ((preLen > 0) && result.startsWith(negativePrefix)) { 2280 isNegative = true; 2281 result = result.substring(preLen); 2282 } 2283 int sufLen = negativeSuffix.length(); 2284 if ((sufLen > 0) && result.endsWith(negativeSuffix)) { 2285 isNegative = true; 2286 result = result.substring(result.length() - sufLen, 2287 result.length()); 2288 } 2289 if (result.equals(nanString)) 2290 result = "NaN"; 2291 if (result.equals(infinityString)) 2292 result = "Infinity"; 2293 if (isNegative) 2294 result = "-" + result; 2295 2296 // Translate non-ASCII digits 2297 Matcher m = NON_ASCII_DIGIT.matcher(result); 2298 if (m.find()) { 2299 StringBuilder inASCII = new StringBuilder(); 2300 for (int i=0; i<result.length(); i++) { 2301 char nextChar = result.charAt(i); 2302 if (Character.isDigit(nextChar)) { 2303 int d = Character.digit(nextChar, 10); 2304 if (d != -1) 2305 inASCII.append(d); 2306 else 2307 inASCII.append(nextChar); 2308 } else { 2309 inASCII.append(nextChar); 2310 } 2311 } 2312 result = inASCII.toString(); 2313 } 2314 2315 return result; 2316 } 2317 2318 /** 2319 * Returns true if the next token in this scanner's input can be 2320 * interpreted as a float value using the {@link #nextFloat} 2321 * method. The scanner does not advance past any input. 
2322 * 2323 * @return true if and only if this scanner's next token is a valid 2324 * float value 2325 * @throws IllegalStateException if this scanner is closed 2326 */ 2327 public boolean hasNextFloat() { 2328 setRadix(10); 2329 boolean result = hasNext(floatPattern()); 2330 if (result) { // Cache it 2331 try { 2332 String s = processFloatToken(hasNextResult); 2333 typeCache = Float.valueOf(Float.parseFloat(s)); 2334 } catch (NumberFormatException nfe) { 2335 result = false; 2336 } 2337 } 2338 return result; 2339 } 2340 2341 /** 2342 * Scans the next token of the input as a <tt>float</tt>. 2343 * This method will throw <code>InputMismatchException</code> 2344 * if the next token cannot be translated into a valid float value as 2345 * described below. If the translation is successful, the scanner advances 2346 * past the input that matched. 2347 * 2348 * <p> If the next token matches the <a 2349 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2350 * then the token is converted into a <tt>float</tt> value as if by 2351 * removing all locale specific prefixes, group separators, and locale 2352 * specific suffixes, then mapping non-ASCII digits into ASCII 2353 * digits via {@link Character#digit Character.digit}, prepending a 2354 * negative sign (-) if the locale specific negative prefixes and suffixes 2355 * were present, and passing the resulting string to 2356 * {@link Float#parseFloat Float.parseFloat}. If the token matches 2357 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2358 * is passed to {@link Float#parseFloat(String) Float.parseFloat} as 2359 * appropriate. 
2360 * 2361 * @return the <tt>float</tt> scanned from the input 2362 * @throws InputMismatchException 2363 * if the next token does not match the <i>Float</i> 2364 * regular expression, or is out of range 2365 * @throws NoSuchElementException if input is exhausted 2366 * @throws IllegalStateException if this scanner is closed 2367 */ 2368 public float nextFloat() { 2369 // Check cached result 2370 if ((typeCache != null) && (typeCache instanceof Float)) { 2371 float val = ((Float)typeCache).floatValue(); 2372 useTypeCache(); 2373 return val; 2374 } 2375 setRadix(10); 2376 clearCaches(); 2377 try { 2378 return Float.parseFloat(processFloatToken(next(floatPattern()))); 2379 } catch (NumberFormatException nfe) { 2380 position = matcher.start(); // don't skip bad token 2381 throw new InputMismatchException(nfe.getMessage()); 2382 } 2383 } 2384 2385 /** 2386 * Returns true if the next token in this scanner's input can be 2387 * interpreted as a double value using the {@link #nextDouble} 2388 * method. The scanner does not advance past any input. 2389 * 2390 * @return true if and only if this scanner's next token is a valid 2391 * double value 2392 * @throws IllegalStateException if this scanner is closed 2393 */ 2394 public boolean hasNextDouble() { 2395 setRadix(10); 2396 boolean result = hasNext(floatPattern()); 2397 if (result) { // Cache it 2398 try { 2399 String s = processFloatToken(hasNextResult); 2400 typeCache = Double.valueOf(Double.parseDouble(s)); 2401 } catch (NumberFormatException nfe) { 2402 result = false; 2403 } 2404 } 2405 return result; 2406 } 2407 2408 /** 2409 * Scans the next token of the input as a <tt>double</tt>. 2410 * This method will throw <code>InputMismatchException</code> 2411 * if the next token cannot be translated into a valid double value. 2412 * If the translation is successful, the scanner advances past the input 2413 * that matched. 
2414 * 2415 * <p> If the next token matches the <a 2416 * href="#Float-regex"><i>Float</i></a> regular expression defined above 2417 * then the token is converted into a <tt>double</tt> value as if by 2418 * removing all locale specific prefixes, group separators, and locale 2419 * specific suffixes, then mapping non-ASCII digits into ASCII 2420 * digits via {@link Character#digit Character.digit}, prepending a 2421 * negative sign (-) if the locale specific negative prefixes and suffixes 2422 * were present, and passing the resulting string to 2423 * {@link Double#parseDouble Double.parseDouble}. If the token matches 2424 * the localized NaN or infinity strings, then either "Nan" or "Infinity" 2425 * is passed to {@link Double#parseDouble(String) Double.parseDouble} as 2426 * appropriate. 2427 * 2428 * @return the <tt>double</tt> scanned from the input 2429 * @throws InputMismatchException 2430 * if the next token does not match the <i>Float</i> 2431 * regular expression, or is out of range 2432 * @throws NoSuchElementException if the input is exhausted 2433 * @throws IllegalStateException if this scanner is closed 2434 */ 2435 public double nextDouble() { 2436 // Check cached result 2437 if ((typeCache != null) && (typeCache instanceof Double)) { 2438 double val = ((Double)typeCache).doubleValue(); 2439 useTypeCache(); 2440 return val; 2441 } 2442 setRadix(10); 2443 clearCaches(); 2444 // Search for next float 2445 try { 2446 return Double.parseDouble(processFloatToken(next(floatPattern()))); 2447 } catch (NumberFormatException nfe) { 2448 position = matcher.start(); // don't skip bad token 2449 throw new InputMismatchException(nfe.getMessage()); 2450 } 2451 } 2452 2453 // Convenience methods for scanning multi precision numbers 2454 2455 /** 2456 * Returns true if the next token in this scanner's input can be 2457 * interpreted as a <code>BigInteger</code> in the default radix using the 2458 * {@link #nextBigInteger} method. 
The scanner does not advance past any 2459 * input. 2460 * 2461 * @return true if and only if this scanner's next token is a valid 2462 * <code>BigInteger</code> 2463 * @throws IllegalStateException if this scanner is closed 2464 */ 2465 public boolean hasNextBigInteger() { 2466 return hasNextBigInteger(defaultRadix); 2467 } 2468 2469 /** 2470 * Returns true if the next token in this scanner's input can be 2471 * interpreted as a <code>BigInteger</code> in the specified radix using 2472 * the {@link #nextBigInteger} method. The scanner does not advance past 2473 * any input. 2474 * 2475 * @param radix the radix used to interpret the token as an integer 2476 * @return true if and only if this scanner's next token is a valid 2477 * <code>BigInteger</code> 2478 * @throws IllegalStateException if this scanner is closed 2479 */ 2480 public boolean hasNextBigInteger(int radix) { 2481 setRadix(radix); 2482 boolean result = hasNext(integerPattern()); 2483 if (result) { // Cache it 2484 try { 2485 String s = (matcher.group(SIMPLE_GROUP_INDEX) == null) ? 2486 processIntegerToken(hasNextResult) : 2487 hasNextResult; 2488 typeCache = new BigInteger(s, radix); 2489 } catch (NumberFormatException nfe) { 2490 result = false; 2491 } 2492 } 2493 return result; 2494 } 2495 2496 /** 2497 * Scans the next token of the input as a {@link java.math.BigInteger 2498 * BigInteger}. 2499 * 2500 * <p> An invocation of this method of the form 2501 * <tt>nextBigInteger()</tt> behaves in exactly the same way as the 2502 * invocation <tt>nextBigInteger(radix)</tt>, where <code>radix</code> 2503 * is the default radix of this scanner. 
/**
 * Scans the next token of the input as a {@link java.math.BigInteger
 * BigInteger}.
 *
 * <p> An invocation of this method of the form
 * <tt>nextBigInteger()</tt> behaves in exactly the same way as the
 * invocation <tt>nextBigInteger(radix)</tt>, where <code>radix</code>
 * is the default radix of this scanner.
 *
 * @return the <tt>BigInteger</tt> scanned from the input
 * @throws InputMismatchException
 *         if the next token does not match the <i>Integer</i>
 *         regular expression, or is out of range
 * @throws NoSuchElementException if the input is exhausted
 * @throws IllegalStateException if this scanner is closed
 */
public BigInteger nextBigInteger() {
    // Delegate to the radix-taking overload with this scanner's default radix.
    return nextBigInteger(defaultRadix);
}
2528 * 2529 * @param radix the radix used to interpret the token 2530 * @return the <tt>BigInteger</tt> scanned from the input 2531 * @throws InputMismatchException 2532 * if the next token does not match the <i>Integer</i> 2533 * regular expression, or is out of range 2534 * @throws NoSuchElementException if the input is exhausted 2535 * @throws IllegalStateException if this scanner is closed 2536 */ 2537 public BigInteger nextBigInteger(int radix) { 2538 // Check cached result 2539 if ((typeCache != null) && (typeCache instanceof BigInteger) 2540 && this.radix == radix) { 2541 BigInteger val = (BigInteger)typeCache; 2542 useTypeCache(); 2543 return val; 2544 } 2545 setRadix(radix); 2546 clearCaches(); 2547 // Search for next int 2548 try { 2549 String s = next(integerPattern()); 2550 if (matcher.group(SIMPLE_GROUP_INDEX) == null) 2551 s = processIntegerToken(s); 2552 return new BigInteger(s, radix); 2553 } catch (NumberFormatException nfe) { 2554 position = matcher.start(); // don't skip bad token 2555 throw new InputMismatchException(nfe.getMessage()); 2556 } 2557 } 2558 2559 /** 2560 * Returns true if the next token in this scanner's input can be 2561 * interpreted as a <code>BigDecimal</code> using the 2562 * {@link #nextBigDecimal} method. The scanner does not advance past any 2563 * input. 2564 * 2565 * @return true if and only if this scanner's next token is a valid 2566 * <code>BigDecimal</code> 2567 * @throws IllegalStateException if this scanner is closed 2568 */ 2569 public boolean hasNextBigDecimal() { 2570 setRadix(10); 2571 boolean result = hasNext(decimalPattern()); 2572 if (result) { // Cache it 2573 try { 2574 String s = processFloatToken(hasNextResult); 2575 typeCache = new BigDecimal(s); 2576 } catch (NumberFormatException nfe) { 2577 result = false; 2578 } 2579 } 2580 return result; 2581 } 2582 2583 /** 2584 * Scans the next token of the input as a {@link java.math.BigDecimal 2585 * BigDecimal}. 
2586 * 2587 * <p> If the next token matches the <a 2588 * href="#Decimal-regex"><i>Decimal</i></a> regular expression defined 2589 * above then the token is converted into a <tt>BigDecimal</tt> value as if 2590 * by removing all group separators, mapping non-ASCII digits into ASCII 2591 * digits via the {@link Character#digit Character.digit}, and passing the 2592 * resulting string to the {@link 2593 * java.math.BigDecimal#BigDecimal(java.lang.String) BigDecimal(String)} 2594 * constructor. 2595 * 2596 * @return the <tt>BigDecimal</tt> scanned from the input 2597 * @throws InputMismatchException 2598 * if the next token does not match the <i>Decimal</i> 2599 * regular expression, or is out of range 2600 * @throws NoSuchElementException if the input is exhausted 2601 * @throws IllegalStateException if this scanner is closed 2602 */ 2603 public BigDecimal nextBigDecimal() { 2604 // Check cached result 2605 if ((typeCache != null) && (typeCache instanceof BigDecimal)) { 2606 BigDecimal val = (BigDecimal)typeCache; 2607 useTypeCache(); 2608 return val; 2609 } 2610 setRadix(10); 2611 clearCaches(); 2612 // Search for next float 2613 try { 2614 String s = processFloatToken(next(decimalPattern())); 2615 return new BigDecimal(s); 2616 } catch (NumberFormatException nfe) { 2617 position = matcher.start(); // don't skip bad token 2618 throw new InputMismatchException(nfe.getMessage()); 2619 } 2620 } 2621 2622 /** 2623 * Resets this scanner. 2624 * 2625 * <p> Resetting a scanner discards all of its explicit state 2626 * information which may have been changed by invocations of {@link 2627 * #useDelimiter}, {@link #useLocale}, or {@link #useRadix}. 
/**
 * Resets this scanner.
 *
 * <p> Resetting a scanner discards all of its explicit state
 * information which may have been changed by invocations of {@link
 * #useDelimiter}, {@link #useLocale}, or {@link #useRadix}.
 *
 * <p> An invocation of this method of the form
 * <tt>scanner.reset()</tt> behaves in exactly the same way as the
 * invocation
 *
 * <blockquote><pre>
 *   scanner.useDelimiter("\\p{javaWhitespace}+")
 *          .useLocale(Locale.getDefault())
 *          .useRadix(10);
 * </pre></blockquote>
 *
 * @return this scanner
 *
 * @since 1.6
 */
public Scanner reset() {
    // Restore the default delimiter directly instead of going through
    // useDelimiter.
    delimPattern = WHITESPACE_PATTERN;
    useLocale(Locale.getDefault());
    useRadix(10);
    // Drop any cached lookahead results.
    clearCaches();
    return this;
}
} // closes the enclosing class, whose header lies outside this section