New src/share/classes/java/io/StreamTokenizer.java

   1 /*
   2  * Copyright (c) 1995, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.io;
  27 
  28 import java.util.Arrays;
  29 
  30 /**
  31  * The {@code StreamTokenizer} class takes an input stream and
  32  * parses it into "tokens", allowing the tokens to be
  33  * read one at a time. The parsing process is controlled by a table
  34  * and a number of flags that can be set to various states. The
  35  * stream tokenizer can recognize identifiers, numbers, quoted
  36  * strings, and various comment styles.
  37  * <p>
  38  * Each byte read from the input stream is regarded as a character
  39  * in the range {@code '\u005Cu0000'} through {@code '\u005Cu00FF'}.
  40  * The character value is used to look up five possible attributes of
  41  * the character: <i>white space</i>, <i>alphabetic</i>,
  42  * <i>numeric</i>, <i>string quote</i>, and <i>comment character</i>.
  43  * Each character can have zero or more of these attributes.
  44  * <p>
  45  * In addition, an instance has four flags. These flags indicate:
  46  * <ul>
  47  * <li>Whether line terminators are to be returned as tokens or treated
  48  *     as white space that merely separates tokens.
  49  * <li>Whether C-style comments are to be recognized and skipped.
  50  * <li>Whether C++-style comments are to be recognized and skipped.
  51  * <li>Whether the characters of identifiers are converted to lowercase.
  52  * </ul>
  53  * <p>
  54  * A typical application first constructs an instance of this class,
  55  * sets up the syntax tables, and then repeatedly loops calling the
  56  * {@code nextToken} method in each iteration of the loop until
  57  * it returns the value {@code TT_EOF}.
  58  *
  59  * @author  James Gosling
  60  * @see     java.io.StreamTokenizer#nextToken()
  61  * @see     java.io.StreamTokenizer#TT_EOF
  62  * @since   1.0
  63  */
  64 
  65 public class StreamTokenizer {
  66 
  67     /* Only one of these will be non-null */
  68     private Reader reader = null;
  69     private InputStream input = null;
  70 
  71     private char buf[] = new char[20];
  72 
  73     /**
  74      * The next character to be considered by the nextToken method.  May also
  75      * be NEED_CHAR to indicate that a new character should be read, or SKIP_LF
  76      * to indicate that a new character should be read and, if it is a '\n'
  77      * character, it should be discarded and a second new character should be
  78      * read.
  79      */
  80     private int peekc = NEED_CHAR;
  81 
  82     private static final int NEED_CHAR = Integer.MAX_VALUE;
  83     private static final int SKIP_LF = Integer.MAX_VALUE - 1;
  84 
  85     private boolean pushedBack;
  86     private boolean forceLower;
  87     /** The line number of the last token read */
  88     private int LINENO = 1;
  89 
  90     private boolean eolIsSignificantP = false;
  91     private boolean slashSlashCommentsP = false;
  92     private boolean slashStarCommentsP = false;
  93 
  94     private byte ctype[] = new byte[256];
  95     private static final byte CT_WHITESPACE = 1;
  96     private static final byte CT_DIGIT = 2;
  97     private static final byte CT_ALPHA = 4;
  98     private static final byte CT_QUOTE = 8;
  99     private static final byte CT_COMMENT = 16;
 100 
 101     /**
 102      * After a call to the {@code nextToken} method, this field
 103      * contains the type of the token just read. For a single character
 104      * token, its value is the single character, converted to an integer.
 105      * For a quoted string token, its value is the quote character.
 106      * Otherwise, its value is one of the following:
 107      * <ul>
 108      * <li>{@code TT_WORD} indicates that the token is a word.
 109      * <li>{@code TT_NUMBER} indicates that the token is a number.
 110      * <li>{@code TT_EOL} indicates that the end of line has been read.
 111      *     The field can only have this value if the
 112      *     {@code eolIsSignificant} method has been called with the
 113      *     argument {@code true}.
 114      * <li>{@code TT_EOF} indicates that the end of the input stream
 115      *     has been reached.
 116      * </ul>
 117      * <p>
 118      * The initial value of this field is -4.
 119      *
 120      * @see     java.io.StreamTokenizer#eolIsSignificant(boolean)
 121      * @see     java.io.StreamTokenizer#nextToken()
 122      * @see     java.io.StreamTokenizer#quoteChar(int)
 123      * @see     java.io.StreamTokenizer#TT_EOF
 124      * @see     java.io.StreamTokenizer#TT_EOL
 125      * @see     java.io.StreamTokenizer#TT_NUMBER
 126      * @see     java.io.StreamTokenizer#TT_WORD
 127      */
 128     public int ttype = TT_NOTHING;
 129 
 130     /**
 131      * A constant indicating that the end of the stream has been read.
 132      */
 133     public static final int TT_EOF = -1;
 134 
 135     /**
 136      * A constant indicating that the end of the line has been read.
 137      */
 138     public static final int TT_EOL = '\n';
 139 
 140     /**
 141      * A constant indicating that a number token has been read.
 142      */
 143     public static final int TT_NUMBER = -2;
 144 
 145     /**
 146      * A constant indicating that a word token has been read.
 147      */
 148     public static final int TT_WORD = -3;
 149 
 150     /* A constant indicating that no token has been read, used for
 151      * initializing ttype.  FIXME This could be made public and
 152      * made available as the part of the API in a future release.
 153      */
 154     private static final int TT_NOTHING = -4;
 155 
 156     /**
 157      * If the current token is a word token, this field contains a
 158      * string giving the characters of the word token. When the current
 159      * token is a quoted string token, this field contains the body of
 160      * the string.
 161      * <p>
 162      * The current token is a word when the value of the
 163      * {@code ttype} field is {@code TT_WORD}. The current token is
 164      * a quoted string token when the value of the {@code ttype} field is
 165      * a quote character.
 166      * <p>
 167      * The initial value of this field is null.
 168      *
 169      * @see     java.io.StreamTokenizer#quoteChar(int)
 170      * @see     java.io.StreamTokenizer#TT_WORD
 171      * @see     java.io.StreamTokenizer#ttype
 172      */
 173     public String sval;
 174 
 175     /**
 176      * If the current token is a number, this field contains the value
 177      * of that number. The current token is a number when the value of
 178      * the {@code ttype} field is {@code TT_NUMBER}.
 179      * <p>
 180      * The initial value of this field is 0.0.
 181      *
 182      * @see     java.io.StreamTokenizer#TT_NUMBER
 183      * @see     java.io.StreamTokenizer#ttype
 184      */
 185     public double nval;
 186 
 187     /** Private constructor that initializes everything except the streams. */
 188     private StreamTokenizer() {
 189         wordChars('a', 'z');
 190         wordChars('A', 'Z');
 191         wordChars(128 + 32, 255);
 192         whitespaceChars(0, ' ');
 193         commentChar('/');
 194         quoteChar('"');
 195         quoteChar('\'');
 196         parseNumbers();
 197     }
 198 
 199     /**
 200      * Creates a stream tokenizer that parses the specified input
 201      * stream. The stream tokenizer is initialized to the following
 202      * default state:
 203      * <ul>
 204      * <li>All byte values {@code 'A'} through {@code 'Z'},
 205      *     {@code 'a'} through {@code 'z'}, and
 206      *     {@code '\u005Cu00A0'} through {@code '\u005Cu00FF'} are
 207      *     considered to be alphabetic.
 208      * <li>All byte values {@code '\u005Cu0000'} through
 209      *     {@code '\u005Cu0020'} are considered to be white space.
 210      * <li>{@code '/'} is a comment character.
 211      * <li>Single quote {@code '\u005C''} and double quote {@code '"'}
 212      *     are string quote characters.
 213      * <li>Numbers are parsed.
 214      * <li>Ends of lines are treated as white space, not as separate tokens.
 215      * <li>C-style and C++-style comments are not recognized.
 216      * </ul>
 217      *
 218      * @deprecated As of JDK version 1.1, the preferred way to tokenize an
 219      * input stream is to convert it into a character stream, for example:
 220      * <blockquote><pre>
 221      *   Reader r = new BufferedReader(new InputStreamReader(is));
 222      *   StreamTokenizer st = new StreamTokenizer(r);
 223      * </pre></blockquote>
 224      *
 225      * @param      is        an input stream.
 226      * @see        java.io.BufferedReader
 227      * @see        java.io.InputStreamReader
 228      * @see        java.io.StreamTokenizer#StreamTokenizer(java.io.Reader)
 229      */
 230     @Deprecated
 231     public StreamTokenizer(InputStream is) {
 232         this();
 233         if (is == null) {
 234             throw new NullPointerException();
 235         }
 236         input = is;
 237     }
 238 
 239     /**
 240      * Create a tokenizer that parses the given character stream.
 241      *
 242      * @param r  a Reader object providing the input stream.
 243      * @since   1.1
 244      */
 245     public StreamTokenizer(Reader r) {
 246         this();
 247         if (r == null) {
 248             throw new NullPointerException();
 249         }
 250         reader = r;
 251     }
 252 
 253     /**
 254      * Resets this tokenizer's syntax table so that all characters are
 255      * "ordinary." See the {@code ordinaryChar} method
 256      * for more information on a character being ordinary.
 257      *
 258      * @see     java.io.StreamTokenizer#ordinaryChar(int)
 259      */
 260     public void resetSyntax() {
 261         for (int i = ctype.length; --i >= 0;)
 262             ctype[i] = 0;
 263     }
 264 
 265     /**
 266      * Specifies that all characters <i>c</i> in the range
 267      * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>
 268      * are word constituents. A word token consists of a word constituent
 269      * followed by zero or more word constituents or number constituents.
 270      *
 271      * @param   low   the low end of the range.
 272      * @param   hi    the high end of the range.
 273      */
 274     public void wordChars(int low, int hi) {
 275         if (low < 0)
 276             low = 0;
 277         if (hi >= ctype.length)
 278             hi = ctype.length - 1;
 279         while (low <= hi)
 280             ctype[low++] |= CT_ALPHA;
 281     }
 282 
 283     /**
 284      * Specifies that all characters <i>c</i> in the range
 285      * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>
 286      * are white space characters. White space characters serve only to
 287      * separate tokens in the input stream.
 288      *
 289      * <p>Any other attribute settings for the characters in the specified
 290      * range are cleared.
 291      *
 292      * @param   low   the low end of the range.
 293      * @param   hi    the high end of the range.
 294      */
 295     public void whitespaceChars(int low, int hi) {
 296         if (low < 0)
 297             low = 0;
 298         if (hi >= ctype.length)
 299             hi = ctype.length - 1;
 300         while (low <= hi)
 301             ctype[low++] = CT_WHITESPACE;
 302     }
 303 
 304     /**
 305      * Specifies that all characters <i>c</i> in the range
 306      * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>
 307      * are "ordinary" in this tokenizer. See the
 308      * {@code ordinaryChar} method for more information on a
 309      * character being ordinary.
 310      *
 311      * @param   low   the low end of the range.
 312      * @param   hi    the high end of the range.
 313      * @see     java.io.StreamTokenizer#ordinaryChar(int)
 314      */
 315     public void ordinaryChars(int low, int hi) {
 316         if (low < 0)
 317             low = 0;
 318         if (hi >= ctype.length)
 319             hi = ctype.length - 1;
 320         while (low <= hi)
 321             ctype[low++] = 0;
 322     }
 323 
 324     /**
 325      * Specifies that the character argument is "ordinary"
 326      * in this tokenizer. It removes any special significance the
 327      * character has as a comment character, word component, string
 328      * delimiter, white space, or number character. When such a character
 329      * is encountered by the parser, the parser treats it as a
 330      * single-character token and sets {@code ttype} field to the
 331      * character value.
 332      *
 333      * <p>Making a line terminator character "ordinary" may interfere
 334      * with the ability of a {@code StreamTokenizer} to count
 335      * lines. The {@code lineno} method may no longer reflect
 336      * the presence of such terminator characters in its line count.
 337      *
 338      * @param   ch   the character.
 339      * @see     java.io.StreamTokenizer#ttype
 340      */
 341     public void ordinaryChar(int ch) {
 342         if (ch >= 0 && ch < ctype.length)
 343             ctype[ch] = 0;
 344     }
 345 
 346     /**
 347      * Specified that the character argument starts a single-line
 348      * comment. All characters from the comment character to the end of
 349      * the line are ignored by this stream tokenizer.
 350      *
 351      * <p>Any other attribute settings for the specified character are cleared.
 352      *
 353      * @param   ch   the character.
 354      */
 355     public void commentChar(int ch) {
 356         if (ch >= 0 && ch < ctype.length)
 357             ctype[ch] = CT_COMMENT;
 358     }
 359 
 360     /**
 361      * Specifies that matching pairs of this character delimit string
 362      * constants in this tokenizer.
 363      * <p>
 364      * When the {@code nextToken} method encounters a string
 365      * constant, the {@code ttype} field is set to the string
 366      * delimiter and the {@code sval} field is set to the body of
 367      * the string.
 368      * <p>
 369      * If a string quote character is encountered, then a string is
 370      * recognized, consisting of all characters after (but not including)
 371      * the string quote character, up to (but not including) the next
 372      * occurrence of that same string quote character, or a line
 373      * terminator, or end of file. The usual escape sequences such as
 374      * {@code "\u005Cn"} and {@code "\u005Ct"} are recognized and
 375      * converted to single characters as the string is parsed.
 376      *
 377      * <p>Any other attribute settings for the specified character are cleared.
 378      *
 379      * @param   ch   the character.
 380      * @see     java.io.StreamTokenizer#nextToken()
 381      * @see     java.io.StreamTokenizer#sval
 382      * @see     java.io.StreamTokenizer#ttype
 383      */
 384     public void quoteChar(int ch) {
 385         if (ch >= 0 && ch < ctype.length)
 386             ctype[ch] = CT_QUOTE;
 387     }
 388 
 389     /**
 390      * Specifies that numbers should be parsed by this tokenizer. The
 391      * syntax table of this tokenizer is modified so that each of the twelve
 392      * characters:
 393      * <blockquote><pre>
 394      *      0 1 2 3 4 5 6 7 8 9 . -
 395      * </pre></blockquote>
 396      * <p>
 397      * has the "numeric" attribute.
 398      * <p>
 399      * When the parser encounters a word token that has the format of a
 400      * double precision floating-point number, it treats the token as a
 401      * number rather than a word, by setting the {@code ttype}
 402      * field to the value {@code TT_NUMBER} and putting the numeric
 403      * value of the token into the {@code nval} field.
 404      *
 405      * @see     java.io.StreamTokenizer#nval
 406      * @see     java.io.StreamTokenizer#TT_NUMBER
 407      * @see     java.io.StreamTokenizer#ttype
 408      */
 409     public void parseNumbers() {
 410         for (int i = '0'; i <= '9'; i++)
 411             ctype[i] |= CT_DIGIT;
 412         ctype['.'] |= CT_DIGIT;
 413         ctype['-'] |= CT_DIGIT;
 414     }
 415 
 416     /**
 417      * Determines whether or not ends of line are treated as tokens.
 418      * If the flag argument is true, this tokenizer treats end of lines
 419      * as tokens; the {@code nextToken} method returns
 420      * {@code TT_EOL} and also sets the {@code ttype} field to
 421      * this value when an end of line is read.
 422      * <p>
 423      * A line is a sequence of characters ending with either a
 424      * carriage-return character ({@code '\u005Cr'}) or a newline
 425      * character ({@code '\u005Cn'}). In addition, a carriage-return
 426      * character followed immediately by a newline character is treated
 427      * as a single end-of-line token.
 428      * <p>
 429      * If the {@code flag} is false, end-of-line characters are
 430      * treated as white space and serve only to separate tokens.
 431      *
 432      * @param   flag   {@code true} indicates that end-of-line characters
 433      *                 are separate tokens; {@code false} indicates that
 434      *                 end-of-line characters are white space.
 435      * @see     java.io.StreamTokenizer#nextToken()
 436      * @see     java.io.StreamTokenizer#ttype
 437      * @see     java.io.StreamTokenizer#TT_EOL
 438      */
 439     public void eolIsSignificant(boolean flag) {
 440         eolIsSignificantP = flag;
 441     }
 442 
 443     /**
 444      * Determines whether or not the tokenizer recognizes C-style comments.
 445      * If the flag argument is {@code true}, this stream tokenizer
 446      * recognizes C-style comments. All text between successive
 447      * occurrences of {@code /*} and <code>*/</code> are discarded.
 448      * <p>
 449      * If the flag argument is {@code false}, then C-style comments
 450      * are not treated specially.
 451      *
 452      * @param   flag   {@code true} indicates to recognize and ignore
 453      *                 C-style comments.
 454      */
 455     public void slashStarComments(boolean flag) {
 456         slashStarCommentsP = flag;
 457     }
 458 
 459     /**
 460      * Determines whether or not the tokenizer recognizes C++-style comments.
 461      * If the flag argument is {@code true}, this stream tokenizer
 462      * recognizes C++-style comments. Any occurrence of two consecutive
 463      * slash characters ({@code '/'}) is treated as the beginning of
 464      * a comment that extends to the end of the line.
 465      * <p>
 466      * If the flag argument is {@code false}, then C++-style
 467      * comments are not treated specially.
 468      *
 469      * @param   flag   {@code true} indicates to recognize and ignore
 470      *                 C++-style comments.
 471      */
 472     public void slashSlashComments(boolean flag) {
 473         slashSlashCommentsP = flag;
 474     }
 475 
 476     /**
 477      * Determines whether or not word token are automatically lowercased.
 478      * If the flag argument is {@code true}, then the value in the
 479      * {@code sval} field is lowercased whenever a word token is
 480      * returned (the {@code ttype} field has the
 481      * value {@code TT_WORD} by the {@code nextToken} method
 482      * of this tokenizer.
 483      * <p>
 484      * If the flag argument is {@code false}, then the
 485      * {@code sval} field is not modified.
 486      *
 487      * @param   fl   {@code true} indicates that all word tokens should
 488      *               be lowercased.
 489      * @see     java.io.StreamTokenizer#nextToken()
 490      * @see     java.io.StreamTokenizer#ttype
 491      * @see     java.io.StreamTokenizer#TT_WORD
 492      */
 493     public void lowerCaseMode(boolean fl) {
 494         forceLower = fl;
 495     }
 496 
 497     /** Read the next character */
 498     private int read() throws IOException {
 499         if (reader != null)
 500             return reader.read();
 501         else if (input != null)
 502             return input.read();
 503         else
 504             throw new IllegalStateException();
 505     }
 506 
 507     /**
 508      * Parses the next token from the input stream of this tokenizer.
 509      * The type of the next token is returned in the {@code ttype}
 510      * field. Additional information about the token may be in the
 511      * {@code nval} field or the {@code sval} field of this
 512      * tokenizer.
 513      * <p>
 514      * Typical clients of this
 515      * class first set up the syntax tables and then sit in a loop
 516      * calling nextToken to parse successive tokens until TT_EOF
 517      * is returned.
 518      *
 519      * @return     the value of the {@code ttype} field.
 520      * @exception  IOException  if an I/O error occurs.
 521      * @see        java.io.StreamTokenizer#nval
 522      * @see        java.io.StreamTokenizer#sval
 523      * @see        java.io.StreamTokenizer#ttype
 524      */
 525     public int nextToken() throws IOException {
 526         if (pushedBack) {
 527             pushedBack = false;
 528             return ttype;
 529         }
 530         byte ct[] = ctype;
 531         sval = null;
 532 
 533         int c = peekc;
 534         if (c < 0)
 535             c = NEED_CHAR;
 536         if (c == SKIP_LF) {
 537             c = read();
 538             if (c < 0)
 539                 return ttype = TT_EOF;
 540             if (c == '\n')
 541                 c = NEED_CHAR;
 542         }
 543         if (c == NEED_CHAR) {
 544             c = read();
 545             if (c < 0)
 546                 return ttype = TT_EOF;
 547         }
 548         ttype = c;              /* Just to be safe */
 549 
 550         /* Set peekc so that the next invocation of nextToken will read
 551          * another character unless peekc is reset in this invocation
 552          */
 553         peekc = NEED_CHAR;
 554 
 555         int ctype = c < 256 ? ct[c] : CT_ALPHA;
 556         while ((ctype & CT_WHITESPACE) != 0) {
 557             if (c == '\r') {
 558                 LINENO++;
 559                 if (eolIsSignificantP) {
 560                     peekc = SKIP_LF;
 561                     return ttype = TT_EOL;
 562                 }
 563                 c = read();
 564                 if (c == '\n')
 565                     c = read();
 566             } else {
 567                 if (c == '\n') {
 568                     LINENO++;
 569                     if (eolIsSignificantP) {
 570                         return ttype = TT_EOL;
 571                     }
 572                 }
 573                 c = read();
 574             }
 575             if (c < 0)
 576                 return ttype = TT_EOF;
 577             ctype = c < 256 ? ct[c] : CT_ALPHA;
 578         }
 579 
 580         if ((ctype & CT_DIGIT) != 0) {
 581             boolean neg = false;
 582             if (c == '-') {
 583                 c = read();
 584                 if (c != '.' && (c < '0' || c > '9')) {
 585                     peekc = c;
 586                     return ttype = '-';
 587                 }
 588                 neg = true;
 589             }
 590             double v = 0;
 591             int decexp = 0;
 592             int seendot = 0;
 593             while (true) {
 594                 if (c == '.' && seendot == 0)
 595                     seendot = 1;
 596                 else if ('0' <= c && c <= '9') {
 597                     v = v * 10 + (c - '0');
 598                     decexp += seendot;
 599                 } else
 600                     break;
 601                 c = read();
 602             }
 603             peekc = c;
 604             if (decexp != 0) {
 605                 double denom = 10;
 606                 decexp--;
 607                 while (decexp > 0) {
 608                     denom *= 10;
 609                     decexp--;
 610                 }
 611                 /* Do one division of a likely-to-be-more-accurate number */
 612                 v = v / denom;
 613             }
 614             nval = neg ? -v : v;
 615             return ttype = TT_NUMBER;
 616         }
 617 
 618         if ((ctype & CT_ALPHA) != 0) {
 619             int i = 0;
 620             do {
 621                 if (i >= buf.length) {
 622                     buf = Arrays.copyOf(buf, buf.length * 2);
 623                 }
 624                 buf[i++] = (char) c;
 625                 c = read();
 626                 ctype = c < 0 ? CT_WHITESPACE : c < 256 ? ct[c] : CT_ALPHA;
 627             } while ((ctype & (CT_ALPHA | CT_DIGIT)) != 0);
 628             peekc = c;
 629             sval = String.copyValueOf(buf, 0, i);
 630             if (forceLower)
 631                 sval = sval.toLowerCase();
 632             return ttype = TT_WORD;
 633         }
 634 
 635         if ((ctype & CT_QUOTE) != 0) {
 636             ttype = c;
 637             int i = 0;
 638             /* Invariants (because \Octal needs a lookahead):
 639              *   (i)  c contains char value
 640              *   (ii) d contains the lookahead
 641              */
 642             int d = read();
 643             while (d >= 0 && d != ttype && d != '\n' && d != '\r') {
 644                 if (d == '\\') {
 645                     c = read();
 646                     int first = c;   /* To allow \377, but not \477 */
 647                     if (c >= '0' && c <= '7') {
 648                         c = c - '0';
 649                         int c2 = read();
 650                         if ('0' <= c2 && c2 <= '7') {
 651                             c = (c << 3) + (c2 - '0');
 652                             c2 = read();
 653                             if ('0' <= c2 && c2 <= '7' && first <= '3') {
 654                                 c = (c << 3) + (c2 - '0');
 655                                 d = read();
 656                             } else
 657                                 d = c2;
 658                         } else
 659                           d = c2;
 660                     } else {
 661                         switch (c) {
 662                         case 'a':
 663                             c = 0x7;
 664                             break;
 665                         case 'b':
 666                             c = '\b';
 667                             break;
 668                         case 'f':
 669                             c = 0xC;
 670                             break;
 671                         case 'n':
 672                             c = '\n';
 673                             break;
 674                         case 'r':
 675                             c = '\r';
 676                             break;
 677                         case 't':
 678                             c = '\t';
 679                             break;
 680                         case 'v':
 681                             c = 0xB;
 682                             break;
 683                         }
 684                         d = read();
 685                     }
 686                 } else {
 687                     c = d;
 688                     d = read();
 689                 }
 690                 if (i >= buf.length) {
 691                     buf = Arrays.copyOf(buf, buf.length * 2);
 692                 }
 693                 buf[i++] = (char)c;
 694             }
 695 
 696             /* If we broke out of the loop because we found a matching quote
 697              * character then arrange to read a new character next time
 698              * around; otherwise, save the character.
 699              */
 700             peekc = (d == ttype) ? NEED_CHAR : d;
 701 
 702             sval = String.copyValueOf(buf, 0, i);
 703             return ttype;
 704         }
 705 
 706         if (c == '/' && (slashSlashCommentsP || slashStarCommentsP)) {
 707             c = read();
 708             if (c == '*' && slashStarCommentsP) {
 709                 int prevc = 0;
 710                 while ((c = read()) != '/' || prevc != '*') {
 711                     if (c == '\r') {
 712                         LINENO++;
 713                         c = read();
 714                         if (c == '\n') {
 715                             c = read();
 716                         }
 717                     } else {
 718                         if (c == '\n') {
 719                             LINENO++;
 720                             c = read();
 721                         }
 722                     }
 723                     if (c < 0)
 724                         return ttype = TT_EOF;
 725                     prevc = c;
 726                 }
 727                 return nextToken();
 728             } else if (c == '/' && slashSlashCommentsP) {
 729                 while ((c = read()) != '\n' && c != '\r' && c >= 0);
 730                 peekc = c;
 731                 return nextToken();
 732             } else {
 733                 /* Now see if it is still a single line comment */
 734                 if ((ct['/'] & CT_COMMENT) != 0) {
 735                     while ((c = read()) != '\n' && c != '\r' && c >= 0);
 736                     peekc = c;
 737                     return nextToken();
 738                 } else {
 739                     peekc = c;
 740                     return ttype = '/';
 741                 }
 742             }
 743         }
 744 
 745         if ((ctype & CT_COMMENT) != 0) {
 746             while ((c = read()) != '\n' && c != '\r' && c >= 0);
 747             peekc = c;
 748             return nextToken();
 749         }
 750 
 751         return ttype = c;
 752     }
 753 
 754     /**
 755      * Causes the next call to the {@code nextToken} method of this
 756      * tokenizer to return the current value in the {@code ttype}
 757      * field, and not to modify the value in the {@code nval} or
 758      * {@code sval} field.
 759      *
 760      * @see     java.io.StreamTokenizer#nextToken()
 761      * @see     java.io.StreamTokenizer#nval
 762      * @see     java.io.StreamTokenizer#sval
 763      * @see     java.io.StreamTokenizer#ttype
 764      */
 765     public void pushBack() {
 766         if (ttype != TT_NOTHING)   /* No-op if nextToken() not called */
 767             pushedBack = true;
 768     }
 769 
 770     /**
 771      * Return the current line number.
 772      *
 773      * @return  the current line number of this stream tokenizer.
 774      */
 775     public int lineno() {
 776         return LINENO;
 777     }
 778 
 779     /**
 780      * Returns the string representation of the current stream token and
 781      * the line number it occurs on.
 782      *
 783      * <p>The precise string returned is unspecified, although the following
 784      * example can be considered typical:
 785      *
 786      * <blockquote><pre>Token['a'], line 10</pre></blockquote>
 787      *
 788      * @return  a string representation of the token
 789      * @see     java.io.StreamTokenizer#nval
 790      * @see     java.io.StreamTokenizer#sval
 791      * @see     java.io.StreamTokenizer#ttype
 792      */
 793     public String toString() {
 794         String ret;
 795         switch (ttype) {
 796           case TT_EOF:
 797             ret = "EOF";
 798             break;
 799           case TT_EOL:
 800             ret = "EOL";
 801             break;
 802           case TT_WORD:
 803             ret = sval;
 804             break;
 805           case TT_NUMBER:
 806             ret = "n=" + nval;
 807             break;
 808           case TT_NOTHING:
 809             ret = "NOTHING";
 810             break;
 811           default: {
 812                 /*
 813                  * ttype is the first character of either a quoted string or
 814                  * is an ordinary character. ttype can definitely not be less
 815                  * than 0, since those are reserved values used in the previous
 816                  * case statements
 817                  */
 818                 if (ttype < 256 &&
 819                     ((ctype[ttype] & CT_QUOTE) != 0)) {
 820                     ret = sval;
 821                     break;
 822                 }
 823 
 824                 char s[] = new char[3];
 825                 s[0] = s[2] = '\'';
 826                 s[1] = (char) ttype;
 827                 ret = new String(s);
 828                 break;
 829             }
 830         }
 831         return "Token[" + ret + "], line " + LINENO;
 832     }
 833 
 834 }