New src/share/classes/sun/tools/java/Scanner.java

   1 /*
   2  * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.tools.java;
  27 
  28 import java.io.IOException;
  29 import java.io.InputStream;
  30 import java.util.Hashtable;
  31 
  32 /**
  33  * A Scanner for Java tokens. Errors are reported
  34  * to the environment object.<p>
  35  *
  36  * The scanner keeps track of the current token,
  37  * the value of the current token (if any), and the start
  38  * position of the current token.<p>
  39  *
  40  * The scan() method advances the scanner to the next
  41  * token in the input.<p>
  42  *
  43  * The match() method is used to quickly match opening
  44  * brackets (ie: '(', '{', or '[') with their closing
  45  * counter part. This is useful during error recovery.<p>
  46  *
  47  * A position consists of: ((linenr << WHEREOFFSETBITS) | offset)
  48  * this means that both the line number and the exact offset into
  49  * the file are encoded in each position value.<p>
  50  *
  51  * The compiler treats either "\n", "\r" or "\r\n" as the
  52  * end of a line.<p>
  53  *
  54  * WARNING: The contents of this source file are not part of any
  55  * supported API.  Code that depends on them does so at its own risk:
  56  * they are subject to change or removal without notice.
  57  *
  58  * @author      Arthur van Hoff
  59  */
  60 
  61 public
  62 class Scanner implements Constants {
  63     /**
  64      * The increment for each character.
  65      */
  66     public static final long OFFSETINC = 1;
  67 
  68     /**
  69      * The increment for each line.
  70      */
  71     public static final long LINEINC = 1L << WHEREOFFSETBITS;
  72 
  73     /**
  74      * End of input
  75      */
  76     public static final int EOF = -1;
  77 
  78     /**
  79      * Where errors are reported
  80      */
  81     public Environment env;
  82 
  83     /**
  84      * Input reader
  85      */
  86     protected ScannerInputReader in;
  87 
  88     /**
  89      * If true, present all comments as tokens.
  90      * Contents are not saved, but positions are recorded accurately,
  91      * so the comment can be recovered from the text.
  92      * Line terminations are also returned as comment tokens,
  93      * and may be distinguished by their start and end positions,
  94      * which are equal (meaning, these tokens contain no chars).
  95      */
  96    public boolean scanComments = false;
  97 
  98     /**
  99      * Current token
 100      */
 101     public int token;
 102 
 103     /**
 104      * The position of the current token
 105      */
 106     public long pos;
 107 
 108     /**
 109      * The position of the previous token
 110      */
 111     public long prevPos;
 112 
 113     /**
 114      * The current character
 115      */
 116     protected int ch;
 117 
 118     /*
 119      * Token values.
 120      */
 121     public char charValue;
 122     public int intValue;
 123     public long longValue;
 124     public float floatValue;
 125     public double doubleValue;
 126     public String stringValue;
 127     public Identifier idValue;
 128     public int radix;   // Radix, when reading int or long
 129 
 130     /*
 131      * A doc comment preceding the most recent token
 132      */
 133     public String docComment;
 134 
 135     /*
 136      * A growable character buffer.
 137      */
 138     private int count;
 139     private char buffer[] = new char[1024];
 140     private void growBuffer() {
 141         char newBuffer[] = new char[buffer.length * 2];
 142         System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
 143         buffer = newBuffer;
 144     }
 145 
 146     // The following two methods have been hand-inlined in
 147     // scanDocComment.  If you make changes here, you should
 148     // check to see if scanDocComment also needs modification.
 149     private void putc(int ch) {
 150         if (count == buffer.length) {
 151             growBuffer();
 152         }
 153         buffer[count++] = (char)ch;
 154     }
 155 
 156     private String bufferString() {
 157         return new String(buffer, 0, count);
 158     }
 159 
 160     /**
 161      * Create a scanner to scan an input stream.
 162      */
 163     public Scanner(Environment env, InputStream in) throws IOException {
 164         this.env = env;
 165         useInputStream(in);
 166     }
 167 
 168     /**
 169      * Setup input from the given input stream,
 170      * and scan the first token from it.
 171      */
 172     protected void useInputStream(InputStream in) throws IOException {
 173         try {
 174             this.in = new ScannerInputReader(env, in);
 175         } catch (Exception e) {
 176             env.setCharacterEncoding(null);
 177             this.in = new ScannerInputReader(env, in);
 178         }
 179 
 180         ch = this.in.read();
 181         prevPos = this.in.pos;
 182 
 183         scan();
 184     }
 185 
    /**
     * Create a scanner that is not yet attached to any input.
     * Only the error-reporting environment is recorded here; nothing is
     * read and no token is scanned until useInputStream() is called.
     */
    protected Scanner(Environment env) {
        this.env = env;
        // Expect the subclass to call useInputStream at the right time.
    }
 193 
 194     /**
 195      * Define a keyword.
 196      */
 197     private static void defineKeyword(int val) {
 198         Identifier.lookup(opNames[val]).setType(val);
 199     }
 200 
 201     /**
 202      * Initialized keyword and token Hashtables
 203      */
 204     static {
 205         // Statement keywords
 206         defineKeyword(FOR);
 207         defineKeyword(IF);
 208         defineKeyword(ELSE);
 209         defineKeyword(WHILE);
 210         defineKeyword(DO);
 211         defineKeyword(SWITCH);
 212         defineKeyword(CASE);
 213         defineKeyword(DEFAULT);
 214         defineKeyword(BREAK);
 215         defineKeyword(CONTINUE);
 216         defineKeyword(RETURN);
 217         defineKeyword(TRY);
 218         defineKeyword(CATCH);
 219         defineKeyword(FINALLY);
 220         defineKeyword(THROW);
 221 
 222         // Type defineKeywords
 223         defineKeyword(BYTE);
 224         defineKeyword(CHAR);
 225         defineKeyword(SHORT);
 226         defineKeyword(INT);
 227         defineKeyword(LONG);
 228         defineKeyword(FLOAT);
 229         defineKeyword(DOUBLE);
 230         defineKeyword(VOID);
 231         defineKeyword(BOOLEAN);
 232 
 233         // Expression keywords
 234         defineKeyword(INSTANCEOF);
 235         defineKeyword(TRUE);
 236         defineKeyword(FALSE);
 237         defineKeyword(NEW);
 238         defineKeyword(THIS);
 239         defineKeyword(SUPER);
 240         defineKeyword(NULL);
 241 
 242         // Declaration keywords
 243         defineKeyword(IMPORT);
 244         defineKeyword(CLASS);
 245         defineKeyword(EXTENDS);
 246         defineKeyword(IMPLEMENTS);
 247         defineKeyword(INTERFACE);
 248         defineKeyword(PACKAGE);
 249         defineKeyword(THROWS);
 250 
 251         // Modifier keywords
 252         defineKeyword(PRIVATE);
 253         defineKeyword(PUBLIC);
 254         defineKeyword(PROTECTED);
 255         defineKeyword(STATIC);
 256         defineKeyword(TRANSIENT);
 257         defineKeyword(SYNCHRONIZED);
 258         defineKeyword(NATIVE);
 259         defineKeyword(ABSTRACT);
 260         defineKeyword(VOLATILE);
 261         defineKeyword(FINAL);
 262         defineKeyword(STRICTFP);
 263 
 264         // reserved keywords
 265         defineKeyword(CONST);
 266         defineKeyword(GOTO);
 267     }
 268 
 269     /**
 270      * Scan a comment. This method should be
 271      * called once the initial /, * and the next
 272      * character have been read.
 273      */
 274     private void skipComment() throws IOException {
 275         while (true) {
 276             switch (ch) {
 277               case EOF:
 278                 env.error(pos, "eof.in.comment");
 279                 return;
 280 
 281               case '*':
 282                 if ((ch = in.read()) == '/')  {
 283                     ch = in.read();
 284                     return;
 285                 }
 286                 break;
 287 
 288               default:
 289                 ch = in.read();
 290                 break;
 291             }
 292         }
 293     }
 294 
 295     /**
 296      * Scan a doc comment. This method should be called
 297      * once the initial /, * and * have been read. It gathers
 298      * the content of the comment (witout leading spaces and '*'s)
 299      * in the string buffer.
 300      */
 301     private String scanDocComment() throws IOException {
 302         // Note: this method has been hand-optimized to yield
 303         // better performance.  This was done after it was noted
 304         // that javadoc spent a great deal of its time here.
 305         // This should also help the performance of the compiler
 306         // as well -- it scans the doc comments to find
 307         // @deprecated tags.
 308         //
 309         // The logic of the method has been completely rewritten
 310         // to avoid the use of flags that need to be looked at
 311         // for every character read.  Members that are accessed
 312         // more than once have been stored in local variables.
 313         // The methods putc() and bufferString() have been
 314         // inlined by hand.  Extra cases have been added to
 315         // switch statements to trick the compiler into generating
 316         // a tableswitch instead of a lookupswitch.
 317         //
 318         // This implementation aims to preserve the previous
 319         // behavior of this method.
 320 
 321         int c;
 322 
 323         // Put `in' in a local variable.
 324         final ScannerInputReader in = this.in;
 325 
 326         // We maintain the buffer locally rather than calling putc().
 327         char[] buffer = this.buffer;
 328         int count = 0;
 329 
 330         // We are called pointing at the second star of the doc
 331         // comment:
 332         //
 333         // Input: /** the rest of the comment ... */
 334         //          ^
 335         //
 336         // We rely on this in the code below.
 337 
 338         // Consume any number of stars.
 339         while ((c = in.read()) == '*')
 340             ;
 341 
 342         // Is the comment of the form /**/, /***/, /****/, etc.?
 343         if (c == '/') {
 344             // Set ch and return
 345             ch = in.read();
 346             return "";
 347         }
 348 
 349         // Skip a newline on the first line of the comment.
 350         if (c == '\n') {
 351             c = in.read();
 352         }
 353 
 354     outerLoop:
 355         // The outerLoop processes the doc comment, looping once
 356         // for each line.  For each line, it first strips off
 357         // whitespace, then it consumes any stars, then it
 358         // puts the rest of the line into our buffer.
 359         while (true) {
 360 
 361             // The wsLoop consumes whitespace from the beginning
 362             // of each line.
 363         wsLoop:
 364             while (true) {
 365                 switch (c) {
 366                 case ' ':
 367                 case '\t':
 368                     // We could check for other forms of whitespace
 369                     // as well, but this is left as is for minimum
 370                     // disturbance of functionality.
 371                     //
 372                     // Just skip whitespace.
 373                     c = in.read();
 374                     break;
 375 
 376                 // We have added extra cases here to trick the
 377                 // compiler into using a tableswitch instead of
 378                 // a lookupswitch.  They can be removed without
 379                 // a change in meaning.
 380                 case 10: case 11: case 12: case 13: case 14: case 15:
 381                 case 16: case 17: case 18: case 19: case 20: case 21:
 382                 case 22: case 23: case 24: case 25: case 26: case 27:
 383                 case 28: case 29: case 30: case 31:
 384                 default:
 385                     // We've seen something that isn't whitespace,
 386                     // jump out.
 387                     break wsLoop;
 388                 }
 389             } // end wsLoop.
 390 
 391             // Are there stars here?  If so, consume them all
 392             // and check for the end of comment.
 393             if (c == '*') {
 394                 // Skip all of the stars...
 395                 do {
 396                     c = in.read();
 397                 } while (c == '*');
 398 
 399                 // ...then check for the closing slash.
 400                 if (c == '/') {
 401                     // We're done with the doc comment.
 402                     // Set ch and break out.
 403                     ch = in.read();
 404                     break outerLoop;
 405                 }
 406             }
 407 
 408             // The textLoop processes the rest of the characters
 409             // on the line, adding them to our buffer.
 410         textLoop:
 411             while (true) {
 412                 switch (c) {
 413                 case EOF:
 414                     // We've seen a premature EOF.  Break out
 415                     // of the loop.
 416                     env.error(pos, "eof.in.comment");
 417                     ch = EOF;
 418                     break outerLoop;
 419 
 420                 case '*':
 421                     // Is this just a star?  Or is this the
 422                     // end of a comment?
 423                     c = in.read();
 424                     if (c == '/') {
 425                         // This is the end of the comment,
 426                         // set ch and return our buffer.
 427                         ch = in.read();
 428                         break outerLoop;
 429                     }
 430                     // This is just an ordinary star.  Add it to
 431                     // the buffer.
 432                     if (count == buffer.length) {
 433                         growBuffer();
 434                         buffer = this.buffer;
 435                     }
 436                     buffer[count++] = '*';
 437                     break;
 438 
 439                 case '\n':
 440                     // We've seen a newline.  Add it to our
 441                     // buffer and break out of this loop,
 442                     // starting fresh on a new line.
 443                     if (count == buffer.length) {
 444                         growBuffer();
 445                         buffer = this.buffer;
 446                     }
 447                     buffer[count++] = '\n';
 448                     c = in.read();
 449                     break textLoop;
 450 
 451                 // Again, the extra cases here are a trick
 452                 // to get the compiler to generate a tableswitch.
 453                 case 0: case 1: case 2: case 3: case 4: case 5:
 454                 case 6: case 7: case 8: case 11: case 12: case 13:
 455                 case 14: case 15: case 16: case 17: case 18: case 19:
 456                 case 20: case 21: case 22: case 23: case 24: case 25:
 457                 case 26: case 27: case 28: case 29: case 30: case 31:
 458                 case 32: case 33: case 34: case 35: case 36: case 37:
 459                 case 38: case 39: case 40:
 460                 default:
 461                     // Add the character to our buffer.
 462                     if (count == buffer.length) {
 463                         growBuffer();
 464                         buffer = this.buffer;
 465                     }
 466                     buffer[count++] = (char)c;
 467                     c = in.read();
 468                     break;
 469                 }
 470             } // end textLoop
 471         } // end outerLoop
 472 
 473         // We have scanned our doc comment.  It is stored in
 474         // buffer.  The previous implementation of scanDocComment
 475         // stripped off all trailing spaces and stars from the comment.
 476         // We will do this as well, so as to cause a minimum of
 477         // disturbance.  Is this what we want?
 478         if (count > 0) {
 479             int i = count - 1;
 480         trailLoop:
 481             while (i > -1) {
 482                 switch (buffer[i]) {
 483                 case ' ':
 484                 case '\t':
 485                 case '*':
 486                     i--;
 487                     break;
 488                 // And again, the extra cases here are a trick
 489                 // to get the compiler to generate a tableswitch.
 490                 case 0: case 1: case 2: case 3: case 4: case 5:
 491                 case 6: case 7: case 8: case 10: case 11: case 12:
 492                 case 13: case 14: case 15: case 16: case 17: case 18:
 493                 case 19: case 20: case 21: case 22: case 23: case 24:
 494                 case 25: case 26: case 27: case 28: case 29: case 30:
 495                 case 31: case 33: case 34: case 35: case 36: case 37:
 496                 case 38: case 39: case 40:
 497                 default:
 498                     break trailLoop;
 499                 }
 500             }
 501             count = i + 1;
 502 
 503             // Return the text of the doc comment.
 504             return new String(buffer, 0, count);
 505         } else {
 506             return "";
 507         }
 508     }
 509 
 510     /**
 511      * Scan a number. The first digit of the number should be the current
 512      * character.  We may be scanning hex, decimal, or octal at this point
 513      */
 514     @SuppressWarnings("fallthrough")
 515     private void scanNumber() throws IOException {
 516         boolean seenNonOctal = false;
 517         boolean overflow = false;
 518         boolean seenDigit = false; // used to detect invalid hex number 0xL
 519         radix = (ch == '0' ? 8 : 10);
 520         long value = ch - '0';
 521         count = 0;
 522         putc(ch);               // save character in buffer
 523     numberLoop:
 524         for (;;) {
 525             switch (ch = in.read()) {
 526               case '.':
 527                 if (radix == 16)
 528                     break numberLoop; // an illegal character
 529                 scanReal();
 530                 return;
 531 
 532               case '8': case '9':
 533                 // We can't yet throw an error if reading an octal.  We might
 534                 // discover we're really reading a real.
 535                 seenNonOctal = true;
 536                 // Fall through
 537               case '0': case '1': case '2': case '3':
 538               case '4': case '5': case '6': case '7':
 539                 seenDigit = true;
 540                 putc(ch);
 541                 if (radix == 10) {
 542                     overflow = overflow || (value * 10)/10 != value;
 543                     value = (value * 10) + (ch - '0');
 544                     overflow = overflow || (value - 1 < -1);
 545                 } else if (radix == 8) {
 546                     overflow = overflow || (value >>> 61) != 0;
 547                     value = (value << 3) + (ch - '0');
 548                 } else {
 549                     overflow = overflow || (value >>> 60) != 0;
 550                     value = (value << 4) + (ch - '0');
 551                 }
 552                 break;
 553 
 554               case 'd': case 'D': case 'e': case 'E': case 'f': case 'F':
 555                 if (radix != 16) {
 556                     scanReal();
 557                     return;
 558                 }
 559                 // fall through
 560               case 'a': case 'A': case 'b': case 'B': case 'c': case 'C':
 561                 seenDigit = true;
 562                 putc(ch);
 563                 if (radix != 16)
 564                     break numberLoop; // an illegal character
 565                 overflow = overflow || (value >>> 60) != 0;
 566                 value = (value << 4) + 10 +
 567                          Character.toLowerCase((char)ch) - 'a';
 568                 break;
 569 
 570               case 'l': case 'L':
 571                 ch = in.read(); // skip over 'l'
 572                 longValue = value;
 573                 token = LONGVAL;
 574                 break numberLoop;
 575 
 576               case 'x': case 'X':
 577                 // if the first character is a '0' and this is the second
 578                 // letter, then read in a hexadecimal number.  Otherwise, error.
 579                 if (count == 1 && radix == 8) {
 580                     radix = 16;
 581                     seenDigit = false;
 582                     break;
 583                 } else {
 584                     // we'll get an illegal character error
 585                     break numberLoop;
 586                 }
 587 
 588               default:
 589                 intValue = (int)value;
 590                 token = INTVAL;
 591                 break numberLoop;
 592             }
 593         } // while true
 594 
 595         // We have just finished reading the number.  The next thing better
 596         // not be a letter or digit.
 597         // Note:  There will be deprecation warnings against these uses
 598         // of Character.isJavaLetterOrDigit and Character.isJavaLetter.
 599         // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs.
 600         if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
 601             env.error(in.pos, "invalid.number");
 602             do { ch = in.read(); }
 603             while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
 604             intValue = 0;
 605             token = INTVAL;
 606         } else if (radix == 8 && seenNonOctal) {
 607             // A bogus octal literal.
 608             intValue = 0;
 609             token = INTVAL;
 610             env.error(pos, "invalid.octal.number");
 611         } else if (radix == 16 && seenDigit == false) {
 612             // A hex literal with no digits, 0xL, for example.
 613             intValue = 0;
 614             token = INTVAL;
 615             env.error(pos, "invalid.hex.number");
 616         } else {
 617             if (token == INTVAL) {
 618                 // Check for overflow.  Note that base 10 literals
 619                 // have different rules than base 8 and 16.
 620                 overflow = overflow ||
 621                     (value & 0xFFFFFFFF00000000L) != 0 ||
 622                     (radix == 10 && value > 2147483648L);
 623 
 624                 if (overflow) {
 625                     intValue = 0;
 626 
 627                     // Give a specific error message which tells
 628                     // the user the range.
 629                     switch (radix) {
 630                     case 8:
 631                         env.error(pos, "overflow.int.oct");
 632                         break;
 633                     case 10:
 634                         env.error(pos, "overflow.int.dec");
 635                         break;
 636                     case 16:
 637                         env.error(pos, "overflow.int.hex");
 638                         break;
 639                     default:
 640                         throw new CompilerError("invalid radix");
 641                     }
 642                 }
 643             } else {
 644                 if (overflow) {
 645                     longValue = 0;
 646 
 647                     // Give a specific error message which tells
 648                     // the user the range.
 649                     switch (radix) {
 650                     case 8:
 651                         env.error(pos, "overflow.long.oct");
 652                         break;
 653                     case 10:
 654                         env.error(pos, "overflow.long.dec");
 655                         break;
 656                     case 16:
 657                         env.error(pos, "overflow.long.hex");
 658                         break;
 659                     default:
 660                         throw new CompilerError("invalid radix");
 661                     }
 662                 }
 663             }
 664         }
 665     }
 666 
 667     /**
 668      * Scan a float.  We are either looking at the decimal, or we have already
 669      * seen it and put it into the buffer.  We haven't seen an exponent.
 670      * Scan a float.  Should be called with the current character is either
 671      * the 'e', 'E' or '.'
 672      */
 673     @SuppressWarnings("fallthrough")
 674     private void scanReal() throws IOException {
 675         boolean seenExponent = false;
 676         boolean isSingleFloat = false;
 677         char lastChar;
 678         if (ch == '.') {
 679             putc(ch);
 680             ch = in.read();
 681         }
 682 
 683     numberLoop:
 684         for ( ; ; ch = in.read()) {
 685             switch (ch) {
 686                 case '0': case '1': case '2': case '3': case '4':
 687                 case '5': case '6': case '7': case '8': case '9':
 688                     putc(ch);
 689                     break;
 690 
 691                 case 'e': case 'E':
 692                     if (seenExponent)
 693                         break numberLoop; // we'll get a format error
 694                     putc(ch);
 695                     seenExponent = true;
 696                     break;
 697 
 698                 case '+': case '-':
 699                     lastChar = buffer[count - 1];
 700                     if (lastChar != 'e' && lastChar != 'E')
 701                         break numberLoop; // this isn't an error, though!
 702                     putc(ch);
 703                     break;
 704 
 705                 case 'f': case 'F':
 706                     ch = in.read(); // skip over 'f'
 707                     isSingleFloat = true;
 708                     break numberLoop;
 709 
 710                 case 'd': case 'D':
 711                     ch = in.read(); // skip over 'd'
 712                     // fall through
 713                 default:
 714                     break numberLoop;
 715             } // sswitch
 716         } // loop
 717 
 718         // we have just finished reading the number.  The next thing better
 719         // not be a letter or digit.
 720         if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
 721             env.error(in.pos, "invalid.number");
 722             do { ch = in.read(); }
 723             while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
 724             doubleValue = 0;
 725             token = DOUBLEVAL;
 726         } else {
 727             token = isSingleFloat ? FLOATVAL : DOUBLEVAL;
 728             try {
 729                 lastChar = buffer[count - 1];
 730                 if (lastChar == 'e' || lastChar == 'E'
 731                        || lastChar == '+' || lastChar == '-') {
 732                     env.error(in.pos -1, "float.format");
 733                 } else if (isSingleFloat) {
 734                     String string = bufferString();
 735                     floatValue = Float.valueOf(string).floatValue();
 736                     if (Float.isInfinite(floatValue)) {
 737                         env.error(pos, "overflow.float");
 738                     } else if (floatValue == 0 && !looksLikeZero(string)) {
 739                         env.error(pos, "underflow.float");
 740                     }
 741                 } else {
 742                     String string = bufferString();
 743                     doubleValue = Double.valueOf(string).doubleValue();
 744                     if (Double.isInfinite(doubleValue)) {
 745                         env.error(pos, "overflow.double");
 746                     } else if (doubleValue == 0 && !looksLikeZero(string)) {
 747                         env.error(pos, "underflow.double");
 748                     }
 749                 }
 750             } catch (NumberFormatException ee) {
 751                 env.error(pos, "float.format");
 752                 doubleValue = 0;
 753                 floatValue = 0;
 754             }
 755         }
 756         return;
 757     }
 758 
 759     // We have a token that parses as a number.  Is this token possibly zero?
 760     // i.e. does it have a non-zero value in the mantissa?
 761     private static boolean looksLikeZero(String token) {
 762         int length = token.length();
 763         for (int i = 0; i < length; i++) {
 764             switch (token.charAt(i)) {
 765                 case 0: case '.':
 766                     continue;
 767                 case '1': case '2': case '3': case '4': case '5':
 768                 case '6': case '7': case '8': case '9':
 769                     return false;
 770                 case 'e': case 'E': case 'f': case 'F':
 771                     return true;
 772             }
 773         }
 774         return true;
 775     }
 776 
 777     /**
 778      * Scan an escape character.
 779      * @return the character or -1 if it escaped an
 780      * end-of-line.
 781      */
 782     private int scanEscapeChar() throws IOException {
 783         long p = in.pos;
 784 
 785         switch (ch = in.read()) {
 786           case '0': case '1': case '2': case '3':
 787           case '4': case '5': case '6': case '7': {
 788             int n = ch - '0';
 789             for (int i = 2 ; i > 0 ; i--) {
 790                 switch (ch = in.read()) {
 791                   case '0': case '1': case '2': case '3':
 792                   case '4': case '5': case '6': case '7':
 793                     n = (n << 3) + ch - '0';
 794                     break;
 795 
 796                   default:
 797                     if (n > 0xFF) {
 798                         env.error(p, "invalid.escape.char");
 799                     }
 800                     return n;
 801                 }
 802             }
 803             ch = in.read();
 804             if (n > 0xFF) {
 805                 env.error(p, "invalid.escape.char");
 806             }
 807             return n;
 808           }
 809 
 810           case 'r':  ch = in.read(); return '\r';
 811           case 'n':  ch = in.read(); return '\n';
 812           case 'f':  ch = in.read(); return '\f';
 813           case 'b':  ch = in.read(); return '\b';
 814           case 't':  ch = in.read(); return '\t';
 815           case '\\': ch = in.read(); return '\\';
 816           case '\"': ch = in.read(); return '\"';
 817           case '\'': ch = in.read(); return '\'';
 818         }
 819 
 820         env.error(p, "invalid.escape.char");
 821         ch = in.read();
 822         return -1;
 823     }
 824 
 825     /**
 826      * Scan a string. The current character
 827      * should be the opening " of the string.
 828      */
 829     private void scanString() throws IOException {
 830         token = STRINGVAL;
 831         count = 0;
 832         ch = in.read();
 833 
 834         // Scan a String
 835         while (true) {
 836             switch (ch) {
 837               case EOF:
 838                 env.error(pos, "eof.in.string");
 839                 stringValue = bufferString();
 840                 return;
 841 
 842               case '\r':
 843               case '\n':
 844                 ch = in.read();
 845                 env.error(pos, "newline.in.string");
 846                 stringValue = bufferString();
 847                 return;
 848 
 849               case '"':
 850                 ch = in.read();
 851                 stringValue = bufferString();
 852                 return;
 853 
 854               case '\\': {
 855                 int c = scanEscapeChar();
 856                 if (c >= 0) {
 857                     putc((char)c);
 858                 }
 859                 break;
 860               }
 861 
 862               default:
 863                 putc(ch);
 864                 ch = in.read();
 865                 break;
 866             }
 867         }
 868     }
 869 
 870     /**
 871      * Scan a character. The current character should be
 872      * the opening ' of the character constant.
 873      */
 874     private void scanCharacter() throws IOException {
 875         token = CHARVAL;
 876 
 877         switch (ch = in.read()) {
 878           case '\\':
 879             int c = scanEscapeChar();
 880             charValue = (char)((c >= 0) ? c : 0);
 881             break;
 882 
 883         case '\'':
 884             // There are two standard problems this case deals with.  One
 885             // is the malformed single quote constant (i.e. the programmer
 886             // uses ''' instead of '\'') and the other is the empty
 887             // character constant (i.e. '').  Just consume any number of
 888             // single quotes and emit an error message.
 889             charValue = 0;
 890             env.error(pos, "invalid.char.constant");
 891             ch = in.read();
 892             while (ch == '\'') {
 893                 ch = in.read();
 894             }
 895             return;
 896 
 897           case '\r':
 898           case '\n':
 899             charValue = 0;
 900             env.error(pos, "invalid.char.constant");
 901             return;
 902 
 903           default:
 904             charValue = (char)ch;
 905             ch = in.read();
 906             break;
 907         }
 908 
 909         if (ch == '\'') {
 910             ch = in.read();
 911         } else {
 912             env.error(pos, "invalid.char.constant");
 913             while (true) {
 914                 switch (ch) {
 915                   case '\'':
 916                     ch = in.read();
 917                     return;
 918                   case ';':
 919                   case '\n':
 920                   case EOF:
 921                     return;
 922                   default:
 923                     ch = in.read();
 924                 }
 925             }
 926         }
 927     }
 928 
 929     /**
 930      * Scan an Identifier. The current character should
 931      * be the first character of the identifier.
 932      */
 933     private void scanIdentifier() throws IOException {
 934         count = 0;
 935 
 936         while (true) {
 937             putc(ch);
 938             switch (ch = in.read()) {
 939               case 'a': case 'b': case 'c': case 'd': case 'e':
 940               case 'f': case 'g': case 'h': case 'i': case 'j':
 941               case 'k': case 'l': case 'm': case 'n': case 'o':
 942               case 'p': case 'q': case 'r': case 's': case 't':
 943               case 'u': case 'v': case 'w': case 'x': case 'y':
 944               case 'z':
 945               case 'A': case 'B': case 'C': case 'D': case 'E':
 946               case 'F': case 'G': case 'H': case 'I': case 'J':
 947               case 'K': case 'L': case 'M': case 'N': case 'O':
 948               case 'P': case 'Q': case 'R': case 'S': case 'T':
 949               case 'U': case 'V': case 'W': case 'X': case 'Y':
 950               case 'Z':
 951               case '0': case '1': case '2': case '3': case '4':
 952               case '5': case '6': case '7': case '8': case '9':
 953               case '$': case '_':
 954                 break;
 955 
 956               default:
 957                 if (!Character.isJavaLetterOrDigit((char)ch)) {
 958                     idValue = Identifier.lookup(bufferString());
 959                     token = idValue.getType();
 960                     return;
 961                 }
 962             }
 963         }
 964     }
 965 
 966     /**
 967      * The ending position of the current token
 968      */
 969     // Note: This should be part of the pos itself.
 970     public long getEndPos() {
 971         return in.pos;
 972     }
 973 
 974     /**
 975      * If the current token is IDENT, return the identifier occurrence.
 976      * It will be freshly allocated.
 977      */
 978     public IdentifierToken getIdToken() {
 979         return (token != IDENT) ? null : new IdentifierToken(pos, idValue);
 980     }
 981 
 982     /**
 983      * Scan the next token.
 984      * @return the position of the previous token.
 985      */
 986    public long scan() throws IOException {
 987        return xscan();
 988    }
 989 
 990     @SuppressWarnings("fallthrough")
 991     protected long xscan() throws IOException {
 992         final ScannerInputReader in = this.in;
 993         long retPos = pos;
 994         prevPos = in.pos;
 995         docComment = null;
 996         while (true) {
 997             pos = in.pos;
 998 
 999             switch (ch) {
1000               case EOF:
1001                 token = EOF;
1002                 return retPos;
1003 
1004               case '\n':
1005                 if (scanComments) {
1006                     ch = ' ';
1007                     // Avoid this path the next time around.
1008                     // Do not just call in.read; we want to present
1009                     // a null token (and also avoid read-ahead).
1010                     token = COMMENT;
1011                     return retPos;
1012                 }
1013                 // Fall through 
1014               case ' ':
1015               case '\t':
1016               case '\f':
1017                 ch = in.read();
1018                 break;
1019 
1020               case '/':
1021                 switch (ch = in.read()) {
1022                   case '/':
1023                     // Parse a // comment
1024                     while (((ch = in.read()) != EOF) && (ch != '\n'));
1025                     if (scanComments) {
1026                         token = COMMENT;
1027                         return retPos;
1028                     }
1029                     break;
1030 
1031                   case '*':
1032                     ch = in.read();
1033                     if (ch == '*') {
1034                         docComment = scanDocComment();
1035                     } else {
1036                         skipComment();
1037                     }
1038                     if (scanComments) {
1039                         return retPos;
1040                     }
1041                     break;
1042 
1043                   case '=':
1044                     ch = in.read();
1045                     token = ASGDIV;
1046                     return retPos;
1047 
1048                   default:
1049                     token = DIV;
1050                     return retPos;
1051                 }
1052                 break;
1053 
1054               case '"':
1055                 scanString();
1056                 return retPos;
1057 
1058               case '\'':
1059                 scanCharacter();
1060                 return retPos;
1061 
1062               case '0': case '1': case '2': case '3': case '4':
1063               case '5': case '6': case '7': case '8': case '9':
1064                 scanNumber();
1065                 return retPos;
1066 
1067               case '.':
1068                 switch (ch = in.read()) {
1069                   case '0': case '1': case '2': case '3': case '4':
1070                   case '5': case '6': case '7': case '8': case '9':
1071                     count = 0;
1072                     putc('.');
1073                     scanReal();
1074                     break;
1075                   default:
1076                     token = FIELD;
1077                 }
1078                 return retPos;
1079 
1080               case '{':
1081                 ch = in.read();
1082                 token = LBRACE;
1083                 return retPos;
1084 
1085               case '}':
1086                 ch = in.read();
1087                 token = RBRACE;
1088                 return retPos;
1089 
1090               case '(':
1091                 ch = in.read();
1092                 token = LPAREN;
1093                 return retPos;
1094 
1095               case ')':
1096                 ch = in.read();
1097                 token = RPAREN;
1098                 return retPos;
1099 
1100               case '[':
1101                 ch = in.read();
1102                 token = LSQBRACKET;
1103                 return retPos;
1104 
1105               case ']':
1106                 ch = in.read();
1107                 token = RSQBRACKET;
1108                 return retPos;
1109 
1110               case ',':
1111                 ch = in.read();
1112                 token = COMMA;
1113                 return retPos;
1114 
1115               case ';':
1116                 ch = in.read();
1117                 token = SEMICOLON;
1118                 return retPos;
1119 
1120               case '?':
1121                 ch = in.read();
1122                 token = QUESTIONMARK;
1123                 return retPos;
1124 
1125               case '~':
1126                 ch = in.read();
1127                 token = BITNOT;
1128                 return retPos;
1129 
1130               case ':':
1131                 ch = in.read();
1132                 token = COLON;
1133                 return retPos;
1134 
1135               case '-':
1136                 switch (ch = in.read()) {
1137                   case '-':
1138                     ch = in.read();
1139                     token = DEC;
1140                     return retPos;
1141 
1142                   case '=':
1143                     ch = in.read();
1144                     token = ASGSUB;
1145                     return retPos;
1146                 }
1147                 token = SUB;
1148                 return retPos;
1149 
1150               case '+':
1151                 switch (ch = in.read()) {
1152                   case '+':
1153                     ch = in.read();
1154                     token = INC;
1155                     return retPos;
1156 
1157                   case '=':
1158                     ch = in.read();
1159                     token = ASGADD;
1160                     return retPos;
1161                 }
1162                 token = ADD;
1163                 return retPos;
1164 
1165               case '<':
1166                 switch (ch = in.read()) {
1167                   case '<':
1168                     if ((ch = in.read()) == '=') {
1169                         ch = in.read();
1170                         token = ASGLSHIFT;
1171                         return retPos;
1172                     }
1173                     token = LSHIFT;
1174                     return retPos;
1175 
1176                   case '=':
1177                     ch = in.read();
1178                     token = LE;
1179                     return retPos;
1180                 }
1181                 token = LT;
1182                 return retPos;
1183 
1184               case '>':
1185                 switch (ch = in.read()) {
1186                   case '>':
1187                     switch (ch = in.read()) {
1188                       case '=':
1189                         ch = in.read();
1190                         token = ASGRSHIFT;
1191                         return retPos;
1192 
1193                       case '>':
1194                         if ((ch = in.read()) == '=') {
1195                             ch = in.read();
1196                             token = ASGURSHIFT;
1197                             return retPos;
1198                         }
1199                         token = URSHIFT;
1200                         return retPos;
1201                     }
1202                     token = RSHIFT;
1203                     return retPos;
1204 
1205                   case '=':
1206                     ch = in.read();
1207                     token = GE;
1208                     return retPos;
1209                 }
1210                 token = GT;
1211                 return retPos;
1212 
1213               case '|':
1214                 switch (ch = in.read()) {
1215                   case '|':
1216                     ch = in.read();
1217                     token = OR;
1218                     return retPos;
1219 
1220                   case '=':
1221                     ch = in.read();
1222                     token = ASGBITOR;
1223                     return retPos;
1224                 }
1225                 token = BITOR;
1226                 return retPos;
1227 
1228               case '&':
1229                 switch (ch = in.read()) {
1230                   case '&':
1231                     ch = in.read();
1232                     token = AND;
1233                     return retPos;
1234 
1235                   case '=':
1236                     ch = in.read();
1237                     token = ASGBITAND;
1238                     return retPos;
1239                 }
1240                 token = BITAND;
1241                 return retPos;
1242 
1243               case '=':
1244                 if ((ch = in.read()) == '=') {
1245                     ch = in.read();
1246                     token = EQ;
1247                     return retPos;
1248                 }
1249                 token = ASSIGN;
1250                 return retPos;
1251 
1252               case '%':
1253                 if ((ch = in.read()) == '=') {
1254                     ch = in.read();
1255                     token = ASGREM;
1256                     return retPos;
1257                 }
1258                 token = REM;
1259                 return retPos;
1260 
1261               case '^':
1262                 if ((ch = in.read()) == '=') {
1263                     ch = in.read();
1264                     token = ASGBITXOR;
1265                     return retPos;
1266                 }
1267                 token = BITXOR;
1268                 return retPos;
1269 
1270               case '!':
1271                 if ((ch = in.read()) == '=') {
1272                     ch = in.read();
1273                     token = NE;
1274                     return retPos;
1275                 }
1276                 token = NOT;
1277                 return retPos;
1278 
1279               case '*':
1280                 if ((ch = in.read()) == '=') {
1281                     ch = in.read();
1282                     token = ASGMUL;
1283                     return retPos;
1284                 }
1285                 token = MUL;
1286                 return retPos;
1287 
1288               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1289               case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1290               case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1291               case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1292               case 'y': case 'z':
1293               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1294               case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1295               case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1296               case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1297               case 'Y': case 'Z':
1298               case '$': case '_':
1299                 scanIdentifier();
1300                 return retPos;
1301 
1302               case '\u001a':
1303                 // Our one concession to DOS.
1304                 if ((ch = in.read()) == EOF) {
1305                     token = EOF;
1306                     return retPos;
1307                 }
1308                 env.error(pos, "funny.char");
1309                 ch = in.read();
1310                 break;
1311 
1312 
1313               default:
1314                 if (Character.isJavaLetter((char)ch)) {
1315                     scanIdentifier();
1316                     return retPos;
1317                 }
1318                 env.error(pos, "funny.char");
1319                 ch = in.read();
1320                 break;
1321             }
1322         }
1323     }
1324 
1325     /**
1326      * Scan to a matching '}', ']' or ')'. The current token must be
1327      * a '{', '[' or '(';
1328      */
1329     public void match(int open, int close) throws IOException {
1330         int depth = 1;
1331 
1332         while (true) {
1333             scan();
1334             if (token == open) {
1335                 depth++;
1336             } else if (token == close) {
1337                 if (--depth == 0) {
1338                     return;
1339                 }
1340             } else if (token == EOF) {
1341                 env.error(pos, "unbalanced.paren");
1342                 return;
1343             }
1344         }
1345     }
1346 }