1 /*
   2  * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.tools.java;
  27 
  28 import java.io.IOException;
  29 import java.io.InputStream;
  30 import java.util.Hashtable;
  31 
/**
 * A Scanner for Java tokens. Errors are reported
 * to the environment object.<p>
 *
 * The scanner keeps track of the current token,
 * the value of the current token (if any), and the start
 * position of the current token.<p>
 *
 * The scan() method advances the scanner to the next
 * token in the input.<p>
 *
 * The match() method is used to quickly match opening
 * brackets (i.e. '(', '{', or '[') with their closing
 * counterpart. This is useful during error recovery.<p>
 *
 * A position consists of: ((linenr << WHEREOFFSETBITS) | offset);
 * this means that both the line number and the exact offset into
 * the file are encoded in each position value.<p>
 *
 * The compiler treats either "\n", "\r" or "\r\n" as the
 * end of a line.<p>
 *
 * WARNING: The contents of this source file are not part of any
 * supported API.  Code that depends on them does so at its own risk:
 * they are subject to change or removal without notice.
 *
 * @author      Arthur van Hoff
 */
  60 
  61 public
  62 class Scanner implements Constants {
  63     /**
  64      * The increment for each character.
  65      */
  66     public static final long OFFSETINC = 1;
  67 
  68     /**
  69      * The increment for each line.
  70      */
  71     public static final long LINEINC = 1L << WHEREOFFSETBITS;
  72 
  73     /**
  74      * End of input
  75      */
  76     public static final int EOF = -1;
  77 
  78     /**
  79      * Where errors are reported
  80      */
  81     public Environment env;
  82 
  83     /**
  84      * Input reader
  85      */
  86     protected ScannerInputReader in;
  87 
  88     /**
  89      * If true, present all comments as tokens.
  90      * Contents are not saved, but positions are recorded accurately,
  91      * so the comment can be recovered from the text.
  92      * Line terminations are also returned as comment tokens,
  93      * and may be distinguished by their start and end positions,
  94      * which are equal (meaning, these tokens contain no chars).
  95      */
  96    public boolean scanComments = false;
  97 
  98     /**
  99      * Current token
 100      */
 101     public int token;
 102 
 103     /**
 104      * The position of the current token
 105      */
 106     public long pos;
 107 
 108     /**
 109      * The position of the previous token
 110      */
 111     public long prevPos;
 112 
 113     /**
 114      * The current character
 115      */
 116     protected int ch;
 117 
 118     /*
 119      * Token values.
 120      */
 121     public char charValue;
 122     public int intValue;
 123     public long longValue;
 124     public float floatValue;
 125     public double doubleValue;
 126     public String stringValue;
 127     public Identifier idValue;
 128     public int radix;   // Radix, when reading int or long
 129 
 130     /*
 131      * A doc comment preceding the most recent token
 132      */
 133     public String docComment;
 134 
 135     /*
 136      * A growable character buffer.
 137      */
 138     private int count;
 139     private char buffer[] = new char[1024];
 140     private void growBuffer() {
 141         char newBuffer[] = new char[buffer.length * 2];
 142         System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
 143         buffer = newBuffer;
 144     }
 145 
 146     // The following two methods have been hand-inlined in
 147     // scanDocComment.  If you make changes here, you should
 148     // check to see if scanDocComment also needs modification.
 149     private void putc(int ch) {
 150         if (count == buffer.length) {
 151             growBuffer();
 152         }
 153         buffer[count++] = (char)ch;
 154     }
 155 
 156     private String bufferString() {
 157         return new String(buffer, 0, count);
 158     }
 159 
 160     /**
 161      * Create a scanner to scan an input stream.
 162      */
 163     public Scanner(Environment env, InputStream in) throws IOException {
 164         this.env = env;
 165         useInputStream(in);
 166     }
 167 
 168     /**
 169      * Setup input from the given input stream,
 170      * and scan the first token from it.
 171      */
 172     protected void useInputStream(InputStream in) throws IOException {
 173         try {
 174             this.in = new ScannerInputReader(env, in);
 175         } catch (Exception e) {
 176             env.setCharacterEncoding(null);
 177             this.in = new ScannerInputReader(env, in);
 178         }
 179 
 180         ch = this.in.read();
 181         prevPos = this.in.pos;
 182 
 183         scan();
 184     }
 185 
    /**
     * Create a scanner that has no input stream yet.  The subclass is
     * expected to call useInputStream() at the right time.
     */
 189     protected Scanner(Environment env) {
 190         this.env = env;
 191         // Expect the subclass to call useInputStream at the right time.
 192     }
 193 
 194     /**
 195      * Define a keyword.
 196      */
 197     private static void defineKeyword(int val) {
 198         Identifier.lookup(opNames[val]).setType(val);
 199     }
 200 
 201     /**
 202      * Initialized keyword and token Hashtables
 203      */
 204     static {
 205         // Statement keywords
 206         defineKeyword(FOR);
 207         defineKeyword(IF);
 208         defineKeyword(ELSE);
 209         defineKeyword(WHILE);
 210         defineKeyword(DO);
 211         defineKeyword(SWITCH);
 212         defineKeyword(CASE);
 213         defineKeyword(DEFAULT);
 214         defineKeyword(BREAK);
 215         defineKeyword(CONTINUE);
 216         defineKeyword(RETURN);
 217         defineKeyword(TRY);
 218         defineKeyword(CATCH);
 219         defineKeyword(FINALLY);
 220         defineKeyword(THROW);
 221 
 222         // Type defineKeywords
 223         defineKeyword(BYTE);
 224         defineKeyword(CHAR);
 225         defineKeyword(SHORT);
 226         defineKeyword(INT);
 227         defineKeyword(LONG);
 228         defineKeyword(FLOAT);
 229         defineKeyword(DOUBLE);
 230         defineKeyword(VOID);
 231         defineKeyword(BOOLEAN);
 232 
 233         // Expression keywords
 234         defineKeyword(INSTANCEOF);
 235         defineKeyword(TRUE);
 236         defineKeyword(FALSE);
 237         defineKeyword(NEW);
 238         defineKeyword(THIS);
 239         defineKeyword(SUPER);
 240         defineKeyword(NULL);
 241 
 242         // Declaration keywords
 243         defineKeyword(IMPORT);
 244         defineKeyword(CLASS);
 245         defineKeyword(EXTENDS);
 246         defineKeyword(IMPLEMENTS);
 247         defineKeyword(INTERFACE);
 248         defineKeyword(PACKAGE);
 249         defineKeyword(THROWS);
 250 
 251         // Modifier keywords
 252         defineKeyword(PRIVATE);
 253         defineKeyword(PUBLIC);
 254         defineKeyword(PROTECTED);
 255         defineKeyword(STATIC);
 256         defineKeyword(TRANSIENT);
 257         defineKeyword(SYNCHRONIZED);
 258         defineKeyword(NATIVE);
 259         defineKeyword(ABSTRACT);
 260         defineKeyword(VOLATILE);
 261         defineKeyword(FINAL);
 262         defineKeyword(STRICTFP);
 263 
 264         // reserved keywords
 265         defineKeyword(CONST);
 266         defineKeyword(GOTO);
 267     }
 268 
 269     /**
 270      * Scan a comment. This method should be
 271      * called once the initial /, * and the next
 272      * character have been read.
 273      */
 274     private void skipComment() throws IOException {
 275         while (true) {
 276             switch (ch) {
 277               case EOF:
 278                 env.error(pos, "eof.in.comment");
 279                 return;
 280 
 281               case '*':
 282                 if ((ch = in.read()) == '/')  {
 283                     ch = in.read();
 284                     return;
 285                 }
 286                 break;
 287 
 288               default:
 289                 ch = in.read();
 290                 break;
 291             }
 292         }
 293     }
 294 
    /**
     * Scan a doc comment. This method should be called
     * once the initial /, * and * have been read. It gathers
     * the content of the comment (without leading spaces and '*'s)
     * in the string buffer.
     */
 301     private String scanDocComment() throws IOException {
 302         // Note: this method has been hand-optimized to yield
 303         // better performance.  This was done after it was noted
 304         // that javadoc spent a great deal of its time here.
 305         // This should also help the performance of the compiler
 306         // as well -- it scans the doc comments to find
 307         // @deprecated tags.
 308         //
 309         // The logic of the method has been completely rewritten
 310         // to avoid the use of flags that need to be looked at
 311         // for every character read.  Members that are accessed
 312         // more than once have been stored in local variables.
 313         // The methods putc() and bufferString() have been
 314         // inlined by hand.  Extra cases have been added to
 315         // switch statements to trick the compiler into generating
 316         // a tableswitch instead of a lookupswitch.
 317         //
 318         // This implementation aims to preserve the previous
 319         // behavior of this method.
 320 
 321         int c;
 322 
 323         // Put `in' in a local variable.
 324         final ScannerInputReader in = this.in;
 325 
 326         // We maintain the buffer locally rather than calling putc().
 327         char[] buffer = this.buffer;
 328         int count = 0;
 329 
 330         // We are called pointing at the second star of the doc
 331         // comment:
 332         //
 333         // Input: /** the rest of the comment ... */
 334         //          ^
 335         //
 336         // We rely on this in the code below.
 337 
 338         // Consume any number of stars.
 339         while ((c = in.read()) == '*')
 340             ;
 341 
 342         // Is the comment of the form /**/, /***/, /****/, etc.?
 343         if (c == '/') {
 344             // Set ch and return
 345             ch = in.read();
 346             return "";
 347         }
 348 
 349         // Skip a newline on the first line of the comment.
 350         if (c == '\n') {
 351             c = in.read();
 352         }
 353 
 354     outerLoop:
 355         // The outerLoop processes the doc comment, looping once
 356         // for each line.  For each line, it first strips off
 357         // whitespace, then it consumes any stars, then it
 358         // puts the rest of the line into our buffer.
 359         while (true) {
 360 
 361             // The wsLoop consumes whitespace from the beginning
 362             // of each line.
 363         wsLoop:
 364             while (true) {
 365                 switch (c) {
 366                 case ' ':
 367                 case '\t':
 368                     // We could check for other forms of whitespace
 369                     // as well, but this is left as is for minimum
 370                     // disturbance of functionality.
 371                     //
 372                     // Just skip whitespace.
 373                     c = in.read();
 374                     break;
 375 
 376                 // We have added extra cases here to trick the
 377                 // compiler into using a tableswitch instead of
 378                 // a lookupswitch.  They can be removed without
 379                 // a change in meaning.
 380                 case 10: case 11: case 12: case 13: case 14: case 15:
 381                 case 16: case 17: case 18: case 19: case 20: case 21:
 382                 case 22: case 23: case 24: case 25: case 26: case 27:
 383                 case 28: case 29: case 30: case 31:
 384                 default:
 385                     // We've seen something that isn't whitespace,
 386                     // jump out.
 387                     break wsLoop;
 388                 }
 389             } // end wsLoop.
 390 
 391             // Are there stars here?  If so, consume them all
 392             // and check for the end of comment.
 393             if (c == '*') {
 394                 // Skip all of the stars...
 395                 do {
 396                     c = in.read();
 397                 } while (c == '*');
 398 
 399                 // ...then check for the closing slash.
 400                 if (c == '/') {
 401                     // We're done with the doc comment.
 402                     // Set ch and break out.
 403                     ch = in.read();
 404                     break outerLoop;
 405                 }
 406             }
 407 
 408             // The textLoop processes the rest of the characters
 409             // on the line, adding them to our buffer.
 410         textLoop:
 411             while (true) {
 412                 switch (c) {
 413                 case EOF:
 414                     // We've seen a premature EOF.  Break out
 415                     // of the loop.
 416                     env.error(pos, "eof.in.comment");
 417                     ch = EOF;
 418                     break outerLoop;
 419 
 420                 case '*':
 421                     // Is this just a star?  Or is this the
 422                     // end of a comment?
 423                     c = in.read();
 424                     if (c == '/') {
 425                         // This is the end of the comment,
 426                         // set ch and return our buffer.
 427                         ch = in.read();
 428                         break outerLoop;
 429                     }
 430                     // This is just an ordinary star.  Add it to
 431                     // the buffer.
 432                     if (count == buffer.length) {
 433                         growBuffer();
 434                         buffer = this.buffer;
 435                     }
 436                     buffer[count++] = '*';
 437                     break;
 438 
 439                 case '\n':
 440                     // We've seen a newline.  Add it to our
 441                     // buffer and break out of this loop,
 442                     // starting fresh on a new line.
 443                     if (count == buffer.length) {
 444                         growBuffer();
 445                         buffer = this.buffer;
 446                     }
 447                     buffer[count++] = '\n';
 448                     c = in.read();
 449                     break textLoop;
 450 
 451                 // Again, the extra cases here are a trick
 452                 // to get the compiler to generate a tableswitch.
 453                 case 0: case 1: case 2: case 3: case 4: case 5:
 454                 case 6: case 7: case 8: case 11: case 12: case 13:
 455                 case 14: case 15: case 16: case 17: case 18: case 19:
 456                 case 20: case 21: case 22: case 23: case 24: case 25:
 457                 case 26: case 27: case 28: case 29: case 30: case 31:
 458                 case 32: case 33: case 34: case 35: case 36: case 37:
 459                 case 38: case 39: case 40:
 460                 default:
 461                     // Add the character to our buffer.
 462                     if (count == buffer.length) {
 463                         growBuffer();
 464                         buffer = this.buffer;
 465                     }
 466                     buffer[count++] = (char)c;
 467                     c = in.read();
 468                     break;
 469                 }
 470             } // end textLoop
 471         } // end outerLoop
 472 
 473         // We have scanned our doc comment.  It is stored in
 474         // buffer.  The previous implementation of scanDocComment
 475         // stripped off all trailing spaces and stars from the comment.
 476         // We will do this as well, so as to cause a minimum of
 477         // disturbance.  Is this what we want?
 478         if (count > 0) {
 479             int i = count - 1;
 480         trailLoop:
 481             while (i > -1) {
 482                 switch (buffer[i]) {
 483                 case ' ':
 484                 case '\t':
 485                 case '*':
 486                     i--;
 487                     break;
 488                 // And again, the extra cases here are a trick
 489                 // to get the compiler to generate a tableswitch.
 490                 case 0: case 1: case 2: case 3: case 4: case 5:
 491                 case 6: case 7: case 8: case 10: case 11: case 12:
 492                 case 13: case 14: case 15: case 16: case 17: case 18:
 493                 case 19: case 20: case 21: case 22: case 23: case 24:
 494                 case 25: case 26: case 27: case 28: case 29: case 30:
 495                 case 31: case 33: case 34: case 35: case 36: case 37:
 496                 case 38: case 39: case 40:
 497                 default:
 498                     break trailLoop;
 499                 }
 500             }
 501             count = i + 1;
 502 
 503             // Return the text of the doc comment.
 504             return new String(buffer, 0, count);
 505         } else {
 506             return "";
 507         }
 508     }
 509 
 510     /**
 511      * Scan a number. The first digit of the number should be the current
 512      * character.  We may be scanning hex, decimal, or octal at this point
 513      */
 514     private void scanNumber() throws IOException {
 515         boolean seenNonOctal = false;
 516         boolean overflow = false;
 517         boolean seenDigit = false; // used to detect invalid hex number 0xL
 518         radix = (ch == '0' ? 8 : 10);
 519         long value = ch - '0';
 520         count = 0;
 521         putc(ch);               // save character in buffer
 522     numberLoop:
 523         for (;;) {
 524             switch (ch = in.read()) {
 525               case '.':
 526                 if (radix == 16)
 527                     break numberLoop; // an illegal character
 528                 scanReal();
 529                 return;
 530 
 531               case '8': case '9':
 532                 // We can't yet throw an error if reading an octal.  We might
 533                 // discover we're really reading a real.
 534                 seenNonOctal = true;
 535               case '0': case '1': case '2': case '3':
 536               case '4': case '5': case '6': case '7':
 537                 seenDigit = true;
 538                 putc(ch);
 539                 if (radix == 10) {
 540                     overflow = overflow || (value * 10)/10 != value;
 541                     value = (value * 10) + (ch - '0');
 542                     overflow = overflow || (value - 1 < -1);
 543                 } else if (radix == 8) {
 544                     overflow = overflow || (value >>> 61) != 0;
 545                     value = (value << 3) + (ch - '0');
 546                 } else {
 547                     overflow = overflow || (value >>> 60) != 0;
 548                     value = (value << 4) + (ch - '0');
 549                 }
 550                 break;
 551 
 552               case 'd': case 'D': case 'e': case 'E': case 'f': case 'F':
 553                 if (radix != 16) {
 554                     scanReal();
 555                     return;
 556                 }
 557                 // fall through
 558               case 'a': case 'A': case 'b': case 'B': case 'c': case 'C':
 559                 seenDigit = true;
 560                 putc(ch);
 561                 if (radix != 16)
 562                     break numberLoop; // an illegal character
 563                 overflow = overflow || (value >>> 60) != 0;
 564                 value = (value << 4) + 10 +
 565                          Character.toLowerCase((char)ch) - 'a';
 566                 break;
 567 
 568               case 'l': case 'L':
 569                 ch = in.read(); // skip over 'l'
 570                 longValue = value;
 571                 token = LONGVAL;
 572                 break numberLoop;
 573 
 574               case 'x': case 'X':
 575                 // if the first character is a '0' and this is the second
 576                 // letter, then read in a hexadecimal number.  Otherwise, error.
 577                 if (count == 1 && radix == 8) {
 578                     radix = 16;
 579                     seenDigit = false;
 580                     break;
 581                 } else {
 582                     // we'll get an illegal character error
 583                     break numberLoop;
 584                 }
 585 
 586               default:
 587                 intValue = (int)value;
 588                 token = INTVAL;
 589                 break numberLoop;
 590             }
 591         } // while true
 592 
 593         // We have just finished reading the number.  The next thing better
 594         // not be a letter or digit.
 595         // Note:  There will be deprecation warnings against these uses
 596         // of Character.isJavaLetterOrDigit and Character.isJavaLetter.
 597         // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs.
 598         if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
 599             env.error(in.pos, "invalid.number");
 600             do { ch = in.read(); }
 601             while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
 602             intValue = 0;
 603             token = INTVAL;
 604         } else if (radix == 8 && seenNonOctal) {
 605             // A bogus octal literal.
 606             intValue = 0;
 607             token = INTVAL;
 608             env.error(pos, "invalid.octal.number");
 609         } else if (radix == 16 && seenDigit == false) {
 610             // A hex literal with no digits, 0xL, for example.
 611             intValue = 0;
 612             token = INTVAL;
 613             env.error(pos, "invalid.hex.number");
 614         } else {
 615             if (token == INTVAL) {
 616                 // Check for overflow.  Note that base 10 literals
 617                 // have different rules than base 8 and 16.
 618                 overflow = overflow ||
 619                     (value & 0xFFFFFFFF00000000L) != 0 ||
 620                     (radix == 10 && value > 2147483648L);
 621 
 622                 if (overflow) {
 623                     intValue = 0;
 624 
 625                     // Give a specific error message which tells
 626                     // the user the range.
 627                     switch (radix) {
 628                     case 8:
 629                         env.error(pos, "overflow.int.oct");
 630                         break;
 631                     case 10:
 632                         env.error(pos, "overflow.int.dec");
 633                         break;
 634                     case 16:
 635                         env.error(pos, "overflow.int.hex");
 636                         break;
 637                     default:
 638                         throw new CompilerError("invalid radix");
 639                     }
 640                 }
 641             } else {
 642                 if (overflow) {
 643                     longValue = 0;
 644 
 645                     // Give a specific error message which tells
 646                     // the user the range.
 647                     switch (radix) {
 648                     case 8:
 649                         env.error(pos, "overflow.long.oct");
 650                         break;
 651                     case 10:
 652                         env.error(pos, "overflow.long.dec");
 653                         break;
 654                     case 16:
 655                         env.error(pos, "overflow.long.hex");
 656                         break;
 657                     default:
 658                         throw new CompilerError("invalid radix");
 659                     }
 660                 }
 661             }
 662         }
 663     }
 664 
    /**
     * Scan a float.  We are either looking at the decimal point, or we have
     * already seen it and put it into the buffer.  We haven't seen an exponent.
     * Should be called when the current character is the '.', the 'e' or 'E'
     * of an exponent, or a 'd', 'D', 'f' or 'F' type suffix.
     */
 671     private void scanReal() throws IOException {
 672         boolean seenExponent = false;
 673         boolean isSingleFloat = false;
 674         char lastChar;
 675         if (ch == '.') {
 676             putc(ch);
 677             ch = in.read();
 678         }
 679 
 680     numberLoop:
 681         for ( ; ; ch = in.read()) {
 682             switch (ch) {
 683                 case '0': case '1': case '2': case '3': case '4':
 684                 case '5': case '6': case '7': case '8': case '9':
 685                     putc(ch);
 686                     break;
 687 
 688                 case 'e': case 'E':
 689                     if (seenExponent)
 690                         break numberLoop; // we'll get a format error
 691                     putc(ch);
 692                     seenExponent = true;
 693                     break;
 694 
 695                 case '+': case '-':
 696                     lastChar = buffer[count - 1];
 697                     if (lastChar != 'e' && lastChar != 'E')
 698                         break numberLoop; // this isn't an error, though!
 699                     putc(ch);
 700                     break;
 701 
 702                 case 'f': case 'F':
 703                     ch = in.read(); // skip over 'f'
 704                     isSingleFloat = true;
 705                     break numberLoop;
 706 
 707                 case 'd': case 'D':
 708                     ch = in.read(); // skip over 'd'
 709                     // fall through
 710                 default:
 711                     break numberLoop;
 712             } // sswitch
 713         } // loop
 714 
 715         // we have just finished reading the number.  The next thing better
 716         // not be a letter or digit.
 717         if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
 718             env.error(in.pos, "invalid.number");
 719             do { ch = in.read(); }
 720             while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
 721             doubleValue = 0;
 722             token = DOUBLEVAL;
 723         } else {
 724             token = isSingleFloat ? FLOATVAL : DOUBLEVAL;
 725             try {
 726                 lastChar = buffer[count - 1];
 727                 if (lastChar == 'e' || lastChar == 'E'
 728                        || lastChar == '+' || lastChar == '-') {
 729                     env.error(in.pos -1, "float.format");
 730                 } else if (isSingleFloat) {
 731                     String string = bufferString();
 732                     floatValue = Float.valueOf(string).floatValue();
 733                     if (Float.isInfinite(floatValue)) {
 734                         env.error(pos, "overflow.float");
 735                     } else if (floatValue == 0 && !looksLikeZero(string)) {
 736                         env.error(pos, "underflow.float");
 737                     }
 738                 } else {
 739                     String string = bufferString();
 740                     doubleValue = Double.valueOf(string).doubleValue();
 741                     if (Double.isInfinite(doubleValue)) {
 742                         env.error(pos, "overflow.double");
 743                     } else if (doubleValue == 0 && !looksLikeZero(string)) {
 744                         env.error(pos, "underflow.double");
 745                     }
 746                 }
 747             } catch (NumberFormatException ee) {
 748                 env.error(pos, "float.format");
 749                 doubleValue = 0;
 750                 floatValue = 0;
 751             }
 752         }
 753         return;
 754     }
 755 
    // We have a token that parses as a number.  Does this token look like
    // zero, i.e. does its mantissa contain no non-zero digit?
 758     private static boolean looksLikeZero(String token) {
 759         int length = token.length();
 760         for (int i = 0; i < length; i++) {
 761             switch (token.charAt(i)) {
 762                 case 0: case '.':
 763                     continue;
 764                 case '1': case '2': case '3': case '4': case '5':
 765                 case '6': case '7': case '8': case '9':
 766                     return false;
 767                 case 'e': case 'E': case 'f': case 'F':
 768                     return true;
 769             }
 770         }
 771         return true;
 772     }
 773 
    /**
     * Scan an escape character.
     * @return the character, or -1 if the escape sequence was
     * invalid (an error has then been reported).
     */
 779     private int scanEscapeChar() throws IOException {
 780         long p = in.pos;
 781 
 782         switch (ch = in.read()) {
 783           case '0': case '1': case '2': case '3':
 784           case '4': case '5': case '6': case '7': {
 785             int n = ch - '0';
 786             for (int i = 2 ; i > 0 ; i--) {
 787                 switch (ch = in.read()) {
 788                   case '0': case '1': case '2': case '3':
 789                   case '4': case '5': case '6': case '7':
 790                     n = (n << 3) + ch - '0';
 791                     break;
 792 
 793                   default:
 794                     if (n > 0xFF) {
 795                         env.error(p, "invalid.escape.char");
 796                     }
 797                     return n;
 798                 }
 799             }
 800             ch = in.read();
 801             if (n > 0xFF) {
 802                 env.error(p, "invalid.escape.char");
 803             }
 804             return n;
 805           }
 806 
 807           case 'r':  ch = in.read(); return '\r';
 808           case 'n':  ch = in.read(); return '\n';
 809           case 'f':  ch = in.read(); return '\f';
 810           case 'b':  ch = in.read(); return '\b';
 811           case 't':  ch = in.read(); return '\t';
 812           case '\\': ch = in.read(); return '\\';
 813           case '\"': ch = in.read(); return '\"';
 814           case '\'': ch = in.read(); return '\'';
 815         }
 816 
 817         env.error(p, "invalid.escape.char");
 818         ch = in.read();
 819         return -1;
 820     }
 821 
 822     /**
 823      * Scan a string. The current character
 824      * should be the opening " of the string.
 825      */
 826     private void scanString() throws IOException {
 827         token = STRINGVAL;
 828         count = 0;
 829         ch = in.read();
 830 
 831         // Scan a String
 832         while (true) {
 833             switch (ch) {
 834               case EOF:
 835                 env.error(pos, "eof.in.string");
 836                 stringValue = bufferString();
 837                 return;
 838 
 839               case '\r':
 840               case '\n':
 841                 ch = in.read();
 842                 env.error(pos, "newline.in.string");
 843                 stringValue = bufferString();
 844                 return;
 845 
 846               case '"':
 847                 ch = in.read();
 848                 stringValue = bufferString();
 849                 return;
 850 
 851               case '\\': {
 852                 int c = scanEscapeChar();
 853                 if (c >= 0) {
 854                     putc((char)c);
 855                 }
 856                 break;
 857               }
 858 
 859               default:
 860                 putc(ch);
 861                 ch = in.read();
 862                 break;
 863             }
 864         }
 865     }
 866 
 867     /**
 868      * Scan a character. The current character should be
 869      * the opening ' of the character constant.
 870      */
 871     private void scanCharacter() throws IOException {
 872         token = CHARVAL;
 873 
 874         switch (ch = in.read()) {
 875           case '\\':
 876             int c = scanEscapeChar();
 877             charValue = (char)((c >= 0) ? c : 0);
 878             break;
 879 
 880         case '\'':
 881             // There are two standard problems this case deals with.  One
 882             // is the malformed single quote constant (i.e. the programmer
 883             // uses ''' instead of '\'') and the other is the empty
 884             // character constant (i.e. '').  Just consume any number of
 885             // single quotes and emit an error message.
 886             charValue = 0;
 887             env.error(pos, "invalid.char.constant");
 888             ch = in.read();
 889             while (ch == '\'') {
 890                 ch = in.read();
 891             }
 892             return;
 893 
 894           case '\r':
 895           case '\n':
 896             charValue = 0;
 897             env.error(pos, "invalid.char.constant");
 898             return;
 899 
 900           default:
 901             charValue = (char)ch;
 902             ch = in.read();
 903             break;
 904         }
 905 
 906         if (ch == '\'') {
 907             ch = in.read();
 908         } else {
 909             env.error(pos, "invalid.char.constant");
 910             while (true) {
 911                 switch (ch) {
 912                   case '\'':
 913                     ch = in.read();
 914                     return;
 915                   case ';':
 916                   case '\n':
 917                   case EOF:
 918                     return;
 919                   default:
 920                     ch = in.read();
 921                 }
 922             }
 923         }
 924     }
 925 
 926     /**
 927      * Scan an Identifier. The current character should
 928      * be the first character of the identifier.
 929      */
 930     private void scanIdentifier() throws IOException {
 931         count = 0;
 932 
 933         while (true) {
 934             putc(ch);
 935             switch (ch = in.read()) {
 936               case 'a': case 'b': case 'c': case 'd': case 'e':
 937               case 'f': case 'g': case 'h': case 'i': case 'j':
 938               case 'k': case 'l': case 'm': case 'n': case 'o':
 939               case 'p': case 'q': case 'r': case 's': case 't':
 940               case 'u': case 'v': case 'w': case 'x': case 'y':
 941               case 'z':
 942               case 'A': case 'B': case 'C': case 'D': case 'E':
 943               case 'F': case 'G': case 'H': case 'I': case 'J':
 944               case 'K': case 'L': case 'M': case 'N': case 'O':
 945               case 'P': case 'Q': case 'R': case 'S': case 'T':
 946               case 'U': case 'V': case 'W': case 'X': case 'Y':
 947               case 'Z':
 948               case '0': case '1': case '2': case '3': case '4':
 949               case '5': case '6': case '7': case '8': case '9':
 950               case '$': case '_':
 951                 break;
 952 
 953               default:
 954                 if (!Character.isJavaLetterOrDigit((char)ch)) {
 955                     idValue = Identifier.lookup(bufferString());
 956                     token = idValue.getType();
 957                     return;
 958                 }
 959             }
 960         }
 961     }
 962 
 963     /**
 964      * The ending position of the current token
 965      */
 966     // Note: This should be part of the pos itself.
 967     public long getEndPos() {
 968         return in.pos;
 969     }
 970 
 971     /**
 972      * If the current token is IDENT, return the identifier occurrence.
 973      * It will be freshly allocated.
 974      */
 975     public IdentifierToken getIdToken() {
 976         return (token != IDENT) ? null : new IdentifierToken(pos, idValue);
 977     }
 978 
 979     /**
 980      * Scan the next token.
 981      * @return the position of the previous token.
 982      */
 983    public long scan() throws IOException {
 984        return xscan();
 985    }
 986 
 987     protected long xscan() throws IOException {
 988         final ScannerInputReader in = this.in;
 989         long retPos = pos;
 990         prevPos = in.pos;
 991         docComment = null;
 992         while (true) {
 993             pos = in.pos;
 994 
 995             switch (ch) {
 996               case EOF:
 997                 token = EOF;
 998                 return retPos;
 999 
1000               case '\n':
1001                 if (scanComments) {
1002                     ch = ' ';
1003                     // Avoid this path the next time around.
1004                     // Do not just call in.read; we want to present
1005                     // a null token (and also avoid read-ahead).
1006                     token = COMMENT;
1007                     return retPos;
1008                 }
1009               case ' ':
1010               case '\t':
1011               case '\f':
1012                 ch = in.read();
1013                 break;
1014 
1015               case '/':
1016                 switch (ch = in.read()) {
1017                   case '/':
1018                     // Parse a // comment
1019                     while (((ch = in.read()) != EOF) && (ch != '\n'));
1020                     if (scanComments) {
1021                         token = COMMENT;
1022                         return retPos;
1023                     }
1024                     break;
1025 
1026                   case '*':
1027                     ch = in.read();
1028                     if (ch == '*') {
1029                         docComment = scanDocComment();
1030                     } else {
1031                         skipComment();
1032                     }
1033                     if (scanComments) {
1034                         return retPos;
1035                     }
1036                     break;
1037 
1038                   case '=':
1039                     ch = in.read();
1040                     token = ASGDIV;
1041                     return retPos;
1042 
1043                   default:
1044                     token = DIV;
1045                     return retPos;
1046                 }
1047                 break;
1048 
1049               case '"':
1050                 scanString();
1051                 return retPos;
1052 
1053               case '\'':
1054                 scanCharacter();
1055                 return retPos;
1056 
1057               case '0': case '1': case '2': case '3': case '4':
1058               case '5': case '6': case '7': case '8': case '9':
1059                 scanNumber();
1060                 return retPos;
1061 
1062               case '.':
1063                 switch (ch = in.read()) {
1064                   case '0': case '1': case '2': case '3': case '4':
1065                   case '5': case '6': case '7': case '8': case '9':
1066                     count = 0;
1067                     putc('.');
1068                     scanReal();
1069                     break;
1070                   default:
1071                     token = FIELD;
1072                 }
1073                 return retPos;
1074 
1075               case '{':
1076                 ch = in.read();
1077                 token = LBRACE;
1078                 return retPos;
1079 
1080               case '}':
1081                 ch = in.read();
1082                 token = RBRACE;
1083                 return retPos;
1084 
1085               case '(':
1086                 ch = in.read();
1087                 token = LPAREN;
1088                 return retPos;
1089 
1090               case ')':
1091                 ch = in.read();
1092                 token = RPAREN;
1093                 return retPos;
1094 
1095               case '[':
1096                 ch = in.read();
1097                 token = LSQBRACKET;
1098                 return retPos;
1099 
1100               case ']':
1101                 ch = in.read();
1102                 token = RSQBRACKET;
1103                 return retPos;
1104 
1105               case ',':
1106                 ch = in.read();
1107                 token = COMMA;
1108                 return retPos;
1109 
1110               case ';':
1111                 ch = in.read();
1112                 token = SEMICOLON;
1113                 return retPos;
1114 
1115               case '?':
1116                 ch = in.read();
1117                 token = QUESTIONMARK;
1118                 return retPos;
1119 
1120               case '~':
1121                 ch = in.read();
1122                 token = BITNOT;
1123                 return retPos;
1124 
1125               case ':':
1126                 ch = in.read();
1127                 token = COLON;
1128                 return retPos;
1129 
1130               case '-':
1131                 switch (ch = in.read()) {
1132                   case '-':
1133                     ch = in.read();
1134                     token = DEC;
1135                     return retPos;
1136 
1137                   case '=':
1138                     ch = in.read();
1139                     token = ASGSUB;
1140                     return retPos;
1141                 }
1142                 token = SUB;
1143                 return retPos;
1144 
1145               case '+':
1146                 switch (ch = in.read()) {
1147                   case '+':
1148                     ch = in.read();
1149                     token = INC;
1150                     return retPos;
1151 
1152                   case '=':
1153                     ch = in.read();
1154                     token = ASGADD;
1155                     return retPos;
1156                 }
1157                 token = ADD;
1158                 return retPos;
1159 
1160               case '<':
1161                 switch (ch = in.read()) {
1162                   case '<':
1163                     if ((ch = in.read()) == '=') {
1164                         ch = in.read();
1165                         token = ASGLSHIFT;
1166                         return retPos;
1167                     }
1168                     token = LSHIFT;
1169                     return retPos;
1170 
1171                   case '=':
1172                     ch = in.read();
1173                     token = LE;
1174                     return retPos;
1175                 }
1176                 token = LT;
1177                 return retPos;
1178 
1179               case '>':
1180                 switch (ch = in.read()) {
1181                   case '>':
1182                     switch (ch = in.read()) {
1183                       case '=':
1184                         ch = in.read();
1185                         token = ASGRSHIFT;
1186                         return retPos;
1187 
1188                       case '>':
1189                         if ((ch = in.read()) == '=') {
1190                             ch = in.read();
1191                             token = ASGURSHIFT;
1192                             return retPos;
1193                         }
1194                         token = URSHIFT;
1195                         return retPos;
1196                     }
1197                     token = RSHIFT;
1198                     return retPos;
1199 
1200                   case '=':
1201                     ch = in.read();
1202                     token = GE;
1203                     return retPos;
1204                 }
1205                 token = GT;
1206                 return retPos;
1207 
1208               case '|':
1209                 switch (ch = in.read()) {
1210                   case '|':
1211                     ch = in.read();
1212                     token = OR;
1213                     return retPos;
1214 
1215                   case '=':
1216                     ch = in.read();
1217                     token = ASGBITOR;
1218                     return retPos;
1219                 }
1220                 token = BITOR;
1221                 return retPos;
1222 
1223               case '&':
1224                 switch (ch = in.read()) {
1225                   case '&':
1226                     ch = in.read();
1227                     token = AND;
1228                     return retPos;
1229 
1230                   case '=':
1231                     ch = in.read();
1232                     token = ASGBITAND;
1233                     return retPos;
1234                 }
1235                 token = BITAND;
1236                 return retPos;
1237 
1238               case '=':
1239                 if ((ch = in.read()) == '=') {
1240                     ch = in.read();
1241                     token = EQ;
1242                     return retPos;
1243                 }
1244                 token = ASSIGN;
1245                 return retPos;
1246 
1247               case '%':
1248                 if ((ch = in.read()) == '=') {
1249                     ch = in.read();
1250                     token = ASGREM;
1251                     return retPos;
1252                 }
1253                 token = REM;
1254                 return retPos;
1255 
1256               case '^':
1257                 if ((ch = in.read()) == '=') {
1258                     ch = in.read();
1259                     token = ASGBITXOR;
1260                     return retPos;
1261                 }
1262                 token = BITXOR;
1263                 return retPos;
1264 
1265               case '!':
1266                 if ((ch = in.read()) == '=') {
1267                     ch = in.read();
1268                     token = NE;
1269                     return retPos;
1270                 }
1271                 token = NOT;
1272                 return retPos;
1273 
1274               case '*':
1275                 if ((ch = in.read()) == '=') {
1276                     ch = in.read();
1277                     token = ASGMUL;
1278                     return retPos;
1279                 }
1280                 token = MUL;
1281                 return retPos;
1282 
1283               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1284               case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1285               case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1286               case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1287               case 'y': case 'z':
1288               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1289               case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1290               case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1291               case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1292               case 'Y': case 'Z':
1293               case '$': case '_':
1294                 scanIdentifier();
1295                 return retPos;
1296 
1297               case '\u001a':
1298                 // Our one concession to DOS.
1299                 if ((ch = in.read()) == EOF) {
1300                     token = EOF;
1301                     return retPos;
1302                 }
1303                 env.error(pos, "funny.char");
1304                 ch = in.read();
1305                 break;
1306 
1307 
1308               default:
1309                 if (Character.isJavaLetter((char)ch)) {
1310                     scanIdentifier();
1311                     return retPos;
1312                 }
1313                 env.error(pos, "funny.char");
1314                 ch = in.read();
1315                 break;
1316             }
1317         }
1318     }
1319 
1320     /**
1321      * Scan to a matching '}', ']' or ')'. The current token must be
1322      * a '{', '[' or '(';
1323      */
1324     public void match(int open, int close) throws IOException {
1325         int depth = 1;
1326 
1327         while (true) {
1328             scan();
1329             if (token == open) {
1330                 depth++;
1331             } else if (token == close) {
1332                 if (--depth == 0) {
1333                     return;
1334                 }
1335             } else if (token == EOF) {
1336                 env.error(pos, "unbalanced.paren");
1337                 return;
1338             }
1339         }
1340     }
1341 }