1 /*
   2  * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Sun designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 package com.sun.tools.javac.parser;
  27 
  28 import java.nio.*;
  29 
  30 import com.sun.tools.javac.code.Source;
  31 import com.sun.tools.javac.file.JavacFileManager;
  32 import com.sun.tools.javac.util.*;
  33 
  34 
  35 import static com.sun.tools.javac.parser.Token.*;
  36 import static com.sun.tools.javac.util.LayoutCharacters.*;
  37 
  38 /** The lexical analyzer maps an input stream consisting of
  39  *  ASCII characters and Unicode escapes into a token sequence.
  40  *
  41  *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
  42  *  you write code that depends on this, you do so at your own risk.
  43  *  This code and its internal interfaces are subject to change or
  44  *  deletion without notice.</b>
  45  */
  46 public class Scanner implements Lexer {
  47 
    /** Debugging switch for the scanner; initially off.
     *  NOTE(review): nothing in the reviewed portion of this class reads
     *  it -- presumably consulted further down; confirm before removing.
     */
    private static boolean scannerDebug = false;
  49 
  50     /** A factory for creating scanners. */
  51     public static class Factory {
  52         /** The context key for the scanner factory. */
  53         public static final Context.Key<Scanner.Factory> scannerFactoryKey =
  54             new Context.Key<Scanner.Factory>();
  55 
  56         /** Get the Factory instance for this context. */
  57         public static Factory instance(Context context) {
  58             Factory instance = context.get(scannerFactoryKey);
  59             if (instance == null)
  60                 instance = new Factory(context);
  61             return instance;
  62         }
  63 
  64         final Log log;
  65         final Names names;
  66         final Source source;
  67         final Keywords keywords;
  68 
  69         /** Create a new scanner factory. */
  70         protected Factory(Context context) {
  71             context.put(scannerFactoryKey, this);
  72             this.log = Log.instance(context);
  73             this.names = Names.instance(context);
  74             this.source = Source.instance(context);
  75             this.keywords = Keywords.instance(context);
  76         }
  77 
  78         public Scanner newScanner(CharSequence input) {
  79             if (input instanceof CharBuffer) {
  80                 return new Scanner(this, (CharBuffer)input);
  81             } else {
  82                 char[] array = input.toString().toCharArray();
  83                 return newScanner(array, array.length);
  84             }
  85         }
  86 
  87         public Scanner newScanner(char[] input, int inputLength) {
  88             return new Scanner(this, input, inputLength);
  89         }
  90     }
  91 
    /* Output variables; set by nextToken():
     */

    /** The current token, set by nextToken().  lexError() overwrites it
     *  with ERROR.
     */
    private Token token;

    /** Allow hex floating-point literals.  Initialised from the source
     *  level; switched on after the first "unsupported" diagnostic so
     *  that the diagnostic is not repeated.
     */
    private boolean allowHexFloats;

    /** Allow binary literals.  Initialised from the source level;
     *  switched on after the first "unsupported" diagnostic so that the
     *  diagnostic is not repeated.
     */
    private boolean allowBinaryLiterals;

    /** Allow underscores in literals.  Switched on after the first
     *  "unsupported" diagnostic so that the diagnostic is not repeated.
     */
    private boolean allowUnderscoresInLiterals;

    /** The source language setting.
     */
    private Source source;

    /** The token's position, 0-based offset from beginning of text.
     */
    private int pos;

    /** Character position just after the last character of the token.
     */
    private int endPos;

    /** The last character position of the previous token.
     */
    private int prevEndPos;

    /** The position where the most recent lexical error was reported by
     *  lexError(); initially Position.NOPOS.
     */
    private int errPos = Position.NOPOS;

    /** The name of an identifier or token.
     */
    private Name name;

    /** The radix of a numeric literal token.
     */
    private int radix;

    /** Has a @deprecated been encountered in last doc comment?
     *  This needs to be reset by the client.
     */
    protected boolean deprecatedFlag = false;

    /** A character buffer for literals; sp is the number of characters
     *  currently stored in it.  putChar() doubles its size when full.
     */
    private char[] sbuf = new char[128];
    private int sp;

    /** The input buffer (buf), the index of the character most recently
     *  read from it (bp; scanChar() advances it before reading), and the
     *  index of one past the last character in the buffer (buflen), where
     *  the constructor stores the EOI sentinel.  eofPos is set to the
     *  input length passed to the constructor.
     */
    private char[] buf;
    private int bp;
    private int buflen;
    private int eofPos;

    /** The current character.
     */
    private char ch;

    /** The buffer index of the last converted unicode character
     */
    private int unicodeConversionBp = -1;

    /** The log to be used for error reporting.
     */
    private final Log log;

    /** The name table. */
    private final Names names;

    /** The keyword table. */
    private final Keywords keywords;
 174 
 175     /** Common code for constructors. */
 176     private Scanner(Factory fac) {
 177         log = fac.log;
 178         names = fac.names;
 179         keywords = fac.keywords;
 180         source = fac.source;
 181         allowBinaryLiterals = source.allowBinaryLiterals();
 182         allowHexFloats = source.allowHexFloats();
 183         allowUnderscoresInLiterals = source.allowBinaryLiterals();
 184     }
 185 
 186     private static final boolean hexFloatsWork = hexFloatsWork();
 187     private static boolean hexFloatsWork() {
 188         try {
 189             Float.valueOf("0x1.0p1");
 190             return true;
 191         } catch (NumberFormatException ex) {
 192             return false;
 193         }
 194     }
 195 
    /** Create a scanner from the input buffer.  buffer must implement
     *  array() and compact(), and remaining() must be less than limit().
     *  The characters are obtained with JavacFileManager.toArray(buffer)
     *  and buffer.limit() is passed on as the input length.
     *
     *  @param fac the factory which created this Scanner
     *  @param buffer the input characters
     */
    protected Scanner(Factory fac, CharBuffer buffer) {
        this(fac, JavacFileManager.toArray(buffer), buffer.limit());
    }
 202 
 203     /**
 204      * Create a scanner from the input array.  This method might
 205      * modify the array.  To avoid copying the input array, ensure
 206      * that {@code inputLength < input.length} or
 207      * {@code input[input.length -1]} is a white space character.
 208      *
 209      * @param fac the factory which created this Scanner
 210      * @param input the input, might be modified
 211      * @param inputLength the size of the input.
 212      * Must be positive and less than or equal to input.length.
 213      */
 214     protected Scanner(Factory fac, char[] input, int inputLength) {
 215         this(fac);
 216         eofPos = inputLength;
 217         if (inputLength == input.length) {
 218             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
 219                 inputLength--;
 220             } else {
 221                 char[] newInput = new char[inputLength + 1];
 222                 System.arraycopy(input, 0, newInput, 0, input.length);
 223                 input = newInput;
 224             }
 225         }
 226         buf = input;
 227         buflen = inputLength;
 228         buf[buflen] = EOI;
 229         bp = -1;
 230         scanChar();
 231     }
 232 
 233     /** Report an error at the given position using the provided arguments.
 234      */
 235     private void lexError(int pos, String key, Object... args) {
 236         log.error(pos, key, args);
 237         token = ERROR;
 238         errPos = pos;
 239     }
 240 
    /** Report an error at the current token position (the pos field)
     *  using the provided arguments; delegates to the overload that
     *  takes an explicit position.
     *
     *  @param key   the diagnostic key passed on to the log
     *  @param args  arguments for the diagnostic
     */
    private void lexError(String key, Object... args) {
        lexError(pos, key, args);
    }
 247 
 248     /** Convert an ASCII digit from its base (8, 10, or 16)
 249      *  to its value.
 250      */
 251     private int digit(int base) {
 252         char c = ch;
 253         int result = Character.digit(c, base);
 254         if (result >= 0 && c > 0x7f) {
 255             lexError(pos+1, "illegal.nonascii.digit");
 256             ch = "0123456789abcdef".charAt(result);
 257         }
 258         return result;
 259     }
 260 
    /** Convert unicode escape; bp points to initial '\' character
     *  (Spec 3.3).
     *
     *  On success ch holds the decoded character, bp is left on the last
     *  hex digit of the escape, and that index is recorded in
     *  unicodeConversionBp.  If the backslash is not followed by 'u',
     *  bp and ch are restored to the backslash.  A malformed escape is
     *  reported as "illegal.unicode.esc".  Nothing happens when ch is
     *  not a backslash, or when it is itself the result of a conversion
     *  that ended at bp.
     */
    private void convertUnicode() {
        if (ch == '\\' && unicodeConversionBp != bp) {
            bp++; ch = buf[bp];
            if (ch == 'u') {
                // any number of 'u' characters may follow the backslash
                do {
                    bp++; ch = buf[bp];
                } while (ch == 'u');
                // index of the fourth (last) hex digit
                int limit = bp + 3;
                if (limit < buflen) {
                    int d = digit(16);
                    int code = d;
                    // accumulate the remaining digits; stop early on a non-digit
                    while (bp < limit && d >= 0) {
                        bp++; ch = buf[bp];
                        d = digit(16);
                        code = (code << 4) + d;
                    }
                    if (d >= 0) {
                        ch = (char)code;
                        unicodeConversionBp = bp;
                        return;
                    }
                }
                lexError(bp, "illegal.unicode.esc");
            } else {
                // not an escape: step back onto the backslash
                bp--;
                ch = '\\';
            }
        }
    }
 293 
 294     /** Read next character.
 295      */
 296     private void scanChar() {
 297         ch = buf[++bp];
 298         if (ch == '\\') {
 299             convertUnicode();
 300         }
 301     }
 302 
 303     /** Read next character in comment, skipping over double '\' characters.
 304      */
 305     private void scanCommentChar() {
 306         scanChar();
 307         if (ch == '\\') {
 308             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
 309                 bp++;
 310             } else {
 311                 convertUnicode();
 312             }
 313         }
 314     }
 315 
 316     /** Append a character to sbuf.
 317      */
 318     private void putChar(char ch) {
 319         if (sp == sbuf.length) {
 320             char[] newsbuf = new char[sbuf.length * 2];
 321             System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
 322             sbuf = newsbuf;
 323         }
 324         sbuf[sp++] = ch;
 325     }
 326 
 327     /** For debugging purposes: print character.
 328      */
 329     private void dch() {
 330         System.err.print(ch); System.out.flush();
 331     }
 332 
    /** Read next character in character or string literal and copy into sbuf.
     *  Escape sequences are translated to the character they denote; an
     *  unknown escape is reported as "illegal.esc.char".
     *
     *  @param forBytecodeName  when true, a backslash followed by one of
     *         | , ? % ^ _ { } ! - = is accepted and both characters are
     *         copied to sbuf unchanged; when false that is an error
     */
    private void scanLitChar(boolean forBytecodeName) {
        if (ch == '\\') {
            // two backslashes in the input (the first not produced by a
            // unicode escape) denote a single backslash character
            if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
                bp++;
                putChar('\\');
                scanChar();
            } else {
                scanChar();
                switch (ch) {
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                    // octal escape: one to three digits; a third digit is
                    // only taken when the first is 0-3, so the value
                    // never exceeds 255
                    char leadch = ch;
                    int oct = digit(8);
                    scanChar();
                    if ('0' <= ch && ch <= '7') {
                        oct = oct * 8 + digit(8);
                        scanChar();
                        if (leadch <= '3' && '0' <= ch && ch <= '7') {
                            oct = oct * 8 + digit(8);
                            scanChar();
                        }
                    }
                    putChar((char)oct);
                    break;
                case 'b':
                    putChar('\b'); scanChar(); break;
                case 't':
                    putChar('\t'); scanChar(); break;
                case 'n':
                    putChar('\n'); scanChar(); break;
                case 'f':
                    putChar('\f'); scanChar(); break;
                case 'r':
                    putChar('\r'); scanChar(); break;
                case '\'':
                    putChar('\''); scanChar(); break;
                case '\"':
                    putChar('\"'); scanChar(); break;
                case '\\':
                    putChar('\\'); scanChar(); break;
                case '|': case ',': case '?': case '%':
                case '^': case '_': case '{': case '}':
                case '!': case '-': case '=':
                    if (forBytecodeName) {
                        // Accept escape sequences for dangerous bytecode chars.
                        // This is illegal in normal Java string or character literals.
                        // Note that the escape sequence itself is passed through.
                        putChar('\\'); putChar(ch); scanChar();
                    } else {
                        lexError(bp, "illegal.esc.char");
                    }
                    break;
                default:
                    // note: the offending character is not consumed here
                    lexError(bp, "illegal.esc.char");
                }
            }
        } else if (bp != buflen) {
            // ordinary character; nothing is copied at end of input
            putChar(ch); scanChar();
        }
    }
    /** Read next character in an ordinary character or string literal;
     *  same as scanLitChar(false), i.e. the extra bytecode-name escapes
     *  are not accepted.
     */
    private void scanLitChar() {
        scanLitChar(false);
    }
 398 
 399     /** Read next character in an exotic name #"foo"
 400      */
 401     private void scanBytecodeNameChar() {
 402         switch (ch) {
 403         // reject any "dangerous" char which is illegal somewhere in the JVM spec
 404         // cf. http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm
 405         case '/': case '.': case ';':  // illegal everywhere
 406         case '<': case '>':  // illegal in methods, dangerous in classes
 407         case '[':  // illegal in classes
 408             lexError(bp, "illegal.bytecode.ident.char", String.valueOf((int)ch));
 409             break;
 410         }
 411         scanLitChar(true);
 412     }
 413 
 414     private void scanDigits(int digitRadix) {
 415         char saveCh;
 416         int savePos;
 417         do {
 418             if (ch != '_') {
 419                 putChar(ch);
 420             } else {
 421                 if (!allowUnderscoresInLiterals) {
 422                     lexError("unsupported.underscore", source.name);
 423                     allowUnderscoresInLiterals = true;
 424                 }
 425             }
 426             saveCh = ch;
 427             savePos = bp;
 428             scanChar();
 429         } while (digit(digitRadix) >= 0 || ch == '_');
 430         if (saveCh == '_')
 431             lexError(savePos, "illegal.underscore");
 432     }
 433 
 434     /** Read fractional part of hexadecimal floating point number.
 435      */
 436     private void scanHexExponentAndSuffix() {
 437         if (ch == 'p' || ch == 'P') {
 438             putChar(ch);
 439             scanChar();
 440             skipIllegalUnderscores();
 441             if (ch == '+' || ch == '-') {
 442                 putChar(ch);
 443                 scanChar();
 444             }
 445             skipIllegalUnderscores();
 446             if ('0' <= ch && ch <= '9') {
 447                 scanDigits(10);
 448                 if (!allowHexFloats) {
 449                     lexError("unsupported.fp.lit", source.name);
 450                     allowHexFloats = true;
 451                 }
 452                 else if (!hexFloatsWork)
 453                     lexError("unsupported.cross.fp.lit");
 454             } else
 455                 lexError("malformed.fp.lit");
 456         } else {
 457             lexError("malformed.fp.lit");
 458         }
 459         if (ch == 'f' || ch == 'F') {
 460             putChar(ch);
 461             scanChar();
 462             token = FLOATLITERAL;
 463         } else {
 464             if (ch == 'd' || ch == 'D') {
 465                 putChar(ch);
 466                 scanChar();
 467             }
 468             token = DOUBLELITERAL;
 469         }
 470     }
 471 
 472     /** Read fractional part of floating point number.
 473      */
 474     private void scanFraction() {
 475         skipIllegalUnderscores();
 476         if ('0' <= ch && ch <= '9') {
 477             scanDigits(10);
 478         }
 479         int sp1 = sp;
 480         if (ch == 'e' || ch == 'E') {
 481             putChar(ch);
 482             scanChar();
 483             skipIllegalUnderscores();
 484             if (ch == '+' || ch == '-') {
 485                 putChar(ch);
 486                 scanChar();
 487             }
 488             skipIllegalUnderscores();
 489             if ('0' <= ch && ch <= '9') {
 490                 scanDigits(10);
 491                 return;
 492             }
 493             lexError("malformed.fp.lit");
 494             sp = sp1;
 495         }
 496     }
 497 
 498     /** Read fractional part and 'd' or 'f' suffix of floating point number.
 499      */
 500     private void scanFractionAndSuffix() {
 501         this.radix = 10;
 502         scanFraction();
 503         if (ch == 'f' || ch == 'F') {
 504             putChar(ch);
 505             scanChar();
 506             token = FLOATLITERAL;
 507         } else {
 508             if (ch == 'd' || ch == 'D') {
 509                 putChar(ch);
 510                 scanChar();
 511             }
 512             token = DOUBLELITERAL;
 513         }
 514     }
 515 
 516     /** Read fractional part and 'd' or 'f' suffix of floating point number.
 517      */
 518     private void scanHexFractionAndSuffix(boolean seendigit) {
 519         this.radix = 16;
 520         assert ch == '.';
 521         putChar(ch);
 522         scanChar();
 523         skipIllegalUnderscores();
 524         if (digit(16) >= 0) {
 525             seendigit = true;
 526             scanDigits(16);
 527         }
 528         if (!seendigit)
 529             lexError("invalid.hex.number");
 530         else
 531             scanHexExponentAndSuffix();
 532     }
 533 
 534     private void skipIllegalUnderscores() {
 535         if (ch == '_') {
 536             lexError(bp, "illegal.underscore");
 537             while (ch == '_')
 538                 scanChar();
 539         }
 540     }
 541 
 542     /** Read a number.
 543      *  @param radix  The radix of the number; one of 2, j8, 10, 16.
 544      */
 545     private void scanNumber(int radix) {
 546         this.radix = radix;
 547         // for octal, allow base-10 digit in case it's a float literal
 548         int digitRadix = (radix == 8 ? 10 : radix);
 549         boolean seendigit = false;
 550         if (digit(digitRadix) >= 0) {
 551             seendigit = true;
 552             scanDigits(digitRadix);
 553         }
 554         if (radix == 16 && ch == '.') {
 555             scanHexFractionAndSuffix(seendigit);
 556         } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
 557             scanHexExponentAndSuffix();
 558         } else if (digitRadix == 10 && ch == '.') {
 559             putChar(ch);
 560             scanChar();
 561             scanFractionAndSuffix();
 562         } else if (digitRadix == 10 &&
 563                    (ch == 'e' || ch == 'E' ||
 564                     ch == 'f' || ch == 'F' ||
 565                     ch == 'd' || ch == 'D')) {
 566             scanFractionAndSuffix();
 567         } else {
 568             if (ch == 'l' || ch == 'L') {
 569                 scanChar();
 570                 token = LONGLITERAL;
 571             } else {
 572                 token = INTLITERAL;
 573             }
 574         }
 575     }
 576 
    /** Read an identifier.
     *
     *  The current character is appended to sbuf without being checked,
     *  and characters keep being appended for as long as the next one
     *  can be part of an identifier.  When the identifier ends (or the
     *  end of input is reached), name is looked up from the collected
     *  characters and token is set from the keyword table.
     */
    private void scanIdent() {
        boolean isJavaIdentifierPart;
        char high;
        do {
            if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
            // optimization, was: putChar(ch);

            scanChar();
            switch (ch) {
            // ASCII characters accepted as identifier parts without
            // consulting Character.isJavaIdentifierPart
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case '$': case '_':
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            case '\u0000': case '\u0001': case '\u0002': case '\u0003':
            case '\u0004': case '\u0005': case '\u0006': case '\u0007':
            case '\u0008': case '\u000E': case '\u000F': case '\u0010':
            case '\u0011': case '\u0012': case '\u0013': case '\u0014':
            case '\u0015': case '\u0016': case '\u0017':
            case '\u0018': case '\u0019': case '\u001B':
            case '\u007F':
                break;
            case '\u001A': // EOI is also a legal identifier part
                // at or past buflen this is the end-of-input sentinel:
                // finish the identifier; otherwise it is part of it
                if (bp >= buflen) {
                    name = names.fromChars(sbuf, 0, sp);
                    token = keywords.key(name);
                    return;
                }
                break;
            default:
                if (ch < '\u0080') {
                    // all ASCII range chars already handled, above
                    isJavaIdentifierPart = false;
                } else {
                    // a surrogate pair is tested as one code point; its
                    // high half is stored here, the low half (now in ch)
                    // on the next loop iteration
                    high = scanSurrogates();
                    if (high != 0) {
                        if (sp == sbuf.length) {
                            putChar(high);
                        } else {
                            sbuf[sp++] = high;
                        }
                        isJavaIdentifierPart = Character.isJavaIdentifierPart(
                            Character.toCodePoint(high, ch));
                    } else {
                        isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
                    }
                }
                if (!isJavaIdentifierPart) {
                    name = names.fromChars(sbuf, 0, sp);
                    token = keywords.key(name);
                    return;
                }
            }
        } while (true);
    }
 644 
 645     /** Are surrogates supported?
 646      */
 647     final static boolean surrogatesSupported = surrogatesSupported();
 648     private static boolean surrogatesSupported() {
 649         try {
 650             Character.isHighSurrogate('a');
 651             return true;
 652         } catch (NoSuchMethodError ex) {
 653             return false;
 654         }
 655     }
 656 
 657     /** Scan surrogate pairs.  If 'ch' is a high surrogate and
 658      *  the next character is a low surrogate, then put the low
 659      *  surrogate in 'ch', and return the high surrogate.
 660      *  otherwise, just return 0.
 661      */
 662     private char scanSurrogates() {
 663         if (surrogatesSupported && Character.isHighSurrogate(ch)) {
 664             char high = ch;
 665 
 666             scanChar();
 667 
 668             if (Character.isLowSurrogate(ch)) {
 669                 return high;
 670             }
 671 
 672             ch = high;
 673         }
 674 
 675         return 0;
 676     }
 677 
 678     /** Return true if ch can be part of an operator.
 679      */
 680     private boolean isSpecial(char ch) {
 681         switch (ch) {
 682         case '!': case '%': case '&': case '*': case '?':
 683         case '+': case '-': case ':': case '<': case '=':
 684         case '>': case '^': case '|': case '~':
 685         case '@':
 686             return true;
 687         default:
 688             return false;
 689         }
 690     }
 691 
 692     /** Read longest possible sequence of special characters and convert
 693      *  to token.
 694      */
 695     private void scanOperator() {
 696         while (true) {
 697             putChar(ch);
 698             Name newname = names.fromChars(sbuf, 0, sp);
 699             if (keywords.key(newname) == IDENTIFIER) {
 700                 sp--;
 701                 break;
 702             }
 703             name = newname;
 704             token = keywords.key(newname);
 705             scanChar();
 706             if (!isSpecial(ch)) break;
 707         }
 708     }
 709 
    /**
     * Scan a documentation comment; determine if a deprecated tag is present.
     * Called once the initial /, * have been skipped, positioned at the second *
     * (which is treated as the beginning of the first line).
     * Stops positioned at the closing '/', or at the end of the input if
     * the comment is not terminated.
     *
     * deprecatedFlag is set when a line's first text after optional white
     * space and stars is "@deprecated" followed by white space or by the
     * end of the comment.  Once the flag is set, no further tags are
     * examined.
     */
    @SuppressWarnings("fallthrough")
    private void scanDocComment() {
        boolean deprecatedPrefix = false;

        forEachLine:
        while (bp < buflen) {

            // Skip optional WhiteSpace at beginning of line
            while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
                scanCommentChar();
            }

            // Skip optional consecutive Stars
            while (bp < buflen && ch == '*') {
                scanCommentChar();
                if (ch == '/') {
                    return;
                }
            }

            // Skip optional WhiteSpace after Stars
            while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
                scanCommentChar();
            }

            deprecatedPrefix = false;
            // At beginning of line in the JavaDoc sense.
            // Match "@deprecated" one character at a time, giving up at
            // the first mismatch or at the end of the input.
            if (bp < buflen && ch == '@' && !deprecatedFlag) {
                scanCommentChar();
                if (bp < buflen && ch == 'd') {
                    scanCommentChar();
                    if (bp < buflen && ch == 'e') {
                        scanCommentChar();
                        if (bp < buflen && ch == 'p') {
                            scanCommentChar();
                            if (bp < buflen && ch == 'r') {
                                scanCommentChar();
                                if (bp < buflen && ch == 'e') {
                                    scanCommentChar();
                                    if (bp < buflen && ch == 'c') {
                                        scanCommentChar();
                                        if (bp < buflen && ch == 'a') {
                                            scanCommentChar();
                                            if (bp < buflen && ch == 't') {
                                                scanCommentChar();
                                                if (bp < buflen && ch == 'e') {
                                                    scanCommentChar();
                                                    if (bp < buflen && ch == 'd') {
                                                        deprecatedPrefix = true;
                                                        scanCommentChar();
                                                    }}}}}}}}}}}
            // The tag only counts when it is a whole word: followed by
            // white space or directly by the end of the comment.
            if (deprecatedPrefix && bp < buflen) {
                if (Character.isWhitespace(ch)) {
                    deprecatedFlag = true;
                } else if (ch == '*') {
                    scanCommentChar();
                    if (ch == '/') {
                        deprecatedFlag = true;
                        return;
                    }
                }
            }

            // Skip rest of line
            while (bp < buflen) {
                switch (ch) {
                case '*':
                    // a star followed by '/' ends the comment
                    scanCommentChar();
                    if (ch == '/') {
                        return;
                    }
                    break;
                case CR: // (Spec 3.4)
                    scanCommentChar();
                    if (ch != LF) {
                        continue forEachLine;
                    }
                    /* fall through to LF case */
                case LF: // (Spec 3.4)
                    scanCommentChar();
                    continue forEachLine;
                default:
                    scanCommentChar();
                }
            } // rest of line
        } // forEachLine
        return;
    }
 804 
 805     /** The value of a literal token, recorded as a string.
 806      *  For integers, leading 0x and 'l' suffixes are suppressed.
 807      */
 808     public String stringVal() {
 809         return new String(sbuf, 0, sp);
 810     }
 811 
 812     /** Read the next token: skip white space, line terminators and comments, then scan one token
 813      *  (or report a lexical error). Sets prevEndPos and pos; endPos is set to bp on every exit. */
 814     public void nextToken() {
 815 
 816         try {
 817             prevEndPos = endPos; // endPos still holds the value left by the previous call
 818             sp = 0; // restart the sbuf fill index
 819 
 820             while (true) { // a break out of the switch loops again; every token or error path returns
 821                 pos = bp; // start position of whatever is scanned next
 822                 switch (ch) {
 823                 case ' ': // (Spec 3.6)
 824                 case '\t': // (Spec 3.6)
 825                 case FF: // (Spec 3.6)
 826                     do {
 827                         scanChar();
 828                     } while (ch == ' ' || ch == '\t' || ch == FF);
 829                     endPos = bp;
 830                     processWhiteSpace();
 831                     break;
 832                 case LF: // (Spec 3.4)
 833                     scanChar();
 834                     endPos = bp;
 835                     processLineTerminator();
 836                     break;
 837                 case CR: // (Spec 3.4)
 838                     scanChar();
 839                     if (ch == LF) { // CR LF is handled as a single terminator
 840                         scanChar();
 841                     }
 842                     endPos = bp;
 843                     processLineTerminator();
 844                     break;
 845                 case 'A': case 'B': case 'C': case 'D': case 'E':
 846                 case 'F': case 'G': case 'H': case 'I': case 'J':
 847                 case 'K': case 'L': case 'M': case 'N': case 'O':
 848                 case 'P': case 'Q': case 'R': case 'S': case 'T':
 849                 case 'U': case 'V': case 'W': case 'X': case 'Y':
 850                 case 'Z':
 851                 case 'a': case 'b': case 'c': case 'd': case 'e':
 852                 case 'f': case 'g': case 'h': case 'i': case 'j':
 853                 case 'k': case 'l': case 'm': case 'n': case 'o':
 854                 case 'p': case 'q': case 'r': case 's': case 't':
 855                 case 'u': case 'v': case 'w': case 'x': case 'y':
 856                 case 'z':
 857                 case '$': case '_':
 858                     scanIdent();
 859                     return;
 860                 case '0': // hex (0x), binary (0b), or a number scanned with radix 8
 861                     scanChar();
 862                     if (ch == 'x' || ch == 'X') {
 863                         scanChar();
 864                         skipIllegalUnderscores();
 865                         if (ch == '.') {
 866                             scanHexFractionAndSuffix(false);
 867                         } else if (digit(16) < 0) {
 868                             lexError("invalid.hex.number");
 869                         } else {
 870                             scanNumber(16);
 871                         }
 872                     } else if (ch == 'b' || ch == 'B') {
 873                         if (!allowBinaryLiterals) {
 874                             lexError("unsupported.binary.lit", source.name);
 875                             allowBinaryLiterals = true; // later binary literals then skip this error
 876                         }
 877                         scanChar();
 878                         skipIllegalUnderscores();
 879                         if (digit(2) < 0) {
 880                             lexError("invalid.binary.number");
 881                         } else {
 882                             scanNumber(2);
 883                         }
 884                     } else {
 885                         putChar('0');
 886                         if (ch == '_') {
 887                             int savePos = bp; // position of the first underscore, used if the error below is reported
 888                             do {
 889                                 scanChar();
 890                             } while (ch == '_');
 891                             if (digit(10) < 0) {
 892                                 lexError(savePos, "illegal.underscore");
 893                             }
 894                         }
 895                         scanNumber(8);
 896                     }
 897                     return;
 898                 case '1': case '2': case '3': case '4':
 899                 case '5': case '6': case '7': case '8': case '9':
 900                     scanNumber(10);
 901                     return;
 902                 case '.':
 903                     scanChar();
 904                     if ('0' <= ch && ch <= '9') {
 905                         putChar('.');
 906                         scanFractionAndSuffix();
 907                     } else if (ch == '.') {
 908                         putChar('.'); putChar('.');
 909                         scanChar();
 910                         if (ch == '.') {
 911                             scanChar();
 912                             putChar('.');
 913                             token = ELLIPSIS;
 914                         } else {
 915                             lexError("malformed.fp.lit"); // two dots not followed by a third
 916                         }
 917                     } else {
 918                         token = DOT;
 919                     }
 920                     return;
 921                 case ',':
 922                     scanChar(); token = COMMA; return;
 923                 case ';':
 924                     scanChar(); token = SEMI; return;
 925                 case '(':
 926                     scanChar(); token = LPAREN; return;
 927                 case ')':
 928                     scanChar(); token = RPAREN; return;
 929                 case '[':
 930                     scanChar(); token = LBRACKET; return;
 931                 case ']':
 932                     scanChar(); token = RBRACKET; return;
 933                 case '{':
 934                     scanChar(); token = LBRACE; return;
 935                 case '}':
 936                     scanChar(); token = RBRACE; return;
 937                 case '/':
 938                     scanChar();
 939                     if (ch == '/') {
 940                         do {
 941                             scanCommentChar();
 942                         } while (ch != CR && ch != LF && bp < buflen);
 943                         if (bp < buflen) { // processComment is skipped when the comment runs to the end of the buffer
 944                             endPos = bp;
 945                             processComment(CommentStyle.LINE);
 946                         }
 947                         break; // leaves the switch only; the enclosing loop scans on
 948                     } else if (ch == '*') {
 949                         scanChar();
 950                         CommentStyle style;
 951                         if (ch == '*') {
 952                             style = CommentStyle.JAVADOC;
 953                             scanDocComment(); // NOTE(review): presumably leaves ch on the closing slash when the comment is terminated - confirm
 954                         } else {
 955                             style = CommentStyle.BLOCK;
 956                             while (bp < buflen) {
 957                                 if (ch == '*') {
 958                                     scanChar();
 959                                     if (ch == '/') break; // closing star-slash found; exits the while loop
 960                                 } else {
 961                                     scanCommentChar();
 962                                 }
 963                             }
 964                         }
 965                         if (ch == '/') { // the comment was closed
 966                             scanChar();
 967                             endPos = bp;
 968                             processComment(style);
 969                             break;
 970                         } else {
 971                             lexError("unclosed.comment");
 972                             return;
 973                         }
 974                     } else if (ch == '=') {
 975                         name = names.slashequals;
 976                         token = SLASHEQ;
 977                         scanChar();
 978                     } else {
 979                         name = names.slash;
 980                         token = SLASH;
 981                     }
 982                     return;
 983                 case '\'':
 984                     scanChar();
 985                     if (ch == '\'') {
 986                         lexError("empty.char.lit");
 987                     } else {
 988                         if (ch == CR || ch == LF)
 989                             lexError(pos, "illegal.line.end.in.char.lit");
 990                         scanLitChar();
 991                         if (ch == '\'') {
 992                             scanChar();
 993                             token = CHARLITERAL;
 994                         } else {
 995                             lexError(pos, "unclosed.char.lit");
 996                         }
 997                     }
 998                     return;
 999                 case '\"':
1000                     scanChar();
1001                     while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
1002                         scanLitChar();
1003                     if (ch == '\"') {
1004                         token = STRINGLITERAL;
1005                         scanChar();
1006                     } else {
1007                         lexError(pos, "unclosed.str.lit");
1008                     }
1009                     return;
1010                 case '#':
1011                     scanChar();
1012                     if (ch == '\"') {
1013                         scanChar();
1014                         if (ch == '\"')
1015                             lexError(pos, "empty.bytecode.ident");
1016                         while (ch != '\"' && ch != CR && ch != LF && bp < buflen) {
1017                             scanBytecodeNameChar();
1018                         }
1019                         if (ch == '\"') {
1020                             name = names.fromChars(sbuf, 0, sp);
1021                             token = IDENTIFIER;  // even if #"int" or #"do"
1022                             scanChar();
1023                         } else {
1024                             lexError(pos, "unclosed.bytecode.ident");
1025                         }
1026                     } else {
1027                         lexError("illegal.char", String.valueOf((int)'#'));
1028                     }
1029                     return;
1030                 default:
1031                     if (isSpecial(ch)) {
1032                         scanOperator();
1033                     } else {
1034                         boolean isJavaIdentifierStart;
1035                         if (ch < '\u0080') {
1036                             // all ASCII range chars already handled, above
1037                             isJavaIdentifierStart = false;
1038                         } else {
1039                             char high = scanSurrogates();
1040                             if (high != 0) {
1041                                 if (sp == sbuf.length) { // sbuf is full: go through putChar (NOTE(review): presumably it grows the buffer - confirm)
1042                                     putChar(high);
1043                                 } else {
1044                                     sbuf[sp++] = high;
1045                                 }
1046 
1047                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(
1048                                     Character.toCodePoint(high, ch));
1049                             } else {
1050                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
1051                             }
1052                         }
1053                         if (isJavaIdentifierStart) {
1054                             scanIdent();
1055                         } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
1056                             token = EOF;
1057                             pos = bp = eofPos; // pin both positions to eofPos
1058                         } else {
1059                             lexError("illegal.char", String.valueOf((int)ch));
1060                             scanChar(); // step past the offending character
1061                         }
1062                     }
1063                     return;
1064                 }
1065             }
1066         } finally { // runs on every return path above
1067             endPos = bp;
1068             if (scannerDebug) // optional trace of the raw text between pos and endPos
1069                 System.out.println("nextToken(" + pos
1070                                    + "," + endPos + ")=|" +
1071                                    new String(getRawCharacters(pos, endPos))
1072                                    + "|");
1073         }
1074     }
1075 
1076     /** Gives back the token most recently recorded, normally by nextToken().
1077      */
1078     public Token token() {
1079         return this.token;
1080     }
1081 
1082     /** Overwrites the current token with the one supplied by the caller.
1083      */
1084     public void token(Token token) {
1085         this.token = token;
1086     }
1087 
1088     /** Start offset of the current token, counted from 0 in
1089      *  the raw input stream, that is, before any unicode
1090      *  escape has been translated.
1091      */
1092     public int pos() {
1093         return this.pos;
1094     }
1095 
1096     /** End position of the current token, as recorded in endPos.
1097      */
1098     public int endPos() {
1099         return this.endPos;
1100     }
1101 
1102     /** End position of the token that preceded the current one.
1103      */
1104     public int prevEndPos() {
1105         return this.prevEndPos;
1106     }
1107 
1108     /** Return the position where a lexical error occurred;
1109      */
1110     public int errPos() {
1111         return errPos;
1112     }
1113 
1114     /** Set the position where a lexical error occurred;
1115      */
1116     public void errPos(int pos) {
1117         errPos = pos;
1118     }
1119 
1120     /** Name of the identifier or token that is the current token.
1121      */
1122     public Name name() {
1123         return this.name;
1124     }
1125 
1126     /** Radix recorded for a numeric literal token.
1127      */
1128     public int radix() {
1129         return this.radix;
1130     }
1131 
1132     /** Tells whether a @deprecated has been seen in the last doc comment.
1133      *  Clients are expected to clear the flag with resetDeprecatedFlag.
1134      */
1135     public boolean deprecatedFlag() {
1136         return this.deprecatedFlag;
1137     }
1138 
1139     public void resetDeprecatedFlag() { // clears the flag reported by deprecatedFlag()
1140         this.deprecatedFlag = false;
1141     }
1142 
1143     /** Documentation string of the current token; this implementation keeps none and yields null.
1144      */
1145     public String docComment() {
1146         final String noComment = null;
1147         return noComment;
1148     }
1149 
1150     /**
1151      * Copies the first buflen characters of the input buffer into a new array.
1152      * Unicode escape sequences are left untranslated.
1153      */
1154     public char[] getRawCharacters() {
1155         char[] copy = new char[buflen];
1156         System.arraycopy(buf, 0, copy, 0, copy.length);
1157         return copy;
1158     }
1159 
1160     /**
1161      * Copies a slice of the input buffer into a new character array.
1162      * The slice starts at <code>beginIndex</code> and stops just before
1163      * <code>endIndex</code>, so the result holds
1164      * <code>endIndex-beginIndex</code> characters, in the same way as
1165      * <code>String.substring(beginIndex, endIndex)</code>.
1166      * Unicode escape sequences are not translated.
1167      *
1168      * @param beginIndex the beginning index, inclusive.
1169      * @param endIndex the ending index, exclusive.
1170      * @return a new array holding the requested characters
1171      * @throws IndexOutOfBoundsException if the range reaches outside
1172      *         the bounds of the input buffer
1173      * @throws NegativeArraySizeException if endIndex is less than beginIndex
1174      */
1175     public char[] getRawCharacters(int beginIndex, int endIndex) {
1176         char[] slice = new char[endIndex - beginIndex];
1177         System.arraycopy(buf, beginIndex, slice, 0, slice.length);
1178         return slice;
1179     }
1180 
1181     public enum CommentStyle {
1182         LINE,
1183         BLOCK,
1184         JAVADOC,
1185     }
1186 
1187     /**
1188      * Hook invoked once a whole comment has been scanned; pos and endPos
1189      * then delimit the comment. This version only prints a trace when scannerDebug is set.
1190      */
1191     protected void processComment(CommentStyle style) {
1192         if (!scannerDebug) {
1193             return;
1194         }
1195         String raw = new String(getRawCharacters(pos, endPos));
1196         System.out.println("processComment(" + pos + "," + endPos + "," + style + ")=|" + raw + "|");
1197     }
1198 
1199     /**
1200      * Hook invoked once a whole run of white space has been scanned; pos and endPos
1201      * then delimit the run. This version only prints a trace when scannerDebug is set.
1202      */
1203     protected void processWhiteSpace() {
1204         if (!scannerDebug) {
1205             return;
1206         }
1207         String raw = new String(getRawCharacters(pos, endPos));
1208         System.out.println("processWhitespace(" + pos + "," + endPos + ")=|" + raw + "|");
1209     }
1210 
1211     /**
1212      * Hook invoked after a line terminator has been scanned; only prints a trace when scannerDebug is set.
1213      */
1214     protected void processLineTerminator() {
1215         if (!scannerDebug) {
1216             return;
1217         }
1218         String raw = new String(getRawCharacters(pos, endPos));
1219         System.out.println("processTerminator(" + pos + "," + endPos + ")=|" + raw + "|");
1220     }
1221 
1222     /** Creates the table used to translate between line numbers and
1223      *  positions in the input, built by Position.makeLineMap from buf and buflen.
1224      *  @return a LineMap */
1225     public Position.LineMap getLineMap() {
1226         Position.LineMap lineMap = Position.makeLineMap(buf, buflen, false);
1227         return lineMap;
1228     }
1229 
1230 }