1 /*
   2  * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.javac.parser;
  27 
  28 import com.sun.tools.javac.code.Preview;
  29 import com.sun.tools.javac.code.Source;
  30 import com.sun.tools.javac.code.Source.Feature;
  31 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
  32 import com.sun.tools.javac.resources.CompilerProperties.Errors;
  33 import com.sun.tools.javac.util.*;
  34 import com.sun.tools.javac.util.JCDiagnostic.DiagnosticFlag;
  35 
  36 import java.nio.CharBuffer;
  37 
  38 import static com.sun.tools.javac.parser.Tokens.*;
  39 import static com.sun.tools.javac.util.LayoutCharacters.*;
  40 
  41 /** The lexical analyzer maps an input stream consisting of
  42  *  ASCII characters and Unicode escapes into a token sequence.
  43  *
  44  *  <p><b>This is NOT part of any supported API.
  45  *  If you write code that depends on this, you do so at your own risk.
  46  *  This code and its internal interfaces are subject to change or
  47  *  deletion without notice.</b>
  48  */
  49 public class JavaTokenizer {
  50 
    /** Debug switch: when true, readToken() and the process* hooks print
     *  every scanned span to System.out.
     */
    private static final boolean scannerDebug = false;

    /** The source language setting, taken from the scanner factory.
     */
    private Source source;

    /** The preview language setting, taken from the scanner factory. */
    private Preview preview;

    /** The log to be used for error reporting.
     */
    private final Log log;

    /** The token factory. */
    private final Tokens tokens;

    /** The token kind, set while readToken() scans a token;
     *  lexError() sets it to TokenKind.ERROR.
     */
    protected TokenKind tk;

    /** The token's radix; reset to 0 at the start of readToken() and set
     *  by the number scanning methods.
     */
    protected int radix;

    /** The token's name; reset to null at the start of readToken() and set
     *  by scanIdent().
     */
    protected Name name;

    /** The position where a lexical error occurred; Position.NOPOS until
     *  lexError() or errPos(int) records one.
     */
    protected int errPos = Position.NOPOS;

    /** The Unicode reader (low-level stream reader).
     */
    protected UnicodeReader reader;

    /** The factory which created this tokenizer; also passed to the readers
     *  created for comments in processComment().
     */
    protected ScannerFactory fac;
  88 
  89     private static final boolean hexFloatsWork = hexFloatsWork();
  90     private static boolean hexFloatsWork() {
  91         try {
  92             Float.valueOf("0x1.0p1");
  93             return true;
  94         } catch (NumberFormatException ex) {
  95             return false;
  96         }
  97     }
  98 
    /**
     * Create a scanner from the input buffer.  The buffer is wrapped in a
     * new {@code UnicodeReader}, which is then used as the character source.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input
     */
    protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
        this(fac, new UnicodeReader(fac, buf));
    }

    /**
     * Create a scanner from the input array.  This method might
     * modify the array.  To avoid copying the input array, ensure
     * that {@code inputLength < buf.length} or
     * {@code buf[buf.length -1]} is a white space character.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input, might be modified
     * @param inputLength the size of the input.
     * Must be positive and less than or equal to buf.length.
     */
    protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
        this(fac, new UnicodeReader(fac, buf, inputLength));
    }
 116 
 117     protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
 118         this.fac = fac;
 119         this.log = fac.log;
 120         this.tokens = fac.tokens;
 121         this.source = fac.source;
 122         this.preview = fac.preview;
 123         this.reader = reader;
 124     }
 125 
 126     protected void checkSourceLevel(int pos, Feature feature) {
 127         if (preview.isPreview(feature) && !preview.isEnabled()) {
 128             //preview feature without --preview flag, error
 129             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
 130         } else if (!feature.allowedInSource(source)) {
 131             //incompatible source level, error
 132             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
 133         } else if (preview.isPreview(feature)) {
 134             //use of preview feature, warn
 135             preview.warnPreview(pos, feature);
 136         }
 137     }
 138 
 139     /** Report an error at the given position using the provided arguments.
 140      */
 141     protected void lexError(int pos, JCDiagnostic.Error key) {
 142         log.error(pos, key);
 143         tk = TokenKind.ERROR;
 144         errPos = pos;
 145     }
 146 
 147     protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
 148         log.error(flags, pos, key);
 149         tk = TokenKind.ERROR;
 150         errPos = pos;
 151     }
 152 
 153     /** Read next character in character or string literal and copy into sbuf.
 154      */
 155     private void scanLitChar(int pos) {
 156         if (reader.ch == '\\') {
 157             if (reader.peekChar() == '\\' && !reader.isUnicode()) {
 158                 reader.skipChar();
 159                 reader.putChar('\\', true);
 160             } else {
 161                 reader.scanChar();
 162                 switch (reader.ch) {
 163                 case '0': case '1': case '2': case '3':
 164                 case '4': case '5': case '6': case '7':
 165                     char leadch = reader.ch;
 166                     int oct = reader.digit(pos, 8);
 167                     reader.scanChar();
 168                     if ('0' <= reader.ch && reader.ch <= '7') {
 169                         oct = oct * 8 + reader.digit(pos, 8);
 170                         reader.scanChar();
 171                         if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
 172                             oct = oct * 8 + reader.digit(pos, 8);
 173                             reader.scanChar();
 174                         }
 175                     }
 176                     reader.putChar((char)oct);
 177                     break;
 178                 case 'b':
 179                     reader.putChar('\b', true); break;
 180                 case 't':
 181                     reader.putChar('\t', true); break;
 182                 case 'n':
 183                     reader.putChar('\n', true); break;
 184                 case 'f':
 185                     reader.putChar('\f', true); break;
 186                 case 'r':
 187                     reader.putChar('\r', true); break;
 188                 case '\'':
 189                     reader.putChar('\'', true); break;
 190                 case '\"':
 191                     reader.putChar('\"', true); break;
 192                 case '\\':
 193                     reader.putChar('\\', true); break;
 194                 default:
 195                     lexError(reader.bp, Errors.IllegalEscChar);
 196                 }
 197             }
 198         } else if (reader.bp != reader.buflen) {
 199             reader.putChar(true);
 200         }
 201     }
 202 
 203     private void scanDigits(int pos, int digitRadix) {
 204         char saveCh;
 205         int savePos;
 206         do {
 207             if (reader.ch != '_') {
 208                 reader.putChar(false);
 209             } else {
 210                 checkSourceLevel(pos, Feature.UNDERSCORES_IN_LITERALS);
 211             }
 212             saveCh = reader.ch;
 213             savePos = reader.bp;
 214             reader.scanChar();
 215         } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
 216         if (saveCh == '_')
 217             lexError(savePos, Errors.IllegalUnderscore);
 218     }
 219 
 220     /** Read fractional part of hexadecimal floating point number.
 221      */
 222     private void scanHexExponentAndSuffix(int pos) {
 223         if (reader.ch == 'p' || reader.ch == 'P') {
 224             reader.putChar(true);
 225             skipIllegalUnderscores();
 226             if (reader.ch == '+' || reader.ch == '-') {
 227                 reader.putChar(true);
 228             }
 229             skipIllegalUnderscores();
 230             if (reader.digit(pos, 10) >= 0) {
 231                 scanDigits(pos, 10);
 232                 if (!hexFloatsWork)
 233                     lexError(pos, Errors.UnsupportedCrossFpLit);
 234             } else
 235                 lexError(pos, Errors.MalformedFpLit);
 236         } else {
 237             lexError(pos, Errors.MalformedFpLit);
 238         }
 239         if (reader.ch == 'f' || reader.ch == 'F') {
 240             reader.putChar(true);
 241             tk = TokenKind.FLOATLITERAL;
 242             radix = 16;
 243         } else {
 244             if (reader.ch == 'd' || reader.ch == 'D') {
 245                 reader.putChar(true);
 246             }
 247             tk = TokenKind.DOUBLELITERAL;
 248             radix = 16;
 249         }
 250     }
 251 
 252     /** Read fractional part of floating point number.
 253      */
 254     private void scanFraction(int pos) {
 255         skipIllegalUnderscores();
 256         if (reader.digit(pos, 10) >= 0) {
 257             scanDigits(pos, 10);
 258         }
 259         int sp1 = reader.sp;
 260         if (reader.ch == 'e' || reader.ch == 'E') {
 261             reader.putChar(true);
 262             skipIllegalUnderscores();
 263             if (reader.ch == '+' || reader.ch == '-') {
 264                 reader.putChar(true);
 265             }
 266             skipIllegalUnderscores();
 267             if (reader.digit(pos, 10) >= 0) {
 268                 scanDigits(pos, 10);
 269                 return;
 270             }
 271             lexError(pos, Errors.MalformedFpLit);
 272             reader.sp = sp1;
 273         }
 274     }
 275 
 276     /** Read fractional part and 'd' or 'f' suffix of floating point number.
 277      */
 278     private void scanFractionAndSuffix(int pos) {
 279         radix = 10;
 280         scanFraction(pos);
 281         if (reader.ch == 'f' || reader.ch == 'F') {
 282             reader.putChar(true);
 283             tk = TokenKind.FLOATLITERAL;
 284         } else {
 285             if (reader.ch == 'd' || reader.ch == 'D') {
 286                 reader.putChar(true);
 287             }
 288             tk = TokenKind.DOUBLELITERAL;
 289         }
 290     }
 291 
 292     /** Read fractional part and 'd' or 'f' suffix of floating point number.
 293      */
 294     private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
 295         radix = 16;
 296         Assert.check(reader.ch == '.');
 297         reader.putChar(true);
 298         skipIllegalUnderscores();
 299         if (reader.digit(pos, 16) >= 0) {
 300             seendigit = true;
 301             scanDigits(pos, 16);
 302         }
 303         if (!seendigit)
 304             lexError(pos, Errors.InvalidHexNumber);
 305         else
 306             scanHexExponentAndSuffix(pos);
 307     }
 308 
 309     private void skipIllegalUnderscores() {
 310         if (reader.ch == '_') {
 311             lexError(reader.bp, Errors.IllegalUnderscore);
 312             while (reader.ch == '_')
 313                 reader.scanChar();
 314         }
 315     }
 316 
 317     /** Read a number.
 318      *  @param radix  The radix of the number; one of 2, 8, 10, 16.
 319      */
 320     private void scanNumber(int pos, int radix) {
 321         // for octal, allow base-10 digit in case it's a float literal
 322         this.radix = radix;
 323         int digitRadix = (radix == 8 ? 10 : radix);
 324         int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
 325         boolean seendigit = firstDigit >= 0;
 326         boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
 327         if (seendigit) {
 328             scanDigits(pos, digitRadix);
 329         }
 330         if (radix == 16 && reader.ch == '.') {
 331             scanHexFractionAndSuffix(pos, seendigit);
 332         } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
 333             scanHexExponentAndSuffix(pos);
 334         } else if (digitRadix == 10 && reader.ch == '.') {
 335             reader.putChar(true);
 336             scanFractionAndSuffix(pos);
 337         } else if (digitRadix == 10 &&
 338                    (reader.ch == 'e' || reader.ch == 'E' ||
 339                     reader.ch == 'f' || reader.ch == 'F' ||
 340                     reader.ch == 'd' || reader.ch == 'D')) {
 341             scanFractionAndSuffix(pos);
 342         } else {
 343             if (!seenValidDigit) {
 344                 switch (radix) {
 345                 case 2:
 346                     lexError(pos, Errors.InvalidBinaryNumber);
 347                     break;
 348                 case 16:
 349                     lexError(pos, Errors.InvalidHexNumber);
 350                     break;
 351                 }
 352             }
 353             if (reader.ch == 'l' || reader.ch == 'L') {
 354                 reader.scanChar();
 355                 tk = TokenKind.LONGLITERAL;
 356             } else {
 357                 tk = TokenKind.INTLITERAL;
 358             }
 359         }
 360     }
 361 
 362     /** Read an identifier.
 363      */
 364     private void scanIdent() {
 365         boolean isJavaIdentifierPart;
 366         char high;
 367         reader.putChar(true);
 368         do {
 369             switch (reader.ch) {
 370             case 'A': case 'B': case 'C': case 'D': case 'E':
 371             case 'F': case 'G': case 'H': case 'I': case 'J':
 372             case 'K': case 'L': case 'M': case 'N': case 'O':
 373             case 'P': case 'Q': case 'R': case 'S': case 'T':
 374             case 'U': case 'V': case 'W': case 'X': case 'Y':
 375             case 'Z':
 376             case 'a': case 'b': case 'c': case 'd': case 'e':
 377             case 'f': case 'g': case 'h': case 'i': case 'j':
 378             case 'k': case 'l': case 'm': case 'n': case 'o':
 379             case 'p': case 'q': case 'r': case 's': case 't':
 380             case 'u': case 'v': case 'w': case 'x': case 'y':
 381             case 'z':
 382             case '$': case '_':
 383             case '0': case '1': case '2': case '3': case '4':
 384             case '5': case '6': case '7': case '8': case '9':
 385                 break;
 386             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
 387             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
 388             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
 389             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
 390             case '\u0015': case '\u0016': case '\u0017':
 391             case '\u0018': case '\u0019': case '\u001B':
 392             case '\u007F':
 393                 reader.scanChar();
 394                 continue;
 395             case '\u001A': // EOI is also a legal identifier part
 396                 if (reader.bp >= reader.buflen) {
 397                     name = reader.name();
 398                     tk = tokens.lookupKind(name);
 399                     return;
 400                 }
 401                 reader.scanChar();
 402                 continue;
 403             default:
 404                 if (reader.ch < '\u0080') {
 405                     // all ASCII range chars already handled, above
 406                     isJavaIdentifierPart = false;
 407                 } else {
 408                     if (Character.isIdentifierIgnorable(reader.ch)) {
 409                         reader.scanChar();
 410                         continue;
 411                     } else {
 412                         int codePoint = reader.peekSurrogates();
 413                         if (codePoint >= 0) {
 414                             if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
 415                                 reader.putChar(true);
 416                             }
 417                         } else {
 418                             isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
 419                         }
 420                     }
 421                 }
 422                 if (!isJavaIdentifierPart) {
 423                     name = reader.name();
 424                     tk = tokens.lookupKind(name);
 425                     return;
 426                 }
 427             }
 428             reader.putChar(true);
 429         } while (true);
 430     }
 431 
 432     /** Return true if reader.ch can be part of an operator.
 433      */
 434     private boolean isSpecial(char ch) {
 435         switch (ch) {
 436         case '!': case '%': case '&': case '*': case '?':
 437         case '+': case '-': case ':': case '<': case '=':
 438         case '>': case '^': case '|': case '~':
 439         case '@':
 440             return true;
 441         default:
 442             return false;
 443         }
 444     }
 445 
 446     /** Read longest possible sequence of special characters and convert
 447      *  to token.
 448      */
 449     private void scanOperator() {
 450         while (true) {
 451             reader.putChar(false);
 452             Name newname = reader.name();
 453             TokenKind tk1 = tokens.lookupKind(newname);
 454             if (tk1 == TokenKind.IDENTIFIER) {
 455                 reader.sp--;
 456                 break;
 457             }
 458             tk = tk1;
 459             reader.scanChar();
 460             if (!isSpecial(reader.ch)) break;
 461         }
 462     }
 463 
    /** Read the next token from the reader.
     *  White space, line terminators and comments in front of the token are
     *  consumed first; they are passed to processWhiteSpace(),
     *  processLineTerminator() and processComment(), and the scanned comments
     *  are attached to the returned token.
     *
     *  @return a token whose concrete class is chosen by the tag of the
     *          scanned token kind; after a lexical error the kind is
     *          TokenKind.ERROR (set by lexError())
     */
    public Token readToken() {

        // reset the per-token state
        reader.sp = 0;
        name = null;
        radix = 0;

        int pos = 0;
        int endPos = 0;
        List<Comment> comments = null;

        try {
            // Each iteration handles one lexical element.  A plain 'break'
            // only leaves the switch, so the loop goes on with the next
            // element (white space, line terminators and comments);
            // 'break loop' ends the scan once a token has been recognized.
            loop: while (true) {
                pos = reader.bp;
                switch (reader.ch) {
                case ' ': // (Spec 3.6)
                case '\t': // (Spec 3.6)
                case FF: // (Spec 3.6)
                    do {
                        reader.scanChar();
                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
                    processWhiteSpace(pos, reader.bp);
                    break;
                case LF: // (Spec 3.4)
                    reader.scanChar();
                    processLineTerminator(pos, reader.bp);
                    break;
                case CR: // (Spec 3.4)
                    reader.scanChar();
                    // CR LF counts as a single line terminator
                    if (reader.ch == LF) {
                        reader.scanChar();
                    }
                    processLineTerminator(pos, reader.bp);
                    break;
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                case '$': case '_':
                    scanIdent();
                    break loop;
                case '0':
                    // a leading '0' starts a hexadecimal (0x), a binary (0b) or
                    // an octal literal; the octal path also accepts decimal
                    // digits because the literal may turn out to be a float
                    reader.scanChar();
                    if (reader.ch == 'x' || reader.ch == 'X') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 16);
                    } else if (reader.ch == 'b' || reader.ch == 'B') {
                        checkSourceLevel(pos, Feature.BINARY_LITERALS);
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 2);
                    } else {
                        reader.putChar('0');
                        if (reader.ch == '_') {
                            // underscores after the leading 0 are only legal
                            // when a digit follows them
                            int savePos = reader.bp;
                            do {
                                reader.scanChar();
                            } while (reader.ch == '_');
                            if (reader.digit(pos, 10) < 0) {
                                lexError(savePos, Errors.IllegalUnderscore);
                            }
                        }
                        scanNumber(pos, 8);
                    }
                    break loop;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    scanNumber(pos, 10);
                    break loop;
                case '.':
                    // '.' followed by a digit is a floating point literal,
                    // '...' is the ellipsis, '..' is an error, otherwise a dot
                    reader.scanChar();
                    if (reader.digit(pos, 10) >= 0) {
                        reader.putChar('.');
                        scanFractionAndSuffix(pos);
                    } else if (reader.ch == '.') {
                        int savePos = reader.bp;
                        reader.putChar('.'); reader.putChar('.', true);
                        if (reader.ch == '.') {
                            reader.scanChar();
                            reader.putChar('.');
                            tk = TokenKind.ELLIPSIS;
                        } else {
                            lexError(savePos, Errors.IllegalDot);
                        }
                    } else {
                        tk = TokenKind.DOT;
                    }
                    break loop;
                case ',':
                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
                case ';':
                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
                case '(':
                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
                case ')':
                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
                case '[':
                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
                case ']':
                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
                case '{':
                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
                case '}':
                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
                case '/':
                    reader.scanChar();
                    if (reader.ch == '/') {
                        // line comment: runs up to the next CR or LF, or to
                        // the end of the buffer
                        do {
                            reader.scanCommentChar();
                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                        // the comment is only recorded if it ended before the
                        // end of the buffer
                        if (reader.bp < reader.buflen) {
                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
                        }
                        // leaves the switch only: continue with what follows
                        break;
                    } else if (reader.ch == '*') {
                        // block or javadoc comment
                        boolean isEmpty = false;
                        reader.scanChar();
                        CommentStyle style;
                        if (reader.ch == '*') {
                            style = CommentStyle.JAVADOC;
                            reader.scanCommentChar();
                            // a '/' right after the second '*' closes the comment
                            // at once; it is classified as JAVADOC with no content
                            if (reader.ch == '/') {
                                isEmpty = true;
                            }
                        } else {
                            style = CommentStyle.BLOCK;
                        }
                        // search for the closing '*' '/' pair
                        while (!isEmpty && reader.bp < reader.buflen) {
                            if (reader.ch == '*') {
                                reader.scanChar();
                                if (reader.ch == '/') break;
                            } else {
                                reader.scanCommentChar();
                            }
                        }
                        if (reader.ch == '/') {
                            reader.scanChar();
                            comments = addComment(comments, processComment(pos, reader.bp, style));
                            // leaves the switch only: continue with what follows
                            break;
                        } else {
                            // end of buffer reached inside the comment
                            lexError(pos, Errors.UnclosedComment);
                            break loop;
                        }
                    } else if (reader.ch == '=') {
                        tk = TokenKind.SLASHEQ;
                        reader.scanChar();
                    } else {
                        tk = TokenKind.SLASH;
                    }
                    break loop;
                case '\'':
                    // character literal
                    reader.scanChar();
                    if (reader.ch == '\'') {
                        lexError(pos, Errors.EmptyCharLit);
                        reader.scanChar();
                    } else {
                        if (reader.ch == CR || reader.ch == LF)
                            lexError(pos, Errors.IllegalLineEndInCharLit);
                        scanLitChar(pos);
                        if (reader.ch == '\'') {
                            reader.scanChar();
                            tk = TokenKind.CHARLITERAL;
                        } else {
                            lexError(pos, Errors.UnclosedCharLit);
                        }
                    }
                    break loop;
                case '\"':
                    // string literal: scanned up to the closing quote, a line
                    // end or the end of the buffer
                    reader.scanChar();
                    while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
                        scanLitChar(pos);
                    if (reader.ch == '\"') {
                        tk = TokenKind.STRINGLITERAL;
                        reader.scanChar();
                    } else {
                        lexError(pos, Errors.UnclosedStrLit);
                    }
                    break loop;
                default:
                    if (isSpecial(reader.ch)) {
                        scanOperator();
                    } else {
                        boolean isJavaIdentifierStart;
                        int codePoint = -1;
                        if (reader.ch < '\u0080') {
                            // all ASCII range chars already handled, above
                            isJavaIdentifierStart = false;
                        } else {
                            codePoint = reader.peekSurrogates();
                            if (codePoint >= 0) {
                                // two chars are involved: the current one is
                                // appended here, the following one by scanIdent()
                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
                                    reader.putChar(true);
                                }
                            } else {
                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
                            }
                        }
                        if (isJavaIdentifierStart) {
                            scanIdent();
                        } else if (reader.digit(pos, 10) >= 0) {
                            scanNumber(pos, 10);
                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                            // end of input, possibly marked by a final EOI character
                            tk = TokenKind.EOF;
                            pos = reader.realLength;
                        } else {
                            // illegal character: build a printable form of it
                            // for the error message
                            String arg;

                            if (codePoint >= 0) {
                                char high = reader.ch;
                                reader.scanChar();
                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
                            } else {
                                arg = (32 < reader.ch && reader.ch < 127) ?
                                                String.format("%s", reader.ch) :
                                                String.format("\\u%04x", (int)reader.ch);
                            }
                            lexError(pos, Errors.IllegalChar(arg));
                            reader.scanChar();
                        }
                    }
                    break loop;
                }
            }
            endPos = reader.bp;
            // the tag of the token kind selects the concrete token class
            switch (tk.tag) {
                case DEFAULT: return new Token(tk, pos, endPos, comments);
                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
                case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
                default: throw new AssertionError();
            }
        }
        finally {
            // debug trace of the raw characters covered by the token
            if (scannerDebug) {
                    System.out.println("nextToken(" + pos
                                       + "," + endPos + ")=|" +
                                       new String(reader.getRawCharacters(pos, endPos))
                                       + "|");
            }
        }
    }
 715     //where
 716         List<Comment> addComment(List<Comment> comments, Comment comment) {
 717             return comments == null ?
 718                     List.of(comment) :
 719                     comments.prepend(comment);
 720         }
 721 
 722     /** Return the position where a lexical error occurred;
 723      */
 724     public int errPos() {
 725         return errPos;
 726     }
 727 
 728     /** Set the position where a lexical error occurred;
 729      */
 730     public void errPos(int pos) {
 731         errPos = pos;
 732     }
 733 
 734     /**
 735      * Called when a complete comment has been scanned. pos and endPos
 736      * will mark the comment boundary.
 737      */
 738     protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
 739         if (scannerDebug)
 740             System.out.println("processComment(" + pos
 741                                + "," + endPos + "," + style + ")=|"
 742                                + new String(reader.getRawCharacters(pos, endPos))
 743                                + "|");
 744         char[] buf = reader.getRawCharacters(pos, endPos);
 745         return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
 746     }
 747 
 748     /**
 749      * Called when a complete whitespace run has been scanned. pos and endPos
 750      * will mark the whitespace boundary.
 751      */
 752     protected void processWhiteSpace(int pos, int endPos) {
 753         if (scannerDebug)
 754             System.out.println("processWhitespace(" + pos
 755                                + "," + endPos + ")=|" +
 756                                new String(reader.getRawCharacters(pos, endPos))
 757                                + "|");
 758     }
 759 
 760     /**
 761      * Called when a line terminator has been processed.
 762      */
 763     protected void processLineTerminator(int pos, int endPos) {
 764         if (scannerDebug)
 765             System.out.println("processTerminator(" + pos
 766                                + "," + endPos + ")=|" +
 767                                new String(reader.getRawCharacters(pos, endPos))
 768                                + "|");
 769     }
 770 
 771     /** Build a map for translating between line numbers and
 772      * positions in the input.
 773      *
 774      * @return a LineMap */
 775     public Position.LineMap getLineMap() {
 776         return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
 777     }
 778 
 779 
 780     /**
 781     * Scan a documentation comment; determine if a deprecated tag is present.
 782     * Called once the initial /, * have been skipped, positioned at the second *
 783     * (which is treated as the beginning of the first line).
 784     * Stops positioned at the closing '/'.
 785     */
 786     protected static class BasicComment<U extends UnicodeReader> implements Comment {
 787 
 788         CommentStyle cs;
 789         U comment_reader;
 790 
 791         protected boolean deprecatedFlag = false;
 792         protected boolean scanned = false;
 793 
 794         protected BasicComment(U comment_reader, CommentStyle cs) {
 795             this.comment_reader = comment_reader;
 796             this.cs = cs;
 797         }
 798 
 799         public String getText() {
 800             return null;
 801         }
 802 
 803         public int getSourcePos(int pos) {
 804             return -1;
 805         }
 806 
 807         public CommentStyle getStyle() {
 808             return cs;
 809         }
 810 
 811         public boolean isDeprecated() {
 812             if (!scanned && cs == CommentStyle.JAVADOC) {
 813                 scanDocComment();
 814             }
 815             return deprecatedFlag;
 816         }
 817 
 818         @SuppressWarnings("fallthrough")
 819         protected void scanDocComment() {
 820             try {
 821                 boolean deprecatedPrefix = false;
 822 
 823                 comment_reader.bp += 3; // '/**'
 824                 comment_reader.ch = comment_reader.buf[comment_reader.bp];
 825 
 826                 forEachLine:
 827                 while (comment_reader.bp < comment_reader.buflen) {
 828 
 829                     // Skip optional WhiteSpace at beginning of line
 830                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
 831                         comment_reader.scanCommentChar();
 832                     }
 833 
 834                     // Skip optional consecutive Stars
 835                     while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
 836                         comment_reader.scanCommentChar();
 837                         if (comment_reader.ch == '/') {
 838                             return;
 839                         }
 840                     }
 841 
 842                     // Skip optional WhiteSpace after Stars
 843                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
 844                         comment_reader.scanCommentChar();
 845                     }
 846 
 847                     deprecatedPrefix = false;
 848                     // At beginning of line in the JavaDoc sense.
 849                     if (!deprecatedFlag) {
 850                         String deprecated = "@deprecated";
 851                         int i = 0;
 852                         while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
 853                             comment_reader.scanCommentChar();
 854                             i++;
 855                             if (i == deprecated.length()) {
 856                                 deprecatedPrefix = true;
 857                                 break;
 858                             }
 859                         }
 860                     }
 861 
 862                     if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
 863                         if (Character.isWhitespace(comment_reader.ch)) {
 864                             deprecatedFlag = true;
 865                         } else if (comment_reader.ch == '*') {
 866                             comment_reader.scanCommentChar();
 867                             if (comment_reader.ch == '/') {
 868                                 deprecatedFlag = true;
 869                                 return;
 870                             }
 871                         }
 872                     }
 873 
 874                     // Skip rest of line
 875                     while (comment_reader.bp < comment_reader.buflen) {
 876                         switch (comment_reader.ch) {
 877                             case '*':
 878                                 comment_reader.scanCommentChar();
 879                                 if (comment_reader.ch == '/') {
 880                                     return;
 881                                 }
 882                                 break;
 883                             case CR: // (Spec 3.4)
 884                                 comment_reader.scanCommentChar();
 885                                 if (comment_reader.ch != LF) {
 886                                     continue forEachLine;
 887                                 }
 888                             /* fall through to LF case */
 889                             case LF: // (Spec 3.4)
 890                                 comment_reader.scanCommentChar();
 891                                 continue forEachLine;
 892                             default:
 893                                 comment_reader.scanCommentChar();
 894                         }
 895                     } // rest of line
 896                 } // forEachLine
 897                 return;
 898             } finally {
 899                 scanned = true;
 900             }
 901         }
 902     }
 903 }