1 /*
   2  * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.javac.parser;
  27 
  28 import com.sun.tools.javac.code.Preview;
  29 import com.sun.tools.javac.code.Source;
  30 import com.sun.tools.javac.code.Source.Feature;
  31 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
  32 import com.sun.tools.javac.resources.CompilerProperties.Errors;
  33 import com.sun.tools.javac.util.*;
  34 import com.sun.tools.javac.util.JCDiagnostic.DiagnosticFlag;
  35 
  36 import java.nio.CharBuffer;
  37 
  38 import static com.sun.tools.javac.parser.Tokens.*;
  39 import static com.sun.tools.javac.util.LayoutCharacters.*;
  40 
  41 /** The lexical analyzer maps an input stream consisting of
  42  *  ASCII characters and Unicode escapes into a token sequence.
  43  *
  44  *  <p><b>This is NOT part of any supported API.
  45  *  If you write code that depends on this, you do so at your own risk.
  46  *  This code and its internal interfaces are subject to change or
  47  *  deletion without notice.</b>
  48  */
  49 public class JavaTokenizer {
  50 
    /** When true, readToken() and the process* hooks print the raw text
     *  of each token, comment, whitespace run and line terminator to
     *  standard output.
     */
    private static final boolean scannerDebug = false;

    /** The source language setting, taken from the scanner factory.
     */
    private Source source;

    /** The preview language setting, taken from the scanner factory. */
    private Preview preview;

    /** The log to be used for error reporting.
     */
    private final Log log;

    /** The token factory. */
    private final Tokens tokens;

    /** The token kind, set by nextToken().
     *  Also set to TokenKind.ERROR by lexError().
     */
    protected TokenKind tk;

    /** The token's radix, set by nextToken().
     */
    protected int radix;

    /** The token's name, set by nextToken().
     */
    protected Name name;

    /** The position where a lexical error occurred;
     *  Position.NOPOS until lexError() or errPos(int) stores a position.
     */
    protected int errPos = Position.NOPOS;

    /** The Unicode reader (low-level stream reader).
     */
    protected UnicodeReader reader;

    /** The factory which created this tokenizer; also used to create the
     *  readers for scanned comments.
     */
    protected ScannerFactory fac;
  88 
  89     private static final boolean hexFloatsWork = hexFloatsWork();
  90     private static boolean hexFloatsWork() {
  91         try {
  92             Float.valueOf("0x1.0p1");
  93             return true;
  94         } catch (NumberFormatException ex) {
  95             return false;
  96         }
  97     }
  98 
    /**
     * Create a scanner from the input buffer.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input, might be modified
     */
    protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
        this(fac, new UnicodeReader(fac, buf));
    }

    /**
     * Create a scanner from the input array.  This method might
     * modify the array.  To avoid copying the input array, ensure
     * that {@code inputLength < input.length} or
     * {@code input[input.length -1]} is a white space character.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input, might be modified
     * @param inputLength the size of the input.
     * Must be positive and less than or equal to input.length.
     */
    protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
        this(fac, new UnicodeReader(fac, buf, inputLength));
    }
 116 
 117     protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
 118         this.fac = fac;
 119         this.log = fac.log;
 120         this.tokens = fac.tokens;
 121         this.source = fac.source;
 122         this.preview = fac.preview;
 123         this.reader = reader;
 124     }
 125 
 126     protected void checkSourceLevel(int pos, Feature feature) {
 127         if (preview.isPreview(feature) && !preview.isEnabled()) {
 128             //preview feature without --preview flag, error
 129             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
 130         } else if (!feature.allowedInSource(source)) {
 131             //incompatible source level, error
 132             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
 133         } else if (preview.isPreview(feature)) {
 134             //use of preview feature, warn
 135             preview.warnPreview(pos, feature);
 136         }
 137     }
 138 
 139     /** Report an error at the given position using the provided arguments.
 140      */
 141     protected void lexError(int pos, JCDiagnostic.Error key) {
 142         log.error(pos, key);
 143         tk = TokenKind.ERROR;
 144         errPos = pos;
 145     }
 146 
 147     protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
 148         log.error(flags, pos, key);
 149         tk = TokenKind.ERROR;
 150         errPos = pos;
 151     }
 152 
    /** Read next character in character or string literal and copy into sbuf.
     *  A backslash introduces an escape sequence: an octal escape of up
     *  to three digits, one of the single-letter escapes handled below,
     *  or an escaped quote or backslash. Any other escape is reported
     *  as an illegal escape character.
     *
     *  @param pos  the position passed to reader.digit() when reading
     *              octal digits
     */
    private void scanLitChar(int pos) {
        if (reader.ch == '\\') {
            // Two literal backslashes in a row (the first one not coming
            // from a Unicode escape): a single escaped backslash.
            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
                reader.skipChar();
                reader.putChar('\\', true);
            } else {
                reader.scanChar();
                switch (reader.ch) {
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                    // Octal escape: one or two digits, or three digits
                    // when the leading digit is at most '3'.
                    char leadch = reader.ch;
                    int oct = reader.digit(pos, 8);
                    reader.scanChar();
                    if ('0' <= reader.ch && reader.ch <= '7') {
                        oct = oct * 8 + reader.digit(pos, 8);
                        reader.scanChar();
                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
                            oct = oct * 8 + reader.digit(pos, 8);
                            reader.scanChar();
                        }
                    }
                    reader.putChar((char)oct);
                    break;
                case 'b':
                    reader.putChar('\b', true); break;
                case 't':
                    reader.putChar('\t', true); break;
                case 'n':
                    reader.putChar('\n', true); break;
                case 'f':
                    reader.putChar('\f', true); break;
                case 'r':
                    reader.putChar('\r', true); break;
                case '\'':
                    reader.putChar('\'', true); break;
                case '\"':
                    reader.putChar('\"', true); break;
                case '\\':
                    reader.putChar('\\', true); break;
                default:
                    lexError(reader.bp, Errors.IllegalEscChar);
                }
            }
        } else if (reader.bp != reader.buflen) {
            // Ordinary character; nothing is copied once the reader
            // position has reached the end of the buffer.
            reader.putChar(true);
        }
    }
 202 
 203     private void scanDigits(int pos, int digitRadix) {
 204         char saveCh;
 205         int savePos;
 206         do {
 207             if (reader.ch != '_') {
 208                 reader.putChar(false);
 209             }
 210             saveCh = reader.ch;
 211             savePos = reader.bp;
 212             reader.scanChar();
 213         } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
 214         if (saveCh == '_')
 215             lexError(savePos, Errors.IllegalUnderscore);
 216     }
 217 
 218     /** Read fractional part of hexadecimal floating point number.
 219      */
 220     private void scanHexExponentAndSuffix(int pos) {
 221         if (reader.ch == 'p' || reader.ch == 'P') {
 222             reader.putChar(true);
 223             skipIllegalUnderscores();
 224             if (reader.ch == '+' || reader.ch == '-') {
 225                 reader.putChar(true);
 226             }
 227             skipIllegalUnderscores();
 228             if (reader.digit(pos, 10) >= 0) {
 229                 scanDigits(pos, 10);
 230                 if (!hexFloatsWork)
 231                     lexError(pos, Errors.UnsupportedCrossFpLit);
 232             } else
 233                 lexError(pos, Errors.MalformedFpLit);
 234         } else {
 235             lexError(pos, Errors.MalformedFpLit);
 236         }
 237         if (reader.ch == 'f' || reader.ch == 'F') {
 238             reader.putChar(true);
 239             tk = TokenKind.FLOATLITERAL;
 240             radix = 16;
 241         } else {
 242             if (reader.ch == 'd' || reader.ch == 'D') {
 243                 reader.putChar(true);
 244             }
 245             tk = TokenKind.DOUBLELITERAL;
 246             radix = 16;
 247         }
 248     }
 249 
 250     /** Read fractional part of floating point number.
 251      */
 252     private void scanFraction(int pos) {
 253         skipIllegalUnderscores();
 254         if (reader.digit(pos, 10) >= 0) {
 255             scanDigits(pos, 10);
 256         }
 257         int sp1 = reader.sp;
 258         if (reader.ch == 'e' || reader.ch == 'E') {
 259             reader.putChar(true);
 260             skipIllegalUnderscores();
 261             if (reader.ch == '+' || reader.ch == '-') {
 262                 reader.putChar(true);
 263             }
 264             skipIllegalUnderscores();
 265             if (reader.digit(pos, 10) >= 0) {
 266                 scanDigits(pos, 10);
 267                 return;
 268             }
 269             lexError(pos, Errors.MalformedFpLit);
 270             reader.sp = sp1;
 271         }
 272     }
 273 
 274     /** Read fractional part and 'd' or 'f' suffix of floating point number.
 275      */
 276     private void scanFractionAndSuffix(int pos) {
 277         radix = 10;
 278         scanFraction(pos);
 279         if (reader.ch == 'f' || reader.ch == 'F') {
 280             reader.putChar(true);
 281             tk = TokenKind.FLOATLITERAL;
 282         } else {
 283             if (reader.ch == 'd' || reader.ch == 'D') {
 284                 reader.putChar(true);
 285             }
 286             tk = TokenKind.DOUBLELITERAL;
 287         }
 288     }
 289 
 290     /** Read fractional part and 'd' or 'f' suffix of floating point number.
 291      */
 292     private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
 293         radix = 16;
 294         Assert.check(reader.ch == '.');
 295         reader.putChar(true);
 296         skipIllegalUnderscores();
 297         if (reader.digit(pos, 16) >= 0) {
 298             seendigit = true;
 299             scanDigits(pos, 16);
 300         }
 301         if (!seendigit)
 302             lexError(pos, Errors.InvalidHexNumber);
 303         else
 304             scanHexExponentAndSuffix(pos);
 305     }
 306 
 307     private void skipIllegalUnderscores() {
 308         if (reader.ch == '_') {
 309             lexError(reader.bp, Errors.IllegalUnderscore);
 310             while (reader.ch == '_')
 311                 reader.scanChar();
 312         }
 313     }
 314 
 315     /** Read a number.
 316      *  @param radix  The radix of the number; one of 2, 8, 10, 16.
 317      */
 318     private void scanNumber(int pos, int radix) {
 319         // for octal, allow base-10 digit in case it's a float literal
 320         this.radix = radix;
 321         int digitRadix = (radix == 8 ? 10 : radix);
 322         int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
 323         boolean seendigit = firstDigit >= 0;
 324         boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
 325         if (seendigit) {
 326             scanDigits(pos, digitRadix);
 327         }
 328         if (radix == 16 && reader.ch == '.') {
 329             scanHexFractionAndSuffix(pos, seendigit);
 330         } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
 331             scanHexExponentAndSuffix(pos);
 332         } else if (digitRadix == 10 && reader.ch == '.') {
 333             reader.putChar(true);
 334             scanFractionAndSuffix(pos);
 335         } else if (digitRadix == 10 &&
 336                    (reader.ch == 'e' || reader.ch == 'E' ||
 337                     reader.ch == 'f' || reader.ch == 'F' ||
 338                     reader.ch == 'd' || reader.ch == 'D')) {
 339             scanFractionAndSuffix(pos);
 340         } else {
 341             if (!seenValidDigit) {
 342                 switch (radix) {
 343                 case 2:
 344                     lexError(pos, Errors.InvalidBinaryNumber);
 345                     break;
 346                 case 16:
 347                     lexError(pos, Errors.InvalidHexNumber);
 348                     break;
 349                 }
 350             }
 351             if (reader.ch == 'l' || reader.ch == 'L') {
 352                 reader.scanChar();
 353                 tk = TokenKind.LONGLITERAL;
 354             } else {
 355                 tk = TokenKind.INTLITERAL;
 356             }
 357         }
 358     }
 359 
 360     /** Read an identifier.
 361      */
 362     private void scanIdent() {
 363         boolean isJavaIdentifierPart;
 364         char high;
 365         reader.putChar(true);
 366         do {
 367             switch (reader.ch) {
 368             case 'A': case 'B': case 'C': case 'D': case 'E':
 369             case 'F': case 'G': case 'H': case 'I': case 'J':
 370             case 'K': case 'L': case 'M': case 'N': case 'O':
 371             case 'P': case 'Q': case 'R': case 'S': case 'T':
 372             case 'U': case 'V': case 'W': case 'X': case 'Y':
 373             case 'Z':
 374             case 'a': case 'b': case 'c': case 'd': case 'e':
 375             case 'f': case 'g': case 'h': case 'i': case 'j':
 376             case 'k': case 'l': case 'm': case 'n': case 'o':
 377             case 'p': case 'q': case 'r': case 's': case 't':
 378             case 'u': case 'v': case 'w': case 'x': case 'y':
 379             case 'z':
 380             case '$': case '_':
 381             case '0': case '1': case '2': case '3': case '4':
 382             case '5': case '6': case '7': case '8': case '9':
 383                 break;
 384             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
 385             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
 386             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
 387             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
 388             case '\u0015': case '\u0016': case '\u0017':
 389             case '\u0018': case '\u0019': case '\u001B':
 390             case '\u007F':
 391                 reader.scanChar();
 392                 continue;
 393             case '\u001A': // EOI is also a legal identifier part
 394                 if (reader.bp >= reader.buflen) {
 395                     name = reader.name();
 396                     tk = tokens.lookupKind(name);
 397                     return;
 398                 }
 399                 reader.scanChar();
 400                 continue;
 401             default:
 402                 if (reader.ch < '\u0080') {
 403                     // all ASCII range chars already handled, above
 404                     isJavaIdentifierPart = false;
 405                 } else {
 406                     if (Character.isIdentifierIgnorable(reader.ch)) {
 407                         reader.scanChar();
 408                         continue;
 409                     } else {
 410                         int codePoint = reader.peekSurrogates();
 411                         if (codePoint >= 0) {
 412                             if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
 413                                 reader.putChar(true);
 414                             }
 415                         } else {
 416                             isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
 417                         }
 418                     }
 419                 }
 420                 if (!isJavaIdentifierPart) {
 421                     name = reader.name();
 422                     tk = tokens.lookupKind(name);
 423                     return;
 424                 }
 425             }
 426             reader.putChar(true);
 427         } while (true);
 428     }
 429 
 430     /** Return true if reader.ch can be part of an operator.
 431      */
 432     private boolean isSpecial(char ch) {
 433         switch (ch) {
 434         case '!': case '%': case '&': case '*': case '?':
 435         case '+': case '-': case ':': case '<': case '=':
 436         case '>': case '^': case '|': case '~':
 437         case '@':
 438             return true;
 439         default:
 440             return false;
 441         }
 442     }
 443 
 444     /** Read longest possible sequence of special characters and convert
 445      *  to token.
 446      */
 447     private void scanOperator() {
 448         while (true) {
 449             reader.putChar(false);
 450             Name newname = reader.name();
 451             TokenKind tk1 = tokens.lookupKind(newname);
 452             if (tk1 == TokenKind.IDENTIFIER) {
 453                 reader.sp--;
 454                 break;
 455             }
 456             tk = tk1;
 457             reader.scanChar();
 458             if (!isSpecial(reader.ch)) break;
 459         }
 460     }
 461 
    /** Read token.
     *  Whitespace, line terminators and comments in front of the token
     *  are consumed first; the comments are collected and attached to
     *  the returned token. Sets {@code tk}, and where applicable
     *  {@code name} and {@code radix}, as a side effect.
     *
     *  @return the next token; its class depends on the tag of its kind
     */
    public Token readToken() {

        // reset the per-token state
        reader.sp = 0;
        name = null;
        radix = 0;

        int pos = 0;
        int endPos = 0;
        List<Comment> comments = null;

        try {
            // Each iteration consumes one piece of input. A plain 'break'
            // (after whitespace, a line terminator or a comment) scans
            // again; 'break loop' ends the scan with a token in tk.
            loop: while (true) {
                pos = reader.bp;
                switch (reader.ch) {
                case ' ': // (Spec 3.6)
                case '\t': // (Spec 3.6)
                case FF: // (Spec 3.6)
                    do {
                        reader.scanChar();
                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
                    processWhiteSpace(pos, reader.bp);
                    break;
                case LF: // (Spec 3.4)
                    reader.scanChar();
                    processLineTerminator(pos, reader.bp);
                    break;
                case CR: // (Spec 3.4)
                    reader.scanChar();
                    // CR followed by LF counts as one line terminator
                    if (reader.ch == LF) {
                        reader.scanChar();
                    }
                    processLineTerminator(pos, reader.bp);
                    break;
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                case '$': case '_':
                    scanIdent();
                    break loop;
                case '0':
                    // leading zero: hexadecimal, binary, or octal/decimal
                    reader.scanChar();
                    if (reader.ch == 'x' || reader.ch == 'X') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 16);
                    } else if (reader.ch == 'b' || reader.ch == 'B') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 2);
                    } else {
                        reader.putChar('0');
                        if (reader.ch == '_') {
                            // underscores after the zero are skipped; they
                            // are an error unless a digit follows them
                            int savePos = reader.bp;
                            do {
                                reader.scanChar();
                            } while (reader.ch == '_');
                            if (reader.digit(pos, 10) < 0) {
                                lexError(savePos, Errors.IllegalUnderscore);
                            }
                        }
                        scanNumber(pos, 8);
                    }
                    break loop;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    scanNumber(pos, 10);
                    break loop;
                case '.':
                    // fraction-only number, ellipsis, or a single dot
                    reader.scanChar();
                    if (reader.digit(pos, 10) >= 0) {
                        reader.putChar('.');
                        scanFractionAndSuffix(pos);
                    } else if (reader.ch == '.') {
                        int savePos = reader.bp;
                        reader.putChar('.'); reader.putChar('.', true);
                        if (reader.ch == '.') {
                            reader.scanChar();
                            reader.putChar('.');
                            tk = TokenKind.ELLIPSIS;
                        } else {
                            // exactly two dots
                            lexError(savePos, Errors.IllegalDot);
                        }
                    } else {
                        tk = TokenKind.DOT;
                    }
                    break loop;
                case ',':
                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
                case ';':
                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
                case '(':
                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
                case ')':
                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
                case '[':
                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
                case ']':
                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
                case '{':
                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
                case '}':
                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
                case '/':
                    // line comment, block or javadoc comment, '/=' or '/'
                    reader.scanChar();
                    if (reader.ch == '/') {
                        do {
                            reader.scanCommentChar();
                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                        // the comment is only recorded if it ended before
                        // the end of the buffer
                        if (reader.bp < reader.buflen) {
                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
                        }
                        break;
                    } else if (reader.ch == '*') {
                        boolean isEmpty = false;
                        reader.scanChar();
                        CommentStyle style;
                        if (reader.ch == '*') {
                            style = CommentStyle.JAVADOC;
                            reader.scanCommentChar();
                            // slash, two stars, slash: an empty comment
                            if (reader.ch == '/') {
                                isEmpty = true;
                            }
                        } else {
                            style = CommentStyle.BLOCK;
                        }
                        // scan up to the closing star-slash or the end of
                        // the buffer
                        while (!isEmpty && reader.bp < reader.buflen) {
                            if (reader.ch == '*') {
                                reader.scanChar();
                                if (reader.ch == '/') break;
                            } else {
                                reader.scanCommentChar();
                            }
                        }
                        if (reader.ch == '/') {
                            reader.scanChar();
                            comments = addComment(comments, processComment(pos, reader.bp, style));
                            break;
                        } else {
                            lexError(pos, Errors.UnclosedComment);
                            break loop;
                        }
                    } else if (reader.ch == '=') {
                        tk = TokenKind.SLASHEQ;
                        reader.scanChar();
                    } else {
                        tk = TokenKind.SLASH;
                    }
                    break loop;
                case '\'':
                    // character literal
                    reader.scanChar();
                    if (reader.ch == '\'') {
                        lexError(pos, Errors.EmptyCharLit);
                        reader.scanChar();
                    } else {
                        if (reader.ch == CR || reader.ch == LF)
                            lexError(pos, Errors.IllegalLineEndInCharLit);
                        scanLitChar(pos);
                        if (reader.ch == '\'') {
                            reader.scanChar();
                            tk = TokenKind.CHARLITERAL;
                        } else {
                            lexError(pos, Errors.UnclosedCharLit);
                        }
                    }
                    break loop;
                case '\"':
                    // string literal: ends at the closing quote; a line
                    // end or the end of the buffer leaves it unclosed
                    reader.scanChar();
                    while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
                        scanLitChar(pos);
                    if (reader.ch == '\"') {
                        tk = TokenKind.STRINGLITERAL;
                        reader.scanChar();
                    } else {
                        lexError(pos, Errors.UnclosedStrLit);
                    }
                    break loop;
                default:
                    if (isSpecial(reader.ch)) {
                        scanOperator();
                    } else {
                        boolean isJavaIdentifierStart;
                        int codePoint = -1;
                        if (reader.ch < '\u0080') {
                            // all ASCII range chars already handled, above
                            isJavaIdentifierStart = false;
                        } else {
                            codePoint = reader.peekSurrogates();
                            if (codePoint >= 0) {
                                // surrogate pair: test the whole code point
                                // and add its first half here; scanIdent()
                                // adds the second half
                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
                                    reader.putChar(true);
                                }
                            } else {
                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
                            }
                        }
                        if (isJavaIdentifierStart) {
                            scanIdent();
                        } else if (reader.digit(pos, 10) >= 0) {
                            scanNumber(pos, 10);
                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                            tk = TokenKind.EOF;
                            pos = reader.realLength;
                        } else {
                            // illegal character: build a printable form
                            // of it for the error message
                            String arg;

                            if (codePoint >= 0) {
                                char high = reader.ch;
                                reader.scanChar();
                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
                            } else {
                                arg = (32 < reader.ch && reader.ch < 127) ?
                                                String.format("%s", reader.ch) :
                                                String.format("\\u%04x", (int)reader.ch);
                            }
                            lexError(pos, Errors.IllegalChar(arg));
                            reader.scanChar();
                        }
                    }
                    break loop;
                }
            }
            endPos = reader.bp;
            // the tag of the token kind selects the token class to create
            switch (tk.tag) {
                case DEFAULT: return new Token(tk, pos, endPos, comments);
                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
                case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
                default: throw new AssertionError();
            }
        }
        finally {
            if (scannerDebug) {
                    System.out.println("nextToken(" + pos
                                       + "," + endPos + ")=|" +
                                       new String(reader.getRawCharacters(pos, endPos))
                                       + "|");
            }
        }
    }
 712     //where
 713         List<Comment> addComment(List<Comment> comments, Comment comment) {
 714             return comments == null ?
 715                     List.of(comment) :
 716                     comments.prepend(comment);
 717         }
 718 
 719     /** Return the position where a lexical error occurred;
 720      */
 721     public int errPos() {
 722         return errPos;
 723     }
 724 
 725     /** Set the position where a lexical error occurred;
 726      */
 727     public void errPos(int pos) {
 728         errPos = pos;
 729     }
 730 
 731     /**
 732      * Called when a complete comment has been scanned. pos and endPos
 733      * will mark the comment boundary.
 734      */
 735     protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
 736         if (scannerDebug)
 737             System.out.println("processComment(" + pos
 738                                + "," + endPos + "," + style + ")=|"
 739                                + new String(reader.getRawCharacters(pos, endPos))
 740                                + "|");
 741         char[] buf = reader.getRawCharacters(pos, endPos);
 742         return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
 743     }
 744 
 745     /**
 746      * Called when a complete whitespace run has been scanned. pos and endPos
 747      * will mark the whitespace boundary.
 748      */
 749     protected void processWhiteSpace(int pos, int endPos) {
 750         if (scannerDebug)
 751             System.out.println("processWhitespace(" + pos
 752                                + "," + endPos + ")=|" +
 753                                new String(reader.getRawCharacters(pos, endPos))
 754                                + "|");
 755     }
 756 
 757     /**
 758      * Called when a line terminator has been processed.
 759      */
 760     protected void processLineTerminator(int pos, int endPos) {
 761         if (scannerDebug)
 762             System.out.println("processTerminator(" + pos
 763                                + "," + endPos + ")=|" +
 764                                new String(reader.getRawCharacters(pos, endPos))
 765                                + "|");
 766     }
 767 
 768     /** Build a map for translating between line numbers and
 769      * positions in the input.
 770      *
 771      * @return a LineMap */
 772     public Position.LineMap getLineMap() {
 773         return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
 774     }
 775 
 776 
 777     /**
         * Basic {@code Comment} implementation backed by a {@code UnicodeReader}
         * over the characters of a single comment.
         * It reports the comment's style and, for {@code JAVADOC}-style comments,
         * whether an {@code @deprecated} tag was found by {@link #scanDocComment()}.
         * {@code getText()} and {@code getSourcePos(int)} only return fixed
         * placeholder values ({@code null} and {@code -1}).
         *
         * @param <U> the kind of reader used to walk the comment's characters
 782     */
 783     protected static class BasicComment<U extends UnicodeReader> implements Comment {
 784 
             // Style supplied at construction; returned unchanged by getStyle().
 785         CommentStyle cs;
             // Reader over the comment's characters; advanced by scanDocComment().
 786         U comment_reader;
 787 
             // Set by scanDocComment() once an @deprecated tag has been recognized.
 788         protected boolean deprecatedFlag = false;
             // Set whenever scanDocComment() exits, so isDeprecated() does not rescan.
 789         protected boolean scanned = false;
 790 
             /**
              * Create a comment over the given reader.
              *
              * @param comment_reader reader over the comment's characters
              * @param cs             the style of this comment
              */
 791         protected BasicComment(U comment_reader, CommentStyle cs) {
 792             this.comment_reader = comment_reader;
 793             this.cs = cs;
 794         }
 795 
             /**
              * This implementation always returns {@code null}.
              * NOTE(review): presumably subclasses that keep the comment text
              * override this; confirm against the subclasses.
              */
 796         public String getText() {
 797             return null;
 798         }
 799 
             /**
              * This implementation ignores {@code pos} and always returns {@code -1}.
              */
 800         public int getSourcePos(int pos) {
 801             return -1;
 802         }
 803 
             /**
              * @return the style given to the constructor
              */
 804         public CommentStyle getStyle() {
 805             return cs;
 806         }
 807 
             /**
              * Report whether this comment was found to contain an {@code @deprecated} tag.
              * A {@code JAVADOC}-style comment that has not been scanned yet is scanned
              * first; for any other style no scan is performed and the current value
              * of {@code deprecatedFlag} is returned.
              */
 808         public boolean isDeprecated() {
 809             if (!scanned && cs == CommentStyle.JAVADOC) {
 810                 scanDocComment();
 811             }
 812             return deprecatedFlag;
 813         }
 814 
             /**
              * Scan the comment's characters line by line looking for an
              * {@code @deprecated} tag, setting {@code deprecatedFlag} when one is found.
              * The first three characters (the opening delimiter) are stepped over. On
              * every line, leading blanks, a run of {@code '*'} and further blanks are
              * skipped before the tag is matched. The tag is accepted only when it is
              * followed by whitespace or directly by the closing delimiter.
              * Scanning stops at the first {@code '*'} that is immediately followed by
              * {@code '/'}, or when the reader's buffer is exhausted.
              * {@code scanned} is set on every exit path.
              */
 815         @SuppressWarnings("fallthrough")
 816         protected void scanDocComment() {
 817             try {
 818                 boolean deprecatedPrefix = false;
 819 
                     // Step over the opening delimiter by moving the buffer index
                     // directly, then reload the current character from the buffer.
 820                 comment_reader.bp += 3; // '/**'
 821                 comment_reader.ch = comment_reader.buf[comment_reader.bp];
 822 
 823                 forEachLine:
 824                 while (comment_reader.bp < comment_reader.buflen) {
 825 
 826                     // Skip optional WhiteSpace at beginning of line
 827                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
 828                         comment_reader.scanCommentChar();
 829                     }
 830 
 831                     // Skip optional consecutive Stars
 832                     while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
 833                         comment_reader.scanCommentChar();
                             // A star directly followed by '/' closes the comment.
 834                         if (comment_reader.ch == '/') {
 835                             return;
 836                         }
 837                     }
 838 
 839                     // Skip optional WhiteSpace after Stars
 840                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
 841                         comment_reader.scanCommentChar();
 842                     }
 843 
                         // The prefix match is per line, so forget any earlier result.
 844                     deprecatedPrefix = false;
 845                     // At beginning of line in the JavaDoc sense.
                         // Once the flag is set there is no need to match the tag again.
 846                     if (!deprecatedFlag) {
 847                         String deprecated = "@deprecated";
 848                         int i = 0;
                             // Consume characters for as long as they spell out the tag;
                             // a partial match leaves the reader just past the matched part.
 849                         while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
 850                             comment_reader.scanCommentChar();
 851                             i++;
 852                             if (i == deprecated.length()) {
 853                                 deprecatedPrefix = true;
 854                                 break;
 855                             }
 856                         }
 857                     }
 858 
                         // The matched text only counts as the tag when it is followed by
                         // whitespace, or when the comment ends immediately after it.
 859                     if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
 860                         if (Character.isWhitespace(comment_reader.ch)) {
 861                             deprecatedFlag = true;
 862                         } else if (comment_reader.ch == '*') {
 863                             comment_reader.scanCommentChar();
 864                             if (comment_reader.ch == '/') {
 865                                 deprecatedFlag = true;
 866                                 return;
 867                             }
 868                         }
 869                     }
 870 
 871                     // Skip rest of line
 872                     while (comment_reader.bp < comment_reader.buflen) {
 873                         switch (comment_reader.ch) {
 874                             case '*':
 875                                 comment_reader.scanCommentChar();
                                     // '*' followed by '/' ends the comment; otherwise keep
                                     // skipping from the character after the star.
 876                                 if (comment_reader.ch == '/') {
 877                                     return;
 878                                 }
 879                                 break;
 880                             case CR: // (Spec 3.4)
 881                                 comment_reader.scanCommentChar();
                                     // A CR not followed by LF ends the line on its own;
                                     // CR LF is consumed as one terminator via the LF case.
 882                                 if (comment_reader.ch != LF) {
 883                                     continue forEachLine;
 884                                 }
 885                             /* fall through to LF case */
 886                             case LF: // (Spec 3.4)
 887                                 comment_reader.scanCommentChar();
 888                                 continue forEachLine;
 889                             default:
 890                                 comment_reader.scanCommentChar();
 891                         }
 892                     } // rest of line
 893                 } // forEachLine
 894                 return;
 895             } finally {
                     // Record completion on every exit path, including the early returns.
 896                 scanned = true;
 897             }
 898         }
 899     }
 900 }