1 /*
   2  * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.javac.parser;
  27 
  28 import com.sun.tools.javac.code.Lint;
  29 import com.sun.tools.javac.code.Lint.LintCategory;
  30 import com.sun.tools.javac.code.Preview;
  31 import com.sun.tools.javac.code.Source;
  32 import com.sun.tools.javac.code.Source.Feature;
  33 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
  34 import com.sun.tools.javac.resources.CompilerProperties.Errors;
  35 import com.sun.tools.javac.resources.CompilerProperties.Warnings;
  36 import com.sun.tools.javac.util.*;
  37 import com.sun.tools.javac.util.JCDiagnostic.*;
  38 
  39 import java.lang.reflect.InvocationTargetException;
  40 import java.lang.reflect.Method;
  41 import java.nio.CharBuffer;
  42 import java.util.HashSet;
  43 import java.util.Set;
  44 
  45 import static com.sun.tools.javac.parser.Tokens.*;
  46 import static com.sun.tools.javac.util.LayoutCharacters.*;
  47 
  48 /** The lexical analyzer maps an input stream consisting of
  49  *  ASCII characters and Unicode escapes into a token sequence.
  50  *
  51  *  <p><b>This is NOT part of any supported API.
  52  *  If you write code that depends on this, you do so at your own risk.
  53  *  This code and its internal interfaces are subject to change or
  54  *  deletion without notice.</b>
  55  */
  56 public class JavaTokenizer {
  57 
    /** Debug flag for the scanner; constant {@code false} here.
     *  NOTE(review): its use is not visible in this part of the file.
     */
    private static final boolean scannerDebug = false;

    /** The source language setting; taken from the factory in the constructor.
     */
    private Source source;

    /** The preview language setting; taken from the factory in the constructor.
     */
    private Preview preview;

    /** The log to be used for error reporting.
     */
    private final Log log;

    /** The token factory. */
    private final Tokens tokens;

    /** The kind of the token being scanned. Assigned by the scan methods
     *  (e.g. scanString, scanNumber, scanIdent, scanOperator) and set to
     *  {@code TokenKind.ERROR} by lexError.
     */
    protected TokenKind tk;

    /** The token's radix. Reset to 0 at the start of readToken() and
     *  assigned by the number scanning methods.
     */
    protected int radix;

    /** The token's name. Reset to null at the start of readToken() and
     *  assigned by scanIdent().
     */
    protected Name name;

    /** The position where a lexical error occurred; assigned by lexError.
     */
    protected int errPos = Position.NOPOS;

    /** The Unicode reader (low-level stream reader).
     */
    protected UnicodeReader reader;

    /** Should the string be stripped of indentation?
     *  Cleared at the start of scanString() and set when a text block is scanned.
     */
    protected boolean shouldStripIndent;

    /** Should the string's escapes be translated?
     *  Cleared at the start of scanString() and set when an escape is scanned
     *  while text block support is available.
     */
    protected boolean shouldTranslateEscapes;

    /** Has the string broken escapes?
     *  Cleared at the start of scanString() and set by scanLitCharRaw() when it
     *  meets an unrecognized escape.
     */
    protected boolean hasBrokenEscapes;

    /** The factory which created this tokenizer.
     */
    protected ScannerFactory fac;

    // The set of lint options currently in effect. It is initialized
    // from the context, and then is set/reset as needed by Attr as it
    // visits all the various parts of the trees during attribution.
    // In this class it is assigned from fac.lint in the constructor.
    protected Lint lint;
 112 
 113     private static final boolean hexFloatsWork = hexFloatsWork();
 114     private static boolean hexFloatsWork() {
 115         try {
 116             Float.valueOf("0x1.0p1");
 117             return true;
 118         } catch (NumberFormatException ex) {
 119             return false;
 120         }
 121     }
 122 
    /**
     * Create a tokenizer that reads from a character buffer. The buffer is
     * wrapped in a new {@link UnicodeReader}.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input, might be modified
     */
    protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
        this(fac, new UnicodeReader(fac, buf));
    }

    /**
     * Create a scanner from the input array.  This method might
     * modify the array.  To avoid copying the input array, ensure
     * that {@code inputLength < input.length} or
     * {@code input[input.length -1]} is a white space character.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input, might be modified
     * @param inputLength the size of the input.
     * Must be positive and less than or equal to input.length.
     */
    protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
        this(fac, new UnicodeReader(fac, buf, inputLength));
    }

    /**
     * Create a tokenizer over an existing reader. The log, token factory,
     * source setting, preview setting and lint settings are all taken
     * from the factory.
     *
     * @param fac the factory which created this Scanner
     * @param reader the low-level reader supplying the characters
     */
    protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
        this.fac = fac;
        this.log = fac.log;
        this.tokens = fac.tokens;
        this.source = fac.source;
        this.preview = fac.preview;
        this.reader = reader;
        this.lint = fac.lint;
    }
 150 
 151     protected void checkSourceLevel(int pos, Feature feature) {
 152         if (preview.isPreview(feature) && !preview.isEnabled()) {
 153             //preview feature without --preview flag, error
 154             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
 155         } else if (!feature.allowedInSource(source)) {
 156             //incompatible source level, error
 157             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
 158         } else if (preview.isPreview(feature)) {
 159             //use of preview feature, warn
 160             preview.warnPreview(pos, feature);
 161         }
 162     }
 163 
 164     /** Report an error at the given position using the provided arguments.
 165      */
 166     protected void lexError(int pos, JCDiagnostic.Error key) {
 167         log.error(pos, key);
 168         tk = TokenKind.ERROR;
 169         errPos = pos;
 170     }
 171 
 172     protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
 173         log.error(flags, pos, key);
 174         tk = TokenKind.ERROR;
 175         errPos = pos;
 176     }
 177 
 178     protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
 179         DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
 180         log.warning(lc, dp, key);
 181     }
 182 
    /** Read next character in character or string literal and copy into sbuf.
     *  Escape sequences are translated: the character they denote is what
     *  gets copied. An unrecognized escape is reported as an error.
     *
     *  @param pos position handed to reader.digit when reading octal digits
     */
    private void scanLitChar(int pos) {
        if (reader.ch == '\\') {
            // A backslash followed by a backslash (when isUnicode() is false)
            // is handled up front.
            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
                reader.skipChar();
                reader.putChar('\\', true);
            } else {
                // Move on to the character that selects the escape.
                reader.scanChar();
                switch (reader.ch) {
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                    // Octal escape: up to three digits, the third one only
                    // when the leading digit is 0-3.
                    char leadch = reader.ch;
                    int oct = reader.digit(pos, 8);
                    reader.scanChar();
                    if ('0' <= reader.ch && reader.ch <= '7') {
                        oct = oct * 8 + reader.digit(pos, 8);
                        reader.scanChar();
                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
                            oct = oct * 8 + reader.digit(pos, 8);
                            reader.scanChar();
                        }
                    }
                    // The digits were already consumed by the scanChar calls above.
                    reader.putChar((char)oct);
                    break;
                case 'b':
                    reader.putChar('\b', true); break;
                case 't':
                    reader.putChar('\t', true); break;
                case 'n':
                    reader.putChar('\n', true); break;
                case 'f':
                    reader.putChar('\f', true); break;
                case 'r':
                    reader.putChar('\r', true); break;
                case '\'':
                    reader.putChar('\'', true); break;
                case '\"':
                    reader.putChar('\"', true); break;
                case '\\':
                    reader.putChar('\\', true); break;
                default:
                    // Unknown escape; nothing is copied and no further
                    // character is scanned here.
                    lexError(reader.bp, Errors.IllegalEscChar);
                }
            }
        } else if (reader.bp != reader.buflen) {
            // Ordinary character, as long as the reader is not at buflen.
            reader.putChar(true);
        }
    }
 232 
    /** Read next character in character or string literal and copy into sbuf
     *  without translating escapes. Used by text blocks to preflight verify
     *  escape sequences. An unrecognized escape sets hasBrokenEscapes and is
     *  reported as an error.
     *
     *  @param pos not used by this method
     */
    private void scanLitCharRaw(int pos) {
        if (reader.ch == '\\') {
            // A backslash followed by a backslash (when isUnicode() is false):
            // both backslashes are copied.
            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
                reader.skipChar();
                reader.putChar('\\', false);
                reader.putChar('\\', true);
            } else {
                // Copy the backslash itself, then look at the escape selector.
                reader.putChar('\\', true);
                switch (reader.ch) {
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                    // Octal escape: copy up to three digits, the third one
                    // only when the leading digit is 0-3.
                    char leadch = reader.ch;
                    reader.putChar(true);
                    if ('0' <= reader.ch && reader.ch <= '7') {
                        reader.putChar(true);
                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
                            reader.putChar(true);
                        }
                    }
                    break;
                // Effectively list of valid escape sequences.
                case 'b':
                case 't':
                case 'n':
                case 'f':
                case 'r':
                case '\'':
                case '\"':
                case '\\':
                    reader.putChar(true); break;
                default:
                    // Unknown escape: flag it and report the error.
                    hasBrokenEscapes = true;
                    lexError(reader.bp, Errors.IllegalEscChar);
                }
            }
        } else if (reader.bp != reader.buflen) {
            // Ordinary character, as long as the reader is not at buflen.
            reader.putChar(true);
        }
    }
 276 
 277     /** Interim access to String methods used to support text blocks.
 278      *  Required to handle bootstrapping with pre-text block jdks.
 279      *  Could be reworked in the 'next' jdk.
 280      */
 281     static class TextBlockSupport {
 282         /** Reflection method to remove incidental indentation.
 283          */
 284         private static final Method stripIndent;
 285 
 286         /** Reflection method to translate escape sequences.
 287          */
 288         private static final Method translateEscapes;
 289 
 290         /** true if stripIndent and translateEscapes are available in the bootstrap jdk.
 291          */
 292         private static final boolean hasSupport;
 293 
 294         /** Get a string method via refection or null if not available.
 295          */
 296         private static Method getStringMethodOrNull(String name) {
 297             try {
 298                 return String.class.getMethod(name);
 299             } catch (Exception ex) {
 300                 // Method not available, return null.
 301             }
 302             return null;
 303         }
 304 
 305         static {
 306             // Get text block string methods.
 307             stripIndent = getStringMethodOrNull("stripIndent");
 308             translateEscapes = getStringMethodOrNull("translateEscapes");
 309             // true if stripIndent and translateEscapes are available in the bootstrap jdk.
 310             hasSupport = stripIndent != null && translateEscapes != null;
 311         }
 312 
 313         /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk.
 314          */
 315         static boolean hasSupport() {
 316             return hasSupport;
 317         }
 318 
 319         /** Return the leading whitespace count (indentation) of the line.
 320          */
 321         private static int indent(String line) {
 322             return line.length() - line.stripLeading().length();
 323         }
 324 
 325         enum WhitespaceChecks {
 326             INCONSISTENT,
 327             TRAILING
 328         };
 329 
 330         /** Check that the use of white space in content is not problematic.
 331          */
 332         static Set<WhitespaceChecks> checkWhitespace(String string) {
 333             // Start with empty result set.
 334             Set<WhitespaceChecks> checks = new HashSet<>();
 335             // No need to check empty strings.
 336             if (string.isEmpty()) {
 337                 return checks;
 338             }
 339             // Maximum common indentation.
 340             int outdent = 0;
 341             // No need to check indentation if opting out (last line is empty.)
 342             char lastChar = string.charAt(string.length() - 1);
 343             boolean optOut = lastChar == '\n' || lastChar == '\r';
 344             // Split string based at line terminators.
 345             String[] lines = string.split("\\R");
 346             int length = lines.length;
 347             // Extract last line.
 348             String lastLine = lines[length - 1];
 349             if (!optOut) {
 350                 // Prime with the last line indentation (may be blank.)
 351                 outdent = indent(lastLine);
 352                 for (String line : lines) {
 353                     // Blanks lines have no influence (last line accounted for.)
 354                     if (!line.isBlank()) {
 355                         outdent = Integer.min(outdent, indent(line));
 356                         if (outdent == 0) {
 357                             break;
 358                         }
 359                     }
 360                 }
 361             }
 362             // Last line is representative.
 363             String start = lastLine.substring(0, outdent);
 364             for (String line : lines) {
 365                 // Fail if a line does not have the same indentation.
 366                 if (!line.isBlank() && !line.startsWith(start)) {
 367                     // Mix of different white space
 368                     checks.add(WhitespaceChecks.INCONSISTENT);
 369                 }
 370                 // Line has content even after indent is removed.
 371                 if (outdent < line.length()) {
 372                     // Is the last character a white space.
 373                     lastChar = line.charAt(line.length() - 1);
 374                     if (Character.isWhitespace(lastChar)) {
 375                         // Has trailing white space.
 376                         checks.add(WhitespaceChecks.TRAILING);
 377                     }
 378                 }
 379             }
 380             return checks;
 381         }
 382 
 383         /** Invoke String::stripIndent through reflection.
 384          */
 385         static String stripIndent(String string) {
 386             try {
 387                 string = (String)stripIndent.invoke(string);
 388             } catch (InvocationTargetException | IllegalAccessException ex) {
 389                 throw new RuntimeException(ex);
 390             }
 391             return string;
 392         }
 393 
 394         /** Invoke String::translateEscapes through reflection.
 395          */
 396         static String translateEscapes(String string) {
 397             try {
 398                 string = (String)translateEscapes.invoke(string);
 399             } catch (InvocationTargetException | IllegalAccessException ex) {
 400                 throw new RuntimeException(ex);
 401             }
 402             return string;
 403         }
 404     }
 405 
 406     /** Test for EOLN.
 407      */
 408     private boolean isEOLN() {
 409         return reader.ch == LF || reader.ch == CR;
 410     }
 411 
 412     /** Test for CRLF.
 413      */
 414     private boolean isCRLF() {
 415         return reader.ch == CR && reader.peekChar() == LF;
 416     }
 417 
 418     /** Count and skip repeated occurances of the specified character.
 419      */
 420     private int countChar(char ch, int max) {
 421         int count = 0;
 422         for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) {
 423             reader.scanChar();
 424         }
 425         return count;
 426     }
 427 
    /** Scan a string literal or text block.
     *  On success tk is set to STRINGLITERAL. If the input runs out or, for a
     *  string literal, a line terminator is met before the close delimiter,
     *  an unclosed literal/text block error is reported. The flags
     *  shouldStripIndent, shouldTranslateEscapes and hasBrokenEscapes are
     *  cleared on entry and updated while scanning.
     *
     *  @param pos position of the opening double quote; used for resets and
     *             for error reporting
     */
    private void scanString(int pos) {
        // Clear flags.
        shouldStripIndent = false;
        shouldTranslateEscapes = false;
        hasBrokenEscapes = false;
        // Check if text block string methods are present.
        boolean hasTextBlockSupport = TextBlockSupport.hasSupport();
        // Track the end of first line for error recovery.
        int firstEOLN = -1;
        // Attempt to scan for up to 3 double quotes.
        int openCount = countChar('\"', 3);
        switch (openCount) {
        case 1: // Starting a string literal.
            break;
        case 2: // Starting an empty string literal.
            // Start again but only consume one quote.
            reader.reset(pos);
            openCount = countChar('\"', 1);
            break;
        case 3: // Starting a text block.
            // Check if preview feature is enabled for text blocks.
            checkSourceLevel(pos, Feature.TEXT_BLOCKS);
            // Only proceed if text block string methods are present.
            if (hasTextBlockSupport) {
                // Indicate that the final string should have incidental indentation removed.
                shouldStripIndent = true;
                // Verify the open delimiter sequence.
                boolean hasOpenEOLN = false;
                while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) {
                    hasOpenEOLN = isEOLN();
                    if (hasOpenEOLN) {
                        break;
                    }
                    reader.scanChar();
                }
                // Error if the open delimiter sequence is not """<Whitespace>*<LineTerminator>.
                if (!hasOpenEOLN) {
                    lexError(reader.bp, Errors.IllegalTextBlockOpen);
                    return;
                }
                // Skip line terminator.
                int start = reader.bp;
                if (isCRLF()) {
                    reader.scanChar();
                }
                reader.scanChar();
                processLineTerminator(start, reader.bp);
            } else {
                // No text block string methods are present, so reset and treat like string literal.
                reader.reset(pos);
                openCount = countChar('\"', 1);
            }
            break;
        }
        // While characters are available.
        while (reader.bp < reader.buflen) {
            // If possible close delimiter sequence.
            if (reader.ch == '\"') {
                // Check to see if enough double quotes are present.
                int closeCount = countChar('\"', openCount);
                if (openCount == closeCount) {
                    // Good result.
                    tk = Tokens.TokenKind.STRINGLITERAL;
                    return;
                }
                // False alarm, add double quotes to string buffer.
                reader.repeat('\"', closeCount);
            } else if (isEOLN()) {
                // Line terminator in string literal is an error.
                // Fall out to unclosed string literal error.
                if (openCount == 1) {
                    break;
                }
                // Add line terminator to string buffer.
                // A CR LF pair is added as a single '\n'.
                int start = reader.bp;
                if (isCRLF()) {
                    reader.scanChar();
                }
                reader.putChar('\n', true);
                processLineTerminator(start, reader.bp);
                // Record first line terminator for error recovery.
                if (firstEOLN == -1) {
                    firstEOLN = reader.bp;
                }
            } else if (reader.ch == '\\') {
                // Handle escape sequences.
                if (hasTextBlockSupport) {
                    // Indicate that the final string should have escapes translated.
                    shouldTranslateEscapes = true;
                    // Validate escape sequence and add to string buffer.
                    scanLitCharRaw(pos);
                } else {
                    // Translate escape sequence and add result to string buffer.
                    scanLitChar(pos);
                }
            } else {
                // Add character to string buffer.
                reader.putChar(true);
            }
        }
        // String ended without close delimiter sequence.
        lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock);
        if (firstEOLN  != -1) {
            // Reset recovery position to just after the first line terminator
            // that was seen inside the content.
            reader.reset(firstEOLN);
        }
    }
 537 
 538     private void scanDigits(int pos, int digitRadix) {
 539         char saveCh;
 540         int savePos;
 541         do {
 542             if (reader.ch != '_') {
 543                 reader.putChar(false);
 544             }
 545             saveCh = reader.ch;
 546             savePos = reader.bp;
 547             reader.scanChar();
 548         } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
 549         if (saveCh == '_')
 550             lexError(savePos, Errors.IllegalUnderscore);
 551     }
 552 
 553     /** Read fractional part of hexadecimal floating point number.
 554      */
 555     private void scanHexExponentAndSuffix(int pos) {
 556         if (reader.ch == 'p' || reader.ch == 'P') {
 557             reader.putChar(true);
 558             skipIllegalUnderscores();
 559             if (reader.ch == '+' || reader.ch == '-') {
 560                 reader.putChar(true);
 561             }
 562             skipIllegalUnderscores();
 563             if (reader.digit(pos, 10) >= 0) {
 564                 scanDigits(pos, 10);
 565                 if (!hexFloatsWork)
 566                     lexError(pos, Errors.UnsupportedCrossFpLit);
 567             } else
 568                 lexError(pos, Errors.MalformedFpLit);
 569         } else {
 570             lexError(pos, Errors.MalformedFpLit);
 571         }
 572         if (reader.ch == 'f' || reader.ch == 'F') {
 573             reader.putChar(true);
 574             tk = TokenKind.FLOATLITERAL;
 575             radix = 16;
 576         } else {
 577             if (reader.ch == 'd' || reader.ch == 'D') {
 578                 reader.putChar(true);
 579             }
 580             tk = TokenKind.DOUBLELITERAL;
 581             radix = 16;
 582         }
 583     }
 584 
 585     /** Read fractional part of floating point number.
 586      */
 587     private void scanFraction(int pos) {
 588         skipIllegalUnderscores();
 589         if (reader.digit(pos, 10) >= 0) {
 590             scanDigits(pos, 10);
 591         }
 592         int sp1 = reader.sp;
 593         if (reader.ch == 'e' || reader.ch == 'E') {
 594             reader.putChar(true);
 595             skipIllegalUnderscores();
 596             if (reader.ch == '+' || reader.ch == '-') {
 597                 reader.putChar(true);
 598             }
 599             skipIllegalUnderscores();
 600             if (reader.digit(pos, 10) >= 0) {
 601                 scanDigits(pos, 10);
 602                 return;
 603             }
 604             lexError(pos, Errors.MalformedFpLit);
 605             reader.sp = sp1;
 606         }
 607     }
 608 
 609     /** Read fractional part and 'd' or 'f' suffix of floating point number.
 610      */
 611     private void scanFractionAndSuffix(int pos) {
 612         radix = 10;
 613         scanFraction(pos);
 614         if (reader.ch == 'f' || reader.ch == 'F') {
 615             reader.putChar(true);
 616             tk = TokenKind.FLOATLITERAL;
 617         } else {
 618             if (reader.ch == 'd' || reader.ch == 'D') {
 619                 reader.putChar(true);
 620             }
 621             tk = TokenKind.DOUBLELITERAL;
 622         }
 623     }
 624 
 625     /** Read fractional part and 'd' or 'f' suffix of floating point number.
 626      */
 627     private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
 628         radix = 16;
 629         Assert.check(reader.ch == '.');
 630         reader.putChar(true);
 631         skipIllegalUnderscores();
 632         if (reader.digit(pos, 16) >= 0) {
 633             seendigit = true;
 634             scanDigits(pos, 16);
 635         }
 636         if (!seendigit)
 637             lexError(pos, Errors.InvalidHexNumber);
 638         else
 639             scanHexExponentAndSuffix(pos);
 640     }
 641 
 642     private void skipIllegalUnderscores() {
 643         if (reader.ch == '_') {
 644             lexError(reader.bp, Errors.IllegalUnderscore);
 645             while (reader.ch == '_')
 646                 reader.scanChar();
 647         }
 648     }
 649 
 650     /** Read a number.
 651      *  @param radix  The radix of the number; one of 2, 8, 10, 16.
 652      */
 653     private void scanNumber(int pos, int radix) {
 654         // for octal, allow base-10 digit in case it's a float literal
 655         this.radix = radix;
 656         int digitRadix = (radix == 8 ? 10 : radix);
 657         int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
 658         boolean seendigit = firstDigit >= 0;
 659         boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
 660         if (seendigit) {
 661             scanDigits(pos, digitRadix);
 662         }
 663         if (radix == 16 && reader.ch == '.') {
 664             scanHexFractionAndSuffix(pos, seendigit);
 665         } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
 666             scanHexExponentAndSuffix(pos);
 667         } else if (digitRadix == 10 && reader.ch == '.') {
 668             reader.putChar(true);
 669             scanFractionAndSuffix(pos);
 670         } else if (digitRadix == 10 &&
 671                    (reader.ch == 'e' || reader.ch == 'E' ||
 672                     reader.ch == 'f' || reader.ch == 'F' ||
 673                     reader.ch == 'd' || reader.ch == 'D')) {
 674             scanFractionAndSuffix(pos);
 675         } else {
 676             if (!seenValidDigit) {
 677                 switch (radix) {
 678                 case 2:
 679                     lexError(pos, Errors.InvalidBinaryNumber);
 680                     break;
 681                 case 16:
 682                     lexError(pos, Errors.InvalidHexNumber);
 683                     break;
 684                 }
 685             }
 686             if (reader.ch == 'l' || reader.ch == 'L') {
 687                 reader.scanChar();
 688                 tk = TokenKind.LONGLITERAL;
 689             } else {
 690                 tk = TokenKind.INTLITERAL;
 691             }
 692         }
 693     }
 694 
    /** Read an identifier.
     *  The current character is copied as the first character of the
     *  identifier; further characters are copied for as long as they are
     *  identifier parts. On return, name holds the identifier and tk the
     *  token kind looked up for that name.
     */
    private void scanIdent() {
        boolean isJavaIdentifierPart;
        // NOTE(review): 'high' is never used in this method.
        char high;
        reader.putChar(true);
        do {
            switch (reader.ch) {
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case '$': case '_':
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                // Plain ASCII identifier part: copied by the putChar at the
                // bottom of the loop.
                break;
            case '\u0000': case '\u0001': case '\u0002': case '\u0003':
            case '\u0004': case '\u0005': case '\u0006': case '\u0007':
            case '\u0008': case '\u000E': case '\u000F': case '\u0010':
            case '\u0011': case '\u0012': case '\u0013': case '\u0014':
            case '\u0015': case '\u0016': case '\u0017':
            case '\u0018': case '\u0019': case '\u001B':
            case '\u007F':
                // These control characters are consumed without being
                // copied into the name.
                reader.scanChar();
                continue;
            case '\u001A': // EOI is also a legal identifier part
                // At (or past) buflen this is the end of input: finish the identifier.
                if (reader.bp >= reader.buflen) {
                    name = reader.name();
                    tk = tokens.lookupKind(name);
                    return;
                }
                // Otherwise it is consumed without being copied.
                reader.scanChar();
                continue;
            default:
                if (reader.ch < '\u0080') {
                    // all ASCII range chars already handled, above
                    isJavaIdentifierPart = false;
                } else {
                    if (Character.isIdentifierIgnorable(reader.ch)) {
                        // Ignorable characters are consumed without being copied.
                        reader.scanChar();
                        continue;
                    } else {
                        int codePoint = reader.peekSurrogates();
                        if (codePoint >= 0) {
                            // A non-negative result is treated as a code point.
                            // One char is copied here and one more by the putChar
                            // at the bottom of the loop — presumably the two halves
                            // of a surrogate pair; TODO confirm in UnicodeReader.
                            if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
                                reader.putChar(true);
                            }
                        } else {
                            isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
                        }
                    }
                }
                if (!isJavaIdentifierPart) {
                    // First character that cannot continue the identifier:
                    // finish without consuming it.
                    name = reader.name();
                    tk = tokens.lookupKind(name);
                    return;
                }
            }
            reader.putChar(true);
        } while (true);
    }
 764 
    /** Return true if {@code ch} can be part of an operator.
     */
 767     private boolean isSpecial(char ch) {
 768         switch (ch) {
 769         case '!': case '%': case '&': case '*': case '?':
 770         case '+': case '-': case ':': case '<': case '=':
 771         case '>': case '^': case '|': case '~':
 772         case '@':
 773             return true;
 774         default:
 775             return false;
 776         }
 777     }
 778 
 779     /** Read longest possible sequence of special characters and convert
 780      *  to token.
 781      */
 782     private void scanOperator() {
 783         while (true) {
 784             reader.putChar(false);
 785             Name newname = reader.name();
 786             TokenKind tk1 = tokens.lookupKind(newname);
 787             if (tk1 == TokenKind.IDENTIFIER) {
 788                 reader.sp--;
 789                 break;
 790             }
 791             tk = tk1;
 792             reader.scanChar();
 793             if (!isSpecial(reader.ch)) break;
 794         }
 795     }
 796 
 797     /** Read token.
 798      */
    public Token readToken() {
        // Scans and returns the next token from the reader. Whitespace, line
        // terminators and comments met on the way are consumed; the comments
        // are collected and handed to the returned token.

        // Reset the per-token scanning state.
        reader.sp = 0;
        name = null;
        radix = 0;

        int pos = 0;
        int endPos = 0;
        List<Comment> comments = null;

        try {
            // A plain 'break' leaves only the switch, so the loop goes around
            // again (used after whitespace, line terminators and comments).
            // 'break loop' ends the scan once a token kind has been decided or
            // a lexical error has been reported.
            loop: while (true) {
                pos = reader.bp;
                switch (reader.ch) {
                case ' ': // (Spec 3.6)
                case '\t': // (Spec 3.6)
                case FF: // (Spec 3.6)
                    // Consume the whole run of blanks in one go.
                    do {
                        reader.scanChar();
                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
                    processWhiteSpace(pos, reader.bp);
                    break;
                case LF: // (Spec 3.4)
                    reader.scanChar();
                    processLineTerminator(pos, reader.bp);
                    break;
                case CR: // (Spec 3.4)
                    // CR optionally followed by LF counts as one terminator.
                    reader.scanChar();
                    if (reader.ch == LF) {
                        reader.scanChar();
                    }
                    processLineTerminator(pos, reader.bp);
                    break;
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                case '$': case '_':
                    scanIdent();
                    break loop;
                case '0':
                    // Leading zero: 0x/0X selects radix 16, 0b/0B radix 2,
                    // anything else is scanned with radix 8.
                    reader.scanChar();
                    if (reader.ch == 'x' || reader.ch == 'X') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 16);
                    } else if (reader.ch == 'b' || reader.ch == 'B') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 2);
                    } else {
                        reader.putChar('0');
                        if (reader.ch == '_') {
                            // Skip the underscores after the zero; they are an
                            // error unless a digit follows them.
                            int savePos = reader.bp;
                            do {
                                reader.scanChar();
                            } while (reader.ch == '_');
                            if (reader.digit(pos, 10) < 0) {
                                lexError(savePos, Errors.IllegalUnderscore);
                            }
                        }
                        scanNumber(pos, 8);
                    }
                    break loop;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    scanNumber(pos, 10);
                    break loop;
                case '.':
                    // '.' followed by a digit starts a fractional literal,
                    // '...' is ELLIPSIS, '..' is reported as an illegal dot,
                    // and a lone '.' is DOT.
                    reader.scanChar();
                    if (reader.digit(pos, 10) >= 0) {
                        reader.putChar('.');
                        scanFractionAndSuffix(pos);
                    } else if (reader.ch == '.') {
                        int savePos = reader.bp;
                        reader.putChar('.'); reader.putChar('.', true);
                        if (reader.ch == '.') {
                            reader.scanChar();
                            reader.putChar('.');
                            tk = TokenKind.ELLIPSIS;
                        } else {
                            lexError(savePos, Errors.IllegalDot);
                        }
                    } else {
                        tk = TokenKind.DOT;
                    }
                    break loop;
                case ',':
                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
                case ';':
                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
                case '(':
                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
                case ')':
                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
                case '[':
                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
                case ']':
                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
                case '{':
                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
                case '}':
                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
                case '/':
                    // '/' starts a line comment, a block or javadoc comment,
                    // the '/=' operator, or is the '/' operator on its own.
                    reader.scanChar();
                    if (reader.ch == '/') {
                        // Line comment: skip up to the line end or the end of
                        // the buffer.
                        do {
                            reader.scanCommentChar();
                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                        // The comment is recorded only when it stopped before
                        // the end of the buffer.
                        if (reader.bp < reader.buflen) {
                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
                        }
                        break;
                    } else if (reader.ch == '*') {
                        boolean isEmpty = false;
                        reader.scanChar();
                        CommentStyle style;
                        if (reader.ch == '*') {
                            // A second '*' makes this a javadoc comment; if a
                            // '/' follows directly the comment is already
                            // complete and the body loop below is skipped.
                            style = CommentStyle.JAVADOC;
                            reader.scanCommentChar();
                            if (reader.ch == '/') {
                                isEmpty = true;
                            }
                        } else {
                            style = CommentStyle.BLOCK;
                        }
                        // Skip the comment body until '*' followed by '/' or
                        // the end of the buffer.
                        while (!isEmpty && reader.bp < reader.buflen) {
                            if (reader.ch == '*') {
                                reader.scanChar();
                                if (reader.ch == '/') break;
                            } else {
                                reader.scanCommentChar();
                            }
                        }
                        if (reader.ch == '/') {
                            reader.scanChar();
                            comments = addComment(comments, processComment(pos, reader.bp, style));
                            break;
                        } else {
                            // Ran out of input before the closing delimiter.
                            lexError(pos, Errors.UnclosedComment);
                            break loop;
                        }
                    } else if (reader.ch == '=') {
                        tk = TokenKind.SLASHEQ;
                        reader.scanChar();
                    } else {
                        tk = TokenKind.SLASH;
                    }
                    break loop;
                case '\'':
                    // Character literal: one literal character between quotes.
                    reader.scanChar();
                    if (reader.ch == '\'') {
                        lexError(pos, Errors.EmptyCharLit);
                        reader.scanChar();
                    } else {
                        if (isEOLN())
                            lexError(pos, Errors.IllegalLineEndInCharLit);
                        scanLitChar(pos);
                        if (reader.ch == '\'') {
                            reader.scanChar();
                            tk = TokenKind.CHARLITERAL;
                        } else {
                            lexError(pos, Errors.UnclosedCharLit);
                        }
                    }
                    break loop;
                case '\"':
                    scanString(pos);
                    break loop;
                default:
                    // Everything not matched above: operator characters,
                    // non-ASCII identifier starts and digits, end of input,
                    // or an illegal character.
                    if (isSpecial(reader.ch)) {
                        scanOperator();
                    } else {
                        boolean isJavaIdentifierStart;
                        // Stays negative unless peekSurrogates() returns a
                        // code point for the current position.
                        int codePoint = -1;
                        if (reader.ch < '\u0080') {
                            // all ASCII range chars already handled, above
                            isJavaIdentifierStart = false;
                        } else {
                            codePoint = reader.peekSurrogates();
                            if (codePoint >= 0) {
                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
                                    reader.putChar(true);
                                }
                            } else {
                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
                            }
                        }
                        if (isJavaIdentifierStart) {
                            scanIdent();
                        } else if (reader.digit(pos, 10) >= 0) {
                            scanNumber(pos, 10);
                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                            // End of input, or an EOI character that is the
                            // last character of the buffer.
                            tk = TokenKind.EOF;
                            pos = reader.realLength;
                        } else {
                            // Illegal character: build a printable form of it
                            // for the diagnostic.
                            String arg;

                            if (codePoint >= 0) {
                                // Surrogate pair: report both halves as
                                // unicode escapes.
                                char high = reader.ch;
                                reader.scanChar();
                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
                            } else {
                                // Printable ASCII is shown as is, everything
                                // else as a unicode escape.
                                arg = (32 < reader.ch && reader.ch < 127) ?
                                                String.format("%s", reader.ch) :
                                                String.format("\\u%04x", (int)reader.ch);
                            }
                            lexError(pos, Errors.IllegalChar(arg));
                            reader.scanChar();
                        }
                    }
                    break loop;
                }
            }
            endPos = reader.bp;
            // Build the token object that matches the tag of the token kind.
            switch (tk.tag) {
                case DEFAULT: return new Token(tk, pos, endPos, comments);
                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
                case STRING: {
                    // Get characters from string buffer.
                    String string = reader.chars();
                    // If a text block.
                    if (shouldStripIndent) {
                        // Verify that the incidental indentation is consistent.
                        if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
                            Set<TextBlockSupport.WhitespaceChecks> checks =
                                    TextBlockSupport.checkWhitespace(string);
                            if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
                                lexWarning(LintCategory.TEXT_BLOCKS, pos,
                                        Warnings.InconsistentWhiteSpaceIndentation);
                            }
                            if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) {
                                lexWarning(LintCategory.TEXT_BLOCKS, pos,
                                        Warnings.TrailingWhiteSpaceWillBeRemoved);
                            }
                        }
                        // Remove incidental indentation.
                        string = TextBlockSupport.stripIndent(string);
                    }
                    // Translate escape sequences if present.
                    if (shouldTranslateEscapes && !hasBrokenEscapes) {
                        string = TextBlockSupport.translateEscapes(string);
                    }
                    // Build string token.
                    return new StringToken(tk, pos, endPos, string, comments);
                }
                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
                default: throw new AssertionError();
            }
        }
        finally {
            // Debug trace of the raw characters between pos and endPos; being
            // in a finally clause it runs on every exit path of the method.
            if (scannerDebug) {
                    System.out.println("nextToken(" + pos
                                       + "," + endPos + ")=|" +
                                       new String(reader.getRawCharacters(pos, endPos))
                                       + "|");
            }
        }
    }
1066     //where
1067         List<Comment> addComment(List<Comment> comments, Comment comment) {
1068             return comments == null ?
1069                     List.of(comment) :
1070                     comments.prepend(comment);
1071         }
1072 
    /** Return the position where a lexical error occurred.
     */
1075     public int errPos() {
1076         return errPos;
1077     }
1078 
    /** Set the position where a lexical error occurred.
     */
1081     public void errPos(int pos) {
1082         errPos = pos;
1083     }
1084 
1085     /**
1086      * Called when a complete comment has been scanned. pos and endPos
1087      * will mark the comment boundary.
1088      */
1089     protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
1090         if (scannerDebug)
1091             System.out.println("processComment(" + pos
1092                                + "," + endPos + "," + style + ")=|"
1093                                + new String(reader.getRawCharacters(pos, endPos))
1094                                + "|");
1095         char[] buf = reader.getRawCharacters(pos, endPos);
1096         return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
1097     }
1098 
1099     /**
1100      * Called when a complete whitespace run has been scanned. pos and endPos
1101      * will mark the whitespace boundary.
1102      */
1103     protected void processWhiteSpace(int pos, int endPos) {
1104         if (scannerDebug)
1105             System.out.println("processWhitespace(" + pos
1106                                + "," + endPos + ")=|" +
1107                                new String(reader.getRawCharacters(pos, endPos))
1108                                + "|");
1109     }
1110 
1111     /**
1112      * Called when a line terminator has been processed.
1113      */
1114     protected void processLineTerminator(int pos, int endPos) {
1115         if (scannerDebug)
1116             System.out.println("processTerminator(" + pos
1117                                + "," + endPos + ")=|" +
1118                                new String(reader.getRawCharacters(pos, endPos))
1119                                + "|");
1120     }
1121 
1122     /** Build a map for translating between line numbers and
1123      * positions in the input.
1124      *
1125      * @return a LineMap */
1126     public Position.LineMap getLineMap() {
1127         return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
1128     }
1129 
1130 
    /**
     * Basic comment implementation. A documentation comment is scanned lazily
     * to determine if a deprecated tag is present.
     * The scan skips the initial '/**' itself (the position after it is
     * treated as the beginning of the first line) and stops at the closing
     * '/' or at the end of the comment.
     */
1137     protected static class BasicComment<U extends UnicodeReader> implements Comment {
1138 
1139         CommentStyle cs;
1140         U comment_reader;
1141 
1142         protected boolean deprecatedFlag = false;
1143         protected boolean scanned = false;
1144 
1145         protected BasicComment(U comment_reader, CommentStyle cs) {
1146             this.comment_reader = comment_reader;
1147             this.cs = cs;
1148         }
1149 
1150         public String getText() {
1151             return null;
1152         }
1153 
1154         public int getSourcePos(int pos) {
1155             return -1;
1156         }
1157 
1158         public CommentStyle getStyle() {
1159             return cs;
1160         }
1161 
1162         public boolean isDeprecated() {
1163             if (!scanned && cs == CommentStyle.JAVADOC) {
1164                 scanDocComment();
1165             }
1166             return deprecatedFlag;
1167         }
1168 
1169         @SuppressWarnings("fallthrough")
1170         protected void scanDocComment() {
1171             try {
1172                 boolean deprecatedPrefix = false;
1173 
1174                 comment_reader.bp += 3; // '/**'
1175                 comment_reader.ch = comment_reader.buf[comment_reader.bp];
1176 
1177                 forEachLine:
1178                 while (comment_reader.bp < comment_reader.buflen) {
1179 
1180                     // Skip optional WhiteSpace at beginning of line
1181                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
1182                         comment_reader.scanCommentChar();
1183                     }
1184 
1185                     // Skip optional consecutive Stars
1186                     while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
1187                         comment_reader.scanCommentChar();
1188                         if (comment_reader.ch == '/') {
1189                             return;
1190                         }
1191                     }
1192 
1193                     // Skip optional WhiteSpace after Stars
1194                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
1195                         comment_reader.scanCommentChar();
1196                     }
1197 
1198                     deprecatedPrefix = false;
1199                     // At beginning of line in the JavaDoc sense.
1200                     if (!deprecatedFlag) {
1201                         String deprecated = "@deprecated";
1202                         int i = 0;
1203                         while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
1204                             comment_reader.scanCommentChar();
1205                             i++;
1206                             if (i == deprecated.length()) {
1207                                 deprecatedPrefix = true;
1208                                 break;
1209                             }
1210                         }
1211                     }
1212 
1213                     if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
1214                         if (Character.isWhitespace(comment_reader.ch)) {
1215                             deprecatedFlag = true;
1216                         } else if (comment_reader.ch == '*') {
1217                             comment_reader.scanCommentChar();
1218                             if (comment_reader.ch == '/') {
1219                                 deprecatedFlag = true;
1220                                 return;
1221                             }
1222                         }
1223                     }
1224 
1225                     // Skip rest of line
1226                     while (comment_reader.bp < comment_reader.buflen) {
1227                         switch (comment_reader.ch) {
1228                             case '*':
1229                                 comment_reader.scanCommentChar();
1230                                 if (comment_reader.ch == '/') {
1231                                     return;
1232                                 }
1233                                 break;
1234                             case CR: // (Spec 3.4)
1235                                 comment_reader.scanCommentChar();
1236                                 if (comment_reader.ch != LF) {
1237                                     continue forEachLine;
1238                                 }
1239                             /* fall through to LF case */
1240                             case LF: // (Spec 3.4)
1241                                 comment_reader.scanCommentChar();
1242                                 continue forEachLine;
1243                             default:
1244                                 comment_reader.scanCommentChar();
1245                         }
1246                     } // rest of line
1247                 } // forEachLine
1248                 return;
1249             } finally {
1250                 scanned = true;
1251             }
1252         }
1253     }
1254 }