New src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java

   1 /*
   2  * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.javac.parser;
  27 
  28 import com.sun.tools.javac.code.Lint;
  29 import com.sun.tools.javac.code.Lint.LintCategory;
  30 import com.sun.tools.javac.code.Preview;
  31 import com.sun.tools.javac.code.Source;
  32 import com.sun.tools.javac.code.Source.Feature;
  33 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
  34 import com.sun.tools.javac.resources.CompilerProperties.Errors;
  35 import com.sun.tools.javac.resources.CompilerProperties.Warnings;
  36 import com.sun.tools.javac.util.*;
  37 import com.sun.tools.javac.util.JCDiagnostic.*;
  38 
  39 import java.lang.reflect.InvocationTargetException;
  40 import java.lang.reflect.Method;
  41 import java.nio.CharBuffer;
  42 import java.util.HashSet;
  43 import java.util.Set;
  44 
  45 import static com.sun.tools.javac.parser.Tokens.*;
  46 import static com.sun.tools.javac.util.LayoutCharacters.*;
  47 
  48 /** The lexical analyzer maps an input stream consisting of
  49  *  ASCII characters and Unicode escapes into a token sequence.
  50  *
  51  *  <p><b>This is NOT part of any supported API.
  52  *  If you write code that depends on this, you do so at your own risk.
  53  *  This code and its internal interfaces are subject to change or
  54  *  deletion without notice.</b>
  55  */
  56 public class JavaTokenizer {
  57 
   58     private static final boolean scannerDebug = false;
   59 
   60     /** The source language setting, copied from the scanner factory.
   61      */
   62     private Source source;
   63 
   64     /** The preview language setting, copied from the scanner factory. */
   65     private Preview preview;
   66 
   67     /** The log to be used for error reporting.
   68      */
   69     private final Log log;
   70 
   71     /** The token factory. */
   72     private final Tokens tokens;
   73 
   74     /** The kind of the token being scanned; assigned by the scan* helpers
   75      *  and forced to TokenKind.ERROR by lexError(). */
   76     protected TokenKind tk;
   77 
   78     /** The token's radix; reset to 0 at the start of readToken() and
   79      *  assigned while a numeric literal is scanned. */
   80     protected int radix;
   81 
   82     /** The token's name; reset to null at the start of readToken() and
   83      *  assigned by scanIdent(). */
   84     protected Name name;
   85 
   86     /** The position passed to the most recent lexError() call;
   87      *  Position.NOPOS until an error has been reported. */
   88     protected int errPos = Position.NOPOS;
   89 
   90     /** The Unicode reader (low-level stream reader).
   91      */
   92     protected UnicodeReader reader;
   93 
   94     /** True if the literal most recently scanned by scanString() opened
   95      *  with three double quotes, i.e. is a text block. */
   96     protected boolean isTextBlock;
   97 
   98     /** True if the literal most recently scanned by scanString() contained
   99      *  at least one backslash escape. */
  100     protected boolean hasEscapeSequences;
  101 
          // The factory handed to the constructor; log, tokens, source, preview
          // and lint are all copied from it.
  102     protected ScannerFactory fac;
  103 
  104     // The set of lint options currently in effect. It is initialized
  105     // from the context, and then is set/reset as needed by Attr as it
  106     // visits all the various parts of the trees during attribution.
          // NOTE(review): in the code visible here it is only assigned from fac.lint
          // in the constructor; the reference to Attr may be inherited wording - confirm.
  107     protected Lint lint;
 108 
 109     private static final boolean hexFloatsWork = hexFloatsWork();
 110     private static boolean hexFloatsWork() {
 111         try {
 112             Float.valueOf("0x1.0p1");
 113             return true;
 114         } catch (NumberFormatException ex) {
 115             return false;
 116         }
 117     }
 118 
  119     /**
  120      * Create a tokenizer that reads from the given character buffer.
  121      * The buffer is wrapped in a new {@code UnicodeReader}, which is
  122      * then handed to the reader-based constructor.
  123      *
  124      * @param fac the factory which created this Scanner
  125      * @param buf the input, might be modified
  126      *            (presumably by the {@code UnicodeReader}, which is not
  127      *            visible here - TODO confirm)
  128      */
  129     protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
  130         this(fac, new UnicodeReader(fac, buf));
  131     }
 132 
  133     protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
              // Wrap the array in a new UnicodeReader and delegate to the
              // reader-based constructor.
              // NOTE(review): inputLength is presumably the number of valid
              // characters in buf; its contract lives in UnicodeReader - confirm.
  134         this(fac, new UnicodeReader(fac, buf, inputLength));
  135     }
 136 
 137     protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
 138         this.fac = fac;
 139         this.log = fac.log;
 140         this.tokens = fac.tokens;
 141         this.source = fac.source;
 142         this.preview = fac.preview;
 143         this.reader = reader;
 144         this.lint = fac.lint;
 145     }
 146 
 147     protected void checkSourceLevel(int pos, Feature feature) {
 148         if (preview.isPreview(feature) && !preview.isEnabled()) {
 149             //preview feature without --preview flag, error
 150             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
 151         } else if (!feature.allowedInSource(source)) {
 152             //incompatible source level, error
 153             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
 154         } else if (preview.isPreview(feature)) {
 155             //use of preview feature, warn
 156             preview.warnPreview(pos, feature);
 157         }
 158     }
 159 
 160     /** Report an error at the given position using the provided arguments.
 161      */
 162     protected void lexError(int pos, JCDiagnostic.Error key) {
 163         log.error(pos, key);
 164         tk = TokenKind.ERROR;
 165         errPos = pos;
 166     }
 167 
 168     protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
 169         log.error(flags, pos, key);
 170         tk = TokenKind.ERROR;
 171         errPos = pos;
 172     }
 173 
 174     protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
 175         DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
 176         log.warning(lc, dp, key);
 177     }
 178 
  179     /** Read next character in character or string literal and copy into sbuf.
           *  Handles one backslash escape when reader.ch is a backslash, otherwise
           *  one ordinary character (nothing is copied at end of input).
  180      *      pos - start of literal offset
  181      *      translateEscapesNow - true if String::translateEscapes is not available
  182      *                            in the java.base libs. Occurs during bootstrapping.
           *                            When false, the escape's characters are kept
           *                            rather than the character they denote.
  183      *      multiline - true if scanning a text block. Allows newlines to be embedded
  184      *                  in the result.
  185      */
  186     private void scanLitChar(int pos, boolean translateEscapesNow, boolean multiline) {
  187          if (reader.ch == '\\') {
                  // Two backslashes in a row, where the current one was not
                  // written as a Unicode escape: an escaped backslash.
  188             if (reader.peekChar() == '\\' && !reader.isUnicode()) {
  189                 reader.skipChar();
                      // Untranslated mode stores one extra backslash.
                      // NOTE(review): exact putChar/skipChar semantics live in
                      // UnicodeReader - confirm.
  190                 if (!translateEscapesNow) {
  191                     reader.putChar(false);
  192                 }
  193                 reader.putChar(true);
  194             } else {
                      // Move to the character that follows the backslash.
  195                 reader.nextChar(translateEscapesNow);
  196                 switch (reader.ch) {
  197                 case '0': case '1': case '2': case '3':
  198                 case '4': case '5': case '6': case '7':
                          // Octal escape: up to three digits; a third digit is only
                          // taken when the first is 0-3, so the value is at most 0377.
  199                     char leadch = reader.ch;
  200                     int oct = reader.digit(pos, 8);
  201                     reader.nextChar(translateEscapesNow);
  202                     if ('0' <= reader.ch && reader.ch <= '7') {
  203                         oct = oct * 8 + reader.digit(pos, 8);
  204                         reader.nextChar(translateEscapesNow);
  205                         if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
  206                             oct = oct * 8 + reader.digit(pos, 8);
  207                             reader.nextChar(translateEscapesNow);
  208                         }
  209                     }
                          // The decoded character is only stored when translating now.
  210                     if (translateEscapesNow) {
  211                         reader.putChar((char)oct);
  212                     }
  213                     break;
                      // Single-letter escapes: store the denoted character when
                      // translating now, otherwise the escape letter itself.
  214                 case 'b':
  215                     reader.putChar(translateEscapesNow ? '\b' : 'b', true); break;
  216                 case 't':
  217                     reader.putChar(translateEscapesNow ? '\t' : 't', true); break;
  218                 case 'n':
  219                     reader.putChar(translateEscapesNow ? '\n' : 'n', true); break;
  220                 case 'f':
  221                     reader.putChar(translateEscapesNow ? '\f' : 'f', true); break;
  222                 case 'r':
  223                     reader.putChar(translateEscapesNow ? '\r' : 'r', true); break;
  224                 case '\'':
  225                 case '\"':
  226                 case '\\':
  227                     reader.putChar(true); break;
  228                 case 's':
                          // The space escape is gated on the TEXT_BLOCKS feature.
  229                     checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
  230                     reader.putChar(translateEscapesNow ? ' ' : 's', true); break;
  231                 case '\n':
  232                 case '\r':
                          // Backslash followed by a line terminator: an error outside
                          // text blocks, a feature-gated line continuation inside them.
  233                     if (!multiline) {
  234                         lexError(reader.bp, Errors.IllegalEscChar);
  235                     } else {
  236                         int start = reader.bp;
  237                         checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
                              // Treat CR LF as a single line terminator.
  238                         if (reader.ch == '\r' && reader.peekChar() == '\n') {
  239                            reader.nextChar(translateEscapesNow);
  240                         }
  241                         reader.nextChar(translateEscapesNow);
  242                         processLineTerminator(start, reader.bp);
  243                     }
  244                     break;
  245                 default:
  246                     lexError(reader.bp, Errors.IllegalEscChar);
  247                 }
  248             }
              // Ordinary character: copy it unless the input is exhausted.
  249         } else if (reader.bp != reader.buflen) {
  250             reader.putChar(true);
  251         }
  252     }
 253 
 254     /** Interim access to String methods used to support text blocks.
 255      *  Required to handle bootstrapping with pre-text block jdks.
 256      *  Should be replaced with direct calls in the 'next' jdk.
 257      */
 258     static class TextBlockSupport {
 259         /** Reflection method to remove incidental indentation.
 260          */
 261         private static final Method stripIndent;
 262 
 263         /** Reflection method to translate escape sequences.
 264          */
 265         private static final Method translateEscapes;
 266 
 267         /** true if stripIndent and translateEscapes are available in the bootstrap jdk.
 268          */
 269         private static final boolean hasSupport;
 270 
 271         /** Get a string method via refection or null if not available.
 272          */
 273         private static Method getStringMethodOrNull(String name) {
 274             try {
 275                 return String.class.getMethod(name);
 276             } catch (Exception ex) {
 277                 // Method not available, return null.
 278             }
 279             return null;
 280         }
 281 
 282         static {
 283             // Get text block string methods.
 284             stripIndent = getStringMethodOrNull("stripIndent");
 285             translateEscapes = getStringMethodOrNull("translateEscapes");
 286             // true if stripIndent and translateEscapes are available in the bootstrap jdk.
 287             hasSupport = stripIndent != null && translateEscapes != null;
 288         }
 289 
 290         /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk.
 291          */
 292         static boolean hasSupport() {
 293             return hasSupport;
 294         }
 295 
 296         /** Return the leading whitespace count (indentation) of the line.
 297          */
 298         private static int indent(String line) {
 299             return line.length() - line.stripLeading().length();
 300         }
 301 
 302         enum WhitespaceChecks {
 303             INCONSISTENT,
 304             TRAILING
 305         };
 306 
 307         /** Check that the use of white space in content is not problematic.
 308          */
 309         static Set<WhitespaceChecks> checkWhitespace(String string) {
 310             // Start with empty result set.
 311             Set<WhitespaceChecks> checks = new HashSet<>();
 312             // No need to check empty strings.
 313             if (string.isEmpty()) {
 314                 return checks;
 315             }
 316             // Maximum common indentation.
 317             int outdent = 0;
 318             // No need to check indentation if opting out (last line is empty.)
 319             char lastChar = string.charAt(string.length() - 1);
 320             boolean optOut = lastChar == '\n' || lastChar == '\r';
 321             // Split string based at line terminators.
 322             String[] lines = string.split("\\R");
 323             int length = lines.length;
 324             // Extract last line.
 325             String lastLine = length == 0 ? "" : lines[length - 1];
 326              if (!optOut) {
 327                 // Prime with the last line indentation (may be blank.)
 328                 outdent = indent(lastLine);
 329                 for (String line : lines) {
 330                     // Blanks lines have no influence (last line accounted for.)
 331                     if (!line.isBlank()) {
 332                         outdent = Integer.min(outdent, indent(line));
 333                         if (outdent == 0) {
 334                             break;
 335                         }
 336                     }
 337                 }
 338             }
 339             // Last line is representative.
 340             String start = lastLine.substring(0, outdent);
 341             for (String line : lines) {
 342                 // Fail if a line does not have the same indentation.
 343                 if (!line.isBlank() && !line.startsWith(start)) {
 344                     // Mix of different white space
 345                     checks.add(WhitespaceChecks.INCONSISTENT);
 346                 }
 347                 // Line has content even after indent is removed.
 348                 if (outdent < line.length()) {
 349                     // Is the last character a white space.
 350                     lastChar = line.charAt(line.length() - 1);
 351                     if (Character.isWhitespace(lastChar)) {
 352                         // Has trailing white space.
 353                         checks.add(WhitespaceChecks.TRAILING);
 354                     }
 355                 }
 356             }
 357             return checks;
 358         }
 359 
 360         /** Invoke String::stripIndent through reflection.
 361          */
 362         static String stripIndent(String string) {
 363             try {
 364                 string = (String)stripIndent.invoke(string);
 365             } catch (InvocationTargetException | IllegalAccessException ex) {
 366                 throw new RuntimeException(ex);
 367             }
 368             return string;
 369         }
 370 
 371         /** Invoke String::translateEscapes through reflection.
 372          */
 373         static String translateEscapes(String string) {
 374             try {
 375                 string = (String)translateEscapes.invoke(string);
 376             } catch (InvocationTargetException | IllegalAccessException ex) {
 377                 throw new RuntimeException(ex);
 378             }
 379             return string;
 380         }
 381     }
 382 
 383     /** Test for EOLN.
 384      */
 385     private boolean isEOLN() {
 386         return reader.ch == LF || reader.ch == CR;
 387     }
 388 
 389     /** Test for CRLF.
 390      */
 391     private boolean isCRLF() {
 392         return reader.ch == CR && reader.peekChar() == LF;
 393     }
 394 
 395     /** Count and skip repeated occurrences of the specified character.
 396      */
 397     private int countChar(char ch, int max) {
 398         int count = 0;
 399         for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) {
 400             reader.scanChar();
 401         }
 402         return count;
 403     }
 404 
  405     /** Scan a string literal or text block.
           *  On success tk is set to STRINGLITERAL. Otherwise a lexical error is
           *  reported: IllegalTextBlockOpen for a malformed opening delimiter,
           *  UnclosedStrLit or UnclosedTextBlock when no closing delimiter is found.
           *  isTextBlock and hasEscapeSequences are updated as a side effect.
           *
           *  @param pos position the literal starts at; the reader is reset to it
           *             when re-scanning the opening quote, and unclosed-literal
           *             errors are reported there
  406      */
  407     private void scanString(int pos) {
  408         // Clear flags.
  409         isTextBlock = false;
  410         hasEscapeSequences = false;
  411         // Track the end of first line for error recovery.
  412         int firstEOLN = -1;
  413         // Attempt to scan for up to 3 double quotes.
  414         int openCount = countChar('\"', 3);
  415         switch (openCount) {
  416         case 1: // Starting a string literal.
  417             break;
  418         case 2: // Starting an empty string literal.
  419             // Start again but only consume one quote.
                  // The second quote is then seen as the close delimiter by the loop below.
  420             reader.reset(pos);
  421             openCount = countChar('\"', 1);
  422             break;
  423         case 3: // Starting a text block.
  424             // Check if preview feature is enabled for text blocks.
  425             checkSourceLevel(pos, Feature.TEXT_BLOCKS);
  426             isTextBlock = true;
  427             // Verify the open delimiter sequence.
  428             boolean hasOpenEOLN = false;
  429             while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) {
  430                 hasOpenEOLN = isEOLN();
  431                 if (hasOpenEOLN) {
  432                     break;
  433                 }
  434                 reader.scanChar();
  435             }
  436             // Error if the open delimiter sequence is not """<Whitespace>*<LineTerminator>.
  437             if (!hasOpenEOLN) {
  438                 lexError(reader.bp, Errors.IllegalTextBlockOpen);
  439                 return;
  440             }
  441             // Skip line terminator.
                  // CR LF counts as one terminator and is not part of the content.
  442             int start = reader.bp;
  443             if (isCRLF()) {
  444                 reader.scanChar();
  445             }
  446             reader.scanChar();
  447             processLineTerminator(start, reader.bp);
  448             break;
  449         }
  450         // While characters are available.
  451         while (reader.bp < reader.buflen) {
  452             // If possible close delimiter sequence.
  453             if (reader.ch == '\"') {
  454                 // Check to see if enough double quotes are present.
  455                 int closeCount = countChar('\"', openCount);
  456                 if (openCount == closeCount) {
  457                     // Good result.
  458                     tk = Tokens.TokenKind.STRINGLITERAL;
  459                     return;
  460                 }
  461                 // False alarm, add double quotes to string buffer.
  462                 reader.repeat('\"', closeCount);
  463             } else if (isEOLN()) {
  464                 // Line terminator in string literal is an error.
  465                 // Fall out to unclosed string literal error.
  466                 if (openCount == 1) {
  467                     break;
  468                 }
  469                  // Add line terminator to string buffer.
                      // Every terminator, including CR LF, is stored as a single '\n'.
  470                 int start = reader.bp;
  471                 if (isCRLF()) {
  472                     reader.scanChar();
  473                 }
  474                 reader.putChar('\n', true);
  475                 processLineTerminator(start, reader.bp);
  476                 // Record first line terminator for error recovery.
  477                 if (firstEOLN == -1) {
  478                     firstEOLN = reader.bp;
  479                 }
  480             } else if (reader.ch == '\\') {
  481                 // Handle escape sequences.
  482                 hasEscapeSequences = true;
  483                 // Translate escapes immediately if TextBlockSupport is not available
  484                 // during bootstrapping.
  485                 boolean translateEscapesNow = !TextBlockSupport.hasSupport();
  486                 scanLitChar(pos, translateEscapesNow, openCount != 1);
  487             } else {
  488                 // Add character to string buffer.
  489                 reader.putChar(true);
  490             }
  491         }
  492         // String ended without close delimiter sequence.
  493         lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock);
  494         if (firstEOLN  != -1) {
  495             // Reset recovery position to point after open delimiter sequence.
                  // firstEOLN is only set for text blocks, so a plain string literal
                  // leaves the reader where scanning stopped.
  496             reader.reset(firstEOLN);
  497         }
  498     }
 499 
 500     private void scanDigits(int pos, int digitRadix) {
 501         char saveCh;
 502         int savePos;
 503         do {
 504             if (reader.ch != '_') {
 505                 reader.putChar(false);
 506             }
 507             saveCh = reader.ch;
 508             savePos = reader.bp;
 509             reader.scanChar();
 510         } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
 511         if (saveCh == '_')
 512             lexError(savePos, Errors.IllegalUnderscore);
 513     }
 514 
 515     /** Read fractional part of hexadecimal floating point number.
 516      */
 517     private void scanHexExponentAndSuffix(int pos) {
 518         if (reader.ch == 'p' || reader.ch == 'P') {
 519             reader.putChar(true);
 520             skipIllegalUnderscores();
 521             if (reader.ch == '+' || reader.ch == '-') {
 522                 reader.putChar(true);
 523             }
 524             skipIllegalUnderscores();
 525             if (reader.digit(pos, 10) >= 0) {
 526                 scanDigits(pos, 10);
 527                 if (!hexFloatsWork)
 528                     lexError(pos, Errors.UnsupportedCrossFpLit);
 529             } else
 530                 lexError(pos, Errors.MalformedFpLit);
 531         } else {
 532             lexError(pos, Errors.MalformedFpLit);
 533         }
 534         if (reader.ch == 'f' || reader.ch == 'F') {
 535             reader.putChar(true);
 536             tk = TokenKind.FLOATLITERAL;
 537             radix = 16;
 538         } else {
 539             if (reader.ch == 'd' || reader.ch == 'D') {
 540                 reader.putChar(true);
 541             }
 542             tk = TokenKind.DOUBLELITERAL;
 543             radix = 16;
 544         }
 545     }
 546 
 547     /** Read fractional part of floating point number.
 548      */
 549     private void scanFraction(int pos) {
 550         skipIllegalUnderscores();
 551         if (reader.digit(pos, 10) >= 0) {
 552             scanDigits(pos, 10);
 553         }
 554         int sp1 = reader.sp;
 555         if (reader.ch == 'e' || reader.ch == 'E') {
 556             reader.putChar(true);
 557             skipIllegalUnderscores();
 558             if (reader.ch == '+' || reader.ch == '-') {
 559                 reader.putChar(true);
 560             }
 561             skipIllegalUnderscores();
 562             if (reader.digit(pos, 10) >= 0) {
 563                 scanDigits(pos, 10);
 564                 return;
 565             }
 566             lexError(pos, Errors.MalformedFpLit);
 567             reader.sp = sp1;
 568         }
 569     }
 570 
 571     /** Read fractional part and 'd' or 'f' suffix of floating point number.
 572      */
 573     private void scanFractionAndSuffix(int pos) {
 574         radix = 10;
 575         scanFraction(pos);
 576         if (reader.ch == 'f' || reader.ch == 'F') {
 577             reader.putChar(true);
 578             tk = TokenKind.FLOATLITERAL;
 579         } else {
 580             if (reader.ch == 'd' || reader.ch == 'D') {
 581                 reader.putChar(true);
 582             }
 583             tk = TokenKind.DOUBLELITERAL;
 584         }
 585     }
 586 
 587     /** Read fractional part and 'd' or 'f' suffix of floating point number.
 588      */
 589     private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
 590         radix = 16;
 591         Assert.check(reader.ch == '.');
 592         reader.putChar(true);
 593         skipIllegalUnderscores();
 594         if (reader.digit(pos, 16) >= 0) {
 595             seendigit = true;
 596             scanDigits(pos, 16);
 597         }
 598         if (!seendigit)
 599             lexError(pos, Errors.InvalidHexNumber);
 600         else
 601             scanHexExponentAndSuffix(pos);
 602     }
 603 
 604     private void skipIllegalUnderscores() {
 605         if (reader.ch == '_') {
 606             lexError(reader.bp, Errors.IllegalUnderscore);
 607             while (reader.ch == '_')
 608                 reader.scanChar();
 609         }
 610     }
 611 
 612     /** Read a number.
 613      *  @param radix  The radix of the number; one of 2, 8, 10, 16.
 614      */
 615     private void scanNumber(int pos, int radix) {
 616         // for octal, allow base-10 digit in case it's a float literal
 617         this.radix = radix;
 618         int digitRadix = (radix == 8 ? 10 : radix);
 619         int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
 620         boolean seendigit = firstDigit >= 0;
 621         boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
 622         if (seendigit) {
 623             scanDigits(pos, digitRadix);
 624         }
 625         if (radix == 16 && reader.ch == '.') {
 626             scanHexFractionAndSuffix(pos, seendigit);
 627         } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
 628             scanHexExponentAndSuffix(pos);
 629         } else if (digitRadix == 10 && reader.ch == '.') {
 630             reader.putChar(true);
 631             scanFractionAndSuffix(pos);
 632         } else if (digitRadix == 10 &&
 633                    (reader.ch == 'e' || reader.ch == 'E' ||
 634                     reader.ch == 'f' || reader.ch == 'F' ||
 635                     reader.ch == 'd' || reader.ch == 'D')) {
 636             scanFractionAndSuffix(pos);
 637         } else {
 638             if (!seenValidDigit) {
 639                 switch (radix) {
 640                 case 2:
 641                     lexError(pos, Errors.InvalidBinaryNumber);
 642                     break;
 643                 case 16:
 644                     lexError(pos, Errors.InvalidHexNumber);
 645                     break;
 646                 }
 647             }
 648             if (reader.ch == 'l' || reader.ch == 'L') {
 649                 reader.scanChar();
 650                 tk = TokenKind.LONGLITERAL;
 651             } else {
 652                 tk = TokenKind.INTLITERAL;
 653             }
 654         }
 655     }
 656 
 657     /** Read an identifier.
 658      */
 659     private void scanIdent() {
 660         boolean isJavaIdentifierPart;
 661         char high;
 662         reader.putChar(true);
 663         do {
 664             switch (reader.ch) {
 665             case 'A': case 'B': case 'C': case 'D': case 'E':
 666             case 'F': case 'G': case 'H': case 'I': case 'J':
 667             case 'K': case 'L': case 'M': case 'N': case 'O':
 668             case 'P': case 'Q': case 'R': case 'S': case 'T':
 669             case 'U': case 'V': case 'W': case 'X': case 'Y':
 670             case 'Z':
 671             case 'a': case 'b': case 'c': case 'd': case 'e':
 672             case 'f': case 'g': case 'h': case 'i': case 'j':
 673             case 'k': case 'l': case 'm': case 'n': case 'o':
 674             case 'p': case 'q': case 'r': case 's': case 't':
 675             case 'u': case 'v': case 'w': case 'x': case 'y':
 676             case 'z':
 677             case '$': case '_':
 678             case '0': case '1': case '2': case '3': case '4':
 679             case '5': case '6': case '7': case '8': case '9':
 680                 break;
 681             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
 682             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
 683             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
 684             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
 685             case '\u0015': case '\u0016': case '\u0017':
 686             case '\u0018': case '\u0019': case '\u001B':
 687             case '\u007F':
 688                 reader.scanChar();
 689                 continue;
 690             case '\u001A': // EOI is also a legal identifier part
 691                 if (reader.bp >= reader.buflen) {
 692                     name = reader.name();
 693                     tk = tokens.lookupKind(name);
 694                     return;
 695                 }
 696                 reader.scanChar();
 697                 continue;
 698             default:
 699                 if (reader.ch < '\u0080') {
 700                     // all ASCII range chars already handled, above
 701                     isJavaIdentifierPart = false;
 702                 } else {
 703                     if (Character.isIdentifierIgnorable(reader.ch)) {
 704                         reader.scanChar();
 705                         continue;
 706                     } else {
 707                         int codePoint = reader.peekSurrogates();
 708                         if (codePoint >= 0) {
 709                             if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
 710                                 reader.putChar(true);
 711                             }
 712                         } else {
 713                             isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
 714                         }
 715                     }
 716                 }
 717                 if (!isJavaIdentifierPart) {
 718                     name = reader.name();
 719                     tk = tokens.lookupKind(name);
 720                     return;
 721                 }
 722             }
 723             reader.putChar(true);
 724         } while (true);
 725     }
 726 
 727     /** Return true if reader.ch can be part of an operator.
 728      */
 729     private boolean isSpecial(char ch) {
 730         switch (ch) {
 731         case '!': case '%': case '&': case '*': case '?':
 732         case '+': case '-': case ':': case '<': case '=':
 733         case '>': case '^': case '|': case '~':
 734         case '@':
 735             return true;
 736         default:
 737             return false;
 738         }
 739     }
 740 
 741     /** Read longest possible sequence of special characters and convert
 742      *  to token.
 743      */
 744     private void scanOperator() {
 745         while (true) {
 746             reader.putChar(false);
 747             Name newname = reader.name();
 748             TokenKind tk1 = tokens.lookupKind(newname);
 749             if (tk1 == TokenKind.IDENTIFIER) {
 750                 reader.sp--;
 751                 break;
 752             }
 753             tk = tk1;
 754             reader.scanChar();
 755             if (!isSpecial(reader.ch)) break;
 756         }
 757     }
 758 
    /** Read the next token from the input.
     *
     *  <p>Resets the per-token state ({@code reader.sp}, {@code name},
     *  {@code radix}) and then dispatches on the current character.
     *  White space, line terminators and comments are consumed and scanning
     *  continues; comments are collected and attached to the token that is
     *  eventually returned. Once a token has been recognised (or a lexical
     *  error has been reported through {@code lexError}), a token object is
     *  built according to {@code tk.tag}.
     *
     *  <p>NOTE(review): on the error paths this method does not assign
     *  {@code tk} itself; presumably {@code lexError} does so - confirm
     *  against its definition.
     *
     *  @return the token that was read, spanning the positions
     *          {@code pos} to {@code endPos} of the reader's buffer
     *  @throws AssertionError if {@code tk.tag} is not one of the tags
     *          handled by the final switch
     */
    public Token readToken() {

        // Reset the per-token scratch state.
        reader.sp = 0;
        name = null;
        radix = 0;

        int pos = 0;
        int endPos = 0;
        // Comments seen before the token; stays null when there are none.
        List<Comment> comments = null;

        try {
            // A plain "break" below only leaves the switch, so the loop runs
            // again from the new reader position (used for white space, line
            // terminators and comments). "break loop" ends scanning.
            loop: while (true) {
                pos = reader.bp;
                switch (reader.ch) {
                case ' ': // (Spec 3.6)
                case '\t': // (Spec 3.6)
                case FF: // (Spec 3.6)
                    // Consume the whole run of blanks, tabs and form feeds.
                    do {
                        reader.scanChar();
                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
                    processWhiteSpace(pos, reader.bp);
                    break;
                case LF: // (Spec 3.4)
                    reader.scanChar();
                    processLineTerminator(pos, reader.bp);
                    break;
                case CR: // (Spec 3.4)
                    reader.scanChar();
                    // CR immediately followed by LF is reported as one terminator.
                    if (reader.ch == LF) {
                        reader.scanChar();
                    }
                    processLineTerminator(pos, reader.bp);
                    break;
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                case '$': case '_':
                    // ASCII identifier start character.
                    scanIdent();
                    break loop;
                case '0':
                    reader.scanChar();
                    if (reader.ch == 'x' || reader.ch == 'X') {
                        // "0x" / "0X" prefix: scanned with radix 16.
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 16);
                    } else if (reader.ch == 'b' || reader.ch == 'B') {
                        // "0b" / "0B" prefix: scanned with radix 2.
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 2);
                    } else {
                        // Any other leading zero: keep the '0' and scan with radix 8.
                        reader.putChar('0');
                        if (reader.ch == '_') {
                            // Skip the underscores after the zero; they are an
                            // error unless a decimal digit follows them.
                            int savePos = reader.bp;
                            do {
                                reader.scanChar();
                            } while (reader.ch == '_');
                            if (reader.digit(pos, 10) < 0) {
                                lexError(savePos, Errors.IllegalUnderscore);
                            }
                        }
                        scanNumber(pos, 8);
                    }
                    break loop;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    scanNumber(pos, 10);
                    break loop;
                case '.':
                    reader.scanChar();
                    if (reader.digit(pos, 10) >= 0) {
                        // A digit after the dot: number starting with its fraction.
                        reader.putChar('.');
                        scanFractionAndSuffix(pos);
                    } else if (reader.ch == '.') {
                        // Two dots so far; only a third one forms a token (ELLIPSIS).
                        int savePos = reader.bp;
                        reader.putChar('.'); reader.putChar('.', true);
                        if (reader.ch == '.') {
                            reader.scanChar();
                            reader.putChar('.');
                            tk = TokenKind.ELLIPSIS;
                        } else {
                            lexError(savePos, Errors.IllegalDot);
                        }
                    } else {
                        tk = TokenKind.DOT;
                    }
                    break loop;
                // Single-character separators.
                case ',':
                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
                case ';':
                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
                case '(':
                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
                case ')':
                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
                case '[':
                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
                case ']':
                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
                case '{':
                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
                case '}':
                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
                case '/':
                    // Line comment, block/javadoc comment, "/=" or "/".
                    reader.scanChar();
                    if (reader.ch == '/') {
                        // Line comment: consume up to, but not including, the
                        // line terminator or the end of the buffer.
                        do {
                            reader.scanCommentChar();
                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                        // The comment is recorded only when the scan stopped
                        // before the end of the buffer.
                        if (reader.bp < reader.buflen) {
                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
                        }
                        break;
                    } else if (reader.ch == '*') {
                        // isEmpty is set when "/**" is directly followed by '/'.
                        boolean isEmpty = false;
                        reader.scanChar();
                        CommentStyle style;
                        if (reader.ch == '*') {
                            style = CommentStyle.JAVADOC;
                            reader.scanCommentChar();
                            if (reader.ch == '/') {
                                isEmpty = true;
                            }
                        } else {
                            style = CommentStyle.BLOCK;
                        }
                        // Search for the closing "*/"; the loop stops on the '/'
                        // that follows a '*', or at the end of the buffer.
                        while (!isEmpty && reader.bp < reader.buflen) {
                            if (reader.ch == '*') {
                                reader.scanChar();
                                if (reader.ch == '/') break;
                            } else {
                                reader.scanCommentChar();
                            }
                        }
                        if (reader.ch == '/') {
                            // Properly closed: record the comment and keep scanning.
                            reader.scanChar();
                            comments = addComment(comments, processComment(pos, reader.bp, style));
                            break;
                        } else {
                            lexError(pos, Errors.UnclosedComment);
                            break loop;
                        }
                    } else if (reader.ch == '=') {
                        tk = TokenKind.SLASHEQ;
                        reader.scanChar();
                    } else {
                        tk = TokenKind.SLASH;
                    }
                    break loop;
                case '\'':
                    // Character literal.
                    reader.scanChar();
                    if (reader.ch == '\'') {
                        // Two quotes with nothing in between.
                        lexError(pos, Errors.EmptyCharLit);
                        reader.scanChar();
                    } else {
                        if (isEOLN())
                            lexError(pos, Errors.IllegalLineEndInCharLit);
                        scanLitChar(pos, true, false);
                        if (reader.ch == '\'') {
                            reader.scanChar();
                            tk = TokenKind.CHARLITERAL;
                        } else {
                            lexError(pos, Errors.UnclosedCharLit);
                        }
                    }
                    break loop;
                case '\"':
                    scanString(pos);
                    break loop;
                default:
                    // Operators, non-ASCII identifiers and digits, end of
                    // input, and illegal characters.
                    if (isSpecial(reader.ch)) {
                        scanOperator();
                    } else {
                        boolean isJavaIdentifierStart;
                        // Stays -1 unless peekSurrogates() is consulted below.
                        int codePoint = -1;
                        if (reader.ch < '\u0080') {
                            // all ASCII range chars already handled, above
                            isJavaIdentifierStart = false;
                        } else {
                            codePoint = reader.peekSurrogates();
                            if (codePoint >= 0) {
                                // A non-negative result is classified as a whole
                                // code point rather than as a single char.
                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
                                    reader.putChar(true);
                                }
                            } else {
                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
                            }
                        }
                        if (isJavaIdentifierStart) {
                            scanIdent();
                        } else if (reader.digit(pos, 10) >= 0) {
                            scanNumber(pos, 10);
                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                            // End of input; an EOI character in the last buffer
                            // position is treated as end of input as well.
                            tk = TokenKind.EOF;
                            pos = reader.realLength;
                        } else {
                            // Illegal character: build a printable form of it
                            // for the diagnostic.
                            String arg;

                            if (codePoint >= 0) {
                                // Report both chars of the pair as \\uXXXX escapes.
                                char high = reader.ch;
                                reader.scanChar();
                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
                            } else {
                                // Chars strictly between 32 and 127 are shown
                                // as themselves, all others as an escape.
                                arg = (32 < reader.ch && reader.ch < 127) ?
                                                String.format("%s", reader.ch) :
                                                String.format("\\u%04x", (int)reader.ch);
                            }
                            lexError(pos, Errors.IllegalChar(arg));
                            reader.scanChar();
                        }
                    }
                    break loop;
                }
            }
            endPos = reader.bp;
            // Build the token object that matches the kind's tag.
            switch (tk.tag) {
                case DEFAULT: return new Token(tk, pos, endPos, comments);
                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
                case STRING: {
                    // Get characters from string buffer.
                    String string = reader.chars();
                    // If a text block.
                    if (isTextBlock && TextBlockSupport.hasSupport()) {
                        // Verify that the incidental indentation is consistent.
                        if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
                            Set<TextBlockSupport.WhitespaceChecks> checks =
                                    TextBlockSupport.checkWhitespace(string);
                            if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
                                lexWarning(LintCategory.TEXT_BLOCKS, pos,
                                        Warnings.InconsistentWhiteSpaceIndentation);
                            }
                            if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) {
                                lexWarning(LintCategory.TEXT_BLOCKS, pos,
                                        Warnings.TrailingWhiteSpaceWillBeRemoved);
                            }
                        }
                        // Remove incidental indentation.
                        try {
                            string = TextBlockSupport.stripIndent(string);
                        } catch (Exception ex) {
                            // Error already reported, just use unstripped string.
                        }
                    }
                    // Translate escape sequences if present.
                    if (hasEscapeSequences && TextBlockSupport.hasSupport()) {
                        try {
                            string = TextBlockSupport.translateEscapes(string);
                        } catch (Exception ex) {
                            // Error already reported, just use untranslated string.
                        }
                    }
                    // Build string token.
                    return new StringToken(tk, pos, endPos, string, comments);
                }
                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
                default: throw new AssertionError();
            }
        }
        finally {
            // Debug trace of the raw characters between pos and endPos; being
            // in a finally clause it runs on every exit from the try block.
            if (scannerDebug) {
                    System.out.println("nextToken(" + pos
                                       + "," + endPos + ")=|" +
                                       new String(reader.getRawCharacters(pos, endPos))
                                       + "|");
            }
        }
    }
1036     //where
1037         List<Comment> addComment(List<Comment> comments, Comment comment) {
1038             return comments == null ?
1039                     List.of(comment) :
1040                     comments.prepend(comment);
1041         }
1042 
1043     /** Return the position where a lexical error occurred;
1044      */
1045     public int errPos() {
1046         return errPos;
1047     }
1048 
1049     /** Set the position where a lexical error occurred;
1050      */
1051     public void errPos(int pos) {
1052         errPos = pos;
1053     }
1054 
1055     /**
1056      * Called when a complete comment has been scanned. pos and endPos
1057      * will mark the comment boundary.
1058      */
1059     protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
1060         if (scannerDebug)
1061             System.out.println("processComment(" + pos
1062                                + "," + endPos + "," + style + ")=|"
1063                                + new String(reader.getRawCharacters(pos, endPos))
1064                                + "|");
1065         char[] buf = reader.getRawCharacters(pos, endPos);
1066         return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
1067     }
1068 
1069     /**
1070      * Called when a complete whitespace run has been scanned. pos and endPos
1071      * will mark the whitespace boundary.
1072      */
1073     protected void processWhiteSpace(int pos, int endPos) {
1074         if (scannerDebug)
1075             System.out.println("processWhitespace(" + pos
1076                                + "," + endPos + ")=|" +
1077                                new String(reader.getRawCharacters(pos, endPos))
1078                                + "|");
1079     }
1080 
1081     /**
1082      * Called when a line terminator has been processed.
1083      */
1084     protected void processLineTerminator(int pos, int endPos) {
1085         if (scannerDebug)
1086             System.out.println("processTerminator(" + pos
1087                                + "," + endPos + ")=|" +
1088                                new String(reader.getRawCharacters(pos, endPos))
1089                                + "|");
1090     }
1091 
1092     /** Build a map for translating between line numbers and
1093      * positions in the input.
1094      *
1095      * @return a LineMap */
1096     public Position.LineMap getLineMap() {
1097         return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
1098     }
1099 
1100 
    /**
    * Basic implementation of {@code Comment}. It records the comment's style
    * and holds a reader over the comment's characters. For comments of style
    * JAVADOC it determines, on the first call to {@link #isDeprecated()},
    * whether a deprecated tag is present. This class supplies no comment
    * text and no source positions.
    *
    * @param <U> the type of reader used to scan the comment
    */
    protected static class BasicComment<U extends UnicodeReader> implements Comment {

        // Style of this comment, as passed to the constructor.
        CommentStyle cs;
        // Reader used by scanDocComment() to walk the comment's characters.
        U comment_reader;

        // Set by scanDocComment() when a deprecated tag has been found.
        protected boolean deprecatedFlag = false;
        // Set once scanDocComment() has run, so the scan is not repeated.
        protected boolean scanned = false;

        /**
         * Create a comment of the given style.
         *
         * @param comment_reader reader over the comment's characters
         * @param cs             the style of the comment
         */
        protected BasicComment(U comment_reader, CommentStyle cs) {
            this.comment_reader = comment_reader;
            this.cs = cs;
        }

        /** This implementation provides no text and always returns null. */
        public String getText() {
            return null;
        }

        /** This implementation provides no positions and always returns -1. */
        public int getSourcePos(int pos) {
            return -1;
        }

        /** Return the style given at construction. */
        public CommentStyle getStyle() {
            return cs;
        }

        /**
         * Tell whether a deprecated tag was found in this comment. The
         * comment is scanned on the first call, and only when its style is
         * JAVADOC; for every other style the flag keeps its initial value.
         */
        public boolean isDeprecated() {
            if (!scanned && cs == CommentStyle.JAVADOC) {
                scanDocComment();
            }
            return deprecatedFlag;
        }

        /**
         * Scan a documentation comment; determine if a deprecated tag is
         * present. The reader is first advanced by three positions to step
         * over the opening delimiter, then the comment is processed line by
         * line: leading white space, stars and further white space are
         * skipped, and what follows is compared with "@deprecated". The tag
         * counts when it is followed by white space or by the closing
         * delimiter. Scanning ends at the closing delimiter or at the end of
         * the buffer; in every case {@code scanned} is set on exit.
         */
        @SuppressWarnings("fallthrough")
        protected void scanDocComment() {
            try {
                // True while the current line starts with the complete tag text.
                boolean deprecatedPrefix = false;

                comment_reader.bp += 3; // '/**'
                comment_reader.ch = comment_reader.buf[comment_reader.bp];

                forEachLine:
                while (comment_reader.bp < comment_reader.buflen) {

                    // Skip optional WhiteSpace at beginning of line
                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
                        comment_reader.scanCommentChar();
                    }

                    // Skip optional consecutive Stars
                    while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
                        comment_reader.scanCommentChar();
                        // A '/' right after a star closes the comment.
                        if (comment_reader.ch == '/') {
                            return;
                        }
                    }

                    // Skip optional WhiteSpace after Stars
                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
                        comment_reader.scanCommentChar();
                    }

                    deprecatedPrefix = false;
                    // At beginning of line in the JavaDoc sense.
                    if (!deprecatedFlag) {
                        // Match the tag text character by character; stop at
                        // the first mismatch or once all of it has matched.
                        String deprecated = "@deprecated";
                        int i = 0;
                        while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
                            comment_reader.scanCommentChar();
                            i++;
                            if (i == deprecated.length()) {
                                deprecatedPrefix = true;
                                break;
                            }
                        }
                    }

                    // The tag only counts when followed by white space or
                    // by the closing delimiter.
                    if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
                        if (Character.isWhitespace(comment_reader.ch)) {
                            deprecatedFlag = true;
                        } else if (comment_reader.ch == '*') {
                            comment_reader.scanCommentChar();
                            if (comment_reader.ch == '/') {
                                deprecatedFlag = true;
                                return;
                            }
                        }
                    }

                    // Skip rest of line
                    while (comment_reader.bp < comment_reader.buflen) {
                        switch (comment_reader.ch) {
                            case '*':
                                comment_reader.scanCommentChar();
                                // A '/' right after a star closes the comment.
                                if (comment_reader.ch == '/') {
                                    return;
                                }
                                break;
                            case CR: // (Spec 3.4)
                                comment_reader.scanCommentChar();
                                // A CR not followed by LF ends the line by itself.
                                if (comment_reader.ch != LF) {
                                    continue forEachLine;
                                }
                            /* fall through to LF case */
                            case LF: // (Spec 3.4)
                                comment_reader.scanCommentChar();
                                continue forEachLine;
                            default:
                                comment_reader.scanCommentChar();
                        }
                    } // rest of line
                } // forEachLine
                return;
            } finally {
                // Mark the comment as scanned on every exit path.
                scanned = true;
            }
        }
    }
1224 }